From a356e562c70a349569f3b62cbf5ad894fb0bae4c Mon Sep 17 00:00:00 2001 From: kemihak Date: Wed, 10 Jan 2024 11:34:42 +0100 Subject: [PATCH 1/4] Add referential for output column naming --- R/zzz.R | 6 ++++++ inst/format_output/referentiel_output_name_column.csv | 6 ++++++ 2 files changed, 12 insertions(+) create mode 100644 inst/format_output/referentiel_output_name_column.csv diff --git a/R/zzz.R b/R/zzz.R index e1a7f4da..10198c9f 100644 --- a/R/zzz.R +++ b/R/zzz.R @@ -107,6 +107,12 @@ utils::globalVariables( "MRG. PRICE", "H. LEV", "V2", "V1") ) +# Correspondance between output column name and data table column name +pkgEnv$output_correspondance <- read.table(system.file("format_output/referentiel_output_name_column.csv", + package = "antaresRead"), + sep = ";", + header = TRUE) + #----------------------------- HDF5 ------------------------------------# diff --git a/inst/format_output/referentiel_output_name_column.csv b/inst/format_output/referentiel_output_name_column.csv new file mode 100644 index 00000000..dcf48543 --- /dev/null +++ b/inst/format_output/referentiel_output_name_column.csv @@ -0,0 +1,6 @@ +ANTARES_OUTPUT_TYPE;ANTARES_OUTPUT_FILE_COLUMN_NAME;ANTARES_OUTPUT_R_VARIABLE;ANTARES_OUTPUT_ORDINAL_POSITION +clusters;MWh;production;1 +clusters;NP Cost - Euro;NP Cost;2 +clusters;NODU;NODU;3 +clusters;Profit - Euro;profit;4 +res_clusters;MWh;production;1 From ce01e9539d7116abfe2a24ebac7195a7a0a2880a Mon Sep 17 00:00:00 2001 From: kemihak Date: Wed, 10 Jan 2024 15:48:14 +0100 Subject: [PATCH 2/4] Use referential to match the output columns --- R/importOutput.R | 75 +++++++++++++++++++++++++++++++----------------- 1 file changed, 48 insertions(+), 27 deletions(-) diff --git a/R/importOutput.R b/R/importOutput.R index f2ae8b60..1e28689d 100644 --- a/R/importOutput.R +++ b/R/importOutput.R @@ -32,9 +32,17 @@ error = function(e) NULL) } if(!is.null(colname)){ - colname <- apply(colname[c(1,3),], 2, paste, collapse = "_") - colname[1:2] <- c(objectName, "timeId") - colname <- gsub("^_|_EXP$|_values$|_$", "", colname) + path_elts <- unlist(strsplit(path, split = "/")) + if (startsWith(path_elts[length(path_elts)], "details-")) { + # Put a custom separator XXX to be able to split data if necessary + colname <- apply(colname[c(1:3),], 2, paste, collapse = "XXX") + colname[1:2] <- c(objectName, "timeId") + colname <- gsub("^XXX{1,}|XXXEXP$|XXXvalues$|XXX{1,}$", "", colname) + } else { + colname <- apply(colname[c(1,3),], 2, paste, collapse = "_") + colname[1:2] <- c(objectName, "timeId") + colname <- gsub("^_|_EXP$|_values$|_$", "", colname) + } } colname @@ -277,6 +285,7 @@ ) } + #' .importOutputForClusters #' #' Private function used to import the output for the thermal clusters of one area @@ -295,26 +304,32 @@ # To improve greatly the performance we use our knowledge of the position of # the columns instead of using more general functions like dcast. reshapeFun <- function(x) { + + corr_clusters <- pkgEnv$output_correspondance + cols_to_keep <- setdiff(colnames(corr_clusters), "ANTARES_OUTPUT_TYPE") + corr_clusters <- corr_clusters[corr_clusters$ANTARES_OUTPUT_TYPE == "clusters", cols_to_keep] + # Get cluster names n <- names(x) idx <- ! n %in% pkgEnv$idVars - clusterNames <- tolower(unique(n[idx])) + clusters <- n[idx] + # Split the data with the specific separator defined in .getOutputHeader() + specific_separator <- "XXX" + outputElts <- lapply(strsplit(clusters, split = specific_separator), + function(y) list("cluster" = y[-length(y)], "var" = y[length(y)]) + ) + clusterNames <- tolower(unique(sapply(outputElts, "[[", "cluster"))) + + # output colnames + colNames <- sapply(outputElts, "[[", "var") + cols_to_keep <- setdiff(colnames(corr_clusters), "ANTARES_OUTPUT_FILE_COLUMN_NAME") + corr_clusters <- corr_clusters[corr_clusters$ANTARES_OUTPUT_FILE_COLUMN_NAME %in% colNames, cols_to_keep] + colNames <- corr_clusters[order(corr_clusters$ANTARES_OUTPUT_ORDINAL_POSITION), "ANTARES_OUTPUT_R_VARIABLE"] # Id vars names idVarsId <- which(!idx) idVarsNames <- n[idVarsId] - # Get final value columns - if (sum(idx) / length(clusterNames) == 4) { - colNames <- c("production", "NP Cost", "NODU", "profit") - } else if (sum(idx) / length(clusterNames) == 3) { - colNames <- c("production", "NP Cost", "NODU") - } else if (sum(idx) / length(clusterNames) == 2) { - colNames <- c("production", "NP Cost") - } else { - colNames <- c("production") - } - # Loop over clusters nclusters <- length(clusterNames) ncols <- length(colNames) @@ -457,26 +472,32 @@ # To improve greatly the performance we use our knowledge of the position of # the columns instead of using more general functions like dcast. reshapeFun <- function(x) { + + corr_res_clusters <- pkgEnv$output_correspondance + cols_to_keep <- setdiff(colnames(corr_res_clusters), "ANTARES_OUTPUT_TYPE") + corr_res_clusters <- corr_res_clusters[corr_res_clusters$ANTARES_OUTPUT_TYPE == "res_clusters", cols_to_keep] + # Get cluster names n <- names(x) idx <- ! n %in% pkgEnv$idVars - clusterNames <- tolower(unique(n[idx])) + clusters <- n[idx] + # Split the data with the specific separator defined in .getOutputHeader() + specific_separator <- "XXX" + outputElts <- lapply(strsplit(clusters, split = specific_separator), + function(y) list("cluster" = y[-length(y)], "var" = y[length(y)]) + ) + clusterNames <- tolower(unique(sapply(outputElts, "[[", "cluster"))) + + # output colnames + colNames <- sapply(outputElts, "[[", "var") + cols_to_keep <- setdiff(colnames(corr_res_clusters), "ANTARES_OUTPUT_FILE_COLUMN_NAME") + corr_res_clusters <- corr_res_clusters[corr_res_clusters$ANTARES_OUTPUT_FILE_COLUMN_NAME %in% colNames, cols_to_keep] + colNames <- corr_res_clusters[order(corr_res_clusters$ANTARES_OUTPUT_ORDINAL_POSITION), "ANTARES_OUTPUT_R_VARIABLE"] # Id vars names idVarsId <- which(!idx) idVarsNames <- n[idVarsId] - # Get final value columns - # Get final value columns - # colNames <- c("resProduction") - if (sum(idx) / length(clusterNames) == 3) { - colNames <- c("production", "NP Cost", "NODU") - } else if (sum(idx) / length(clusterNames) == 2) { - colNames <- c("production", "NP Cost") - } else { - colNames <- c("production") - } - # Loop over clusters nclusters <- length(clusterNames) ncols <- length(colNames) From b84a62bbaf20a031b0e649d17bf345def0e8da74 Mon Sep 17 00:00:00 2001 From: kemihak Date: Thu, 11 Jan 2024 10:17:44 +0100 Subject: [PATCH 3/4] Add unit test to check the output column names --- tests/testthat/test-readAntares.R | 10 ++++++++++ 1 file changed, 10 insertions(+) diff --git a/tests/testthat/test-readAntares.R b/tests/testthat/test-readAntares.R index 611db921..a5fef918 100644 --- a/tests/testthat/test-readAntares.R +++ b/tests/testthat/test-readAntares.R @@ -28,6 +28,16 @@ sapply(studyPathS, function(studyPath){ expect_equal(nrow(clusters), 24 * 7 * nweeks * nrow(readClusterDesc())) }) + test_that("Clusters importation column names are ok", { + clusters <- readAntares(clusters = opts$areasWithClusters, + timeStep = "hourly", + mcYears = "all", + opts = opts, + showProgress = FALSE) + expect_is(clusters, "data.table") + expect_equal(setdiff(colnames(clusters),pkgEnv$idVars), c("production", "NP Cost", "NODU")) + }) + test_that("importation of different objects works", { out <- readAntares(areas = opts$areaList, links=opts$linkList, clusters=opts$areasWithClusters, showProgress= FALSE, timeStep = "annual") From b8f23babaf72354a77c03ecce60d49da7dffa523 Mon Sep 17 00:00:00 2001 From: kemihak Date: Thu, 11 Jan 2024 10:18:33 +0100 Subject: [PATCH 4/4] Add .get_values_columns_details_file to compute the output column names --- R/importOutput.R | 113 ++++++++++++++++++++++++++--------------------- 1 file changed, 63 insertions(+), 50 deletions(-) diff --git a/R/importOutput.R b/R/importOutput.R index 1e28689d..7da193b5 100644 --- a/R/importOutput.R +++ b/R/importOutput.R @@ -32,22 +32,15 @@ error = function(e) NULL) } if(!is.null(colname)){ - path_elts <- unlist(strsplit(path, split = "/")) - if (startsWith(path_elts[length(path_elts)], "details-")) { - # Put a custom separator XXX to be able to split data if necessary - colname <- apply(colname[c(1:3),], 2, paste, collapse = "XXX") - colname[1:2] <- c(objectName, "timeId") - colname <- gsub("^XXX{1,}|XXXEXP$|XXXvalues$|XXX{1,}$", "", colname) - } else { - colname <- apply(colname[c(1,3),], 2, paste, collapse = "_") - colname[1:2] <- c(objectName, "timeId") - colname <- gsub("^_|_EXP$|_values$|_$", "", colname) - } + colname <- apply(colname[c(1,3),], 2, paste, collapse = "_") + colname[1:2] <- c(objectName, "timeId") + colname <- gsub("^_|_EXP$|_values$|_$", "", colname) } colname } + #' .importOutput #' #' Private function used to import the results of a simulation. The type of result @@ -286,6 +279,54 @@ } +#' .get_value_columns_details_file +#' +#' Private function used to get the column names for the details-.txt or details-res-.txt. +#' Used in .importOutputForClusters() and importOutputForResClusters() +#' +#' @return +#' a vector +#' +#' @noRd +#' +.get_value_columns_details_file <- function(opts, type) { + + ## details part + if(type == "details") { + # Order is important. There is a correspondance between elements. + all_thematic_variables <- c("DTG by plant", "NP Cost by plant", "NODU by plant") + all_output_colnames <- c("production", "NP Cost", "NODU") + if (opts$antaresVersion >= 830){ + all_thematic_variables <- c(all_thematic_variables, "Profit by plant") + all_output_colnames <- c(all_output_colnames, "profit") + } + colNames <- all_output_colnames + if ("variables selection" %in% names(opts$parameters)) { + selection_type <- unique(names(opts$parameters$`variables selection`)) + selected_variables <- unlist(opts$parameters$`variables selection`, use.names = FALSE) + # Index of the variables found in the section "variables selection" + idx_vars <- which(all_thematic_variables %in% selected_variables) + if (length(idx_vars) > 0) { + if (selection_type == "select_var -") { + # vars to remove + colNames <- colNames[-idx_vars] + } else if (selection_type == "select_var +") { + # vars to keep + colNames <- colNames[idx_vars] + } + } + } + } + + ## details-res part + if(type == "details-res") { + colNames <- c("production") + } + + return(colNames) +} + + #' .importOutputForClusters #' #' Private function used to import the output for the thermal clusters of one area @@ -305,42 +346,28 @@ # the columns instead of using more general functions like dcast. reshapeFun <- function(x) { - corr_clusters <- pkgEnv$output_correspondance - cols_to_keep <- setdiff(colnames(corr_clusters), "ANTARES_OUTPUT_TYPE") - corr_clusters <- corr_clusters[corr_clusters$ANTARES_OUTPUT_TYPE == "clusters", cols_to_keep] - # Get cluster names n <- names(x) idx <- ! n %in% pkgEnv$idVars - clusters <- n[idx] - # Split the data with the specific separator defined in .getOutputHeader() - specific_separator <- "XXX" - outputElts <- lapply(strsplit(clusters, split = specific_separator), - function(y) list("cluster" = y[-length(y)], "var" = y[length(y)]) - ) - clusterNames <- tolower(unique(sapply(outputElts, "[[", "cluster"))) - - # output colnames - colNames <- sapply(outputElts, "[[", "var") - cols_to_keep <- setdiff(colnames(corr_clusters), "ANTARES_OUTPUT_FILE_COLUMN_NAME") - corr_clusters <- corr_clusters[corr_clusters$ANTARES_OUTPUT_FILE_COLUMN_NAME %in% colNames, cols_to_keep] - colNames <- corr_clusters[order(corr_clusters$ANTARES_OUTPUT_ORDINAL_POSITION), "ANTARES_OUTPUT_R_VARIABLE"] + clusterNames <- tolower(unique(n[idx])) # Id vars names idVarsId <- which(!idx) idVarsNames <- n[idVarsId] + # Column names of the output table + colNames <- .get_value_columns_details_file(opts, "details") + # Loop over clusters nclusters <- length(clusterNames) - ncols <- length(colNames) res <- llply(1:nclusters, function(i) { - dt <- x[, c(nclusters * 0:(ncols - 1) + i, idVarsId), with = FALSE] + dt <- x[, c(nclusters * 0:(length(colNames) - 1) + i, idVarsId), with = FALSE] setnames(dt, c(colNames, idVarsNames)) dt[, cluster := as.factor(clusterNames[i])] dt }) - + rbindlist(res) } @@ -472,38 +499,24 @@ # To improve greatly the performance we use our knowledge of the position of # the columns instead of using more general functions like dcast. reshapeFun <- function(x) { - - corr_res_clusters <- pkgEnv$output_correspondance - cols_to_keep <- setdiff(colnames(corr_res_clusters), "ANTARES_OUTPUT_TYPE") - corr_res_clusters <- corr_res_clusters[corr_res_clusters$ANTARES_OUTPUT_TYPE == "res_clusters", cols_to_keep] # Get cluster names n <- names(x) idx <- ! n %in% pkgEnv$idVars - clusters <- n[idx] - # Split the data with the specific separator defined in .getOutputHeader() - specific_separator <- "XXX" - outputElts <- lapply(strsplit(clusters, split = specific_separator), - function(y) list("cluster" = y[-length(y)], "var" = y[length(y)]) - ) - clusterNames <- tolower(unique(sapply(outputElts, "[[", "cluster"))) - - # output colnames - colNames <- sapply(outputElts, "[[", "var") - cols_to_keep <- setdiff(colnames(corr_res_clusters), "ANTARES_OUTPUT_FILE_COLUMN_NAME") - corr_res_clusters <- corr_res_clusters[corr_res_clusters$ANTARES_OUTPUT_FILE_COLUMN_NAME %in% colNames, cols_to_keep] - colNames <- corr_res_clusters[order(corr_res_clusters$ANTARES_OUTPUT_ORDINAL_POSITION), "ANTARES_OUTPUT_R_VARIABLE"] + clusterNames <- tolower(unique(n[idx])) # Id vars names idVarsId <- which(!idx) idVarsNames <- n[idVarsId] + # Column names of the output table + colNames <- .get_value_columns_details_file(opts, "details-res") + # Loop over clusters nclusters <- length(clusterNames) - ncols <- length(colNames) res <- llply(1:nclusters, function(i) { - dt <- x[, c(nclusters * 0:(ncols - 1) + i, idVarsId), with = FALSE] + dt <- x[, c(nclusters * 0:(length(colNames) - 1) + i, idVarsId), with = FALSE] setnames(dt, c(colNames, idVarsNames)) dt[, cluster := as.factor(clusterNames[i])] dt