From a356e562c70a349569f3b62cbf5ad894fb0bae4c Mon Sep 17 00:00:00 2001
From: kemihak <kamel.kemiha@rte-france.com>
Date: Wed, 10 Jan 2024 11:34:42 +0100
Subject: [PATCH 1/4] Add referential for output column naming

---
 R/zzz.R                                               | 6 ++++++
 inst/format_output/referentiel_output_name_column.csv | 6 ++++++
 2 files changed, 12 insertions(+)
 create mode 100644 inst/format_output/referentiel_output_name_column.csv

diff --git a/R/zzz.R b/R/zzz.R
index e1a7f4da..10198c9f 100644
--- a/R/zzz.R
+++ b/R/zzz.R
@@ -107,6 +107,12 @@ utils::globalVariables(
     "MRG. PRICE", "H. LEV", "V2", "V1")
 )
 
+# Correspondance between output column name and data table column name
+pkgEnv$output_correspondance <- read.table(system.file("format_output/referentiel_output_name_column.csv", 
+                                        package = "antaresRead"),
+                            sep = ";", 
+                            header = TRUE)
+
 #-----------------------------  HDF5 ------------------------------------#
 
 
diff --git a/inst/format_output/referentiel_output_name_column.csv b/inst/format_output/referentiel_output_name_column.csv
new file mode 100644
index 00000000..dcf48543
--- /dev/null
+++ b/inst/format_output/referentiel_output_name_column.csv
@@ -0,0 +1,6 @@
+ANTARES_OUTPUT_TYPE;ANTARES_OUTPUT_FILE_COLUMN_NAME;ANTARES_OUTPUT_R_VARIABLE;ANTARES_OUTPUT_ORDINAL_POSITION
+clusters;MWh;production;1
+clusters;NP Cost - Euro;NP Cost;2
+clusters;NODU;NODU;3
+clusters;Profit - Euro;profit;4
+res_clusters;MWh;production;1

From ce01e9539d7116abfe2a24ebac7195a7a0a2880a Mon Sep 17 00:00:00 2001
From: kemihak <kamel.kemiha@rte-france.com>
Date: Wed, 10 Jan 2024 15:48:14 +0100
Subject: [PATCH 2/4] Use referential to match the output columns

---
 R/importOutput.R | 75 +++++++++++++++++++++++++++++++-----------------
 1 file changed, 48 insertions(+), 27 deletions(-)

diff --git a/R/importOutput.R b/R/importOutput.R
index f2ae8b60..1e28689d 100644
--- a/R/importOutput.R
+++ b/R/importOutput.R
@@ -32,9 +32,17 @@
                         error = function(e) NULL)
   }
   if(!is.null(colname)){
-    colname <- apply(colname[c(1,3),], 2, paste, collapse = "_") 
-    colname[1:2] <- c(objectName, "timeId")
-    colname <- gsub("^_|_EXP$|_values$|_$", "", colname)
+    path_elts <- unlist(strsplit(path, split = "/"))
+    if (startsWith(path_elts[length(path_elts)], "details-")) {
+      # Put a custom separator XXX to be able to split data if necessary
+      colname <- apply(colname[c(1:3),], 2, paste, collapse = "XXX") 
+      colname[1:2] <- c(objectName, "timeId")
+      colname <- gsub("^XXX{1,}|XXXEXP$|XXXvalues$|XXX{1,}$", "", colname)
+    } else {
+      colname <- apply(colname[c(1,3),], 2, paste, collapse = "_") 
+      colname[1:2] <- c(objectName, "timeId")
+      colname <- gsub("^_|_EXP$|_values$|_$", "", colname)
+    }
   }
   
   colname
@@ -277,6 +285,7 @@
   )
 }
 
+
 #' .importOutputForClusters
 #'
 #' Private function used to import the output for the thermal clusters of one area
@@ -295,26 +304,32 @@
   # To improve greatly the performance we use our knowledge of the position of 
   # the columns instead of using more general functions like dcast.
   reshapeFun <- function(x) {
+    
+    corr_clusters <- pkgEnv$output_correspondance
+    cols_to_keep <- setdiff(colnames(corr_clusters), "ANTARES_OUTPUT_TYPE")
+    corr_clusters <- corr_clusters[corr_clusters$ANTARES_OUTPUT_TYPE == "clusters", cols_to_keep]
+    
     # Get cluster names
     n <- names(x)
     idx <- ! n %in% pkgEnv$idVars
-    clusterNames <- tolower(unique(n[idx]))
+    clusters <- n[idx]
+    # Split the data with the specific separator defined in .getOutputHeader()
+    specific_separator <- "XXX"
+    outputElts <- lapply(strsplit(clusters, split = specific_separator),
+                           function(y) list("cluster" = y[-length(y)], "var" = y[length(y)])
+                         )
+    clusterNames <- tolower(unique(sapply(outputElts, "[[", "cluster")))
+		
+    # output colnames
+    colNames <- sapply(outputElts, "[[", "var")
+    cols_to_keep <- setdiff(colnames(corr_clusters), "ANTARES_OUTPUT_FILE_COLUMN_NAME")
+    corr_clusters <- corr_clusters[corr_clusters$ANTARES_OUTPUT_FILE_COLUMN_NAME %in% colNames, cols_to_keep]
+    colNames <- corr_clusters[order(corr_clusters$ANTARES_OUTPUT_ORDINAL_POSITION), "ANTARES_OUTPUT_R_VARIABLE"]
     
     # Id vars names
     idVarsId <- which(!idx)
     idVarsNames <- n[idVarsId]
     
-    # Get final value columns
-    if (sum(idx) / length(clusterNames) == 4) {
-      colNames <- c("production", "NP Cost", "NODU", "profit")
-    } else if (sum(idx) / length(clusterNames) == 3) {
-      colNames <- c("production", "NP Cost", "NODU")
-    } else if (sum(idx) / length(clusterNames) == 2) {
-      colNames <- c("production", "NP Cost")
-    } else {
-      colNames <- c("production")
-    }
-    
     # Loop over clusters
     nclusters <- length(clusterNames)
     ncols <- length(colNames)
@@ -457,26 +472,32 @@
   # To improve greatly the performance we use our knowledge of the position of 
   # the columns instead of using more general functions like dcast.
   reshapeFun <- function(x) {
+  
+    corr_res_clusters <- pkgEnv$output_correspondance
+    cols_to_keep <- setdiff(colnames(corr_res_clusters), "ANTARES_OUTPUT_TYPE")
+    corr_res_clusters <- corr_res_clusters[corr_res_clusters$ANTARES_OUTPUT_TYPE == "res_clusters", cols_to_keep]
+    
     # Get cluster names
     n <- names(x)
     idx <- ! n %in% pkgEnv$idVars
-    clusterNames <- tolower(unique(n[idx]))
+    clusters <- n[idx]
+    # Split the data with the specific separator defined in .getOutputHeader()
+    specific_separator <- "XXX"
+    outputElts <- lapply(strsplit(clusters, split = specific_separator),
+                           function(y) list("cluster" = y[-length(y)], "var" = y[length(y)])
+                         )
+    clusterNames <- tolower(unique(sapply(outputElts, "[[", "cluster")))
+    
+    # output colnames
+    colNames <- sapply(outputElts, "[[", "var")
+    cols_to_keep <- setdiff(colnames(corr_res_clusters), "ANTARES_OUTPUT_FILE_COLUMN_NAME")
+    corr_res_clusters <- corr_res_clusters[corr_res_clusters$ANTARES_OUTPUT_FILE_COLUMN_NAME %in% colNames, cols_to_keep]
+    colNames <- corr_res_clusters[order(corr_res_clusters$ANTARES_OUTPUT_ORDINAL_POSITION), "ANTARES_OUTPUT_R_VARIABLE"]
     
     # Id vars names
     idVarsId <- which(!idx)
     idVarsNames <- n[idVarsId]
     
-    # Get final value columns
-    # Get final value columns
-    # colNames <- c("resProduction")
-    if (sum(idx) / length(clusterNames) == 3) {
-      colNames <- c("production", "NP Cost", "NODU")
-    } else if (sum(idx) / length(clusterNames) == 2) {
-      colNames <- c("production", "NP Cost")
-    } else {
-      colNames <- c("production")
-    }
-    
     # Loop over clusters
     nclusters <- length(clusterNames)
     ncols <- length(colNames)

From b84a62bbaf20a031b0e649d17bf345def0e8da74 Mon Sep 17 00:00:00 2001
From: kemihak <kamel.kemiha@rte-france.com>
Date: Thu, 11 Jan 2024 10:17:44 +0100
Subject: [PATCH 3/4] Add unit test to check the output column names

---
 tests/testthat/test-readAntares.R | 10 ++++++++++
 1 file changed, 10 insertions(+)

diff --git a/tests/testthat/test-readAntares.R b/tests/testthat/test-readAntares.R
index 611db921..a5fef918 100644
--- a/tests/testthat/test-readAntares.R
+++ b/tests/testthat/test-readAntares.R
@@ -28,6 +28,16 @@ sapply(studyPathS, function(studyPath){
     expect_equal(nrow(clusters), 24 * 7 * nweeks * nrow(readClusterDesc()))
   })
   
+  test_that("Clusters importation column names are ok", {
+    clusters <- readAntares(clusters = opts$areasWithClusters,
+                            timeStep = "hourly",
+                            mcYears = "all",
+                            opts = opts,
+                            showProgress = FALSE)
+    expect_is(clusters, "data.table")
+    expect_equal(setdiff(colnames(clusters),pkgEnv$idVars), c("production", "NP Cost", "NODU"))
+  })
+  
   test_that("importation of different objects works", {
     out <- readAntares(areas = opts$areaList, links=opts$linkList,
                        clusters=opts$areasWithClusters, showProgress= FALSE, timeStep = "annual")

From b8f23babaf72354a77c03ecce60d49da7dffa523 Mon Sep 17 00:00:00 2001
From: kemihak <kamel.kemiha@rte-france.com>
Date: Thu, 11 Jan 2024 10:18:33 +0100
Subject: [PATCH 4/4] Add .get_values_columns_details_file to compute the
 output column names

---
 R/importOutput.R | 113 ++++++++++++++++++++++++++---------------------
 1 file changed, 63 insertions(+), 50 deletions(-)

diff --git a/R/importOutput.R b/R/importOutput.R
index 1e28689d..7da193b5 100644
--- a/R/importOutput.R
+++ b/R/importOutput.R
@@ -32,22 +32,15 @@
                         error = function(e) NULL)
   }
   if(!is.null(colname)){
-    path_elts <- unlist(strsplit(path, split = "/"))
-    if (startsWith(path_elts[length(path_elts)], "details-")) {
-      # Put a custom separator XXX to be able to split data if necessary
-      colname <- apply(colname[c(1:3),], 2, paste, collapse = "XXX") 
-      colname[1:2] <- c(objectName, "timeId")
-      colname <- gsub("^XXX{1,}|XXXEXP$|XXXvalues$|XXX{1,}$", "", colname)
-    } else {
-      colname <- apply(colname[c(1,3),], 2, paste, collapse = "_") 
-      colname[1:2] <- c(objectName, "timeId")
-      colname <- gsub("^_|_EXP$|_values$|_$", "", colname)
-    }
+    colname <- apply(colname[c(1,3),], 2, paste, collapse = "_") 
+    colname[1:2] <- c(objectName, "timeId")
+    colname <- gsub("^_|_EXP$|_values$|_$", "", colname)
   }
   
   colname
 }
 
+
 #' .importOutput
 #'
 #' Private function used to import the results of a simulation. The type of result
@@ -286,6 +279,54 @@
 }
 
 
+#' .get_value_columns_details_file
+#'
+#' Private function used to get the column names for the details-<timeStep>.txt or details-res-<timestep>.txt.
+#' Used in .importOutputForClusters() and importOutputForResClusters() 
+#'
+#' @return
+#' a vector
+#'
+#' @noRd
+#'
+.get_value_columns_details_file <- function(opts, type) {
+  
+  ## details part
+  if(type == "details") {
+    # Order is important. There is a correspondance between elements.
+    all_thematic_variables <- c("DTG by plant", "NP Cost by plant", "NODU by plant")
+    all_output_colnames <- c("production", "NP Cost", "NODU")
+    if (opts$antaresVersion >= 830){
+      all_thematic_variables <- c(all_thematic_variables, "Profit by plant")
+      all_output_colnames <- c(all_output_colnames, "profit")
+    }
+    colNames <- all_output_colnames
+    if ("variables selection" %in% names(opts$parameters)) {
+      selection_type <- unique(names(opts$parameters$`variables selection`))
+      selected_variables <- unlist(opts$parameters$`variables selection`, use.names = FALSE)
+      # Index of the variables found in the section "variables selection"
+      idx_vars <- which(all_thematic_variables %in% selected_variables)
+      if (length(idx_vars) > 0) {
+        if (selection_type == "select_var -") {
+          # vars to remove
+          colNames <- colNames[-idx_vars]
+        } else if (selection_type == "select_var +") {
+          # vars to keep
+          colNames <- colNames[idx_vars]
+        }
+      }
+    }  
+  }
+  
+  ## details-res part
+  if(type == "details-res") {
+    colNames <- c("production")
+  }  
+  
+  return(colNames)
+}
+
+
 #' .importOutputForClusters
 #'
 #' Private function used to import the output for the thermal clusters of one area
@@ -305,42 +346,28 @@
   # the columns instead of using more general functions like dcast.
   reshapeFun <- function(x) {
     
-    corr_clusters <- pkgEnv$output_correspondance
-    cols_to_keep <- setdiff(colnames(corr_clusters), "ANTARES_OUTPUT_TYPE")
-    corr_clusters <- corr_clusters[corr_clusters$ANTARES_OUTPUT_TYPE == "clusters", cols_to_keep]
-    
     # Get cluster names
     n <- names(x)
     idx <- ! n %in% pkgEnv$idVars
-    clusters <- n[idx]
-    # Split the data with the specific separator defined in .getOutputHeader()
-    specific_separator <- "XXX"
-    outputElts <- lapply(strsplit(clusters, split = specific_separator),
-                           function(y) list("cluster" = y[-length(y)], "var" = y[length(y)])
-                         )
-    clusterNames <- tolower(unique(sapply(outputElts, "[[", "cluster")))
-		
-    # output colnames
-    colNames <- sapply(outputElts, "[[", "var")
-    cols_to_keep <- setdiff(colnames(corr_clusters), "ANTARES_OUTPUT_FILE_COLUMN_NAME")
-    corr_clusters <- corr_clusters[corr_clusters$ANTARES_OUTPUT_FILE_COLUMN_NAME %in% colNames, cols_to_keep]
-    colNames <- corr_clusters[order(corr_clusters$ANTARES_OUTPUT_ORDINAL_POSITION), "ANTARES_OUTPUT_R_VARIABLE"]
+    clusterNames <- tolower(unique(n[idx]))
     
     # Id vars names
     idVarsId <- which(!idx)
     idVarsNames <- n[idVarsId]
     
+    # Column names of the output table
+    colNames <- .get_value_columns_details_file(opts, "details")    
+    
     # Loop over clusters
     nclusters <- length(clusterNames)
-    ncols <- length(colNames)
     
     res <- llply(1:nclusters, function(i) {
-      dt <- x[, c(nclusters * 0:(ncols - 1) + i, idVarsId), with = FALSE]
+      dt <- x[, c(nclusters * 0:(length(colNames) - 1) + i, idVarsId), with = FALSE]
       setnames(dt, c(colNames, idVarsNames))
       dt[, cluster := as.factor(clusterNames[i])]
       dt
     })
-    
+  
     rbindlist(res)
   }
   
@@ -472,38 +499,24 @@
   # To improve greatly the performance we use our knowledge of the position of 
   # the columns instead of using more general functions like dcast.
   reshapeFun <- function(x) {
-  
-    corr_res_clusters <- pkgEnv$output_correspondance
-    cols_to_keep <- setdiff(colnames(corr_res_clusters), "ANTARES_OUTPUT_TYPE")
-    corr_res_clusters <- corr_res_clusters[corr_res_clusters$ANTARES_OUTPUT_TYPE == "res_clusters", cols_to_keep]
     
     # Get cluster names
     n <- names(x)
     idx <- ! n %in% pkgEnv$idVars
-    clusters <- n[idx]
-    # Split the data with the specific separator defined in .getOutputHeader()
-    specific_separator <- "XXX"
-    outputElts <- lapply(strsplit(clusters, split = specific_separator),
-                           function(y) list("cluster" = y[-length(y)], "var" = y[length(y)])
-                         )
-    clusterNames <- tolower(unique(sapply(outputElts, "[[", "cluster")))
-    
-    # output colnames
-    colNames <- sapply(outputElts, "[[", "var")
-    cols_to_keep <- setdiff(colnames(corr_res_clusters), "ANTARES_OUTPUT_FILE_COLUMN_NAME")
-    corr_res_clusters <- corr_res_clusters[corr_res_clusters$ANTARES_OUTPUT_FILE_COLUMN_NAME %in% colNames, cols_to_keep]
-    colNames <- corr_res_clusters[order(corr_res_clusters$ANTARES_OUTPUT_ORDINAL_POSITION), "ANTARES_OUTPUT_R_VARIABLE"]
+    clusterNames <- tolower(unique(n[idx]))
     
     # Id vars names
     idVarsId <- which(!idx)
     idVarsNames <- n[idVarsId]
     
+    # Column names of the output table
+    colNames <- .get_value_columns_details_file(opts, "details-res")  
+    
     # Loop over clusters
     nclusters <- length(clusterNames)
-    ncols <- length(colNames)
     
     res <- llply(1:nclusters, function(i) {
-      dt <- x[, c(nclusters * 0:(ncols - 1) + i, idVarsId), with = FALSE]
+      dt <- x[, c(nclusters * 0:(length(colNames) - 1) + i, idVarsId), with = FALSE]
       setnames(dt, c(colNames, idVarsNames))
       dt[, cluster := as.factor(clusterNames[i])]
       dt