Improved implementation of crit_wls and use of other ls criteria ...

... number and name of arguments are now automatically detected (no more needed to use ...) + improved doc and added tests for wls
SticsRPacks · Nov 23, 2023 · 610c852 · 610c852
1 parent f9d18e3
commit 610c852
Show file tree

Hide file tree

Showing 6 changed files with 116 additions and 83 deletions.
diff --git a/R/estim_param.R b/R/estim_param.R
@@ -107,7 +107,7 @@
 #'
 #' @param weight Weights to use in the criterion to optimize. A function that takes in input a vector
 #' of observed values and the name of the corresponding variable and that must return either a single value
-#' for the weights for the given variable or a vector of values of length the length of the observed values given in input.
+#' for the weights for the given variable or a vector of values of length the length of the vector of observed values given in input.
 #'
 #' @param var_names `r lifecycle::badge("deprecated")` `var_names` is no
 #'   longer supported, use `var` instead.
@@ -214,6 +214,19 @@ estim_param <- function(obs_list, crit_function = crit_log_cwss, model_function,
     var_names <- var # to remove when we update inside the function
   }
 
+  # Initialize res
+  res <- list()
+
+  # Create an environment accessible by all functions for storing information during the estimation process
+  parent <- eval(parse(text = ".GlobalEnv"))
+  .croptEnv <- new.env(parent)
+  assign(
+    x = ".croptEnv",
+    value = .croptEnv,
+    pos = parent
+  )
+  .croptEnv$total_eval_count <- 0
+
   # Remove CroptimizR environment before exiting and save stored results (even if the process crashes)
   on.exit({
     if (exists(".croptEnv")) {
@@ -222,16 +235,6 @@ estim_param <- function(obs_list, crit_function = crit_log_cwss, model_function,
     save(res, file = file.path(path_results_ORI, "optim_results.Rdata"))
   })
 
-  # Initialize res
-  res <- list()
-
-  # Measured elapse time
-  tictoc::tic.clearlog()
-  tictoc::tic(quiet = TRUE)
-
-  # set seed
-  set.seed(optim_options$ranseed)
-
   # Check inputs
 
   ## optim_options
@@ -342,19 +345,37 @@ estim_param <- function(obs_list, crit_function = crit_log_cwss, model_function,
 
   ## weight
   if (!is.function(weight) && !is.null(weight)) {
-    stop("Incorrect format for argument weight Should be a function or NULL.")
+    stop("Incorrect format for argument weight: should be a function or NULL.")
+  }
+  if (is.function(weight)) {
+    tryCatch(
+      weight(c(1,2,3), "var1"),
+      error = function(cond) {
+        message(paste("Caught an error while testing argument weight: \n
+                 it must be a function that takes 2 input arguments (vector of observed
+                      values and name of corresponding variable)"))
+        print(cond)
+        stop()
+      }
+    )
+    w <- weight(c(1,2,3), "var1")
+    if (!is.numeric(w)) {
+      stop("Caught an error while testing argument weight: \n
+        it must be  function that returns a numeric value (or vector of).")
+    }
+    if (length(w)!=1 & length(w)!=length(c(1,2,3))) {
+      stop("Caught an error while testing argument weight: \n
+        it must be a function that returns a single value or a vector of values of size the size of
+             the vector of observed values given as first argument.")
+    }
   }
 
+  # Measured elapse time
+  tictoc::tic.clearlog()
+  tictoc::tic(quiet = TRUE)
 
-  # Create an environment accessible by all functions for storing information during the estimation process
-  parent <- eval(parse(text = ".GlobalEnv"))
-  .croptEnv <- new.env(parent)
-  assign(
-    x = ".croptEnv",
-    value = .croptEnv,
-    pos = parent
-  )
-  .croptEnv$total_eval_count <- 0
+  # set seed
+  set.seed(optim_options$ranseed)
 
   # Initializations before parameter selection loop
   oblig_param_list <- setdiff(param_names, candidate_param)

diff --git a/R/ls_criteria.R b/R/ls_criteria.R
@@ -7,6 +7,9 @@
 #'
 #' @param sim_list List of simulated variables
 #' @param obs_list List of observed variables
+#' @param weight Weights to use in the criterion to optimize. A function that takes in input a vector
+#' of observed values and the name of the corresponding variable and that must return either a single value
+#' for the weights for the given variable or a vector of values of length the length of the vector of observed values given in input.
 #'
 #' @return The value of the criterion given the observed and simulated values of
 #'  the variables.
@@ -20,16 +23,24 @@
 #'           \deqn{ \sum_{i,j,k} [Y_{ijk}-f_{jk}(X_i;\theta)]^2 }
 #'           where \eqn{ Y_{ijk} } is the observed value for the \eqn{k^{th}} time point of the \eqn{j^{th}} variable in the \eqn{i^{th}}
 #'           situation,
-#'           \eqn{ f_{jk}(X_i;\theta) } the corresponding model prediction, and \eqn{n_j} the number of measurements of variable \eqn{j}. \cr
-#'           In this criterion, one assume that all errors (model and observations errors for all variables, dates and situations) are independent, and that the error variance is constant over time and equal for the different variables \eqn{j}.
+#'           \eqn{ f_{jk}(X_i;\theta) } the corresponding model prediction. \cr
+#'           Using this criterion, one assume that all errors (model and observations errors for all variables, dates and situations) are independent, and that the error variance is constant over time and equal for the different variables \eqn{j}.
+#'   \item `crit_wls`: weighted least squares \cr
+#'           The weighted sum of squared residues for each variable: \cr
+#'           \deqn{ \sum_{i,j,k} [\frac{Y_{ijk}-f_{jk}(X_i;\theta)}{w_{i,j,k}}]^2 }
+#'           where \eqn{ Y_{ijk} } is the observed value for the \eqn{k^{th}} time point of the \eqn{j^{th}} variable in the \eqn{i^{th}}
+#'           situation,
+#'           \eqn{ f_{jk}(X_i;\theta) } the corresponding model prediction, \cr
+#'           and \eqn{w_{i,j,k}} a weight. \cr
+#'           Using this criterion, one assume that all errors (model and observations errors for all variables, dates and situations) are independent, and that the error variances are equal to \eqn{w_{i,j,k}^2}  \eqn{j}.
 #'   \item `crit_log_cwss`: log transformation of concentrated version of weighted sum of squares \cr
 #'           The concentrated version of weighted sum of squares is: \cr
 #'           \deqn{ \prod_{j} {(\frac{1}{n_j} \sum_{i,k} [Y_{ijk}-f_{jk}(X_i;\theta)]^2 )} ^{n_j/2} }
 #'           where \eqn{ Y_{ijk} } is the observed value for the \eqn{k^{th}} time point of the \eqn{j^{th}} variable in the \eqn{i^{th}}
 #'           situation,
 #'           \eqn{ f_{jk}(X_i;\theta) } the corresponding model prediction, and \eqn{n_j} the number of measurements of variable \eqn{j}. \cr
 #'           `crit_log_cwss` computes the log of this equation. \cr
-#'           In this criterion, one assume that all errors (model and observations errors for all variables, dates and situations) are independent, and that the error variance is constant over time but may be different between variables \eqn{j}.
+#'           Using this criterion, one assume that all errors (model and observations errors for all variables, dates and situations) are independent, and that the error variance is constant over time but may be different between variables \eqn{j}.
 #'           These error variances are automatically estimated.
 #'           More details about this criterion are given in Wallach et al. (2011), equation 5.
 #'   \item `crit_log_cwss_corr`: log transformation of concentrated version of weighted sum of squares with hypothesis of high correlation between errors for different measurements over time \cr
@@ -39,15 +50,15 @@
 #'           situation,
 #'           \eqn{ f_{jk}(X_i;\theta) } the corresponding model prediction, \eqn{N_j} the number of situations including at least one observation of variable \eqn{j}, and \eqn{n_{ij}} the number of observation of variable \eqn{j} on situation \eqn{i}. . \cr
 #'           `crit_log_cwss_corr` computes the log of this equation. \cr
-#'           In this criterion, one still assume that errors in different
+#'           Using this criterion, one still assume that errors in different
 #'           situations or for different variables in the same situation are
 #'           independent.
 #'           However, errors for different observations over time of the same
 #'           variable in the same situation are assumed to be highly correlated.
 #'           In this way, each situation contributes a single term to the
 #'           overall sum of squared errors regardless of the number of
-#'           observations which may be usefull in case one have situations with
-#'            very heterogenous number of dates of observations.
+#'           observations which may be useful in case one have situations with
+#'            very heterogeneous number of dates of observations.
 #'           More details about this criterion are given in Wallach et al.
 #'           (2011), equation 8.
 #' }
@@ -63,7 +74,7 @@ NULL
 
 #' @export
 #' @rdname ls_criteria
-crit_ols <- function(sim_list, obs_list, ...) {
+crit_ols <- function(sim_list, obs_list) {
   # return criterion type (ls, log-ls, likelihood, log-likelihood)
   # if no argument given
 
@@ -90,55 +101,26 @@ crit_ols <- function(sim_list, obs_list, ...) {
 
 #' @export
 #' @rdname ls_criteria
-crit_wls <- function(sim_list, obs_list, weight, alpha=1) {
-
-  # weight: for the moment weight is supposed to be a list of shape similar to obs_list
-  # several shape for weight could be considered (a named vector (per variable),
-  # a list similar to observations, or a df with a column situation to consider
-  # different values for different situations)
-  # alpha: multiplicative coefficient applied to weight
+crit_wls <- function(sim_list, obs_list, weight) {
 
   if(!nargs()) return("ls")  # return criterion type (ls, log-ls, likelihood, log-likelihood) if no argument given
 
   var_list <- unique(unlist(lapply(obs_list, function(x) colnames(x))))
   var_list <- setdiff(var_list, "Date")
 
-  if (is.list(weight) & all(sapply(weight, function(x) is.data.frame(x)))) {
-    # intersect weight with obs ...
-    tmp <- intersect_sim_obs(weight, obs_list)
-    if (!is.list(tmp)) {
-      warning("Intersection of weights and observations is empty (no date and/or variable in common)!")
-      return(NA)
-    }
-    weight <- tmp$sim_list
-    flag_weight_vec <- FALSE
-  } else if (is.vector(weight) | is.list(weight)) {
-    if (!all(var_list %in% names(weight))) {
-      stop(paste("Argument `weight` must include one value per observed variable. \n Seems that no value has been provided for variable",
-                 setdiff(var_list, names(weight))))
-    }
-    flag_weight_vec <- TRUE
-  } else {
-    stop("Argument `weight` must be either a named list, of shape similar to obs_list, or a named vector (one value per observed variable)")
-  }
-
-
   result <- 0
 
   for (var in var_list) {
     obs <- unlist(sapply(obs_list, function(x) x[is.element(colnames(x), var)]))
     sim <- unlist(sapply(sim_list, function(x) x[is.element(colnames(x), var)]))
-    if (flag_weight_vec) {
-      crt_weight <- weight[[var]]
-    } else {
-      crt_weight <- unlist(sapply(weight, function(x) x[is.element(colnames(x), var)]))
-    }
-    res <- (obs - sim) / (crt_weight*alpha)
-    res <- res[!is.na(res)]
+    res <- obs - sim
+    id_not_na <- !is.na(res)
+    res <- res[id_not_na]
+    sigma <- weight(obs[id_not_na], var)
 
     sz <- length(res)
 
-    result <- result + res %*% res
+    result <- result + (res/sigma) %*% (res/sigma)
   }
 
   return(as.numeric(result))
@@ -147,7 +129,7 @@ crit_wls <- function(sim_list, obs_list, weight, alpha=1) {
 
 #' @export
 #' @rdname ls_criteria
-crit_log_cwss <- function(sim_list, obs_list, ...) {
+crit_log_cwss <- function(sim_list, obs_list) {
   # return criterion type (ls, log-ls, likelihood, log-likelihood) #
   # if no argument given
 
@@ -175,7 +157,7 @@ crit_log_cwss <- function(sim_list, obs_list, ...) {
 
 #' @export
 #' @rdname ls_criteria
-crit_log_cwss_corr <- function(sim_list, obs_list, ...) {
+crit_log_cwss_corr <- function(sim_list, obs_list) {
   # return criterion type (ls, log-ls, likelihood, log-likelihood)
   # if no argument given
   if (!nargs()) {

diff --git a/R/main_crit.R b/R/main_crit.R
@@ -432,7 +432,10 @@ main_crit <- function(param_values, crit_options) {
   )
 
   # Compute criterion value
-  crit <- crit_function(obs_sim_list$sim_list, obs_sim_list$obs_list, crit_options$weight)
+  potential_arglist <- list(sim_list=obs_sim_list$sim_list,
+                            obs_list=obs_sim_list$obs_list,
+                            weight=crit_options$weight)
+  crit <- do.call(crit_function, args = potential_arglist[names(formals(crit_function))])
   if (is.nan(crit)) {
     warning(paste0("Optimized criterion returned NaN value. \n Estimated parameters: ", paste(param_names, collapse = " "), ", values: ", paste(param_values, collapse = " ")))
   }

diff --git a/man/estim_param.Rd b/man/estim_param.Rd
diff --git a/man/ls_criteria.Rd b/man/ls_criteria.Rd
diff --git a/tests/testthat/test-ls_criteria.R b/tests/testthat/test-ls_criteria.R
@@ -13,16 +13,31 @@ sim_list2 <- list(
 
 test_that("crit_ols", {
   expect_equal(crit_ols(sim_list, sim_list), 0)
-  expect_equal(crit_ols(obs_list, sim_list), 8)
-  expect_equal(crit_ols(obs_list2, sim_list2), 21)
+  expect_equal(crit_ols(sim_list, obs_list), 8)
+  expect_equal(crit_ols(sim_list2, obs_list2), 21)
+})
+w_inf <- function(...) {
+  return(Inf)
+}
+w_1 <- function(...) {
+  return(1)
+}
+w_obs <- function(obs, ...) {
+  return(obs)
+}
+test_that("crit_wls", {
+  expect_equal(crit_wls(sim_list, sim_list, w_1), 0)
+  expect_equal(crit_wls(sim_list2, obs_list2, w_1), crit_ols(obs_list2, sim_list2))
+  expect_equal(crit_wls(sim_list2, obs_list2, w_inf), 0)
+  expect_equal(crit_wls(sim_list2, obs_list2, w_obs), 10)
 })
 test_that("crit_log_cwss", {
   expect_equal(crit_log_cwss(sim_list, sim_list), -Inf)
-  expect_equal(crit_log_cwss(obs_list, sim_list), log(2))
-  expect_equal(crit_log_cwss(obs_list2, sim_list2), log((17 / 3)^ (3 / 2)))
+  expect_equal(crit_log_cwss(sim_list, obs_list), log(2))
+  expect_equal(crit_log_cwss(sim_list2, obs_list2), log((17 / 3)^ (3 / 2)))
 })
 test_that("crit_log_cwss_corr", {
   expect_equal(crit_log_cwss_corr(sim_list, sim_list), -Inf)
-  expect_equal(crit_log_cwss_corr(obs_list, sim_list), log(2))
-  expect_equal(crit_log_cwss_corr(obs_list2, sim_list2), log((21 / 4)))
+  expect_equal(crit_log_cwss_corr(sim_list, obs_list), log(2))
+  expect_equal(crit_log_cwss_corr(sim_list2, obs_list2), log((21 / 4)))
 })