Skip to content

Commit

Permalink
Improved implementation of crit_wls and use of other ls criteria ...
Browse files Browse the repository at this point in the history
... number and name of arguments are now automatically detected (no more needed to use ...)

+ improved doc and added tests for wls
  • Loading branch information
sbuis committed Nov 23, 2023
1 parent f9d18e3 commit 610c852
Show file tree
Hide file tree
Showing 6 changed files with 116 additions and 83 deletions.
63 changes: 42 additions & 21 deletions R/estim_param.R
Original file line number Diff line number Diff line change
Expand Up @@ -107,7 +107,7 @@
#'
#' @param weight Weights to use in the criterion to optimize. A function that takes in input a vector
#' of observed values and the name of the corresponding variable and that must return either a single value
#' for the weights for the given variable or a vector of values of length the length of the observed values given in input.
#' for the weights for the given variable or a vector of values of length the length of the vector of observed values given in input.
#'
#' @param var_names `r lifecycle::badge("deprecated")` `var_names` is no
#' longer supported, use `var` instead.
Expand Down Expand Up @@ -214,6 +214,19 @@ estim_param <- function(obs_list, crit_function = crit_log_cwss, model_function,
var_names <- var # to remove when we update inside the function
}

# Initialize res
res <- list()

Check warning on line 218 in R/estim_param.R

View check run for this annotation

Codecov / codecov/patch

R/estim_param.R#L218

Added line #L218 was not covered by tests

# Create an environment accessible by all functions for storing information during the estimation process
parent <- eval(parse(text = ".GlobalEnv"))
.croptEnv <- new.env(parent)
assign(
x = ".croptEnv",
value = .croptEnv,
pos = parent

Check warning on line 226 in R/estim_param.R

View check run for this annotation

Codecov / codecov/patch

R/estim_param.R#L221-L226

Added lines #L221 - L226 were not covered by tests
)
.croptEnv$total_eval_count <- 0

Check warning on line 228 in R/estim_param.R

View check run for this annotation

Codecov / codecov/patch

R/estim_param.R#L228

Added line #L228 was not covered by tests

# Remove CroptimizR environment before exiting and save stored results (even if the process crashes)
on.exit({
if (exists(".croptEnv")) {
Expand All @@ -222,16 +235,6 @@ estim_param <- function(obs_list, crit_function = crit_log_cwss, model_function,
save(res, file = file.path(path_results_ORI, "optim_results.Rdata"))
})

# Initialize res
res <- list()

# Measured elapse time
tictoc::tic.clearlog()
tictoc::tic(quiet = TRUE)

# set seed
set.seed(optim_options$ranseed)

# Check inputs

## optim_options
Expand Down Expand Up @@ -342,19 +345,37 @@ estim_param <- function(obs_list, crit_function = crit_log_cwss, model_function,

## weight
if (!is.function(weight) && !is.null(weight)) {
stop("Incorrect format for argument weight Should be a function or NULL.")
stop("Incorrect format for argument weight: should be a function or NULL.")

Check warning on line 348 in R/estim_param.R

View check run for this annotation

Codecov / codecov/patch

R/estim_param.R#L347-L348

Added lines #L347 - L348 were not covered by tests
}
if (is.function(weight)) {
tryCatch(
weight(c(1,2,3), "var1"),
error = function(cond) {
message(paste("Caught an error while testing argument weight: \n
it must be a function that takes 2 input arguments (vector of observed
values and name of corresponding variable)"))
print(cond)
stop()

Check warning on line 358 in R/estim_param.R

View check run for this annotation

Codecov / codecov/patch

R/estim_param.R#L350-L358

Added lines #L350 - L358 were not covered by tests
}
)
w <- weight(c(1,2,3), "var1")
if (!is.numeric(w)) {
stop("Caught an error while testing argument weight: \n
it must be function that returns a numeric value (or vector of).")

Check warning on line 364 in R/estim_param.R

View check run for this annotation

Codecov / codecov/patch

R/estim_param.R#L361-L364

Added lines #L361 - L364 were not covered by tests
}
if (length(w)!=1 & length(w)!=length(c(1,2,3))) {
stop("Caught an error while testing argument weight: \n
it must be a function that returns a single value or a vector of values of size the size of
the vector of observed values given as first argument.")

Check warning on line 369 in R/estim_param.R

View check run for this annotation

Codecov / codecov/patch

R/estim_param.R#L366-L369

Added lines #L366 - L369 were not covered by tests
}
}

# Measured elapse time
tictoc::tic.clearlog()
tictoc::tic(quiet = TRUE)

Check warning on line 375 in R/estim_param.R

View check run for this annotation

Codecov / codecov/patch

R/estim_param.R#L374-L375

Added lines #L374 - L375 were not covered by tests

# Create an environment accessible by all functions for storing information during the estimation process
parent <- eval(parse(text = ".GlobalEnv"))
.croptEnv <- new.env(parent)
assign(
x = ".croptEnv",
value = .croptEnv,
pos = parent
)
.croptEnv$total_eval_count <- 0
# set seed
set.seed(optim_options$ranseed)

Check warning on line 378 in R/estim_param.R

View check run for this annotation

Codecov / codecov/patch

R/estim_param.R#L378

Added line #L378 was not covered by tests

# Initializations before parameter selection loop
oblig_param_list <- setdiff(param_names, candidate_param)
Expand Down
70 changes: 26 additions & 44 deletions R/ls_criteria.R
Original file line number Diff line number Diff line change
Expand Up @@ -7,6 +7,9 @@
#'
#' @param sim_list List of simulated variables
#' @param obs_list List of observed variables
#' @param weight Weights to use in the criterion to optimize. A function that takes in input a vector
#' of observed values and the name of the corresponding variable and that must return either a single value
#' for the weights for the given variable or a vector of values of length the length of the vector of observed values given in input.
#'
#' @return The value of the criterion given the observed and simulated values of
#' the variables.
Expand All @@ -20,16 +23,24 @@
#' \deqn{ \sum_{i,j,k} [Y_{ijk}-f_{jk}(X_i;\theta)]^2 }
#' where \eqn{ Y_{ijk} } is the observed value for the \eqn{k^{th}} time point of the \eqn{j^{th}} variable in the \eqn{i^{th}}
#' situation,
#' \eqn{ f_{jk}(X_i;\theta) } the corresponding model prediction, and \eqn{n_j} the number of measurements of variable \eqn{j}. \cr
#' In this criterion, one assume that all errors (model and observations errors for all variables, dates and situations) are independent, and that the error variance is constant over time and equal for the different variables \eqn{j}.
#' \eqn{ f_{jk}(X_i;\theta) } the corresponding model prediction. \cr
#' Using this criterion, one assume that all errors (model and observations errors for all variables, dates and situations) are independent, and that the error variance is constant over time and equal for the different variables \eqn{j}.
#' \item `crit_wls`: weighted least squares \cr
#' The weighted sum of squared residues for each variable: \cr
#' \deqn{ \sum_{i,j,k} [\frac{Y_{ijk}-f_{jk}(X_i;\theta)}{w_{i,j,k}}]^2 }
#' where \eqn{ Y_{ijk} } is the observed value for the \eqn{k^{th}} time point of the \eqn{j^{th}} variable in the \eqn{i^{th}}
#' situation,
#' \eqn{ f_{jk}(X_i;\theta) } the corresponding model prediction, \cr
#' and \eqn{w_{i,j,k}} a weight. \cr
#' Using this criterion, one assume that all errors (model and observations errors for all variables, dates and situations) are independent, and that the error variances are equal to \eqn{w_{i,j,k}^2} \eqn{j}.
#' \item `crit_log_cwss`: log transformation of concentrated version of weighted sum of squares \cr
#' The concentrated version of weighted sum of squares is: \cr
#' \deqn{ \prod_{j} {(\frac{1}{n_j} \sum_{i,k} [Y_{ijk}-f_{jk}(X_i;\theta)]^2 )} ^{n_j/2} }
#' where \eqn{ Y_{ijk} } is the observed value for the \eqn{k^{th}} time point of the \eqn{j^{th}} variable in the \eqn{i^{th}}
#' situation,
#' \eqn{ f_{jk}(X_i;\theta) } the corresponding model prediction, and \eqn{n_j} the number of measurements of variable \eqn{j}. \cr
#' `crit_log_cwss` computes the log of this equation. \cr
#' In this criterion, one assume that all errors (model and observations errors for all variables, dates and situations) are independent, and that the error variance is constant over time but may be different between variables \eqn{j}.
#' Using this criterion, one assume that all errors (model and observations errors for all variables, dates and situations) are independent, and that the error variance is constant over time but may be different between variables \eqn{j}.
#' These error variances are automatically estimated.
#' More details about this criterion are given in Wallach et al. (2011), equation 5.
#' \item `crit_log_cwss_corr`: log transformation of concentrated version of weighted sum of squares with hypothesis of high correlation between errors for different measurements over time \cr
Expand All @@ -39,15 +50,15 @@
#' situation,
#' \eqn{ f_{jk}(X_i;\theta) } the corresponding model prediction, \eqn{N_j} the number of situations including at least one observation of variable \eqn{j}, and \eqn{n_{ij}} the number of observation of variable \eqn{j} on situation \eqn{i}. . \cr
#' `crit_log_cwss_corr` computes the log of this equation. \cr
#' In this criterion, one still assume that errors in different
#' Using this criterion, one still assume that errors in different
#' situations or for different variables in the same situation are
#' independent.
#' However, errors for different observations over time of the same
#' variable in the same situation are assumed to be highly correlated.
#' In this way, each situation contributes a single term to the
#' overall sum of squared errors regardless of the number of
#' observations which may be usefull in case one have situations with
#' very heterogenous number of dates of observations.
#' observations which may be useful in case one have situations with
#' very heterogeneous number of dates of observations.
#' More details about this criterion are given in Wallach et al.
#' (2011), equation 8.
#' }
Expand All @@ -63,7 +74,7 @@ NULL

#' @export
#' @rdname ls_criteria
crit_ols <- function(sim_list, obs_list, ...) {
crit_ols <- function(sim_list, obs_list) {
# return criterion type (ls, log-ls, likelihood, log-likelihood)
# if no argument given

Expand All @@ -90,55 +101,26 @@ crit_ols <- function(sim_list, obs_list, ...) {

#' @export
#' @rdname ls_criteria
crit_wls <- function(sim_list, obs_list, weight, alpha=1) {

# weight: for the moment weight is supposed to be a list of shape similar to obs_list
# several shape for weight could be considered (a named vector (per variable),
# a list similar to observations, or a df with a column situation to consider
# different values for different situations)
# alpha: multiplicative coefficient applied to weight
crit_wls <- function(sim_list, obs_list, weight) {

if(!nargs()) return("ls") # return criterion type (ls, log-ls, likelihood, log-likelihood) if no argument given

Check warning on line 106 in R/ls_criteria.R

View check run for this annotation

Codecov / codecov/patch

R/ls_criteria.R#L106

Added line #L106 was not covered by tests

var_list <- unique(unlist(lapply(obs_list, function(x) colnames(x))))
var_list <- setdiff(var_list, "Date")

if (is.list(weight) & all(sapply(weight, function(x) is.data.frame(x)))) {
# intersect weight with obs ...
tmp <- intersect_sim_obs(weight, obs_list)
if (!is.list(tmp)) {
warning("Intersection of weights and observations is empty (no date and/or variable in common)!")
return(NA)
}
weight <- tmp$sim_list
flag_weight_vec <- FALSE
} else if (is.vector(weight) | is.list(weight)) {
if (!all(var_list %in% names(weight))) {
stop(paste("Argument `weight` must include one value per observed variable. \n Seems that no value has been provided for variable",
setdiff(var_list, names(weight))))
}
flag_weight_vec <- TRUE
} else {
stop("Argument `weight` must be either a named list, of shape similar to obs_list, or a named vector (one value per observed variable)")
}


result <- 0

for (var in var_list) {
obs <- unlist(sapply(obs_list, function(x) x[is.element(colnames(x), var)]))
sim <- unlist(sapply(sim_list, function(x) x[is.element(colnames(x), var)]))
if (flag_weight_vec) {
crt_weight <- weight[[var]]
} else {
crt_weight <- unlist(sapply(weight, function(x) x[is.element(colnames(x), var)]))
}
res <- (obs - sim) / (crt_weight*alpha)
res <- res[!is.na(res)]
res <- obs - sim
id_not_na <- !is.na(res)
res <- res[id_not_na]
sigma <- weight(obs[id_not_na], var)

sz <- length(res)

result <- result + res %*% res
result <- result + (res/sigma) %*% (res/sigma)
}

return(as.numeric(result))
Expand All @@ -147,7 +129,7 @@ crit_wls <- function(sim_list, obs_list, weight, alpha=1) {

#' @export
#' @rdname ls_criteria
crit_log_cwss <- function(sim_list, obs_list, ...) {
crit_log_cwss <- function(sim_list, obs_list) {
# return criterion type (ls, log-ls, likelihood, log-likelihood) #
# if no argument given

Expand Down Expand Up @@ -175,7 +157,7 @@ crit_log_cwss <- function(sim_list, obs_list, ...) {

#' @export
#' @rdname ls_criteria
crit_log_cwss_corr <- function(sim_list, obs_list, ...) {
crit_log_cwss_corr <- function(sim_list, obs_list) {
# return criterion type (ls, log-ls, likelihood, log-likelihood)
# if no argument given
if (!nargs()) {
Expand Down
5 changes: 4 additions & 1 deletion R/main_crit.R
Original file line number Diff line number Diff line change
Expand Up @@ -432,7 +432,10 @@ main_crit <- function(param_values, crit_options) {
)

# Compute criterion value
crit <- crit_function(obs_sim_list$sim_list, obs_sim_list$obs_list, crit_options$weight)
potential_arglist <- list(sim_list=obs_sim_list$sim_list,
obs_list=obs_sim_list$obs_list,
weight=crit_options$weight)
crit <- do.call(crit_function, args = potential_arglist[names(formals(crit_function))])

Check warning on line 438 in R/main_crit.R

View check run for this annotation

Codecov / codecov/patch

R/main_crit.R#L435-L438

Added lines #L435 - L438 were not covered by tests
if (is.nan(crit)) {
warning(paste0("Optimized criterion returned NaN value. \n Estimated parameters: ", paste(param_names, collapse = " "), ", values: ", paste(param_values, collapse = " ")))
}
Expand Down
2 changes: 1 addition & 1 deletion man/estim_param.Rd

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

32 changes: 22 additions & 10 deletions man/ls_criteria.Rd

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

27 changes: 21 additions & 6 deletions tests/testthat/test-ls_criteria.R
Original file line number Diff line number Diff line change
Expand Up @@ -13,16 +13,31 @@ sim_list2 <- list(

test_that("crit_ols", {
expect_equal(crit_ols(sim_list, sim_list), 0)
expect_equal(crit_ols(obs_list, sim_list), 8)
expect_equal(crit_ols(obs_list2, sim_list2), 21)
expect_equal(crit_ols(sim_list, obs_list), 8)
expect_equal(crit_ols(sim_list2, obs_list2), 21)
})
w_inf <- function(...) {
return(Inf)
}
w_1 <- function(...) {
return(1)
}
w_obs <- function(obs, ...) {
return(obs)
}
test_that("crit_wls", {
expect_equal(crit_wls(sim_list, sim_list, w_1), 0)
expect_equal(crit_wls(sim_list2, obs_list2, w_1), crit_ols(obs_list2, sim_list2))
expect_equal(crit_wls(sim_list2, obs_list2, w_inf), 0)
expect_equal(crit_wls(sim_list2, obs_list2, w_obs), 10)
})
test_that("crit_log_cwss", {
expect_equal(crit_log_cwss(sim_list, sim_list), -Inf)
expect_equal(crit_log_cwss(obs_list, sim_list), log(2))
expect_equal(crit_log_cwss(obs_list2, sim_list2), log((17 / 3)^ (3 / 2)))
expect_equal(crit_log_cwss(sim_list, obs_list), log(2))
expect_equal(crit_log_cwss(sim_list2, obs_list2), log((17 / 3)^ (3 / 2)))
})
test_that("crit_log_cwss_corr", {
expect_equal(crit_log_cwss_corr(sim_list, sim_list), -Inf)
expect_equal(crit_log_cwss_corr(obs_list, sim_list), log(2))
expect_equal(crit_log_cwss_corr(obs_list2, sim_list2), log((21 / 4)))
expect_equal(crit_log_cwss_corr(sim_list, obs_list), log(2))
expect_equal(crit_log_cwss_corr(sim_list2, obs_list2), log((21 / 4)))
})

0 comments on commit 610c852

Please sign in to comment.