Skip to content

Commit

Permalink
Merge pull request #10 from SticsRPacks/AgMIP_phaseIV
Browse files Browse the repository at this point in the history
Improvements for AgMIP calibration phase IV
  • Loading branch information
sbuis authored Nov 28, 2023
2 parents 0a9d5c4 + bd613f5 commit d596fdd
Show file tree
Hide file tree
Showing 58 changed files with 347 additions and 77 deletions.
3 changes: 1 addition & 2 deletions .github/workflows/check-standard.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -60,7 +60,6 @@ jobs:
# see also: https://stackoverflow.com/questions/73243945/pkgdown-action-failing-at-build-xml
extra-packages: |
any::rcmdcheck
SticsRPacks/SticsRFiles@main
needs: check
exclude: ape

Expand All @@ -70,7 +69,7 @@ jobs:
- name: Install custom R dependencies
run: |
# Install dev version of SticsRFiles to be in sync for tests (packages are developed synchronously):
remotes::install_github("SticsRPacks/SticsRFiles@main", dependencies = FALSE, upgrade = "never")
remotes::install_github("SticsRPacks/SticsRFiles@main", dependencies = FALSE, upgrade = "never", force=TRUE)
shell: Rscript {0}

- uses: r-lib/actions/check-r-package@v2
Expand Down
1 change: 1 addition & 0 deletions NAMESPACE
Original file line number Diff line number Diff line change
Expand Up @@ -6,6 +6,7 @@ export(BIC)
export(crit_log_cwss)
export(crit_log_cwss_corr)
export(crit_ols)
export(crit_wls)
export(estim_param)
export(filter_obs)
export(likelihood_log_ciidn)
Expand Down
2 changes: 1 addition & 1 deletion R/FwdRegAgMIP.R
Original file line number Diff line number Diff line change
Expand Up @@ -113,7 +113,7 @@ post_treat_FwdRegAgMIP <- function(optim_results, crit_options, crt_list,
## RE-compute main_crit with the initial values of the parameters
init_crit <- main_crit(
param_values = optim_results$init_values[optim_results$ind_min_crit, ],
crit_options = c(crit_options, return_obs_sim = FALSE)
crit_options = c(crit_options, return_detailed_info = FALSE)
)

## Store the results per step
Expand Down
52 changes: 52 additions & 0 deletions R/compute_eq_const.R
Original file line number Diff line number Diff line change
@@ -0,0 +1,52 @@
#' @title Compute equality constraints on model wrapper input parameters
#'
#' @inheritParams estim_param
#' @param param_values Named vector or tibble, value(s) of the estimated parameters
#' as provided by the parameter estimation algorithm (and possibly transformed in
#' main_crit function)
#'
#' @return Named vector or tibble (same shape as param_values) containing the values
#' for the model parameters to force.
#'
#' @keywords internal
#'
compute_eq_const <- function(forced_param_values, param_values) {

comp_forced_values <- NULL
if (!is.null(forced_param_values)) {

param_values <- tibble::tibble(!!!param_values)
param_values$situation <- NULL
nrows <- max(1,seq_len(nrow(param_values)))
comp_forced_values <- matrix(ncol = length(forced_param_values),
nrow = nrows)
colnames(comp_forced_values) <- names(forced_param_values)

for (irow in nrows) {

expr_ls <-
lapply(names(forced_param_values), function(x) paste(x,"<-",forced_param_values[[x]]))
names(expr_ls) <- names(forced_param_values)

for (par in names(param_values)) {
eval(parse(text = paste(par,"<-",param_values[[irow, par]])))
}
for (par in names(forced_param_values)) {
eval(parse(text = expr_ls[[par]]))
eval(parse(text = paste0("comp_forced_values[irow,\"",par,"\"] <- ",par)))
}

}

if (nrows==1) {
comp_forced_values <- comp_forced_values[1,]
} else {
comp_forced_values <- tibble::as_tibble(comp_forced_values)
}

}

return(comp_forced_values)

}

125 changes: 91 additions & 34 deletions R/estim_param.R
Original file line number Diff line number Diff line change
Expand Up @@ -49,15 +49,22 @@
#' - `ub` and `lb`, vectors of upper and lower bounds (one value per group),
#' - `init_values`, the list of initial values per group (data.frame, one column per group, optional).
#'
#' @param forced_param_values Named vector, must contain the values for the model parameters
#' to force (optional, NULL by default). These values will be transferred to the
#' model wrapper through its param_values argument so that the given parameters
#' always take the same values for each model simulation. Should not include values
#' for estimated parameters (i.e. parameters defined in `param_info` argument).
#' @param forced_param_values Named vector or list, must contain the values (or
#' arithmetic expression, see details section) for the model parameters to force. The corresponding
#' values will be transferred to the model wrapper through its param_values argument
#' during the estimation process.
#' Should not include values for estimated parameters (i.e. parameters defined in
#' `param_info` argument), except if they are listed as candidate parameters (see
#' argument `candidate_param`).
#'
#' @param candidate_param Names of the parameters, among those defined in the argument param_info,
#' that must only be considered as candidate for parameter estimation (see details section).
#'
#' @param transform_var Named vector of functions to apply both on simulated and
#' observed variables. `transform_var=c(var1=log, var2=sqrt)` will for example
#' apply log-transformation on simulated and observed values of variable var1,
#' and square-root transformation on values of variable var2.
#'
#' @param transform_obs User function for transforming observations before each criterion
#' evaluation (optional), see details section for more information.
#'
Expand Down Expand Up @@ -98,11 +105,14 @@
#' is defined (see details section)), the first information criterion given will be used.
#' ONLY AVAILABLE FOR THE MOMENT FOR crit_function==crit_ols.
#'
#' @param weight Weights to use in the criterion to optimize. A function that takes in input a vector
#' of observed values and the name of the corresponding variable and that must return either a single value
#' for the weights for the given variable or a vector of values of length the length of the vector of observed values given in input.
#'
#' @param var_names `r lifecycle::badge("deprecated")` `var_names` is no
#' longer supported, use `var` instead.
#'
#' @details
#'
#' In CroptimizR, parameter estimation is based on the comparison between the values
#' of the observed and simulated variables at corresponding dates. Only the situations,
#' variables and dates common to both observations (provided in `obs_list` argument),
Expand Down Expand Up @@ -153,6 +163,20 @@
#' It must return a logical indicating if the parameters values satisfies the constraints
#' (freely defined by the user in the function body) or not.
#'
#' The optional argument `forced_param_values` may contain arithmetic expressions to
#' automatically compute the values of some parameters in function of the values of
#' parameters that are estimated (equality constraints). For that, `forced_param_values`
#' must be a named list. Arithmetic expressions must be R expressions given under the
#' shape of character strings. For example:
#'
#' forced_param_values = list(p1=5, p2=7, p3="5*p5+p6")
#'
#' will pass to the model wrapper the value 5 for parameter p1, 7 for parameter p2,
#' and will dynamically compute the value of p3 in function of the values of parameters
#' p5 and p6 iteratively provided by the parameter estimation algorithm. In this example,
#' the parameters p5 and p6 must thus be part of the list of parameters to estimate, i.e.
#' described in the `param_info` argument.
#'
#' @return prints, graphs and a list containing the results of the parameter estimation,
#' which content depends on the method used and on the values of the `info_level` argument.
#' All results are saved in the folder `optim_options$out_dir`.
Expand All @@ -167,13 +191,14 @@
estim_param <- function(obs_list, crit_function = crit_log_cwss, model_function,
model_options = NULL, optim_method = "nloptr.simplex",
optim_options = NULL, param_info, forced_param_values = NULL,
candidate_param = NULL, transform_obs = NULL,
candidate_param = NULL, transform_var = NULL, transform_obs = NULL,
transform_sim = NULL, satisfy_par_const = NULL,
var = NULL, info_level = 1,
info_crit_func = list(
CroptimizR::BIC, CroptimizR::AICc,
CroptimizR::AIC
),
weight=NULL,
var_names = lifecycle::deprecated()) {

# Managing parameter names changes between versions:
Expand All @@ -189,6 +214,19 @@ estim_param <- function(obs_list, crit_function = crit_log_cwss, model_function,
var_names <- var # to remove when we update inside the function
}

# Initialize res
res <- list()

# Create an environment accessible by all functions for storing information during the estimation process
parent <- eval(parse(text = ".GlobalEnv"))
.croptEnv <- new.env(parent)
assign(
x = ".croptEnv",
value = .croptEnv,
pos = parent
)
.croptEnv$total_eval_count <- 0

# Remove CroptimizR environment before exiting and save stored results (even if the process crashes)
on.exit({
if (exists(".croptEnv")) {
Expand All @@ -197,16 +235,6 @@ estim_param <- function(obs_list, crit_function = crit_log_cwss, model_function,
save(res, file = file.path(path_results_ORI, "optim_results.Rdata"))
})

# Initialize res
res <- list()

# Measured elapse time
tictoc::tic.clearlog()
tictoc::tic(quiet = TRUE)

# set seed
set.seed(optim_options$ranseed)

# Check inputs

## optim_options
Expand Down Expand Up @@ -266,14 +294,17 @@ estim_param <- function(obs_list, crit_function = crit_log_cwss, model_function,
stop("Incorrect format for argument forced_param_values, should be a vector.")
}
if (any(names(forced_param_values) %in% setdiff(param_names, candidate_param))) {
stop(
tmp <- intersect(names(forced_param_values), setdiff(param_names, candidate_param))
warning(
"The following parameters are defined both in forced_param_values and param_info
arguments of estim_param function while they should not (a parameter cannot
be both forced and estimated except if it is part of the `candidate` parameters):",
paste(intersect(names(forced_param_values), setdiff(param_names, candidate_param)),
collapse = ","
)
paste(tmp,collapse = ","),
"\n They will be removed from forced_param_values."
)
forced_param_values <-
forced_param_values[setdiff(names(forced_param_values),
setdiff(param_names, candidate_param))]
}
}

Expand All @@ -296,9 +327,9 @@ estim_param <- function(obs_list, crit_function = crit_log_cwss, model_function,
}
})
# set to NULL the info_crit that are not compatible with the crit_function used
info_crit_list[sapply(info_crit_list, function(x) {
(x()$name == "AIC" || x()$name == "AICc" || x()$name == "BIC") && !identical(crit_function, crit_ols)
})] <- NULL
# info_crit_list[sapply(info_crit_list, function(x) {
# (x()$name == "AIC" || x()$name == "AICc" || x()$name == "BIC") && !identical(crit_function, crit_ols)
# })] <- NULL
}
if (length(info_crit_list) == 0) info_crit_list <- NULL

Expand All @@ -312,15 +343,39 @@ estim_param <- function(obs_list, crit_function = crit_log_cwss, model_function,
stop("Parameters included in argument candidate_param must be defined in param_info argument.")
}

# Create an environment accessible by all functions for storing information during the estimation process
parent <- eval(parse(text = ".GlobalEnv"))
.croptEnv <- new.env(parent)
assign(
x = ".croptEnv",
value = .croptEnv,
pos = parent
)
.croptEnv$total_eval_count <- 0
## weight
if (!is.function(weight) && !is.null(weight)) {
stop("Incorrect format for argument weight: should be a function or NULL.")
}
if (is.function(weight)) {
tryCatch(
weight(c(1,2,3), "var1"),
error = function(cond) {
message(paste("Caught an error while testing argument weight: \n
it must be a function that takes 2 input arguments (vector of observed
values and name of corresponding variable)"))
print(cond)
stop()
}
)
w <- weight(c(1,2,3), "var1")
if (!is.numeric(w)) {
stop("Caught an error while testing argument weight: \n
it must be function that returns a numeric value (or vector of).")
}
if (length(w)!=1 & length(w)!=length(c(1,2,3))) {
stop("Caught an error while testing argument weight: \n
it must be a function that returns a single value or a vector of values of size the size of
the vector of observed values given as first argument.")
}
}

# Measured elapse time
tictoc::tic.clearlog()
tictoc::tic(quiet = TRUE)

# set seed
set.seed(optim_options$ranseed)

# Initializations before parameter selection loop
oblig_param_list <- setdiff(param_names, candidate_param)
Expand Down Expand Up @@ -381,13 +436,15 @@ estim_param <- function(obs_list, crit_function = crit_log_cwss, model_function,
param_names = crt_candidates, obs_list = obs_list,
crit_function = crit_function, model_function = model_function,
model_options = model_options, param_info = param_info_tmp,
transform_var = transform_var,
transform_obs = transform_obs, transform_sim = transform_sim,
satisfy_par_const = satisfy_par_const,
path_results = optim_options$path_results,
var_names = var_names,
forced_param_values = forced_param_values_tmp,
info_level = info_level,
info_crit_list = info_crit_list
info_crit_list = info_crit_list,
weight=weight
)

## Run the estimation
Expand Down
3 changes: 2 additions & 1 deletion R/frequentist_functions.R
Original file line number Diff line number Diff line change
Expand Up @@ -63,7 +63,7 @@ post_treat_frequentist <- function(optim_options, param_info, optim_results,
# (just to check it is correct and to get the observation list used)
info_final <- main_crit(
param_values = optim_results$final_values,
crit_options = c(crit_options, return_obs_sim = TRUE)
crit_options = c(crit_options, return_detailed_info = TRUE)
)
if (info_final$crit != optim_results$min_crit_value) {
stop(paste(
Expand All @@ -72,6 +72,7 @@ post_treat_frequentist <- function(optim_options, param_info, optim_results,
info_final$crit
))
}
optim_results$forced_param_values <- info_final$forced_param_values

sapply(info_crit_list, function(x) {
final_info_crit <- x(
Expand Down
2 changes: 1 addition & 1 deletion R/information_criteria.R
Original file line number Diff line number Diff line change
Expand Up @@ -34,7 +34,7 @@ AIC <- function(obs_list, crit_value, param_nb) {
#'
#' @return Value of the AICc criterion for ordinary least squares.
#' If called without arguments, returns a named list with element "name"
#' containingthe name of the function and "species" containing
#' containing the name of the function and "species" containing
#' "Information criterion"
#'
#' @export
Expand Down
1 change: 1 addition & 0 deletions R/intersect_sim_obs.R
Original file line number Diff line number Diff line change
Expand Up @@ -119,5 +119,6 @@ intersect_sim_obs <- function(sim_list, obs_list) {
}
})

attr(sim_list, "class") <- "cropr_simulation"
return(list(sim_list = sim_list, obs_list = obs_list))
}
4 changes: 2 additions & 2 deletions R/likelihoods.R
Original file line number Diff line number Diff line change
Expand Up @@ -35,8 +35,8 @@
#' variable in the same situation are assumed to be highly correlated.
#' In this way, each situation contributes a single term to the
#' overall sum of squared errors regardless of the number of
#' observations which may be usefull in case one have situations with
#' very heterogenous number of dates of observations.
#' observations which may be useful in case one have situations with
#' very heterogeneous number of dates of observations.
#' }
#' `sim_list` and `obs_list` must have the same structure
#' (i.e. same number of variables, dates, situations, ... use internal function
Expand Down
Loading

0 comments on commit d596fdd

Please sign in to comment.