From 98216403b8c537441d36908d46dd8bacb4616305 Mon Sep 17 00:00:00 2001 From: Steve Bronder Date: Thu, 18 Apr 2024 11:46:28 -0400 Subject: [PATCH] update docs and failing test --- R/args.R | 37 ++++++++++++++++++++++++++------- man-roxygen/model-common-args.R | 18 +++++++++------- man/model-method-diagnose.Rd | 18 +++++++++------- man/model-method-laplace.Rd | 18 +++++++++------- man/model-method-optimize.Rd | 18 +++++++++------- man/model-method-pathfinder.Rd | 18 +++++++++------- man/model-method-sample.Rd | 18 +++++++++------- man/model-method-sample_mpi.Rd | 18 +++++++++------- man/model-method-variational.Rd | 18 +++++++++------- tests/testthat/test-fit-init.R | 3 --- 10 files changed, 109 insertions(+), 75 deletions(-) diff --git a/R/args.R b/R/args.R index 9f3d4eda..c22ebc7a 100644 --- a/R/args.R +++ b/R/args.R @@ -1268,23 +1268,29 @@ process_init_approx <- function(init, num_procs, model_variables = NULL, # Calculate unique draws based on 'lw' using base R functions unique_draws = length(unique(draws_df$lw)) if (num_procs > unique_draws) { - if (inherits(init, "CmdStanPathfinder")) { - stop(paste0("Not enough distinct draws (", num_procs, ") in pathfinder fit to create inits. Try running Pathfinder with psis_resample=FALSE")) + if (inherits(init, "CmdStanPathfinder")) { + algo_name = " Pathfinder " + extra_msg = " Try running Pathfinder with psis_resample=FALSE." 
+ } else if (inherits(init, "CmdStanVB")) { + algo_name = " CmdStanVB " + extra_msg = "" + } else if (inherits(init, "CmdStanLaplace")) { + algo_name = " CmdStanLaplace " + extra_msg = "" } else { - stop(paste0("Not enough distinct draws (", num_procs, ") to create inits.")) + algo_name = " " + extra_msg = "" } + stop(paste0("Not enough distinct draws (", num_procs, ") in", algo_name , + "fit to create inits.", extra_msg)) } if (unique_draws < (0.95 * nrow(draws_df))) { temp_df = stats::aggregate(.draw ~ lw, data = draws_df, FUN = min) draws_df = posterior::as_draws_df(merge(temp_df, draws_df, by = 'lw')) draws_df$weight = exp(draws_df$lw - max(draws_df$lw)) } else { - if (inherits(init, "CmdStanPathfinder") && (init$metadata()$psis_resample || !init$metadata()$calculate_lp)) { - draws_df$weight = rep(1.0, nrow(draws_df)) - } else { draws_df$weight = posterior::pareto_smooth( exp(draws_df$lw - max(draws_df$lw)), tail = "right", return_k=FALSE) - } } init_draws_df = posterior::resample_draws(draws_df, ndraws = num_procs, weights = draws_df$weight, method = "simple_no_replace") @@ -1308,7 +1314,22 @@ process_init_approx <- function(init, num_procs, model_variables = NULL, process_init.CmdStanPathfinder <- function(init, num_procs, model_variables = NULL, warn_partial = getOption("cmdstanr_warn_inits", TRUE), ...) 
{ - process_init_approx(init, num_procs, model_variables, warn_partial) + if (!init$metadata()$calculate_lp) { + validate_fit_init(init, model_variables) + # Convert from data.table to data.frame + draws_df = init$draws(format = "df") + if (is.null(model_variables)) { + model_variables = list(parameters = colnames(draws_df)[3:(length(colnames(draws_df)) - 3)]) + } + draws_df$weight = rep(1.0, nrow(draws_df)) + init_draws_df = posterior::resample_draws(draws_df, ndraws = num_procs, + weights = draws_df$weight, method = "simple_no_replace") + init_draws_lst = process_init(init_draws_df, + num_procs = num_procs, model_variables = model_variables, warn_partial) + return(init_draws_lst) + } else { + process_init_approx(init, num_procs, model_variables, warn_partial) + } } #' Write initial values to files if provided as a `CmdStanVB` class diff --git a/man-roxygen/model-common-args.R b/man-roxygen/model-common-args.R index 15655510..5c284d5b 100644 --- a/man-roxygen/model-common-args.R +++ b/man-roxygen/model-common-args.R @@ -44,19 +44,21 @@ #' has argument `chain_id` it will be supplied with the chain id (from 1 to #' number of chains) when called to generate the initial values. See #' **Examples**. -#' * A [`CmdStanMCMC`], [`CmdStanMLE`], [`CmdStanVB`], or [`CmdStanPathfinder`] -#' fit object. If the fit object's parameters are only a subset of the model +#' * A [`CmdStanMCMC`], [`CmdStanMLE`], [`CmdStanVB`], [`CmdStanPathfinder`], +#' or [`CmdStanLaplace`] fit object. +#' If the fit object's parameters are only a subset of the model #' parameters then the other parameters will be drawn by Stan's default #' initialization. The fit object must have at least some parameters that are the -#' same name and dimensions as the current Stan model. For the `sampling` and -#' `pathfinder` method, if the fit object has less samples than the requested +#' same name and dimensions as the current Stan model. 
For the `sample` and +#' `pathfinder` method, if the fit object has fewer draws than the requested #' number of chains/paths then the inits will be drawn using sampling with #' replacement. Otherwise sampling without replacement will be used. #' When a [`CmdStanPathfinder`] fit object is used as the init, if -#' `psis_resample` was set to `FALSE` and `calculate_lp` was -#' set to `TRUE` (default), then PSIS resampling will be used as weights. -#' if `calculate_lp` is `FALSE` then sampling without replacement will be used -#' to select the draws. +#' `psis_resample` was set to `FALSE` and `calculate_lp` was +#' set to `TRUE` (default), then resampling without replacement with Pareto +#' smoothed weights will be used. If `psis_resample` was set to `TRUE` or +#' `calculate_lp` was set to `FALSE` then sampling without replacement with +#' uniform weights will be used to select the draws. #' PSIS resampling is used to select the draws for [`CmdStanVB`] fit objects. #' #' * A type inheriting from `posterior::draws`. If the draws object has less diff --git a/man/model-method-diagnose.Rd b/man/model-method-diagnose.Rd index c208efd8..093c3a9d 100644 --- a/man/model-method-diagnose.Rd +++ b/man/model-method-diagnose.Rd @@ -61,19 +61,21 @@ take no arguments or a single argument \code{chain_id}. For MCMC, if the functio has argument \code{chain_id} it will be supplied with the chain id (from 1 to number of chains) when called to generate the initial values. See \strong{Examples}. -\item A \code{\link{CmdStanMCMC}}, \code{\link{CmdStanMLE}}, \code{\link{CmdStanVB}}, or \code{\link{CmdStanPathfinder}} -fit object. If the fit object's parameters are only a subset of the model +\item A \code{\link{CmdStanMCMC}}, \code{\link{CmdStanMLE}}, \code{\link{CmdStanVB}}, \code{\link{CmdStanPathfinder}}, +or \code{\link{CmdStanLaplace}} fit object. 
+If the fit object's parameters are only a subset of the model parameters then the other parameters will be drawn by Stan's default initialization. The fit object must have at least some parameters that are the -same name and dimensions as the current Stan model. For the \code{sampling} and -\code{pathfinder} method, if the fit object has less samples than the requested +same name and dimensions as the current Stan model. For the \code{sample} and +\code{pathfinder} method, if the fit object has fewer draws than the requested number of chains/paths then the inits will be drawn using sampling with replacement. Otherwise sampling without replacement will be used. When a \code{\link{CmdStanPathfinder}} fit object is used as the init, if -\code{psis_resample} was set to \code{FALSE} and \code{calculate_lp} was -set to \code{TRUE} (default), then PSIS resampling will be used as weights. -if \code{calculate_lp} is \code{FALSE} then sampling without replacement will be used -to select the draws. +\code{psis_resample} was set to \code{FALSE} and \code{calculate_lp} was +set to \code{TRUE} (default), then resampling without replacement with Pareto +smoothed weights will be used. If \code{psis_resample} was set to \code{TRUE} or +\code{calculate_lp} was set to \code{FALSE} then sampling without replacement with +uniform weights will be used to select the draws. PSIS resampling is used to select the draws for \code{\link{CmdStanVB}} fit objects. \item A type inheriting from \code{posterior::draws}. If the draws object has less samples than the number of requested chains/paths then the inits will be diff --git a/man/model-method-laplace.Rd b/man/model-method-laplace.Rd index d71e9c37..ec7b13b5 100644 --- a/man/model-method-laplace.Rd +++ b/man/model-method-laplace.Rd @@ -74,19 +74,21 @@ take no arguments or a single argument \code{chain_id}. 
For MCMC, if the functio has argument \code{chain_id} it will be supplied with the chain id (from 1 to number of chains) when called to generate the initial values. See \strong{Examples}. -\item A \code{\link{CmdStanMCMC}}, \code{\link{CmdStanMLE}}, \code{\link{CmdStanVB}}, or \code{\link{CmdStanPathfinder}} -fit object. If the fit object's parameters are only a subset of the model +\item A \code{\link{CmdStanMCMC}}, \code{\link{CmdStanMLE}}, \code{\link{CmdStanVB}}, \code{\link{CmdStanPathfinder}}, +or \code{\link{CmdStanLaplace}} fit object. +If the fit object's parameters are only a subset of the model parameters then the other parameters will be drawn by Stan's default initialization. The fit object must have at least some parameters that are the -same name and dimensions as the current Stan model. For the \code{sampling} and -\code{pathfinder} method, if the fit object has less samples than the requested +same name and dimensions as the current Stan model. For the \code{sample} and +\code{pathfinder} method, if the fit object has fewer draws than the requested number of chains/paths then the inits will be drawn using sampling with replacement. Otherwise sampling without replacement will be used. When a \code{\link{CmdStanPathfinder}} fit object is used as the init, if -\code{psis_resample} was set to \code{FALSE} and \code{calculate_lp} was -set to \code{TRUE} (default), then PSIS resampling will be used as weights. -if \code{calculate_lp} is \code{FALSE} then sampling without replacement will be used -to select the draws. +\code{psis_resample} was set to \code{FALSE} and \code{calculate_lp} was +set to \code{TRUE} (default), then resampling without replacement with Pareto +smoothed weights will be used. If \code{psis_resample} was set to \code{TRUE} or +\code{calculate_lp} was set to \code{FALSE} then sampling without replacement with +uniform weights will be used to select the draws. 
PSIS resampling is used to select the draws for \code{\link{CmdStanVB}} fit objects. \item A type inheriting from \code{posterior::draws}. If the draws object has less samples than the number of requested chains/paths then the inits will be diff --git a/man/model-method-optimize.Rd b/man/model-method-optimize.Rd index 4d9c42b6..0d8d9731 100644 --- a/man/model-method-optimize.Rd +++ b/man/model-method-optimize.Rd @@ -80,19 +80,21 @@ take no arguments or a single argument \code{chain_id}. For MCMC, if the functio has argument \code{chain_id} it will be supplied with the chain id (from 1 to number of chains) when called to generate the initial values. See \strong{Examples}. -\item A \code{\link{CmdStanMCMC}}, \code{\link{CmdStanMLE}}, \code{\link{CmdStanVB}}, or \code{\link{CmdStanPathfinder}} -fit object. If the fit object's parameters are only a subset of the model +\item A \code{\link{CmdStanMCMC}}, \code{\link{CmdStanMLE}}, \code{\link{CmdStanVB}}, \code{\link{CmdStanPathfinder}}, +or \code{\link{CmdStanLaplace}} fit object. +If the fit object's parameters are only a subset of the model parameters then the other parameters will be drawn by Stan's default initialization. The fit object must have at least some parameters that are the -same name and dimensions as the current Stan model. For the \code{sampling} and -\code{pathfinder} method, if the fit object has less samples than the requested +same name and dimensions as the current Stan model. For the \code{sample} and +\code{pathfinder} method, if the fit object has fewer draws than the requested number of chains/paths then the inits will be drawn using sampling with replacement. Otherwise sampling without replacement will be used. When a \code{\link{CmdStanPathfinder}} fit object is used as the init, if -\code{psis_resample} was set to \code{FALSE} and \code{calculate_lp} was -set to \code{TRUE} (default), then PSIS resampling will be used as weights. 
-if \code{calculate_lp} is \code{FALSE} then sampling without replacement will be used -to select the draws. +\code{psis_resample} was set to \code{FALSE} and \code{calculate_lp} was +set to \code{TRUE} (default), then resampling without replacement with Pareto +smoothed weights will be used. If \code{psis_resample} was set to \code{TRUE} or +\code{calculate_lp} was set to \code{FALSE} then sampling without replacement with +uniform weights will be used to select the draws. PSIS resampling is used to select the draws for \code{\link{CmdStanVB}} fit objects. \item A type inheriting from \code{posterior::draws}. If the draws object has less samples than the number of requested chains/paths then the inits will be diff --git a/man/model-method-pathfinder.Rd b/man/model-method-pathfinder.Rd index 1c7bbda1..606c8953 100644 --- a/man/model-method-pathfinder.Rd +++ b/man/model-method-pathfinder.Rd @@ -85,19 +85,21 @@ take no arguments or a single argument \code{chain_id}. For MCMC, if the functio has argument \code{chain_id} it will be supplied with the chain id (from 1 to number of chains) when called to generate the initial values. See \strong{Examples}. -\item A \code{\link{CmdStanMCMC}}, \code{\link{CmdStanMLE}}, \code{\link{CmdStanVB}}, or \code{\link{CmdStanPathfinder}} -fit object. If the fit object's parameters are only a subset of the model +\item A \code{\link{CmdStanMCMC}}, \code{\link{CmdStanMLE}}, \code{\link{CmdStanVB}}, \code{\link{CmdStanPathfinder}}, +or \code{\link{CmdStanLaplace}} fit object. +If the fit object's parameters are only a subset of the model parameters then the other parameters will be drawn by Stan's default initialization. The fit object must have at least some parameters that are the -same name and dimensions as the current Stan model. For the \code{sampling} and -\code{pathfinder} method, if the fit object has less samples than the requested +same name and dimensions as the current Stan model. 
For the \code{sample} and +\code{pathfinder} method, if the fit object has fewer draws than the requested number of chains/paths then the inits will be drawn using sampling with replacement. Otherwise sampling without replacement will be used. When a \code{\link{CmdStanPathfinder}} fit object is used as the init, if -\code{psis_resample} was set to \code{FALSE} and \code{calculate_lp} was -set to \code{TRUE} (default), then PSIS resampling will be used as weights. -if \code{calculate_lp} is \code{FALSE} then sampling without replacement will be used -to select the draws. +\code{psis_resample} was set to \code{FALSE} and \code{calculate_lp} was +set to \code{TRUE} (default), then resampling without replacement with Pareto +smoothed weights will be used. If \code{psis_resample} was set to \code{TRUE} or +\code{calculate_lp} was set to \code{FALSE} then sampling without replacement with +uniform weights will be used to select the draws. PSIS resampling is used to select the draws for \code{\link{CmdStanVB}} fit objects. \item A type inheriting from \code{posterior::draws}. If the draws object has less samples than the number of requested chains/paths then the inits will be diff --git a/man/model-method-sample.Rd b/man/model-method-sample.Rd index 821a81f7..ec2a6f82 100644 --- a/man/model-method-sample.Rd +++ b/man/model-method-sample.Rd @@ -98,19 +98,21 @@ take no arguments or a single argument \code{chain_id}. For MCMC, if the functio has argument \code{chain_id} it will be supplied with the chain id (from 1 to number of chains) when called to generate the initial values. See \strong{Examples}. -\item A \code{\link{CmdStanMCMC}}, \code{\link{CmdStanMLE}}, \code{\link{CmdStanVB}}, or \code{\link{CmdStanPathfinder}} -fit object. If the fit object's parameters are only a subset of the model +\item A \code{\link{CmdStanMCMC}}, \code{\link{CmdStanMLE}}, \code{\link{CmdStanVB}}, \code{\link{CmdStanPathfinder}}, +or \code{\link{CmdStanLaplace}} fit object. 
+If the fit object's parameters are only a subset of the model parameters then the other parameters will be drawn by Stan's default initialization. The fit object must have at least some parameters that are the -same name and dimensions as the current Stan model. For the \code{sampling} and -\code{pathfinder} method, if the fit object has less samples than the requested +same name and dimensions as the current Stan model. For the \code{sample} and +\code{pathfinder} method, if the fit object has fewer draws than the requested number of chains/paths then the inits will be drawn using sampling with replacement. Otherwise sampling without replacement will be used. When a \code{\link{CmdStanPathfinder}} fit object is used as the init, if -\code{psis_resample} was set to \code{FALSE} and \code{calculate_lp} was -set to \code{TRUE} (default), then PSIS resampling will be used as weights. -if \code{calculate_lp} is \code{FALSE} then sampling without replacement will be used -to select the draws. +\code{psis_resample} was set to \code{FALSE} and \code{calculate_lp} was +set to \code{TRUE} (default), then resampling without replacement with Pareto +smoothed weights will be used. If \code{psis_resample} was set to \code{TRUE} or +\code{calculate_lp} was set to \code{FALSE} then sampling without replacement with +uniform weights will be used to select the draws. PSIS resampling is used to select the draws for \code{\link{CmdStanVB}} fit objects. \item A type inheriting from \code{posterior::draws}. If the draws object has less samples than the number of requested chains/paths then the inits will be diff --git a/man/model-method-sample_mpi.Rd b/man/model-method-sample_mpi.Rd index f4b0f6d2..77586a1c 100644 --- a/man/model-method-sample_mpi.Rd +++ b/man/model-method-sample_mpi.Rd @@ -97,19 +97,21 @@ take no arguments or a single argument \code{chain_id}. 
For MCMC, if the functio has argument \code{chain_id} it will be supplied with the chain id (from 1 to number of chains) when called to generate the initial values. See \strong{Examples}. -\item A \code{\link{CmdStanMCMC}}, \code{\link{CmdStanMLE}}, \code{\link{CmdStanVB}}, or \code{\link{CmdStanPathfinder}} -fit object. If the fit object's parameters are only a subset of the model +\item A \code{\link{CmdStanMCMC}}, \code{\link{CmdStanMLE}}, \code{\link{CmdStanVB}}, \code{\link{CmdStanPathfinder}}, +or \code{\link{CmdStanLaplace}} fit object. +If the fit object's parameters are only a subset of the model parameters then the other parameters will be drawn by Stan's default initialization. The fit object must have at least some parameters that are the -same name and dimensions as the current Stan model. For the \code{sampling} and -\code{pathfinder} method, if the fit object has less samples than the requested +same name and dimensions as the current Stan model. For the \code{sample} and +\code{pathfinder} method, if the fit object has fewer draws than the requested number of chains/paths then the inits will be drawn using sampling with replacement. Otherwise sampling without replacement will be used. When a \code{\link{CmdStanPathfinder}} fit object is used as the init, if -\code{psis_resample} was set to \code{FALSE} and \code{calculate_lp} was -set to \code{TRUE} (default), then PSIS resampling will be used as weights. -if \code{calculate_lp} is \code{FALSE} then sampling without replacement will be used -to select the draws. +\code{psis_resample} was set to \code{FALSE} and \code{calculate_lp} was +set to \code{TRUE} (default), then resampling without replacement with Pareto +smoothed weights will be used. If \code{psis_resample} was set to \code{TRUE} or +\code{calculate_lp} was set to \code{FALSE} then sampling without replacement with +uniform weights will be used to select the draws. 
PSIS resampling is used to select the draws for \code{\link{CmdStanVB}} fit objects. \item A type inheriting from \code{posterior::draws}. If the draws object has less samples than the number of requested chains/paths then the inits will be diff --git a/man/model-method-variational.Rd b/man/model-method-variational.Rd index 5eef4aa1..665fd9ae 100644 --- a/man/model-method-variational.Rd +++ b/man/model-method-variational.Rd @@ -81,19 +81,21 @@ take no arguments or a single argument \code{chain_id}. For MCMC, if the functio has argument \code{chain_id} it will be supplied with the chain id (from 1 to number of chains) when called to generate the initial values. See \strong{Examples}. -\item A \code{\link{CmdStanMCMC}}, \code{\link{CmdStanMLE}}, \code{\link{CmdStanVB}}, or \code{\link{CmdStanPathfinder}} -fit object. If the fit object's parameters are only a subset of the model +\item A \code{\link{CmdStanMCMC}}, \code{\link{CmdStanMLE}}, \code{\link{CmdStanVB}}, \code{\link{CmdStanPathfinder}}, +or \code{\link{CmdStanLaplace}} fit object. +If the fit object's parameters are only a subset of the model parameters then the other parameters will be drawn by Stan's default initialization. The fit object must have at least some parameters that are the -same name and dimensions as the current Stan model. For the \code{sampling} and -\code{pathfinder} method, if the fit object has less samples than the requested +same name and dimensions as the current Stan model. For the \code{sample} and +\code{pathfinder} method, if the fit object has fewer draws than the requested number of chains/paths then the inits will be drawn using sampling with replacement. Otherwise sampling without replacement will be used. When a \code{\link{CmdStanPathfinder}} fit object is used as the init, if -\code{psis_resample} was set to \code{FALSE} and \code{calculate_lp} was -set to \code{TRUE} (default), then PSIS resampling will be used as weights. 
-if \code{calculate_lp} is \code{FALSE} then sampling without replacement will be used -to select the draws. +\code{psis_resample} was set to \code{FALSE} and \code{calculate_lp} was +set to \code{TRUE} (default), then resampling without replacement with Pareto +smoothed weights will be used. If \code{psis_resample} was set to \code{TRUE} or +\code{calculate_lp} was set to \code{FALSE} then sampling without replacement with +uniform weights will be used to select the draws. PSIS resampling is used to select the draws for \code{\link{CmdStanVB}} fit objects. \item A type inheriting from \code{posterior::draws}. If the draws object has less samples than the number of requested chains/paths then the inits will be diff --git a/tests/testthat/test-fit-init.R b/tests/testthat/test-fit-init.R index 75d368a7..e66dd537 100644 --- a/tests/testthat/test-fit-init.R +++ b/tests/testthat/test-fit-init.R @@ -80,9 +80,6 @@ test_that("Pathfinder method with calculate_lp as false works as init", { set.seed(1234) utils::capture.output(fit_path_init <- mod_logistic$pathfinder(seed=1234, data = data_list_logistic, refresh = 0, num_paths = 4, psis_resample = TRUE, calculate_lp = FALSE)) - utils::capture.output(fit_sample_init_simple <- mod_logistic$sample(chains = 1, - data = data_list_logistic, iter_warmup = 100, iter_sampling = 100, - refresh = 0, seed = 1234, init = fit_path_init)) expect_no_error(test_inits(mod_logistic, fit_path_init, data_list_logistic)) utils::capture.output(fit_path_init <- mod_logistic$pathfinder(seed=1234, data = data_list_logistic,