diff --git a/R/args.R b/R/args.R index c22ebc7a..4c64d9c6 100644 --- a/R/args.R +++ b/R/args.R @@ -43,7 +43,8 @@ CmdStanArgs <- R6::R6Class( sig_figs = NULL, opencl_ids = NULL, model_variables = NULL, - num_threads = NULL) { + num_threads = NULL, + save_cmdstan_config = NULL) { self$model_name <- model_name self$stan_code <- stan_code @@ -60,6 +61,7 @@ CmdStanArgs <- R6::R6Class( self$save_latent_dynamics <- save_latent_dynamics self$using_tempdir <- is.null(output_dir) self$model_variables <- model_variables + self$save_cmdstan_config <- save_cmdstan_config if (os_is_wsl()) { # Want to ensure that any files under WSL are written to a tempdir within # WSL to avoid IO performance issues @@ -87,6 +89,9 @@ CmdStanArgs <- R6::R6Class( self$opencl_ids <- opencl_ids self$num_threads = NULL self$method_args$validate(num_procs = length(self$proc_ids)) + if (is.logical(self$save_cmdstan_config)) { + self$save_cmdstan_config <- as.integer(self$save_cmdstan_config) + } self$validate() }, validate = function() { @@ -111,7 +116,7 @@ CmdStanArgs <- R6::R6Class( } else if (type == "profile") { basename <- paste0(basename, "-profile") } - if (type == "output" && !is.null(self$output_basename)) { + if (type == "output" && !is.null(self$output_basename)) { basename <- self$output_basename } generate_file_names( @@ -180,6 +185,9 @@ CmdStanArgs <- R6::R6Class( if (!is.null(profile_file)) { args$output <- c(args$output, paste0("profile_file=", wsl_safe_path(profile_file))) } + if (!is.null(self$save_cmdstan_config)) { + args$output <- c(args$output, paste0("save_cmdstan_config=", self$save_cmdstan_config)) + } if (!is.null(self$opencl_ids)) { args$opencl <- c("opencl", paste0("platform=", self$opencl_ids[1]), paste0("device=", self$opencl_ids[2])) } @@ -218,7 +226,8 @@ SampleArgs <- R6::R6Class( term_buffer = NULL, window = NULL, fixed_param = FALSE, - diagnostics = NULL) { + diagnostics = NULL, + save_metric = NULL) { self$iter_warmup <- iter_warmup self$iter_sampling <- iter_sampling 
@@ -232,6 +241,7 @@ SampleArgs <- R6::R6Class( self$inv_metric <- inv_metric self$fixed_param <- fixed_param self$diagnostics <- diagnostics + self$save_metric <- save_metric if (identical(self$diagnostics, "")) { self$diagnostics <- NULL } @@ -275,6 +285,9 @@ SampleArgs <- R6::R6Class( if (is.logical(self$save_warmup)) { self$save_warmup <- as.integer(self$save_warmup) } + if (is.logical(self$save_metric)) { + self$save_metric <- as.integer(self$save_metric) + } invisible(self) }, validate = function(num_procs) { @@ -314,7 +327,8 @@ SampleArgs <- R6::R6Class( .make_arg("adapt_engaged"), .make_arg("init_buffer"), .make_arg("term_buffer"), - .make_arg("window") + .make_arg("window"), + .make_arg("save_metric") ) } else { new_args <- list( @@ -335,7 +349,8 @@ SampleArgs <- R6::R6Class( .make_arg("adapt_engaged"), .make_arg("init_buffer"), .make_arg("term_buffer"), - .make_arg("window") + .make_arg("window"), + .make_arg("save_metric") ) } new_args <- do.call(c, new_args) @@ -682,6 +697,7 @@ validate_cmdstan_args <- function(self) { checkmate::assert_flag(self$save_latent_dynamics) checkmate::assert_integerish(self$refresh, lower = 0, null.ok = TRUE) checkmate::assert_integerish(self$sig_figs, lower = 1, upper = 18, null.ok = TRUE) + checkmate::assert_integerish(self$save_cmdstan_config, lower = 0, upper = 1, len = 1, null.ok = TRUE) if (!is.null(self$sig_figs) && cmdstan_version() < "2.25") { warning("The 'sig_figs' argument is only supported with cmdstan 2.25+ and will be ignored!", call. 
= FALSE) } @@ -799,6 +815,15 @@ validate_sample_args <- function(self, num_procs) { checkmate::assert_subset(self$diagnostics, empty.ok = FALSE, choices = available_hmc_diagnostics()) } + checkmate::assert_integerish(self$save_metric, + lower = 0, upper = 1, + len = 1, + null.ok = TRUE) + + if (is.null(self$adapt_engaged) || (!self$adapt_engaged && !is.null(self$save_metric))) { + self$save_metric <- 0 + } + invisible(TRUE) } diff --git a/R/fit.R b/R/fit.R index 68215608..0daf26f6 100644 --- a/R/fit.R +++ b/R/fit.R @@ -898,10 +898,13 @@ CmdStanFit$set("public", name = "cmdstan_diagnose", value = cmdstan_diagnose) #' Save output and data files #' #' @name fit-method-save_output_files -#' @aliases fit-method-save_data_file fit-method-save_latent_dynamics_files fit-method-save_profile_files -#' fit-method-output_files fit-method-data_file fit-method-latent_dynamics_files fit-method-profile_files -#' save_output_files save_data_file save_latent_dynamics_files save_profile_files -#' output_files data_file latent_dynamics_files profile_files +#' @aliases fit-method-save_data_file fit-method-save_latent_dynamics_files +#' fit-method-save_profile_files fit-method-output_files fit-method-data_file +#' fit-method-latent_dynamics_files fit-method-profile_files +#' fit-method-save_config_files fit-method-save_metric_files save_output_files +#' save_data_file save_latent_dynamics_files save_profile_files +#' save_config_files save_metric_files output_files data_file +#' latent_dynamics_files profile_files config_files metric_files #' #' @description All fitted model objects have methods for saving (moving to a #' specified location) the files created by CmdStanR to hold CmdStan output @@ -936,6 +939,14 @@ CmdStanFit$set("public", name = "cmdstan_diagnose", value = cmdstan_diagnose) #' `$save_output_files()` except `"-profile-"` is included in the new #' file name after `basename`. 
#' +#' For `$save_metric_files()` everything is the same as for +#' `$save_output_files()` except `"-metric-"` is included in the new +#' file name after `basename`. +#' +#' For `$save_config_files()` everything is the same as for +#' `$save_output_files()` except `"-config-"` is included in the new +#' file name after `basename`. +#' #' For `$save_data_file()` no `id` is included in the file name because even #' with multiple MCMC chains the data file is the same. #' @@ -998,6 +1009,26 @@ save_data_file <- function(dir = ".", } CmdStanFit$set("public", name = "save_data_file", value = save_data_file) +#' @rdname fit-method-save_output_files +save_config_files <- function(dir = ".", + basename = NULL, + timestamp = TRUE, + random = TRUE) { + self$runset$save_config_files(dir, basename, timestamp, random) +} +CmdStanFit$set("public", name = "save_config_files", value = save_config_files) + +#' @rdname fit-method-save_output_files +save_metric_files <- function(dir = ".", + basename = NULL, + timestamp = TRUE, + random = TRUE) { + self$runset$save_metric_files(dir, basename, timestamp, random) +} +CmdStanFit$set("public", name = "save_metric_files", value = save_metric_files) + + + #' @rdname fit-method-save_output_files #' @param include_failed (logical) Should CmdStan runs that failed also be #' included? 
The default is `FALSE.` @@ -1024,6 +1055,17 @@ data_file <- function() { } CmdStanFit$set("public", name = "data_file", value = data_file) +#' @rdname fit-method-save_output_files +config_files <- function(include_failed = FALSE) { + self$runset$config_files(include_failed) +} +CmdStanFit$set("public", name = "config_files", value = config_files) + +#' @rdname fit-method-save_output_files +metric_files <- function(include_failed = FALSE) { + self$runset$metric_files(include_failed) +} +CmdStanFit$set("public", name = "metric_files", value = metric_files) #' Report timing of CmdStan runs #' diff --git a/R/model.R b/R/model.R index 0b4ba47e..6aa5b010 100644 --- a/R/model.R +++ b/R/model.R @@ -1149,6 +1149,8 @@ sample <- function(data = NULL, show_messages = TRUE, show_exceptions = TRUE, diagnostics = c("divergences", "treedepth", "ebfmi"), + save_metric = if (cmdstan_version() > "2.34.0") { TRUE } else { NULL }, + save_cmdstan_config = if (cmdstan_version() > "2.34.0") { TRUE } else { NULL }, # deprecated cores = NULL, num_cores = NULL, @@ -1240,7 +1242,8 @@ sample <- function(data = NULL, term_buffer = term_buffer, window = window, fixed_param = fixed_param, - diagnostics = diagnostics + diagnostics = diagnostics, + save_metric = save_metric ) args <- CmdStanArgs$new( method_args = sample_args, @@ -1260,7 +1263,8 @@ sample <- function(data = NULL, output_basename = output_basename, sig_figs = sig_figs, opencl_ids = assert_valid_opencl(opencl_ids, self$cpp_options()), - model_variables = model_variables + model_variables = model_variables, + save_cmdstan_config = save_cmdstan_config ) runset <- CmdStanRun$new(args, procs) runset$run_cmdstan() @@ -1357,6 +1361,7 @@ sample_mpi <- function(data = NULL, show_messages = TRUE, show_exceptions = TRUE, diagnostics = c("divergences", "treedepth", "ebfmi"), + save_cmdstan_config = if (cmdstan_version() > "2.34.0") { TRUE } else { NULL }, # deprecated validate_csv = TRUE) { @@ -1420,7 +1425,8 @@ sample_mpi <- function(data = 
NULL, output_dir = output_dir, output_basename = output_basename, sig_figs = sig_figs, - model_variables = model_variables + model_variables = model_variables, + save_cmdstan_config = save_cmdstan_config ) runset <- CmdStanRun$new(args, procs) runset$run_cmdstan_mpi(mpi_cmd, mpi_args) @@ -1500,7 +1506,8 @@ optimize <- function(data = NULL, tol_param = NULL, history_size = NULL, show_messages = TRUE, - show_exceptions = TRUE) { + show_exceptions = TRUE, + save_cmdstan_config = if (cmdstan_version() > "2.34.0") { TRUE } else { NULL }) { procs <- CmdStanProcs$new( num_procs = 1, show_stderr_messages = show_exceptions, @@ -1541,7 +1548,8 @@ optimize <- function(data = NULL, output_basename = output_basename, sig_figs = sig_figs, opencl_ids = assert_valid_opencl(opencl_ids, self$cpp_options()), - model_variables = model_variables + model_variables = model_variables, + save_cmdstan_config = save_cmdstan_config ) runset <- CmdStanRun$new(args, procs) runset$run_cmdstan() @@ -1632,7 +1640,8 @@ laplace <- function(data = NULL, jacobian = TRUE, # different default than for optimize! draws = NULL, show_messages = TRUE, - show_exceptions = TRUE) { + show_exceptions = TRUE, + save_cmdstan_config = if (cmdstan_version() > "2.34.0") { TRUE } else { NULL }) { if (cmdstan_version() < "2.32") { stop("This method is only available in cmdstan >= 2.32", call. 
= FALSE) } @@ -1706,7 +1715,8 @@ laplace <- function(data = NULL, output_basename = output_basename, sig_figs = sig_figs, opencl_ids = assert_valid_opencl(opencl_ids, self$cpp_options()), - model_variables = model_variables + model_variables = model_variables, + save_cmdstan_config = save_cmdstan_config ) runset <- CmdStanRun$new(args, procs) runset$run_cmdstan() @@ -1786,7 +1796,8 @@ variational <- function(data = NULL, output_samples = NULL, draws = NULL, show_messages = TRUE, - show_exceptions = TRUE) { + show_exceptions = TRUE, + save_cmdstan_config = if (cmdstan_version() > "2.34.0") { TRUE } else { NULL }) { procs <- CmdStanProcs$new( num_procs = 1, show_stderr_messages = show_exceptions, @@ -1827,7 +1838,8 @@ variational <- function(data = NULL, output_basename = output_basename, sig_figs = sig_figs, opencl_ids = assert_valid_opencl(opencl_ids, self$cpp_options()), - model_variables = model_variables + model_variables = model_variables, + save_cmdstan_config = save_cmdstan_config ) runset <- CmdStanRun$new(args, procs) runset$run_cmdstan() @@ -1929,7 +1941,8 @@ pathfinder <- function(data = NULL, psis_resample = NULL, calculate_lp = NULL, show_messages = TRUE, - show_exceptions = TRUE) { + show_exceptions = TRUE, + save_cmdstan_config = if (cmdstan_version() > "2.34.0") { TRUE } else { NULL }) { procs <- CmdStanProcs$new( num_procs = 1, show_stderr_messages = show_exceptions, @@ -1976,7 +1989,8 @@ pathfinder <- function(data = NULL, sig_figs = sig_figs, opencl_ids = assert_valid_opencl(opencl_ids, self$cpp_options()), model_variables = model_variables, - num_threads = num_threads + num_threads = num_threads, + save_cmdstan_config = save_cmdstan_config ) runset <- CmdStanRun$new(args, procs) runset$run_cmdstan() diff --git a/R/run.R b/R/run.R index 9bce6c3e..c766f733 100644 --- a/R/run.R +++ b/R/run.R @@ -26,6 +26,12 @@ CmdStanRun <- R6::R6Class( if (cmdstan_version() >= "2.26.0") { private$profile_files_ <- self$new_profile_files() } + if (cmdstan_version() 
>= "2.34.0" && !is.null(self$args$save_cmdstan_config) && self$args$save_cmdstan_config) { + private$config_files_ <- self$new_config_files() + } + if (cmdstan_version() >= "2.34.0" && !is.null(self$args$method_args$save_metric) && self$args$method_args$save_metric) { + private$metric_files_ <- self$new_metric_files() + } if (self$args$save_latent_dynamics) { private$latent_dynamics_files_ <- self$new_latent_dynamics_files() } @@ -60,6 +66,48 @@ CmdStanRun <- R6::R6Class( new_profile_files = function() { self$args$new_files(type = "profile") }, + new_config_files = function() { + # because CmdStan 2.34 uses the output_file name as the base for the config file + paste0(tools::file_path_sans_ext(private$output_files_), "_config.json") + }, + new_metric_files = function() { + # because CmdStan 2.34 uses the output_file name as the base for the metric file + paste0(tools::file_path_sans_ext(private$output_files_), "_metric.json") + }, + config_files = function(include_failed = FALSE) { + files <- private$config_files_ + files_win_path <- sapply(private$config_files_, wsl_safe_path, revert = TRUE) + if (!length(files) || !any(file.exists(files_win_path))) { + stop( + "No CmdStan config files found. ", + "Set 'save_cmdstan_config=TRUE' when fitting the model.", + call. = FALSE + ) + } + if (include_failed) { + files + } else { + ok <- self$procs$is_finished() | self$procs$is_queued() + files[ok] + } + }, + metric_files = function(include_failed = FALSE) { + files <- private$metric_files_ + files_win_path <- sapply(private$metric_files_, wsl_safe_path, revert = TRUE) + if (!length(files) || !any(file.exists(files_win_path))) { + stop( + "No metric files found. ", + "Set 'save_metric=TRUE' when fitting the model.", + call. 
= FALSE + ) + } + if (include_failed) { + files + } else { + ok <- self$procs$is_finished() | self$procs$is_queued() + files[ok] + } + }, latent_dynamics_files = function(include_failed = FALSE) { if (!length(private$latent_dynamics_files_)) { stop( @@ -195,6 +243,54 @@ CmdStanRun <- R6::R6Class( "- ", new_path) invisible(new_path) }, + save_config_files = function(dir = ".", + basename = NULL, + timestamp = TRUE, + random = TRUE) { + current_files <- self$config_files(include_failed = TRUE) # used so we get error if 0 files + new_paths <- copy_temp_files( + current_paths = current_files, + new_dir = dir, + new_basename = paste0(basename %||% self$model_name(), "-config"), + ids = self$proc_ids(), + ext = ".json", + timestamp = timestamp, + random = random + ) + file.remove(current_files[!current_files %in% new_paths]) + private$config_files_ <- new_paths + message( + "Moved ", + length(current_files), + " files and set internal paths to new locations:\n", + paste("-", new_paths, collapse = "\n") + ) + invisible(new_paths) + }, + save_metric_files = function(dir = ".", + basename = NULL, + timestamp = TRUE, + random = TRUE) { + current_files <- self$metric_files(include_failed = TRUE) # used so we get error if 0 files + new_paths <- copy_temp_files( + current_paths = current_files, + new_dir = dir, + new_basename = paste0(basename %||% self$model_name(), "-metric"), + ids = self$proc_ids(), + ext = ".json", + timestamp = timestamp, + random = random + ) + file.remove(current_files[!current_files %in% new_paths]) + private$metric_files_ <- new_paths + message( + "Moved ", + length(current_files), + " files and set internal paths to new locations:\n", + paste("-", new_paths, collapse = "\n") + ) + invisible(new_paths) + }, command = function() self$args$command(), command_args = function() { @@ -291,6 +387,10 @@ CmdStanRun <- R6::R6Class( latent_dynamics_files_ = NULL, latent_dynamics_files_saved_ = FALSE, profile_files_saved_ = FALSE, + config_files_ = NULL, + 
metric_files_ = NULL, + config_files_saved_ = FALSE, + metric_files_saved_ = FALSE, command_args_ = list(), finalize = function() { @@ -301,7 +401,17 @@ CmdStanRun <- R6::R6Class( if (self$args$save_latent_dynamics && !private$latent_dynamics_files_saved_) self$latent_dynamics_files(include_failed = TRUE), if (cmdstan_version() > "2.25.0" && !private$profile_files_saved_) - private$profile_files_ + private$profile_files_, + if (cmdstan_version() > "2.34.0" && + !is.null(self$args$save_cmdstan_config) && + self$args$save_cmdstan_config && + !private$config_files_saved_) + self$config_files(include_failed = TRUE), + if (cmdstan_version() > "2.34.0" && + !(is.null(self$args$method_args$save_metric)) && + self$args$method_args$save_metric && + !private$metric_files_saved_) + self$metric_files(include_failed = TRUE) ) unlink(temp_files) } diff --git a/R/utils.R b/R/utils.R index f15f2002..a24d7e6f 100644 --- a/R/utils.R +++ b/R/utils.R @@ -103,7 +103,7 @@ stanc_cmd <- function() { # paths and extensions ---------------------------------------------------- -# Replace `\\` with `/` in a path +# Replace `\\` with `/` in a vector of paths # Needed for windows if CmdStan version is < 2.21: # https://github.com/stan-dev/cmdstanr/issues/1#issuecomment-539118598 repair_path <- function(path) { @@ -114,10 +114,8 @@ repair_path <- function(path) { path <- gsub("\\\\", "/", path) # WSL cmdstan path is a network path and needs the leading // path <- gsub("//(?!wsl)", "/", path, perl = TRUE) - if (endsWith(path, "/")) { - # remove trailing "/" - path <- substr(path, 1, nchar(path) - 1) - } + # remove trailing "/" + path <- gsub("/$","", path) path } diff --git a/man-roxygen/model-common-args.R b/man-roxygen/model-common-args.R index 6b60da19..df687908 100644 --- a/man-roxygen/model-common-args.R +++ b/man-roxygen/model-common-args.R @@ -1,48 +1,48 @@ #' @param data (multiple options) The data to use for the variables specified in #' the data block of the Stan program. 
One of the following: #' * A named list of \R objects with the names corresponding to variables -#' declared in the data block of the Stan program. Internally this list is then -#' written to JSON for CmdStan using [write_stan_json()]. See -#' [write_stan_json()] for details on the conversions performed on \R objects -#' before they are passed to Stan. +#' declared in the data block of the Stan program. Internally this list is +#' then written to JSON for CmdStan using [write_stan_json()]. See +#' [write_stan_json()] for details on the conversions performed on \R objects +#' before they are passed to Stan. #' * A path to a data file compatible with CmdStan (JSON or \R dump). See the -#' appendices in the CmdStan guide for details on using these formats. +#' appendices in the CmdStan guide for details on using these formats. #' * `NULL` or an empty list if the Stan program has no data block. #' #' @param seed (positive integer(s)) A seed for the (P)RNG to pass to CmdStan. #' In the case of multi-chain sampling the single `seed` will automatically be #' augmented by the the run (chain) ID so that each chain uses a different -#' seed. The exception is the transformed data block, which defaults to -#' using same seed for all chains so that the same data is generated for all -#' chains if RNG functions are used. The only time `seed` should be specified -#' as a vector (one element per chain) is if RNG functions are used in -#' transformed data and the goal is to generate *different* data for each -#' chain. +#' seed. The exception is the transformed data block, which defaults to using +#' same seed for all chains so that the same data is generated for all chains +#' if RNG functions are used. The only time `seed` should be specified as a +#' vector (one element per chain) is if RNG functions are used in transformed +#' data and the goal is to generate *different* data for each chain. 
#' #' @param refresh (non-negative integer) The number of iterations between #' printed screen updates. If `refresh = 0`, only error messages will be #' printed. #' #' @param init (multiple options) The initialization method to use for the -#' variables declared in the parameters block of the Stan program. One of -#' the following: +#' variables declared in the parameters block of the Stan program. One of the +#' following: #' * A real number `x>0`. This initializes _all_ parameters randomly between -#' `[-x,x]` on the _unconstrained_ parameter space.; +#' `[-x,x]` on the _unconstrained_ parameter space.; #' * The number `0`. This initializes _all_ parameters to `0`; #' * A character vector of paths (one per chain) to JSON or Rdump files -#' containing initial values for all or some parameters. See -#' [write_stan_json()] to write \R objects to JSON files compatible with -#' CmdStan. +#' containing initial values for all or some parameters. See +#' [write_stan_json()] to write \R objects to JSON files compatible with +#' CmdStan. #' * A list of lists containing initial values for all or some parameters. For -#' MCMC the list should contain a sublist for each chain. For other model -#' fitting methods there should be just one sublist. The sublists should have -#' named elements corresponding to the parameters for which you are specifying -#' initial values. See **Examples**. +#' MCMC the list should contain a sublist for each chain. For other model +#' fitting methods there should be just one sublist. The sublists should have +#' named elements corresponding to the parameters for which you are specifying +#' initial values. See **Examples**. #' * A function that returns a single list with names corresponding to the -#' parameters for which you are specifying initial values. The function can -#' take no arguments or a single argument `chain_id`. 
For MCMC, if the function -#' has argument `chain_id` it will be supplied with the chain id (from 1 to -#' number of chains) when called to generate the initial values. See +#' parameters for which you are specifying initial values. The function can +#' take no arguments or a single argument `chain_id`. For MCMC, if the +#' function has argument `chain_id` it will be supplied with the chain id +#' (from 1 to number of chains) when called to generate the initial values. +#' See #' **Examples**. #' * A [`CmdStanMCMC`], [`CmdStanMLE`], [`CmdStanVB`], [`CmdStanPathfinder`], #' or [`CmdStanLaplace`] fit object. @@ -74,8 +74,8 @@ #' about the latent dynamics be written to temporary diagnostic CSV files? #' This argument replaces CmdStan's `diagnostic_file` argument and the content #' written to CSV is controlled by the user's CmdStan installation and not -#' CmdStanR (for some algorithms no content may be written). The default -#' is `FALSE`, which is appropriate for almost every use case. To save the +#' CmdStanR (for some algorithms no content may be written). The default is +#' `FALSE`, which is appropriate for almost every use case. To save the #' temporary files created when `save_latent_dynamics=TRUE` see the #' [`$save_latent_dynamics_files()`][fit-method-save_latent_dynamics_files] #' method. @@ -91,8 +91,8 @@ #' directory and only saved permanently if the user calls one of the `$save_*` #' methods of the fitted model object (e.g., #' [`$save_output_files()`][fit-method-save_output_files]). These temporary -#' files are removed when the fitted model object is -#' [garbage collected][base::gc] (manually or automatically). +#' files are removed when the fitted model object is [garbage +#' collected][base::gc] (manually or automatically). #' * If a path, then the files are created in `output_dir` with names #' corresponding to the defaults used by `$save_output_files()`. 
#' @@ -107,15 +107,15 @@ #' Increasing this value will result in larger output CSV files and thus an #' increased usage of disk space. #' -#' @param opencl_ids (integer vector of length 2) The platform and -#' device IDs of the OpenCL device to use for fitting. The model must -#' be compiled with `cpp_options = list(stan_opencl = TRUE)` for this -#' argument to have an effect. +#' @param opencl_ids (integer vector of length 2) The platform and device IDs of +#' the OpenCL device to use for fitting. The model must be compiled with +#' `cpp_options = list(stan_opencl = TRUE)` for this argument to have an +#' effect. #' -#' @param show_messages (logical) When `TRUE` (the default), prints all -#' output during the execution process, such as iteration numbers and elapsed times. -#' If the output is silenced then the [`$output()`][fit-method-output] method of -#' the resulting fit object can be used to display the silenced messages. +#' @param show_messages (logical) When `TRUE` (the default), prints all output +#' during the execution process, such as iteration numbers and elapsed times. +#' If the output is silenced then the [`$output()`][fit-method-output] method +#' of the resulting fit object can be used to display the silenced messages. #' #' @param show_exceptions (logical) When `TRUE` (the default), prints all #' informational messages, for example rejection of the current proposal. @@ -125,3 +125,8 @@ #' [`$output()`][fit-method-output] method of the resulting fit object can be #' used to display the silenced messages. #' +#' @param save_cmdstan_config (logical) When `TRUE` (the default), call CmdStan +#' with argument `"output save_cmdstan_config=1"` to save a JSON file which +#' contains the argument tree and extra information (equivalent to the output +#' CSV file header). This option is only available in CmdStan 2.34.0 and later. 
+#' diff --git a/man-roxygen/model-sample-args.R b/man-roxygen/model-sample-args.R index 240300a8..fe4e1014 100644 --- a/man-roxygen/model-sample-args.R +++ b/man-roxygen/model-sample-args.R @@ -15,8 +15,8 @@ #' contrast with `parallel_chains`, which specifies the number of chains to #' run in parallel. The actual number of CPU cores used is #' `parallel_chains*threads_per_chain`. For an example of using threading see -#' the Stan case study -#' [Reduce Sum: A Minimal Example](https://mc-stan.org/users/documentation/case-studies/reduce_sum_tutorial.html). +#' the Stan case study [Reduce Sum: A Minimal +#' Example](https://mc-stan.org/users/documentation/case-studies/reduce_sum_tutorial.html). #' #' @param iter_sampling (positive integer) The number of post-warmup iterations #' to run per chain. Note: in the CmdStan User's Guide this is referred to as @@ -46,13 +46,13 @@ #' specifying the geometry of the base manifold. See the _Euclidean Metric_ #' section of the CmdStan User's Guide for more details. To specify a #' precomputed (inverse) metric, see the `inv_metric` argument below. -#' @param metric_file (character vector) The paths to JSON or -#' Rdump files (one per chain) compatible with CmdStan that contain -#' precomputed inverse metrics. The `metric_file` argument is inherited from -#' CmdStan but is confusing in that the entry in JSON or Rdump file(s) must be -#' named `inv_metric`, referring to the _inverse_ metric. We recommend instead -#' using CmdStanR's `inv_metric` argument (see below) to specify an inverse -#' metric directly using a vector or matrix from your \R session. +#' @param metric_file (character vector) The paths to JSON or Rdump files (one +#' per chain) compatible with CmdStan that contain precomputed inverse +#' metrics. The `metric_file` argument is inherited from CmdStan but is +#' confusing in that the entry in JSON or Rdump file(s) must be named +#' `inv_metric`, referring to the _inverse_ metric. 
We recommend instead using +#' CmdStanR's `inv_metric` argument (see below) to specify an inverse metric +#' directly using a vector or matrix from your \R session. #' @param inv_metric (vector, matrix) A vector (if `metric='diag_e'`) or a #' matrix (if `metric='dense_e'`) for initializing the inverse metric. This #' can be used as an alternative to the `metric_file` argument. A vector is @@ -79,8 +79,8 @@ #' `NULL` can be used to prevent CmdStanR from automatically reading in the #' sampler diagnostics from CSV if you wish to manually read in the results #' and validate them yourself, for example using [read_cmdstan_csv()]. The -#' currently available diagnostics are `"divergences"`, `"treedepth"`, -#' and `"ebfmi"` (the default is to check all of them). +#' currently available diagnostics are `"divergences"`, `"treedepth"`, and +#' `"ebfmi"` (the default is to check all of them). #' #' These diagnostics are also available after fitting. The #' [`$sampler_diagnostics()`][fit-method-sampler_diagnostics] method provides @@ -91,4 +91,8 @@ #' Diagnostics like R-hat and effective sample size are _not_ currently #' available via the `diagnostics` argument but can be checked after fitting #' using the [`$summary()`][fit-method-summary] method. +#' @param save_metric (logical) When `TRUE`, call CmdStan with argument +#' `"adaptation save_metric=1"` to save the adapted metric in a separate JSON +#' file with elements "stepsize", "metric_type" and "inv_metric". The default +#' is `TRUE`. This option is only available in CmdStan 2.34.0 and later. 
#' diff --git a/man/fit-method-save_output_files.Rd b/man/fit-method-save_output_files.Rd index 64b86a75..beb1e35b 100644 --- a/man/fit-method-save_output_files.Rd +++ b/man/fit-method-save_output_files.Rd @@ -10,13 +10,19 @@ \alias{fit-method-data_file} \alias{fit-method-latent_dynamics_files} \alias{fit-method-profile_files} +\alias{fit-method-save_config_files} +\alias{fit-method-save_metric_files} \alias{save_data_file} \alias{save_latent_dynamics_files} \alias{save_profile_files} +\alias{save_config_files} +\alias{save_metric_files} \alias{output_files} \alias{data_file} \alias{latent_dynamics_files} \alias{profile_files} +\alias{config_files} +\alias{metric_files} \title{Save output and data files} \usage{ save_output_files(dir = ".", basename = NULL, timestamp = TRUE, random = TRUE) @@ -32,6 +38,10 @@ save_profile_files(dir = ".", basename = NULL, timestamp = TRUE, random = TRUE) save_data_file(dir = ".", basename = NULL, timestamp = TRUE, random = TRUE) +save_config_files(dir = ".", basename = NULL, timestamp = TRUE, random = TRUE) + +save_metric_files(dir = ".", basename = NULL, timestamp = TRUE, random = TRUE) + output_files(include_failed = FALSE) profile_files(include_failed = FALSE) @@ -39,6 +49,10 @@ profile_files(include_failed = FALSE) latent_dynamics_files(include_failed = FALSE) data_file() + +config_files(include_failed = FALSE) + +metric_files(include_failed = FALSE) } \arguments{ \item{dir}{(string) Path to directory where the files should be saved.} @@ -93,6 +107,14 @@ For \verb{$save_profile_files()} everything is the same as for \verb{$save_output_files()} except \code{"-profile-"} is included in the new file name after \code{basename}. +For \verb{$save_metric_files()} everything is the same as for +\verb{$save_output_files()} except \code{"-metric-"} is included in the new +file name after \code{basename}. 
+ +For \verb{$save_config_files()} everything is the same as for +\verb{$save_output_files()} except \code{"-config-"} is included in the new +file name after \code{basename}. + For \verb{$save_data_file()} no \code{id} is included in the file name because even with multiple MCMC chains the data file is the same. } diff --git a/man/model-method-diagnose.Rd b/man/model-method-diagnose.Rd index 1deab91d..c7117ef1 100644 --- a/man/model-method-diagnose.Rd +++ b/man/model-method-diagnose.Rd @@ -20,8 +20,8 @@ diagnose( the data block of the Stan program. One of the following: \itemize{ \item A named list of \R objects with the names corresponding to variables -declared in the data block of the Stan program. Internally this list is then -written to JSON for CmdStan using \code{\link[=write_stan_json]{write_stan_json()}}. See +declared in the data block of the Stan program. Internally this list is +then written to JSON for CmdStan using \code{\link[=write_stan_json]{write_stan_json()}}. See \code{\link[=write_stan_json]{write_stan_json()}} for details on the conversions performed on \R objects before they are passed to Stan. \item A path to a data file compatible with CmdStan (JSON or \R dump). See the @@ -32,16 +32,15 @@ appendices in the CmdStan guide for details on using these formats. \item{seed}{(positive integer(s)) A seed for the (P)RNG to pass to CmdStan. In the case of multi-chain sampling the single \code{seed} will automatically be augmented by the the run (chain) ID so that each chain uses a different -seed. The exception is the transformed data block, which defaults to -using same seed for all chains so that the same data is generated for all -chains if RNG functions are used. The only time \code{seed} should be specified -as a vector (one element per chain) is if RNG functions are used in -transformed data and the goal is to generate \emph{different} data for each -chain.} +seed. 
The exception is the transformed data block, which defaults to using +same seed for all chains so that the same data is generated for all chains +if RNG functions are used. The only time \code{seed} should be specified as a +vector (one element per chain) is if RNG functions are used in transformed +data and the goal is to generate \emph{different} data for each chain.} \item{init}{(multiple options) The initialization method to use for the -variables declared in the parameters block of the Stan program. One of -the following: +variables declared in the parameters block of the Stan program. One of the +following: \itemize{ \item A real number \code{x>0}. This initializes \emph{all} parameters randomly between \verb{[-x,x]} on the \emph{unconstrained} parameter space.; @@ -57,9 +56,10 @@ named elements corresponding to the parameters for which you are specifying initial values. See \strong{Examples}. \item A function that returns a single list with names corresponding to the parameters for which you are specifying initial values. The function can -take no arguments or a single argument \code{chain_id}. For MCMC, if the function -has argument \code{chain_id} it will be supplied with the chain id (from 1 to -number of chains) when called to generate the initial values. See +take no arguments or a single argument \code{chain_id}. For MCMC, if the +function has argument \code{chain_id} it will be supplied with the chain id +(from 1 to number of chains) when called to generate the initial values. +See \strong{Examples}. \item A \code{\link{CmdStanMCMC}}, \code{\link{CmdStanMLE}}, \code{\link{CmdStanVB}}, \code{\link{CmdStanPathfinder}}, or \code{\link{CmdStanLaplace}} fit object. @@ -99,8 +99,7 @@ fitted model objects. This can be set for an entire \R session using directory and only saved permanently if the user calls one of the \verb{$save_*} methods of the fitted model object (e.g., \code{\link[=fit-method-save_output_files]{$save_output_files()}}). 
These temporary -files are removed when the fitted model object is -\link[base:gc]{garbage collected} (manually or automatically). +files are removed when the fitted model object is \link[base:gc]{garbage collected} (manually or automatically). \item If a path, then the files are created in \code{output_dir} with names corresponding to the defaults used by \verb{$save_output_files()}. }} diff --git a/man/model-method-generate-quantities.Rd b/man/model-method-generate-quantities.Rd index 21ce47a2..2ec03d58 100644 --- a/man/model-method-generate-quantities.Rd +++ b/man/model-method-generate-quantities.Rd @@ -38,8 +38,8 @@ paths to \verb{$generate_quantities()} as many times as needed.} the data block of the Stan program. One of the following: \itemize{ \item A named list of \R objects with the names corresponding to variables -declared in the data block of the Stan program. Internally this list is then -written to JSON for CmdStan using \code{\link[=write_stan_json]{write_stan_json()}}. See +declared in the data block of the Stan program. Internally this list is +then written to JSON for CmdStan using \code{\link[=write_stan_json]{write_stan_json()}}. See \code{\link[=write_stan_json]{write_stan_json()}} for details on the conversions performed on \R objects before they are passed to Stan. \item A path to a data file compatible with CmdStan (JSON or \R dump). See the @@ -50,12 +50,11 @@ appendices in the CmdStan guide for details on using these formats. \item{seed}{(positive integer(s)) A seed for the (P)RNG to pass to CmdStan. In the case of multi-chain sampling the single \code{seed} will automatically be augmented by the the run (chain) ID so that each chain uses a different -seed. The exception is the transformed data block, which defaults to -using same seed for all chains so that the same data is generated for all -chains if RNG functions are used. 
The only time \code{seed} should be specified -as a vector (one element per chain) is if RNG functions are used in -transformed data and the goal is to generate \emph{different} data for each -chain.} +seed. The exception is the transformed data block, which defaults to using +same seed for all chains so that the same data is generated for all chains +if RNG functions are used. The only time \code{seed} should be specified as a +vector (one element per chain) is if RNG functions are used in transformed +data and the goal is to generate \emph{different} data for each chain.} \item{output_dir}{(string) A path to a directory where CmdStan should write its output CSV files. For MCMC there will be one file per chain; for other @@ -69,8 +68,7 @@ fitted model objects. This can be set for an entire \R session using directory and only saved permanently if the user calls one of the \verb{$save_*} methods of the fitted model object (e.g., \code{\link[=fit-method-save_output_files]{$save_output_files()}}). These temporary -files are removed when the fitted model object is -\link[base:gc]{garbage collected} (manually or automatically). +files are removed when the fitted model object is \link[base:gc]{garbage collected} (manually or automatically). \item If a path, then the files are created in \code{output_dir} with names corresponding to the defaults used by \verb{$save_output_files()}. }} @@ -99,13 +97,12 @@ using the Stan functions \code{reduce_sum()} or \code{map_rect()}). This is in contrast with \code{parallel_chains}, which specifies the number of chains to run in parallel. The actual number of CPU cores used is \code{parallel_chains*threads_per_chain}. 
For an example of using threading see -the Stan case study -\href{https://mc-stan.org/users/documentation/case-studies/reduce_sum_tutorial.html}{Reduce Sum: A Minimal Example}.} +the Stan case study \href{https://mc-stan.org/users/documentation/case-studies/reduce_sum_tutorial.html}{Reduce Sum: A Minimal Example}.} -\item{opencl_ids}{(integer vector of length 2) The platform and -device IDs of the OpenCL device to use for fitting. The model must -be compiled with \code{cpp_options = list(stan_opencl = TRUE)} for this -argument to have an effect.} +\item{opencl_ids}{(integer vector of length 2) The platform and device IDs of +the OpenCL device to use for fitting. The model must be compiled with +\code{cpp_options = list(stan_opencl = TRUE)} for this argument to have an +effect.} } \value{ A \code{\link{CmdStanGQ}} object. diff --git a/man/model-method-laplace.Rd b/man/model-method-laplace.Rd index 0bb8bce8..2b658efa 100644 --- a/man/model-method-laplace.Rd +++ b/man/model-method-laplace.Rd @@ -21,7 +21,13 @@ laplace( jacobian = TRUE, draws = NULL, show_messages = TRUE, - show_exceptions = TRUE + show_exceptions = TRUE, + save_cmdstan_config = if (cmdstan_version() > "2.34.0") { + TRUE + } else { + + NULL + } ) } \arguments{ @@ -29,8 +35,8 @@ laplace( the data block of the Stan program. One of the following: \itemize{ \item A named list of \R objects with the names corresponding to variables -declared in the data block of the Stan program. Internally this list is then -written to JSON for CmdStan using \code{\link[=write_stan_json]{write_stan_json()}}. See +declared in the data block of the Stan program. Internally this list is +then written to JSON for CmdStan using \code{\link[=write_stan_json]{write_stan_json()}}. See \code{\link[=write_stan_json]{write_stan_json()}} for details on the conversions performed on \R objects before they are passed to Stan. \item A path to a data file compatible with CmdStan (JSON or \R dump). 
See the @@ -41,20 +47,19 @@ appendices in the CmdStan guide for details on using these formats. \item{seed}{(positive integer(s)) A seed for the (P)RNG to pass to CmdStan. In the case of multi-chain sampling the single \code{seed} will automatically be augmented by the the run (chain) ID so that each chain uses a different -seed. The exception is the transformed data block, which defaults to -using same seed for all chains so that the same data is generated for all -chains if RNG functions are used. The only time \code{seed} should be specified -as a vector (one element per chain) is if RNG functions are used in -transformed data and the goal is to generate \emph{different} data for each -chain.} +seed. The exception is the transformed data block, which defaults to using +same seed for all chains so that the same data is generated for all chains +if RNG functions are used. The only time \code{seed} should be specified as a +vector (one element per chain) is if RNG functions are used in transformed +data and the goal is to generate \emph{different} data for each chain.} \item{refresh}{(non-negative integer) The number of iterations between printed screen updates. If \code{refresh = 0}, only error messages will be printed.} \item{init}{(multiple options) The initialization method to use for the -variables declared in the parameters block of the Stan program. One of -the following: +variables declared in the parameters block of the Stan program. One of the +following: \itemize{ \item A real number \code{x>0}. This initializes \emph{all} parameters randomly between \verb{[-x,x]} on the \emph{unconstrained} parameter space.; @@ -70,9 +75,10 @@ named elements corresponding to the parameters for which you are specifying initial values. See \strong{Examples}. \item A function that returns a single list with names corresponding to the parameters for which you are specifying initial values. The function can -take no arguments or a single argument \code{chain_id}. 
For MCMC, if the function -has argument \code{chain_id} it will be supplied with the chain id (from 1 to -number of chains) when called to generate the initial values. See +take no arguments or a single argument \code{chain_id}. For MCMC, if the +function has argument \code{chain_id} it will be supplied with the chain id +(from 1 to number of chains) when called to generate the initial values. +See \strong{Examples}. \item A \code{\link{CmdStanMCMC}}, \code{\link{CmdStanMLE}}, \code{\link{CmdStanVB}}, \code{\link{CmdStanPathfinder}}, or \code{\link{CmdStanLaplace}} fit object. @@ -114,8 +120,7 @@ fitted model objects. This can be set for an entire \R session using directory and only saved permanently if the user calls one of the \verb{$save_*} methods of the fitted model object (e.g., \code{\link[=fit-method-save_output_files]{$save_output_files()}}). These temporary -files are removed when the fitted model object is -\link[base:gc]{garbage collected} (manually or automatically). +files are removed when the fitted model object is \link[base:gc]{garbage collected} (manually or automatically). \item If a path, then the files are created in \code{output_dir} with names corresponding to the defaults used by \verb{$save_output_files()}. }} @@ -136,10 +141,10 @@ increased usage of disk space.} threads to use in parallelized sections (e.g., when using the Stan functions \code{reduce_sum()} or \code{map_rect()}).} -\item{opencl_ids}{(integer vector of length 2) The platform and -device IDs of the OpenCL device to use for fitting. The model must -be compiled with \code{cpp_options = list(stan_opencl = TRUE)} for this -argument to have an effect.} +\item{opencl_ids}{(integer vector of length 2) The platform and device IDs of +the OpenCL device to use for fitting. The model must be compiled with +\code{cpp_options = list(stan_opencl = TRUE)} for this argument to have an +effect.} \item{mode}{(multiple options) The mode to center the approximation at. 
One of the following: @@ -167,10 +172,10 @@ optimization was originally run. If \code{mode} is \code{NULL} then the value of \item{draws}{(positive integer) The number of draws to take.} -\item{show_messages}{(logical) When \code{TRUE} (the default), prints all -output during the execution process, such as iteration numbers and elapsed times. -If the output is silenced then the \code{\link[=fit-method-output]{$output()}} method of -the resulting fit object can be used to display the silenced messages.} +\item{show_messages}{(logical) When \code{TRUE} (the default), prints all output +during the execution process, such as iteration numbers and elapsed times. +If the output is silenced then the \code{\link[=fit-method-output]{$output()}} method +of the resulting fit object can be used to display the silenced messages.} \item{show_exceptions}{(logical) When \code{TRUE} (the default), prints all informational messages, for example rejection of the current proposal. @@ -179,6 +184,11 @@ recommended unless you are very confident that the model is correct up to numerical error. If the messages are silenced then the \code{\link[=fit-method-output]{$output()}} method of the resulting fit object can be used to display the silenced messages.} + +\item{save_cmdstan_config}{(logical) When \code{TRUE} (the default), call CmdStan +with argument \code{"output save_cmdstan_config=1"} to save a JSON file which contains +the argument tree and extra information (equivalent to the output CSV file +header). This option is only available in CmdStan 2.34.0 and later.} } \value{ A \code{\link{CmdStanLaplace}} object. 
diff --git a/man/model-method-optimize.Rd b/man/model-method-optimize.Rd index 3718dac3..55d7758f 100644 --- a/man/model-method-optimize.Rd +++ b/man/model-method-optimize.Rd @@ -27,7 +27,13 @@ optimize( tol_param = NULL, history_size = NULL, show_messages = TRUE, - show_exceptions = TRUE + show_exceptions = TRUE, + save_cmdstan_config = if (cmdstan_version() > "2.34.0") { + TRUE + } else { + + NULL + } ) } \arguments{ @@ -35,8 +41,8 @@ optimize( the data block of the Stan program. One of the following: \itemize{ \item A named list of \R objects with the names corresponding to variables -declared in the data block of the Stan program. Internally this list is then -written to JSON for CmdStan using \code{\link[=write_stan_json]{write_stan_json()}}. See +declared in the data block of the Stan program. Internally this list is +then written to JSON for CmdStan using \code{\link[=write_stan_json]{write_stan_json()}}. See \code{\link[=write_stan_json]{write_stan_json()}} for details on the conversions performed on \R objects before they are passed to Stan. \item A path to a data file compatible with CmdStan (JSON or \R dump). See the @@ -47,20 +53,19 @@ appendices in the CmdStan guide for details on using these formats. \item{seed}{(positive integer(s)) A seed for the (P)RNG to pass to CmdStan. In the case of multi-chain sampling the single \code{seed} will automatically be augmented by the the run (chain) ID so that each chain uses a different -seed. The exception is the transformed data block, which defaults to -using same seed for all chains so that the same data is generated for all -chains if RNG functions are used. The only time \code{seed} should be specified -as a vector (one element per chain) is if RNG functions are used in -transformed data and the goal is to generate \emph{different} data for each -chain.} +seed. 
The exception is the transformed data block, which defaults to using +same seed for all chains so that the same data is generated for all chains +if RNG functions are used. The only time \code{seed} should be specified as a +vector (one element per chain) is if RNG functions are used in transformed +data and the goal is to generate \emph{different} data for each chain.} \item{refresh}{(non-negative integer) The number of iterations between printed screen updates. If \code{refresh = 0}, only error messages will be printed.} \item{init}{(multiple options) The initialization method to use for the -variables declared in the parameters block of the Stan program. One of -the following: +variables declared in the parameters block of the Stan program. One of the +following: \itemize{ \item A real number \code{x>0}. This initializes \emph{all} parameters randomly between \verb{[-x,x]} on the \emph{unconstrained} parameter space.; @@ -76,9 +81,10 @@ named elements corresponding to the parameters for which you are specifying initial values. See \strong{Examples}. \item A function that returns a single list with names corresponding to the parameters for which you are specifying initial values. The function can -take no arguments or a single argument \code{chain_id}. For MCMC, if the function -has argument \code{chain_id} it will be supplied with the chain id (from 1 to -number of chains) when called to generate the initial values. See +take no arguments or a single argument \code{chain_id}. For MCMC, if the +function has argument \code{chain_id} it will be supplied with the chain id +(from 1 to number of chains) when called to generate the initial values. +See \strong{Examples}. \item A \code{\link{CmdStanMCMC}}, \code{\link{CmdStanMLE}}, \code{\link{CmdStanVB}}, \code{\link{CmdStanPathfinder}}, or \code{\link{CmdStanLaplace}} fit object. @@ -110,8 +116,8 @@ that are the same name and dimensions as the current Stan model. 
about the latent dynamics be written to temporary diagnostic CSV files? This argument replaces CmdStan's \code{diagnostic_file} argument and the content written to CSV is controlled by the user's CmdStan installation and not -CmdStanR (for some algorithms no content may be written). The default -is \code{FALSE}, which is appropriate for almost every use case. To save the +CmdStanR (for some algorithms no content may be written). The default is +\code{FALSE}, which is appropriate for almost every use case. To save the temporary files created when \code{save_latent_dynamics=TRUE} see the \code{\link[=fit-method-save_latent_dynamics_files]{$save_latent_dynamics_files()}} method.} @@ -128,8 +134,7 @@ fitted model objects. This can be set for an entire \R session using directory and only saved permanently if the user calls one of the \verb{$save_*} methods of the fitted model object (e.g., \code{\link[=fit-method-save_output_files]{$save_output_files()}}). These temporary -files are removed when the fitted model object is -\link[base:gc]{garbage collected} (manually or automatically). +files are removed when the fitted model object is \link[base:gc]{garbage collected} (manually or automatically). \item If a path, then the files are created in \code{output_dir} with names corresponding to the defaults used by \verb{$save_output_files()}. }} @@ -150,10 +155,10 @@ increased usage of disk space.} threads to use in parallelized sections (e.g., when using the Stan functions \code{reduce_sum()} or \code{map_rect()}).} -\item{opencl_ids}{(integer vector of length 2) The platform and -device IDs of the OpenCL device to use for fitting. The model must -be compiled with \code{cpp_options = list(stan_opencl = TRUE)} for this -argument to have an effect.} +\item{opencl_ids}{(integer vector of length 2) The platform and device IDs of +the OpenCL device to use for fitting. 
The model must be compiled with +\code{cpp_options = list(stan_opencl = TRUE)} for this argument to have an +effect.} \item{algorithm}{(string) The optimization algorithm. One of \code{"lbfgs"}, \code{"bfgs"}, or \code{"newton"}. The control parameters below are only available @@ -187,10 +192,10 @@ argument should typically be set to \code{TRUE}.} \item{history_size}{(positive integer) The size of the history used when approximating the Hessian. Only available for L-BFGS.} -\item{show_messages}{(logical) When \code{TRUE} (the default), prints all -output during the execution process, such as iteration numbers and elapsed times. -If the output is silenced then the \code{\link[=fit-method-output]{$output()}} method of -the resulting fit object can be used to display the silenced messages.} +\item{show_messages}{(logical) When \code{TRUE} (the default), prints all output +during the execution process, such as iteration numbers and elapsed times. +If the output is silenced then the \code{\link[=fit-method-output]{$output()}} method +of the resulting fit object can be used to display the silenced messages.} \item{show_exceptions}{(logical) When \code{TRUE} (the default), prints all informational messages, for example rejection of the current proposal. @@ -199,6 +204,11 @@ recommended unless you are very confident that the model is correct up to numerical error. If the messages are silenced then the \code{\link[=fit-method-output]{$output()}} method of the resulting fit object can be used to display the silenced messages.} + +\item{save_cmdstan_config}{(logical) When \code{TRUE} (the default), call CmdStan +with argument \code{"output save_cmdstan_config=1"} to save a JSON file which contains +the argument tree and extra information (equivalent to the output CSV file +header). This option is only available in CmdStan 2.34.0 and later.} } \value{ A \code{\link{CmdStanMLE}} object. 
diff --git a/man/model-method-pathfinder.Rd b/man/model-method-pathfinder.Rd index 1cda0502..61590cbb 100644 --- a/man/model-method-pathfinder.Rd +++ b/man/model-method-pathfinder.Rd @@ -32,7 +32,13 @@ pathfinder( psis_resample = NULL, calculate_lp = NULL, show_messages = TRUE, - show_exceptions = TRUE + show_exceptions = TRUE, + save_cmdstan_config = if (cmdstan_version() > "2.34.0") { + TRUE + } else { + + NULL + } ) } \arguments{ @@ -40,8 +46,8 @@ pathfinder( the data block of the Stan program. One of the following: \itemize{ \item A named list of \R objects with the names corresponding to variables -declared in the data block of the Stan program. Internally this list is then -written to JSON for CmdStan using \code{\link[=write_stan_json]{write_stan_json()}}. See +declared in the data block of the Stan program. Internally this list is +then written to JSON for CmdStan using \code{\link[=write_stan_json]{write_stan_json()}}. See \code{\link[=write_stan_json]{write_stan_json()}} for details on the conversions performed on \R objects before they are passed to Stan. \item A path to a data file compatible with CmdStan (JSON or \R dump). See the @@ -52,20 +58,19 @@ appendices in the CmdStan guide for details on using these formats. \item{seed}{(positive integer(s)) A seed for the (P)RNG to pass to CmdStan. In the case of multi-chain sampling the single \code{seed} will automatically be augmented by the the run (chain) ID so that each chain uses a different -seed. The exception is the transformed data block, which defaults to -using same seed for all chains so that the same data is generated for all -chains if RNG functions are used. The only time \code{seed} should be specified -as a vector (one element per chain) is if RNG functions are used in -transformed data and the goal is to generate \emph{different} data for each -chain.} +seed. 
The exception is the transformed data block, which defaults to using +same seed for all chains so that the same data is generated for all chains +if RNG functions are used. The only time \code{seed} should be specified as a +vector (one element per chain) is if RNG functions are used in transformed +data and the goal is to generate \emph{different} data for each chain.} \item{refresh}{(non-negative integer) The number of iterations between printed screen updates. If \code{refresh = 0}, only error messages will be printed.} \item{init}{(multiple options) The initialization method to use for the -variables declared in the parameters block of the Stan program. One of -the following: +variables declared in the parameters block of the Stan program. One of the +following: \itemize{ \item A real number \code{x>0}. This initializes \emph{all} parameters randomly between \verb{[-x,x]} on the \emph{unconstrained} parameter space.; @@ -81,9 +86,10 @@ named elements corresponding to the parameters for which you are specifying initial values. See \strong{Examples}. \item A function that returns a single list with names corresponding to the parameters for which you are specifying initial values. The function can -take no arguments or a single argument \code{chain_id}. For MCMC, if the function -has argument \code{chain_id} it will be supplied with the chain id (from 1 to -number of chains) when called to generate the initial values. See +take no arguments or a single argument \code{chain_id}. For MCMC, if the +function has argument \code{chain_id} it will be supplied with the chain id +(from 1 to number of chains) when called to generate the initial values. +See \strong{Examples}. \item A \code{\link{CmdStanMCMC}}, \code{\link{CmdStanMLE}}, \code{\link{CmdStanVB}}, \code{\link{CmdStanPathfinder}}, or \code{\link{CmdStanLaplace}} fit object. @@ -115,8 +121,8 @@ that are the same name and dimensions as the current Stan model. 
about the latent dynamics be written to temporary diagnostic CSV files? This argument replaces CmdStan's \code{diagnostic_file} argument and the content written to CSV is controlled by the user's CmdStan installation and not -CmdStanR (for some algorithms no content may be written). The default -is \code{FALSE}, which is appropriate for almost every use case. To save the +CmdStanR (for some algorithms no content may be written). The default is +\code{FALSE}, which is appropriate for almost every use case. To save the temporary files created when \code{save_latent_dynamics=TRUE} see the \code{\link[=fit-method-save_latent_dynamics_files]{$save_latent_dynamics_files()}} method.} @@ -133,8 +139,7 @@ fitted model objects. This can be set for an entire \R session using directory and only saved permanently if the user calls one of the \verb{$save_*} methods of the fitted model object (e.g., \code{\link[=fit-method-save_output_files]{$save_output_files()}}). These temporary -files are removed when the fitted model object is -\link[base:gc]{garbage collected} (manually or automatically). +files are removed when the fitted model object is \link[base:gc]{garbage collected} (manually or automatically). \item If a path, then the files are created in \code{output_dir} with names corresponding to the defaults used by \verb{$save_output_files()}. }} @@ -150,10 +155,10 @@ values with 6 significant figures. The upper limit for \code{sig_figs} is 18. Increasing this value will result in larger output CSV files and thus an increased usage of disk space.} -\item{opencl_ids}{(integer vector of length 2) The platform and -device IDs of the OpenCL device to use for fitting. The model must -be compiled with \code{cpp_options = list(stan_opencl = TRUE)} for this -argument to have an effect.} +\item{opencl_ids}{(integer vector of length 2) The platform and device IDs of +the OpenCL device to use for fitting. 
The model must be compiled with +\code{cpp_options = list(stan_opencl = TRUE)} for this argument to have an +effect.} \item{num_threads}{(positive integer) If the model was \link[=model-method-compile]{compiled} with threading support, the number of @@ -206,10 +211,10 @@ ELBO in the pathfinder steps. All other draws will have a log probability of \co A value of \code{FALSE} will also turn off pareto smoothed importance sampling as the lp calculation is needed for PSIS.} -\item{show_messages}{(logical) When \code{TRUE} (the default), prints all -output during the execution process, such as iteration numbers and elapsed times. -If the output is silenced then the \code{\link[=fit-method-output]{$output()}} method of -the resulting fit object can be used to display the silenced messages.} +\item{show_messages}{(logical) When \code{TRUE} (the default), prints all output +during the execution process, such as iteration numbers and elapsed times. +If the output is silenced then the \code{\link[=fit-method-output]{$output()}} method +of the resulting fit object can be used to display the silenced messages.} \item{show_exceptions}{(logical) When \code{TRUE} (the default), prints all informational messages, for example rejection of the current proposal. @@ -218,6 +223,11 @@ recommended unless you are very confident that the model is correct up to numerical error. If the messages are silenced then the \code{\link[=fit-method-output]{$output()}} method of the resulting fit object can be used to display the silenced messages.} + +\item{save_cmdstan_config}{(logical) When \code{TRUE} (the default), call CmdStan +with argument \code{"output save_cmdstan_config=1"} to save a JSON file which contains +the argument tree and extra information (equivalent to the output CSV file +header). This option is only available in CmdStan 2.34.0 and later.} } \value{ A \code{\link{CmdStanPathfinder}} object. 
diff --git a/man/model-method-sample.Rd b/man/model-method-sample.Rd index f6d105d1..bc5b2a8c 100644 --- a/man/model-method-sample.Rd +++ b/man/model-method-sample.Rd @@ -37,6 +37,17 @@ sample( show_messages = TRUE, show_exceptions = TRUE, diagnostics = c("divergences", "treedepth", "ebfmi"), + save_metric = if (cmdstan_version() > "2.34.0") { + TRUE + } else { + NULL + }, + save_cmdstan_config = if (cmdstan_version() > "2.34.0") { + TRUE + } else { + + NULL + }, cores = NULL, num_cores = NULL, num_chains = NULL, @@ -53,8 +64,8 @@ sample( the data block of the Stan program. One of the following: \itemize{ \item A named list of \R objects with the names corresponding to variables -declared in the data block of the Stan program. Internally this list is then -written to JSON for CmdStan using \code{\link[=write_stan_json]{write_stan_json()}}. See +declared in the data block of the Stan program. Internally this list is +then written to JSON for CmdStan using \code{\link[=write_stan_json]{write_stan_json()}}. See \code{\link[=write_stan_json]{write_stan_json()}} for details on the conversions performed on \R objects before they are passed to Stan. \item A path to a data file compatible with CmdStan (JSON or \R dump). See the @@ -65,20 +76,19 @@ appendices in the CmdStan guide for details on using these formats. \item{seed}{(positive integer(s)) A seed for the (P)RNG to pass to CmdStan. In the case of multi-chain sampling the single \code{seed} will automatically be augmented by the the run (chain) ID so that each chain uses a different -seed. The exception is the transformed data block, which defaults to -using same seed for all chains so that the same data is generated for all -chains if RNG functions are used. The only time \code{seed} should be specified -as a vector (one element per chain) is if RNG functions are used in -transformed data and the goal is to generate \emph{different} data for each -chain.} +seed. 
The exception is the transformed data block, which defaults to using +same seed for all chains so that the same data is generated for all chains +if RNG functions are used. The only time \code{seed} should be specified as a +vector (one element per chain) is if RNG functions are used in transformed +data and the goal is to generate \emph{different} data for each chain.} \item{refresh}{(non-negative integer) The number of iterations between printed screen updates. If \code{refresh = 0}, only error messages will be printed.} \item{init}{(multiple options) The initialization method to use for the -variables declared in the parameters block of the Stan program. One of -the following: +variables declared in the parameters block of the Stan program. One of the +following: \itemize{ \item A real number \code{x>0}. This initializes \emph{all} parameters randomly between \verb{[-x,x]} on the \emph{unconstrained} parameter space.; @@ -94,9 +104,10 @@ named elements corresponding to the parameters for which you are specifying initial values. See \strong{Examples}. \item A function that returns a single list with names corresponding to the parameters for which you are specifying initial values. The function can -take no arguments or a single argument \code{chain_id}. For MCMC, if the function -has argument \code{chain_id} it will be supplied with the chain id (from 1 to -number of chains) when called to generate the initial values. See +take no arguments or a single argument \code{chain_id}. For MCMC, if the +function has argument \code{chain_id} it will be supplied with the chain id +(from 1 to number of chains) when called to generate the initial values. +See \strong{Examples}. \item A \code{\link{CmdStanMCMC}}, \code{\link{CmdStanMLE}}, \code{\link{CmdStanVB}}, \code{\link{CmdStanPathfinder}}, or \code{\link{CmdStanLaplace}} fit object. @@ -128,8 +139,8 @@ that are the same name and dimensions as the current Stan model. 
about the latent dynamics be written to temporary diagnostic CSV files? This argument replaces CmdStan's \code{diagnostic_file} argument and the content written to CSV is controlled by the user's CmdStan installation and not -CmdStanR (for some algorithms no content may be written). The default -is \code{FALSE}, which is appropriate for almost every use case. To save the +CmdStanR (for some algorithms no content may be written). The default is +\code{FALSE}, which is appropriate for almost every use case. To save the temporary files created when \code{save_latent_dynamics=TRUE} see the \code{\link[=fit-method-save_latent_dynamics_files]{$save_latent_dynamics_files()}} method.} @@ -146,8 +157,7 @@ fitted model objects. This can be set for an entire \R session using directory and only saved permanently if the user calls one of the \verb{$save_*} methods of the fitted model object (e.g., \code{\link[=fit-method-save_output_files]{$save_output_files()}}). These temporary -files are removed when the fitted model object is -\link[base:gc]{garbage collected} (manually or automatically). +files are removed when the fitted model object is \link[base:gc]{garbage collected} (manually or automatically). \item If a path, then the files are created in \code{output_dir} with names corresponding to the defaults used by \verb{$save_output_files()}. }} @@ -183,13 +193,12 @@ using the Stan functions \code{reduce_sum()} or \code{map_rect()}). This is in contrast with \code{parallel_chains}, which specifies the number of chains to run in parallel. The actual number of CPU cores used is \code{parallel_chains*threads_per_chain}. 
For an example of using threading see -the Stan case study -\href{https://mc-stan.org/users/documentation/case-studies/reduce_sum_tutorial.html}{Reduce Sum: A Minimal Example}.} +the Stan case study \href{https://mc-stan.org/users/documentation/case-studies/reduce_sum_tutorial.html}{Reduce Sum: A Minimal Example}.} -\item{opencl_ids}{(integer vector of length 2) The platform and -device IDs of the OpenCL device to use for fitting. The model must -be compiled with \code{cpp_options = list(stan_opencl = TRUE)} for this -argument to have an effect.} +\item{opencl_ids}{(integer vector of length 2) The platform and device IDs of +the OpenCL device to use for fitting. The model must be compiled with +\code{cpp_options = list(stan_opencl = TRUE)} for this argument to have an +effect.} \item{iter_warmup}{(positive integer) The number of warmup iterations to run per chain. Note: in the CmdStan User's Guide this is referred to as @@ -228,13 +237,13 @@ specifying the geometry of the base manifold. See the \emph{Euclidean Metric} section of the CmdStan User's Guide for more details. To specify a precomputed (inverse) metric, see the \code{inv_metric} argument below.} -\item{metric_file}{(character vector) The paths to JSON or -Rdump files (one per chain) compatible with CmdStan that contain -precomputed inverse metrics. The \code{metric_file} argument is inherited from -CmdStan but is confusing in that the entry in JSON or Rdump file(s) must be -named \code{inv_metric}, referring to the \emph{inverse} metric. We recommend instead -using CmdStanR's \code{inv_metric} argument (see below) to specify an inverse -metric directly using a vector or matrix from your \R session.} +\item{metric_file}{(character vector) The paths to JSON or Rdump files (one +per chain) compatible with CmdStan that contain precomputed inverse +metrics. 
The \code{metric_file} argument is inherited from CmdStan but is +confusing in that the entry in JSON or Rdump file(s) must be named +\code{inv_metric}, referring to the \emph{inverse} metric. We recommend instead using +CmdStanR's \code{inv_metric} argument (see below) to specify an inverse metric +directly using a vector or matrix from your \R session.} \item{inv_metric}{(vector, matrix) A vector (if \code{metric='diag_e'}) or a matrix (if \code{metric='dense_e'}) for initializing the inverse metric. This @@ -262,10 +271,10 @@ quantities block. If the parameters block is empty then using \code{fixed_param=TRUE} is mandatory. When \code{fixed_param=TRUE} the \code{chains} and \code{parallel_chains} arguments will be set to \code{1}.} -\item{show_messages}{(logical) When \code{TRUE} (the default), prints all -output during the execution process, such as iteration numbers and elapsed times. -If the output is silenced then the \code{\link[=fit-method-output]{$output()}} method of -the resulting fit object can be used to display the silenced messages.} +\item{show_messages}{(logical) When \code{TRUE} (the default), prints all output +during the execution process, such as iteration numbers and elapsed times. +If the output is silenced then the \code{\link[=fit-method-output]{$output()}} method +of the resulting fit object can be used to display the silenced messages.} \item{show_exceptions}{(logical) When \code{TRUE} (the default), prints all informational messages, for example rejection of the current proposal. @@ -280,8 +289,8 @@ and warn about after sampling. Setting this to an empty string \code{""} or \code{NULL} can be used to prevent CmdStanR from automatically reading in the sampler diagnostics from CSV if you wish to manually read in the results and validate them yourself, for example using \code{\link[=read_cmdstan_csv]{read_cmdstan_csv()}}. 
The -currently available diagnostics are \code{"divergences"}, \code{"treedepth"}, -and \code{"ebfmi"} (the default is to check all of them). +currently available diagnostics are \code{"divergences"}, \code{"treedepth"}, and +\code{"ebfmi"} (the default is to check all of them). These diagnostics are also available after fitting. The \code{\link[=fit-method-sampler_diagnostics]{$sampler_diagnostics()}} method provides @@ -293,6 +302,16 @@ Diagnostics like R-hat and effective sample size are \emph{not} currently available via the \code{diagnostics} argument but can be checked after fitting using the \code{\link[=fit-method-summary]{$summary()}} method.} +\item{save_metric}{(logical) When \code{TRUE}, call CmdStan with argument +\code{"adaptation save_metric=1"} to save the adapted metric in a separate JSON +file with elements "stepsize", "metric_type" and "inv_metric". The default +is \code{TRUE}. This option is only available in CmdStan 2.34.0 and later.} + +\item{save_cmdstan_config}{(logical) When \code{TRUE} (the default), call CmdStan +with argument \code{"output save_cmdstan_config=1"} to save a JSON file which contains +the argument tree and extra information (equivalent to the output CSV file +header). This option is only available in CmdStan 2.34.0 and later.} + \item{cores, num_cores, num_chains, num_warmup, num_samples, save_extra_diagnostics, max_depth, stepsize, validate_csv}{Deprecated and will be removed in a future release.} } \value{ diff --git a/man/model-method-sample_mpi.Rd b/man/model-method-sample_mpi.Rd index 34083bd2..5f10b2a6 100644 --- a/man/model-method-sample_mpi.Rd +++ b/man/model-method-sample_mpi.Rd @@ -36,6 +36,12 @@ sample_mpi( show_messages = TRUE, show_exceptions = TRUE, diagnostics = c("divergences", "treedepth", "ebfmi"), + save_cmdstan_config = if (cmdstan_version() > "2.34.0") { + TRUE + } else { + + NULL + }, validate_csv = TRUE ) } @@ -44,8 +50,8 @@ sample_mpi( the data block of the Stan program. 
One of the following: \itemize{ \item A named list of \R objects with the names corresponding to variables -declared in the data block of the Stan program. Internally this list is then -written to JSON for CmdStan using \code{\link[=write_stan_json]{write_stan_json()}}. See +declared in the data block of the Stan program. Internally this list is +then written to JSON for CmdStan using \code{\link[=write_stan_json]{write_stan_json()}}. See \code{\link[=write_stan_json]{write_stan_json()}} for details on the conversions performed on \R objects before they are passed to Stan. \item A path to a data file compatible with CmdStan (JSON or \R dump). See the @@ -64,20 +70,19 @@ model executable.} \item{seed}{(positive integer(s)) A seed for the (P)RNG to pass to CmdStan. In the case of multi-chain sampling the single \code{seed} will automatically be augmented by the the run (chain) ID so that each chain uses a different -seed. The exception is the transformed data block, which defaults to -using same seed for all chains so that the same data is generated for all -chains if RNG functions are used. The only time \code{seed} should be specified -as a vector (one element per chain) is if RNG functions are used in -transformed data and the goal is to generate \emph{different} data for each -chain.} +seed. The exception is the transformed data block, which defaults to using +same seed for all chains so that the same data is generated for all chains +if RNG functions are used. The only time \code{seed} should be specified as a +vector (one element per chain) is if RNG functions are used in transformed +data and the goal is to generate \emph{different} data for each chain.} \item{refresh}{(non-negative integer) The number of iterations between printed screen updates. If \code{refresh = 0}, only error messages will be printed.} \item{init}{(multiple options) The initialization method to use for the -variables declared in the parameters block of the Stan program. 
One of -the following: +variables declared in the parameters block of the Stan program. One of the +following: \itemize{ \item A real number \code{x>0}. This initializes \emph{all} parameters randomly between \verb{[-x,x]} on the \emph{unconstrained} parameter space.; @@ -93,9 +98,10 @@ named elements corresponding to the parameters for which you are specifying initial values. See \strong{Examples}. \item A function that returns a single list with names corresponding to the parameters for which you are specifying initial values. The function can -take no arguments or a single argument \code{chain_id}. For MCMC, if the function -has argument \code{chain_id} it will be supplied with the chain id (from 1 to -number of chains) when called to generate the initial values. See +take no arguments or a single argument \code{chain_id}. For MCMC, if the +function has argument \code{chain_id} it will be supplied with the chain id +(from 1 to number of chains) when called to generate the initial values. +See \strong{Examples}. \item A \code{\link{CmdStanMCMC}}, \code{\link{CmdStanMLE}}, \code{\link{CmdStanVB}}, \code{\link{CmdStanPathfinder}}, or \code{\link{CmdStanLaplace}} fit object. @@ -127,8 +133,8 @@ that are the same name and dimensions as the current Stan model. about the latent dynamics be written to temporary diagnostic CSV files? This argument replaces CmdStan's \code{diagnostic_file} argument and the content written to CSV is controlled by the user's CmdStan installation and not -CmdStanR (for some algorithms no content may be written). The default -is \code{FALSE}, which is appropriate for almost every use case. To save the +CmdStanR (for some algorithms no content may be written). The default is +\code{FALSE}, which is appropriate for almost every use case. 
To save the temporary files created when \code{save_latent_dynamics=TRUE} see the \code{\link[=fit-method-save_latent_dynamics_files]{$save_latent_dynamics_files()}} method.} @@ -145,8 +151,7 @@ fitted model objects. This can be set for an entire \R session using directory and only saved permanently if the user calls one of the \verb{$save_*} methods of the fitted model object (e.g., \code{\link[=fit-method-save_output_files]{$save_output_files()}}). These temporary -files are removed when the fitted model object is -\link[base:gc]{garbage collected} (manually or automatically). +files are removed when the fitted model object is \link[base:gc]{garbage collected} (manually or automatically). \item If a path, then the files are created in \code{output_dir} with names corresponding to the defaults used by \verb{$save_output_files()}. }} @@ -200,13 +205,13 @@ specifying the geometry of the base manifold. See the \emph{Euclidean Metric} section of the CmdStan User's Guide for more details. To specify a precomputed (inverse) metric, see the \code{inv_metric} argument below.} -\item{metric_file}{(character vector) The paths to JSON or -Rdump files (one per chain) compatible with CmdStan that contain -precomputed inverse metrics. The \code{metric_file} argument is inherited from -CmdStan but is confusing in that the entry in JSON or Rdump file(s) must be -named \code{inv_metric}, referring to the \emph{inverse} metric. We recommend instead -using CmdStanR's \code{inv_metric} argument (see below) to specify an inverse -metric directly using a vector or matrix from your \R session.} +\item{metric_file}{(character vector) The paths to JSON or Rdump files (one +per chain) compatible with CmdStan that contain precomputed inverse +metrics. The \code{metric_file} argument is inherited from CmdStan but is +confusing in that the entry in JSON or Rdump file(s) must be named +\code{inv_metric}, referring to the \emph{inverse} metric. 
We recommend instead using +CmdStanR's \code{inv_metric} argument (see below) to specify an inverse metric +directly using a vector or matrix from your \R session.} \item{inv_metric}{(vector, matrix) A vector (if \code{metric='diag_e'}) or a matrix (if \code{metric='dense_e'}) for initializing the inverse metric. This @@ -240,10 +245,10 @@ values with 6 significant figures. The upper limit for \code{sig_figs} is 18. Increasing this value will result in larger output CSV files and thus an increased usage of disk space.} -\item{show_messages}{(logical) When \code{TRUE} (the default), prints all -output during the execution process, such as iteration numbers and elapsed times. -If the output is silenced then the \code{\link[=fit-method-output]{$output()}} method of -the resulting fit object can be used to display the silenced messages.} +\item{show_messages}{(logical) When \code{TRUE} (the default), prints all output +during the execution process, such as iteration numbers and elapsed times. +If the output is silenced then the \code{\link[=fit-method-output]{$output()}} method +of the resulting fit object can be used to display the silenced messages.} \item{show_exceptions}{(logical) When \code{TRUE} (the default), prints all informational messages, for example rejection of the current proposal. @@ -258,8 +263,8 @@ and warn about after sampling. Setting this to an empty string \code{""} or \code{NULL} can be used to prevent CmdStanR from automatically reading in the sampler diagnostics from CSV if you wish to manually read in the results and validate them yourself, for example using \code{\link[=read_cmdstan_csv]{read_cmdstan_csv()}}. The -currently available diagnostics are \code{"divergences"}, \code{"treedepth"}, -and \code{"ebfmi"} (the default is to check all of them). +currently available diagnostics are \code{"divergences"}, \code{"treedepth"}, and +\code{"ebfmi"} (the default is to check all of them). These diagnostics are also available after fitting. 
The \code{\link[=fit-method-sampler_diagnostics]{$sampler_diagnostics()}} method provides @@ -271,6 +276,11 @@ Diagnostics like R-hat and effective sample size are \emph{not} currently available via the \code{diagnostics} argument but can be checked after fitting using the \code{\link[=fit-method-summary]{$summary()}} method.} +\item{save_cmdstan_config}{(logical) When \code{TRUE} (the default), call CmdStan +with argument \code{"output save_cmdstan_config=1"} to save a JSON file which contains +the argument tree and extra information (equivalent to the output CSV file +header). This option is only available in CmdStan 2.34.0 and later.} + \item{validate_csv}{Deprecated. Use \code{diagnostics} instead.} } \value{ diff --git a/man/model-method-variational.Rd b/man/model-method-variational.Rd index 1d0355ba..3892d886 100644 --- a/man/model-method-variational.Rd +++ b/man/model-method-variational.Rd @@ -28,7 +28,13 @@ variational( output_samples = NULL, draws = NULL, show_messages = TRUE, - show_exceptions = TRUE + show_exceptions = TRUE, + save_cmdstan_config = if (cmdstan_version() > "2.34.0") { + TRUE + } else { + + NULL + } ) } \arguments{ @@ -36,8 +42,8 @@ variational( the data block of the Stan program. One of the following: \itemize{ \item A named list of \R objects with the names corresponding to variables -declared in the data block of the Stan program. Internally this list is then -written to JSON for CmdStan using \code{\link[=write_stan_json]{write_stan_json()}}. See +declared in the data block of the Stan program. Internally this list is +then written to JSON for CmdStan using \code{\link[=write_stan_json]{write_stan_json()}}. See \code{\link[=write_stan_json]{write_stan_json()}} for details on the conversions performed on \R objects before they are passed to Stan. \item A path to a data file compatible with CmdStan (JSON or \R dump). See the @@ -48,20 +54,19 @@ appendices in the CmdStan guide for details on using these formats. 
\item{seed}{(positive integer(s)) A seed for the (P)RNG to pass to CmdStan. In the case of multi-chain sampling the single \code{seed} will automatically be augmented by the the run (chain) ID so that each chain uses a different -seed. The exception is the transformed data block, which defaults to -using same seed for all chains so that the same data is generated for all -chains if RNG functions are used. The only time \code{seed} should be specified -as a vector (one element per chain) is if RNG functions are used in -transformed data and the goal is to generate \emph{different} data for each -chain.} +seed. The exception is the transformed data block, which defaults to using +same seed for all chains so that the same data is generated for all chains +if RNG functions are used. The only time \code{seed} should be specified as a +vector (one element per chain) is if RNG functions are used in transformed +data and the goal is to generate \emph{different} data for each chain.} \item{refresh}{(non-negative integer) The number of iterations between printed screen updates. If \code{refresh = 0}, only error messages will be printed.} \item{init}{(multiple options) The initialization method to use for the -variables declared in the parameters block of the Stan program. One of -the following: +variables declared in the parameters block of the Stan program. One of the +following: \itemize{ \item A real number \code{x>0}. This initializes \emph{all} parameters randomly between \verb{[-x,x]} on the \emph{unconstrained} parameter space.; @@ -77,9 +82,10 @@ named elements corresponding to the parameters for which you are specifying initial values. See \strong{Examples}. \item A function that returns a single list with names corresponding to the parameters for which you are specifying initial values. The function can -take no arguments or a single argument \code{chain_id}. 
For MCMC, if the function -has argument \code{chain_id} it will be supplied with the chain id (from 1 to -number of chains) when called to generate the initial values. See +take no arguments or a single argument \code{chain_id}. For MCMC, if the +function has argument \code{chain_id} it will be supplied with the chain id +(from 1 to number of chains) when called to generate the initial values. +See \strong{Examples}. \item A \code{\link{CmdStanMCMC}}, \code{\link{CmdStanMLE}}, \code{\link{CmdStanVB}}, \code{\link{CmdStanPathfinder}}, or \code{\link{CmdStanLaplace}} fit object. @@ -111,8 +117,8 @@ that are the same name and dimensions as the current Stan model. about the latent dynamics be written to temporary diagnostic CSV files? This argument replaces CmdStan's \code{diagnostic_file} argument and the content written to CSV is controlled by the user's CmdStan installation and not -CmdStanR (for some algorithms no content may be written). The default -is \code{FALSE}, which is appropriate for almost every use case. To save the +CmdStanR (for some algorithms no content may be written). The default is +\code{FALSE}, which is appropriate for almost every use case. To save the temporary files created when \code{save_latent_dynamics=TRUE} see the \code{\link[=fit-method-save_latent_dynamics_files]{$save_latent_dynamics_files()}} method.} @@ -129,8 +135,7 @@ fitted model objects. This can be set for an entire \R session using directory and only saved permanently if the user calls one of the \verb{$save_*} methods of the fitted model object (e.g., \code{\link[=fit-method-save_output_files]{$save_output_files()}}). These temporary -files are removed when the fitted model object is -\link[base:gc]{garbage collected} (manually or automatically). +files are removed when the fitted model object is \link[base:gc]{garbage collected} (manually or automatically). 
\item If a path, then the files are created in \code{output_dir} with names corresponding to the defaults used by \verb{$save_output_files()}. }} @@ -151,10 +156,10 @@ increased usage of disk space.} threads to use in parallelized sections (e.g., when using the Stan functions \code{reduce_sum()} or \code{map_rect()}).} -\item{opencl_ids}{(integer vector of length 2) The platform and -device IDs of the OpenCL device to use for fitting. The model must -be compiled with \code{cpp_options = list(stan_opencl = TRUE)} for this -argument to have an effect.} +\item{opencl_ids}{(integer vector of length 2) The platform and device IDs of +the OpenCL device to use for fitting. The model must be compiled with +\code{cpp_options = list(stan_opencl = TRUE)} for this argument to have an +effect.} \item{algorithm}{(string) The algorithm. Either \code{"meanfield"} or \code{"fullrank"}.} @@ -186,10 +191,10 @@ of the objective.} \item{draws}{(positive integer) Number of approximate posterior samples to draw and save.} -\item{show_messages}{(logical) When \code{TRUE} (the default), prints all -output during the execution process, such as iteration numbers and elapsed times. -If the output is silenced then the \code{\link[=fit-method-output]{$output()}} method of -the resulting fit object can be used to display the silenced messages.} +\item{show_messages}{(logical) When \code{TRUE} (the default), prints all output +during the execution process, such as iteration numbers and elapsed times. +If the output is silenced then the \code{\link[=fit-method-output]{$output()}} method +of the resulting fit object can be used to display the silenced messages.} \item{show_exceptions}{(logical) When \code{TRUE} (the default), prints all informational messages, for example rejection of the current proposal. @@ -198,6 +203,11 @@ recommended unless you are very confident that the model is correct up to numerical error. 
If the messages are silenced then the \code{\link[=fit-method-output]{$output()}} method of the resulting fit object can be used to display the silenced messages.} + +\item{save_cmdstan_config}{(logical) When \code{TRUE} (the default), call CmdStan +with argument \code{"output save_cmdstan_config=1"} to save a JSON file which contains +the argument tree and extra information (equivalent to the output CSV file +header). This option is only available in CmdStan 2.34.0 and later.} } \value{ A \code{\link{CmdStanVB}} object. diff --git a/tests/testthat/test-model-output_dir.R b/tests/testthat/test-model-output_dir.R index 4870051c..6dddd75e 100644 --- a/tests/testthat/test-model-output_dir.R +++ b/tests/testthat/test-model-output_dir.R @@ -8,6 +8,7 @@ if (getRversion() < '3.5.0') { } if (!dir.exists(sandbox)) { dir.create(sandbox) + on.exit(unlink(sandbox, recursive = TRUE)) } test_that("all fitting methods work with output_dir", { @@ -15,6 +16,7 @@ test_that("all fitting methods work with output_dir", { method_dir <- file.path(sandbox, method) if (!dir.exists(method_dir)) { dir.create(method_dir) + on.exit(unlink(method_dir, recursive = TRUE)) } # WSL models use internal WSL tempdir @@ -22,6 +24,7 @@ test_that("all fitting methods work with output_dir", { # no output_dir means should use tempdir fit <- testing_fit("bernoulli", method = method, seed = 123) expect_equal(fit$runset$args$output_dir, absolute_path(tempdir())) + files <- list.files(method_dir) } # specifying output_dir fit <- testing_fit("bernoulli", method = method, seed = 123, @@ -30,7 +33,26 @@ test_that("all fitting methods work with output_dir", { # from the original tempdir(), so need to normalise both for comparison expect_equal(normalizePath(fit$runset$args$output_dir), normalizePath(method_dir)) - expect_equal(length(list.files(method_dir)), fit$num_procs()) + files <- normalizePath(list.files(method_dir, full.names = TRUE)) + # in 2.34.0 we also save the config files for all methods and the metric + # for 
sample + if (cmdstan_version() < "2.34.0") { + mult <- 1 + } else if (method == "sample") { + mult <- 3 + expect_equal(files[grepl("metric", files)], + normalizePath(sapply(fit$metric_files(), wsl_safe_path, revert = TRUE, + USE.NAMES = FALSE))) + expect_equal(files[grepl("config", files)], + normalizePath(sapply(fit$config_files(), wsl_safe_path, revert = TRUE, + USE.NAMES = FALSE))) + } else { + mult <- 2 + expect_equal(files[grepl("config", files)], + normalizePath(sapply(fit$config_files(), wsl_safe_path, revert = TRUE, + USE.NAMES = FALSE))) + } + expect_equal(length(list.files(method_dir)), mult * fit$num_procs()) # specifying output_dir @@ -87,5 +109,7 @@ test_that("output_dir works with trailing /", { ) expect_equal(normalizePath(fit$runset$args$output_dir), normalizePath(test_dir)) - expect_equal(length(list.files(test_dir)), fit$num_procs()) + # in 2.34.0 we also save the metric and config files + mult <- if (cmdstan_version() >= "2.34.0") 3 else 1 + expect_equal(length(list.files(test_dir)), mult * fit$num_procs()) }) diff --git a/tests/testthat/test-model-sample.R b/tests/testthat/test-model-sample.R index 1810834f..8afe3053 100644 --- a/tests/testthat/test-model-sample.R +++ b/tests/testthat/test-model-sample.R @@ -32,7 +32,9 @@ ok_arg_values <- list( save_latent_dynamics = FALSE, init_buffer = 20, term_buffer = 0, - window = 15 + window = 15, + save_metric = TRUE, + save_cmdstan_config = TRUE ) # using any one of these should cause sample() to error @@ -56,7 +58,9 @@ bad_arg_values <- list( save_latent_dynamics = "NOT_LOGICAL", init_buffer = "NOT_INTEGER", term_buffer = "NOT_INTEGER", - window = "NOT_INTEGER" + window = "NOT_INTEGER", + save_metric = "NOT_LOGICAL", + save_cmdstan_config = "NOT_LOGICAL" ) bad_arg_values_2 <- list( @@ -208,6 +212,12 @@ test_that("sample() method runs when fixed_param = TRUE", { expect_equal(fit_500_w$metadata()$algorithm, "fixed_param") }) +test_that("sample() method runs when adapt_engaged = FALSE", { + 
expect_sample_output(fit <- mod$sample(data = data_list, chains = 1, adapt_engaged = FALSE), 1) + draws <- try(fit$draws(), silent = TRUE) + expect_false(inherits(draws, "try-error")) +}) + test_that("chain_ids work with sample()", { mod$compile() expect_sample_output(fit12 <- mod$sample(data = data_list, chains = 2, chain_ids = c(10,12))) diff --git a/tests/testthat/test-utils.R b/tests/testthat/test-utils.R index 180f7ded..6beedc4f 100644 --- a/tests/testthat/test-utils.R +++ b/tests/testthat/test-utils.R @@ -150,6 +150,10 @@ test_that("repair_path works with zero length path or non-string path", { expect_equal(repair_path(5), 5) }) +test_that("repair_path works with multiple paths", { + expect_equal(repair_path(c("a//b\\c/", "d\\e//f")), c("a/b/c", "d/e/f")) +}) + test_that("list_to_array works with empty list", { expect_equal(list_to_array(list()), NULL) })