From d34b77ef0c196a292dec660663975fbb8be0a904 Mon Sep 17 00:00:00 2001 From: Andrew Johnson Date: Sun, 5 May 2024 00:55:55 +0300 Subject: [PATCH 1/3] Improve efficiency of model methods, tidy code --- R/fit.R | 27 +++-------- R/utils.R | 7 --- inst/include/hessian.cpp | 41 ---------------- inst/include/model_methods.cpp | 75 +++++++++++++++++++++-------- tests/testthat/test-model-methods.R | 9 +--- 5 files changed, 63 insertions(+), 96 deletions(-) delete mode 100644 inst/include/hessian.cpp diff --git a/R/fit.R b/R/fit.R index 0daf26f6..a80db23d 100644 --- a/R/fit.R +++ b/R/fit.R @@ -518,12 +518,8 @@ unconstrain_variables <- function(variables) { " not provided!", call. = FALSE) } - # Remove zero-length parameters from model_variables, otherwise process_init - # warns about missing inputs - model_variables$parameters <- model_variables$parameters[nonzero_length_params] - - stan_pars <- process_init(list(variables), num_procs = 1, model_variables) - private$model_methods_env_$unconstrain_variables(private$model_methods_env_$model_ptr_, stan_pars) + variables_vector <- unlist(variables, recursive = TRUE, use.names = FALSE) + private$model_methods_env_$unconstrain_variables(private$model_methods_env_$model_ptr_, variables_vector) } CmdStanFit$set("public", name = "unconstrain_variables", value = unconstrain_variables) @@ -571,11 +567,11 @@ unconstrain_draws <- function(files = NULL, draws = NULL, call. = FALSE) } if (!is.null(files)) { - read_csv <- read_cmdstan_csv(files = files, format = "draws_df") + read_csv <- read_cmdstan_csv(files = files, format = "draws_matrix") draws <- read_csv$post_warmup_draws } if (!is.null(draws)) { - draws <- maybe_convert_draws_format(draws, "draws_df") + draws <- maybe_convert_draws_format(draws, "draws_matrix") } } else { if (is.null(private$draws_)) { @@ -584,7 +580,7 @@ unconstrain_draws <- function(files = NULL, draws = NULL, } private$read_csv_(format = "draws_df") } - draws <- maybe_convert_draws_format(private$draws_, "draws_df") + draws <- maybe_convert_draws_format(private$draws_, "draws_matrix") } model_par_names <- self$metadata()$stan_variables[self$metadata()$stan_variables != "lp__"] @@ -599,19 +595,10 @@ unconstrain_draws <- function(files = NULL, draws = NULL, pars <- names(model_variables$parameters[nonzero_length_params]) draws <- posterior::subset_draws(draws, variable = pars) - skeleton <- self$variable_skeleton(transformed_parameters = FALSE, - generated_quantities = FALSE) - par_columns <- !(names(draws) %in% c(".chain", ".iteration", ".draw")) - meta_columns <- !par_columns - unconstrained <- lapply(asplit(draws, 1), function(draw) { - par_list <- utils::relist(as.numeric(draw[par_columns]), skeleton) - self$unconstrain_variables(variables = par_list) - }) - - unconstrained <- do.call(rbind.data.frame, unconstrained) + unconstrained <- private$model_methods_env_$unconstrain_draws(private$model_methods_env_$model_ptr_, draws) uncon_names <- private$model_methods_env_$unconstrained_param_names(private$model_methods_env_$model_ptr_, FALSE, FALSE) names(unconstrained) <- repair_variable_names(uncon_names) - maybe_convert_draws_format(cbind.data.frame(unconstrained, draws[,meta_columns]), format) + maybe_convert_draws_format(unconstrained, format) } CmdStanFit$set("public", name = "unconstrain_draws", value = unconstrain_draws) diff --git a/R/utils.R b/R/utils.R index a24d7e6f..37d77f79 100644 --- a/R/utils.R +++ b/R/utils.R @@ -757,13 +757,6 @@ expose_model_methods <- function(env, verbose = FALSE, hessian = FALSE) { readLines(system.file("include", "model_methods.cpp", package = "cmdstanr", mustWork = TRUE))) - if (hessian) { - code <- c("#include ", - code, - readLines(system.file("include", "hessian.cpp", - package = "cmdstanr", mustWork = TRUE))) - } - code <- paste(code, collapse = "\n") rcpp_source_stan(code, env, verbose) invisible(NULL) diff --git a/inst/include/hessian.cpp b/inst/include/hessian.cpp deleted file mode 100644 index 3883c130..00000000 --- a/inst/include/hessian.cpp +++ /dev/null @@ -1,41 +0,0 @@ -#include -#include - -template -struct hessian_wrapper { - const M& model; - bool jac_adjust; - std::ostream* o; - - hessian_wrapper(const M& m, bool adj, std::ostream* out) : model(m), jac_adjust(adj), o(out) {} - - template - T operator()(const Eigen::Matrix& x) const { - if (jac_adjust) { - // log_prob() requires non-const but doesn't modify its argument - return model.template log_prob( - const_cast&>(x), o); - } else { - // log_prob() requires non-const but doesn't modify its argument - return model.template log_prob( - const_cast&>(x), o); - } - } -}; - -// [[Rcpp::export]] -Rcpp::List hessian(SEXP ext_model_ptr, Eigen::VectorXd& upars, bool jac_adjust) { - Rcpp::XPtr ptr(ext_model_ptr); - - double log_prob; - Eigen::VectorXd grad; - Eigen::MatrixXd hessian; - - stan::math::hessian(hessian_wrapper(*ptr.get(), jac_adjust, 0), - upars, log_prob, grad, hessian); - - return Rcpp::List::create( - Rcpp::Named("log_prob") = log_prob, - Rcpp::Named("grad_log_prob") = grad, - Rcpp::Named("hessian") = hessian); -} diff --git a/inst/include/model_methods.cpp b/inst/include/model_methods.cpp index 978eb3cf..70eeabf5 100644 --- a/inst/include/model_methods.cpp +++ b/inst/include/model_methods.cpp @@ -1,4 +1,5 @@ #include +#include #include #include #include @@ -26,10 +27,14 @@ using json_data_t = stan::json::json_data; return std::make_shared(data_context); } +stan::model::model_base& +new_model(stan::io::var_context& data_context, unsigned int seed, + std::ostream* msg_stream); + // [[Rcpp::export]] Rcpp::List model_ptr(std::string data_path, boost::uint32_t seed) { - Rcpp::XPtr ptr( - new stan_model(*var_context(data_path), seed, &Rcpp::Rcout) + Rcpp::XPtr ptr( + &new_model(*var_context(data_path), seed, &Rcpp::Rcout) ); Rcpp::XPtr base_rng(new boost::ecuyer1988(seed)); return Rcpp::List::create( @@ -39,37 +44,56 @@ Rcpp::List model_ptr(std::string data_path, boost::uint32_t seed) { } // [[Rcpp::export]] -double log_prob(SEXP ext_model_ptr, std::vector upars, - bool jac_adjust) { +double log_prob(SEXP ext_model_ptr, Eigen::VectorXd upars, bool jac_adjust) { Rcpp::XPtr ptr(ext_model_ptr); - std::vector params_i; if (jac_adjust) { - return stan::model::log_prob_propto(*ptr.get(), upars, params_i, &Rcpp::Rcout); + return stan::model::log_prob_propto(*ptr.get(), upars, &Rcpp::Rcout); } else { - return stan::model::log_prob_propto(*ptr.get(), upars, params_i, &Rcpp::Rcout); + return stan::model::log_prob_propto(*ptr.get(), upars, &Rcpp::Rcout); } } // [[Rcpp::export]] -Rcpp::NumericVector grad_log_prob(SEXP ext_model_ptr, std::vector upars, +Rcpp::NumericVector grad_log_prob(SEXP ext_model_ptr, Eigen::VectorXd upars, bool jac_adjust) { Rcpp::XPtr ptr(ext_model_ptr); - std::vector gradients; - std::vector params_i; + Eigen::VectorXd gradients; double lp; if (jac_adjust) { - lp = stan::model::log_prob_grad( - *ptr.get(), upars, params_i, gradients); + lp = stan::model::log_prob_grad(*ptr.get(), upars, gradients); } else { - lp = stan::model::log_prob_grad( - *ptr.get(), upars, params_i, gradients); + lp = stan::model::log_prob_grad(*ptr.get(), upars, gradients); } - Rcpp::NumericVector grad_rtn = Rcpp::wrap(gradients); + Rcpp::NumericVector grad_rtn(Rcpp::wrap(std::move(gradients))); grad_rtn.attr("log_prob") = lp; return grad_rtn; } +// [[Rcpp::export]] +Rcpp::List hessian(SEXP ext_model_ptr, Eigen::VectorXd upars, bool jacobian) { + Rcpp::XPtr ptr(ext_model_ptr); + + auto hessian_functor = [&](auto&& x) { + if (jacobian) { + return ptr->log_prob(x, 0); + } else { + return ptr->log_prob(x, 0); + } + }; + + double log_prob; + Eigen::VectorXd grad; + Eigen::MatrixXd hessian; + + stan::math::internal::finite_diff_hessian_auto(hessian_functor, upars, log_prob, grad, hessian); + + return Rcpp::List::create( + Rcpp::Named("log_prob") = log_prob, + Rcpp::Named("grad_log_prob") = grad, + Rcpp::Named("hessian") = hessian); +} + // [[Rcpp::export]] size_t get_num_upars(SEXP ext_model_ptr) { Rcpp::XPtr ptr(ext_model_ptr); @@ -95,12 +119,23 @@ Rcpp::List get_param_metadata(SEXP ext_model_ptr) { } // [[Rcpp::export]] -std::vector unconstrain_variables(SEXP ext_model_ptr, std::string init_path) { +Eigen::VectorXd unconstrain_variables(SEXP ext_model_ptr, Eigen::VectorXd variables) { Rcpp::XPtr ptr(ext_model_ptr); - std::vector params_i; - std::vector vars; - ptr->transform_inits(*var_context(init_path), params_i, vars, &Rcpp::Rcout); - return vars; + Eigen::VectorXd unconstrained_variables; + ptr->unconstrain_array(variables, unconstrained_variables, &Rcpp::Rcout); + return unconstrained_variables; +} + +// [[Rcpp::export]] +Eigen::MatrixXd unconstrain_draws(SEXP ext_model_ptr, Eigen::MatrixXd variables) { + Rcpp::XPtr ptr(ext_model_ptr); + Eigen::MatrixXd unconstrained_draws(variables.cols(), variables.rows()); + for (int i = 0; i < variables.rows(); i++) { + Eigen::VectorXd unconstrained_variables; + ptr->unconstrain_array(variables.transpose().col(i), unconstrained_variables, &Rcpp::Rcout); + unconstrained_draws.col(i) = unconstrained_variables; + } + return unconstrained_draws.transpose(); } // [[Rcpp::export]] diff --git a/tests/testthat/test-model-methods.R b/tests/testthat/test-model-methods.R index 9eded6be..276d0b26 100644 --- a/tests/testthat/test-model-methods.R +++ b/tests/testthat/test-model-methods.R @@ -34,17 +34,10 @@ test_that("Methods error if not compiled", { ) }) -test_that("User warned about higher-order autodiff with hessian", { - skip_if(os_is_wsl()) - expect_message( - fit$init_model_methods(hessian = TRUE, verbose = TRUE), - "The hessian method relies on higher-order autodiff which is still experimental. Please report any compilation errors that you encounter", - fixed = TRUE - ) -}) test_that("Methods return correct values", { skip_if(os_is_wsl()) + fit$init_model_methods(verbose = TRUE) lp <- fit$log_prob(unconstrained_variables=c(0.1)) expect_equal(lp, -8.6327599208828509347) From bf397e426b3fdfd07b9a28f946d4e0ce8a2f3f11 Mon Sep 17 00:00:00 2001 From: Andrew Johnson Date: Sun, 5 May 2024 17:29:08 +0300 Subject: [PATCH 2/3] Add handling for variable order, fix chains return in draws --- R/fit.R | 4 ++-- R/model.R | 7 ++++++- R/utils.R | 14 +++++++------- 3 files changed, 15 insertions(+), 10 deletions(-) diff --git a/R/fit.R b/R/fit.R index a80db23d..c2a1f2bf 100644 --- a/R/fit.R +++ b/R/fit.R @@ -518,7 +518,7 @@ unconstrain_variables <- function(variables) { " not provided!", call. = FALSE) } - variables_vector <- unlist(variables, recursive = TRUE, use.names = FALSE) + variables_vector <- unlist(variables[model_par_names], recursive = TRUE) private$model_methods_env_$unconstrain_variables(private$model_methods_env_$model_ptr_, variables_vector) } CmdStanFit$set("public", name = "unconstrain_variables", value = unconstrain_variables) @@ -598,7 +598,7 @@ unconstrain_draws <- function(files = NULL, draws = NULL, unconstrained <- private$model_methods_env_$unconstrain_draws(private$model_methods_env_$model_ptr_, draws) uncon_names <- private$model_methods_env_$unconstrained_param_names(private$model_methods_env_$model_ptr_, FALSE, FALSE) names(unconstrained) <- repair_variable_names(uncon_names) - maybe_convert_draws_format(unconstrained, format) + maybe_convert_draws_format(unconstrained, format, .nchains = posterior::nchains(draws)) } CmdStanFit$set("public", name = "unconstrain_draws", value = unconstrain_draws) diff --git a/R/model.R b/R/model.R index 6aa5b010..35489171 100644 --- a/R/model.R +++ b/R/model.R @@ -465,10 +465,10 @@ compile <- function(quiet = TRUE, stanc_options = list(), force_recompile = getOption("cmdstanr_force_recompile", default = FALSE), compile_model_methods = FALSE, - compile_hessian_method = FALSE, compile_standalone = FALSE, dry_run = FALSE, #deprecated + compile_hessian_method = FALSE, threads = FALSE) { if (length(self$stan_file()) == 0) { @@ -505,6 +505,11 @@ compile <- function(quiet = TRUE, cpp_options[["stan_threads"]] <- TRUE } + # temporary deprecation warnings + if (isTRUE(compile_hessian_method)) { + warning("'compile_hessian_method' is deprecated. The hessian method is compiled with all models.") + } + if (length(self$exe_file()) == 0) { if (is.null(dir)) { exe_base <- self$stan_file() diff --git a/R/utils.R b/R/utils.R index 37d77f79..8b0bc01a 100644 --- a/R/utils.R +++ b/R/utils.R @@ -409,19 +409,19 @@ valid_draws_formats <- function() { "draws_rvars", "rvars") } -maybe_convert_draws_format <- function(draws, format) { +maybe_convert_draws_format <- function(draws, format, ...) { if (is.null(draws)) { return(draws) } format <- sub("^draws_", "", format) switch( format, - "array" = posterior::as_draws_array(draws), - "df" = posterior::as_draws_df(draws), - "data.frame" = posterior::as_draws_df(draws), - "list" = posterior::as_draws_list(draws), - "matrix" = posterior::as_draws_matrix(draws), - "rvars" = posterior::as_draws_rvars(draws), + "array" = posterior::as_draws_array(draws, ...), + "df" = posterior::as_draws_df(draws, ...), + "data.frame" = posterior::as_draws_df(draws, ...), + "list" = posterior::as_draws_list(draws, ...), + "matrix" = posterior::as_draws_matrix(draws, ...), + "rvars" = posterior::as_draws_rvars(draws, ...), stop("Invalid draws format.", call. = FALSE) ) } From fa1b10b87a25bd1f59eda3ffb1d44d1d0b9121cb Mon Sep 17 00:00:00 2001 From: Andrew Johnson Date: Sun, 5 May 2024 21:42:09 +0300 Subject: [PATCH 3/3] Update doc --- man/model-method-compile.Rd | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/man/model-method-compile.Rd b/man/model-method-compile.Rd index 7bfa47d7..e17b1ae5 100644 --- a/man/model-method-compile.Rd +++ b/man/model-method-compile.Rd @@ -15,9 +15,9 @@ compile( stanc_options = list(), force_recompile = getOption("cmdstanr_force_recompile", default = FALSE), compile_model_methods = FALSE, - compile_hessian_method = FALSE, compile_standalone = FALSE, dry_run = FALSE, + compile_hessian_method = FALSE, threads = FALSE ) } @@ -64,9 +64,6 @@ via a global \code{cmdstanr_force_recompile} option.} (\code{log_prob()}, \code{grad_log_prob()}, \code{constrain_variables()}, \code{unconstrain_variables()}).} -\item{compile_hessian_method}{(logical) Should the (experimental) \code{hessian()} method be -be compiled with the model methods?} - \item{compile_standalone}{(logical) Should functions in the Stan model be compiled for use in R? If \code{TRUE} the functions will be available via the \code{functions} field in the compiled model object. This can also be done after @@ -76,6 +73,9 @@ compilation using the \item{dry_run}{(logical) If \code{TRUE}, the code will do all checks before compilation, but skip the actual C++ compilation. Used to speedup tests.} +\item{compile_hessian_method}{(logical) Should the (experimental) \code{hessian()} method be +be compiled with the model methods?} + \item{threads}{Deprecated and will be removed in a future release. Please turn on threading via \code{cpp_options = list(stan_threads = TRUE)} instead.} }