From 3530c9941eb7f732b421b597f0d0ca4c9291e6bd Mon Sep 17 00:00:00 2001 From: n-kall Date: Mon, 11 Dec 2023 12:12:48 +0200 Subject: [PATCH 01/11] add automatic thinning of draws --- R/thin_draws.R | 32 ++++++++++++++++++++++----- man-roxygen/ref-sailynoja-ecdf-2022.R | 5 +++++ man/thin_draws.Rd | 16 ++++++++++---- 3 files changed, 44 insertions(+), 9 deletions(-) create mode 100644 man-roxygen/ref-sailynoja-ecdf-2022.R diff --git a/R/thin_draws.R b/R/thin_draws.R index 964c0b52..c5f845c4 100644 --- a/R/thin_draws.R +++ b/R/thin_draws.R @@ -4,8 +4,11 @@ #' #' @aliases thin #' @template args-methods-x -#' @param thin (positive integer) The period for selecting draws. +#' @param thin (positive integer) The period for selecting draws. If +#' omitted, this will be automatically calculated based on bulk-ESS +#' and tail-ESS as suggested by Säilynoja et al. (2022). #' @template args-methods-dots +#' @template ref-sailynoja-ecdf-2022 #' @template return-draws #' #' @examples @@ -16,13 +19,18 @@ #' niterations(x) #' #' @export -thin_draws <- function(x, thin, ...) { +thin_draws <- function(x, thin = NULL, ...) { UseMethod("thin_draws") } #' @rdname thin_draws #' @export -thin_draws.draws <- function(x, thin, ...) { +thin_draws.draws <- function(x, thin = NULL, ...) { + if (is.null(thin)) { + thin <- round(ess_based_thinning_all_vars(x)) + message("Automatically thinned by ", round(thin), " based on ESS.") + } + thin <- as_one_integer(thin) if (thin == 1L) { # no thinning requested @@ -32,7 +40,7 @@ thin_draws.draws <- function(x, thin, ...) { stop_no_call("'thin' must be a positive integer.") } niterations <- niterations(x) - if (thin > niterations ) { + if (thin > niterations) { stop_no_call("'thin' must be smaller than the total number of iterations.") } iteration_ids <- seq(1, niterations, by = thin) @@ -41,6 +49,20 @@ thin_draws.draws <- function(x, thin, ...) { #' @rdname thin_draws #' @export -thin_draws.rvar <- function(x, thin, ...) { +thin_draws.rvar <- function(x, thin = NULL, ...) { thin_draws(draws_rvars(x = x), thin, ...)$x } + +ess_based_thinning_all_vars <- function(x, ...) { + max(summarise_draws(x, thin = ess_based_thinning)$thin) +} + +ess_based_thinning <- function(x, ...) { + # thin based on mean (over chains) of minimum of tail and bulk ess + x <- as.matrix(x) + ess_tailbulk_chains <- apply(x, + MARGIN = 2, + FUN = function(x) min(SW(ess_tail(x)), SW(ess_bulk(x))) + ) + nrow(x) / mean(ess_tailbulk_chains) +} diff --git a/man-roxygen/ref-sailynoja-ecdf-2022.R b/man-roxygen/ref-sailynoja-ecdf-2022.R new file mode 100644 index 00000000..51708b7a --- /dev/null +++ b/man-roxygen/ref-sailynoja-ecdf-2022.R @@ -0,0 +1,5 @@ +#' @references +#' Teemu Säilynoja, Paul-Christian Bürkner, and Aki Vehtari (2022). +#' Graphical test for discrete uniformity and its applications in +#' goodness-of-fit evaluation and multiple sample comparison. +#' *Statistics and Computing*. 32, 32. doi:10.1007/s11222-022-10090-6 diff --git a/man/thin_draws.Rd b/man/thin_draws.Rd index b4d4facd..b7631e7d 100644 --- a/man/thin_draws.Rd +++ b/man/thin_draws.Rd @@ -7,17 +7,19 @@ \alias{thin_draws.rvar} \title{Thin \code{draws} objects} \usage{ -thin_draws(x, thin, ...) +thin_draws(x, thin = NULL, ...) -\method{thin_draws}{draws}(x, thin, ...) +\method{thin_draws}{draws}(x, thin = NULL, ...) -\method{thin_draws}{rvar}(x, thin, ...) +\method{thin_draws}{rvar}(x, thin = NULL, ...) } \arguments{ \item{x}{(draws) A \code{draws} object or another \R object for which the method is defined.} -\item{thin}{(positive integer) The period for selecting draws.} +\item{thin}{(positive integer) The period for selecting draws. If +omitted, this will be automatically calculated based on bulk-ESS +and tail-ESS as suggested by Säilynoja et al. (2022).} \item{...}{Arguments passed to individual methods (if applicable).} } @@ -35,3 +37,9 @@ x <- thin_draws(x, thin = 5) niterations(x) } +\references{ +Teemu Säilynoja, Paul-Christian Bürkner, and Aki Vehtari (2022). +Graphical test for discrete uniformity and its applications in +goodness-of-fit evaluation and multiple sample comparison. +\emph{Statistics and Computing}. 32, 32. doi:10.1007/s11222-022-10090-6 +} From c84fa37bfbe70418a88981fd4a46a94cda98718d Mon Sep 17 00:00:00 2001 From: n-kall Date: Mon, 11 Dec 2023 12:30:54 +0200 Subject: [PATCH 02/11] add test for auto thinning draws --- tests/testthat/test-thin_draws.R | 12 ++++++++++++ 1 file changed, 12 insertions(+) diff --git a/tests/testthat/test-thin_draws.R b/tests/testthat/test-thin_draws.R index 7b16abeb..aec7c40c 100644 --- a/tests/testthat/test-thin_draws.R +++ b/tests/testthat/test-thin_draws.R @@ -11,3 +11,15 @@ test_that("thin_draws works on rvars", { expect_equal(thin_draws(as_draws_rvars(x)$theta, 10L), as_draws_rvars(thin_draws(x, 10L))$theta) }) + +test_that("automatic thinning works as expected", { + x <- as_draws_array(example_draws()) + mu_1 <- subset_draws(x, variable = "mu", chain = 1) + + ess_tail_mu_1 <- ess_tail(mu_1) + ess_bulk_mu_1 <- ess_bulk(mu_1) + + thin_by <- round(ndraws(mu_1) / min(ess_tail_mu_1, ess_bulk_mu_1)) + expect_equal(thin_draws(mu_1), thin_draws(mu_1, thin = thin_by)) + +}) From 5ad56da237dac65af149c34557182cf1aded171d Mon Sep 17 00:00:00 2001 From: n-kall Date: Mon, 11 Dec 2023 15:23:19 +0200 Subject: [PATCH 03/11] update test for thinning with multiple chains --- tests/testthat/test-thin_draws.R | 18 +++++++++++++----- 1 file changed, 13 insertions(+), 5 deletions(-) diff --git a/tests/testthat/test-thin_draws.R b/tests/testthat/test-thin_draws.R index aec7c40c..40a0a5ba 100644 --- a/tests/testthat/test-thin_draws.R +++ b/tests/testthat/test-thin_draws.R @@ -14,12 +14,20 @@ test_that("thin_draws works on rvars", { test_that("automatic thinning works as expected", { x <- as_draws_array(example_draws()) - mu_1 <- subset_draws(x, variable = "mu", chain = 1) + mu <- subset_draws(x, "mu") + mu_1 <- subset_draws(mu, chain = 1) + mu_2 <- subset_draws(mu, chain = 2) + mu_3 <- subset_draws(mu, chain = 3) + mu_4 <- subset_draws(mu, chain = 4) - ess_tail_mu_1 <- ess_tail(mu_1) - ess_bulk_mu_1 <- ess_bulk(mu_1) + ess_mu_1 <- SW(min(ess_tail(mu_1), ess_bulk(mu_1))) + ess_mu_2 <- SW(min(ess_tail(mu_2), ess_bulk(mu_2))) + ess_mu_3 <- SW(min(ess_tail(mu_2), ess_bulk(mu_3))) + ess_mu_4 <- SW(min(ess_tail(mu_3), ess_bulk(mu_4))) - thin_by <- round(ndraws(mu_1) / min(ess_tail_mu_1, ess_bulk_mu_1)) - expect_equal(thin_draws(mu_1), thin_draws(mu_1, thin = thin_by)) + thin_by <- round(niterations(mu) / mean( + c(ess_mu_1, ess_mu_2, ess_mu_3, ess_mu_4)) + ) + expect_equal(thin_draws(mu), thin_draws(mu, thin = thin_by)) }) From 5a4976cf3955071cf53d24d43cd0b2706cea6cd5 Mon Sep 17 00:00:00 2001 From: n-kall Date: Thu, 14 Dec 2023 14:15:46 +0200 Subject: [PATCH 04/11] enable non-integer thinning --- R/thin_draws.R | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/R/thin_draws.R b/R/thin_draws.R index c5f845c4..327d6de6 100644 --- a/R/thin_draws.R +++ b/R/thin_draws.R @@ -27,11 +27,11 @@ thin_draws <- function(x, thin = NULL, ...) { #' @export thin_draws.draws <- function(x, thin = NULL, ...) { if (is.null(thin)) { - thin <- round(ess_based_thinning_all_vars(x)) - message("Automatically thinned by ", round(thin), " based on ESS.") + thin <- ess_based_thinning_all_vars(x) + message("Automatically thinned by ", round(thin, 1), " based on ESS.") } - thin <- as_one_integer(thin) + thin <- as_one_numeric(thin) if (thin == 1L) { # no thinning requested return(x) @@ -43,7 +43,7 @@ thin_draws.draws <- function(x, thin = NULL, ...) { if (thin > niterations) { stop_no_call("'thin' must be smaller than the total number of iterations.") } - iteration_ids <- seq(1, niterations, by = thin) + iteration_ids <- round(seq(1, niterations, by = thin)) subset_draws(x, iteration = iteration_ids) } From 1e5eff8d3bfdc4baf4248a14e9607a7155b447b2 Mon Sep 17 00:00:00 2001 From: n-kall <33577035+n-kall@users.noreply.github.com> Date: Thu, 14 Dec 2023 14:31:35 +0200 Subject: [PATCH 05/11] update thin_draws docs Co-authored-by: Matthew Kay --- R/thin_draws.R | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/R/thin_draws.R b/R/thin_draws.R index c5f845c4..d903cb82 100644 --- a/R/thin_draws.R +++ b/R/thin_draws.R @@ -5,7 +5,7 @@ #' @aliases thin #' @template args-methods-x #' @param thin (positive integer) The period for selecting draws. If -#' omitted, this will be automatically calculated based on bulk-ESS +#' `NULL`, this will be automatically calculated based on bulk-ESS #' and tail-ESS as suggested by Säilynoja et al. (2022). #' @template args-methods-dots #' @template ref-sailynoja-ecdf-2022 From ff147b3b7dacbbe88c12d696a3da5f8c7ba7ab9f Mon Sep 17 00:00:00 2001 From: n-kall Date: Wed, 10 Jan 2024 15:33:31 +0200 Subject: [PATCH 06/11] improve non-integer thinning documentation --- R/thin_draws.R | 11 ++++++----- 1 file changed, 6 insertions(+), 5 deletions(-) diff --git a/R/thin_draws.R b/R/thin_draws.R index 327d6de6..b28322d1 100644 --- a/R/thin_draws.R +++ b/R/thin_draws.R @@ -4,9 +4,10 @@ #' #' @aliases thin #' @template args-methods-x -#' @param thin (positive integer) The period for selecting draws. If -#' omitted, this will be automatically calculated based on bulk-ESS -#' and tail-ESS as suggested by Säilynoja et al. (2022). +#' @param thin (positive numeric) The period for selecting draws. Must +#' be greater than or equal to 1. If omitted, this will be +#' automatically calculated based on bulk-ESS and tail-ESS as +#' suggested by Säilynoja et al. (2022). #' @template args-methods-dots #' @template ref-sailynoja-ecdf-2022 #' @template return-draws @@ -36,8 +37,8 @@ thin_draws.draws <- function(x, thin = NULL, ...) { # no thinning requested return(x) } - if (thin <= 0L) { - stop_no_call("'thin' must be a positive integer.") + if (thin <= 1L) { + stop_no_call("'thin' must be greater than or equal to 1") } niterations <- niterations(x) if (thin > niterations) { From 8d8ae7d2f3e5293373e6b324644dff86ac7b839a Mon Sep 17 00:00:00 2001 From: n-kall Date: Wed, 10 Jan 2024 15:51:01 +0200 Subject: [PATCH 07/11] fix thin draws test --- man/thin_draws.Rd | 7 ++++--- tests/testthat/test-thin_draws.R | 2 +- 2 files changed, 5 insertions(+), 4 deletions(-) diff --git a/man/thin_draws.Rd b/man/thin_draws.Rd index b7631e7d..9d895ea4 100644 --- a/man/thin_draws.Rd +++ b/man/thin_draws.Rd @@ -17,9 +17,10 @@ thin_draws(x, thin = NULL, ...) \item{x}{(draws) A \code{draws} object or another \R object for which the method is defined.} -\item{thin}{(positive integer) The period for selecting draws. If -omitted, this will be automatically calculated based on bulk-ESS -and tail-ESS as suggested by Säilynoja et al. (2022).} +\item{thin}{(positive numeric) The period for selecting draws. Must +be greater than or equal to 1. If \code{NULL}, it will be +automatically calculated based on bulk-ESS and tail-ESS as +suggested by Säilynoja et al. (2022).} \item{...}{Arguments passed to individual methods (if applicable).} } diff --git a/tests/testthat/test-thin_draws.R b/tests/testthat/test-thin_draws.R index 40a0a5ba..4598a3f2 100644 --- a/tests/testthat/test-thin_draws.R +++ b/tests/testthat/test-thin_draws.R @@ -2,7 +2,7 @@ test_that("thin_draws works correctly", { x <- as_draws_array(example_draws()) expect_equal(niterations(thin_draws(x, 5L)), niterations(x) / 5) expect_equal(x, thin_draws(x, thin = 1L)) - expect_error(thin_draws(x, -1), "'thin' must be a positive integer") + expect_error(thin_draws(x, -1), "'thin' must be greater than or equal to 1") expect_error(thin_draws(x, 1000), "'thin' must be smaller than") }) From c81850b32b1596f8e068d889d048ffaf622623b3 Mon Sep 17 00:00:00 2001 From: n-kall Date: Wed, 10 Jan 2024 16:00:00 +0200 Subject: [PATCH 08/11] fix autothinning test --- tests/testthat/test-thin_draws.R | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/tests/testthat/test-thin_draws.R b/tests/testthat/test-thin_draws.R index 4598a3f2..52def0b1 100644 --- a/tests/testthat/test-thin_draws.R +++ b/tests/testthat/test-thin_draws.R @@ -22,12 +22,12 @@ test_that("automatic thinning works as expected", { ess_mu_1 <- SW(min(ess_tail(mu_1), ess_bulk(mu_1))) ess_mu_2 <- SW(min(ess_tail(mu_2), ess_bulk(mu_2))) - ess_mu_3 <- SW(min(ess_tail(mu_2), ess_bulk(mu_3))) - ess_mu_4 <- SW(min(ess_tail(mu_3), ess_bulk(mu_4))) + ess_mu_3 <- SW(min(ess_tail(mu_3), ess_bulk(mu_3))) + ess_mu_4 <- SW(min(ess_tail(mu_4), ess_bulk(mu_4))) - thin_by <- round(niterations(mu) / mean( + thin_by <- niterations(mu) / mean( c(ess_mu_1, ess_mu_2, ess_mu_3, ess_mu_4)) - ) + expect_equal(thin_draws(mu), thin_draws(mu, thin = thin_by)) }) From 1574d721649f36d9695a74d90168fd3b87ef1b1f Mon Sep 17 00:00:00 2001 From: n-kall Date: Fri, 12 Jan 2024 22:48:39 +0200 Subject: [PATCH 09/11] update thinning documentation --- R/thin_draws.R | 11 +++++++---- 1 file changed, 7 insertions(+), 4 deletions(-) diff --git a/R/thin_draws.R b/R/thin_draws.R index ae9ea79a..c881dcf0 100644 --- a/R/thin_draws.R +++ b/R/thin_draws.R @@ -1,13 +1,16 @@ #' Thin `draws` objects #' -#' Thin [`draws`] objects to reduce their size and autocorrelation in the chains. +#' Thin [`draws`] objects to reduce their size and autocorrelation in +#' the chains. #' #' @aliases thin #' @template args-methods-x #' @param thin (positive numeric) The period for selecting draws. Must -#' be greater than or equal to 1. If `NULL`, it will be -#' automatically calculated based on bulk-ESS and tail-ESS as -#' suggested by Säilynoja et al. (2022). +#' be between 1 and the number of iterations. If the value is not an +#' integer, the draws will be selected such that the average interval +#' between them approaches the thin value. If `NULL`, it will be +#' automatically calculated based on bulk and tail effective sample +#' size as suggested by Säilynoja et al. (2022). #' @template args-methods-dots #' @template ref-sailynoja-ecdf-2022 #' @template return-draws From 3c3d7e832c9e2b29f5c6434947fa0a55c5c9d471 Mon Sep 17 00:00:00 2001 From: n-kall Date: Mon, 15 Jan 2024 15:06:14 +0200 Subject: [PATCH 10/11] update thin draws documentation --- R/thin_draws.R | 11 +++++++---- man/thin_draws.Rd | 14 ++++++++++---- 2 files changed, 17 insertions(+), 8 deletions(-) diff --git a/R/thin_draws.R b/R/thin_draws.R index c881dcf0..1edcbb12 100644 --- a/R/thin_draws.R +++ b/R/thin_draws.R @@ -7,10 +7,13 @@ #' @template args-methods-x #' @param thin (positive numeric) The period for selecting draws. Must #' be between 1 and the number of iterations. If the value is not an -#' integer, the draws will be selected such that the average interval -#' between them approaches the thin value. If `NULL`, it will be -#' automatically calculated based on bulk and tail effective sample -#' size as suggested by Säilynoja et al. (2022). +#' integer, the draws will be selected such that the number of draws +#' returned is equal to round(ndraws(x) / thin)). Intervals between +#' selected draws will be either ceiling(thin) or floor(thin), such +#' that the average interval will be close to the thin value. If +#' `NULL`, it will be automatically calculated based on bulk and +#' tail effective sample size as suggested by Säilynoja et +#' al. (2022). #' @template args-methods-dots #' @template ref-sailynoja-ecdf-2022 #' @template return-draws diff --git a/man/thin_draws.Rd b/man/thin_draws.Rd index 9d895ea4..b8887b8f 100644 --- a/man/thin_draws.Rd +++ b/man/thin_draws.Rd @@ -18,9 +18,14 @@ thin_draws(x, thin = NULL, ...) is defined.} \item{thin}{(positive numeric) The period for selecting draws. Must -be greater than or equal to 1. If \code{NULL}, it will be -automatically calculated based on bulk-ESS and tail-ESS as -suggested by Säilynoja et al. (2022).} +be between 1 and the number of iterations. If the value is not an +integer, the draws will be selected such that the number of draws +returned is equal to round(ndraws(x) / thin)). Intervals between +selected draws will be either ceiling(thin) or floor(thin), such +that the average interval will be close to the thin value. If +\code{NULL}, it will be automatically calculated based on bulk and +tail effective sample size as suggested by Säilynoja et +al. (2022).} \item{...}{Arguments passed to individual methods (if applicable).} } @@ -28,7 +33,8 @@ suggested by Säilynoja et al. (2022).} A \code{draws} object of the same class as \code{x}. } \description{ -Thin \code{\link{draws}} objects to reduce their size and autocorrelation in the chains. +Thin \code{\link{draws}} objects to reduce their size and autocorrelation in +the chains. } \examples{ x <- example_draws() From 3dc38b1c88d18099c61d18ce7e91036dd906b646 Mon Sep 17 00:00:00 2001 From: n-kall Date: Mon, 15 Jan 2024 16:06:02 +0200 Subject: [PATCH 11/11] typofix --- R/thin_draws.R | 2 +- man/thin_draws.Rd | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/R/thin_draws.R b/R/thin_draws.R index 1edcbb12..9f32a578 100644 --- a/R/thin_draws.R +++ b/R/thin_draws.R @@ -8,7 +8,7 @@ #' @param thin (positive numeric) The period for selecting draws. Must #' be between 1 and the number of iterations. If the value is not an #' integer, the draws will be selected such that the number of draws -#' returned is equal to round(ndraws(x) / thin)). Intervals between +#' returned is equal to round(ndraws(x) / thin). Intervals between #' selected draws will be either ceiling(thin) or floor(thin), such #' that the average interval will be close to the thin value. If #' `NULL`, it will be automatically calculated based on bulk and diff --git a/man/thin_draws.Rd b/man/thin_draws.Rd index b8887b8f..94525afd 100644 --- a/man/thin_draws.Rd +++ b/man/thin_draws.Rd @@ -20,7 +20,7 @@ is defined.} \item{thin}{(positive numeric) The period for selecting draws. Must be between 1 and the number of iterations. If the value is not an integer, the draws will be selected such that the number of draws -returned is equal to round(ndraws(x) / thin)). Intervals between +returned is equal to round(ndraws(x) / thin). Intervals between selected draws will be either ceiling(thin) or floor(thin), such that the average interval will be close to the thin value. If \code{NULL}, it will be automatically calculated based on bulk and