diff --git a/R/helpers-ppc.R b/R/helpers-ppc.R index aa06def3..95ed2ed8 100644 --- a/R/helpers-ppc.R +++ b/R/helpers-ppc.R @@ -319,7 +319,7 @@ adjust_gamma <- function(N, abort("Value of 'prob' must be in (0,1).") } if (is.null(interpolate_adj)) { - if (K <= 200) { + if (K <= 200 || N < 100) { interpolate_adj <- FALSE } else { interpolate_adj <- TRUE diff --git a/R/ppc-distributions.R b/R/ppc-distributions.R index eb0f0f31..aadd21b6 100644 --- a/R/ppc-distributions.R +++ b/R/ppc-distributions.R @@ -49,10 +49,11 @@ #' both, depending on the `y_draw` argument. #' } #' \item{`ppc_pit_ecdf()`, `ppc_pit_ecdf_grouped()`}{ -#' The ECDF of the empirical PIT values of `y` computed with respect to the -#' corresponding `yrep` values. `100 * prob`% central simultaneous confidence -#' intervals are provided to asses if `y` and `yrep` originate from the same -#' distribution. The PIT values can also be provided directly as `pit`. +#' The PIT-ECDF of the empirical PIT values of `y` computed with respect to +#' the corresponding `yrep` values. `100 * prob`% central simultaneous +#' confidence intervals are provided to assess if `y` and `yrep` originate +#' from the same distribution. The PIT values can also be provided directly +#' as `pit`. #' See Säilynoja et al. (2021) for more details.} #' } #' @@ -73,8 +74,8 @@ #' # ppc_ecdf_overlay with continuous data (set discrete=TRUE if discrete data) #' ppc_ecdf_overlay(y, yrep[sample(nrow(yrep), 25), ]) #' -#' # ECDF and ECDF difference plot of the PIT values of y compared to yrep -#' # with 99% simultaneous confidence bands. +#' # PIT-ECDF and PIT-ECDF difference plot of the PIT values of y compared to +#' # yrep with 99% simultaneous confidence bands. 
#' ppc_pit_ecdf(y, yrep, prob = 0.99, plot_diff = FALSE) #' ppc_pit_ecdf(y, yrep, prob = 0.99, plot_diff = TRUE) #' } @@ -107,9 +108,9 @@ #' ppc_ecdf_overlay_grouped(y, yrep[1:25, ], group = group) #' #' \donttest{ -#' # ECDF difference plots of the PIT values by group +#' # PIT-ECDF plots of the PIT values by group #' # with 99% simultaneous confidence bands. -#' ppc_pit_ecdf_grouped(y, yrep, group=group, prob=0.99, plot_diff = TRUE) +#' ppc_pit_ecdf_grouped(y, yrep, group=group, prob=0.99) #' } #' #' \donttest{ @@ -612,7 +613,7 @@ ppc_pit_ecdf <- function(y, ) %>% unlist() if (is.null(K)) { - K <- nrow(yrep) + 1 + K <- min(nrow(yrep) + 1, 1000) } } else { inform("'pit' specified so ignoring 'y', and 'yrep' if specified.") @@ -631,7 +632,7 @@ ppc_pit_ecdf <- function(y, lims <- ecdf_intervals(gamma = gamma, N = N, K = K) ggplot() + aes( - x = 1:K / K, + x = seq(0,1,length.out = K), y = ecdf(pit)(seq(0, 1, length.out = K)) - (plot_diff == TRUE) * seq(0, 1, length.out = K), color = "y" @@ -679,7 +680,7 @@ ppc_pit_ecdf_grouped <- ) %>% unlist() if (is.null(K)) { - K <- nrow(yrep) + 1 + K <- min(nrow(yrep) + 1, 1000) } } else { inform("'pit' specified so ignoring 'y' and 'yrep' if specified.") @@ -691,7 +692,7 @@ ppc_pit_ecdf_grouped <- N_g <- sum(group == g) adjust_gamma( N = N_g, - K = min(N_g, K), + K = ifelse(is.null(K), N_g, K), prob = prob, interpolate_adj = interpolate_adj ) @@ -700,21 +701,23 @@ ppc_pit_ecdf_grouped <- data <- data.frame(pit = pit, group = group) %>% group_by(group) %>% - dplyr::group_map(~ data.frame( - ecdf_value = ecdf(.x$pit)(seq(0, 1, length.out = min(nrow(.x), K))), - group = .y[1], - lims_upper = ecdf_intervals( - gamma = gammas[[unlist(.y[1])]], - N = nrow(.x), - K = min(nrow(.x), K) - )$upper[-1] / nrow(.x), - lims_lower = ecdf_intervals( - gamma = gammas[[unlist(.y[1])]], - N = nrow(.x), - K = min(nrow(.x), K) - )$lower[-1] / nrow(.x), - x = seq(0, 1, length.out = min(nrow(.x), K)) - )) %>% + dplyr::group_map( + ~ data.frame( + 
ecdf_value = ecdf(.x$pit)(seq(0, 1, length.out = ifelse(is.null(K), nrow(.x), K))), + group = .y[1], + lims_upper = ecdf_intervals( + gamma = gammas[[unlist(.y[1])]], + N = nrow(.x), + K = ifelse(is.null(K), nrow(.x), K) + )$upper[-1] / nrow(.x), + lims_lower = ecdf_intervals( + gamma = gammas[[unlist(.y[1])]], + N = nrow(.x), + K = ifelse(is.null(K), nrow(.x), K) + )$lower[-1] / nrow(.x), + x = seq(0, 1, length.out = ifelse(is.null(K), nrow(.x), K)) + ) + ) %>% dplyr::bind_rows() ggplot(data) + diff --git a/man-roxygen/args-pit-ecdf.R b/man-roxygen/args-pit-ecdf.R index 3a8c3dce..3e4a5b92 100644 --- a/man-roxygen/args-pit-ecdf.R +++ b/man-roxygen/args-pit-ecdf.R @@ -1,16 +1,17 @@ #' @param K An optional integer defining the number of equally spaced evaluation -#' points for the ECDF. Reducing K when using `interpolate_adj = FALSE` makes -#' computing the confidence bands faster. For `ppc_pit_ecdf` and -#' `ppc_pit_ecdf_grouped`, defaults to `ncol(yrep) + 1`, or `length(pit)` if PIT -#' values are supplied. For `mcmc_rank_ecdf` defaults to the number of -#' iterations per chain in `x`. +#' points for the PIT-ECDF. Reducing K when using `interpolate_adj = FALSE` +#' makes computing the confidence bands faster. For `ppc_pit_ecdf` and +#' `ppc_pit_ecdf_grouped`, if PIT values are supplied, defaults to +#' `length(pit)`, otherwise yrep determines the maximum accuracy of the +#' estimated PIT values and `K` is set to `min(nrow(yrep) + 1, 1000)`. For +#' `mcmc_rank_ecdf`, defaults to the number of iterations per chain in `x`. #' @param prob The desired simultaneous coverage level of the bands around the #' ECDF. A value in (0,1). #' @param plot_diff A boolean defining whether to plot the difference between -#' the observed ECDF and the theoretical expectation for uniform PIT values -#' rather than plotting the regular ECDF. 
The default is `FALSE`, but for -#' large samples we recommend setting `plot_diff=TRUE` as the difference plot -#' will visually show a more dynamic range. +#' the observed PIT-ECDF and the theoretical expectation for uniform PIT +#' values rather than plotting the regular ECDF. The default is `FALSE`, but +#' for large samples we recommend setting `plot_diff=TRUE` as the difference +#' plot will visually show a more dynamic range. #' @param interpolate_adj A boolean defining if the simultaneous confidence #' bands should be interpolated based on precomputed values rather than #' computed exactly. Computing the bands may be computationally intensive and diff --git a/man/MCMC-traces.Rd b/man/MCMC-traces.Rd index 3993bbff..06b0891c 100644 --- a/man/MCMC-traces.Rd +++ b/man/MCMC-traces.Rd @@ -194,11 +194,12 @@ of rank-normalized MCMC samples. Defaults to \code{20}.} average number of ranks per bin. Defaults to \code{FALSE}.} \item{K}{An optional integer defining the number of equally spaced evaluation -points for the ECDF. Reducing K when using \code{interpolate_adj = FALSE} makes -computing the confidence bands faster. For \code{ppc_pit_ecdf} and -\code{ppc_pit_ecdf_grouped}, defaults to \code{ncol(yrep) + 1}, or \code{length(pit)} if PIT -values are supplied. For \code{mcmc_rank_ecdf} defaults to the number of -iterations per chain in \code{x}.} +points for the PIT-ECDF. Reducing K when using \code{interpolate_adj = FALSE} +makes computing the confidence bands faster. For \code{ppc_pit_ecdf} and +\code{ppc_pit_ecdf_grouped}, if PIT values are supplied, defaults to +\code{length(pit)}, otherwise yrep determines the maximum accuracy of the +estimated PIT values and \code{K} is set to \code{min(nrow(yrep) + 1, 1000)}. 
For +\code{mcmc_rank_ecdf}, defaults to the number of iterations per chain in \code{x}.} \item{prob}{For \code{mcmc_rank_ecdf()}, a value between 0 and 1 specifying the desired simultaneous confidence of the confidence bands to be diff --git a/man/PPC-distributions.Rd b/man/PPC-distributions.Rd index 5c633803..83fa15bc 100644 --- a/man/PPC-distributions.Rd +++ b/man/PPC-distributions.Rd @@ -212,20 +212,21 @@ which the ECDF is to be drawn. If NULL, PIT values are computed to \code{y} with respect to the corresponding values in \code{yrep}.} \item{K}{An optional integer defining the number of equally spaced evaluation -points for the ECDF. Reducing K when using \code{interpolate_adj = FALSE} makes -computing the confidence bands faster. For \code{ppc_pit_ecdf} and -\code{ppc_pit_ecdf_grouped}, defaults to \code{ncol(yrep) + 1}, or \code{length(pit)} if PIT -values are supplied. For \code{mcmc_rank_ecdf} defaults to the number of -iterations per chain in \code{x}.} +points for the PIT-ECDF. Reducing K when using \code{interpolate_adj = FALSE} +makes computing the confidence bands faster. For \code{ppc_pit_ecdf} and +\code{ppc_pit_ecdf_grouped}, if PIT values are supplied, defaults to +\code{length(pit)}, otherwise yrep determines the maximum accuracy of the +estimated PIT values and \code{K} is set to \code{min(nrow(yrep) + 1, 1000)}. For +\code{mcmc_rank_ecdf}, defaults to the number of iterations per chain in \code{x}.} \item{prob}{The desired simultaneous coverage level of the bands around the ECDF. A value in (0,1).} \item{plot_diff}{A boolean defining whether to plot the difference between -the observed ECDF and the theoretical expectation for uniform PIT values -rather than plotting the regular ECDF. 
The default is \code{FALSE}, but for -large samples we recommend setting \code{plot_diff=TRUE} as the difference plot -will visually show a more dynamic range.} +the observed PIT-ECDF and the theoretical expectation for uniform PIT +values rather than plotting the regular ECDF. The default is \code{FALSE}, but +for large samples we recommend setting \code{plot_diff=TRUE} as the difference +plot will visually show a more dynamic range.} \item{interpolate_adj}{A boolean defining if the simultaneous confidence bands should be interpolated based on precomputed values rather than @@ -278,10 +279,11 @@ quantiles. \code{y} is overlaid on the plot either as a violin, points, or both, depending on the \code{y_draw} argument. } \item{\code{ppc_pit_ecdf()}, \code{ppc_pit_ecdf_grouped()}}{ -The ECDF of the empirical PIT values of \code{y} computed with respect to the -corresponding \code{yrep} values. \code{100 * prob}\% central simultaneous confidence -intervals are provided to asses if \code{y} and \code{yrep} originate from the same -distribution. The PIT values can also be provided directly as \code{pit}. +The PIT-ECDF of the empirical PIT values of \code{y} computed with respect to +the corresponding \code{yrep} values. \code{100 * prob}\% central simultaneous +confidence intervals are provided to assess if \code{y} and \code{yrep} originate +from the same distribution. The PIT values can also be provided directly +as \code{pit}. See Säilynoja et al. (2021) for more details.} } } @@ -298,8 +300,8 @@ ppc_dens_overlay(y, yrep[1:25, ]) # ppc_ecdf_overlay with continuous data (set discrete=TRUE if discrete data) ppc_ecdf_overlay(y, yrep[sample(nrow(yrep), 25), ]) -# ECDF and ECDF difference plot of the PIT values of y compared to yrep -# with 99\% simultaneous confidence bands. +# PIT-ECDF and PIT-ECDF difference plot of the PIT values of y compared to +# yrep with 99\% simultaneous confidence bands. 
ppc_pit_ecdf(y, yrep, prob = 0.99, plot_diff = FALSE) ppc_pit_ecdf(y, yrep, prob = 0.99, plot_diff = TRUE) } @@ -332,9 +334,9 @@ ppc_dens_overlay_grouped(y, yrep[1:25, ], group = group) ppc_ecdf_overlay_grouped(y, yrep[1:25, ], group = group) \donttest{ -# ECDF difference plots of the PIT values by group +# PIT-ECDF plots of the PIT values by group # with 99\% simultaneous confidence bands. -ppc_pit_ecdf_grouped(y, yrep, group=group, prob=0.99, plot_diff = TRUE) +ppc_pit_ecdf_grouped(y, yrep, group=group, prob=0.99) } \donttest{ diff --git a/tests/testthat/_snaps/ppc-distributions/ppc-pit-ecdf-default.svg b/tests/testthat/_snaps/ppc-distributions/ppc-pit-ecdf-default.svg index a8ccbc11..72a276fe 100644 --- a/tests/testthat/_snaps/ppc-distributions/ppc-pit-ecdf-default.svg +++ b/tests/testthat/_snaps/ppc-distributions/ppc-pit-ecdf-default.svg @@ -42,15 +42,15 @@ - - - - + + + + -0.00 -0.25 -0.50 -0.75 +0.00 +0.25 +0.50 +0.75 1.00 PIT ECDF diff --git a/tests/testthat/_snaps/ppc-distributions/ppc-pit-ecdf-diff.svg b/tests/testthat/_snaps/ppc-distributions/ppc-pit-ecdf-diff.svg index c8888419..a2f9f2a4 100644 --- a/tests/testthat/_snaps/ppc-distributions/ppc-pit-ecdf-diff.svg +++ b/tests/testthat/_snaps/ppc-distributions/ppc-pit-ecdf-diff.svg @@ -40,15 +40,15 @@ - - - - + + + + -0.00 -0.25 -0.50 -0.75 +0.00 +0.25 +0.50 +0.75 1.00 PIT ECDF - difference diff --git a/tests/testthat/_snaps/ppc-distributions/ppc-pit-ecdf-grouped-default.svg b/tests/testthat/_snaps/ppc-distributions/ppc-pit-ecdf-grouped-default.svg index b47aabd2..1c41d39b 100644 --- a/tests/testthat/_snaps/ppc-distributions/ppc-pit-ecdf-grouped-default.svg +++ b/tests/testthat/_snaps/ppc-distributions/ppc-pit-ecdf-grouped-default.svg @@ -25,9 +25,9 @@ - - - + + + @@ -39,9 +39,9 @@ - - - + + + @@ -53,9 +53,9 @@ - - - + + + @@ -67,9 +67,9 @@ - - - + + + diff --git a/tests/testthat/_snaps/ppc-distributions/ppc-pit-ecdf-grouped-diff.svg b/tests/testthat/_snaps/ppc-distributions/ppc-pit-ecdf-grouped-diff.svg 
index a62e2778..36708817 100644 --- a/tests/testthat/_snaps/ppc-distributions/ppc-pit-ecdf-grouped-diff.svg +++ b/tests/testthat/_snaps/ppc-distributions/ppc-pit-ecdf-grouped-diff.svg @@ -25,9 +25,9 @@ - - - + + + @@ -39,9 +39,9 @@ - - - + + + @@ -53,9 +53,9 @@ - - - + + + @@ -67,9 +67,9 @@ - - - + + + @@ -137,19 +137,19 @@ 0.75 1.00 --0.2 -0.0 -0.2 - - - +-0.2 +0.0 +0.2 + + + --0.2 -0.0 -0.2 - - - +-0.2 +0.0 +0.2 + + + PIT ECDF - difference ppc_pit_ecdf_grouped (diff)