From 88fdfab50cafcbf19854053b3249dbf5571d14a1 Mon Sep 17 00:00:00 2001 From: Demetri Pananos Date: Thu, 15 Aug 2024 07:21:07 -0700 Subject: [PATCH 01/82] uses cli_warn and cli_abort in bootci.R --- R/bootci.R | 46 ++++++++++++++++++++-------------------------- 1 file changed, 20 insertions(+), 26 deletions(-) diff --git a/R/bootci.R b/R/bootci.R index 1af5731e..7ae3e3b8 100644 --- a/R/bootci.R +++ b/R/bootci.R @@ -6,12 +6,12 @@ check_rset <- function(x, app = TRUE) { if (!inherits(x, "bootstraps")) { - rlang::abort("`.data` should be an `rset` object generated from `bootstraps()`") + cli::cli_abort("`.data` should be an `rset` object generated from `bootstraps()`") } if (app) { if (x %>% dplyr::filter(id == "Apparent") %>% nrow() != 1) { - rlang::abort("Please set `apparent = TRUE` in `bootstraps()` function") + cli::cli_abort("Please set `apparent = TRUE` in `bootstraps()` function") } } invisible(NULL) @@ -28,15 +28,15 @@ std_exp <- c("std.error", "robust.se") check_tidy_names <- function(x, std_col) { # check for proper columns if (sum(colnames(x) == "estimate") != 1) { - rlang::abort(stat_nm_err) + cli::cli_abort(stat_nm_err) } if (sum(colnames(x) == "term") != 1) { - rlang::abort(stat_nm_err) + cli::cli_abort(stat_nm_err) } if (std_col) { std_candidates <- colnames(x) %in% std_exp if (sum(std_candidates) != 1) { - rlang::abort( + cli::cli_abort( "`statistics` should select a single column for the standard error." 
) } @@ -59,7 +59,7 @@ check_tidy <- function(x, std_col = FALSE) { } if (inherits(x, "try-error")) { - rlang::abort(stat_fmt_err) + cli::cli_abort(stat_fmt_err) } check_tidy_names(x, std_col) @@ -117,7 +117,7 @@ new_stats <- function(x, lo, hi) { has_dots <- function(x) { nms <- names(formals(x)) if (!any(nms == "...")) { - rlang::abort("`.fn` must have an argument `...`.") + cli::cli_abort("`.fn` must have an argument `...`.") } invisible(NULL) } @@ -131,14 +131,8 @@ check_num_resamples <- function(x, B = 1000) { if (nrow(x) > 0) { terms <- paste0("`", x$term, "`") - msg <- - paste0( - "Recommend at least ", B, " non-missing bootstrap resamples for ", - ifelse(length(terms) > 1, "terms: ", "term "), - paste0(terms, collapse = ", "), - "." - ) - rlang::warn(msg) + cli::cli_warn(paste0("Recommend at least {B} non-missing bootstrap", + "resamples for {cli::qty(terms)} term{?s} {terms}.")) } invisible(NULL) } @@ -149,11 +143,11 @@ check_num_resamples <- function(x, B = 1000) { pctl_single <- function(stats, alpha = 0.05) { if (all(is.na(stats))) { - rlang::abort("All statistics have missing values..") + cli::cli_abort("All statistics have missing values.") } if (!is.numeric(stats)) { - rlang::abort("`stats` must be a numeric vector.") + cli::cli_abort("`stats` must be a numeric vector.") } # stats is a numeric vector of values @@ -289,19 +283,19 @@ t_single <- function(stats, std_err, is_orig, alpha = 0.05) { # which_orig is the index of stats and std_err that has the original result if (all(is.na(stats))) { - rlang::abort("All statistics have missing values.") + cli::cli_abort("All statistics have missing values.") } if (!is.logical(is_orig) || any(is.na(is_orig))) { - rlang::abort( + cli::cli_abort( "`is_orig` should be a logical column the same length as `stats` with no missing values." 
) } if (length(stats) != length(std_err) && length(stats) != length(is_orig)) { - rlang::abort("`stats`, `std_err`, and `is_orig` should have the same length.") + cli::cli_abort("`stats`, `std_err`, and `is_orig` should have the same length.") } if (sum(is_orig) != 1) { - rlang::abort("The original statistic must be in a single row.") + cli::cli_abort("The original statistic must be in a single row.") } theta_obs <- stats[is_orig] @@ -339,12 +333,12 @@ int_t.bootstraps <- function(.data, statistics, alpha = 0.05, ...) { check_dots_empty() check_rset(.data) if (length(alpha) != 1 || !is.numeric(alpha)) { - abort("`alpha` must be a single numeric value.") + cli::cli_abort("`alpha` must be a single numeric value.") } column_name <- tidyselect::vars_select(names(.data), !!enquo(statistics)) if (length(column_name) != 1) { - rlang::abort(stat_fmt_err) + cli::cli_abort(stat_fmt_err) } stats <- .data %>% dplyr::select(!!column_name, id) stats <- check_tidy(stats, std_col = TRUE) @@ -366,7 +360,7 @@ bca_calc <- function(stats, orig_data, alpha = 0.05, .fn, ...) { # TODO check per term if (all(is.na(stats$estimate))) { - rlang::abort("All statistics have missing values.") + cli::cli_abort("All statistics have missing values.") } ### Estimating Z0 bias-correction @@ -440,14 +434,14 @@ int_bca <- function(.data, ...) { int_bca.bootstraps <- function(.data, statistics, alpha = 0.05, .fn, ...) 
{ check_rset(.data) if (length(alpha) != 1 || !is.numeric(alpha)) { - abort("`alpha` must be a single numeric value.") + cli::cli_abort("`alpha` must be a single numeric value.") } has_dots(.fn) column_name <- tidyselect::vars_select(names(.data), !!enquo(statistics)) if (length(column_name) != 1) { - rlang::abort(stat_fmt_err) + cli::cli_abort(stat_fmt_err) } stats <- .data %>% dplyr::select(!!column_name, id) stats <- check_tidy(stats) From f677f9f3c3aca9afbe9f8cb85a5b187239dcce53 Mon Sep 17 00:00:00 2001 From: Demetri Pananos Date: Thu, 15 Aug 2024 07:29:13 -0700 Subject: [PATCH 02/82] edits cli_warn to use {term} correctly --- R/bootci.R | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/R/bootci.R b/R/bootci.R index 7ae3e3b8..b92d1560 100644 --- a/R/bootci.R +++ b/R/bootci.R @@ -131,7 +131,7 @@ check_num_resamples <- function(x, B = 1000) { if (nrow(x) > 0) { terms <- paste0("`", x$term, "`") - cli::cli_warn(paste0("Recommend at least {B} non-missing bootstrap", + cli::cli_warn(paste0("Recommend at least {B} non-missing bootstrap ", "resamples for {cli::qty(terms)} term{?s} {terms}.")) } invisible(NULL) From 28438e06b88647510c7d3c049960066a95114cb4 Mon Sep 17 00:00:00 2001 From: Demetri Pananos Date: Thu, 15 Aug 2024 07:49:45 -0700 Subject: [PATCH 03/82] Adds back the period --- R/bootci.R | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/R/bootci.R b/R/bootci.R index b92d1560..713d8ce5 100644 --- a/R/bootci.R +++ b/R/bootci.R @@ -143,7 +143,7 @@ check_num_resamples <- function(x, B = 1000) { pctl_single <- function(stats, alpha = 0.05) { if (all(is.na(stats))) { - cli::cli_abort("All statistics have missing values.") + cli::cli_abort("All statistics have missing values..") } if (!is.numeric(stats)) { From 0542408a346440db66237838b8181aaff8309d1a Mon Sep 17 00:00:00 2001 From: James Wade Date: Thu, 15 Aug 2024 09:38:26 -0700 Subject: [PATCH 04/82] Use cli errors in rsplit.R Fixes #512 --- R/rsplit.R | 14 +++++++------- 1 file 
changed, 7 insertions(+), 7 deletions(-) diff --git a/R/rsplit.R b/R/rsplit.R index 96234934..c13b7333 100644 --- a/R/rsplit.R +++ b/R/rsplit.R @@ -1,20 +1,20 @@ rsplit <- function(data, in_id, out_id) { if (!is.data.frame(data) & !is.matrix(data)) { - rlang::abort("`data` must be a data frame.") + cli::cli_abort("`data` must be a data frame.") } if (!is.integer(in_id) | any(in_id < 1)) { - rlang::abort("`in_id` must be a positive integer vector.") + cli::cli_abort("`in_id` must be a positive integer vector.") } if (!all(is.na(out_id))) { if (!is.integer(out_id) | any(out_id < 1)) { - rlang::abort("`out_id` must be a positive integer vector.") + cli::cli_abort("`out_id` must be a positive integer vector.") } } if (length(in_id) == 0) { - rlang::abort("At least one row should be selected for the analysis set.") + cli::cli_abort("At least one row should be selected for the analysis set.") } structure( @@ -88,13 +88,13 @@ as.data.frame.rsplit <- data = "analysis", ...) { if (!is.null(row.names)) { - rlang::warn(paste0( + cli::cli_warn(paste0( "`row.names` is kept for consistency with the underlying class but ", "non-NULL values will be ignored." )) } if (optional) { - rlang::warn(paste0( + cli::cli_warn(paste0( "`optional` is kept for consistency with the underlying class but ", "TRUE values will be ignored." )) @@ -107,7 +107,7 @@ as.data.frame.rsplit <- "There is no assessment data set for an `rsplit` object", " with class `", rsplit_class, "`." ) - rlang::abort(msg) + cli::cli_abort(msg) } ind <- as.integer(x, data = data, ...) 
permuted_col <- vctrs::vec_slice(x$data, ind) %>% From 4fcf79a513490ccc4ab89d6f97a2499c8a8f492a Mon Sep 17 00:00:00 2001 From: Demetri Pananos Date: Thu, 15 Aug 2024 10:16:15 -0700 Subject: [PATCH 05/82] Remove extra period, update _snaps/bootci.md --- R/bootci.R | 2 +- tests/testthat/_snaps/bootci.md | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/R/bootci.R b/R/bootci.R index 713d8ce5..b92d1560 100644 --- a/R/bootci.R +++ b/R/bootci.R @@ -143,7 +143,7 @@ check_num_resamples <- function(x, B = 1000) { pctl_single <- function(stats, alpha = 0.05) { if (all(is.na(stats))) { - cli::cli_abort("All statistics have missing values..") + cli::cli_abort("All statistics have missing values.") } if (!is.numeric(stats)) { diff --git a/tests/testthat/_snaps/bootci.md b/tests/testthat/_snaps/bootci.md index 77e607e1..8632d77e 100644 --- a/tests/testthat/_snaps/bootci.md +++ b/tests/testthat/_snaps/bootci.md @@ -6,7 +6,7 @@ Warning: Recommend at least 1000 non-missing bootstrap resamples for term `mean`. Error in `pctl_single()`: - ! All statistics have missing values.. + ! All statistics have missing values. 
--- From f47b9df40552dbce6e4d2e91276417ab77d513f2 Mon Sep 17 00:00:00 2001 From: James Wade Date: Thu, 15 Aug 2024 10:16:21 -0700 Subject: [PATCH 06/82] drop cli:: from cli_abort, use .arg, and remove paste0 --- R/rsplit.R | 31 +++++++++++++++---------------- 1 file changed, 15 insertions(+), 16 deletions(-) diff --git a/R/rsplit.R b/R/rsplit.R index c13b7333..a82b1562 100644 --- a/R/rsplit.R +++ b/R/rsplit.R @@ -1,20 +1,20 @@ rsplit <- function(data, in_id, out_id) { if (!is.data.frame(data) & !is.matrix(data)) { - cli::cli_abort("`data` must be a data frame.") + cli_abort("{.arg `data`} must be a data frame.") } if (!is.integer(in_id) | any(in_id < 1)) { - cli::cli_abort("`in_id` must be a positive integer vector.") + cli_abort("{.arg in_id} must be a positive integer vector.") } if (!all(is.na(out_id))) { if (!is.integer(out_id) | any(out_id < 1)) { - cli::cli_abort("`out_id` must be a positive integer vector.") + cli_abort("{.arg out_id} must be a positive integer vector.") } } if (length(in_id) == 0) { - cli::cli_abort("At least one row should be selected for the analysis set.") + cli_abort("At least one row should be selected for the analysis set.") } structure( @@ -88,26 +88,25 @@ as.data.frame.rsplit <- data = "analysis", ...) { if (!is.null(row.names)) { - cli::cli_warn(paste0( - "`row.names` is kept for consistency with the underlying class but ", - "non-NULL values will be ignored." - )) + cli::cli_warn( + "{.arg row.names} is kept for consistency with the underlying class but + non-NULL values will be ignored." + ) } if (optional) { - cli::cli_warn(paste0( - "`optional` is kept for consistency with the underlying class but ", - "TRUE values will be ignored." - )) + cli::cli_warn( + "{.arg optional} is kept for consistency with the underlying class but + TRUE values will be ignored." 
+ ) } if (!is.null(x$col_id)) { if (identical(data, "assessment")) { rsplit_class <- class(x)[[1]] - msg <- paste0( - "There is no assessment data set for an `rsplit` object", - " with class `", rsplit_class, "`." + cli_abort( + "There is no assessment data set for an {.arg rsplit} object + with class {rsplit_class}." ) - cli::cli_abort(msg) } ind <- as.integer(x, data = data, ...) permuted_col <- vctrs::vec_slice(x$data, ind) %>% From 635ff72fbbff7c8797cf1940b12ad2ccc885abb0 Mon Sep 17 00:00:00 2001 From: James Wade Date: Thu, 15 Aug 2024 10:33:35 -0700 Subject: [PATCH 07/82] remove extra backticks --- R/rsplit.R | 2 +- tests/testthat/_snaps/permutations.new.md | 30 +++++++++++++++++++++++ 2 files changed, 31 insertions(+), 1 deletion(-) create mode 100644 tests/testthat/_snaps/permutations.new.md diff --git a/R/rsplit.R b/R/rsplit.R index a82b1562..d99a98cb 100644 --- a/R/rsplit.R +++ b/R/rsplit.R @@ -1,6 +1,6 @@ rsplit <- function(data, in_id, out_id) { if (!is.data.frame(data) & !is.matrix(data)) { - cli_abort("{.arg `data`} must be a data frame.") + cli_abort("{.arg data} must be a data frame.") } if (!is.integer(in_id) | any(in_id < 1)) { diff --git a/tests/testthat/_snaps/permutations.new.md b/tests/testthat/_snaps/permutations.new.md new file mode 100644 index 00000000..515f099d --- /dev/null +++ b/tests/testthat/_snaps/permutations.new.md @@ -0,0 +1,30 @@ +# no assessment set + + Code + assessment(xx$splits[[1]]) + Condition + Error in `as.data.frame()`: + ! There is no assessment data set for an `rsplit` object with class perm_split. 
+ +# printing + + Code + permutations(mtcars, 1) + Output + # Permutation sampling + # Permuted columns: [mpg] + # A tibble: 25 x 2 + splits id + + 1 Permutations01 + 2 Permutations02 + 3 Permutations03 + 4 Permutations04 + 5 Permutations05 + 6 Permutations06 + 7 Permutations07 + 8 Permutations08 + 9 Permutations09 + 10 Permutations10 + # i 15 more rows + From 79e73244417a219af9417fcab7c7fd97aeab1c96 Mon Sep 17 00:00:00 2001 From: laurabrianna Date: Thu, 15 Aug 2024 10:35:23 -0700 Subject: [PATCH 08/82] Fixes #475 - Documentation edits-decluttering the top of the references page --- R/initial_split.R | 8 +++++--- R/initial_validation_split.R | 3 ++- R/validation_set.R | 2 ++ man/initial_split.Rd | 3 ++- man/initial_validation_split.Rd | 5 +++-- man/validation_set.Rd | 2 +- 6 files changed, 15 insertions(+), 8 deletions(-) diff --git a/R/initial_split.R b/R/initial_split.R index 66157c76..51d2c5cd 100644 --- a/R/initial_split.R +++ b/R/initial_split.R @@ -6,7 +6,9 @@ #' `group_initial_split` creates splits of the data based #' on some grouping variable, so that all data in a "group" is assigned to #' the same split. -#' `training` and `testing` are used to extract the resulting data. +#' +#' @details `training` and `testing` are used to extract the resulting data. 
+#' #' @template strata_details #' @inheritParams vfold_cv #' @inheritParams make_strata @@ -176,12 +178,12 @@ group_initial_split <- function(data, group, prop = 3 / 4, ..., strata = NULL, p attrib <- .get_split_args(res, allow_strata_false = TRUE) res <- res$splits[[1]] - + attrib$times <- NULL for (i in names(attrib)) { attr(res, i) <- attrib[[i]] } class(res) <- c("group_initial_split", "initial_split", class(res)) - + res } diff --git a/R/initial_validation_split.R b/R/initial_validation_split.R index e3ce84d6..70d40ec8 100644 --- a/R/initial_validation_split.R +++ b/R/initial_validation_split.R @@ -8,7 +8,8 @@ #' `group_initial_validation_split()` creates similar random splits of the data #' based on some grouping variable, so that all data in a "group" are assigned #' to the same partition. -#' `training()`, `validation()`, and `testing()` can be used to extract the +#' +#' @details `training()`, `validation()`, and `testing()` can be used to extract the #' resulting data sets. #' Use [`validation_set()`] to create an `rset` object for use with functions from #' the tune package such as `tune::tune_grid()`. diff --git a/R/validation_set.R b/R/validation_set.R index 14d48eb3..2ec79da7 100644 --- a/R/validation_set.R +++ b/R/validation_set.R @@ -1,5 +1,7 @@ #' Create a Validation Split for Tuning #' +#' `validation_set` creates a the validation split for model tuning. +#' #' @param split An object of class `initial_validation_split`, such as resulting #' from [initial_validation_split()] or [group_initial_validation_split()]. #' @param x An `rsplit` object produced by `validation_set()`. diff --git a/man/initial_split.Rd b/man/initial_split.Rd index 740bf66f..1e8946f2 100644 --- a/man/initial_split.Rd +++ b/man/initial_split.Rd @@ -71,9 +71,10 @@ set and testing set. 
\code{initial_time_split} does the same, but takes the \code{group_initial_split} creates splits of the data based on some grouping variable, so that all data in a "group" is assigned to the same split. -\code{training} and \code{testing} are used to extract the resulting data. } \details{ +\code{training} and \code{testing} are used to extract the resulting data. + With a \code{strata} argument, the random sampling is conducted \emph{within the stratification variable}. This can help ensure that the resamples have equivalent proportions as the original data set. For diff --git a/man/initial_validation_split.Rd b/man/initial_validation_split.Rd index 734a2a97..0b9d5be8 100644 --- a/man/initial_validation_split.Rd +++ b/man/initial_validation_split.Rd @@ -81,12 +81,13 @@ data set, with the first observations being put into the training set. \code{group_initial_validation_split()} creates similar random splits of the data based on some grouping variable, so that all data in a "group" are assigned to the same partition. +} +\details{ \code{training()}, \code{validation()}, and \code{testing()} can be used to extract the resulting data sets. Use \code{\link[=validation_set]{validation_set()}} to create an \code{rset} object for use with functions from the tune package such as \code{tune::tune_grid()}. -} -\details{ + With a \code{strata} argument, the random sampling is conducted \emph{within the stratification variable}. This can help ensure that the resamples have equivalent proportions as the original data set. For diff --git a/man/validation_set.Rd b/man/validation_set.Rd index de3d91bd..b5e3fc2a 100644 --- a/man/validation_set.Rd +++ b/man/validation_set.Rd @@ -35,7 +35,7 @@ An tibble with classes \code{validation_set}, \code{rset}, \code{tbl_df}, \code{ column called \code{id} that has a character string with the resample identifier. } \description{ -Create a Validation Split for Tuning +\code{validation_set} creates a the validation split for model tuning. 
} \examples{ set.seed(1353) From 55ef8500518c9e8443873d791903865ba5c5ab82 Mon Sep 17 00:00:00 2001 From: Demetri Pananos Date: Thu, 15 Aug 2024 10:41:53 -0700 Subject: [PATCH 09/82] adds {.arg} formatting, catches a few more rlang::abort --- R/bootci.R | 25 ++++++++++++------------- 1 file changed, 12 insertions(+), 13 deletions(-) diff --git a/R/bootci.R b/R/bootci.R index b92d1560..a1059118 100644 --- a/R/bootci.R +++ b/R/bootci.R @@ -6,12 +6,12 @@ check_rset <- function(x, app = TRUE) { if (!inherits(x, "bootstraps")) { - cli::cli_abort("`.data` should be an `rset` object generated from `bootstraps()`") + cli::cli_abort("{.arg .data} should be an `rset` object generated from {.fn bootstraps}") } if (app) { if (x %>% dplyr::filter(id == "Apparent") %>% nrow() != 1) { - cli::cli_abort("Please set `apparent = TRUE` in `bootstraps()` function") + cli::cli_abort("Please set `apparent = TRUE` in {.fn bootstraps} function") } } invisible(NULL) @@ -36,9 +36,7 @@ check_tidy_names <- function(x, std_col) { if (std_col) { std_candidates <- colnames(x) %in% std_exp if (sum(std_candidates) != 1) { - cli::cli_abort( - "`statistics` should select a single column for the standard error." - ) + cli::cli_abort("{.arg statistics} should select a single column for the standard error.") } } invisible(TRUE) @@ -117,7 +115,7 @@ new_stats <- function(x, lo, hi) { has_dots <- function(x) { nms <- names(formals(x)) if (!any(nms == "...")) { - cli::cli_abort("`.fn` must have an argument `...`.") + cli::cli_abort("{.arg .fn} must have an argument {.arg ...}.") } invisible(NULL) } @@ -147,7 +145,7 @@ pctl_single <- function(stats, alpha = 0.05) { } if (!is.numeric(stats)) { - cli::cli_abort("`stats` must be a numeric vector.") + cli::cli_abort("{.arg stats} must be a numeric vector.") } # stats is a numeric vector of values @@ -252,7 +250,7 @@ int_pctl.bootstraps <- function(.data, statistics, alpha = 0.05, ...) 
{ check_dots_empty() check_rset(.data, app = FALSE) if (length(alpha) != 1 || !is.numeric(alpha)) { - abort("`alpha` must be a single numeric value.") + cli::cli_abort("{.arg alpha} must be a single numeric value.") } .data <- .data %>% dplyr::filter(id != "Apparent") @@ -288,11 +286,12 @@ t_single <- function(stats, std_err, is_orig, alpha = 0.05) { if (!is.logical(is_orig) || any(is.na(is_orig))) { cli::cli_abort( - "`is_orig` should be a logical column the same length as `stats` with no missing values." + "{.arg is_orig} should be a logical column the same length as {.arg stats} with no missing values." ) } if (length(stats) != length(std_err) && length(stats) != length(is_orig)) { - cli::cli_abort("`stats`, `std_err`, and `is_orig` should have the same length.") + function_args <- c('stats', 'std_err', 'is_orig') + cli::cli_abort("{.arg {function_args}} should have the same length.") } if (sum(is_orig) != 1) { cli::cli_abort("The original statistic must be in a single row.") @@ -333,7 +332,7 @@ int_t.bootstraps <- function(.data, statistics, alpha = 0.05, ...) { check_dots_empty() check_rset(.data) if (length(alpha) != 1 || !is.numeric(alpha)) { - cli::cli_abort("`alpha` must be a single numeric value.") + cli::cli_abort("{.arg alpha} must be a single numeric value.") } column_name <- tidyselect::vars_select(names(.data), !!enquo(statistics)) @@ -375,7 +374,7 @@ bca_calc <- function(stats, orig_data, alpha = 0.05, .fn, ...) { if (inherits(loo_test, "try-error")) { cat("Running `.fn` on the LOO resamples produced an error:\n") print(loo_test) - rlang::abort("`.fn` failed.") + cli::cli_abort("{.arg .fn} failed.") } loo_res <- furrr::future_map(loo_rs$splits, .fn, ...) %>% list_rbind() @@ -434,7 +433,7 @@ int_bca <- function(.data, ...) { int_bca.bootstraps <- function(.data, statistics, alpha = 0.05, .fn, ...) 
{ check_rset(.data) if (length(alpha) != 1 || !is.numeric(alpha)) { - cli::cli_abort("`alpha` must be a single numeric value.") + cli::cli_abort("{.arg alpha} must be a single numeric value.") } has_dots(.fn) From e1f658dde498eab7dae22a096a0a4c31e1b9b752 Mon Sep 17 00:00:00 2001 From: seb09 Date: Thu, 15 Aug 2024 10:42:52 -0700 Subject: [PATCH 10/82] Updated example for nested_cv() Replaced as.data.frame() by the rsample accessor functions and changed the car name - Fixes #479 --- R/nest.R | 18 +++++++++--------- man/nested_cv.Rd | 18 +++++++++--------- 2 files changed, 18 insertions(+), 18 deletions(-) diff --git a/R/nest.R b/R/nest.R index d3c6b080..d2d3427e 100644 --- a/R/nest.R +++ b/R/nest.R @@ -34,18 +34,18 @@ #' inside = vfold_cv(v = 3) #' ) #' -#' first_outer_split <- bad_idea$splits[[1]] -#' outer_analysis <- as.data.frame(first_outer_split) -#' sum(grepl("Volvo 142E", rownames(outer_analysis))) +#' first_outer_split <- get_rsplit(bad_idea, 1) +#' outer_analysis <- analysis(first_outer_split) +#' sum(grepl("Camaro Z28", rownames(outer_analysis))) #' #' ## For the 3-fold CV used inside of each bootstrap, how are the replicated -#' ## `Volvo 142E` data partitioned? -#' first_inner_split <- bad_idea$inner_resamples[[1]]$splits[[1]] -#' inner_analysis <- as.data.frame(first_inner_split) -#' inner_assess <- as.data.frame(first_inner_split, data = "assessment") +#' ## `Camaro Z28` data partitioned? 
+#' first_inner_split <- get_rsplit(bad_idea$inner_resamples[[1]], 1) +#' inner_analysis <- analysis(first_inner_split) +#' inner_assess <- assessment(first_inner_split) #' -#' sum(grepl("Volvo 142E", rownames(inner_analysis))) -#' sum(grepl("Volvo 142E", rownames(inner_assess))) +#' sum(grepl("Camaro Z28", rownames(inner_analysis))) +#' sum(grepl("Camaro Z28", rownames(inner_assess))) #' @export nested_cv <- function(data, outside, inside) { cl <- match.call() diff --git a/man/nested_cv.Rd b/man/nested_cv.Rd index 826a9c47..cdb19761 100644 --- a/man/nested_cv.Rd +++ b/man/nested_cv.Rd @@ -48,16 +48,16 @@ bad_idea <- nested_cv(mtcars, inside = vfold_cv(v = 3) ) -first_outer_split <- bad_idea$splits[[1]] -outer_analysis <- as.data.frame(first_outer_split) -sum(grepl("Volvo 142E", rownames(outer_analysis))) +first_outer_split <- get_rsplit(bad_idea, 1) +outer_analysis <- analysis(first_outer_split) +sum(grepl("Camaro Z28", rownames(outer_analysis))) ## For the 3-fold CV used inside of each bootstrap, how are the replicated -## `Volvo 142E` data partitioned? -first_inner_split <- bad_idea$inner_resamples[[1]]$splits[[1]] -inner_analysis <- as.data.frame(first_inner_split) -inner_assess <- as.data.frame(first_inner_split, data = "assessment") +## `Camaro Z28` data partitioned? 
+first_inner_split <- get_rsplit(bad_idea$inner_resamples[[1]], 1) +inner_analysis <- analysis(first_inner_split) +inner_assess <- assessment(first_inner_split) -sum(grepl("Volvo 142E", rownames(inner_analysis))) -sum(grepl("Volvo 142E", rownames(inner_assess))) +sum(grepl("Camaro Z28", rownames(inner_analysis))) +sum(grepl("Camaro Z28", rownames(inner_assess))) } From 0093a28c3eb7c7aa3d883253f14f153e5b31309e Mon Sep 17 00:00:00 2001 From: James Wade Date: Thu, 15 Aug 2024 10:45:49 -0700 Subject: [PATCH 11/82] update arg names and accept new tests --- R/rsplit.R | 2 +- tests/testthat/_snaps/permutations.md | 2 +- tests/testthat/_snaps/permutations.new.md | 30 ----------------------- 3 files changed, 2 insertions(+), 32 deletions(-) delete mode 100644 tests/testthat/_snaps/permutations.new.md diff --git a/R/rsplit.R b/R/rsplit.R index d99a98cb..744e9d9e 100644 --- a/R/rsplit.R +++ b/R/rsplit.R @@ -105,7 +105,7 @@ as.data.frame.rsplit <- rsplit_class <- class(x)[[1]] cli_abort( "There is no assessment data set for an {.arg rsplit} object - with class {rsplit_class}." + with class {.cls {rsplit_class}}." ) } ind <- as.integer(x, data = data, ...) diff --git a/tests/testthat/_snaps/permutations.md b/tests/testthat/_snaps/permutations.md index 94276f18..1f1b3571 100644 --- a/tests/testthat/_snaps/permutations.md +++ b/tests/testthat/_snaps/permutations.md @@ -4,7 +4,7 @@ assessment(xx$splits[[1]]) Condition Error in `as.data.frame()`: - ! There is no assessment data set for an `rsplit` object with class `perm_split`. + ! There is no assessment data set for an `rsplit` object with class . # printing diff --git a/tests/testthat/_snaps/permutations.new.md b/tests/testthat/_snaps/permutations.new.md deleted file mode 100644 index 515f099d..00000000 --- a/tests/testthat/_snaps/permutations.new.md +++ /dev/null @@ -1,30 +0,0 @@ -# no assessment set - - Code - assessment(xx$splits[[1]]) - Condition - Error in `as.data.frame()`: - ! 
There is no assessment data set for an `rsplit` object with class perm_split. - -# printing - - Code - permutations(mtcars, 1) - Output - # Permutation sampling - # Permuted columns: [mpg] - # A tibble: 25 x 2 - splits id - - 1 Permutations01 - 2 Permutations02 - 3 Permutations03 - 4 Permutations04 - 5 Permutations05 - 6 Permutations06 - 7 Permutations07 - 8 Permutations08 - 9 Permutations09 - 10 Permutations10 - # i 15 more rows - From 8fa19083fa83038bc9e772e9b828f78c305508b3 Mon Sep 17 00:00:00 2001 From: Demetri Pananos Date: Thu, 15 Aug 2024 10:51:21 -0700 Subject: [PATCH 12/82] More edits to bootci. Updates snapshot of tests --- R/bootci.R | 44 ++++++++++++++++----------------- tests/testthat/_snaps/bootci.md | 6 ++--- 2 files changed, 24 insertions(+), 26 deletions(-) diff --git a/R/bootci.R b/R/bootci.R index a1059118..510ff44f 100644 --- a/R/bootci.R +++ b/R/bootci.R @@ -6,12 +6,12 @@ check_rset <- function(x, app = TRUE) { if (!inherits(x, "bootstraps")) { - cli::cli_abort("{.arg .data} should be an `rset` object generated from {.fn bootstraps}") + cli_abort("{.arg .data} should be an `rset` object generated from {.fn bootstraps}") } if (app) { if (x %>% dplyr::filter(id == "Apparent") %>% nrow() != 1) { - cli::cli_abort("Please set `apparent = TRUE` in {.fn bootstraps} function") + cli_abort("Please set `apparent = TRUE` in {.fn bootstraps} function") } } invisible(NULL) @@ -28,15 +28,15 @@ std_exp <- c("std.error", "robust.se") check_tidy_names <- function(x, std_col) { # check for proper columns if (sum(colnames(x) == "estimate") != 1) { - cli::cli_abort(stat_nm_err) + cli_abort(stat_nm_err) } if (sum(colnames(x) == "term") != 1) { - cli::cli_abort(stat_nm_err) + cli_abort(stat_nm_err) } if (std_col) { std_candidates <- colnames(x) %in% std_exp if (sum(std_candidates) != 1) { - cli::cli_abort("{.arg statistics} should select a single column for the standard error.") + cli_abort("{.arg statistics} should select a single column for the standard error.") } 
} invisible(TRUE) @@ -57,7 +57,7 @@ check_tidy <- function(x, std_col = FALSE) { } if (inherits(x, "try-error")) { - cli::cli_abort(stat_fmt_err) + cli_abort(stat_fmt_err) } check_tidy_names(x, std_col) @@ -115,7 +115,7 @@ new_stats <- function(x, lo, hi) { has_dots <- function(x) { nms <- names(formals(x)) if (!any(nms == "...")) { - cli::cli_abort("{.arg .fn} must have an argument {.arg ...}.") + cli_abort("{.arg .fn} must have an argument {.arg ...}.") } invisible(NULL) } @@ -128,9 +128,7 @@ check_num_resamples <- function(x, B = 1000) { dplyr::filter(n < B) if (nrow(x) > 0) { - terms <- paste0("`", x$term, "`") - cli::cli_warn(paste0("Recommend at least {B} non-missing bootstrap ", - "resamples for {cli::qty(terms)} term{?s} {terms}.")) + cli::cli_warn("Recommend at least {B} non-missing bootstrap resamples for {x$terms} term{?s}.") } invisible(NULL) } @@ -141,11 +139,11 @@ check_num_resamples <- function(x, B = 1000) { pctl_single <- function(stats, alpha = 0.05) { if (all(is.na(stats))) { - cli::cli_abort("All statistics have missing values.") + cli_abort("All statistics have missing values.") } if (!is.numeric(stats)) { - cli::cli_abort("{.arg stats} must be a numeric vector.") + cli_abort("{.arg stats} must be a numeric vector.") } # stats is a numeric vector of values @@ -250,7 +248,7 @@ int_pctl.bootstraps <- function(.data, statistics, alpha = 0.05, ...) 
{ check_dots_empty() check_rset(.data, app = FALSE) if (length(alpha) != 1 || !is.numeric(alpha)) { - cli::cli_abort("{.arg alpha} must be a single numeric value.") + cli_abort("{.arg alpha} must be a single numeric value.") } .data <- .data %>% dplyr::filter(id != "Apparent") @@ -281,20 +279,20 @@ t_single <- function(stats, std_err, is_orig, alpha = 0.05) { # which_orig is the index of stats and std_err that has the original result if (all(is.na(stats))) { - cli::cli_abort("All statistics have missing values.") + cli_abort("All statistics have missing values.") } if (!is.logical(is_orig) || any(is.na(is_orig))) { - cli::cli_abort( + cli_abort( "{.arg is_orig} should be a logical column the same length as {.arg stats} with no missing values." ) } if (length(stats) != length(std_err) && length(stats) != length(is_orig)) { function_args <- c('stats', 'std_err', 'is_orig') - cli::cli_abort("{.arg {function_args}} should have the same length.") + cli_abort("{.arg {function_args}} should have the same length.") } if (sum(is_orig) != 1) { - cli::cli_abort("The original statistic must be in a single row.") + cli_abort("The original statistic must be in a single row.") } theta_obs <- stats[is_orig] @@ -332,12 +330,12 @@ int_t.bootstraps <- function(.data, statistics, alpha = 0.05, ...) { check_dots_empty() check_rset(.data) if (length(alpha) != 1 || !is.numeric(alpha)) { - cli::cli_abort("{.arg alpha} must be a single numeric value.") + cli_abort("{.arg alpha} must be a single numeric value.") } column_name <- tidyselect::vars_select(names(.data), !!enquo(statistics)) if (length(column_name) != 1) { - cli::cli_abort(stat_fmt_err) + cli_abort(stat_fmt_err) } stats <- .data %>% dplyr::select(!!column_name, id) stats <- check_tidy(stats, std_col = TRUE) @@ -359,7 +357,7 @@ bca_calc <- function(stats, orig_data, alpha = 0.05, .fn, ...) 
{ # TODO check per term if (all(is.na(stats$estimate))) { - cli::cli_abort("All statistics have missing values.") + cli_abort("All statistics have missing values.") } ### Estimating Z0 bias-correction @@ -374,7 +372,7 @@ bca_calc <- function(stats, orig_data, alpha = 0.05, .fn, ...) { if (inherits(loo_test, "try-error")) { cat("Running `.fn` on the LOO resamples produced an error:\n") print(loo_test) - cli::cli_abort("{.arg .fn} failed.") + cli_abort("{.arg .fn} failed.") } loo_res <- furrr::future_map(loo_rs$splits, .fn, ...) %>% list_rbind() @@ -433,14 +431,14 @@ int_bca <- function(.data, ...) { int_bca.bootstraps <- function(.data, statistics, alpha = 0.05, .fn, ...) { check_rset(.data) if (length(alpha) != 1 || !is.numeric(alpha)) { - cli::cli_abort("{.arg alpha} must be a single numeric value.") + cli_abort("{.arg alpha} must be a single numeric value.") } has_dots(.fn) column_name <- tidyselect::vars_select(names(.data), !!enquo(statistics)) if (length(column_name) != 1) { - cli::cli_abort(stat_fmt_err) + cli_abort(stat_fmt_err) } stats <- .data %>% dplyr::select(!!column_name, id) stats <- check_tidy(stats) diff --git a/tests/testthat/_snaps/bootci.md b/tests/testthat/_snaps/bootci.md index 8632d77e..d3106318 100644 --- a/tests/testthat/_snaps/bootci.md +++ b/tests/testthat/_snaps/bootci.md @@ -4,7 +4,7 @@ int_pctl(bt_resamples, res) Condition Warning: - Recommend at least 1000 non-missing bootstrap resamples for term `mean`. + Recommend at least 1000 non-missing bootstrap resamples for `mean` term. Error in `pctl_single()`: ! All statistics have missing values. @@ -14,7 +14,7 @@ int_t(bt_resamples, res) Condition Warning: - Recommend at least 500 non-missing bootstrap resamples for term `mean`. + Recommend at least 500 non-missing bootstrap resamples for `mean` term. Error in `t_single()`: ! All statistics have missing values. 
@@ -24,7 +24,7 @@ int_bca(bt_resamples, res, .fn = bad_stats) Condition Warning: - Recommend at least 1000 non-missing bootstrap resamples for term `mean`. + Recommend at least 1000 non-missing bootstrap resamples for `mean` term. Error in `bca_calc()`: ! All statistics have missing values. From 08f4667e12f720d36a39b441cc5ad64e7d367e62 Mon Sep 17 00:00:00 2001 From: brshallo Date: Thu, 15 Aug 2024 11:06:20 -0700 Subject: [PATCH 13/82] use `fun()` instead of `fun` across docs, fixes #383 --- R/boot.R | 2 +- R/caret.R | 8 ++++---- R/initial_split.R | 14 +++++++------- R/labels.R | 2 +- R/nest.R | 2 +- R/rsplit.R | 4 ++-- R/tidy.R | 12 ++++++------ man/as.data.frame.rsplit.Rd | 4 ++-- man/bootstraps.Rd | 2 +- man/group_bootstraps.Rd | 2 +- man/initial_split.Rd | 10 +++++----- man/labels.rset.Rd | 2 +- man/nested_cv.Rd | 2 +- man/rsample2caret.Rd | 8 ++++---- man/tidy.rsplit.Rd | 10 +++++----- vignettes/Working_with_rsets.Rmd | 4 ++-- 16 files changed, 44 insertions(+), 44 deletions(-) diff --git a/R/boot.R b/R/boot.R index 887de4bd..fa8fb651 100644 --- a/R/boot.R +++ b/R/boot.R @@ -17,7 +17,7 @@ #' @param times The number of bootstrap samples. #' @param apparent A logical. Should an extra resample be added where the #' analysis and holdout subset are the entire data set. This is required for -#' some estimators used by the `summary` function that require the apparent +#' some estimators used by the `summary()` function that require the apparent #' error rate. #' @export #' @return A tibble with classes `bootstraps`, `rset`, `tbl_df`, `tbl`, and diff --git a/R/caret.R b/R/caret.R index 6b2808b3..609fbc19 100644 --- a/R/caret.R +++ b/R/caret.R @@ -4,10 +4,10 @@ #' \pkg{rsample} and \pkg{caret}. #' #' @param object An `rset` object. Currently, -#' `nested_cv` is not supported. -#' @return `rsample2caret` returns a list that mimics the +#' `nested_cv()` is not supported. 
+#' @return `rsample2caret()` returns a list that mimics the #' `index` and `indexOut` elements of a -#' `trainControl` object. `caret2rsample` returns an +#' `trainControl` object. `caret2rsample()` returns an #' `rset` object of the appropriate class. #' @export rsample2caret <- function(object, data = c("analysis", "assessment")) { @@ -23,7 +23,7 @@ rsample2caret <- function(object, data = c("analysis", "assessment")) { } #' @rdname rsample2caret -#' @param ctrl An object produced by `trainControl` that has +#' @param ctrl An object produced by `caret::trainControl()` that has #' had the `index` and `indexOut` elements populated by #' integers. One method of getting this is to extract the #' `control` objects from an object produced by `train`. diff --git a/R/initial_split.R b/R/initial_split.R index 66157c76..1de20789 100644 --- a/R/initial_split.R +++ b/R/initial_split.R @@ -1,18 +1,18 @@ #' Simple Training/Test Set Splitting #' -#' `initial_split` creates a single binary split of the data into a training -#' set and testing set. `initial_time_split` does the same, but takes the +#' `initial_split()` creates a single binary split of the data into a training +#' set and testing set. `initial_time_split()` does the same, but takes the #' _first_ `prop` samples for training, instead of a random selection. -#' `group_initial_split` creates splits of the data based +#' `group_initial_split()` creates splits of the data based #' on some grouping variable, so that all data in a "group" is assigned to #' the same split. -#' `training` and `testing` are used to extract the resulting data. +#' `training()` and `testing()` are used to extract the resulting data. #' @template strata_details #' @inheritParams vfold_cv #' @inheritParams make_strata #' @param prop The proportion of data to be retained for modeling/analysis. 
#' @export -#' @return An `rsplit` object that can be used with the `training` and `testing` +#' @return An `rsplit` object that can be used with the `training()` and `testing()` #' functions to extract the data in each split. #' @examplesIf rlang::is_installed("modeldata") #' set.seed(1353) @@ -176,12 +176,12 @@ group_initial_split <- function(data, group, prop = 3 / 4, ..., strata = NULL, p attrib <- .get_split_args(res, allow_strata_false = TRUE) res <- res$splits[[1]] - + attrib$times <- NULL for (i in names(attrib)) { attr(res, i) <- attrib[[i]] } class(res) <- c("group_initial_split", "initial_split", class(res)) - + res } diff --git a/R/labels.R b/R/labels.R index 53e7bfe6..9cfe767e 100644 --- a/R/labels.R +++ b/R/labels.R @@ -1,7 +1,7 @@ #' Find Labels from rset Object #' #' Produce a vector of resampling labels (e.g. "Fold1") from -#' an `rset` object. Currently, `nested_cv` +#' an `rset` object. Currently, `nested_cv()` #' is not supported. #' #' @param object An `rset` object diff --git a/R/nest.R b/R/nest.R index d3c6b080..72058c85 100644 --- a/R/nest.R +++ b/R/nest.R @@ -1,6 +1,6 @@ #' Nested or Double Resampling #' -#' `nested_cv` can be used to take the results of one resampling procedure +#' `nested_cv()` can be used to take the results of one resampling procedure #' and conduct further resamples within each split. Any type of resampling #' used in `rsample` can be used. #' diff --git a/R/rsplit.R b/R/rsplit.R index 96234934..d19af5a1 100644 --- a/R/rsplit.R +++ b/R/rsplit.R @@ -66,8 +66,8 @@ as.integer.rsplit <- #' #' The analysis or assessment code can be returned as a data #' frame (as dictated by the `data` argument) using -#' `as.data.frame.rsplit`. `analysis` and -#' `assessment` are shortcuts. +#' `as.data.frame.rsplit()`. `analysis()` and +#' `assessment()` are shortcuts. #' @param x An `rsplit` object. #' @param row.names `NULL` or a character vector giving the row names for the data frame. Missing values are not allowed. 
#' @param optional A logical: should the column names of the data be checked for legality? diff --git a/R/tidy.R b/R/tidy.R index acba8b33..4d268c2a 100644 --- a/R/tidy.R +++ b/R/tidy.R @@ -1,6 +1,6 @@ #' Tidy Resampling Object #' -#' The `tidy` function from the \pkg{broom} package can be used on `rset` and +#' The `tidy()` function from the \pkg{broom} package can be used on `rset` and #' `rsplit` objects to generate tibbles with which rows are in the analysis and #' assessment sets. #' @param x A `rset` or `rsplit` object @@ -9,11 +9,11 @@ #' sample for the same row in the original data. #' @inheritParams rlang::args_dots_empty #' @return A tibble with columns `Row` and `Data`. The latter has possible -#' values "Analysis" or "Assessment". For `rset` inputs, identification columns -#' are also returned but their names and values depend on the type of -#' resampling. `vfold_cv` contains a column "Fold" and, if repeats are used, -#' another called "Repeats". `bootstraps` and `mc_cv` use the column -#' "Resample". +#' values "Analysis" or "Assessment". For `rset` inputs, identification +#' columns are also returned but their names and values depend on the type of +#' resampling. `vfold_cv()` contains a column "Fold" and, if repeats are +#' used, another called "Repeats". `bootstraps()` and `mc_cv()` use the column +#' "Resample". #' @details Note that for nested resampling, the rows of the inner resample, #' named `inner_Row`, are *relative* row indices and do not correspond to the #' rows in the original data set. diff --git a/man/as.data.frame.rsplit.Rd b/man/as.data.frame.rsplit.Rd index 6a494c15..6de9ad42 100644 --- a/man/as.data.frame.rsplit.Rd +++ b/man/as.data.frame.rsplit.Rd @@ -38,8 +38,8 @@ assessment(x, ...) \description{ The analysis or assessment code can be returned as a data frame (as dictated by the \code{data} argument) using -\code{as.data.frame.rsplit}. \code{analysis} and -\code{assessment} are shortcuts. +\code{as.data.frame.rsplit()}.
\code{analysis()} and +\code{assessment()} are shortcuts. } \examples{ library(dplyr) diff --git a/man/bootstraps.Rd b/man/bootstraps.Rd index 164f1835..93854d30 100644 --- a/man/bootstraps.Rd +++ b/man/bootstraps.Rd @@ -33,7 +33,7 @@ of stratifying groups that are too small.} \item{apparent}{A logical. Should an extra resample be added where the analysis and holdout subset are the entire data set. This is required for -some estimators used by the \code{summary} function that require the apparent +some estimators used by the \code{summary()} function that require the apparent error rate.} \item{...}{These dots are for future extensions and must be empty.} diff --git a/man/group_bootstraps.Rd b/man/group_bootstraps.Rd index ad68eb6d..e93dccd3 100644 --- a/man/group_bootstraps.Rd +++ b/man/group_bootstraps.Rd @@ -25,7 +25,7 @@ assessment set within a fold.} \item{apparent}{A logical. Should an extra resample be added where the analysis and holdout subset are the entire data set. This is required for -some estimators used by the \code{summary} function that require the apparent +some estimators used by the \code{summary()} function that require the apparent error rate.} \item{...}{These dots are for future extensions and must be empty.} diff --git a/man/initial_split.Rd b/man/initial_split.Rd index 740bf66f..efc096d0 100644 --- a/man/initial_split.Rd +++ b/man/initial_split.Rd @@ -61,17 +61,17 @@ grouping observations with the same value to either the analysis or assessment set within a fold.} } \value{ -An \code{rsplit} object that can be used with the \code{training} and \code{testing} +An \code{rsplit} object that can be used with the \code{training()} and \code{testing()} functions to extract the data in each split. } \description{ -\code{initial_split} creates a single binary split of the data into a training -set and testing set. 
\code{initial_time_split} does the same, but takes the +\code{initial_split()} creates a single binary split of the data into a training +set and testing set. \code{initial_time_split()} does the same, but takes the \emph{first} \code{prop} samples for training, instead of a random selection. -\code{group_initial_split} creates splits of the data based +\code{group_initial_split()} creates splits of the data based on some grouping variable, so that all data in a "group" is assigned to the same split. -\code{training} and \code{testing} are used to extract the resulting data. +\code{training()} and \code{testing()} are used to extract the resulting data. } \details{ With a \code{strata} argument, the random sampling is conducted diff --git a/man/labels.rset.Rd b/man/labels.rset.Rd index 6aa2ca12..2bc14356 100644 --- a/man/labels.rset.Rd +++ b/man/labels.rset.Rd @@ -22,7 +22,7 @@ A single character or factor vector. } \description{ Produce a vector of resampling labels (e.g. "Fold1") from -an \code{rset} object. Currently, \code{nested_cv} +an \code{rset} object. Currently, \code{nested_cv()} is not supported. } \examples{ diff --git a/man/nested_cv.Rd b/man/nested_cv.Rd index 826a9c47..3bca035e 100644 --- a/man/nested_cv.Rd +++ b/man/nested_cv.Rd @@ -25,7 +25,7 @@ and a column of nested tibbles called \code{inner_resamples} with the additional resamples. } \description{ -\code{nested_cv} can be used to take the results of one resampling procedure +\code{nested_cv()} can be used to take the results of one resampling procedure and conduct further resamples within each split. Any type of resampling used in \code{rsample} can be used. } diff --git a/man/rsample2caret.Rd b/man/rsample2caret.Rd index ca0605f1..ac712c5b 100644 --- a/man/rsample2caret.Rd +++ b/man/rsample2caret.Rd @@ -11,20 +11,20 @@ caret2rsample(ctrl, data = NULL) } \arguments{ \item{object}{An \code{rset} object. 
Currently, -\code{nested_cv} is not supported.} +\code{nested_cv()} is not supported.} \item{data}{The data that was originally used to produce the \code{ctrl} object.} -\item{ctrl}{An object produced by \code{trainControl} that has +\item{ctrl}{An object produced by \code{caret::trainControl()} that has had the \code{index} and \code{indexOut} elements populated by integers. One method of getting this is to extract the \code{control} objects from an object produced by \code{train}.} } \value{ -\code{rsample2caret} returns a list that mimics the +\code{rsample2caret()} returns a list that mimics the \code{index} and \code{indexOut} elements of a -\code{trainControl} object. \code{caret2rsample} returns an +\code{trainControl} object. \code{caret2rsample()} returns an \code{rset} object of the appropriate class. } \description{ diff --git a/man/tidy.rsplit.Rd b/man/tidy.rsplit.Rd index 01c419ec..3d75c389 100644 --- a/man/tidy.rsplit.Rd +++ b/man/tidy.rsplit.Rd @@ -26,14 +26,14 @@ sample for the same row in the original data.} } \value{ A tibble with columns \code{Row} and \code{Data}. The latter has possible -values "Analysis" or "Assessment". For \code{rset} inputs, identification columns -are also returned but their names and values depend on the type of -resampling. \code{vfold_cv} contains a column "Fold" and, if repeats are used, -another called "Repeats". \code{bootstraps} and \code{mc_cv} use the column +values "Analysis" or "Assessment". For \code{rset} inputs, identification +columns are also returned but their names and values depend on the type of +resampling. \code{vfold_cv()} contains a column "Fold" and, if repeats are +used, another called "Repeats". \code{bootstraps()} and \code{mc_cv()} use the column "Resample".
} \description{ -The \code{tidy} function from the \pkg{broom} package can be used on \code{rset} and +The \code{tidy()} function from the \pkg{broom} package can be used on \code{rset} and \code{rsplit} objects to generate tibbles with which rows are in the analysis and assessment sets. } diff --git a/vignettes/Working_with_rsets.Rmd b/vignettes/Working_with_rsets.Rmd index 8f59b2e8..ab112ea1 100644 --- a/vignettes/Working_with_rsets.Rmd +++ b/vignettes/Working_with_rsets.Rmd @@ -109,7 +109,7 @@ example[1:10, setdiff(names(example), names(attrition))] For this model, the `.fitted` value is the linear predictor in log-odds units. -To compute this data set for each of the 100 resamples, we'll use the `map` function from the `purrr` package: +To compute this data set for each of the 100 resamples, we'll use the `map()` function from the `purrr` package: ```{r model_purrr, warning=FALSE} library(purrr) @@ -182,7 +182,7 @@ The calculated 95% confidence interval contains zero, so we don't have evidence ## Bootstrap Estimates of Model Coefficients -Unless there is already a column in the resample object that contains the fitted model, a function can be used to fit the model and save all of the model coefficients. The [`broom` package](https://cran.r-project.org/package=broom) package has a `tidy` function that will save the coefficients in a data frame. Instead of returning a data frame with a row for each model term, we will save a data frame with a single row and columns for each model term. As before, `purrr::map` can be used to estimate and save these values for each split. +Unless there is already a column in the resample object that contains the fitted model, a function can be used to fit the model and save all of the model coefficients. The [`broom` package](https://cran.r-project.org/package=broom) package has a `tidy()` function that will save the coefficients in a data frame. 
Instead of returning a data frame with a row for each model term, we will save a data frame with a single row and columns for each model term. As before, `purrr::map()` can be used to estimate and save these values for each split. ```{r coefs} From 0cc2c23e7af32d468a49974faab345d56d32d1fb Mon Sep 17 00:00:00 2001 From: Priyata Kalra Date: Thu, 15 Aug 2024 20:24:43 +0200 Subject: [PATCH 14/82] changed all the rlang::abort to cli::cli_abort. There are two information message which needed to be changed to a vector and there is one class message where the semantics had to be adjusted. Fixes #506 --- R/initial_validation_split.R | 26 +++++++++++++------------- 1 file changed, 13 insertions(+), 13 deletions(-) diff --git a/R/initial_validation_split.R b/R/initial_validation_split.R index e3ce84d6..e1b96012 100644 --- a/R/initial_validation_split.R +++ b/R/initial_validation_split.R @@ -120,25 +120,25 @@ initial_validation_split <- function(data, check_prop_3 <- function(prop, call = rlang::caller_env()) { if (!is.numeric(prop)) { - rlang::abort("`prop` needs to be numeric.", call = call) + cli::cli_abort("`prop` needs to be numeric.", call = call) } if (any(is.na(prop))) { - rlang::abort("`prop` cannot contain `NA`.", call = call) + cli::cli_abort("`prop` cannot contain `NA`.", call = call) } if (any(is.null(prop))) { - rlang::abort("`prop` cannot contain `NULL`.", call = call) + cli::cli_abort("`prop` cannot contain `NULL`.", call = call) } if (length(prop) != 2L) { - rlang::abort( + cli::cli_abort( "`prop` needs to contain the proportions for training and validation.", call = call ) } if (any(!(prop > 0)) | any(!(prop < 1))) { - rlang::abort("Elements of `prop` need to be in (0, 1).", call = call) + cli::cli_abort("Elements of `prop` need to be in (0, 1).", call = call) } if (!(sum(prop) > 0 ) | !(sum(prop) < 1) ) { - rlang::abort( + cli::cli_abort( "The sum of the proportions in `prop` needs to be in (0, 1).", call = call ) @@ -304,7 +304,7 @@ validation <- 
function(x, ...) { validation.default <- function(x, ...) { cls <- class(x) cli::cli_abort( - "No method for objects of class{?es}: {cls}" + "No method for objects of class{?es}: {.cls {cls}} " ) } @@ -321,18 +321,18 @@ validation.initial_validation_split <- function(x, ...) { #' @export #' @keywords internal analysis.initial_validation_split <- function(x, ...) { - rlang::abort( - "The initial validation split does not contain an analysis set.", - i = "You can access the training data with `training()`." + cli::cli_abort( + c("The initial validation split does not contain an analysis set.", + "i" = "You can access the training data with {.fun training}.") ) } #' @export #' @keywords internal assessment.initial_validation_split <- function(x, ...) { - rlang::abort( - "The initial validation split does not contain an assessment set.", - i = "You can access the testing data with `testing()`." + cli::cli_abort( + c("The initial validation split does not contain an assessment set.", + "i" = "You can access the testing data with {.fun testing}.") ) } From 0e87add6292faf91565fadafcf0e35134765ea95 Mon Sep 17 00:00:00 2001 From: Hannah Frick Date: Thu, 15 Aug 2024 11:27:05 -0700 Subject: [PATCH 15/82] add James to NEWS --- NEWS.md | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/NEWS.md b/NEWS.md index 2e678b33..bd4a0214 100644 --- a/NEWS.md +++ b/NEWS.md @@ -2,7 +2,7 @@ * The new `inner_split()` function and its methods for various resamples is for usage in tune to create a inner resample of the analysis set to fit the preprocessor and model on one part and the post-processor on the other part (#483, #488, #489). -* Started moving error messages to cli (#499, #502). +* Started moving error messages to cli (#499, #502). With contributions from @JamesHWade (#518). 
## Bug fixes From 8adbb77f585b886118f3dbaa2060b8950cbfbf91 Mon Sep 17 00:00:00 2001 From: ccani007 Date: Thu, 15 Aug 2024 11:42:40 -0700 Subject: [PATCH 16/82] Fixed #464 improved spacing in pretty methods. --- R/printing.R | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/R/printing.R b/R/printing.R index 9a37254f..d182597c 100644 --- a/R/printing.R +++ b/R/printing.R @@ -55,10 +55,10 @@ pretty.mc_cv <- function(x, ...) { signif(1 - details$prop, 2), ") with ", details$times, - " resamples " + " resamples" ) if (has_strata(details)) { - res <- paste(res, "using stratification") + res <- paste(res, " using stratification") } res } From fc4436ccab18453dba91f3da170949a96d210d96 Mon Sep 17 00:00:00 2001 From: Priyata Kalra Date: Thu, 15 Aug 2024 20:43:19 +0200 Subject: [PATCH 17/82] removed the 'cli::' descriptor from cli_abort. fixes #506 --- R/initial_validation_split.R | 18 +++++++++--------- 1 file changed, 9 insertions(+), 9 deletions(-) diff --git a/R/initial_validation_split.R b/R/initial_validation_split.R index e1b96012..f95bf1b6 100644 --- a/R/initial_validation_split.R +++ b/R/initial_validation_split.R @@ -120,25 +120,25 @@ initial_validation_split <- function(data, check_prop_3 <- function(prop, call = rlang::caller_env()) { if (!is.numeric(prop)) { - cli::cli_abort("`prop` needs to be numeric.", call = call) + cli_abort("`prop` needs to be numeric.", call = call) } if (any(is.na(prop))) { - cli::cli_abort("`prop` cannot contain `NA`.", call = call) + cli_abort("`prop` cannot contain `NA`.", call = call) } if (any(is.null(prop))) { - cli::cli_abort("`prop` cannot contain `NULL`.", call = call) + cli_abort("`prop` cannot contain `NULL`.", call = call) } if (length(prop) != 2L) { - cli::cli_abort( + cli_abort( "`prop` needs to contain the proportions for training and validation.", call = call ) } if (any(!(prop > 0)) | any(!(prop < 1))) { - cli::cli_abort("Elements of `prop` need to be in (0, 1).", call = call) + cli_abort("Elements of 
`prop` need to be in (0, 1).", call = call) } if (!(sum(prop) > 0 ) | !(sum(prop) < 1) ) { - cli::cli_abort( + cli_abort( "The sum of the proportions in `prop` needs to be in (0, 1).", call = call ) @@ -303,7 +303,7 @@ validation <- function(x, ...) { #' @rdname initial_validation_split validation.default <- function(x, ...) { cls <- class(x) - cli::cli_abort( + cli_abort( "No method for objects of class{?es}: {.cls {cls}} " ) } @@ -321,7 +321,7 @@ validation.initial_validation_split <- function(x, ...) { #' @export #' @keywords internal analysis.initial_validation_split <- function(x, ...) { - cli::cli_abort( + cli_abort( c("The initial validation split does not contain an analysis set.", "i" = "You can access the training data with {.fun training}.") ) @@ -330,7 +330,7 @@ analysis.initial_validation_split <- function(x, ...) { #' @export #' @keywords internal assessment.initial_validation_split <- function(x, ...) { - cli::cli_abort( + cli_abort( c("The initial validation split does not contain an assessment set.", "i" = "You can access the testing data with {.fun testing}.") ) From a80cd6f9c232f7f66ff2bfcc31b1598d6c6ad744 Mon Sep 17 00:00:00 2001 From: Hannah Frick Date: Thu, 15 Aug 2024 11:48:42 -0700 Subject: [PATCH 18/82] add Seb to NEWS --- NEWS.md | 2 ++ 1 file changed, 2 insertions(+) diff --git a/NEWS.md b/NEWS.md index 2e678b33..0a0e5147 100644 --- a/NEWS.md +++ b/NEWS.md @@ -4,6 +4,8 @@ * Started moving error messages to cli (#499, #502). +* Fixed example for `nested_cv()` (@seb09, #520). + ## Bug fixes * `vfold_cv()` now utilizes the `breaks` argument correctly for repeated cross-validation (@ZWael, #471). 
From bdda1c5e362f54179583f2d395593b382cf1abc8 Mon Sep 17 00:00:00 2001 From: nmercadeb Date: Thu, 15 Aug 2024 12:15:54 -0700 Subject: [PATCH 19/82] deprecate rolling_origin, issue #448 --- R/rolling_origin.R | 6 ++++++ vignettes/Common_Patterns.Rmd | 8 -------- 2 files changed, 6 insertions(+), 8 deletions(-) diff --git a/R/rolling_origin.R b/R/rolling_origin.R index 1a49993b..96352293 100644 --- a/R/rolling_origin.R +++ b/R/rolling_origin.R @@ -59,6 +59,12 @@ #' @export rolling_origin <- function(data, initial = 5, assess = 1, cumulative = TRUE, skip = 0, lag = 0, ...) { + + lifecycle::signal_stage( + stage = "superseded", what = "rolling_origin()", + with = I("sliding_window(), sliding_index() and sliding_period()") + ) + check_dots_empty() n <- nrow(data) diff --git a/vignettes/Common_Patterns.Rmd b/vignettes/Common_Patterns.Rmd index 039cf0ac..a69a0c7d 100644 --- a/vignettes/Common_Patterns.Rmd +++ b/vignettes/Common_Patterns.Rmd @@ -223,11 +223,3 @@ sliding_period(Chicago, date, "year") %>% head(2) ``` -All of these functions produce analysis sets of the same size, with the start and end of the analysis set "sliding" down your data frame. If you'd rather have your analysis set get progressively larger, so that you're predicting new data based upon a growing set of older observations, you can use the `rolling_origin()` function: - -```{r} -rolling_origin(Chicago) %>% - head(2) -``` - -Note that all of these time-based resampling functions are deterministic: unlike the rest of the package, running these functions repeatedly under different random seeds will always return the same results. 
From 41955081bcd0c3c5f082d09c1fa110bc30b21e40 Mon Sep 17 00:00:00 2001 From: Priyata Kalra Date: Thu, 15 Aug 2024 22:07:41 +0200 Subject: [PATCH 20/82] Used cli abort for make_groups.r mc.R and nest.R --- R/make_groups.R | 8 ++++---- R/mc.R | 4 ++-- R/nest.R | 6 +++--- 3 files changed, 9 insertions(+), 9 deletions(-) diff --git a/R/make_groups.R b/R/make_groups.R index d5dabf24..0de528f8 100644 --- a/R/make_groups.R +++ b/R/make_groups.R @@ -296,7 +296,7 @@ check_prop <- function(prop, replace) { ((prop <= 1 && replace) || (prop < 1 && !replace)) acceptable_prop <- acceptable_prop && prop > 0 if (!acceptable_prop) { - rlang::abort( + cli_abort( "`prop` must be a number between 0 and 1.", call = rlang::caller_env() ) @@ -345,13 +345,13 @@ validate_group <- function(group, data, call = rlang::caller_env()) { } if (is.null(group) || !is.character(group) || length(group) != 1) { - rlang::abort( - "`group` should be a single character value for the column that will be used for splitting.", + cli_abort( + "{.arg {group}} should be a single character value for the column that will be used for splitting.", call = call ) } if (!any(names(data) == group)) { - rlang::abort("`group` should be a column in `data`.", call = call) + cli_abort("`group` should be a column in `data`.", call = call) } group diff --git a/R/mc.R b/R/mc.R index 198f90ba..1eed1822 100644 --- a/R/mc.R +++ b/R/mc.R @@ -104,7 +104,7 @@ mc_complement <- function(ind, n) { mc_splits <- function(data, prop = 3 / 4, times = 25, strata = NULL, breaks = 4, pool = 0.1) { if (!is.numeric(prop) | prop >= 1 | prop <= 0) { - rlang::abort("`prop` must be a number on (0, 1).") + cli_abort("`prop` must be a number on (0, 1).") } n <- nrow(data) @@ -244,7 +244,7 @@ group_mc_splits <- function(data, group, prop = 3 / 4, times = 25, strata = NULL all_assessable <- purrr::map(split_objs, function(x) nrow(assessment(x))) if (any(all_assessable == 0)) { - rlang::abort( + cli_abort( c( "Some assessment sets contained zero rows", 
i = "Consider using a non-grouped resampling method" diff --git a/R/nest.R b/R/nest.R index d3c6b080..da6ac0b1 100644 --- a/R/nest.R +++ b/R/nest.R @@ -80,9 +80,9 @@ nested_cv <- function(data, outside, inside) { inner_cl <- cl[["inside"]] if (!is_call(inner_cl)) { - abort( - "`inside` should be a expression such as `vfold()` or ", - "bootstraps(times = 10)` instead of an existing object.", + cli_abort( + "{.arg {inside}} should be a expression such as {.fun vfold} or // + {.code bootstraps(times = 10)} instead of an existing object." ) } inside <- map(outside$splits, inside_resample, cl = inner_cl, env = env) From f34e00da43d74c3d8061b2a08df25d99d048512a Mon Sep 17 00:00:00 2001 From: Priyata Kalra Date: Thu, 15 Aug 2024 22:17:27 +0200 Subject: [PATCH 21/82] cli_abort for make_group updated. Fixes #508 --- R/make_groups.R | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/R/make_groups.R b/R/make_groups.R index 0de528f8..5b5ef4a2 100644 --- a/R/make_groups.R +++ b/R/make_groups.R @@ -351,7 +351,7 @@ validate_group <- function(group, data, call = rlang::caller_env()) { ) } if (!any(names(data) == group)) { - cli_abort("`group` should be a column in `data`.", call = call) + cli_abort("{.arg {group}} should be a column in {.arg {data}}.", call = call) } group From 5c9fc930000efc9f16cdaa044f9993a13eb38a9c Mon Sep 17 00:00:00 2001 From: agmurray Date: Thu, 15 Aug 2024 13:24:25 -0700 Subject: [PATCH 22/82] Audited backticks on the rsample package. Several instances of packages found that were still backticked, mainly from NEWS file and a few others. 
Fixes #503 --- NEWS.md | 30 ++++++++++++++-------------- R/make_strata.R | 2 +- R/misc.R | 2 +- R/nest.R | 2 +- R/permutations.R | 4 ++-- man/get_fingerprint.Rd | 2 +- man/make_strata.Rd | 2 +- man/nested_cv.Rd | 2 +- man/permutations.Rd | 4 ++-- vignettes/Applications/Intervals.Rmd | 2 +- vignettes/Working_with_rsets.Rmd | 8 ++++---- 11 files changed, 30 insertions(+), 30 deletions(-) diff --git a/NEWS.md b/NEWS.md index 2e678b33..fa4979e7 100644 --- a/NEWS.md +++ b/NEWS.md @@ -126,7 +126,7 @@ * Attempts to stratify on a `Surv` object now error more informatively (#230). -* Exposed `pool` argument from `make_strata()` in user-facing resampling functions (#229). +* Exposed `pool` argument from `make_strata()` in user-facing resampling functions (#229). * Deprecated the `gather()` method for `rset` objects in favor of `tidyr::pivot_longer()` (#233). @@ -144,7 +144,7 @@ * The `reg_intervals()` function is a convenience function for `lm()`, `glm()`, `survreg()`, and `coxph()` models (#206). -* A few internal functions were exported so that `rsample`-adjacent packages can use the same underlying code. +* A few internal functions were exported so that rsample-adjacent packages can use the same underlying code. * The `obj_sum()` method for `rsplit` objects was updated (#215). @@ -165,11 +165,11 @@ * The `print()` methods for `rsplit` and `val_split` objects were adjusted to show `""` and ``, respectively. -* The `drinks`, `attrition`, and `two_class_dat` data sets were removed. They are in the `modeldata` package. +* The `drinks`, `attrition`, and `two_class_dat` data sets were removed. They are in the modeldata package. -* Compatability with `dplyr` 1.0.0. +* Compatibility with dplyr 1.0.0. -# `rsample` 0.0.6 +# rsample 0.0.6 * Added `validation_set()` for making a single resample. @@ -181,7 +181,7 @@ * `initial_time_split()` and `rolling_origin()` now have a `lag` parameter that ensures that previous data are available so that lagged variables can be calculated.
(#135, #136) -# `rsample` 0.0.5 +# rsample 0.0.5 * Added three functions to compute different bootstrap confidence intervals. * A new function (`add_resample_id()`) augments a data frame with columns for the resampling identifier. @@ -189,16 +189,16 @@ * Updated `initial_split()`, `mc_cv()`, `vfold_cv()`, `bootstraps()` with new `breaks` parameter that specifies the number of bins to stratify by for a numeric stratification variable. -# `rsample` 0.0.4 +# rsample 0.0.4 Small maintenance release. ## Minor improvements and fixes * `fill()` was removed per the deprecation warning. - * Small changes were made for the new version of `tibble`. + * Small changes were made for the new version of tibble. -# `rsample` 0.0.3 +# rsample 0.0.3 ## New features @@ -210,25 +210,25 @@ Small maintenance release. * Changed the R version requirement to be R >= 3.1 instead of 3.3.3. -* The `recipes`-related `prepper` function was [moved to the `recipes` package](https://github.com/tidymodels/rsample/issues/48). This makes the `rsample` install footprint much smaller. +* The recipes-related `prepper()` function was [moved to the recipes package](https://github.com/tidymodels/rsample/issues/48). This makes the rsample install footprint much smaller. * `rsplit` objects are shown differently inside of a tibble. -* Moved from the `broom` package to the `generics` package. +* Moved from the broom package to the generics package. -# `rsample` 0.0.2 +# rsample 0.0.2 * `initial_split`, `training`, and `testing` were added to do training/testing splits prior to resampling. * Another resampling method, `group_vfold_cv`, was added. * `caret2rsample` and `rsample2caret` can convert `rset` objects to those used by `caret::trainControl` and vice-versa. * A function called `form_pred` can be used to determine the original names of the predictors in a formula or `terms` object. -* A vignette and a function (`prepper`) were included to facilitate using the `recipes` with `rsample`. 
+* A vignette and a function (`prepper`) were included to facilitate using the recipes with rsample. * A `gather` method was added for `rset` objects. * A `labels` method was added for `rsplit` objects. This can help identify which resample is being used even when the whole `rset` object is not available. -* A variety of `dplyr` methods were added (e.g. `filter`, `mutate`, etc) that work without dropping classes or attributes of the `rsample` objects. +* A variety of dplyr methods were added (e.g. `filter()`, `mutate()`, etc) that work without dropping classes or attributes of the rsample objects. -# `rsample` 0.0.1 (2017-07-08) +# rsample 0.0.1 (2017-07-08) Initial public version on CRAN diff --git a/R/make_strata.R b/R/make_strata.R index dc498962..cef69225 100644 --- a/R/make_strata.R +++ b/R/make_strata.R @@ -51,7 +51,7 @@ #' table(x3) #' table(make_strata(x3)) #' -#' # `oilType` data from `caret` +#' # `oilType` data from caret #' x4 <- rep(LETTERS[1:7], c(37, 26, 3, 7, 11, 10, 2)) #' table(x4) #' table(make_strata(x4)) diff --git a/R/misc.R b/R/misc.R index 97119298..20b6a377 100644 --- a/R/misc.R +++ b/R/misc.R @@ -125,7 +125,7 @@ split_unnamed <- function(x, f) { #' @param x An `rset` or `tune_results` object. #' @param ... Not currently used. #' @return A character value or `NA_character_` if the object was created prior -#' to `rsample` version 0.1.0. +#' to rsample version 0.1.0. #' @rdname get_fingerprint #' @aliases .get_fingerprint #' @examples diff --git a/R/nest.R b/R/nest.R index d3c6b080..3077d79e 100644 --- a/R/nest.R +++ b/R/nest.R @@ -2,7 +2,7 @@ #' #' `nested_cv` can be used to take the results of one resampling procedure #' and conduct further resamples within each split. Any type of resampling -#' used in `rsample` can be used. +#' used in rsample can be used.
#' #' @details #' It is a bad idea to use bootstrapping as the outer resampling procedure (see diff --git a/R/permutations.R b/R/permutations.R index 8e2df6b0..e5a42a13 100644 --- a/R/permutations.R +++ b/R/permutations.R @@ -5,12 +5,12 @@ #' by permuting/shuffling one or more columns. This results in analysis #' samples where some columns are in their original order and some columns #' are permuted to a random order. Unlike other sampling functions in -#' `rsample`, there is no assessment set and calling `assessment()` on a +#' rsample, there is no assessment set and calling `assessment()` on a #' permutation split will throw an error. #' #' @param data A data frame. #' @param permute One or more columns to shuffle. This argument supports -#' `tidyselect` selectors. Multiple expressions can be combined with `c()`. +#' tidyselect selectors. Multiple expressions can be combined with `c()`. #' Variable names can be used as if they were positions in the data frame, so #' expressions like `x:y` can be used to select a range of variables. #' See \code{\link[tidyselect]{language}} for more details. diff --git a/man/get_fingerprint.Rd b/man/get_fingerprint.Rd index 14492dd3..cc912420 100644 --- a/man/get_fingerprint.Rd +++ b/man/get_fingerprint.Rd @@ -19,7 +19,7 @@ } \value{ A character value or \code{NA_character_} if the object was created prior -to \code{rsample} version 0.1.0. +to rsample version 0.1.0. 
} \description{ This function returns a hash (or NA) for an attribute that is created when diff --git a/man/make_strata.Rd b/man/make_strata.Rd index 9023356d..c2b7b434 100644 --- a/man/make_strata.Rd +++ b/man/make_strata.Rd @@ -64,7 +64,7 @@ x3 <- factor(x2) table(x3) table(make_strata(x3)) -# `oilType` data from `caret` +# `oilType` data from x4 <- rep(LETTERS[1:7], c(37, 26, 3, 7, 11, 10, 2)) table(x4) table(make_strata(x4)) diff --git a/man/nested_cv.Rd b/man/nested_cv.Rd index 826a9c47..a592c8cf 100644 --- a/man/nested_cv.Rd +++ b/man/nested_cv.Rd @@ -27,7 +27,7 @@ additional resamples. \description{ \code{nested_cv} can be used to take the results of one resampling procedure and conduct further resamples within each split. Any type of resampling -used in \code{rsample} can be used. +used in rsample can be used. } \details{ It is a bad idea to use bootstrapping as the outer resampling procedure (see diff --git a/man/permutations.Rd b/man/permutations.Rd index 4e543195..c2773bed 100644 --- a/man/permutations.Rd +++ b/man/permutations.Rd @@ -10,7 +10,7 @@ permutations(data, permute = NULL, times = 25, apparent = FALSE, ...) \item{data}{A data frame.} \item{permute}{One or more columns to shuffle. This argument supports -\code{tidyselect} selectors. Multiple expressions can be combined with \code{c()}. +tidyselect selectors. Multiple expressions can be combined with \code{c()}. Variable names can be used as if they were positions in the data frame, so expressions like \code{x:y} can be used to select a range of variables. See \code{\link[tidyselect]{language}} for more details.} @@ -33,7 +33,7 @@ A permutation sample is the same size as the original data set and is made by permuting/shuffling one or more columns. This results in analysis samples where some columns are in their original order and some columns are permuted to a random order. 
Unlike other sampling functions in -\code{rsample}, there is no assessment set and calling \code{assessment()} on a +rsample, there is no assessment set and calling \code{assessment()} on a permutation split will throw an error. } \details{ diff --git a/vignettes/Applications/Intervals.Rmd b/vignettes/Applications/Intervals.Rmd index ed700e8f..832fe5c2 100644 --- a/vignettes/Applications/Intervals.Rmd +++ b/vignettes/Applications/Intervals.Rmd @@ -193,7 +193,7 @@ intervals %>% split(intervals$term) For bias-corrected and accelerated (BCa) intervals, an additional argument is required. The `.fn` argument is a function that computes the statistic of interest. The first argument should be for the `rsplit` object and other arguments can be passed in using the ellipses. -These intervals use an internal leave-one-out resample to compute the Jackknife statistic and will recompute the statistic for _every bootstrap resample_. If the statistic is expensive to compute, this may take some time. For those calculations, we use the `furrr` package so these can be computed in parallel if you have set up a parallel processing plan (see `?future::plan`). +These intervals use an internal leave-one-out resample to compute the Jackknife statistic and will recompute the statistic for _every bootstrap resample_. If the statistic is expensive to compute, this may take some time. For those calculations, we use the furrr package so these can be computed in parallel if you have set up a parallel processing plan (see `?future::plan`). The user-facing function takes an argument for the function and the ellipses. diff --git a/vignettes/Working_with_rsets.Rmd b/vignettes/Working_with_rsets.Rmd index 8f59b2e8..4d6b4a2b 100644 --- a/vignettes/Working_with_rsets.Rmd +++ b/vignettes/Working_with_rsets.Rmd @@ -72,7 +72,7 @@ Now let's write a function that will, for each resample: 1. obtain the analysis data set (i.e. the 90% used for modeling) 1. fit a logistic regression model -1. 
predict the assessment data (the other 10% not used for the model) using the `broom` package +1. predict the assessment data (the other 10% not used for the model) using the broom package 1. determine if each sample was predicted correctly. Here is our function: @@ -109,7 +109,7 @@ example[1:10, setdiff(names(example), names(attrition))] For this model, the `.fitted` value is the linear predictor in log-odds units. -To compute this data set for each of the 100 resamples, we'll use the `map` function from the `purrr` package: +To compute this data set for each of the 100 resamples, we'll use the `map` function from the package: ```{r model_purrr, warning=FALSE} library(purrr) @@ -182,7 +182,7 @@ The calculated 95% confidence interval contains zero, so we don't have evidence ## Bootstrap Estimates of Model Coefficients -Unless there is already a column in the resample object that contains the fitted model, a function can be used to fit the model and save all of the model coefficients. The [`broom` package](https://cran.r-project.org/package=broom) package has a `tidy` function that will save the coefficients in a data frame. Instead of returning a data frame with a row for each model term, we will save a data frame with a single row and columns for each model term. As before, `purrr::map` can be used to estimate and save these values for each split. +Unless there is already a column in the resample object that contains the fitted model, a function can be used to fit the model and save all of the model coefficients. The [broom package](https://cran.r-project.org/package=broom) has a `tidy` function that will save the coefficients in a data frame. Instead of returning a data frame with a row for each model term, we will save a data frame with a single row and columns for each model term. As before, `purrr::map()` can be used to estimate and save these values for each split.
```{r coefs} @@ -200,7 +200,7 @@ bt_resamples$betas[[1]] ## Keeping Tidy -As previously mentioned, the [`broom` package](https://cran.r-project.org/package=broom) contains a class called `tidy` that created representations of objects that can be easily used for analysis, plotting, etc. rsample contains `tidy` methods for `rset` and `rsplit` objects. For example: +As previously mentioned, the [broom package](https://cran.r-project.org/package=broom) contains a class called `tidy` that created representations of objects that can be easily used for analysis, plotting, etc. rsample contains `tidy` methods for `rset` and `rsplit` objects. For example: ```{r tidy_rsplit} first_resample <- bt_resamples$splits[[1]] From ddc6ade2dc0772f1d5f9ed19b21de280de2ce699 Mon Sep 17 00:00:00 2001 From: agmurray Date: Thu, 15 Aug 2024 13:29:22 -0700 Subject: [PATCH 23/82] Realized one error in prior pull request--(line 129 of NEWS file) where I inadvertantly deleted a word. Fixed and updated. --- NEWS.md | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/NEWS.md b/NEWS.md index fa4979e7..9e96a39d 100644 --- a/NEWS.md +++ b/NEWS.md @@ -126,7 +126,7 @@ * Attempts to stratify on a `Surv` object now error more informatively (#230). -* Exposed argument from `make_strata()` in user-facing resampling functions (#229). +* Exposed `pool` argument from `make_strata()` in user-facing resampling functions (#229). * Deprecated the `gather()` method for `rset` objects in favor of `tidyr::pivot_longer()` (#233). 
From a8c750ab193b193775ecc07f46d0e01d3b3088e9 Mon Sep 17 00:00:00 2001 From: ccani007 Date: Thu, 15 Aug 2024 13:43:30 -0700 Subject: [PATCH 24/82] Update snapshot test (whitespace change) --- tests/testthat/_snaps/mc.md | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/tests/testthat/_snaps/mc.md b/tests/testthat/_snaps/mc.md index 3041e784..17708974 100644 --- a/tests/testthat/_snaps/mc.md +++ b/tests/testthat/_snaps/mc.md @@ -3,7 +3,7 @@ Code mc_cv(warpbreaks) Output - # Monte Carlo cross-validation (0.75/0.25) with 25 resamples + # Monte Carlo cross-validation (0.75/0.25) with 25 resamples # A tibble: 25 x 2 splits id From 0e76b9390e24d27f28c4d26516306ab6b3fda6f0 Mon Sep 17 00:00:00 2001 From: Priyata Kalra Date: Thu, 15 Aug 2024 23:13:22 +0200 Subject: [PATCH 25/82] changed all rlang::abort to cli_abort. There were lot os paste0 rlang abort, they have been changed to proper syntax for cli_abort. --- R/slide.R | 46 +++++++++++++++++++++++----------------------- 1 file changed, 23 insertions(+), 23 deletions(-) diff --git a/R/slide.R b/R/slide.R index d6862f17..8d4d5c7e 100644 --- a/R/slide.R +++ b/R/slide.R @@ -216,7 +216,7 @@ sliding_window <- function(data, rlang::check_dots_empty() if (!is.data.frame(data)) { - rlang::abort("`data` must be a data frame.") + cli_abort("{.arg data} must be a {.cls data frame}.") } lookback <- check_lookback(lookback) @@ -226,7 +226,7 @@ sliding_window <- function(data, skip <- check_skip(skip) if (assess_start > assess_stop) { - rlang::abort("`assess_start` must be less than or equal to `assess_stop`.") + cli_abort("{.arg assess_start} must be less than or equal to {.arg assess_stop}.") } seq <- vctrs::vec_seq_along(data) @@ -299,7 +299,7 @@ sliding_index <- function(data, rlang::check_dots_empty() if (!is.data.frame(data)) { - rlang::abort("`data` must be a data frame.") + cli_abort("{.arg data} must be a {.cls data frame}.") } step <- check_step(step) @@ -309,7 +309,7 @@ sliding_index <- function(data, loc <- 
tidyselect::eval_select(index, data) if (length(loc) != 1L) { - rlang::abort("`index` must specify exactly one column in `data`.") + cli_abort("{.arg index} must specify exactly one column in {.arg data}.") } index_attrib <- index @@ -389,7 +389,7 @@ sliding_period <- function(data, rlang::check_dots_empty() if (!is.data.frame(data)) { - rlang::abort("`data` must be a data frame.") + cli_abort("{.arg data} must be a {.cls data frame}.") } lookback <- check_lookback(lookback) @@ -398,14 +398,14 @@ sliding_period <- function(data, step <- check_step(step) if (assess_start > assess_stop) { - rlang::abort("`assess_start` must be less than or equal to `assess_stop`.") + cli_abort("{.arg assess_start} must be less than or equal to {.arg assess_stop}.") } index <- rlang::enexpr(index) loc <- tidyselect::eval_select(index, data) if (length(loc) != 1L) { - rlang::abort("`index` must specify exactly one column in `data`.") + cli_abort("{.arg index} must specify exactly one column in {.arg data}.") } index_attrib <- index @@ -479,7 +479,7 @@ sliding_period <- function(data, check_lookback <- function(x) { if (vctrs::vec_size(x) != 1L) { - rlang::abort(paste0("`lookback` must have size 1.")) + cli_abort("{.arg lookback} must have size 1.") } if (identical(x, Inf)) { @@ -487,11 +487,11 @@ check_lookback <- function(x) { } if (!rlang::is_integerish(x, finite = TRUE)) { - rlang::abort(paste0("`lookback` must be an integer of size 1, or `Inf`.")) + cli_abort("{.arg lookback} must be an integer of size 1, or {.arg Inf}.") } if (x < 0L) { - rlang::abort(paste0("`lookback` must be positive, or zero.")) + cli_abort("{.arg lookback} must be positive, or zero.") } vctrs::vec_cast(x, integer(), x_arg = "lookback") @@ -499,7 +499,7 @@ check_lookback <- function(x) { check_assess <- function(x, arg) { if (vctrs::vec_size(x) != 1L) { - rlang::abort(paste0("`", arg, "` must have size 1.")) + cli_abort("{.arg {arg}} must have size 1.") } if (identical(x, Inf)) { @@
-507,11 +507,11 @@ check_assess <- function(x, arg) { } if (!rlang::is_integerish(x, finite = TRUE)) { - rlang::abort(paste0("`", arg, "` must be an integer of size 1, or `Inf`.")) + cli_abort("{.arg {arg}} must be an integer of size 1, or `Inf`.") } if (x <= 0L) { - rlang::abort(paste0("`", arg, "` must be positive.")) + cli_abort("{.arg {arg}} must be positive.") } vctrs::vec_cast(x, integer(), x_arg = arg) @@ -519,15 +519,15 @@ check_assess <- function(x, arg) { check_step <- function(x) { if (vctrs::vec_size(x) != 1L) { - rlang::abort(paste0("`step` must have size 1.")) + cli_abort("{.arg step} must have size 1.") } if (!rlang::is_integerish(x, finite = TRUE)) { - rlang::abort(paste0("`step` must be an integer of size 1.")) + cli_abort("{.arg step} must be an integer of size 1.") } if (x <= 0L) { - rlang::abort(paste0("`step` must be positive.")) + cli_abort("{.arg step} must be positive.") } vctrs::vec_cast(x, integer(), x_arg = "step") @@ -535,15 +535,15 @@ check_step <- function(x) { check_skip <- function(x) { if (vctrs::vec_size(x) != 1L) { - rlang::abort(paste0("`skip` must have size 1.")) + cli_abort("{.arg skip} must have size 1.") } if (!rlang::is_integerish(x, finite = TRUE)) { - rlang::abort(paste0("`skip` must be an integer of size 1.")) + cli_abort("{.arg skip} must be an integer of size 1.") } if (x < 0L) { - rlang::abort(paste0("`skip` must be positive, or zero.")) + cli_abort("{.arg skip} must be positive, or zero.") } vctrs::vec_cast(x, integer(), x_arg = "skip") @@ -577,19 +577,19 @@ slice_step <- function(indices, step) { seq2_by <- function(from, to, by) { if (length(from) != 1) { - rlang::abort("`from` must be length one") + cli_abort("{.arg from} must be length one") } if (length(to) != 1) { - rlang::abort("`to` must be length one") + cli_abort("{.arg to} must be length one") } by <- as.integer(by) if (length(by) != 1) { - rlang::abort("`by` must be length one") + cli_abort("{.arg by} must be length one") } if
(by <= 0L) { - rlang::abort("`by` must be positive") + cli_abort("{.arg by} must be positive") } if (from > to) { From 73036620755892c496e78fc6bbd4b4a7011ce2d8 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?N=C3=BAria=20Mercad=C3=A9-Besora?= <61558739+nmercadeb@users.noreply.github.com> Date: Thu, 15 Aug 2024 14:26:09 -0700 Subject: [PATCH 26/82] Update vignettes/Common_Patterns.Rmd Co-authored-by: Michael Mahoney --- vignettes/Common_Patterns.Rmd | 9 +++++++++ 1 file changed, 9 insertions(+) diff --git a/vignettes/Common_Patterns.Rmd b/vignettes/Common_Patterns.Rmd index a69a0c7d..01516084 100644 --- a/vignettes/Common_Patterns.Rmd +++ b/vignettes/Common_Patterns.Rmd @@ -222,4 +222,13 @@ And if you want to set the size of windows based on units of time, for instance sliding_period(Chicago, date, "year") %>% head(2) ``` +All of these functions produce analysis sets of the same size, with the start and end of the analysis set "sliding" down your data frame. If you'd rather have your analysis set get progressively larger, so that you're predicting new data based upon a growing set of older observations, you can use the `sliding_window()` function with `lookback = Inf`: +```{r} +sliding_window(Chicago, lookback = Inf) %>% + head(2) +``` + +This is commonly referred to as "evaluation on a rolling forecasting origin", or more colloquially, "rolling origin cross-validation". + +Note that all of these time-based resampling functions are deterministic: unlike the rest of the package, running these functions repeatedly under different random seeds will always return the same results.
From 494b52de6fe9183ef50d8994d9e42c914c01ed4b Mon Sep 17 00:00:00 2001 From: agmurray Date: Thu, 15 Aug 2024 14:27:22 -0700 Subject: [PATCH 27/82] Adjusting the added space found in edit --- NEWS.md | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/NEWS.md b/NEWS.md index 9e96a39d..6b18a630 100644 --- a/NEWS.md +++ b/NEWS.md @@ -126,7 +126,7 @@ * Attempts to stratify on a `Surv` object now error more informatively (#230). -* Exposed `pool` argument from `make_strata()` in user-facing resampling functions (#229). +* Exposed `pool` argument from `make_strata()` in user-facing resampling functions (#229). * Deprecated the `gather()` method for `rset` objects in favor of `tidyr::pivot_longer()` (#233). From ccfba5e8fee59709aea54fd6d5194944c3858331 Mon Sep 17 00:00:00 2001 From: seb09 Date: Thu, 15 Aug 2024 14:55:24 -0700 Subject: [PATCH 28/82] Prevent LOO through vfold_cv() vfold_cv() throws an error, if used for leave-one-out cross-validation, refering to loo_cv() instead. Fixes #440 --- R/vfold.R | 17 +++++++++++------ man/vfold_cv.Rd | 4 +++- tests/testthat/_snaps/vfold.md | 4 +++- 3 files changed, 17 insertions(+), 8 deletions(-) diff --git a/R/vfold.R b/R/vfold.R index 06712410..94f12c0a 100644 --- a/R/vfold.R +++ b/R/vfold.R @@ -12,7 +12,9 @@ #' @template strata_details #' @inheritParams make_strata #' @param data A data frame. -#' @param v The number of partitions of the data set. +#' @param v The number of partitions of the data set. Should be an integer +#' smaller than `nrow(data)`. If you want to create a split for a leave-one-out +#' cross-validation (`v = nrow(data)`), please use [loo_cv()] instead. #' @param repeats The number of times to repeat the V-fold partitioning. #' @param strata A variable in `data` (single character or name) used to conduct #' stratified sampling. 
When not `NULL`, each resample is created within the @@ -74,17 +76,20 @@ vfold_cv <- function(data, v = 10, repeats = 1, strata_check(strata, data) check_repeats(repeats) + if (isTRUE(v == nrow(data))) { + rlang::abort(c( + "Leave-one-out cross-validation is not supported by `vfold_cv()`.", + x = "You set `v` to `nrow(data)`, which would result in a leave-one-out cross-validation.", + i = "Use `loo_cv()` in this case." + )) + } + if (repeats == 1) { split_objs <- vfold_splits( data = data, v = v, strata = strata, breaks = breaks, pool = pool ) } else { - if (v == nrow(data)) { - rlang::abort( - glue::glue("Repeated resampling when `v` is {v} would create identical resamples") - ) - } for (i in 1:repeats) { tmp <- vfold_splits(data = data, v = v, strata = strata, breaks = breaks ,pool = pool) tmp$id2 <- tmp$id diff --git a/man/vfold_cv.Rd b/man/vfold_cv.Rd index d605b747..6524efd9 100644 --- a/man/vfold_cv.Rd +++ b/man/vfold_cv.Rd @@ -9,7 +9,9 @@ vfold_cv(data, v = 10, repeats = 1, strata = NULL, breaks = 4, pool = 0.1, ...) \arguments{ \item{data}{A data frame.} -\item{v}{The number of partitions of the data set.} +\item{v}{The number of partitions of the data set. Should be an integer +smaller than \code{nrow(data)}. If you want to create a split for a leave-one-out +cross-validation (\code{v = nrow(data)}), please use \code{\link[=loo_cv]{loo_cv()}} instead.} \item{repeats}{The number of times to repeat the V-fold partitioning.} diff --git a/tests/testthat/_snaps/vfold.md b/tests/testthat/_snaps/vfold.md index 50788f81..b9542c7d 100644 --- a/tests/testthat/_snaps/vfold.md +++ b/tests/testthat/_snaps/vfold.md @@ -25,7 +25,9 @@ --- - Repeated resampling when `v` is 150 would create identical resamples + Leave-one-out cross-validation is not supported by `vfold_cv()`. + x You set `v` to `nrow(data)`, which would result in a leave-one-out cross-validation. + i Use `loo_cv()` in this case. 
--- From 79ac30b73b91184d2248ed004db4f4fea30a9d59 Mon Sep 17 00:00:00 2001 From: Priyata Kalra Date: Thu, 15 Aug 2024 23:59:11 +0200 Subject: [PATCH 29/82] update snapshottest and the review correctly. the snapshots are running with 1 fail. --- R/initial_validation_split.R | 12 ++--- R/make_groups.R | 6 +-- R/mc.R | 2 +- R/nest.R | 2 +- .../_snaps/initial_validation_split.md | 4 ++ tests/testthat/_snaps/nesting.new.md | 52 +++++++++++++++++++ 6 files changed, 67 insertions(+), 11 deletions(-) create mode 100644 tests/testthat/_snaps/nesting.new.md diff --git a/R/initial_validation_split.R b/R/initial_validation_split.R index f95bf1b6..0de77be0 100644 --- a/R/initial_validation_split.R +++ b/R/initial_validation_split.R @@ -120,26 +120,26 @@ initial_validation_split <- function(data, check_prop_3 <- function(prop, call = rlang::caller_env()) { if (!is.numeric(prop)) { - cli_abort("`prop` needs to be numeric.", call = call) + cli_abort("{.arg prop} needs to be numeric.", call = call) } if (any(is.na(prop))) { - cli_abort("`prop` cannot contain `NA`.", call = call) + cli_abort("{.arg prop} cannot contain `NA`.", call = call) } if (any(is.null(prop))) { - cli_abort("`prop` cannot contain `NULL`.", call = call) + cli_abort("{.arg prop} cannot contain `NULL`.", call = call) } if (length(prop) != 2L) { cli_abort( - "`prop` needs to contain the proportions for training and validation.", + "{.arg prop} needs to contain the proportions for training and validation.", call = call ) } if (any(!(prop > 0)) | any(!(prop < 1))) { - cli_abort("Elements of `prop` need to be in (0, 1).", call = call) + cli_abort("Elements of {.arg prop} need to be in (0, 1).", call = call) } if (!(sum(prop) > 0 ) | !(sum(prop) < 1) ) { cli_abort( - "The sum of the proportions in `prop` needs to be in (0, 1).", + "The sum of the proportions in {.arg prop} needs to be in (0, 1).", call = call ) } diff --git a/R/make_groups.R b/R/make_groups.R index 5b5ef4a2..a4378c0b 100644 --- a/R/make_groups.R +++ 
b/R/make_groups.R @@ -297,7 +297,7 @@ check_prop <- function(prop, replace) { acceptable_prop <- acceptable_prop && prop > 0 if (!acceptable_prop) { cli_abort( - "`prop` must be a number between 0 and 1.", + "{.arg prop} must be a number between 0 and 1.", call = rlang::caller_env() ) } @@ -346,12 +346,12 @@ validate_group <- function(group, data, call = rlang::caller_env()) { if (is.null(group) || !is.character(group) || length(group) != 1) { cli_abort( - "{.arg {group}} should be a single character value for the column that will be used for splitting.", + "{.arg group} should be a single character value for the column that will be used for splitting.", call = call ) } if (!any(names(data) == group)) { - cli_abort("{.arg {group}} should be a column in {.arg {data}}.", call = call) + cli_abort("{.arg group} should be a column in {.arg data}.", call = call) } group diff --git a/R/mc.R b/R/mc.R index 1eed1822..12d80677 100644 --- a/R/mc.R +++ b/R/mc.R @@ -104,7 +104,7 @@ mc_complement <- function(ind, n) { mc_splits <- function(data, prop = 3 / 4, times = 25, strata = NULL, breaks = 4, pool = 0.1) { if (!is.numeric(prop) | prop >= 1 | prop <= 0) { - cli_abort("`prop` must be a number on (0, 1).") + cli_abort("{.arg prop} must be a number on (0, 1).") } n <- nrow(data) diff --git a/R/nest.R b/R/nest.R index da6ac0b1..45072c6f 100644 --- a/R/nest.R +++ b/R/nest.R @@ -81,7 +81,7 @@ nested_cv <- function(data, outside, inside) { inner_cl <- cl[["inside"]] if (!is_call(inner_cl)) { cli_abort( - "{.arg {inside}} should be a expression such as {.fun vfold} or // + "{.arg inside} should be a expression such as {.code vfold()} or {.code bootstraps(times = 10)} instead of an existing object." 
) } diff --git a/tests/testthat/_snaps/initial_validation_split.md b/tests/testthat/_snaps/initial_validation_split.md index 05265a11..e9098b59 100644 --- a/tests/testthat/_snaps/initial_validation_split.md +++ b/tests/testthat/_snaps/initial_validation_split.md @@ -5,6 +5,7 @@ Condition Error in `analysis()`: ! The initial validation split does not contain an analysis set. + i You can access the training data with `training()`. --- @@ -13,6 +14,7 @@ Condition Error in `assessment()`: ! The initial validation split does not contain an assessment set. + i You can access the testing data with `testing()`. # basic split stratified @@ -49,6 +51,7 @@ Condition Error in `analysis()`: ! The initial validation split does not contain an analysis set. + i You can access the training data with `training()`. --- @@ -57,6 +60,7 @@ Condition Error in `assessment()`: ! The initial validation split does not contain an assessment set. + i You can access the testing data with `testing()`. # check_prop_3() works diff --git a/tests/testthat/_snaps/nesting.new.md b/tests/testthat/_snaps/nesting.new.md new file mode 100644 index 00000000..349a32cc --- /dev/null +++ b/tests/testthat/_snaps/nesting.new.md @@ -0,0 +1,52 @@ +# bad args + + Code + skip_if(new_rng_snapshots) + set.seed(123) + nested_cv(mtcars, outside = bootstraps(times = 5), inside = vfold_cv(v = 3)) + Condition + Warning: + Using bootstrapping as the outer resample is dangerous since the inner resample might have the same data point in both the analysis and assessment set. + Output + # Nested resampling: + # outer: Bootstrap sampling + # inner: 3-fold cross-validation + # A tibble: 5 x 3 + splits id inner_resamples + + 1 Bootstrap1 + 2 Bootstrap2 + 3 Bootstrap3 + 4 Bootstrap4 + 5 Bootstrap5 + +--- + + Code + nested_cv(mtcars, outside = vfold_cv(), inside = folds) + Condition + Error in `nested_cv()`: + ! `inside` should be a expression such as `vfold()` or `bootstraps(times = 10)` instead of an existing object. 
+ +# printing + + Code + rs1 + Output + # Nested resampling: + # outer: 10-fold cross-validation + # inner: 3-fold cross-validation + # A tibble: 10 x 3 + splits id inner_resamples + + 1 Fold01 + 2 Fold02 + 3 Fold03 + 4 Fold04 + 5 Fold05 + 6 Fold06 + 7 Fold07 + 8 Fold08 + 9 Fold09 + 10 Fold10 + From 3ea126634b993e174abd2dc4c52c70e941c5da50 Mon Sep 17 00:00:00 2001 From: Emil Hvitfeldt Date: Thu, 15 Aug 2024 15:12:44 -0700 Subject: [PATCH 30/82] add news bullet about #464 --- NEWS.md | 2 ++ 1 file changed, 2 insertions(+) diff --git a/NEWS.md b/NEWS.md index a6f6374c..de58f72d 100644 --- a/NEWS.md +++ b/NEWS.md @@ -6,6 +6,8 @@ * Fixed example for `nested_cv()` (@seb09, #520). +* Removed trailing space in printing of `mc_cv()` objects (@ccani007, #464). + ## Bug fixes * `vfold_cv()` now utilizes the `breaks` argument correctly for repeated cross-validation (@ZWael, #471). From a6c2b726b7c3d8b4322e7dc1831c6a1427b332c6 Mon Sep 17 00:00:00 2001 From: Priyata Kalra Date: Fri, 16 Aug 2024 00:13:52 +0200 Subject: [PATCH 31/82] staged only labels, when I run the test I get 0 Fails. --- R/labels.R | 12 ++++++------ 1 file changed, 6 insertions(+), 6 deletions(-) diff --git a/R/labels.R b/R/labels.R index 53e7bfe6..2aff05ba 100644 --- a/R/labels.R +++ b/R/labels.R @@ -14,7 +14,7 @@ #' labels(vfold_cv(mtcars)) labels.rset <- function(object, make_factor = FALSE, ...) { if (inherits(object, "nested_cv")) { - rlang::abort("`labels` not implemented for nested resampling") + cli_abort("{.arg labels} not implemented for nested resampling") } if (make_factor) { as.factor(object$id) @@ -27,7 +27,7 @@ labels.rset <- function(object, make_factor = FALSE, ...) { #' @export labels.vfold_cv <- function(object, make_factor = FALSE, ...) 
{ if (inherits(object, "nested_cv")) { - rlang::abort("`labels` not implemented for nested resampling") + cli_abort("{.arg labels} not implemented for nested resampling") } is_repeated <- attr(object, "repeats") > 1 if (is_repeated) { @@ -92,18 +92,18 @@ labels.rsplit <- function(object, ...) { #' @export add_resample_id <- function(.data, split, dots = FALSE) { if (!inherits(dots, "logical") || length(dots) > 1) { - rlang::abort("`dots` should be a single logical.") + cli_abort("{.arg dots} should be a single logical.") } if (!inherits(.data, "data.frame")) { - rlang::abort("`.data` should be a data frame.") + cli_abort("{.arg .data} should be a {.cls data frame}.") } if (!inherits(split, "rsplit")) { - rlang::abort("`split` should be a single 'rset' object.") + cli_abort("{.arg split} should be a single 'rset' object.") } labs <- labels(split) if (!tibble::is_tibble(labs) && nrow(labs) == 1) { - rlang::abort("`split` should be a single 'rset' object.") + cli_abort("{.arg split} should be a single 'rset' object.") } if (dots) { From 3a99d3a0bc2a0de9595b39375915aae240d1606a Mon Sep 17 00:00:00 2001 From: agmurray Date: Thu, 15 Aug 2024 15:15:42 -0700 Subject: [PATCH 32/82] addressing comments from Hannah on make_strata.R and vignettes.Rmd --- R/make_strata.R | 1 - vignettes/Working_with_rsets.Rmd | 2 +- 2 files changed, 1 insertion(+), 2 deletions(-) diff --git a/R/make_strata.R b/R/make_strata.R index cef69225..af23003e 100644 --- a/R/make_strata.R +++ b/R/make_strata.R @@ -51,7 +51,6 @@ #' table(x3) #' table(make_strata(x3)) #' -#' # `oilType` data from #' x4 <- rep(LETTERS[1:7], c(37, 26, 3, 7, 11, 10, 2)) #' table(x4) #' table(make_strata(x4)) diff --git a/vignettes/Working_with_rsets.Rmd b/vignettes/Working_with_rsets.Rmd index 4d6b4a2b..4d97c71b 100644 --- a/vignettes/Working_with_rsets.Rmd +++ b/vignettes/Working_with_rsets.Rmd @@ -109,7 +109,7 @@ example[1:10, setdiff(names(example), names(attrition))] For this model, the `.fitted` value is the linear 
predictor in log-odds units. -To compute this data set for each of the 100 resamples, we'll use the `map` function from the package: +To compute this data set for each of the 100 resamples, we'll use the `map` function from the purrr package: ```{r model_purrr, warning=FALSE} library(purrr) From d45a72ad28f4b3dbdf7fb408a32d6b8f005482c4 Mon Sep 17 00:00:00 2001 From: Demetri Pananos Date: Fri, 16 Aug 2024 05:40:29 -0400 Subject: [PATCH 33/82] Edits so checks can pass --- R/bootci.R | 3 +- tests/testthat/_snaps/bootci.md | 8 +++--- tests/testthat/_snaps/bootci.new.md | 44 +++++++++++++++++++++++++++++ 3 files changed, 50 insertions(+), 5 deletions(-) create mode 100644 tests/testthat/_snaps/bootci.new.md diff --git a/R/bootci.R b/R/bootci.R index 510ff44f..e47d3497 100644 --- a/R/bootci.R +++ b/R/bootci.R @@ -128,7 +128,8 @@ check_num_resamples <- function(x, B = 1000) { dplyr::filter(n < B) if (nrow(x) > 0) { - cli::cli_warn("Recommend at least {B} non-missing bootstrap resamples for {x$terms} term{?s}.") + terms <- paste0("`", x$term, "`") + cli::cli_warn("Recommend at least {B} non-missing bootstrap resamples for {cli::qty(terms)} term{?s} {terms}.") } invisible(NULL) } diff --git a/tests/testthat/_snaps/bootci.md b/tests/testthat/_snaps/bootci.md index d3106318..77e607e1 100644 --- a/tests/testthat/_snaps/bootci.md +++ b/tests/testthat/_snaps/bootci.md @@ -4,9 +4,9 @@ int_pctl(bt_resamples, res) Condition Warning: - Recommend at least 1000 non-missing bootstrap resamples for `mean` term. + Recommend at least 1000 non-missing bootstrap resamples for term `mean`. Error in `pctl_single()`: - ! All statistics have missing values. + ! All statistics have missing values.. --- @@ -14,7 +14,7 @@ int_t(bt_resamples, res) Condition Warning: - Recommend at least 500 non-missing bootstrap resamples for `mean` term. + Recommend at least 500 non-missing bootstrap resamples for term `mean`. Error in `t_single()`: ! All statistics have missing values. 
@@ -24,7 +24,7 @@ int_bca(bt_resamples, res, .fn = bad_stats) Condition Warning: - Recommend at least 1000 non-missing bootstrap resamples for `mean` term. + Recommend at least 1000 non-missing bootstrap resamples for term `mean`. Error in `bca_calc()`: ! All statistics have missing values. diff --git a/tests/testthat/_snaps/bootci.new.md b/tests/testthat/_snaps/bootci.new.md new file mode 100644 index 00000000..8632d77e --- /dev/null +++ b/tests/testthat/_snaps/bootci.new.md @@ -0,0 +1,44 @@ +# Upper & lower confidence interval does not contain NA + + Code + int_pctl(bt_resamples, res) + Condition + Warning: + Recommend at least 1000 non-missing bootstrap resamples for term `mean`. + Error in `pctl_single()`: + ! All statistics have missing values. + +--- + + Code + int_t(bt_resamples, res) + Condition + Warning: + Recommend at least 500 non-missing bootstrap resamples for term `mean`. + Error in `t_single()`: + ! All statistics have missing values. + +--- + + Code + int_bca(bt_resamples, res, .fn = bad_stats) + Condition + Warning: + Recommend at least 1000 non-missing bootstrap resamples for term `mean`. + Error in `bca_calc()`: + ! All statistics have missing values. 
+ +# regression intervals + + Code + skip_if(new_rng_snapshots) + set.seed(123) + int_2 <- reg_intervals(mpg ~ disp + wt, data = mtcars, filter = term == "wt", + model_fn = "glm", keep_reps = TRUE) + int_2 + Output + # A tibble: 1 x 7 + term .lower .estimate .upper .alpha .method .replicates + > + 1 wt -5.62 -3.46 -0.955 0.05 student-t [1,001 x 2] + From 11db0678c5de2fccb642df4f116ae690fe4c240f Mon Sep 17 00:00:00 2001 From: Demetri Pananos Date: Fri, 16 Aug 2024 10:41:21 -0400 Subject: [PATCH 34/82] Edits check_num_resamples warning --- R/bootci.R | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/R/bootci.R b/R/bootci.R index e47d3497..504f8aa7 100644 --- a/R/bootci.R +++ b/R/bootci.R @@ -128,8 +128,8 @@ check_num_resamples <- function(x, B = 1000) { dplyr::filter(n < B) if (nrow(x) > 0) { - terms <- paste0("`", x$term, "`") - cli::cli_warn("Recommend at least {B} non-missing bootstrap resamples for {cli::qty(terms)} term{?s} {terms}.") + terms <- x$term + cli::cli_warn("Recommend at least {B} non-missing bootstrap resamples for {terms} term{?s}.") } invisible(NULL) } From bbb42f4ef9494f84b7666feda3a59df7911762da Mon Sep 17 00:00:00 2001 From: nmercadeb Date: Fri, 16 Aug 2024 08:34:43 -0700 Subject: [PATCH 35/82] superseded --- R/rolling_origin.R | 2 ++ vignettes/Common_Patterns.Rmd | 1 + 2 files changed, 3 insertions(+) diff --git a/R/rolling_origin.R b/R/rolling_origin.R index 96352293..3325961a 100644 --- a/R/rolling_origin.R +++ b/R/rolling_origin.R @@ -1,5 +1,7 @@ #' Rolling Origin Forecast Resampling #' +#' `r lifecycle::badge("superseded")` +#' #' This resampling method is useful when the data set has a strong time #' component. The resamples are not random and contain data points that are #' consecutive values. 
The function assumes that the original data set are diff --git a/vignettes/Common_Patterns.Rmd b/vignettes/Common_Patterns.Rmd index 01516084..a7c51356 100644 --- a/vignettes/Common_Patterns.Rmd +++ b/vignettes/Common_Patterns.Rmd @@ -222,6 +222,7 @@ And if you want to set the size of windows based on units of time, for instance sliding_period(Chicago, date, "year") %>% head(2) ``` + All of these functions produce analysis sets of the same size, with the start and end of the analysis set "sliding" down your data frame. If you'd rather have your analysis set get progressively larger, so that you're predicting new data based upon a growing set of older observations, you can use the `sliding_window()` function with `lookback = -Inf`: ```{r} From cba51be1ebf08e1a4042ebbba1696bcbfb58311e Mon Sep 17 00:00:00 2001 From: Priyata Kalra Date: Mon, 19 Aug 2024 15:43:24 +0200 Subject: [PATCH 36/82] Added cli_abort and changed some formating. Fixes 511 --- R/rset.R | 14 +++++++------- 1 file changed, 7 insertions(+), 7 deletions(-) diff --git a/R/rset.R b/R/rset.R index ee8e9d09..751cb4fe 100644 --- a/R/rset.R +++ b/R/rset.R @@ -18,7 +18,7 @@ new_rset <- function(splits, ids, attrib = NULL, ids <- tibble(id = ids) } else { if (!all(grepl("^id", names(ids)))) { - rlang::abort("The `ids` tibble column names should start with 'id'.") + cli_abort("The {.code id} tibble column names should start with 'id'.") } } either_type <- function(x) { @@ -26,15 +26,15 @@ new_rset <- function(splits, ids, attrib = NULL, } ch_check <- vapply(ids, either_type, c(logical = TRUE)) if (!all(ch_check)) { - rlang::abort("All ID columns should be character or factor vectors.") + cli_abort("{.strong All} ID columns should be character or factor {.field vectors}.") } if (!is_tibble(splits)) { splits <- tibble(splits = splits) } else { if (ncol(splits) > 1 | names(splits)[1] != "splits") { - rlang::abort( - "The `splits` tibble should have a single column named `splits`." 
+ cli_abort( + "The {.var splits} tibble should have a single column named {.code splits}." ) } } @@ -42,11 +42,11 @@ new_rset <- function(splits, ids, attrib = NULL, where_rsplits <- vapply(splits[["splits"]], is_rsplit, logical(1)) if (!all(where_rsplits)) { - rlang::abort("Each element of `splits` must be an `rsplit` object.") + cli_abort("Each element of {.var splits} must be an {.var rsplit} object.") } if (nrow(ids) != nrow(splits)) { - rlang::abort("Split and ID vectors have different lengths.") + cli_abort("Split and ID vectors have different lengths.") } # Create another element to the splits that is a tibble containing @@ -64,7 +64,7 @@ new_rset <- function(splits, ids, attrib = NULL, if (!is.null(attrib)) { if (any(names(attrib) == "")) { - rlang::abort("`attrib` should be a fully named list.") + cli_abort("{.var attrib} should be a fully named {.field list}.") } for (i in names(attrib)) { attr(res, i) <- attrib[[i]] From 74e61dd5cfef0f50e9651fb0491ba53e4533c1cc Mon Sep 17 00:00:00 2001 From: Priyata Date: Thu, 29 Aug 2024 12:19:17 +0200 Subject: [PATCH 37/82] Update R/rset.R Co-authored-by: Simon P. Couch --- R/rset.R | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/R/rset.R b/R/rset.R index 751cb4fe..281505d5 100644 --- a/R/rset.R +++ b/R/rset.R @@ -64,7 +64,7 @@ new_rset <- function(splits, ids, attrib = NULL, if (!is.null(attrib)) { if (any(names(attrib) == "")) { - cli_abort("{.var attrib} should be a fully named {.field list}.") + cli_abort("{.arg attrib} should be a fully named list.") } for (i in names(attrib)) { attr(res, i) <- attrib[[i]] From 70b6b9d741074b823a66111a75aa522375349c69 Mon Sep 17 00:00:00 2001 From: Priyata Date: Thu, 29 Aug 2024 12:19:24 +0200 Subject: [PATCH 38/82] Update R/rset.R Co-authored-by: Simon P. 
Couch --- R/rset.R | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/R/rset.R b/R/rset.R index 281505d5..0defe9f6 100644 --- a/R/rset.R +++ b/R/rset.R @@ -42,7 +42,7 @@ new_rset <- function(splits, ids, attrib = NULL, where_rsplits <- vapply(splits[["splits"]], is_rsplit, logical(1)) if (!all(where_rsplits)) { - cli_abort("Each element of {.var splits} must be an {.var rsplit} object.") + cli_abort("Each element of {.arg splits} must be an {.cls rsplit} object.") } if (nrow(ids) != nrow(splits)) { From e1dc2f2ce176b6024d46f274cd2b9765205cd9d0 Mon Sep 17 00:00:00 2001 From: Priyata Date: Thu, 29 Aug 2024 12:19:31 +0200 Subject: [PATCH 39/82] Update R/rset.R Co-authored-by: Simon P. Couch --- R/rset.R | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/R/rset.R b/R/rset.R index 0defe9f6..061b57c2 100644 --- a/R/rset.R +++ b/R/rset.R @@ -26,7 +26,7 @@ new_rset <- function(splits, ids, attrib = NULL, } ch_check <- vapply(ids, either_type, c(logical = TRUE)) if (!all(ch_check)) { - cli_abort("{.strong All} ID columns should be character or factor {.field vectors}.") + cli_abort("{.strong All} ID columns should be character or factor vectors.") } if (!is_tibble(splits)) { From fbc2e2e9debdacd99177e905295ae32a5b0c63cd Mon Sep 17 00:00:00 2001 From: Priyata Kalra Date: Thu, 29 Aug 2024 13:02:07 +0200 Subject: [PATCH 40/82] Rsample cli errors update for 510 --- R/permutations.R | 11 ++++++++--- R/reg_intervals.R | 10 ++++++++-- 2 files changed, 16 insertions(+), 5 deletions(-) diff --git a/R/permutations.R b/R/permutations.R index 8e2df6b0..64e17b0b 100644 --- a/R/permutations.R +++ b/R/permutations.R @@ -58,10 +58,15 @@ permutations <- function(data, col_id <- tidyselect::eval_select(permute, data) if (identical(length(col_id), 0L)) { - rlang::abort("You must specify at least one column to permute!") + cli_abort("{.strong You must specify at least one column to permute!}") } else if (identical(length(col_id), ncol(data))) { - rlang::abort("You 
have selected all columns to permute. This effectively reorders the rows in the original data without changing the data structure. Please select fewer columns to permute.") - } + cli_abort(c( + "!" = "{.emph {.strong You have selected all columns to permute.}}", + "x" = "This effectively reorders the rows in the original data without changing the data structure.", + "i" = "To achieve meaningful permutation:", + "*" = "{.field Select fewer columns} to permute.", + ">" = "Ideal: Choose specific columns that are relevant to your analysis.")) + } split_objs <- perm_splits(data, times) diff --git a/R/reg_intervals.R b/R/reg_intervals.R index 50a0cf44..ef2faed8 100644 --- a/R/reg_intervals.R +++ b/R/reg_intervals.R @@ -54,12 +54,18 @@ reg_intervals <- } else { times <- times[1] if (!is.numeric(times)) { - rlang::abort("'times' should be a single integer.") + cli_abort(c( + "x" = "{.arg times} should be a single integer.", + "i" = "You provided {.val {times}}." + )) } } if (length(alpha) != 1 || !is.numeric(alpha)) { - abort("`alpha` must be a single numeric value.") + cli_abort(c( + "x" = "{.arg alpha} must be a single numeric value.", + "i" = "Please ensure that {.arg alpha} is a numeric value and not a vector or other type." + )) } if (model_fn %in% c("survreg", "coxph")) { From 3cb2ebb61ba99e1f0744c80d27dde56c67bcdad7 Mon Sep 17 00:00:00 2001 From: Priyata Kalra Date: Thu, 29 Aug 2024 13:53:47 +0200 Subject: [PATCH 41/82] There are snapshot test errors for validation_set and vfold. --- R/tidy.R | 2 +- R/validation_set.R | 8 ++++---- R/vfold.R | 38 +++++++++++++++++++------------------- 3 files changed, 24 insertions(+), 24 deletions(-) diff --git a/R/tidy.R b/R/tidy.R index acba8b33..48839b5f 100644 --- a/R/tidy.R +++ b/R/tidy.R @@ -119,7 +119,7 @@ tidy.nested_cv <- function(x, unique_ind = TRUE, ...) 
{ inner_id <- grep("^id", names(inner_tidy)) if (length(inner_id) != length(id_cols)) { - rlang::abort("Cannot merge tidy data sets") + cli_abort("{.strong {.red Cannot} merge tidy data sets}") } names(inner_tidy)[inner_id] <- id_cols full_join(outer_tidy, inner_tidy, by = id_cols) diff --git a/R/validation_set.R b/R/validation_set.R index 14d48eb3..23683488 100644 --- a/R/validation_set.R +++ b/R/validation_set.R @@ -89,8 +89,8 @@ validation.val_split <- function(x, ...) { #' @rdname validation_set #' @export testing.val_split <- function(x, ...) { - rlang::abort( - "The testing data is not part of the validation set object.", - i = "It is part of the result of the initial 3-way split, e.g., with `initial_validation_split()`." - ) + cli_abort(c( + "x" = "{.strong The testing data is not part of the validation set object.}", + "i" = "It is part of the result of the initial 3-way split, e.g., with {.code initial_validation_split()}." +)) } diff --git a/R/vfold.R b/R/vfold.R index 06712410..efa8d6f6 100644 --- a/R/vfold.R +++ b/R/vfold.R @@ -81,9 +81,10 @@ vfold_cv <- function(data, v = 10, repeats = 1, ) } else { if (v == nrow(data)) { - rlang::abort( - glue::glue("Repeated resampling when `v` is {v} would create identical resamples") - ) + cli_abort(c( + "x" = sprintf("Repeated resampling when {.arg v} is %s would create identical resamples", v), + "i" = "Consider adjusting the value of {.arg v} to avoid identical resamples." 
+ )) } for (i in 1:repeats) { tmp <- vfold_splits(data = data, v = v, strata = strata, breaks = breaks ,pool = pool) @@ -225,14 +226,13 @@ group_vfold_cv <- function(data, group = NULL, v = NULL, repeats = 1, balance = split_objs <- group_vfold_splits(data = data, group = group, v = v, balance = balance, strata = strata, pool = pool) } else { if (is.null(v)) { - rlang::abort( - "Repeated resampling when `v` is `NULL` would create identical resamples" + cli_abort( + "Repeated resampling when {.arg v} is {.val NULL} would create identical resamples" ) } if (v == length(unique(getElement(data, group)))) { - rlang::abort( - glue::glue("Repeated resampling when `v` is {v} would create identical resamples") - ) + cli_abort("Repeated resampling when {.arg v} is {.val {v}} would create identical resamples") + } for (i in 1:repeats) { tmp <- group_vfold_splits(data = data, group = group, v = v, balance = balance, strata = strata, pool = pool) @@ -292,12 +292,11 @@ group_vfold_splits <- function(data, group, v = NULL, balance, strata = NULL, po ) if (max_v < 5) { - rlang::abort(c( - message, - x = glue::glue("The least common stratum only had {max_v} groups, which may not be enough for cross-validation."), - i = "Set `v` explicitly to override this error." - ), - call = rlang::caller_env()) + cli_abort(c( + if (is.function(message)) message() else message, + "x" = "The least common stratum only had {.val {max_v}} groups, which may not be enough for cross-validation.", + "i" = "Set {.arg v} explicitly to override this error." 
+ ), call = rlang::caller_env()) } rlang::warn(c( @@ -334,10 +333,11 @@ add_vfolds <- function(x, v) { check_v <- function(v, max_v, rows = "rows", call = rlang::caller_env()) { if (!is.numeric(v) || length(v) != 1 || v < 2) { - rlang::abort("`v` must be a single positive integer greater than 1", call = call) + cli_abort("{.var v} must be a single positive integer greater than 1", call = call) } else if (v > max_v) { - rlang::abort( - glue::glue("The number of {rows} is less than `v = {v}`"), call = call + cli_abort( + "The number of {.field {rows}} is less than {.arg v} = {.val {v}}", + call = call ) } } @@ -358,7 +358,7 @@ check_grouped_strata <- function(group, strata, pool, data) { if (nrow(vctrs::vec_unique(grouped_table)) != nrow(vctrs::vec_unique(grouped_table["group"]))) { - rlang::abort("`strata` must be constant across all members of each `group`.") + cli_abort("{.var strata} must be constant across all members of each {.var group}.") } strata @@ -366,6 +366,6 @@ check_grouped_strata <- function(group, strata, pool, data) { check_repeats <- function(repeats, call = rlang::caller_env()) { if (!is.numeric(repeats) || length(repeats) != 1 || repeats < 1) { - rlang::abort("`repeats` must be a single positive integer", call = call) + cli_abort("{.var repeats} must be a single positive integer", call = call) } } From bb62a1046e10cf7bd48e6e4df618556c227404ad Mon Sep 17 00:00:00 2001 From: Hannah Frick Date: Wed, 4 Sep 2024 13:39:25 +0100 Subject: [PATCH 42/82] increase linking and update formatting --- R/initial_split.R | 14 +++++++------- R/initial_validation_split.R | 4 ++-- R/validation_set.R | 2 +- man/initial_split.Rd | 10 +++++----- man/initial_validation_split.Rd | 2 +- man/validation_set.Rd | 2 +- 6 files changed, 17 insertions(+), 17 deletions(-) diff --git a/R/initial_split.R b/R/initial_split.R index 51d2c5cd..c754e424 100644 --- a/R/initial_split.R +++ b/R/initial_split.R @@ -1,20 +1,20 @@ #' Simple Training/Test Set Splitting #' -#' `initial_split` 
creates a single binary split of the data into a training -#' set and testing set. `initial_time_split` does the same, but takes the +#' `initial_split()` creates a single binary split of the data into a training +#' set and testing set. `initial_time_split()` does the same, but takes the #' _first_ `prop` samples for training, instead of a random selection. -#' `group_initial_split` creates splits of the data based +#' `group_initial_split()` creates splits of the data based #' on some grouping variable, so that all data in a "group" is assigned to -#' the same split. -#' -#' @details `training` and `testing` are used to extract the resulting data. +#' the same split. +#' +#' @details `training()` and `testing()` are used to extract the resulting data. #' #' @template strata_details #' @inheritParams vfold_cv #' @inheritParams make_strata #' @param prop The proportion of data to be retained for modeling/analysis. #' @export -#' @return An `rsplit` object that can be used with the `training` and `testing` +#' @return An `rsplit` object that can be used with the `training()` and `testing()` #' functions to extract the data in each split. #' @examplesIf rlang::is_installed("modeldata") #' set.seed(1353) diff --git a/R/initial_validation_split.R b/R/initial_validation_split.R index 70d40ec8..0aec03fd 100644 --- a/R/initial_validation_split.R +++ b/R/initial_validation_split.R @@ -9,9 +9,9 @@ #' based on some grouping variable, so that all data in a "group" are assigned #' to the same partition. #' -#' @details `training()`, `validation()`, and `testing()` can be used to extract the +#' @details [training()], [validation()], and [testing()] can be used to extract the #' resulting data sets. -#' Use [`validation_set()`] to create an `rset` object for use with functions from +#' Use [validation_set()] to create an `rset` object for use with functions from #' the tune package such as `tune::tune_grid()`. 
#' #' @template strata_details diff --git a/R/validation_set.R b/R/validation_set.R index 2ec79da7..db775e02 100644 --- a/R/validation_set.R +++ b/R/validation_set.R @@ -1,6 +1,6 @@ #' Create a Validation Split for Tuning #' -#' `validation_set` creates a the validation split for model tuning. +#' `validation_set()` creates a the validation split for model tuning. #' #' @param split An object of class `initial_validation_split`, such as resulting #' from [initial_validation_split()] or [group_initial_validation_split()]. diff --git a/man/initial_split.Rd b/man/initial_split.Rd index 1e8946f2..07fc96b6 100644 --- a/man/initial_split.Rd +++ b/man/initial_split.Rd @@ -61,19 +61,19 @@ grouping observations with the same value to either the analysis or assessment set within a fold.} } \value{ -An \code{rsplit} object that can be used with the \code{training} and \code{testing} +An \code{rsplit} object that can be used with the \code{training()} and \code{testing()} functions to extract the data in each split. } \description{ -\code{initial_split} creates a single binary split of the data into a training -set and testing set. \code{initial_time_split} does the same, but takes the +\code{initial_split()} creates a single binary split of the data into a training +set and testing set. \code{initial_time_split()} does the same, but takes the \emph{first} \code{prop} samples for training, instead of a random selection. -\code{group_initial_split} creates splits of the data based +\code{group_initial_split()} creates splits of the data based on some grouping variable, so that all data in a "group" is assigned to the same split. } \details{ -\code{training} and \code{testing} are used to extract the resulting data. +\code{training()} and \code{testing()} are used to extract the resulting data. With a \code{strata} argument, the random sampling is conducted \emph{within the stratification variable}. 
This can help ensure that the diff --git a/man/initial_validation_split.Rd b/man/initial_validation_split.Rd index 0b9d5be8..9017e36e 100644 --- a/man/initial_validation_split.Rd +++ b/man/initial_validation_split.Rd @@ -83,7 +83,7 @@ based on some grouping variable, so that all data in a "group" are assigned to the same partition. } \details{ -\code{training()}, \code{validation()}, and \code{testing()} can be used to extract the +\code{\link[=training]{training()}}, \code{\link[=validation]{validation()}}, and \code{\link[=testing]{testing()}} can be used to extract the resulting data sets. Use \code{\link[=validation_set]{validation_set()}} to create an \code{rset} object for use with functions from the tune package such as \code{tune::tune_grid()}. diff --git a/man/validation_set.Rd b/man/validation_set.Rd index b5e3fc2a..98de1f31 100644 --- a/man/validation_set.Rd +++ b/man/validation_set.Rd @@ -35,7 +35,7 @@ An tibble with classes \code{validation_set}, \code{rset}, \code{tbl_df}, \code{ column called \code{id} that has a character string with the resample identifier. } \description{ -\code{validation_set} creates a the validation split for model tuning. +\code{validation_set()} creates a the validation split for model tuning. } \examples{ set.seed(1353) From 5b4733dbe636cfc109eccd9d1b8ee5e22de9724a Mon Sep 17 00:00:00 2001 From: Hannah Frick Date: Wed, 4 Sep 2024 13:42:04 +0100 Subject: [PATCH 43/82] Add acknowledgement --- NEWS.md | 2 ++ 1 file changed, 2 insertions(+) diff --git a/NEWS.md b/NEWS.md index 2e678b33..e088e668 100644 --- a/NEWS.md +++ b/NEWS.md @@ -4,6 +4,8 @@ * Started moving error messages to cli (#499, #502). +* Improved documentation for `initial_split()` and friends (@laurabrianna, #519). + ## Bug fixes * `vfold_cv()` now utilizes the `breaks` argument correctly for repeated cross-validation (@ZWael, #471). 
From 3e8eb73bcf769149db91f304b2d9d509fae94bc1 Mon Sep 17 00:00:00 2001 From: Hannah Frick Date: Wed, 4 Sep 2024 13:56:09 +0100 Subject: [PATCH 44/82] refers to the package, not a class --- NEWS.md | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/NEWS.md b/NEWS.md index 6b18a630..6e4d9582 100644 --- a/NEWS.md +++ b/NEWS.md @@ -226,7 +226,7 @@ Small maintenance release. * A vignette and a function (`prepper`) were included to facilitate using the recipes with rsample. * A `gather` method was added for `rset` objects. * A `labels` method was added for `rsplit` objects. This can help identify which resample is being used even when the whole `rset` object is not available. -* A variety of dplyr methods were added (e.g. `filter()`, `mutate()`, etc) that work without dropping classes or attributes of the `rsample` objects. +* A variety of dplyr methods were added (e.g. `filter()`, `mutate()`, etc) that work without dropping classes or attributes of the rsample objects. # rsample 0.0.1 (2017-07-08) From 0806b5a7ee2f04ff7ed453c01eb268799fc388ea Mon Sep 17 00:00:00 2001 From: Hannah Frick Date: Wed, 4 Sep 2024 13:58:55 +0100 Subject: [PATCH 45/82] Add acknowledgment --- NEWS.md | 2 ++ 1 file changed, 2 insertions(+) diff --git a/NEWS.md b/NEWS.md index 6e4d9582..7e4e4b15 100644 --- a/NEWS.md +++ b/NEWS.md @@ -4,6 +4,8 @@ * Started moving error messages to cli (#499, #502). +* Formatting improvement: package names are now not in backticks anymore (@agmurray, #525). + ## Bug fixes * `vfold_cv()` now utilizes the `breaks` argument correctly for repeated cross-validation (@ZWael, #471). 
From 3ca624dd3f196d37fc79ac061f6a473d29d0cc8e Mon Sep 17 00:00:00 2001 From: Hannah Frick Date: Wed, 4 Sep 2024 14:26:26 +0100 Subject: [PATCH 46/82] `document()` found change from previous PR --- man/make_strata.Rd | 1 - 1 file changed, 1 deletion(-) diff --git a/man/make_strata.Rd b/man/make_strata.Rd index c2b7b434..9d9c50bc 100644 --- a/man/make_strata.Rd +++ b/man/make_strata.Rd @@ -64,7 +64,6 @@ x3 <- factor(x2) table(x3) table(make_strata(x3)) -# `oilType` data from x4 <- rep(LETTERS[1:7], c(37, 26, 3, 7, 11, 10, 2)) table(x4) table(make_strata(x4)) From bf9b80b0fdeb7cfbd7dccdd83eeab31c09037675 Mon Sep 17 00:00:00 2001 From: Hannah Frick Date: Wed, 4 Sep 2024 16:49:03 +0100 Subject: [PATCH 47/82] Link to function help --- R/boot.R | 2 +- R/caret.R | 2 +- R/form_pred.R | 2 +- R/labels.R | 3 +-- R/permutations.R | 2 +- R/reg_intervals.R | 2 +- R/tidy.R | 4 ++-- man/bootstraps.Rd | 2 +- man/form_pred.Rd | 2 +- man/labels.rset.Rd | 3 +-- man/permutations.Rd | 2 +- man/reg_intervals.Rd | 2 +- man/rsample2caret.Rd | 2 +- man/tidy.rsplit.Rd | 4 ++-- 14 files changed, 16 insertions(+), 18 deletions(-) diff --git a/R/boot.R b/R/boot.R index fa8fb651..edb40593 100644 --- a/R/boot.R +++ b/R/boot.R @@ -17,7 +17,7 @@ #' @param times The number of bootstrap samples. #' @param apparent A logical. Should an extra resample be added where the #' analysis and holdout subset are the entire data set. This is required for -#' some estimators used by the `summary()` function that require the apparent +#' some estimators used by the [summary()] function that require the apparent #' error rate. #' @export #' @return A tibble with classes `bootstraps`, `rset`, `tbl_df`, `tbl`, and diff --git a/R/caret.R b/R/caret.R index 609fbc19..26f66fcc 100644 --- a/R/caret.R +++ b/R/caret.R @@ -4,7 +4,7 @@ #' \pkg{rsample} and \pkg{caret}. #' #' @param object An `rset` object. Currently, -#' `nested_cv()` is not supported. +#' [nested_cv()] is not supported. 
#' @return `rsample2caret()` returns a list that mimics the #' `index` and `indexOut` elements of a #' `trainControl` object. `caret2rsample()` returns an diff --git a/R/form_pred.R b/R/form_pred.R index 3d166c37..77348511 100644 --- a/R/form_pred.R +++ b/R/form_pred.R @@ -1,6 +1,6 @@ #' Extract Predictor Names from Formula or Terms #' -#' `all.vars` returns all variables used in a formula. This +#' While [all.vars()] returns all variables used in a formula, this #' function only returns the variables explicitly used on the #' right-hand side (i.e., it will not resolve dots unless the #' object is terms with a data set specified). diff --git a/R/labels.R b/R/labels.R index 9cfe767e..131bc635 100644 --- a/R/labels.R +++ b/R/labels.R @@ -1,8 +1,7 @@ #' Find Labels from rset Object #' #' Produce a vector of resampling labels (e.g. "Fold1") from -#' an `rset` object. Currently, `nested_cv()` -#' is not supported. +#' an `rset` object. Currently, [nested_cv()] is not supported. #' #' @param object An `rset` object #' @param make_factor A logical for whether the results should be diff --git a/R/permutations.R b/R/permutations.R index e5a42a13..c9702e48 100644 --- a/R/permutations.R +++ b/R/permutations.R @@ -5,7 +5,7 @@ #' by permuting/shuffling one or more columns. This results in analysis #' samples where some columns are in their original order and some columns #' are permuted to a random order. Unlike other sampling functions in -#' rsample, there is no assessment set and calling `assessment()` on a +#' rsample, there is no assessment set and calling [assessment()] on a #' permutation split will throw an error. #' #' @param data A data frame. diff --git a/R/reg_intervals.R b/R/reg_intervals.R index 50a0cf44..ee8bb3b4 100644 --- a/R/reg_intervals.R +++ b/R/reg_intervals.R @@ -13,7 +13,7 @@ #' @param filter A logical expression used to remove rows from the final result, or `NULL` to keep all rows. 
#' @param keep_reps Should the individual parameter estimates for each bootstrap #' sample be retained? -#' @param ... Options to pass to the model function (such as `family` for `glm()`). +#' @param ... Options to pass to the model function (such as `family` for [stats::glm()]). #' @return A tibble with columns "term", ".lower", ".estimate", ".upper", #' ".alpha", and ".method". If `keep_reps = TRUE`, an additional list column #' called ".replicates" is also returned. diff --git a/R/tidy.R b/R/tidy.R index 4d268c2a..60a5c6a2 100644 --- a/R/tidy.R +++ b/R/tidy.R @@ -11,8 +11,8 @@ #' @return A tibble with columns `Row` and `Data`. The latter has possible #' values "Analysis" or "Assessment". For `rset` inputs, identification #' columns are also returned but their names and values depend on the type of -#' resampling. For `vfold_cv()`, contains a column "Fold" and, if repeats are -#' used, another called "Repeats". `bootstraps()` and `mc_cv()` use the column +#' resampling. For [vfold_cv()], contains a column "Fold" and, if repeats are +#' used, another called "Repeats". [bootstraps()] and [mc_cv()] use the column #' "Resample". #' @details Note that for nested resampling, the rows of the inner resample, #' named `inner_Row`, are *relative* row indices and do not correspond to the diff --git a/man/bootstraps.Rd b/man/bootstraps.Rd index 93854d30..f56a4d8a 100644 --- a/man/bootstraps.Rd +++ b/man/bootstraps.Rd @@ -33,7 +33,7 @@ of stratifying groups that are too small.} \item{apparent}{A logical. Should an extra resample be added where the analysis and holdout subset are the entire data set. 
This is required for -some estimators used by the \code{summary()} function that require the apparent +some estimators used by the \code{\link[=summary]{summary()}} function that require the apparent error rate.} \item{...}{These dots are for future extensions and must be empty.} diff --git a/man/form_pred.Rd b/man/form_pred.Rd index e80e43e9..189f50c0 100644 --- a/man/form_pred.Rd +++ b/man/form_pred.Rd @@ -16,7 +16,7 @@ object.} A character vector of names } \description{ -\code{all.vars} returns all variables used in a formula. This +While \code{\link[=all.vars]{all.vars()}} returns all variables used in a formula, this function only returns the variables explicitly used on the right-hand side (i.e., it will not resolve dots unless the object is terms with a data set specified). diff --git a/man/labels.rset.Rd b/man/labels.rset.Rd index 2bc14356..96dff7d1 100644 --- a/man/labels.rset.Rd +++ b/man/labels.rset.Rd @@ -22,8 +22,7 @@ A single character or factor vector. } \description{ Produce a vector of resampling labels (e.g. "Fold1") from -an \code{rset} object. Currently, \code{nested_cv()} -is not supported. +an \code{rset} object. Currently, \code{\link[=nested_cv]{nested_cv()}} is not supported. } \examples{ labels(vfold_cv(mtcars)) diff --git a/man/permutations.Rd b/man/permutations.Rd index c2773bed..54c14324 100644 --- a/man/permutations.Rd +++ b/man/permutations.Rd @@ -33,7 +33,7 @@ A permutation sample is the same size as the original data set and is made by permuting/shuffling one or more columns. This results in analysis samples where some columns are in their original order and some columns are permuted to a random order. Unlike other sampling functions in -rsample, there is no assessment set and calling \code{assessment()} on a +rsample, there is no assessment set and calling \code{\link[=assessment]{assessment()}} on a permutation split will throw an error. 
} \details{ diff --git a/man/reg_intervals.Rd b/man/reg_intervals.Rd index cf3b04a2..23a96fe2 100644 --- a/man/reg_intervals.Rd +++ b/man/reg_intervals.Rd @@ -38,7 +38,7 @@ NULL, 1,001 are used for t-intervals and 2,001 for percentile intervals.} \item{keep_reps}{Should the individual parameter estimates for each bootstrap sample be retained?} -\item{...}{Options to pass to the model function (such as \code{family} for \code{glm()}).} +\item{...}{Options to pass to the model function (such as \code{family} for \code{\link[stats:glm]{stats::glm()}}).} } \value{ A tibble with columns "term", ".lower", ".estimate", ".upper", diff --git a/man/rsample2caret.Rd b/man/rsample2caret.Rd index ac712c5b..e96fc98b 100644 --- a/man/rsample2caret.Rd +++ b/man/rsample2caret.Rd @@ -11,7 +11,7 @@ caret2rsample(ctrl, data = NULL) } \arguments{ \item{object}{An \code{rset} object. Currently, -\code{nested_cv()} is not supported.} +\code{\link[=nested_cv]{nested_cv()}} is not supported.} \item{data}{The data that was originally used to produce the \code{ctrl} object.} diff --git a/man/tidy.rsplit.Rd b/man/tidy.rsplit.Rd index 3d75c389..eca6c81b 100644 --- a/man/tidy.rsplit.Rd +++ b/man/tidy.rsplit.Rd @@ -28,8 +28,8 @@ sample for the same row in the original data.} A tibble with columns \code{Row} and \code{Data}. The latter has possible values "Analysis" or "Assessment". For \code{rset} inputs, identification columns are also returned but their names and values depend on the type of -resampling. For \code{vfold_cv()}, contains a column "Fold" and, if repeats are -used, another called "Repeats". \code{bootstraps()} and \code{mc_cv()} use the column +resampling. For \code{\link[=vfold_cv]{vfold_cv()}}, contains a column "Fold" and, if repeats are +used, another called "Repeats". \code{\link[=bootstraps]{bootstraps()}} and \code{\link[=mc_cv]{mc_cv()}} use the column "Resample". 
} \description{ From 71f779449f91ee2a0827fa136af753cb666bc8ab Mon Sep 17 00:00:00 2001 From: Hannah Frick Date: Wed, 4 Sep 2024 16:49:49 +0100 Subject: [PATCH 48/82] but don't link to the page you're already on --- R/make_groups.R | 2 +- man/make_groups.Rd | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/R/make_groups.R b/R/make_groups.R index d5dabf24..47f43021 100644 --- a/R/make_groups.R +++ b/R/make_groups.R @@ -25,7 +25,7 @@ #' only one) assessment set, but rather allow each observation to be in an #' assessment set zero-or-more times. As a result, those functions don't have #' a `balance` argument, and under the hood always specify `balance = "prop"` -#' when they call [make_groups()]. +#' when they call `make_groups()`. #' #' @keywords internal make_groups <- function(data, diff --git a/man/make_groups.Rd b/man/make_groups.Rd index d2bdc62a..508d9c5e 100644 --- a/man/make_groups.Rd +++ b/man/make_groups.Rd @@ -49,6 +49,6 @@ Similarly, \code{\link[=group_mc_cv]{group_mc_cv()}} and its derivatives don't a only one) assessment set, but rather allow each observation to be in an assessment set zero-or-more times. As a result, those functions don't have a \code{balance} argument, and under the hood always specify \code{balance = "prop"} -when they call \code{\link[=make_groups]{make_groups()}}. +when they call \code{make_groups()}. 
} \keyword{internal} From 7e2ca02399b1f083c068be883dfe9af66ca91609 Mon Sep 17 00:00:00 2001 From: Hannah Frick Date: Wed, 4 Sep 2024 16:51:28 +0100 Subject: [PATCH 49/82] misc tidy styling --- R/labels.R | 4 ++-- R/printing.R | 2 +- R/reg_intervals.R | 10 +++++----- R/rsplit.R | 2 +- R/tidy.R | 2 +- man/add_resample_id.Rd | 2 +- man/as.data.frame.rsplit.Rd | 2 +- man/labels.rset.Rd | 2 +- man/reg_intervals.Rd | 10 +++++----- man/tidy.rsplit.Rd | 2 +- 10 files changed, 19 insertions(+), 19 deletions(-) diff --git a/R/labels.R b/R/labels.R index 131bc635..becce98e 100644 --- a/R/labels.R +++ b/R/labels.R @@ -3,7 +3,7 @@ #' Produce a vector of resampling labels (e.g. "Fold1") from #' an `rset` object. Currently, [nested_cv()] is not supported. #' -#' @param object An `rset` object +#' @param object An `rset` object. #' @param make_factor A logical for whether the results should be #' a character or a factor. #' @param ... Not currently used. @@ -67,7 +67,7 @@ labels.rsplit <- function(object, ...) { #' For a data set, `add_resample_id()` will add at least one new column that #' identifies which resample that the data came from. In most cases, a single #' column is added but for some resampling methods, two or more are added. -#' @param .data A data frame +#' @param .data A data frame. #' @param split A single `rset` object. #' @param dots A single logical: should the id columns be prefixed with a "." #' to avoid name conflicts with `.data`? diff --git a/R/printing.R b/R/printing.R index d182597c..b456c12e 100644 --- a/R/printing.R +++ b/R/printing.R @@ -1,4 +1,4 @@ -## The `pretty` methods below are good for when you need to +## The `pretty()` methods below are good for when you need to ## textually describe the resampling procedure. 
Note that they ## can have more than one element (in the case of nesting) diff --git a/R/reg_intervals.R b/R/reg_intervals.R index ee8bb3b4..e820ab77 100644 --- a/R/reg_intervals.R +++ b/R/reg_intervals.R @@ -2,13 +2,13 @@ #' #' @param formula An R model formula with one outcome and at least one predictor. #' @param data A data frame. -#' @param model_fn The model to fit. Allowable values are "lm", "glm", -#' "survreg", and "coxph". The latter two require that the `survival` package +#' @param model_fn The model to fit. Allowable values are `"lm"`, `"glm"`, +#' `"survreg"`, and `"coxph"`. The latter two require that the survival package #' be installed. -#' @param type The type of bootstrap confidence interval. Values of "student-t" and -#' "percentile" are allowed. +#' @param type The type of bootstrap confidence interval. Values of `"student-t"` and +#' `"percentile"` are allowed. #' @param times A single integer for the number of bootstrap samples. If left -#' NULL, 1,001 are used for t-intervals and 2,001 for percentile intervals. +#' `NULL`, 1,001 are used for t-intervals and 2,001 for percentile intervals. #' @param alpha Level of significance. #' @param filter A logical expression used to remove rows from the final result, or `NULL` to keep all rows. #' @param keep_reps Should the individual parameter estimates for each bootstrap diff --git a/R/rsplit.R b/R/rsplit.R index 1bc38128..394145a4 100644 --- a/R/rsplit.R +++ b/R/rsplit.R @@ -71,7 +71,7 @@ as.integer.rsplit <- #' @param x An `rsplit` object. #' @param row.names `NULL` or a character vector giving the row names for the data frame. Missing values are not allowed. #' @param optional A logical: should the column names of the data be checked for legality? -#' @param data Either "analysis" or "assessment" to specify which data are returned. +#' @param data Either `"analysis"` or `"assessment"` to specify which data are returned. #' @param ... Not currently used. 
#' @examples #' library(dplyr) diff --git a/R/tidy.R b/R/tidy.R index 60a5c6a2..39c73ba1 100644 --- a/R/tidy.R +++ b/R/tidy.R @@ -3,7 +3,7 @@ #' The `tidy()` function from the \pkg{broom} package can be used on `rset` and #' `rsplit` objects to generate tibbles with which rows are in the analysis and #' assessment sets. -#' @param x A `rset` or `rsplit` object +#' @param x A `rset` or `rsplit` object #' @param unique_ind Should unique row identifiers be returned? For example, #' if `FALSE` then bootstrapping results will include multiple rows in the #' sample for the same row in the original data. diff --git a/man/add_resample_id.Rd b/man/add_resample_id.Rd index a1d6450a..5b73deb1 100644 --- a/man/add_resample_id.Rd +++ b/man/add_resample_id.Rd @@ -7,7 +7,7 @@ add_resample_id(.data, split, dots = FALSE) } \arguments{ -\item{.data}{A data frame} +\item{.data}{A data frame.} \item{split}{A single \code{rset} object.} diff --git a/man/as.data.frame.rsplit.Rd b/man/as.data.frame.rsplit.Rd index 6de9ad42..1ac6b7cb 100644 --- a/man/as.data.frame.rsplit.Rd +++ b/man/as.data.frame.rsplit.Rd @@ -31,7 +31,7 @@ assessment(x, ...) \item{optional}{A logical: should the column names of the data be checked for legality?} -\item{data}{Either "analysis" or "assessment" to specify which data are returned.} +\item{data}{Either \code{"analysis"} or \code{"assessment"} to specify which data are returned.} \item{...}{Not currently used.} } diff --git a/man/labels.rset.Rd b/man/labels.rset.Rd index 96dff7d1..0f314296 100644 --- a/man/labels.rset.Rd +++ b/man/labels.rset.Rd @@ -10,7 +10,7 @@ \method{labels}{vfold_cv}(object, make_factor = FALSE, ...) 
} \arguments{ -\item{object}{An \code{rset} object} +\item{object}{An \code{rset} object.} \item{make_factor}{A logical for whether the results should be a character or a factor.} diff --git a/man/reg_intervals.Rd b/man/reg_intervals.Rd index 23a96fe2..afc386e9 100644 --- a/man/reg_intervals.Rd +++ b/man/reg_intervals.Rd @@ -21,15 +21,15 @@ reg_intervals( \item{data}{A data frame.} -\item{model_fn}{The model to fit. Allowable values are "lm", "glm", -"survreg", and "coxph". The latter two require that the \code{survival} package +\item{model_fn}{The model to fit. Allowable values are \code{"lm"}, \code{"glm"}, +\code{"survreg"}, and \code{"coxph"}. The latter two require that the survival package be installed.} -\item{type}{The type of bootstrap confidence interval. Values of "student-t" and -"percentile" are allowed.} +\item{type}{The type of bootstrap confidence interval. Values of \code{"student-t"} and +\code{"percentile"} are allowed.} \item{times}{A single integer for the number of bootstrap samples. If left -NULL, 1,001 are used for t-intervals and 2,001 for percentile intervals.} +\code{NULL}, 1,001 are used for t-intervals and 2,001 for percentile intervals.} \item{alpha}{Level of significance.} diff --git a/man/tidy.rsplit.Rd b/man/tidy.rsplit.Rd index eca6c81b..3f2bec7f 100644 --- a/man/tidy.rsplit.Rd +++ b/man/tidy.rsplit.Rd @@ -16,7 +16,7 @@ \method{tidy}{nested_cv}(x, unique_ind = TRUE, ...) } \arguments{ -\item{x}{A \code{rset} or \code{rsplit} object} +\item{x}{A \code{rset} or \code{rsplit} object} \item{unique_ind}{Should unique row identifiers be returned? 
For example, if \code{FALSE} then bootstrapping results will include multiple rows in the From 6b0b52c3b3c1cadf142b5cd370c1f1f81062e844 Mon Sep 17 00:00:00 2001 From: Hannah Frick Date: Wed, 4 Sep 2024 16:51:51 +0100 Subject: [PATCH 50/82] doc leftover --- man/group_bootstraps.Rd | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/man/group_bootstraps.Rd b/man/group_bootstraps.Rd index e93dccd3..9a3c7878 100644 --- a/man/group_bootstraps.Rd +++ b/man/group_bootstraps.Rd @@ -25,7 +25,7 @@ assessment set within a fold.} \item{apparent}{A logical. Should an extra resample be added where the analysis and holdout subset are the entire data set. This is required for -some estimators used by the \code{summary()} function that require the apparent +some estimators used by the \code{\link[=summary]{summary()}} function that require the apparent error rate.} \item{...}{These dots are for future extensions and must be empty.} From 8fdd408ba791c8105b5f938d478c23cba5a5b38c Mon Sep 17 00:00:00 2001 From: Hannah Frick Date: Wed, 4 Sep 2024 16:52:16 +0100 Subject: [PATCH 51/82] Add acknowledgement --- NEWS.md | 2 ++ 1 file changed, 2 insertions(+) diff --git a/NEWS.md b/NEWS.md index d5814b8f..ed7ab225 100644 --- a/NEWS.md +++ b/NEWS.md @@ -12,6 +12,8 @@ * Formatting improvement: package names are now not in backticks anymore (@agmurray, #525). +* Improved documentation and formatting: function names are now more easily identifiable through either `()` at the end or being links to the function documentation (@brshallo , #521). + ## Bug fixes * `vfold_cv()` now utilizes the `breaks` argument correctly for repeated cross-validation (@ZWael, #471). 
From 64d2739f42975b18fabd73d6c863d90d9982b042 Mon Sep 17 00:00:00 2001 From: Hannah Frick Date: Wed, 4 Sep 2024 18:01:14 +0100 Subject: [PATCH 52/82] Add context in description --- R/rolling_origin.R | 12 ++++++++++-- man/rolling_origin.Rd | 7 +++++++ 2 files changed, 17 insertions(+), 2 deletions(-) diff --git a/R/rolling_origin.R b/R/rolling_origin.R index 3325961a..a9099d46 100644 --- a/R/rolling_origin.R +++ b/R/rolling_origin.R @@ -1,11 +1,18 @@ #' Rolling Origin Forecast Resampling #' +#' @description #' `r lifecycle::badge("superseded")` #' #' This resampling method is useful when the data set has a strong time #' component. The resamples are not random and contain data points that are #' consecutive values. The function assumes that the original data set are #' sorted in time order. +#' +#' This function is superseded by [sliding_window()], [sliding_index()], and +#' [sliding_period()] which provide more flexibility and control. Superseded +#' functions will not go away, but active development will be focused on the new +#' functions. +#' #' @details The main options, `initial` and `assess`, control the number of #' data points from the original data that are in the analysis and assessment #' set, respectively. When `cumulative = TRUE`, the analysis set will grow as @@ -63,8 +70,9 @@ rolling_origin <- function(data, initial = 5, assess = 1, cumulative = TRUE, skip = 0, lag = 0, ...) { lifecycle::signal_stage( - stage = "superseded", what = "rolling_origin()", - with = I("sliding_window(), sliding_index() and sliding_period()") + stage = "superseded", + what = "rolling_origin()", + with = I("`sliding_window()`, `sliding_index()` and `sliding_period()`") ) check_dots_empty() diff --git a/man/rolling_origin.Rd b/man/rolling_origin.Rd index 27597678..48f4c39e 100644 --- a/man/rolling_origin.Rd +++ b/man/rolling_origin.Rd @@ -42,10 +42,17 @@ and a column called \code{id} that has a character string with the resample identifier. 
} \description{ +\ifelse{html}{\href{https://lifecycle.r-lib.org/articles/stages.html#superseded}{\figure{lifecycle-superseded.svg}{options: alt='[Superseded]'}}}{\strong{[Superseded]}} + This resampling method is useful when the data set has a strong time component. The resamples are not random and contain data points that are consecutive values. The function assumes that the original data set are sorted in time order. + +This function is superseded by \code{\link[=sliding_window]{sliding_window()}}, \code{\link[=sliding_index]{sliding_index()}}, and +\code{\link[=sliding_period]{sliding_period()}} which provide more flexibility and control. Superseded +functions will not go away, but active development will be focused on the new +functions. } \details{ The main options, \code{initial} and \code{assess}, control the number of From cde1d32782b4308db336070c99db4efcb0977e78 Mon Sep 17 00:00:00 2001 From: Hannah Frick Date: Wed, 4 Sep 2024 18:05:40 +0100 Subject: [PATCH 53/82] Add acknowledgment --- NEWS.md | 2 ++ 1 file changed, 2 insertions(+) diff --git a/NEWS.md b/NEWS.md index a6f6374c..93778ddc 100644 --- a/NEWS.md +++ b/NEWS.md @@ -6,6 +6,8 @@ * Fixed example for `nested_cv()` (@seb09, #520). +* `rolling_origin()` is now superseded by `sliding_window()`, `sliding_index()`, and `sliding_period()` which provide more flexibility and control (@nmercadeb, #524). + ## Bug fixes * `vfold_cv()` now utilizes the `breaks` argument correctly for repeated cross-validation (@ZWael, #471). 
From 515c3ff6ba2f0143041baef778e83ba4752ab5e8 Mon Sep 17 00:00:00 2001 From: Hannah Frick Date: Mon, 9 Sep 2024 14:23:14 +0100 Subject: [PATCH 54/82] move check to `check_v()` - use cli - thread call through - keep error for repeated CV --- R/vfold.R | 21 ++++++++++++--------- tests/testthat/_snaps/vfold.md | 4 +--- 2 files changed, 13 insertions(+), 12 deletions(-) diff --git a/R/vfold.R b/R/vfold.R index 94f12c0a..436f9a42 100644 --- a/R/vfold.R +++ b/R/vfold.R @@ -76,20 +76,17 @@ vfold_cv <- function(data, v = 10, repeats = 1, strata_check(strata, data) check_repeats(repeats) - if (isTRUE(v == nrow(data))) { - rlang::abort(c( - "Leave-one-out cross-validation is not supported by `vfold_cv()`.", - x = "You set `v` to `nrow(data)`, which would result in a leave-one-out cross-validation.", - i = "Use `loo_cv()` in this case." - )) - } - if (repeats == 1) { split_objs <- vfold_splits( data = data, v = v, strata = strata, breaks = breaks, pool = pool ) } else { + if (v == nrow(data)) { + rlang::abort( + glue::glue("Repeated resampling when `v` is {v} would create identical resamples") + ) + } for (i in 1:repeats) { tmp <- vfold_splits(data = data, v = v, strata = strata, breaks = breaks ,pool = pool) tmp$id2 <- tmp$id @@ -337,13 +334,19 @@ add_vfolds <- function(x, v) { x } -check_v <- function(v, max_v, rows = "rows", call = rlang::caller_env()) { +check_v <- function(v, max_v, rows = "rows", prevent_loo = TRUE, call = rlang::caller_env()) { if (!is.numeric(v) || length(v) != 1 || v < 2) { rlang::abort("`v` must be a single positive integer greater than 1", call = call) } else if (v > max_v) { rlang::abort( glue::glue("The number of {rows} is less than `v = {v}`"), call = call ) + } else if (prevent_loo && isTRUE(v == max_v)) { + cli_abort(c( + "Leave-one-out cross-validation is not supported by this function.", + "x" = "You set `v` to `nrow(data)`, which would result in a leave-one-out cross-validation.", + "i" = "Use `loo_cv()` in this case." 
+ ), call = call) } } diff --git a/tests/testthat/_snaps/vfold.md b/tests/testthat/_snaps/vfold.md index b9542c7d..50788f81 100644 --- a/tests/testthat/_snaps/vfold.md +++ b/tests/testthat/_snaps/vfold.md @@ -25,9 +25,7 @@ --- - Leave-one-out cross-validation is not supported by `vfold_cv()`. - x You set `v` to `nrow(data)`, which would result in a leave-one-out cross-validation. - i Use `loo_cv()` in this case. + Repeated resampling when `v` is 150 would create identical resamples --- From f1d4217e668fdd1032d0939262e3823ecb4e96b7 Mon Sep 17 00:00:00 2001 From: Hannah Frick Date: Mon, 9 Sep 2024 14:24:38 +0100 Subject: [PATCH 55/82] still allow leave-one-group-out CV --- R/vfold.R | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/R/vfold.R b/R/vfold.R index 436f9a42..db1983c9 100644 --- a/R/vfold.R +++ b/R/vfold.R @@ -313,7 +313,7 @@ group_vfold_splits <- function(data, group, v = NULL, balance, strata = NULL, po if (is.null(v)) { v <- max_v } - check_v(v = v, max_v = max_v, rows = "groups", call = rlang::caller_env()) + check_v(v = v, max_v = max_v, rows = "groups", prevent_loo = FALSE, call = rlang::caller_env()) indices <- make_groups(data, group, v, balance, strata) indices <- lapply(indices, default_complement, n = nrow(data)) From e7895a9b95ac7fc787307af11785e562fc2c315d Mon Sep 17 00:00:00 2001 From: Hannah Frick Date: Mon, 9 Sep 2024 14:25:35 +0100 Subject: [PATCH 56/82] let `loo_cv()` still use `vfold_splits()` --- R/loo.R | 2 +- R/vfold.R | 4 ++-- 2 files changed, 3 insertions(+), 3 deletions(-) diff --git a/R/loo.R b/R/loo.R index c5329d1d..59457c89 100644 --- a/R/loo.R +++ b/R/loo.R @@ -13,7 +13,7 @@ #' @export loo_cv <- function(data, ...) 
{ check_dots_empty() - split_objs <- vfold_splits(data = data, v = nrow(data)) + split_objs <- vfold_splits(data = data, v = nrow(data), prevent_loo = FALSE) split_objs <- list( splits = map(split_objs$splits, change_class), diff --git a/R/vfold.R b/R/vfold.R index db1983c9..23ca77d4 100644 --- a/R/vfold.R +++ b/R/vfold.R @@ -124,10 +124,10 @@ vfold_cv <- function(data, v = 10, repeats = 1, } -vfold_splits <- function(data, v = 10, strata = NULL, breaks = 4, pool = 0.1) { +vfold_splits <- function(data, v = 10, strata = NULL, breaks = 4, pool = 0.1, prevent_loo = TRUE) { n <- nrow(data) - check_v(v, n, call = rlang::caller_env()) + check_v(v, n, prevent_loo = prevent_loo, call = rlang::caller_env()) if (is.null(strata)) { folds <- sample(rep(1:v, length.out = n)) From eae8b182dbf9056443676c8fd6d8d35b9078abc8 Mon Sep 17 00:00:00 2001 From: Hannah Frick Date: Mon, 9 Sep 2024 14:26:01 +0100 Subject: [PATCH 57/82] capture error in tests --- tests/testthat/_snaps/clustering.md | 10 ++++++++++ tests/testthat/_snaps/vfold.md | 10 ++++++++++ tests/testthat/test-clustering.R | 1 + tests/testthat/test-vfold.R | 1 + 4 files changed, 22 insertions(+) diff --git a/tests/testthat/_snaps/clustering.md b/tests/testthat/_snaps/clustering.md index 956070ab..16d691b2 100644 --- a/tests/testthat/_snaps/clustering.md +++ b/tests/testthat/_snaps/clustering.md @@ -30,6 +30,16 @@ `repeats` must be a single positive integer +--- + + Code + clustering_cv(mtcars, mpg, v = nrow(mtcars)) + Condition + Error in `clustering_cv()`: + ! Leave-one-out cross-validation is not supported by this function. + x You set `v` to `nrow(data)`, which would result in a leave-one-out cross-validation. + i Use `loo_cv()` in this case. 
+ # printing Code diff --git a/tests/testthat/_snaps/vfold.md b/tests/testthat/_snaps/vfold.md index 50788f81..1af5a8a0 100644 --- a/tests/testthat/_snaps/vfold.md +++ b/tests/testthat/_snaps/vfold.md @@ -35,6 +35,16 @@ `repeats` must be a single positive integer +--- + + Code + vfold_cv(mtcars, v = nrow(mtcars)) + Condition + Error in `vfold_cv()`: + ! Leave-one-out cross-validation is not supported by this function. + x You set `v` to `nrow(data)`, which would result in a leave-one-out cross-validation. + i Use `loo_cv()` in this case. + # printing Code diff --git a/tests/testthat/test-clustering.R b/tests/testthat/test-clustering.R index 00e27a25..f049bd00 100644 --- a/tests/testthat/test-clustering.R +++ b/tests/testthat/test-clustering.R @@ -45,6 +45,7 @@ test_that("bad args", { expect_snapshot(error = TRUE, clustering_cv(Orange, v = 1, vars = "Tree")) expect_snapshot_error(clustering_cv(Orange, repeats = 0)) expect_snapshot_error(clustering_cv(Orange, repeats = NULL)) + expect_snapshot(error = TRUE, clustering_cv(mtcars, mpg, v = nrow(mtcars))) }) test_that("printing", { diff --git a/tests/testthat/test-vfold.R b/tests/testthat/test-vfold.R index bbf9d890..51c00ba4 100644 --- a/tests/testthat/test-vfold.R +++ b/tests/testthat/test-vfold.R @@ -85,6 +85,7 @@ test_that("bad args", { expect_snapshot_error(vfold_cv(iris, v = 150, repeats = 2)) expect_snapshot_error(vfold_cv(Orange, repeats = 0)) expect_snapshot_error(vfold_cv(Orange, repeats = NULL)) + expect_snapshot(error = TRUE, vfold_cv(mtcars, v = nrow(mtcars))) }) test_that("printing", { From 1c78123a032e7be264efacf3d7eb95ee1d98bf16 Mon Sep 17 00:00:00 2001 From: Hannah Frick Date: Mon, 9 Sep 2024 14:31:37 +0100 Subject: [PATCH 58/82] keep docs brief for symmetry with the lower bound for `v` --- R/vfold.R | 4 +--- man/vfold_cv.Rd | 4 +--- 2 files changed, 2 insertions(+), 6 deletions(-) diff --git a/R/vfold.R b/R/vfold.R index 23ca77d4..6060d8f2 100644 --- a/R/vfold.R +++ b/R/vfold.R @@ -12,9 +12,7 @@ #' 
@template strata_details #' @inheritParams make_strata #' @param data A data frame. -#' @param v The number of partitions of the data set. Should be an integer -#' smaller than `nrow(data)`. If you want to create a split for a leave-one-out -#' cross-validation (`v = nrow(data)`), please use [loo_cv()] instead. +#' @param v The number of partitions of the data set. #' @param repeats The number of times to repeat the V-fold partitioning. #' @param strata A variable in `data` (single character or name) used to conduct #' stratified sampling. When not `NULL`, each resample is created within the diff --git a/man/vfold_cv.Rd b/man/vfold_cv.Rd index 6524efd9..d605b747 100644 --- a/man/vfold_cv.Rd +++ b/man/vfold_cv.Rd @@ -9,9 +9,7 @@ vfold_cv(data, v = 10, repeats = 1, strata = NULL, breaks = 4, pool = 0.1, ...) \arguments{ \item{data}{A data frame.} -\item{v}{The number of partitions of the data set. Should be an integer -smaller than \code{nrow(data)}. If you want to create a split for a leave-one-out -cross-validation (\code{v = nrow(data)}), please use \code{\link[=loo_cv]{loo_cv()}} instead.} +\item{v}{The number of partitions of the data set.} \item{repeats}{The number of times to repeat the V-fold partitioning.} From c24701c418ea2ec3b1e72c5f1284a94fa65d2007 Mon Sep 17 00:00:00 2001 From: Hannah Frick Date: Mon, 9 Sep 2024 14:35:40 +0100 Subject: [PATCH 59/82] Add acknowledgment --- NEWS.md | 2 ++ 1 file changed, 2 insertions(+) diff --git a/NEWS.md b/NEWS.md index 2e678b33..68140eac 100644 --- a/NEWS.md +++ b/NEWS.md @@ -4,6 +4,8 @@ * Started moving error messages to cli (#499, #502). +* `vfold_cv()` and `clustering_cv()` now error on implicit leave-one-out cross-validation (@seb09, #527). + ## Bug fixes * `vfold_cv()` now utilizes the `breaks` argument correctly for repeated cross-validation (@ZWael, #471). 
From c6f907d34ce878509347f3fd52be512518d0ff60 Mon Sep 17 00:00:00 2001 From: Hannah Frick Date: Mon, 9 Sep 2024 15:03:02 +0100 Subject: [PATCH 60/82] accept new snapshot for `nested_cv()` --- tests/testthat/_snaps/nesting.md | 4 +-- tests/testthat/_snaps/nesting.new.md | 52 ---------------------------- 2 files changed, 2 insertions(+), 54 deletions(-) delete mode 100644 tests/testthat/_snaps/nesting.new.md diff --git a/tests/testthat/_snaps/nesting.md b/tests/testthat/_snaps/nesting.md index a06abd43..349a32cc 100644 --- a/tests/testthat/_snaps/nesting.md +++ b/tests/testthat/_snaps/nesting.md @@ -25,8 +25,8 @@ Code nested_cv(mtcars, outside = vfold_cv(), inside = folds) Condition - Error in `list2()`: - ! Argument 3 can't be empty. + Error in `nested_cv()`: + ! `inside` should be a expression such as `vfold()` or `bootstraps(times = 10)` instead of an existing object. # printing diff --git a/tests/testthat/_snaps/nesting.new.md b/tests/testthat/_snaps/nesting.new.md deleted file mode 100644 index 349a32cc..00000000 --- a/tests/testthat/_snaps/nesting.new.md +++ /dev/null @@ -1,52 +0,0 @@ -# bad args - - Code - skip_if(new_rng_snapshots) - set.seed(123) - nested_cv(mtcars, outside = bootstraps(times = 5), inside = vfold_cv(v = 3)) - Condition - Warning: - Using bootstrapping as the outer resample is dangerous since the inner resample might have the same data point in both the analysis and assessment set. - Output - # Nested resampling: - # outer: Bootstrap sampling - # inner: 3-fold cross-validation - # A tibble: 5 x 3 - splits id inner_resamples - - 1 Bootstrap1 - 2 Bootstrap2 - 3 Bootstrap3 - 4 Bootstrap4 - 5 Bootstrap5 - ---- - - Code - nested_cv(mtcars, outside = vfold_cv(), inside = folds) - Condition - Error in `nested_cv()`: - ! `inside` should be a expression such as `vfold()` or `bootstraps(times = 10)` instead of an existing object. 
- -# printing - - Code - rs1 - Output - # Nested resampling: - # outer: 10-fold cross-validation - # inner: 3-fold cross-validation - # A tibble: 10 x 3 - splits id inner_resamples - - 1 Fold01 - 2 Fold02 - 3 Fold03 - 4 Fold04 - 5 Fold05 - 6 Fold06 - 7 Fold07 - 8 Fold08 - 9 Fold09 - 10 Fold10 - From cdc7de80edf98fd4a18eee30e207591fa2d988b2 Mon Sep 17 00:00:00 2001 From: Hannah Frick Date: Mon, 9 Sep 2024 15:36:46 +0100 Subject: [PATCH 61/82] remove trailing whitespace --- R/initial_validation_split.R | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/R/initial_validation_split.R b/R/initial_validation_split.R index 0de77be0..bb76d890 100644 --- a/R/initial_validation_split.R +++ b/R/initial_validation_split.R @@ -304,7 +304,7 @@ validation <- function(x, ...) { validation.default <- function(x, ...) { cls <- class(x) cli_abort( - "No method for objects of class{?es}: {.cls {cls}} " + "No method for objects of class{?es}: {.cls {cls}}" ) } From 09c03f5270ff725ed2b0c11d7518e03f65ee8fb9 Mon Sep 17 00:00:00 2001 From: Hannah Frick Date: Mon, 9 Sep 2024 15:37:07 +0100 Subject: [PATCH 62/82] Add acknowledgement --- NEWS.md | 2 ++ 1 file changed, 2 insertions(+) diff --git a/NEWS.md b/NEWS.md index 2e678b33..1d129095 100644 --- a/NEWS.md +++ b/NEWS.md @@ -4,6 +4,8 @@ * Started moving error messages to cli (#499, #502). +* Error improvements via cli by @PriKalra (#523). + ## Bug fixes * `vfold_cv()` now utilizes the `breaks` argument correctly for repeated cross-validation (@ZWael, #471). 
From 08c34f2f31874d02fad38abc6a6437020b5df727 Mon Sep 17 00:00:00 2001 From: Hannah Frick Date: Mon, 9 Sep 2024 19:04:28 +0100 Subject: [PATCH 63/82] data frame is not a class --- R/slide.R | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/R/slide.R b/R/slide.R index 8d4d5c7e..36b2f694 100644 --- a/R/slide.R +++ b/R/slide.R @@ -216,7 +216,7 @@ sliding_window <- function(data, rlang::check_dots_empty() if (!is.data.frame(data)) { - cli_abort("{.arg data} must be a {.cls data frame}.") + cli_abort("{.arg data} must be a data frame.") } lookback <- check_lookback(lookback) @@ -299,7 +299,7 @@ sliding_index <- function(data, rlang::check_dots_empty() if (!is.data.frame(data)) { - cli_abort("{.arg data} must be a {.cls data frame}.") + cli_abort("{.arg data} must be a data frame.") } step <- check_step(step) @@ -389,7 +389,7 @@ sliding_period <- function(data, rlang::check_dots_empty() if (!is.data.frame(data)) { - cli_abort("{.arg data} must be a {.cls data frame}.") + cli_abort("{.arg data} must be a data frame.") } lookback <- check_lookback(lookback) From 12ccf4835485539a7eeaae4ca62425f6655c4bc0 Mon Sep 17 00:00:00 2001 From: Hannah Frick Date: Mon, 9 Sep 2024 19:08:06 +0100 Subject: [PATCH 64/82] only restyle without changing error message - evaluate `arg` to keep errors informative - what the arg actually was will come via type checkers --- R/slide.R | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/R/slide.R b/R/slide.R index 36b2f694..a78e5bd9 100644 --- a/R/slide.R +++ b/R/slide.R @@ -499,7 +499,7 @@ check_lookback <- function(x) { check_assess <- function(x, arg) { if (vctrs::vec_size(x) != 1L) { - cli_abort("{.arg arg} must have size 1.You have provided {.arg {arg}}") + cli_abort("{.arg {arg}} must have size 1.") } if (identical(x, Inf)) { @@ -507,11 +507,11 @@ check_assess <- function(x, arg) { } if (!rlang::is_integerish(x, finite = TRUE)) { - cli_abort("{.arg arg} must be an integer of size 1, or `Inf`. 
You have provided {.arg {arg}}") + cli_abort("{.arg {arg}} must be an integer of size 1, or {.code Inf}.") } if (x <= 0L) { - cli_abort("{.arg arg} must be positive.") + cli_abort("{.arg {arg}} must be positive.") } vctrs::vec_cast(x, integer(), x_arg = arg) From d1f15efc6ed7cc0bce563bec1c9bb3deb2750f94 Mon Sep 17 00:00:00 2001 From: Hannah Frick Date: Mon, 9 Sep 2024 19:09:26 +0100 Subject: [PATCH 65/82] Add acknowledgement --- NEWS.md | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/NEWS.md b/NEWS.md index a6f6374c..5cbf4057 100644 --- a/NEWS.md +++ b/NEWS.md @@ -2,7 +2,7 @@ * The new `inner_split()` function and its methods for various resamples is for usage in tune to create a inner resample of the analysis set to fit the preprocessor and model on one part and the post-processor on the other part (#483, #488, #489). -* Started moving error messages to cli (#499, #502). With contributions from @JamesHWade (#518). +* Started moving error messages to cli (#499, #502). With contributions from @PriKalra (#526) and @JamesHWade (#518). * Fixed example for `nested_cv()` (@seb09, #520). 
From b21cb5a149a3d25d002b4d9101acd338ed179451 Mon Sep 17 00:00:00 2001 From: Hannah Frick Date: Mon, 9 Sep 2024 19:56:51 +0100 Subject: [PATCH 66/82] keep "data frame" without class styling --- R/labels.R | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/R/labels.R b/R/labels.R index 2aff05ba..268492c3 100644 --- a/R/labels.R +++ b/R/labels.R @@ -95,7 +95,7 @@ add_resample_id <- function(.data, split, dots = FALSE) { cli_abort("{.arg dots} should be a single logical.") } if (!inherits(.data, "data.frame")) { - cli_abort("{.arg .data} should be a {.cls data frame}.") + cli_abort("{.arg .data} should be a data frame.") } if (!inherits(split, "rsplit")) { cli_abort("{.arg split} should be a single 'rset' object.") From 63baecce620bd48ec3ebeb393c78ce7e0ce7d538 Mon Sep 17 00:00:00 2001 From: Hannah Frick Date: Mon, 9 Sep 2024 19:57:09 +0100 Subject: [PATCH 67/82] add class styling --- R/labels.R | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/R/labels.R b/R/labels.R index 268492c3..fcaa5ecd 100644 --- a/R/labels.R +++ b/R/labels.R @@ -98,12 +98,12 @@ add_resample_id <- function(.data, split, dots = FALSE) { cli_abort("{.arg .data} should be a data frame.") } if (!inherits(split, "rsplit")) { - cli_abort("{.arg split} should be a single 'rset' object.") + cli_abort("{.arg split} should be a single {.cls rset} object.") } labs <- labels(split) if (!tibble::is_tibble(labs) && nrow(labs) == 1) { - cli_abort("{.arg split} should be a single 'rset' object.") + cli_abort("{.arg split} should be a single {.cls rset} object.") } if (dots) { From 5e01698ed73bed6b049f87d2e7b889b95e552a8d Mon Sep 17 00:00:00 2001 From: Hannah Frick Date: Mon, 9 Sep 2024 19:59:44 +0100 Subject: [PATCH 68/82] Add and consolidate acknowledgement --- NEWS.md | 4 +--- 1 file changed, 1 insertion(+), 3 deletions(-) diff --git a/NEWS.md b/NEWS.md index cef13e20..c18dc579 100644 --- a/NEWS.md +++ b/NEWS.md @@ -2,7 +2,7 @@ * The new `inner_split()` function and its 
methods for various resamples is for usage in tune to create a inner resample of the analysis set to fit the preprocessor and model on one part and the post-processor on the other part (#483, #488, #489). -* Started moving error messages to cli (#499, #502). With contributions from @PriKalra (#526) and @JamesHWade (#518). +* Started moving error messages to cli (#499, #502). With contributions from @PriKalra (#523, #526, #528) and @JamesHWade (#518). * Fixed example for `nested_cv()` (@seb09, #520). @@ -18,8 +18,6 @@ * `vfold_cv()` and `clustering_cv()` now error on implicit leave-one-out cross-validation (@seb09, #527). -* Error improvements via cli by @PriKalra (#523). - ## Bug fixes * `vfold_cv()` now utilizes the `breaks` argument correctly for repeated cross-validation (@ZWael, #471). From 3faebaf3fb463c76ae9e0ae32f7421b6b87d3b12 Mon Sep 17 00:00:00 2001 From: Hannah Frick Date: Mon, 9 Sep 2024 20:16:04 +0100 Subject: [PATCH 69/82] update test --- tests/testthat/_snaps/rset.md | 8 ++++++++ tests/testthat/test-rset.R | 5 +---- 2 files changed, 9 insertions(+), 4 deletions(-) create mode 100644 tests/testthat/_snaps/rset.md diff --git a/tests/testthat/_snaps/rset.md b/tests/testthat/_snaps/rset.md new file mode 100644 index 00000000..60dc8c41 --- /dev/null +++ b/tests/testthat/_snaps/rset.md @@ -0,0 +1,8 @@ +# bad args + + Code + new_rset(list(1), "x") + Condition + Error in `new_rset()`: + ! Each element of `splits` must be an object. 
+ diff --git a/tests/testthat/test-rset.R b/tests/testthat/test-rset.R index 32d5e8cc..c09e0cab 100644 --- a/tests/testthat/test-rset.R +++ b/tests/testthat/test-rset.R @@ -8,10 +8,7 @@ test_that("bad args", { expect_error( new_rset(car_folds$splits, car_folds$splits) ) - expect_error( - new_rset(list(1), "x"), - "must be an `rsplit` object" - ) + expect_snapshot(error = TRUE, {new_rset(list(1), "x")}) args <- list(a = 1, b = 2, 3) expect_error( new_rset( From 972acb5c73bbdc890fa7b010c0261472aae6f4d2 Mon Sep 17 00:00:00 2001 From: Hannah Frick Date: Mon, 9 Sep 2024 20:18:29 +0100 Subject: [PATCH 70/82] Update acknowledgment --- NEWS.md | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/NEWS.md b/NEWS.md index c18dc579..4960e310 100644 --- a/NEWS.md +++ b/NEWS.md @@ -2,7 +2,7 @@ * The new `inner_split()` function and its methods for various resamples is for usage in tune to create a inner resample of the analysis set to fit the preprocessor and model on one part and the post-processor on the other part (#483, #488, #489). -* Started moving error messages to cli (#499, #502). With contributions from @PriKalra (#523, #526, #528) and @JamesHWade (#518). +* Started moving error messages to cli (#499, #502). With contributions from @PriKalra (#523, #526, #528, #530) and @JamesHWade (#518). * Fixed example for `nested_cv()` (@seb09, #520). 
From 6281f3972c98c036bfdb6ff577ebdb1496015288 Mon Sep 17 00:00:00 2001 From: Hannah Frick Date: Tue, 10 Sep 2024 14:27:03 +0100 Subject: [PATCH 71/82] more in keeping with other error messages --- R/permutations.R | 16 ++++++++-------- R/reg_intervals.R | 10 ++-------- tests/testthat/_snaps/permutations.md | 10 ++++++++++ tests/testthat/test-permutations.R | 2 +- 4 files changed, 21 insertions(+), 17 deletions(-) diff --git a/R/permutations.R b/R/permutations.R index 64e17b0b..47fe3aba 100644 --- a/R/permutations.R +++ b/R/permutations.R @@ -53,20 +53,20 @@ permutations <- function(data, permute <- rlang::enquo(permute) if (is.null(permute)) { - rlang::abort("You must specify at least one column to permute!") + cli_abort("You must specify at least one column to permute.") } col_id <- tidyselect::eval_select(permute, data) if (identical(length(col_id), 0L)) { - cli_abort("{.strong You must specify at least one column to permute!}") + cli_abort("You must specify at least one column to permute.") } else if (identical(length(col_id), ncol(data))) { cli_abort(c( - "!" = "{.emph {.strong You have selected all columns to permute.}}", - "x" = "This effectively reorders the rows in the original data without changing the data structure.", - "i" = "To achieve meaningful permutation:", - "*" = "{.field Select fewer columns} to permute.", - ">" = "Ideal: Choose specific columns that are relevant to your analysis.")) - } + "You have selected all columns to permute.", + "i" = "This effectively reorders the rows in the original data without + changing the data structure.", + ">" = "Please select fewer columns to permute." 
+ )) + } split_objs <- perm_splits(data, times) diff --git a/R/reg_intervals.R b/R/reg_intervals.R index ef2faed8..7da4fada 100644 --- a/R/reg_intervals.R +++ b/R/reg_intervals.R @@ -54,18 +54,12 @@ reg_intervals <- } else { times <- times[1] if (!is.numeric(times)) { - cli_abort(c( - "x" = "{.arg times} should be a single integer.", - "i" = "You provided {.val {times}}." - )) + cli_abort("{.arg times} should be a single integer.") } } if (length(alpha) != 1 || !is.numeric(alpha)) { - cli_abort(c( - "x" = "{.arg alpha} must be a single numeric value.", - "i" = "Please ensure that {.arg alpha} is a numeric value and not a vector or other type." - )) + cli_abort("{.arg alpha} must be a single numeric value.") } if (model_fn %in% c("survreg", "coxph")) { diff --git a/tests/testthat/_snaps/permutations.md b/tests/testthat/_snaps/permutations.md index 1f1b3571..84233a4f 100644 --- a/tests/testthat/_snaps/permutations.md +++ b/tests/testthat/_snaps/permutations.md @@ -6,6 +6,16 @@ Error in `as.data.frame()`: ! There is no assessment data set for an `rsplit` object with class <perm_split>. +# bad args + + Code + permutations(mtcars, everything()) + Condition + Error in `permutations()`: + ! You have selected all columns to permute. + i This effectively reorders the rows in the original data without changing the data structure. + > Please select fewer columns to permute. 
+ # printing Code diff --git a/tests/testthat/test-permutations.R b/tests/testthat/test-permutations.R index e2cd4702..91e3dfdf 100644 --- a/tests/testthat/test-permutations.R +++ b/tests/testthat/test-permutations.R @@ -37,7 +37,7 @@ test_that("bad args", { expect_error(permutations(mtcars)) # no columns specified expect_error(permutations(mtcars, foo)) # column doesn't exist expect_error(permutations(mtcars, start_with("z"))) # column doesn't exist - expect_error(permutations(mtcars, everything())) # all columns + expect_snapshot(error = TRUE, {permutations(mtcars, everything())}) # all columns }) test_that("printing", { From 71511bf46223b66658922a2baf889de60ee442cc Mon Sep 17 00:00:00 2001 From: Hannah Frick Date: Tue, 10 Sep 2024 14:28:58 +0100 Subject: [PATCH 72/82] Update acknowledgment --- NEWS.md | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/NEWS.md b/NEWS.md index 4960e310..dcd0ac7f 100644 --- a/NEWS.md +++ b/NEWS.md @@ -2,7 +2,7 @@ * The new `inner_split()` function and its methods for various resamples is for usage in tune to create a inner resample of the analysis set to fit the preprocessor and model on one part and the post-processor on the other part (#483, #488, #489). -* Started moving error messages to cli (#499, #502). With contributions from @PriKalra (#523, #526, #528, #530) and @JamesHWade (#518). +* Started moving error messages to cli (#499, #502). With contributions from @PriKalra (#523, #526, #528, #530, #531) and @JamesHWade (#518). * Fixed example for `nested_cv()` (@seb09, #520). 
From 78c22ce4b5361d87050dcc0070f9795355159785 Mon Sep 17 00:00:00 2001 From: Hannah Frick Date: Wed, 11 Sep 2024 16:05:26 +0100 Subject: [PATCH 73/82] update styling --- R/tidy.R | 2 +- R/validation_set.R | 4 ++-- R/vfold.R | 23 +++++++++++------------ 3 files changed, 14 insertions(+), 15 deletions(-) diff --git a/R/tidy.R b/R/tidy.R index 48839b5f..d35f8731 100644 --- a/R/tidy.R +++ b/R/tidy.R @@ -119,7 +119,7 @@ tidy.nested_cv <- function(x, unique_ind = TRUE, ...) { inner_id <- grep("^id", names(inner_tidy)) if (length(inner_id) != length(id_cols)) { - cli_abort("{.strong {.red Cannot} merge tidy data sets}") + cli_abort("Cannot merge tidy data sets.") } names(inner_tidy)[inner_id] <- id_cols full_join(outer_tidy, inner_tidy, by = id_cols) diff --git a/R/validation_set.R b/R/validation_set.R index 23683488..50e67d6a 100644 --- a/R/validation_set.R +++ b/R/validation_set.R @@ -90,7 +90,7 @@ validation.val_split <- function(x, ...) { #' @export testing.val_split <- function(x, ...) { cli_abort(c( - "x" = "{.strong The testing data is not part of the validation set object.}", - "i" = "It is part of the result of the initial 3-way split, e.g., with {.code initial_validation_split()}." + "The testing data is not part of the validation set object.", + "i" = "It is part of the result of the initial 3-way split, e.g., with {.fun initial_validation_split}." )) } diff --git a/R/vfold.R b/R/vfold.R index efa8d6f6..a4026fd2 100644 --- a/R/vfold.R +++ b/R/vfold.R @@ -81,10 +81,9 @@ vfold_cv <- function(data, v = 10, repeats = 1, ) } else { if (v == nrow(data)) { - cli_abort(c( - "x" = sprintf("Repeated resampling when {.arg v} is %s would create identical resamples", v), - "i" = "Consider adjusting the value of {.arg v} to avoid identical resamples." - )) + cli_abort( + "Repeated resampling when {.arg v} is {v} would create identical resamples." 
+ ) } for (i in 1:repeats) { tmp <- vfold_splits(data = data, v = v, strata = strata, breaks = breaks ,pool = pool) @@ -227,11 +226,11 @@ group_vfold_cv <- function(data, group = NULL, v = NULL, repeats = 1, balance = } else { if (is.null(v)) { cli_abort( - "Repeated resampling when {.arg v} is {.val NULL} would create identical resamples" + "Repeated resampling when {.arg v} is {.val NULL} would create identical resamples." ) } if (v == length(unique(getElement(data, group)))) { - cli_abort("Repeated resampling when {.arg v} is {.val {v}} would create identical resamples") + cli_abort("Repeated resampling when {.arg v} is {.val {v}} would create identical resamples.") } for (i in 1:repeats) { @@ -293,8 +292,8 @@ group_vfold_splits <- function(data, group, v = NULL, balance, strata = NULL, po if (max_v < 5) { cli_abort(c( - if (is.function(message)) message() else message, - "x" = "The least common stratum only had {.val {max_v}} groups, which may not be enough for cross-validation.", + message, + "*" = "The least common stratum only had {.val {max_v}} groups, which may not be enough for cross-validation.", "i" = "Set {.arg v} explicitly to override this error." 
), call = rlang::caller_env()) } @@ -333,10 +332,10 @@ add_vfolds <- function(x, v) { check_v <- function(v, max_v, rows = "rows", call = rlang::caller_env()) { if (!is.numeric(v) || length(v) != 1 || v < 2) { - cli_abort("{.var v} must be a single positive integer greater than 1", call = call) + cli_abort("{.arg v} must be a single positive integer greater than 1.", call = call) } else if (v > max_v) { cli_abort( - "The number of {.field {rows}} is less than {.arg v} = {.val {v}}", + "The number of {rows} is less than {.arg v} = {.val {v}}.", call = call ) } @@ -358,7 +357,7 @@ check_grouped_strata <- function(group, strata, pool, data) { if (nrow(vctrs::vec_unique(grouped_table)) != nrow(vctrs::vec_unique(grouped_table["group"]))) { - cli_abort("{.var strata} must be constant across all members of each {.var group}.") + cli_abort("{.arg strata} must be constant across all members of each {.arg group}.") } strata @@ -366,6 +365,6 @@ check_grouped_strata <- function(group, strata, pool, data) { check_repeats <- function(repeats, call = rlang::caller_env()) { if (!is.numeric(repeats) || length(repeats) != 1 || repeats < 1) { - cli_abort("{.var repeats} must be a single positive integer", call = call) + cli_abort("{.arg repeats} must be a single positive integer.", call = call) } } From 5d4a6ceba8a6959596dedd6e1602ea5826e7aad3 Mon Sep 17 00:00:00 2001 From: Hannah Frick Date: Wed, 11 Sep 2024 16:07:28 +0100 Subject: [PATCH 74/82] add styling --- R/vfold.R | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/R/vfold.R b/R/vfold.R index a4026fd2..dc783fe3 100644 --- a/R/vfold.R +++ b/R/vfold.R @@ -287,7 +287,7 @@ group_vfold_splits <- function(data, group, v = NULL, balance, strata = NULL, po )$count ) message <- c( - "Leaving `v = NULL` while using stratification will set `v` to the number of groups present in the least common stratum." 
+ "Leaving {.code v = NULL} while using stratification will set {.arg v} to the number of groups present in the least common stratum." ) if (max_v < 5) { From 9ac53bae029e1881799cea28322ee7c9154d4407 Mon Sep 17 00:00:00 2001 From: Hannah Frick Date: Wed, 11 Sep 2024 16:07:55 +0100 Subject: [PATCH 75/82] update warning to cli --- NAMESPACE | 1 + R/rsample-package.R | 2 +- R/vfold.R | 4 ++-- 3 files changed, 4 insertions(+), 3 deletions(-) diff --git a/NAMESPACE b/NAMESPACE index f1154bcd..b886845a 100644 --- a/NAMESPACE +++ b/NAMESPACE @@ -416,6 +416,7 @@ export(validation_time_split) export(vfold_cv) import(vctrs) importFrom(cli,cli_abort) +importFrom(cli,cli_warn) importFrom(dplyr,"%>%") importFrom(dplyr,arrange) importFrom(dplyr,arrange_) diff --git a/R/rsample-package.R b/R/rsample-package.R index ab2e172c..766c6d5a 100644 --- a/R/rsample-package.R +++ b/R/rsample-package.R @@ -3,7 +3,7 @@ ## usethis namespace: start #' @importFrom lifecycle deprecated -#' @importFrom cli cli_abort +#' @importFrom cli cli_abort cli_warn ## usethis namespace: end NULL diff --git a/R/vfold.R b/R/vfold.R index dc783fe3..2348d91a 100644 --- a/R/vfold.R +++ b/R/vfold.R @@ -298,9 +298,9 @@ group_vfold_splits <- function(data, group, v = NULL, balance, strata = NULL, po ), call = rlang::caller_env()) } - rlang::warn(c( + cli_warn(c( message, - i = "Set `v` explicitly to override this warning." + i = "Set {.arg v} explicitly to override this warning." 
), call = rlang::caller_env()) } From d341bd670e23bb65af684906e98648dfb5040f12 Mon Sep 17 00:00:00 2001 From: Hannah Frick Date: Wed, 11 Sep 2024 16:08:26 +0100 Subject: [PATCH 76/82] update snapshots --- tests/testthat/_snaps/clustering.md | 10 +++++----- tests/testthat/_snaps/validation_set.md | 1 + tests/testthat/_snaps/vfold.md | 20 ++++++++++---------- 3 files changed, 16 insertions(+), 15 deletions(-) diff --git a/tests/testthat/_snaps/clustering.md b/tests/testthat/_snaps/clustering.md index 956070ab..68015a14 100644 --- a/tests/testthat/_snaps/clustering.md +++ b/tests/testthat/_snaps/clustering.md @@ -4,11 +4,11 @@ --- - `v` must be a single positive integer greater than 1 + `v` must be a single positive integer greater than 1. --- - The number of rows is less than `v = 500` + The number of rows is less than `v` = 500. --- @@ -20,15 +20,15 @@ clustering_cv(Orange, v = 1, vars = "Tree") Condition Error in `clustering_cv()`: - ! `v` must be a single positive integer greater than 1 + ! `v` must be a single positive integer greater than 1. --- - `repeats` must be a single positive integer + `repeats` must be a single positive integer. --- - `repeats` must be a single positive integer + `repeats` must be a single positive integer. # printing diff --git a/tests/testthat/_snaps/validation_set.md b/tests/testthat/_snaps/validation_set.md index 9fad0bb7..930fa81d 100644 --- a/tests/testthat/_snaps/validation_set.md +++ b/tests/testthat/_snaps/validation_set.md @@ -5,4 +5,5 @@ Condition Error in `testing()`: ! The testing data is not part of the validation set object. + i It is part of the result of the initial 3-way split, e.g., with `initial_validation_split()`. 
diff --git a/tests/testthat/_snaps/vfold.md b/tests/testthat/_snaps/vfold.md index 50788f81..25e047d6 100644 --- a/tests/testthat/_snaps/vfold.md +++ b/tests/testthat/_snaps/vfold.md @@ -9,31 +9,31 @@ # bad args - `v` must be a single positive integer greater than 1 + `v` must be a single positive integer greater than 1. --- - `v` must be a single positive integer greater than 1 + `v` must be a single positive integer greater than 1. --- - `v` must be a single positive integer greater than 1 + `v` must be a single positive integer greater than 1. --- - The number of rows is less than `v = 500` + The number of rows is less than `v` = 500. --- - Repeated resampling when `v` is 150 would create identical resamples + Repeated resampling when `v` is 150 would create identical resamples. --- - `repeats` must be a single positive integer + `repeats` must be a single positive integer. --- - `repeats` must be a single positive integer + `repeats` must be a single positive integer. # printing @@ -57,11 +57,11 @@ # grouping -- bad args - Repeated resampling when `v` is 4 would create identical resamples + Repeated resampling when `v` is 4 would create identical resamples. --- - Repeated resampling when `v` is `NULL` would create identical resamples + Repeated resampling when `v` is "NULL" would create identical resamples. --- @@ -69,7 +69,7 @@ group_vfold_cv(Orange, v = 1, group = "Tree") Condition Error in `group_vfold_cv()`: - ! `v` must be a single positive integer greater than 1 + ! `v` must be a single positive integer greater than 1. 
# grouping -- other balance methods From a4f7e5c5dfbfc5d4bfc830d065b808760042af44 Mon Sep 17 00:00:00 2001 From: Hannah Frick Date: Wed, 11 Sep 2024 16:12:11 +0100 Subject: [PATCH 77/82] Update acknowledgements --- NEWS.md | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/NEWS.md b/NEWS.md index dcd0ac7f..22570e96 100644 --- a/NEWS.md +++ b/NEWS.md @@ -2,7 +2,7 @@ * The new `inner_split()` function and its methods for various resamples is for usage in tune to create a inner resample of the analysis set to fit the preprocessor and model on one part and the post-processor on the other part (#483, #488, #489). -* Started moving error messages to cli (#499, #502). With contributions from @PriKalra (#523, #526, #528, #530, #531) and @JamesHWade (#518). +* Started moving error messages to cli (#499, #502). With contributions from @PriKalra (#523, #526, #528, #530, #531, #532) and @JamesHWade (#518). * Fixed example for `nested_cv()` (@seb09, #520). From 512892d73f78e9612a4c05d6b25f0306be6d061a Mon Sep 17 00:00:00 2001 From: Hannah Frick Date: Wed, 11 Sep 2024 17:03:02 +0100 Subject: [PATCH 78/82] add some styling --- R/bootci.R | 10 +++++----- 1 file changed, 5 insertions(+), 5 deletions(-) diff --git a/R/bootci.R b/R/bootci.R index 504f8aa7..57ca1dbd 100644 --- a/R/bootci.R +++ b/R/bootci.R @@ -6,22 +6,22 @@ check_rset <- function(x, app = TRUE) { if (!inherits(x, "bootstraps")) { - cli_abort("{.arg .data} should be an `rset` object generated from {.fn bootstraps}") + cli_abort("{.arg .data} should be an {.cls rset} object generated from {.fn bootstraps}.") } if (app) { if (x %>% dplyr::filter(id == "Apparent") %>% nrow() != 1) { - cli_abort("Please set `apparent = TRUE` in {.fn bootstraps} function") + cli_abort("Please set {.code apparent = TRUE} in {.fn bootstraps} function.") } } invisible(NULL) } -stat_fmt_err <- paste("`statistics` should select a list column of tidy results.") +stat_fmt_err <- "{.arg statistics} should select a list column of tidy 
results." stat_nm_err <- paste( - "The tibble in `statistics` should have columns for", - "'estimate' and 'term`" + "The tibble in {.arg statistics} should have columns for", + "'estimate' and 'term'." ) std_exp <- c("std.error", "robust.se") From 874c03bc00ac580e02f65e50941da15fa2aa732b Mon Sep 17 00:00:00 2001 From: Hannah Frick Date: Wed, 11 Sep 2024 17:14:39 +0100 Subject: [PATCH 79/82] get pluralization of `{term{?s}` working --- R/bootci.R | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/R/bootci.R b/R/bootci.R index 57ca1dbd..b57d57cc 100644 --- a/R/bootci.R +++ b/R/bootci.R @@ -129,7 +129,7 @@ check_num_resamples <- function(x, B = 1000) { if (nrow(x) > 0) { terms <- x$term - cli::cli_warn("Recommend at least {B} non-missing bootstrap resamples for {terms} term{?s}.") + cli::cli_warn("Recommend at least {B} non-missing bootstrap resamples for {cli::qty(terms)} term{?s} {.code {terms}}.") } invisible(NULL) } From 6220f859d0525e47c2f058da794895cdef002994 Mon Sep 17 00:00:00 2001 From: Hannah Frick Date: Wed, 11 Sep 2024 17:14:49 +0100 Subject: [PATCH 80/82] update snapshots --- tests/testthat/_snaps/bootci.md | 2 +- tests/testthat/_snaps/bootci.new.md | 44 ----------------------------- 2 files changed, 1 insertion(+), 45 deletions(-) delete mode 100644 tests/testthat/_snaps/bootci.new.md diff --git a/tests/testthat/_snaps/bootci.md b/tests/testthat/_snaps/bootci.md index 77e607e1..8632d77e 100644 --- a/tests/testthat/_snaps/bootci.md +++ b/tests/testthat/_snaps/bootci.md @@ -6,7 +6,7 @@ Warning: Recommend at least 1000 non-missing bootstrap resamples for term `mean`. Error in `pctl_single()`: - ! All statistics have missing values.. + ! All statistics have missing values. 
--- diff --git a/tests/testthat/_snaps/bootci.new.md b/tests/testthat/_snaps/bootci.new.md deleted file mode 100644 index 8632d77e..00000000 --- a/tests/testthat/_snaps/bootci.new.md +++ /dev/null @@ -1,44 +0,0 @@ -# Upper & lower confidence interval does not contain NA - - Code - int_pctl(bt_resamples, res) - Condition - Warning: - Recommend at least 1000 non-missing bootstrap resamples for term `mean`. - Error in `pctl_single()`: - ! All statistics have missing values. - ---- - - Code - int_t(bt_resamples, res) - Condition - Warning: - Recommend at least 500 non-missing bootstrap resamples for term `mean`. - Error in `t_single()`: - ! All statistics have missing values. - ---- - - Code - int_bca(bt_resamples, res, .fn = bad_stats) - Condition - Warning: - Recommend at least 1000 non-missing bootstrap resamples for term `mean`. - Error in `bca_calc()`: - ! All statistics have missing values. - -# regression intervals - - Code - skip_if(new_rng_snapshots) - set.seed(123) - int_2 <- reg_intervals(mpg ~ disp + wt, data = mtcars, filter = term == "wt", - model_fn = "glm", keep_reps = TRUE) - int_2 - Output - # A tibble: 1 x 7 - term .lower .estimate .upper .alpha .method .replicates - > - 1 wt -5.62 -3.46 -0.955 0.05 student-t [1,001 x 2] - From 576ff6fa27c75a8fc2b3fa23a8c0c309849fb140 Mon Sep 17 00:00:00 2001 From: Hannah Frick Date: Wed, 11 Sep 2024 17:16:17 +0100 Subject: [PATCH 81/82] Add acknowledgement --- NEWS.md | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/NEWS.md b/NEWS.md index 22570e96..bd6f3efe 100644 --- a/NEWS.md +++ b/NEWS.md @@ -2,7 +2,7 @@ * The new `inner_split()` function and its methods for various resamples is for usage in tune to create a inner resample of the analysis set to fit the preprocessor and model on one part and the post-processor on the other part (#483, #488, #489). -* Started moving error messages to cli (#499, #502). With contributions from @PriKalra (#523, #526, #528, #530, #531, #532) and @JamesHWade (#518). 
+* Started moving error messages to cli (#499, #502). With contributions from @PriKalra (#523, #526, #528, #530, #531, #532), @Dpananos (#516), and @JamesHWade (#518). * Fixed example for `nested_cv()` (@seb09, #520). From 826dc2af2cce6b8725246acbee8f2be56c6f1553 Mon Sep 17 00:00:00 2001 From: Hannah Frick Date: Wed, 11 Sep 2024 17:17:30 +0100 Subject: [PATCH 82/82] `cli_warn()` is now already imported --- R/bootci.R | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/R/bootci.R b/R/bootci.R index b57d57cc..bd6ef9af 100644 --- a/R/bootci.R +++ b/R/bootci.R @@ -129,7 +129,7 @@ check_num_resamples <- function(x, B = 1000) { if (nrow(x) > 0) { terms <- x$term - cli::cli_warn("Recommend at least {B} non-missing bootstrap resamples for {cli::qty(terms)} term{?s} {.code {terms}}.") + cli_warn("Recommend at least {B} non-missing bootstrap resamples for {cli::qty(terms)} term{?s} {.code {terms}}.") } invisible(NULL) }