From 7fa9f37e8647afe71111eaddd9d4fb0056fbfec1 Mon Sep 17 00:00:00 2001 From: topepo Date: Sun, 18 Nov 2018 09:09:15 -0500 Subject: [PATCH 1/5] Changes requested by CRAN Examples for `step_isomap` fail due to missing dependency: ```r > im_estimates <- prep(im_trans, training = biomass_tr) Error in chckpkg("RSpectra") : require 'RSpectra' package, install it using install.packages('RSpectra') Calls: prep ... embed -> .local -> do.call -> -> chckpkg Execution halted ``` --- DESCRIPTION | 3 ++- R/ica.R | 23 +++++++++++++---------- R/isomap.R | 26 ++++++++++++++------------ R/kpca.R | 26 ++++++++++++++------------ R/nnmf.R | 20 ++++++++++---------- man/step_ica.Rd | 15 +++++++++------ man/step_isomap.Rd | 16 +++++++++------- man/step_kpca.Rd | 16 +++++++++------- man/step_nnmf.Rd | 20 ++++++++++---------- 9 files changed, 90 insertions(+), 75 deletions(-) diff --git a/DESCRIPTION b/DESCRIPTION index f3c2b44bf..9ea0a4495 100644 --- a/DESCRIPTION +++ b/DESCRIPTION @@ -1,6 +1,6 @@ Package: recipes Title: Preprocessing Tools to Create Design Matrices -Version: 0.1.3.9002 +Version: 0.1.4 Authors@R: c( person("Max", "Kuhn", , "max@rstudio.com", c("aut", "cre")), person("Hadley", "Wickham", , "hadley@rstudio.com", "aut"), @@ -48,6 +48,7 @@ Suggests: rmarkdown, rpart, rsample, + RSpectra, testthat License: GPL-2 VignetteBuilder: knitr diff --git a/R/ica.R b/R/ica.R index 7d0ea89fa..2aeb41d15 100644 --- a/R/ica.R +++ b/R/ica.R @@ -25,9 +25,9 @@ #' @param res The [fastICA::fastICA()] object is stored #' here once this preprocessing step has be trained by #' [prep.recipe()]. -#' @param num The number of components to retain (this will be -#' deprecated in factor of `num_comp` in version 0.1.5). `num_comp` -#' will override this option. +#' @param num The number of components to retain (this will be +#' deprecated in factor of `num_comp` in version 0.1.5). `num_comp` +#' will override this option. #' @param prefix A character string that will be the prefix to the #' resulting new variables. See notes below. #' @return An updated version of `recipe` with the new step @@ -82,14 +82,17 @@ #' ica_trans <- step_center(rec, V1, V2) #' ica_trans <- step_scale(ica_trans, V1, V2) #' ica_trans <- step_ica(ica_trans, V1, V2, num_comp = 2) -#' # ica_estimates <- prep(ica_trans, training = tr) -#' # ica_data <- bake(ica_estimates, te) #' -#' # plot(te$V1, te$V2) -#' # plot(ica_data$IC1, ica_data$IC2) +#' if (require(dimRed) & require(fastICA)) { +#' ica_estimates <- prep(ica_trans, training = tr) +#' ica_data <- bake(ica_estimates, te) #' -#' # tidy(ica_trans, number = 3) -#' # tidy(ica_estimates, number = 3) +#' plot(te$V1, te$V2) +#' plot(ica_data$IC1, ica_data$IC2) +#' +#' tidy(ica_trans, number = 3) +#' tidy(ica_estimates, number = 3) +#' } #' @seealso [step_pca()] [step_kpca()] #' [step_isomap()] [recipe()] [prep.recipe()] #' [bake.recipe()] @@ -108,7 +111,7 @@ step_ica <- recipes_pkg_check(c("dimRed", "fastICA")) - if (!is.null(num)) + if (!is.null(num)) message("The argument `num` is deprecated in factor of `num_comp`. ", "`num` will be removed in next version.", call. = FALSE) add_step( diff --git a/R/isomap.R b/R/isomap.R index f426d2d9a..486e5d9ed 100644 --- a/R/isomap.R +++ b/R/isomap.R @@ -20,9 +20,9 @@ #' used. #' @param neighbors The number of neighbors. #' @param options A list of options to [dimRed::Isomap()]. -#' @param num The number of isomap dimensions (this will be deprecated -#' in factor of `num_terms` in version 0.1.5). `num_terms` will -#' override this option. +#' @param num The number of isomap dimensions (this will be deprecated +#' in factor of `num_terms` in version 0.1.5). `num_terms` will +#' override this option. #' @param res The [dimRed::Isomap()] object is stored #' here once this preprocessing step has be trained by #' [prep.recipe()]. @@ -85,16 +85,18 @@ #' neighbors = 100, #' num_terms = 2) #' -#' # im_estimates <- prep(im_trans, training = biomass_tr) +#' if (require(dimRed) & require(RSpectra)) { +#' im_estimates <- prep(im_trans, training = biomass_tr) #' -#' # im_te <- bake(im_estimates, biomass_te) +#' im_te <- bake(im_estimates, biomass_te) #' -#' # rng <- extendrange(c(im_te$Isomap1, im_te$Isomap2)) -#' # plot(im_te$Isomap1, im_te$Isomap2, -#' # xlim = rng, ylim = rng) +#' rng <- extendrange(c(im_te$Isomap1, im_te$Isomap2)) +#' plot(im_te$Isomap1, im_te$Isomap2, +#' xlim = rng, ylim = rng) #' -#' # tidy(im_trans, number = 4) -#' # tidy(im_estimates, number = 4) +#' tidy(im_trans, number = 4) +#' tidy(im_estimates, number = 4) +#' } #' } #' @seealso [step_pca()] [step_kpca()] #' [step_ica()] [recipe()] [prep.recipe()] @@ -115,7 +117,7 @@ step_isomap <- id = rand_id("isomap")) { recipes_pkg_check(c("dimRed", "RSpectra", "igraph", "RANN")) - if (!is.null(num)) + if (!is.null(num)) message("The argument `num` is deprecated in factor of `num_terms`. ", "`num` will be removed in next version.", call. = FALSE) add_step( @@ -137,7 +139,7 @@ step_isomap <- } step_isomap_new <- - function(terms, role, trained, num_terms, neighbors, options, res, num, + function(terms, role, trained, num_terms, neighbors, options, res, num, prefix, skip, id) { step( subclass = "isomap", diff --git a/R/kpca.R b/R/kpca.R index 949256c72..8db42c850 100644 --- a/R/kpca.R +++ b/R/kpca.R @@ -26,9 +26,9 @@ #' @param res An S4 [kernlab::kpca()] object is stored #' here once this preprocessing step has be trained by #' [prep.recipe()]. -#' @param num The number of components to retain (this will be -#' deprecated in factor of `num_comp` in version 0.1.5). `num_comp` -#' will override this option. +#' @param num The number of components to retain (this will be +#' deprecated in factor of `num_comp` in version 0.1.5). `num_comp` +#' will override this option. #' @param prefix A character string that will be the prefix to the #' resulting new variables. See notes below. #' @return An updated version of `recipe` with the new step @@ -94,16 +94,18 @@ #' step_scale(all_predictors()) %>% #' step_kpca(all_predictors()) #' -#' # kpca_estimates <- prep(kpca_trans, training = biomass_tr) +#' if (require(dimRed) & require(kernlab)) { +#' kpca_estimates <- prep(kpca_trans, training = biomass_tr) #' -#' # kpca_te <- bake(kpca_estimates, biomass_te) +#' kpca_te <- bake(kpca_estimates, biomass_te) #' -#' # rng <- extendrange(c(kpca_te$kPC1, kpca_te$kPC2)) -#' # plot(kpca_te$kPC1, kpca_te$kPC2, -#' # xlim = rng, ylim = rng) +#' rng <- extendrange(c(kpca_te$kPC1, kpca_te$kPC2)) +#' plot(kpca_te$kPC1, kpca_te$kPC2, +#' xlim = rng, ylim = rng) #' -#' # tidy(kpca_trans, number = 4) -#' # tidy(kpca_estimates, number = 4) +#' tidy(kpca_trans, number = 4) +#' tidy(kpca_estimates, number = 4) +#' } #' @seealso [step_pca()] [step_ica()] #' [step_isomap()] [recipe()] [prep.recipe()] #' [bake.recipe()] @@ -123,10 +125,10 @@ step_kpca <- id = rand_id("kpca")) { recipes_pkg_check(c("dimRed", "kernlab")) - if (!is.null(num)) + if (!is.null(num)) message("The argument `num` is deprecated in factor of `num_comp`. ", "`num` will be removed in next version.", call. = FALSE) - + add_step( recipe, step_kpca_new( diff --git a/R/nnmf.R b/R/nnmf.R index 345de1dc5..cd96d07a9 100644 --- a/R/nnmf.R +++ b/R/nnmf.R @@ -52,20 +52,20 @@ #' #' @examples #' \donttest{ -#' library(dimRed) -#' library(NMF) #' data(biomass) #' -#' rec <- recipe(HHV ~ ., data = biomass) %>% -#' update_role(sample, new_role = "id var") %>% -#' update_role(dataset, new_role = "split variable") %>% -#' step_nnmf(all_predictors(), num_comp = 2, seed = 473, num_run = 2) %>% -#' prep(training = biomass, retain = TRUE) +#' if (require(dimRed) & require(NMF)) { +#' rec <- recipe(HHV ~ ., data = biomass) %>% +#' update_role(sample, new_role = "id var") %>% +#' update_role(dataset, new_role = "split variable") %>% +#' step_nnmf(all_predictors(), num_comp = 2, seed = 473, num_run = 2) %>% +#' prep(training = biomass, retain = TRUE) #' -#' # juice(rec) +#' juice(rec) #' -#' # library(ggplot2) -#' # ggplot(juice(rec), aes(x = NNMF2, y = NNMF1, col = HHV)) + geom_point() +#' library(ggplot2) +#' ggplot(juice(rec), aes(x = NNMF2, y = NNMF1, col = HHV)) + geom_point() +#' } #' } #' @seealso [step_pca()], [step_ica()], [step_kpca()], #' [step_isomap()], [recipe()], [prep.recipe()], diff --git a/man/step_ica.Rd b/man/step_ica.Rd index 6b4192645..d9362e0a9 100644 --- a/man/step_ica.Rd +++ b/man/step_ica.Rd @@ -113,14 +113,17 @@ rec <- recipe( ~ ., data = tr) ica_trans <- step_center(rec, V1, V2) ica_trans <- step_scale(ica_trans, V1, V2) ica_trans <- step_ica(ica_trans, V1, V2, num_comp = 2) -# ica_estimates <- prep(ica_trans, training = tr) -# ica_data <- bake(ica_estimates, te) -# plot(te$V1, te$V2) -# plot(ica_data$IC1, ica_data$IC2) +if (require(dimRed) & require(fastICA)) { + ica_estimates <- prep(ica_trans, training = tr) + ica_data <- bake(ica_estimates, te) -# tidy(ica_trans, number = 3) -# tidy(ica_estimates, number = 3) + plot(te$V1, te$V2) + plot(ica_data$IC1, ica_data$IC2) + + tidy(ica_trans, number = 3) + tidy(ica_estimates, number = 3) +} } \references{ Hyvarinen, A., and Oja, E. (2000). Independent diff --git a/man/step_isomap.Rd b/man/step_isomap.Rd index 469d6dde4..93a0224fb 100644 --- a/man/step_isomap.Rd +++ b/man/step_isomap.Rd @@ -114,16 +114,18 @@ im_trans <- rec \%>\% neighbors = 100, num_terms = 2) -# im_estimates <- prep(im_trans, training = biomass_tr) +if (require(dimRed) & require(RSpectra)) { + im_estimates <- prep(im_trans, training = biomass_tr) -# im_te <- bake(im_estimates, biomass_te) + im_te <- bake(im_estimates, biomass_te) -# rng <- extendrange(c(im_te$Isomap1, im_te$Isomap2)) -# plot(im_te$Isomap1, im_te$Isomap2, -# xlim = rng, ylim = rng) + rng <- extendrange(c(im_te$Isomap1, im_te$Isomap2)) + plot(im_te$Isomap1, im_te$Isomap2, + xlim = rng, ylim = rng) -# tidy(im_trans, number = 4) -# tidy(im_estimates, number = 4) + tidy(im_trans, number = 4) + tidy(im_estimates, number = 4) +} } } \references{ diff --git a/man/step_kpca.Rd b/man/step_kpca.Rd index 653fb5e2e..2a244a4b1 100644 --- a/man/step_kpca.Rd +++ b/man/step_kpca.Rd @@ -122,16 +122,18 @@ kpca_trans <- rec \%>\% step_scale(all_predictors()) \%>\% step_kpca(all_predictors()) -# kpca_estimates <- prep(kpca_trans, training = biomass_tr) +if (require(dimRed) & require(kernlab)) { + kpca_estimates <- prep(kpca_trans, training = biomass_tr) -# kpca_te <- bake(kpca_estimates, biomass_te) + kpca_te <- bake(kpca_estimates, biomass_te) -# rng <- extendrange(c(kpca_te$kPC1, kpca_te$kPC2)) -# plot(kpca_te$kPC1, kpca_te$kPC2, -# xlim = rng, ylim = rng) + rng <- extendrange(c(kpca_te$kPC1, kpca_te$kPC2)) + plot(kpca_te$kPC1, kpca_te$kPC2, + xlim = rng, ylim = rng) -# tidy(kpca_trans, number = 4) -# tidy(kpca_estimates, number = 4) + tidy(kpca_trans, number = 4) + tidy(kpca_estimates, number = 4) +} } \references{ Scholkopf, B., Smola, A., and Muller, K. (1997). diff --git a/man/step_nnmf.Rd b/man/step_nnmf.Rd index 182c21b72..27522c835 100644 --- a/man/step_nnmf.Rd +++ b/man/step_nnmf.Rd @@ -89,20 +89,20 @@ If \code{num = 101}, the names would be \code{NNMF001} - } \examples{ \donttest{ -library(dimRed) -library(NMF) data(biomass) -rec <- recipe(HHV ~ ., data = biomass) \%>\% - update_role(sample, new_role = "id var") \%>\% - update_role(dataset, new_role = "split variable") \%>\% - step_nnmf(all_predictors(), num_comp = 2, seed = 473, num_run = 2) \%>\% - prep(training = biomass, retain = TRUE) +if (require(dimRed) & require(NMF)) { + rec <- recipe(HHV ~ ., data = biomass) \%>\% + update_role(sample, new_role = "id var") \%>\% + update_role(dataset, new_role = "split variable") \%>\% + step_nnmf(all_predictors(), num_comp = 2, seed = 473, num_run = 2) \%>\% + prep(training = biomass, retain = TRUE) -# juice(rec) + juice(rec) -# library(ggplot2) -# ggplot(juice(rec), aes(x = NNMF2, y = NNMF1, col = HHV)) + geom_point() + library(ggplot2) + ggplot(juice(rec), aes(x = NNMF2, y = NNMF1, col = HHV)) + geom_point() +} } } \seealso{ From 39b0c4e1e45a0a6fd30202ccdd15c9ac04643a4e Mon Sep 17 00:00:00 2001 From: topepo Date: Sun, 18 Nov 2018 10:29:10 -0500 Subject: [PATCH 2/5] global variable false positive --- R/integer.R | 11 +++++++---- 1 file changed, 7 insertions(+), 4 deletions(-) diff --git a/R/integer.R b/R/integer.R index 8699aada1..ec6a9ebba 100644 --- a/R/integer.R +++ b/R/integer.R @@ -21,7 +21,7 @@ #' @param strict A logical for whether the values should be returned as #' integers (as opposed to double). #' @param zero_based A logical for whether the integers should start at zero and -#' new values be appended as the largest integer. +#' new values be appended as the largest integer. #' @return An updated version of `recipe` with the new step added #' to the sequence of existing steps (if any). For the `tidy` #' method, a tibble with columns `terms` (the selectors or @@ -38,7 +38,7 @@ #' argument above). Missing values propagate. #' #' Factor inputs are ordered by their levels. All others are -#' ordered by `sort`. +#' ordered by `sort`. #' #' Despite the name, the new values are returned as numeric unless #' `strict = TRUE`, which will coerce the results to integers. @@ -144,12 +144,12 @@ prep.step_integer <- function(x, training, info = NULL, ...) { map_key_to_int <- function(dat, key, strict = FALSE, zero = FALSE) { if (is.factor(dat)) dat <- as.character(dat) - + res <- full_join(tibble(value = dat, .row = seq_along(dat)), key, by = "value") res <- dplyr::filter(res, !is.na(.row)) res <- arrange(res, .row) if (zero) { - res$integer[is.na(res$integer) & !is.na(res$value)] <- + res$integer[is.na(res$integer) & !is.na(res$value)] <- max(key$integer, na.rm = TRUE) + 1 } else { res$integer[is.na(res$integer) & !is.na(res$value)] <- 0 @@ -200,3 +200,6 @@ tidy.step_integer <- function(x, ...) { res$id <- x$id res } + +#' @importFrom stats runif +utils::globalVariables(c(".row")) From d7f5df9622f75ca81d30a35851ab9f074f50111a Mon Sep 17 00:00:00 2001 From: topepo Date: Sun, 18 Nov 2018 10:29:26 -0500 Subject: [PATCH 3/5] isomap values are unique up to sign --- tests/testthat/test_isomap.R | 7 ++++--- 1 file changed, 4 insertions(+), 3 deletions(-) diff --git a/tests/testthat/test_isomap.R b/tests/testthat/test_isomap.R index 9417d0888..0517328b6 100644 --- a/tests/testthat/test_isomap.R +++ b/tests/testthat/test_isomap.R @@ -29,7 +29,7 @@ test_that('correct Isomap values', { skip_if_not_installed("igraph") skip_if_not_installed("RANN") skip_if_not_installed("dimRed") - + im_rec <- rec %>% step_isomap(x1, x2, x3, neighbors = 3, num_terms = 3, id = "") @@ -37,7 +37,8 @@ test_that('correct Isomap values', { im_pred <- bake(im_trained, new_data = dat2) - all.equal(as.matrix(im_pred), as.matrix(exp_res)) + # unique up to sign + all.equal(abs(as.matrix(im_pred)), abs(as.matrix(exp_res))) im_tibble <- tibble(terms = c("x1", "x2", "x3"), id = "") @@ -61,7 +62,7 @@ test_that('printing', { skip_if_not_installed("igraph") skip_if_not_installed("RANN") skip_if_not_installed("dimRed") - + im_rec <- rec %>% step_isomap(x1, x2, x3, neighbors = 3, num_terms = 3) expect_output(print(im_rec)) From 9f77b639f5310360a654b26310babb452054bed1 Mon Sep 17 00:00:00 2001 From: topepo Date: Sun, 18 Nov 2018 13:23:39 -0500 Subject: [PATCH 4/5] skip if early R version --- tests/testthat/test_isomap.R | 2 ++ 1 file changed, 2 insertions(+) diff --git a/tests/testthat/test_isomap.R b/tests/testthat/test_isomap.R index 0517328b6..414719e96 100644 --- a/tests/testthat/test_isomap.R +++ b/tests/testthat/test_isomap.R @@ -29,6 +29,7 @@ test_that('correct Isomap values', { skip_if_not_installed("igraph") skip_if_not_installed("RANN") skip_if_not_installed("dimRed") + skip_if(getRversion() <= "3.4.4") im_rec <- rec %>% step_isomap(x1, x2, x3, neighbors = 3, num_terms = 3, id = "") @@ -62,6 +63,7 @@ test_that('printing', { skip_if_not_installed("igraph") skip_if_not_installed("RANN") skip_if_not_installed("dimRed") + skip_if(getRversion() <= "3.4.4") im_rec <- rec %>% step_isomap(x1, x2, x3, neighbors = 3, num_terms = 3) From 335ac68997d38c6737518c6c007a4e6fdfdf131d Mon Sep 17 00:00:00 2001 From: topepo Date: Sun, 18 Nov 2018 13:48:53 -0500 Subject: [PATCH 5/5] minor format change --- NEWS.md | 1 + 1 file changed, 1 insertion(+) diff --git a/NEWS.md b/NEWS.md index d81060fba..79760f1c8 100644 --- a/NEWS.md +++ b/NEWS.md @@ -1,6 +1,7 @@ # `recipes` 0.1.3.9002 ## Breaking Changes + * Several argument names were changed to be consistent with other `tidymodels` packages (e.g. `dials`) and the general tidyverse naming conventions. * `K` in `step_knnimpute` was changed to `neighbors`. `step_isomap` had the number of neighbors promoted to a main argument called `neighbors ` * `step_pca`, `step_pls`, `step_kpca`, `step_ica` now use `num_comp` instead of `num`. , `step_isomap` uses `num_terms` instead of `num`.