From 610da60fd4060c75279ca400bce7f851e6275069 Mon Sep 17 00:00:00 2001 From: Max Kuhn Date: Sat, 19 Feb 2022 09:17:21 -0500 Subject: [PATCH] 0-2-0 Release candidate (#911) * skip for tune registration issues * rearrange * version bump * link doesn't exist yet --- DESCRIPTION | 2 +- NEWS.md | 33 ++++++++++++++++++--------------- tests/testthat/helpers.R | 10 ++++++++++ tests/testthat/test-extract.R | 6 ++++++ vignettes/Dummies.Rmd | 2 +- 5 files changed, 36 insertions(+), 17 deletions(-) create mode 100644 tests/testthat/helpers.R diff --git a/DESCRIPTION b/DESCRIPTION index 23b6e8323..5fe6e62ce 100644 --- a/DESCRIPTION +++ b/DESCRIPTION @@ -1,6 +1,6 @@ Package: recipes Title: Preprocessing and Feature Engineering Steps for Modeling -Version: 0.1.17.9001 +Version: 0.2.0 Authors@R: c( person("Max", "Kuhn", , "max@rstudio.com", role = c("aut", "cre")), person("Hadley", "Wickham", , "hadley@rstudio.com", role = "aut"), diff --git a/NEWS.md b/NEWS.md index 9cd4b8c00..1393f2aa7 100644 --- a/NEWS.md +++ b/NEWS.md @@ -1,4 +1,14 @@ -# recipes (development version) +# recipes 0.2.0 + +## New Steps + +* `step_nnmf_sparse()` uses a different implementation of non-negative matrix factorization that is much faster and enables regularized estimation. (#790) + +* `step_dummy_extract()` creates multiple variables from a character variable by extracting elements using regular expressions and counting those elements. + +* `step_filter_missing()` can filter columns based on proportion of missingness (#270). + +* `step_percentile()` replaces the value of a variable with its percentile from the training set. (#765) ## Improvements and Other Changes @@ -6,26 +16,14 @@ * Fixed bug in `step_harmonic()` printing and changed defaults to `role = "predictor"` and `keep_original_cols = FALSE` (#822). -* Added a new step called `step_filter_missing()`, which can filter columns based on proportion of missingness (#270).
- * Improved the efficiency of computations for the Box-Cox transformation (#820). * When a feature extraction step (e.g., `step_pca()`, `step_ica()`, etc.) has zero components specified, the `tidy()` method now lists the selected columns in the `terms` column. -* Added a new step called `step_nnmf_sparse()` which uses a different implementation of non-negative matrix factorization that is much faster and enables regularized estimation. (#790) - * Deprecation has started for `step_nnmf()` in favor of `step_nnmf_sparse()`. (#790) * Steps now have a dedicated subsection detailing what happens when `tidy()` is applied. (#876) -* Added a new step called `step_dummy_extract()` which creates multiple variables from a character variable by extracting elements using regular expressions and counting those elements. - -## Breaking Changes - -* `step_ica()` now indirectly uses the `fastICA` package since that package has increased their R version requirement. Recipe objects from previous versions will error when applied to new data. (#823) - -* `step_kpca*()` now directly use the `kernlab` package. Recipe objects from previous versions will error when applied to new data. - * `step_ica()` now runs `fastICA()` using a specific set of random numbers so that initialization is reproducible. * `tidy.recipe()` now returns a zero row tibble instead of an error when applied to a empty recipe. (#867) @@ -34,12 +32,17 @@ * `detect_step()` is no longer restricted to steps created in recipes (#869). -* Added a new step called `step_percentile()`, that replaces the value of a variable with its percentile from the training set. (#765) - * New `extract_parameter_set_dials()` and `extract_parameter_dials()` methods to extract parameter sets and single parameters from `recipe` objects. * `step_other()` now allow for setting `threshold = 0` which will result in no othering. 
(#904) +## Breaking Changes + +* `step_ica()` now indirectly uses the `fastICA` package since that package has increased their R version requirement. Recipe objects from previous versions will error when applied to new data. (#823) + +* `step_kpca*()` now directly use the `kernlab` package. Recipe objects from previous versions will error when applied to new data. + + ## Developer * The print methods have been internally changes to use `print_step()` instead of `printer()`. This is done for a smoother transition to use `cli` in the next version. (#871) diff --git a/tests/testthat/helpers.R b/tests/testthat/helpers.R new file mode 100644 index 000000000..6bd3f9c85 --- /dev/null +++ b/tests/testthat/helpers.R @@ -0,0 +1,10 @@ + +tune_check <- function() { + if (rlang::is_installed("tune")) { + res <- utils::packageVersion("tune") <= "0.1.6" + } else { + res <- TRUE + } + res +} + diff --git a/tests/testthat/test-extract.R b/tests/testthat/test-extract.R index b6aced539..b8b75faa5 100644 --- a/tests/testthat/test-extract.R +++ b/tests/testthat/test-extract.R @@ -1,5 +1,6 @@ test_that('extract parameter set from recipe with no steps', { + skip_if(tune_check()) bare_rec <- recipe(mpg ~ ., data = mtcars) bare_info <- extract_parameter_set_dials(bare_rec) @@ -8,6 +9,7 @@ test_that('extract parameter set from recipe with no steps', { }) test_that('extract parameter set from recipe with no tunable parameters', { + skip_if(tune_check()) rm_rec <- recipe(mpg ~ ., data = mtcars) %>% step_rm(hp) @@ -18,6 +20,7 @@ test_that('extract parameter set from recipe with no tunable parameters', { }) test_that('extract parameter set from recipe with tunable parameters', { + skip_if(tune_check()) spline_rec <- recipe(mpg ~ ., data = mtcars) %>% step_impute_knn(all_numeric_predictors(), neighbors = hardhat::tune("imputation")) %>% @@ -49,6 +52,7 @@ test_that('extract parameter set from recipe with tunable parameters', { # 
------------------------------------------------------------------------- test_that('extract single parameter from recipe with no steps', { + skip_if(tune_check()) bare_rec <- recipe(mpg ~ ., data = mtcars) expect_error( @@ -57,6 +61,7 @@ test_that('extract single parameter from recipe with no steps', { }) test_that('extract single parameter from recipe with no tunable parameters', { + skip_if(tune_check()) rm_rec <- recipe(mpg ~ ., data = mtcars) %>% step_rm(hp) @@ -67,6 +72,7 @@ test_that('extract single parameter from recipe with no tunable parameters', { }) test_that('extract single parameter from recipe with tunable parameters', { + skip_if(tune_check()) spline_rec <- recipe(mpg ~ ., data = mtcars) %>% step_impute_knn(all_numeric_predictors(), neighbors = hardhat::tune("imputation")) %>% diff --git a/vignettes/Dummies.Rmd b/vignettes/Dummies.Rmd index 163445653..b7507ba81 100644 --- a/vignettes/Dummies.Rmd +++ b/vignettes/Dummies.Rmd @@ -245,7 +245,7 @@ There are a bunch of steps related to going in-between factors and dummy variabl * [`step_zv`](https://recipes.tidymodels.org/reference/step_zv.html) can remove dummy variables that never show a 1 in the column (i.e. is zero-variance). * [`step_bin2factor`](https://recipes.tidymodels.org/reference/step_bin2factor.html) takes a binary indicator and makes a factor variable. This can be useful when using naive Bayes models. * `step_embed`, `step_lencode_glm`, `step_lencode_bayes` and others in the [`embed`](https://github.com/tidymodels/embed) package can use one or more (non-binary) values to encode factor predictors into a numeric form. - * [`step_dummy_extract`](https://recipes.tidymodels.org/reference/step_dummy_extract.html) can create binary indicators from strings and is especially useful for multiple choice columns. + * `step_dummy_extract` can create binary indicators from strings and is especially useful for multiple choice columns. 
[`step_dummy`](https://recipes.tidymodels.org/reference/step_dummy.html) also works with _ordered factors_. As seen above, the default encoding is to create a series of polynomial variables. There are also a few steps for ordered factors: