From fc3a6d5f9baf901dbf0a920a48fa962903aa835b Mon Sep 17 00:00:00 2001 From: Michael Chirico Date: Wed, 13 Dec 2023 05:32:53 +0000 Subject: [PATCH] pipeline support --- DESCRIPTION | 2 +- NEWS.md | 2 +- R/nrow_subset_linter.R | 18 ++++++++++++------ tests/testthat/test-nrow_subset_linter.R | 11 +++++++++++ 4 files changed, 25 insertions(+), 8 deletions(-) diff --git a/DESCRIPTION b/DESCRIPTION index ffa56a981..cd889cd40 100644 --- a/DESCRIPTION +++ b/DESCRIPTION @@ -141,6 +141,7 @@ Collate: 'nested_ifelse_linter.R' 'nested_pipe_linter.R' 'nonportable_path_linter.R' + 'shared_constants.R' 'nrow_subset_linter.R' 'numeric_leading_zero_linter.R' 'nzchar_linter.R' @@ -173,7 +174,6 @@ Collate: 'seq_linter.R' 'settings.R' 'settings_utils.R' - 'shared_constants.R' 'sort_linter.R' 'spaces_inside_linter.R' 'spaces_left_parentheses_linter.R' diff --git a/NEWS.md b/NEWS.md index 5c8ec3b9e..c283d2a24 100644 --- a/NEWS.md +++ b/NEWS.md @@ -57,7 +57,7 @@ * `consecutive_mutate_linter()` for encouraging consecutive calls to `dplyr::mutate()` to be combined (part of #884, @MichaelChirico). * `if_switch_linter()` for encouraging `switch()` over repeated `if`/`else` tests (part of #884, @MichaelChirico). * `nested_pipe_linter()` for discouraging pipes within pipes, e.g. `df1 %>% inner_join(df2 %>% select(a, b))` (part of #884, @MichaelChirico). -* `nrow_subset_linter()` for discouraging usage like `nrow(subset(x, conditions))` in favor of something like `with(x, sum(conditions))` which doesn't require a full subset of `x` (part of #884, @MichaelChirico). +* `nrow_subset_linter()` for discouraging usage like `nrow(subset(x, conditions))` in favor of something like `with(x, sum(conditions))` which doesn't require a full subset of `x` (#2314 and part of #884, @MichaelChirico). * `pipe_return_linter()` for discouraging usage of `return()` inside a {magrittr} pipeline (part of #884, @MichaelChirico). * `one_call_pipe_linter()` for discouraging one-step pipelines like `x |> as.character()` (#2330 and part of #884, @MichaelChirico). * `object_overwrite_linter()` for discouraging re-use of upstream package exports as local variables (#2344, #2346 and part of #884, @MichaelChirico and @AshesITR). diff --git a/R/nrow_subset_linter.R b/R/nrow_subset_linter.R index eb731e579..72a2a7adc 100644 --- a/R/nrow_subset_linter.R +++ b/R/nrow_subset_linter.R @@ -22,19 +22,25 @@ #' #' @evalRd rd_tags("nrow_subset_linter") #' @seealso [linters] for a complete list of linters available in lintr. +#' @include shared_constants.R #' @export nrow_subset_linter <- make_linter_from_xpath( - xpath = " + xpath = glue(" //SYMBOL_FUNCTION_CALL[text() = 'subset'] /parent::expr /parent::expr - /parent::expr[expr/SYMBOL_FUNCTION_CALL[text() = 'nrow']] - ", + /parent::expr[ + expr/SYMBOL_FUNCTION_CALL[text() = 'nrow'] + or (self::expr | parent::expr)[ + (PIPE or SPECIAL[{ xp_text_in_table(setdiff(magrittr_pipes, c('%$%', '%<>%'))) }]) + and expr/expr/SYMBOL_FUNCTION_CALL[text() = 'nrow'] + ] + ] + "), lint_message = paste( "Use arithmetic to count the number of rows satisfying a condition,", "rather than fully subsetting the data.frame and counting the resulting rows.", - "For example, replace nrow(subset(x, is_treatment))", - "with sum(x$is_treatment). NB: use na.rm = TRUE if `is_treatment` has", - "missing values." + "For example, replace nrow(subset(x, is_treatment)) with sum(x$is_treatment).", + "NB: use na.rm = TRUE if `is_treatment` has missing values." ) ) diff --git a/tests/testthat/test-nrow_subset_linter.R b/tests/testthat/test-nrow_subset_linter.R index 18a3e45d8..2594f00ef 100644 --- a/tests/testthat/test-nrow_subset_linter.R +++ b/tests/testthat/test-nrow_subset_linter.R @@ -29,3 +29,14 @@ test_that("lints vectorize", { nrow_subset_linter() ) }) + +test_that("linter is pipeline-aware", { + linter <- nrow_subset_linter() + lint_msg <- "Use arithmetic to count the number of rows satisfying a condition" + + expect_lint("x %>% subset(y == z) %>% nrow()", lint_msg, linter) + expect_lint("subset(x) %>% nrow()", lint_msg, linter) + + skip_if_not_r_version("4.1.0") + expect_lint("x |> subset(y == z) |> nrow()", lint_msg, linter) +})