Merge branch 'main' into f-2386-use-cli

r-lib · Dec 15, 2023 · 20d2253 · 20d2253
2 parents 6fe595f + fdbb9bf
commit 20d2253
Show file tree

Hide file tree

Showing 29 changed files with 462 additions and 165 deletions.
diff --git a/DESCRIPTION b/DESCRIPTION
@@ -74,7 +74,7 @@ Collate:
  'cache.R'
  'class_equals_linter.R'
  'commas_linter.R'
- 'comment_linters.R'
+ 'commented_code_linter.R'
  'comments.R'
  'comparison_negation_linter.R'
  'condition_call_linter.R'
@@ -141,6 +141,7 @@ Collate:
  'nested_ifelse_linter.R'
  'nested_pipe_linter.R'
  'nonportable_path_linter.R'
+ 'shared_constants.R'
  'nrow_subset_linter.R'
  'numeric_leading_zero_linter.R'
  'nzchar_linter.R'
@@ -173,7 +174,6 @@ Collate:
  'seq_linter.R'
  'settings.R'
  'settings_utils.R'
- 'shared_constants.R'
  'sort_linter.R'
  'source_utils.R'
  'spaces_inside_linter.R'
@@ -184,6 +184,7 @@ Collate:
  'strings_as_factors_linter.R'
  'system_file_linter.R'
  'terminal_close_linter.R'
+ 'todo_comment_linter.R'
  'trailing_blank_lines_linter.R'
  'trailing_whitespace_linter.R'
  'tree_utils.R'

diff --git a/NEWS.md b/NEWS.md
@@ -34,13 +34,17 @@
  + `allow_implicit_else` (default `TRUE`) which, when `FALSE`, checks that all terminal `if` statements are paired with a corresponding `else` statement (part of #884, @MichaelChirico).
  + `return_functions` to customize which functions are equivalent to `return()` as "exit" clauses, e.g. `rlang::abort()` can be considered in addition to the default functions like `stop()` and `q()` from base (#2271 and part of #884, @MichaelChirico and @MEO265).
  + `except` to customize which functions are ignored entirely (i.e., whether they have a return of the specified style is not checked; #2271 and part of #884, @MichaelChirico and @MEO265). Namespace hooks like `.onAttach()` and `.onLoad()` are always ignored.
+ + `except_regex`, the same purpose as `except=`, but filters functions by pattern. This is motivated by {RUnit}, where test suites are based on unit test functions matched by pattern, e.g. `^Test`, and where explicit return may be awkward (#2335, @MichaelChirico).
 * `unnecessary_lambda_linter` is extended to encourage vectorized comparisons where possible, e.g. `sapply(x, sum) > 0` instead of `sapply(x, function(x) sum(x) > 0)` (part of #884, @MichaelChirico). Toggle this behavior with argument `allow_comparison`.
 * `backport_linter()` is slightly faster by moving expensive computations outside the linting function (#2339, #2348, @AshesITR and @MichaelChirico).
 * `Linter()` has a new argument `linter_level` (default `NA`). This is used by `lint()` to more efficiently check for expression levels than the idiom `if (!is_lint_level(...)) { return(list()) }` (#2351, @AshesITR).
 * `string_boundary_linter()` recognizes regular expression calls like `grepl("^abc$", x)` that can be replaced by using `==` instead (#1613, @MichaelChirico).
 * `unreachable_code_linter()` has an argument `allow_comment_regex` for customizing which "terminal" comments to exclude (#2327, @MichaelChirico). `# nolint end` comments are always excluded, as are {covr} exclusions (e.g. `# nocov end`) by default.
 * `format()` and `print()` methods for `lint` and `lints` classes get a new option `width` to control the printing width of lint messages (#1884, @MichaelChirico). The default is controlled by a new option `lintr.format_width`; if unset, no wrapping occurs (matching earlier behavior).
+* `implicit_assignment_linter()` gets a custom message for the case of using `(` to induce printing like `(x <- foo())`; use an explicit call to `print()` for clarity (#2257, @MichaelChirico).
 * New function node caching for big efficiency gains to most linters (e.g. overall `lint_package()` improvement of 14-27% and core linting improvement up to 30%; #2357, @AshesITR). Most linters are written around function usage, and XPath performance searching for many functions is poor. The new `xml_find_function_calls()` entry in the `get_source_expressions()` output caches all function call nodes instead. See the vignette on creating linters for more details on how to use it.
+* `vector_logic_linter()` is extended to recognize incorrect usage of scalar operators `&&` and `||` inside subsetting expressions like `dplyr::filter(x, A && B)` (#2166, @MichaelChirico).
+* `any_is_na_linter()` is extended to catch the unusual usage `NA %in% x` (#2113, @MichaelChirico).
 
 ### New linters
 
@@ -58,7 +62,7 @@
 * `consecutive_mutate_linter()` for encouraging consecutive calls to `dplyr::mutate()` to be combined (part of #884, @MichaelChirico).
 * `if_switch_linter()` for encouraging `switch()` over repeated `if`/`else` tests (part of #884, @MichaelChirico).
 * `nested_pipe_linter()` for discouraging pipes within pipes, e.g. `df1 %>% inner_join(df2 %>% select(a, b))` (part of #884, @MichaelChirico).
-* `nrow_subset_linter()` for discouraging usage like `nrow(subset(x, conditions))` in favor of something like `with(x, sum(conditions))` which doesn't require a full subset of `x` (part of #884, @MichaelChirico).
+* `nrow_subset_linter()` for discouraging usage like `nrow(subset(x, conditions))` in favor of something like `with(x, sum(conditions))` which doesn't require a full subset of `x` (#2314 and part of #884, @MichaelChirico).
 * `pipe_return_linter()` for discouraging usage of `return()` inside a {magrittr} pipeline (part of #884, @MichaelChirico).
 * `one_call_pipe_linter()` for discouraging one-step pipelines like `x |> as.character()` (#2330 and part of #884, @MichaelChirico).
 * `object_overwrite_linter()` for discouraging re-use of upstream package exports as local variables (#2344, #2346 and part of #884, @MichaelChirico and @AshesITR).
@@ -71,6 +75,7 @@
  + ignores calls on the RHS of operators like `lapply(l, function(x) "a" %in% names(x))` (#2310, @MichaelChirico).
 * `vector_logic_linter()` recognizes some cases where bitwise `&`/`|` are used correctly (#1453, @MichaelChirico).
 * `expect_comparison_linter()` ignores faulty usage like `expect_true(x, y > z)` (#2083, @MichaelChirico). Note that `y > z` is being passed to the `info=` argument, so this is likely a mistake.
+* `consecutive_assertion_linter()` ignores cases where a second asssertion follows assignment with `=` (#2444, @MichaelChirico).
 
 ### Lint accuracy fixes: removing false negatives
 

diff --git a/R/any_is_na_linter.R b/R/any_is_na_linter.R
@@ -36,7 +36,7 @@
 #' @seealso [linters] for a complete list of linters available in lintr.
 #' @export
 any_is_na_linter <- function() {
- xpath <- "
+ any_xpath <- "
  parent::expr
  /following-sibling::expr[1][expr[1][SYMBOL_FUNCTION_CALL[text() = 'is.na']]]
  /parent::expr[
@@ -45,15 +45,28 @@ any_is_na_linter <- function() {
  ]
  "
 
+ in_xpath <- "//SPECIAL[text() = '%in%']/preceding-sibling::expr[NUM_CONST[starts-with(text(), 'NA')]]"
+
  Linter(linter_level = "expression", function(source_expression) {
+ xml <- source_expression$xml_parsed_content
  xml_calls <- source_expression$xml_find_function_calls("any")
- bad_expr <- xml_find_all(xml_calls, xpath)
 
- xml_nodes_to_lints(
- bad_expr,
+ any_expr <- xml_find_all(xml_calls, any_xpath)
+ any_lints <- xml_nodes_to_lints(
+ any_expr,
  source_expression = source_expression,
  lint_message = "anyNA(x) is better than any(is.na(x)).",
  type = "warning"
  )
+
+ in_expr <- xml_find_all(xml, in_xpath)
+ in_lints <- xml_nodes_to_lints(
+ in_expr,
+ source_expression = source_expression,
+ lint_message = "anyNA(x) is better than NA %in% x.",
+ type = "warning"
+ )
+
+ c(any_lints, in_lints)
  })
 }
diff --git a/R/comment_linters.R → R/commented_code_linter.R b/R/comment_linters.R → R/commented_code_linter.R
@@ -1,29 +1,3 @@
-ops <- list(
- "+",
- # "-",
- "=",
- "==",
- "!=",
- "<=",
- ">=",
- "<-",
- "<<-",
- "<",
- ">",
- "->",
- "->>",
- "%%",
- "/",
- "^",
- "*",
- "**",
- "|",
- "||",
- "&",
- "&&",
- rex("%", except_any_of("%"), "%")
-)
-
 #' Commented code linter
 #'
 #' Check that there is no commented code outside roxygen blocks.
@@ -60,6 +34,32 @@ ops <- list(
 #' @seealso [linters] for a complete list of linters available in lintr.
 #' @export
 commented_code_linter <- function() {
+ ops <- list(
+ "+",
+ # "-",
+ "=",
+ "==",
+ "!=",
+ "<=",
+ ">=",
+ "<-",
+ "<<-",
+ "<",
+ ">",
+ "->",
+ "->>",
+ "%%",
+ "/",
+ "^",
+ "*",
+ "**",
+ "|",
+ "||",
+ "&",
+ "&&",
+ rex("%", except_any_of("%"), "%")
+ )
+
  code_candidate_regex <- rex(
  some_of("#"),
  any_spaces,
@@ -117,69 +117,3 @@ parsable <- function(x) {
  res <- try_silently(parse(text = x))
  !inherits(res, "try-error")
 }
-
-
-#' TODO comment linter
-#'
-#' Check that the source contains no TODO comments (case-insensitive).
-#'
-#' @param todo Vector of strings that identify TODO comments.
-#'
-#' @examples
-#' # will produce lints
-#' lint(
-#' text = "x + y # TODO",
-#' linters = todo_comment_linter()
-#' )
-#'
-#' lint(
-#' text = "pi <- 1.0 # FIXME",
-#' linters = todo_comment_linter()
-#' )
-#'
-#' lint(
-#' text = "x <- TRUE # hack",
-#' linters = todo_comment_linter(todo = c("todo", "fixme", "hack"))
-#' )
-#'
-#' # okay
-#' lint(
-#' text = "x + y # my informative comment",
-#' linters = todo_comment_linter()
-#' )
-#'
-#' lint(
-#' text = "pi <- 3.14",
-#' linters = todo_comment_linter()
-#' )
-#'
-#' lint(
-#' text = "x <- TRUE",
-#' linters = todo_comment_linter()
-#' )
-#'
-#' @evalRd rd_tags("todo_comment_linter")
-#' @seealso [linters] for a complete list of linters available in lintr.
-#' @export
-todo_comment_linter <- function(todo = c("todo", "fixme")) {
- todo_comment_regex <- rex(one_or_more("#"), any_spaces, or(todo))
- Linter(function(source_expression) {
- tokens <- with_id(source_expression, ids_with_token(source_expression, "COMMENT"))
- are_todo <- re_matches(tokens[["text"]], todo_comment_regex, ignore.case = TRUE)
- tokens <- tokens[are_todo, ]
- lapply(
- split(tokens, seq_len(nrow(tokens))),
- function(token) {
- Lint(
- filename = source_expression[["filename"]],
- line_number = token[["line1"]],
- column_number = token[["col1"]],
- type = "style",
- message = "Remove TODO comments.",
- line = source_expression[["lines"]][[as.character(token[["line1"]])]],
- ranges = list(c(token[["col1"]], token[["col2"]]))
- )
- }
- )
- })
-}
diff --git a/R/condition_call_linter.R b/R/condition_call_linter.R
@@ -6,10 +6,10 @@
 #'
 #' @param display_call Logical specifying expected behaviour regarding `call.`
 #' argument in conditions.
-#' - `NA` forces providing `call.=` but ignores its value (this can be used in
+#' - `NA` forces providing `call. =` but ignores its value (this can be used in
 #' cases where you expect a mix of `call. = FALSE` and `call. = TRUE`)
-#' - lints `call. = FALSE`
-#' - forces `call. = FALSE` (lints `call. = TRUE` or missing `call.=` value)
+#' - `TRUE` lints `call. = FALSE`
+#' - `FALSE` forces `call. = FALSE` (lints `call. = TRUE` or missing `call. =` value)
 #'
 #'
 #' @examples

diff --git a/R/consecutive_assertion_linter.R b/R/consecutive_assertion_linter.R
@@ -31,20 +31,23 @@
 #' @seealso [linters] for a complete list of linters available in lintr.
 #' @export
 consecutive_assertion_linter <- function() {
- stopifnot_xpath <- "
+ # annoying expr-but-not-really nodes
+ next_expr <- "following-sibling::*[self::expr or self::expr_or_assign_or_help or self::equal_assign][1]"
+
+ stopifnot_xpath <- glue("
  parent::expr
  /parent::expr[
- expr[1]/SYMBOL_FUNCTION_CALL = following-sibling::expr[1]/expr[1]/SYMBOL_FUNCTION_CALL
+ expr[1]/SYMBOL_FUNCTION_CALL = {next_expr}/expr[1]/SYMBOL_FUNCTION_CALL
  ]
- "
- assert_that_xpath <- "
+ ")
+ assert_that_xpath <- glue("
  parent::expr
  /parent::expr[
  not(SYMBOL_SUB[text() = 'msg'])
  and not(following-sibling::expr[1]/SYMBOL_SUB[text() = 'msg'])
- and expr[1]/SYMBOL_FUNCTION_CALL = following-sibling::expr[1]/expr[1]/SYMBOL_FUNCTION_CALL
+ and expr[1]/SYMBOL_FUNCTION_CALL = {next_expr}/expr[1]/SYMBOL_FUNCTION_CALL
  ]
- "
+ ")
 
  Linter(linter_level = "file", function(source_expression) {
  # need the full file to also catch usages at the top level

diff --git a/R/implicit_assignment_linter.R b/R/implicit_assignment_linter.R
@@ -102,21 +102,25 @@ implicit_assignment_linter <- function(except = c("bquote", "expression", "expr"
  )
  }
 
+ implicit_message <- paste(
+ "Avoid implicit assignments in function calls.",
+ "For example, instead of `if (x <- 1L) { ... }`, write `x <- 1L; if (x) { ... }`."
+ )
+
+ print_message <- "Call print() explicitly instead of relying on implicit printing behavior via '('."
+
  Linter(linter_level = "file", function(source_expression) {
  # need the full file to also catch usages at the top level
  xml <- source_expression$full_xml_parsed_content
 
  bad_expr <- xml_find_all(xml, xpath)
 
- lint_message <- paste(
- "Avoid implicit assignments in function calls.",
- "For example, instead of `if (x <- 1L) { ... }`, write `x <- 1L; if (x) { ... }`."
- )
+ print_only <- !is.na(xml_find_first(bad_expr, "parent::expr[parent::exprlist and *[1][self::OP-LEFT-PAREN]]"))
 
  xml_nodes_to_lints(
  bad_expr,
  source_expression = source_expression,
- lint_message = lint_message,
+ lint_message = ifelse(print_only, print_message, implicit_message),
  type = "warning"
  )
  })

diff --git a/R/nrow_subset_linter.R b/R/nrow_subset_linter.R
@@ -22,19 +22,25 @@
 #'
 #' @evalRd rd_tags("nrow_subset_linter")
 #' @seealso [linters] for a complete list of linters available in lintr.
+#' @include shared_constants.R
 #' @export
 nrow_subset_linter <- make_linter_from_function_xpath(
  function_names = "subset",
- xpath = "
+ xpath = glue("
  parent::expr
  /parent::expr
- /parent::expr[expr/SYMBOL_FUNCTION_CALL[text() = 'nrow']]
- ",
+ /parent::expr[
+ expr/SYMBOL_FUNCTION_CALL[text() = 'nrow']
+ or (self::expr | parent::expr)[
+ (PIPE or SPECIAL[{ xp_text_in_table(setdiff(magrittr_pipes, c('%$%', '%<>%'))) }])
+ and expr/expr/SYMBOL_FUNCTION_CALL[text() = 'nrow']
+ ]
+ ]
+ "),
  lint_message = paste(
  "Use arithmetic to count the number of rows satisfying a condition,",
  "rather than fully subsetting the data.frame and counting the resulting rows.",
- "For example, replace nrow(subset(x, is_treatment))",
- "with sum(x$is_treatment). NB: use na.rm = TRUE if `is_treatment` has",
- "missing values."
+ "For example, replace nrow(subset(x, is_treatment)) with sum(x$is_treatment).",
+ "NB: use na.rm = TRUE if `is_treatment` has missing values."
  )
 )