From e97be9bcb2c2228dc4d8d4213025d0b815b3185c Mon Sep 17 00:00:00 2001
From: F-Noelle <fabian.noelle@gmail.com>
Date: Tue, 21 May 2024 21:24:46 +0200
Subject: [PATCH] Make `scalar_in_linter()` configurable (#2574)

* Update scalar_in_linter.R (#1)

Make scalar_in_linter configurable to allow projects to define additional %in% style functions like %notin%.

* Incorporate review feedback (#2)

* Incorporate review feedback

- Use glue in xpath,
- Add changes to NEWS
- Change default for scalar_in_linter
- Make lint msg more open ended if another %in% operator was linted
- Update tests to match new bahaviour

* Add a vector of in operators (#3)

* Add a testcase based on configuration (#4)

* Improve lint message, NEWS, param documentation (#5)

* Improve lint message (#6)

* Update R/scalar_in_linter.R

Co-authored-by: Michael Chirico <michaelchirico4@gmail.com>

* Update lint message (#7)

---------

Co-authored-by: Michael Chirico <michaelchirico4@gmail.com>
---
 NEWS.md                                |  1 +
 R/scalar_in_linter.R                   | 22 ++++++++++--------
 inst/lintr/linters.csv                 |  2 +-
 man/configurable_linters.Rd            |  1 +
 man/linters.Rd                         |  4 ++--
 man/scalar_in_linter.Rd                | 13 +++++++----
 tests/testthat/test-scalar_in_linter.R | 32 +++++++++++++++++++-------
 7 files changed, 50 insertions(+), 25 deletions(-)

diff --git a/NEWS.md b/NEWS.md
index dc089c28f..333fc6396 100644
--- a/NEWS.md
+++ b/NEWS.md
@@ -16,6 +16,7 @@
 * `unnecessary_nested_if_linter()` is deprecated and subsumed into the new/more general `unnecessary_nesting_linter()`.
 * Drop support for posting GitHub comments from inside GitHub comment bot, Travis, Wercker, and Jenkins CI tools (spurred by #2148, @MichaelChirico). We rely on GitHub Actions for linting in CI, and don't see any active users relying on these alternatives. We welcome and encourage community contributions to get support for different CI system going again.
 * `cyclocomp_linter()` is no longer part of the default linters (#2555, @IndrajeetPatil) because the tidyverse style guide doesn't contain any guidelines on meeting certain complexity requirements. Note that users with `cyclocomp_linter()` in their configs may now need to install {cyclocomp} intentionally, in particular in CI/CD pipelines.
+* `scalar_in_linter` is now configurable to allow other `%in%` like operators to be linted. The data.table operator `%chin%` is no longer linted by default; use `in_operators = "%chin%"` to continue linting it. (@F-Noelle)
 
 ## Bug fixes
 
diff --git a/R/scalar_in_linter.R b/R/scalar_in_linter.R
index 77ca70285..e6c9faace 100644
--- a/R/scalar_in_linter.R
+++ b/R/scalar_in_linter.R
@@ -1,12 +1,14 @@
 #' Block usage like x %in% "a"
 #'
 #' `vector %in% set` is appropriate for matching a vector to a set, but if
-#'   that set has size 1, `==` is more appropriate. `%chin%` from `{data.table}`
-#'   is matched as well.
+#'   that set has size 1, `==` is more appropriate.
 #'
 #' `scalar %in% vector` is OK, because the alternative (`any(vector == scalar)`)
 #'   is more circuitous & potentially less clear.
 #'
+#' @param in_operators Character vector of additional infix operators that behave like the `%in%` operator,
+#'   e.g. `{data.table}`'s `%chin%` operator.
+#'
 #' @examples
 #' # will produce lints
 #' lint(
@@ -16,7 +18,7 @@
 #'
 #' lint(
 #'   text = "x %chin% 'a'",
-#'   linters = scalar_in_linter()
+#'   linters = scalar_in_linter(in_operators = "%chin%")
 #' )
 #'
 #' # okay
@@ -28,22 +30,24 @@
 #' @evalRd rd_tags("scalar_in_linter")
 #' @seealso [linters] for a complete list of linters available in lintr.
 #' @export
-scalar_in_linter <- function() {
+scalar_in_linter <- function(in_operators = NULL) {
   # TODO(#2085): Extend to include other cases where the RHS is clearly a scalar
   # NB: all of logical, integer, double, hex, complex are parsed as NUM_CONST
-  xpath <- "
-  //SPECIAL[text() = '%in%' or text() = '%chin%']
+  xpath <- glue("
+  //SPECIAL[{xp_text_in_table(c('%in%', {in_operators}))}]
     /following-sibling::expr[NUM_CONST[not(starts-with(text(), 'NA'))] or STR_CONST]
     /parent::expr
-  "
+  ")
 
   Linter(linter_level = "expression", function(source_expression) {
     xml <- source_expression$xml_parsed_content
 
     bad_expr <- xml_find_all(xml, xpath)
     in_op <- xml_find_chr(bad_expr, "string(SPECIAL)")
-    lint_msg <-
-      paste0("Use == to match length-1 scalars, not ", in_op, ". Note that == preserves NA where ", in_op, " does not.")
+    lint_msg <- glue(
+      "Use comparison operators (e.g. ==, !=, etc.) to match length-1 scalars instead of {in_op}. ",
+      "Note that comparison operators preserve NA where {in_op} does not."
+    )
 
     xml_nodes_to_lints(
       bad_expr,
diff --git a/inst/lintr/linters.csv b/inst/lintr/linters.csv
index 539cdbc98..95af98b61 100644
--- a/inst/lintr/linters.csv
+++ b/inst/lintr/linters.csv
@@ -90,7 +90,7 @@ repeat_linter,style readability
 return_linter,style configurable default
 routine_registration_linter,best_practices efficiency robustness
 sample_int_linter,efficiency readability robustness
-scalar_in_linter,readability consistency best_practices efficiency
+scalar_in_linter,readability consistency best_practices efficiency configurable
 semicolon_linter,style readability default configurable
 semicolon_terminator_linter,defunct
 seq_linter,robustness efficiency consistency best_practices default
diff --git a/man/configurable_linters.Rd b/man/configurable_linters.Rd
index cb1c17a54..1c72fffab 100644
--- a/man/configurable_linters.Rd
+++ b/man/configurable_linters.Rd
@@ -44,6 +44,7 @@ The following linters are tagged with 'configurable':
 \item{\code{\link{quotes_linter}}}
 \item{\code{\link{redundant_ifelse_linter}}}
 \item{\code{\link{return_linter}}}
+\item{\code{\link{scalar_in_linter}}}
 \item{\code{\link{semicolon_linter}}}
 \item{\code{\link{string_boundary_linter}}}
 \item{\code{\link{todo_comment_linter}}}
diff --git a/man/linters.Rd b/man/linters.Rd
index d2ba40da1..394bd6126 100644
--- a/man/linters.Rd
+++ b/man/linters.Rd
@@ -19,7 +19,7 @@ The following tags exist:
 \itemize{
 \item{\link[=best_practices_linters]{best_practices} (63 linters)}
 \item{\link[=common_mistakes_linters]{common_mistakes} (11 linters)}
-\item{\link[=configurable_linters]{configurable} (43 linters)}
+\item{\link[=configurable_linters]{configurable} (44 linters)}
 \item{\link[=consistency_linters]{consistency} (32 linters)}
 \item{\link[=correctness_linters]{correctness} (7 linters)}
 \item{\link[=default_linters]{default} (25 linters)}
@@ -123,7 +123,7 @@ The following linters exist:
 \item{\code{\link{return_linter}} (tags: configurable, default, style)}
 \item{\code{\link{routine_registration_linter}} (tags: best_practices, efficiency, robustness)}
 \item{\code{\link{sample_int_linter}} (tags: efficiency, readability, robustness)}
-\item{\code{\link{scalar_in_linter}} (tags: best_practices, consistency, efficiency, readability)}
+\item{\code{\link{scalar_in_linter}} (tags: best_practices, configurable, consistency, efficiency, readability)}
 \item{\code{\link{semicolon_linter}} (tags: configurable, default, readability, style)}
 \item{\code{\link{seq_linter}} (tags: best_practices, consistency, default, efficiency, robustness)}
 \item{\code{\link{sort_linter}} (tags: best_practices, efficiency, readability)}
diff --git a/man/scalar_in_linter.Rd b/man/scalar_in_linter.Rd
index be94fd1a1..1773c699f 100644
--- a/man/scalar_in_linter.Rd
+++ b/man/scalar_in_linter.Rd
@@ -4,12 +4,15 @@
 \alias{scalar_in_linter}
 \title{Block usage like x \%in\% "a"}
 \usage{
-scalar_in_linter()
+scalar_in_linter(in_operators = NULL)
+}
+\arguments{
+\item{in_operators}{Character vector of additional infix operators that behave like the \code{\%in\%} operator,
+e.g. \code{{data.table}}'s \verb{\%chin\%} operator.}
 }
 \description{
 \code{vector \%in\% set} is appropriate for matching a vector to a set, but if
-that set has size 1, \code{==} is more appropriate. \verb{\%chin\%} from \code{{data.table}}
-is matched as well.
+that set has size 1, \code{==} is more appropriate.
 }
 \details{
 \code{scalar \%in\% vector} is OK, because the alternative (\code{any(vector == scalar)})
@@ -24,7 +27,7 @@ lint(
 
 lint(
   text = "x \%chin\% 'a'",
-  linters = scalar_in_linter()
+  linters = scalar_in_linter(in_operators = "\%chin\%")
 )
 
 # okay
@@ -38,5 +41,5 @@ lint(
 \link{linters} for a complete list of linters available in lintr.
 }
 \section{Tags}{
-\link[=best_practices_linters]{best_practices}, \link[=consistency_linters]{consistency}, \link[=efficiency_linters]{efficiency}, \link[=readability_linters]{readability}
+\link[=best_practices_linters]{best_practices}, \link[=configurable_linters]{configurable}, \link[=consistency_linters]{consistency}, \link[=efficiency_linters]{efficiency}, \link[=readability_linters]{readability}
 }
diff --git a/tests/testthat/test-scalar_in_linter.R b/tests/testthat/test-scalar_in_linter.R
index 2bfd66f83..fb3663087 100644
--- a/tests/testthat/test-scalar_in_linter.R
+++ b/tests/testthat/test-scalar_in_linter.R
@@ -3,11 +3,10 @@ test_that("scalar_in_linter skips allowed usages", {
 
   expect_lint("x %in% y", NULL, linter)
   expect_lint("y %in% c('a', 'b')", NULL, linter)
-  expect_lint("c('a', 'b') %chin% x", NULL, linter)
+  expect_lint("c('a', 'b') %in% x", NULL, linter)
   expect_lint("z %in% 1:3", NULL, linter)
   # scalars on LHS are fine (often used as `"col" %in% names(DF)`)
   expect_lint("3L %in% x", NULL, linter)
-
   # this should be is.na(x), but it more directly uses the "always TRUE/FALSE, _not_ NA"
   #   aspect of %in%, so we delegate linting here to equals_na_linter()
   expect_lint("x %in% NA", NULL, linter)
@@ -15,16 +14,33 @@ test_that("scalar_in_linter skips allowed usages", {
 })
 
 test_that("scalar_in_linter blocks simple disallowed usages", {
-  linter <- scalar_in_linter()
-  lint_in_msg <- rex::rex("Use == to match length-1 scalars, not %in%.")
-  lint_chin_msg <- rex::rex("Use == to match length-1 scalars, not %chin%.")
+  linter <- scalar_in_linter(in_operators = c("%chin%", "%notin%"))
+  lint_msg <- rex::rex("Use comparison operators (e.g. ==, !=, etc.) to match length-1 scalars instead of")
+
+  expect_lint("x %in% 1", lint_msg, linter)
+  expect_lint("x %chin% 'a'", lint_msg, linter)
+  expect_lint("x %notin% 1", lint_msg, linter)
+})
 
-  expect_lint("x %in% 1", lint_in_msg, linter)
-  expect_lint("x %chin% 'a'", lint_chin_msg, linter)
+test_that("scalar_in_linter blocks or skips based on configuration", {
+  linter_default <- scalar_in_linter()
+  linter_config <- scalar_in_linter(in_operators = "%notin%")
+
+  lint_msg <- rex::rex("Use comparison operators (e.g. ==, !=, etc.) to match length-1 scalars instead of")
+
+  # default
+  expect_lint("x %in% 1", lint_msg, linter_default)
+  expect_lint("x %notin% 1", NULL, linter_default)
+  expect_lint("x %notin% y", NULL, linter_default)
+
+  # configured
+  expect_lint("x %in% 1", lint_msg, linter_config)
+  expect_lint("x %notin% 1", lint_msg, linter_config)
+  expect_lint("x %notin% y", NULL, linter_config)
 })
 
 test_that("multiple lints are generated correctly", {
-  linter <- scalar_in_linter()
+  linter <- scalar_in_linter(in_operators = "%chin%")
 
   expect_lint(
     trim_some('{