From fa94235954c9cd42cfc06708589eb5ad964f7bc3 Mon Sep 17 00:00:00 2001 From: Michael Chirico Date: Tue, 12 Dec 2023 15:35:56 +0000 Subject: [PATCH 1/4] customize lint message --- R/nzchar_linter.R | 62 ++++++++++++++++++++++------- tests/testthat/test-nzchar_linter.R | 24 ++++++----- 2 files changed, 62 insertions(+), 24 deletions(-) diff --git a/R/nzchar_linter.R b/R/nzchar_linter.R index f0073ca9e..4e5abba32 100644 --- a/R/nzchar_linter.R +++ b/R/nzchar_linter.R @@ -55,7 +55,7 @@ nzchar_linter <- function() { ]) or ancestor-or-self::expr[ ( - preceding-sibling::expr[SYMBOL_FUNCTION_CALL] + preceding-sibling::expr/SYMBOL_FUNCTION_CALL or preceding-sibling::OP-LEFT-BRACKET ) and not( descendant-or-self::expr[IF or WHILE] @@ -65,6 +65,15 @@ nzchar_linter <- function() { ] ") + comparison_msg_map <- c( + GT = 'Use nzchar(x) instead of x > "". ', + NE = 'Use nzchar(x) instead of x != "". ', + LE = 'Use !nzchar(x) instead of x <= "". ', + EQ = 'Use !nzchar(x) instead of x == "". ', + GE = 'x >= "" is always true, maybe you want nzchar(x)? ', + LT = 'x < "" is always false, maybe you want !nzchar(x)? ' + ) + # nchar(., type="width") not strictly compatible with nzchar # unsure allowNA compatible, so allow it just in case (see TODO in tests) nchar_xpath <- glue(" @@ -73,49 +82,74 @@ nzchar_linter <- function() { /parent::expr /parent::expr[ ({ xp_or(comparator_nodes) }) - and not(expr[SYMBOL_SUB[ + and not(expr/SYMBOL_SUB[ ( text() = 'type' - and following-sibling::expr[1][STR_CONST[contains(text(), 'width')]] + and following-sibling::expr[1]/STR_CONST[contains(text(), 'width')] ) or ( text() = 'allowNA' - and following-sibling::expr[1][NUM_CONST[text() = 'TRUE']] + and following-sibling::expr[1]/NUM_CONST[text() = 'TRUE'] ) - ]]) - and expr[NUM_CONST[text() = '0' or text() = '0L' or text() = '0.0']] + ]) + and expr/NUM_CONST[text() = '0' or text() = '0L' or text() = '0.0'] ] ") + nchar_msg_map <- c( + GT = 'Use nzchar(x) instead of nchar(x) > 0. ', + NE = 'Use nzchar(x) instead of nchar(x) != 0. ', + LE = 'Use !nzchar(x) instead of nchar(x) <= 0. ', + EQ = 'Use !nzchar(x) instead of nchar(x) == 0. ', + GE = 'nchar(x) >= 0 is always true, maybe you want nzchar(x)? ', + LT = 'nchar(x) < 0 is always false, maybe you want !nzchar(x)? ' + ) + keepna_note <- paste( "Whenever missing data is possible,", "please take care to use nzchar(., keepNA = TRUE);", "nzchar(NA) is TRUE by default." ) + # For ordered operators like '>', we need to give the message for + # its "opposite" (not inverse) if the bad usage is on the RHS, + # e.g. 0 < nchar(x) has to be treated as nchar(x) > 0. + op_for_msg <- function(expr, const) { + op <- xml_name(xml_find_first(expr, "*[2]")) + maybe_needs_flip <- !is.na(xml_find_first(expr, sprintf("*[1][%s]", const))) + + ordered_ops <- c("GT", "GE", "LE", "LT") + ordered_idx <- match(op, ordered_ops) + + needs_flip <- maybe_needs_flip & !is.na(ordered_idx) + # un-benchmarked, but should be faster (though less readable) as + # ordered_ops[5L - ordered_idx[needs_flip]] + op[needs_flip] <- rev(ordered_ops)[ordered_idx[needs_flip]] + op + } + Linter(linter_level = "expression", function(source_expression) { xml <- source_expression$xml_parsed_content comparison_expr <- xml_find_all(xml, comparison_xpath) + comparison_op <- op_for_msg(comparison_expr, const = "STR_CONST") comparison_lints <- xml_nodes_to_lints( comparison_expr, source_expression = source_expression, - lint_message = paste( - 'Use nzchar() instead of comparing strings to "".', - "Note that if x is a factor, you'll have use ", - 'as.character() to replicate an implicit conversion that happens in x == "".', + lint_message = paste0( + comparison_msg_map[comparison_op], + "Note that unlike nzchar(), ", comparison_op, " coerces to character, ", + "so you'll have to use as.character() if x is a factor. ", keepna_note ), type = "warning" ) nchar_expr <- xml_find_all(xml, nchar_xpath) + nchar_op <- op_for_msg(nchar_expr, const = "NUM_CONST") nchar_lints <- xml_nodes_to_lints( nchar_expr, source_expression = source_expression, - lint_message = paste( - "Use nzchar() instead of comparing nchar(x) to 0.", - keepna_note - ), + lint_message = paste0(nchar_msg_map[nchar_op], keepna_note), type = "warning" ) diff --git a/tests/testthat/test-nzchar_linter.R b/tests/testthat/test-nzchar_linter.R index 1e6eb9260..462f0ccfd 100644 --- a/tests/testthat/test-nzchar_linter.R +++ b/tests/testthat/test-nzchar_linter.R @@ -25,26 +25,26 @@ test_that("nzchar_linter skips as appropriate for other nchar args", { # nzchar also has keepNA argument so a drop-in switch is easy expect_lint( "nchar(x, keepNA=TRUE) == 0", - rex::rex("Use nzchar() instead of comparing nchar(x) to 0"), + rex::rex("Use !nzchar(x) instead of nchar(x) == 0"), linter ) }) test_that("nzchar_linter blocks simple disallowed usages", { linter <- nzchar_linter() - lint_msg_quote <- rex::rex('Use nzchar() instead of comparing strings to ""') + lint_msg_quote <- rex::rex('Use !nzchar(x) instead of x == "') lint_msg_nchar <- rex::rex("Use nzchar() instead of comparing nchar(x) to 0") expect_lint("which(x == '')", lint_msg_quote, linter) - expect_lint("any(nchar(x) >= 0)", lint_msg_nchar, linter) - expect_lint("all(nchar(x) == 0L)", lint_msg_nchar, linter) - expect_lint("sum(0.0 < nchar(x))", lint_msg_nchar, linter) + expect_lint("any(nchar(x) >= 0)", rex::rex("nchar(x) >= 0 is always true, maybe you want nzchar(x)?"), linter) + expect_lint("all(nchar(x) == 0L)", rex::rex("Use !nzchar(x) instead of nchar(x) == 0"), linter) + expect_lint("sum(0.0 < nchar(x))", rex::rex("Use nzchar(x) instead of nchar(x) > 0"), linter) }) test_that("nzchar_linter skips comparison to '' in if/while statements", { linter <- nzchar_linter() - lint_msg_quote <- rex::rex('Use nzchar() instead of comparing strings to ""') - lint_msg_nchar <- rex::rex("Use nzchar() instead of comparing nchar(x) to 0") + lint_msg_quote <- rex::rex('Use !nzchar(x) instead of x == ""') + lint_msg_nchar <- rex::rex("Use nzchar(x) instead of nchar(x) > 0") # still lint nchar() comparisons expect_lint("if (nchar(x) > 0) TRUE", lint_msg_nchar, linter) @@ -63,11 +63,15 @@ test_that("multiple lints are generated correctly", { expect_lint( trim_some("{ a == '' - nchar(b) != 0 + '' < b + nchar(c) != 0 + 0.0 > nchar(d) }"), list( - list(rex::rex('Use nzchar() instead of comparing strings to ""'), line_number = 2L), - list(rex::rex("Use nzchar() instead of comparing nchar(x) to 0."), line_number = 3L) + list(rex::rex('Use !nzchar(x) instead of x == ""'), line_number = 2L), + list(rex::rex('Use nzchar(x) instead of x > ""'), line_number = 3L), + list(rex::rex("Use nzchar(x) instead of nchar(x) != 0."), line_number = 4L), + list(rex::rex("nchar(x) < 0 is always false, maybe you want !nzchar(x)?"), line_number = 5L) ), nzchar_linter() ) From c166047596709ddd7c947976d7ca87ce9573cf6c Mon Sep 17 00:00:00 2001 From: Michael Chirico Date: Tue, 12 Dec 2023 15:53:05 +0000 Subject: [PATCH 2/4] delint --- R/nzchar_linter.R | 14 +++++++------- 1 file changed, 7 insertions(+), 7 deletions(-) diff --git a/R/nzchar_linter.R b/R/nzchar_linter.R index 4e5abba32..1bed53625 100644 --- a/R/nzchar_linter.R +++ b/R/nzchar_linter.R @@ -96,12 +96,12 @@ nzchar_linter <- function() { ") nchar_msg_map <- c( - GT = 'Use nzchar(x) instead of nchar(x) > 0. ', - NE = 'Use nzchar(x) instead of nchar(x) != 0. ', - LE = 'Use !nzchar(x) instead of nchar(x) <= 0. ', - EQ = 'Use !nzchar(x) instead of nchar(x) == 0. ', - GE = 'nchar(x) >= 0 is always true, maybe you want nzchar(x)? ', - LT = 'nchar(x) < 0 is always false, maybe you want !nzchar(x)? ' + GT = "Use nzchar(x) instead of nchar(x) > 0. ", + NE = "Use nzchar(x) instead of nchar(x) != 0. ", + LE = "Use !nzchar(x) instead of nchar(x) <= 0. ", + EQ = "Use !nzchar(x) instead of nchar(x) == 0. ", + GE = "nchar(x) >= 0 is always true, maybe you want nzchar(x)? ", + LT = "nchar(x) < 0 is always false, maybe you want !nzchar(x)? " ) keepna_note <- paste( @@ -122,7 +122,7 @@ nzchar_linter <- function() { needs_flip <- maybe_needs_flip & !is.na(ordered_idx) # un-benchmarked, but should be faster (though less readable) as - # ordered_ops[5L - ordered_idx[needs_flip]] + # > ordered_ops[5L - ordered_idx[needs_flip]] op[needs_flip] <- rev(ordered_ops)[ordered_idx[needs_flip]] op } From 33f4fa375827a6d09613eb37bf7b14a5fd7472ed Mon Sep 17 00:00:00 2001 From: Michael Chirico Date: Wed, 13 Dec 2023 07:33:04 +0800 Subject: [PATCH 3/4] Update tests/testthat/test-nzchar_linter.R --- tests/testthat/test-nzchar_linter.R | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/tests/testthat/test-nzchar_linter.R b/tests/testthat/test-nzchar_linter.R index 462f0ccfd..1d8b1285a 100644 --- a/tests/testthat/test-nzchar_linter.R +++ b/tests/testthat/test-nzchar_linter.R @@ -32,7 +32,7 @@ test_that("nzchar_linter skips as appropriate for other nchar args", { test_that("nzchar_linter blocks simple disallowed usages", { linter <- nzchar_linter() - lint_msg_quote <- rex::rex('Use !nzchar(x) instead of x == "') + lint_msg_quote <- rex::rex('Use !nzchar(x) instead of x == ""') lint_msg_nchar <- rex::rex("Use nzchar() instead of comparing nchar(x) to 0") expect_lint("which(x == '')", lint_msg_quote, linter) From fa9360ac17d2dab86455b9b6d31de2925060da57 Mon Sep 17 00:00:00 2001 From: Michael Chirico Date: Wed, 13 Dec 2023 07:02:19 +0000 Subject: [PATCH 4/4] fix examples --- R/nzchar_linter.R | 10 +++++----- man/nzchar_linter.Rd | 10 +++++----- 2 files changed, 10 insertions(+), 10 deletions(-) diff --git a/R/nzchar_linter.R b/R/nzchar_linter.R index 1bed53625..edb60728d 100644 --- a/R/nzchar_linter.R +++ b/R/nzchar_linter.R @@ -6,7 +6,9 @@ #' #' One crucial difference is in the default handling of `NA_character_`, i.e., #' missing strings. `nzchar(NA_character_)` is `TRUE`, while `NA_character_ == ""` -#' and `nchar(NA_character_) == 0` are both `NA`. +#' and `nchar(NA_character_) == 0` are both `NA`. Therefore, for strict +#' compatibility, use `nzchar(x, keepNA = TRUE)`. If the input is known to be +#' complete (no missing entries), this argument can be dropped for conciseness. #' #' @examples #' # will produce lints @@ -22,14 +24,12 @@ #' #' # okay #' lint( -#' text = "x[nchar(x) > 1]", +#' text = "x[!nzchar(x, keepNA = TRUE)]", #' linters = nzchar_linter() #' ) #' -#' # nzchar()'s primary benefit is for vector input; -#' # for guaranteed-scalar cases like if() conditions, comparing to "" is OK. #' lint( -#' text = "if (x == '') y", +#' text = "x[nzchar(x, keepNA = TRUE)]", #' linters = nzchar_linter() #' ) #' diff --git a/man/nzchar_linter.Rd b/man/nzchar_linter.Rd index d729bd63d..92fd83b10 100644 --- a/man/nzchar_linter.Rd +++ b/man/nzchar_linter.Rd @@ -14,7 +14,9 @@ constructions like \code{string == ""} or \code{nchar(string) == 0}. \details{ One crucial difference is in the default handling of \code{NA_character_}, i.e., missing strings. \code{nzchar(NA_character_)} is \code{TRUE}, while \code{NA_character_ == ""} -and \code{nchar(NA_character_) == 0} are both \code{NA}. +and \code{nchar(NA_character_) == 0} are both \code{NA}. Therefore, for strict +compatibility, use \code{nzchar(x, keepNA = TRUE)}. If the input is known to be +complete (no missing entries), this argument can be dropped for conciseness. } \examples{ # will produce lints @@ -30,14 +32,12 @@ lint( # okay lint( - text = "x[nchar(x) > 1]", + text = "x[!nzchar(x, keepNA = TRUE)]", linters = nzchar_linter() ) -# nzchar()'s primary benefit is for vector input; -# for guaranteed-scalar cases like if() conditions, comparing to "" is OK. lint( - text = "if (x == '') y", + text = "x[nzchar(x, keepNA = TRUE)]", linters = nzchar_linter() )