From fab0b7e779105b2be304fbed6706b33d8a1ac57f Mon Sep 17 00:00:00 2001 From: "J. Allen Baron" Date: Thu, 24 Aug 2023 16:57:19 -0400 Subject: [PATCH] Broaden acceptable inputs for unique_if_invariant() --- NAMESPACE | 2 - NEWS.md | 6 +- R/vctr_to_scalar.R | 94 ++++++++++++++++++------ man/unique_if_invariant.Rd | 49 +++++++++---- tests/testthat/test-vctr_to_scalar.R | 102 +++++++++++++++++++++++++++ 5 files changed, 215 insertions(+), 38 deletions(-) diff --git a/NAMESPACE b/NAMESPACE index 51f2448e..e5e2e3e9 100644 --- a/NAMESPACE +++ b/NAMESPACE @@ -29,8 +29,6 @@ S3method(tidy_pub_records,scopus_search_list) S3method(to_character,data.frame) S3method(to_character,default) S3method(to_character,list) -S3method(unique_if_invariant,character) -S3method(unique_if_invariant,numeric) export("%>%") export(DOrepo) export(all_duplicated) diff --git a/NEWS.md b/NEWS.md index d9cc655f..45d97a8d 100644 --- a/NEWS.md +++ b/NEWS.md @@ -3,7 +3,11 @@ ## General * `lexiclean()` _(new!!)_: processes text for improved text matching. -* Fix `read_delim_auto()` to handle compressed input. +* Fixed `read_delim_auto()` to handle compressed input. +* Broadened `unique_if_invariant()` no longer uses it's own methods and instead +relies on `base::unique()`. This may have some unintended consequences, +particularly where custom methods of `unique()` are defined but it works for +more inputs, better matching expectations. ## DO Management & Analysis diff --git a/R/vctr_to_scalar.R b/R/vctr_to_scalar.R index 750d529f..7525e70a 100644 --- a/R/vctr_to_scalar.R +++ b/R/vctr_to_scalar.R @@ -1,36 +1,88 @@ #' Return Unique Value for Invariant Vectors #' -#' Returns the unique value from a vector of any length, if and only if, only 1 -#' unique value exists (_i.e._ the vector is invariant), otherwise returns the -#' original vector. +#' Returns the unique value from an input, if and only if, only 1 unique value +#' exists (_i.e._ the input is invariant), otherwise returns the original input. +#' Uniqueness is determined by [base::unique()] for flexibility but +#' `unique_if_invariant()` may fail for custom methods. #' -#' @inheritParams is_invariant +#' @param x An R object, except arrays which are not supported. +#' @param na.rm A logical scalar indicating whether `NA` values should be +#' removed (default: FALSE); powered by [stats::na.omit()] and may be +#' limited by its methods. +#' @param incl_nm A logic scalar indicating whether names should also be +#' examined (default: FALSE). +#' @param ... Arguments passed on to [base::unique()] methods. #' #' @seealso For _unconditional_ vector-to-string conversion methods, see the #' [vctr_to_string()] family of functions. #' +#' @examples +#' unique_if_invariant(c("a", "a")) +#' unique_if_invariant(c("a", "b")) +#' +#' # `NA` can be ignored +#' unique_if_invariant(c("a", NA)) +#' unique_if_invariant(c("a", NA), na.rm = TRUE) +#' +#' # names are ignored by default (and often dropped); to consider and preserve +#' # them use `incl_nm = TRUE` +#' unique_if_invariant(c(a = "A", b = "A")) +#' unique_if_invariant(c(a = "A", b = "A"), incl_nm = TRUE) +#' unique_if_invariant(c(a = "A", a = "A"), incl_nm = TRUE) +#' +#' # na.rm & incl_nm are ignored for matrices & data.frames due to undesirable +#' # results; as with base::unique(), matrix comparison preserves columns +#' m <- matrix(rep(1, 4), 2) +#' unique_if_invariant(m) +#' +#' .df <- data.frame(m, check.names = TRUE) +#' unique_if_invariant(.df) +#' #' @export -unique_if_invariant <- function(x, na.rm = FALSE, ...) { - UseMethod("unique_if_invariant") -} +unique_if_invariant <- function(x, na.rm = FALSE, incl_nm = FALSE, ...) { + assert_scalar_logical(na.rm) + assert_scalar_logical(incl_nm) -#' @export -#' @rdname unique_if_invariant -unique_if_invariant.character <- function(x, na.rm = FALSE, ...) { - if (is_invariant(x, na.rm = na.rm, ...)) { - return(unique(x)) + ndim <- length(dim(x)) + if (ndim > 2) { + rlang::abort( + c( + "unique_if_invariant() does not support objects with >2 dimensions.", + x = paste0("`dim(x)` = ", ndim) + ) + ) } - x -} -#' @export -#' @rdname unique_if_invariant -unique_if_invariant.numeric <- function(x, na.rm = FALSE, - tol = sqrt(.Machine$double.eps), ...) { - if (is_invariant(x, na.rm = na.rm, tol = tol, ...)) { - return(mean(x, na.rm = na.rm)) + uniq <- x + if (na.rm & !all(is.na(x))) { + if (ndim == 0) { + uniq <- stats::na.omit(uniq) + } else { + rlang::warn("`na.rm` is ignored when `x` has 2 dimensions.") + } + } + uniq <- unique(uniq, ...) + + + if (ndim == 2) { + if (incl_nm) { + rlang::warn("`incl_nm` is ignored when `x` has 2 dimensions.") + } + n_out <- nrow(uniq) + } else { + n_out <- length(uniq) + if (incl_nm) { + uniq_nm <- unique(names(x), ...) + n_out <- max(n_out, length(uniq_nm)) + } + } + + if (n_out == 1) { + if (incl_nm && ndim == 0) names(uniq) <- uniq_nm + uniq + } else { + x } - x } diff --git a/man/unique_if_invariant.Rd b/man/unique_if_invariant.Rd index 15ec6bc1..0c24cea8 100644 --- a/man/unique_if_invariant.Rd +++ b/man/unique_if_invariant.Rd @@ -2,29 +2,50 @@ % Please edit documentation in R/vctr_to_scalar.R \name{unique_if_invariant} \alias{unique_if_invariant} -\alias{unique_if_invariant.character} -\alias{unique_if_invariant.numeric} \title{Return Unique Value for Invariant Vectors} \usage{ -unique_if_invariant(x, na.rm = FALSE, ...) - -\method{unique_if_invariant}{character}(x, na.rm = FALSE, ...) - -\method{unique_if_invariant}{numeric}(x, na.rm = FALSE, tol = sqrt(.Machine$double.eps), ...) +unique_if_invariant(x, na.rm = FALSE, incl_nm = FALSE, ...) } \arguments{ -\item{x}{vector to be tested} +\item{x}{An R object, except arrays which are not supported.} -\item{na.rm}{logical indicating whether to exclude NA values} +\item{na.rm}{A logical scalar indicating whether \code{NA} values should be +removed (default: FALSE); powered by \code{\link[stats:na.fail]{stats::na.omit()}} and may be +limited by its methods.} -\item{...}{unused; for extensibility} +\item{incl_nm}{A logic scalar indicating whether names should also be +examined (default: FALSE).} -\item{tol}{double, tolerance to use (for numeric vectors)} +\item{...}{Arguments passed on to \code{\link[base:unique]{base::unique()}} methods.} } \description{ -Returns the unique value from a vector of any length, if and only if, only 1 -unique value exists (\emph{i.e.} the vector is invariant), otherwise returns the -original vector. +Returns the unique value from an input, if and only if, only 1 unique value +exists (\emph{i.e.} the input is invariant), otherwise returns the original input. +Uniqueness is determined by \code{\link[base:unique]{base::unique()}} for flexibility but +\code{unique_if_invariant()} may fail for custom methods. +} +\examples{ +unique_if_invariant(c("a", "a")) +unique_if_invariant(c("a", "b")) + +# `NA` can be ignored +unique_if_invariant(c("a", NA)) +unique_if_invariant(c("a", NA), na.rm = TRUE) + +# names are ignored by default (and often dropped); to consider and preserve +# them use `incl_nm = TRUE` +unique_if_invariant(c(a = "A", b = "A")) +unique_if_invariant(c(a = "A", b = "A"), incl_nm = TRUE) +unique_if_invariant(c(a = "A", a = "A"), incl_nm = TRUE) + +# na.rm & incl_nm are ignored for matrices & data.frames due to undesirable +# results; as with base::unique(), matrix comparison preserves columns +m <- matrix(rep(1, 4), 2) +unique_if_invariant(m) + +.df <- data.frame(m, check.names = TRUE) +unique_if_invariant(.df) + } \seealso{ For \emph{unconditional} vector-to-string conversion methods, see the diff --git a/tests/testthat/test-vctr_to_scalar.R b/tests/testthat/test-vctr_to_scalar.R index d1e8f211..c55327e6 100644 --- a/tests/testthat/test-vctr_to_scalar.R +++ b/tests/testthat/test-vctr_to_scalar.R @@ -1,3 +1,105 @@ +# unique_if_invariant() --------------------------------------------------- + +test_that("unique_if_invariant() returns 1 value when unique", { + expect_identical(unique_if_invariant(rep("a", 2)), "a") + expect_identical(unique_if_invariant(rep(1, 2)), 1) + expect_identical(unique_if_invariant(rep(T, 2)), T) + expect_identical(unique_if_invariant(rep(NA, 2)), NA) + m <- matrix(rep(1, 4), 2) + expect_identical(unique_if_invariant(m), matrix(rep(1, 2), 1)) + expect_identical( + unique_if_invariant(as.data.frame(m)), as.data.frame(m)[1, ] + ) +}) + +test_that("unique_if_invariant() returns input value when NOT unique", { + expect_identical(unique_if_invariant(c("a", "b")), c("a", "b")) + expect_identical(unique_if_invariant(1:2), 1:2) + expect_identical(unique_if_invariant(c(T, F)), c(T, F)) + expect_identical(unique_if_invariant(c(T, NA)), c(T, NA)) + m <- matrix(rep(1:2, 2), 2) + expect_identical(unique_if_invariant(m), m) + expect_identical( + unique_if_invariant(as.data.frame(m)), as.data.frame(m) + ) +}) + +test_that("unique_if_invariant() na.rm works", { + expect_identical(unique_if_invariant(c("a", NA), na.rm = T), "a") + expect_identical(unique_if_invariant(c(1, NA), na.rm = T), 1) + expect_identical(unique_if_invariant(c(T, NA), na.rm = T), T) + + m <- matrix(rep(c(1, NA), each = 2), 2) # same column + expect_warning( + expect_identical(unique_if_invariant(m, na.rm = T), m[1, , drop = F]), + regexp = "na\\.rm.*ignored.*dim" + ) + expect_warning( + expect_identical( + unique_if_invariant(as.data.frame(m), na.rm = T), + as.data.frame(m)[1, ] + ), + regexp = "na\\.rm.*ignored.*dim" + ) + + m2 <- matrix(rep(c(1, NA), 2), 2) # same row + expect_warning( + expect_identical(unique_if_invariant(m2, na.rm = T), m2), + regexp = "na\\.rm.*ignored.*dim" + ) + expect_warning( + expect_identical( + unique_if_invariant(as.data.frame(m2), na.rm = T), + as.data.frame(m2) + ), + regexp = "na\\.rm.*ignored.*dim" + ) +}) + +test_that("unique_if_invariant() incl_nm works", { + expect_identical( + unique_if_invariant(rep(c(a = "A"), 2), incl_nm = T), + c(a = "A") + ) + expect_identical( + unique_if_invariant(rep(c(a = 1), 2), incl_nm = T), + c(a = 1) + ) + expect_identical( + unique_if_invariant(rep(c(a = T), 2), incl_nm = T), + c(a = T) + ) + expect_identical( + unique_if_invariant(rep(c(a = NA), 2), incl_nm = T), + c(a = NA) + ) + + m <- matrix(rep(1, 4), 2) + colnames(m) <- c("a", "b") + expect_warning( + expect_identical( + unique_if_invariant(m, incl_nm = T), + m[1, , drop = FALSE] + ), + regexp = "incl_nm.*ignored.*dim" + ) + expect_warning( + expect_identical( + unique_if_invariant(as.data.frame(m), incl_nm = T), + as.data.frame(m)[1, ] + ), + regexp = "incl_nm.*ignored.*dim" + ) + + # only name differs + expect_identical( + unique_if_invariant(c(a = "A", b = "A"), incl_nm = T), + c(a = "A", b = "A") + ) + # name ignored (and dropped) without incl_nm + expect_identical(unique_if_invariant(c(a = "A", b = "A")), "A") +}) + # vctr_to_string() --------------------------------------------------------