From 16e4c755981250a1fadcf7e45291373743813914 Mon Sep 17 00:00:00 2001 From: David Hugh-Jones Date: Tue, 4 Jun 2024 08:24:39 +0100 Subject: [PATCH] Fix #49: allow singleton breaks in brk_quantile() --- R/breaks.R | 6 +++--- R/chop.R | 6 ++++++ R/utils.R | 22 ++++++++++++++++++++++ man/chop_quantiles.Rd | 6 ++++++ tests/testthat/test-breaks.R | 15 +++++++++++++-- tests/testthat/test-chop.R | 2 +- tests/testthat/test-zzz-systematic.R | 10 +--------- 7 files changed, 52 insertions(+), 15 deletions(-) diff --git a/R/breaks.R b/R/breaks.R index 655a981..9d0e9f2 100644 --- a/R/breaks.R +++ b/R/breaks.R @@ -31,9 +31,9 @@ brk_quantiles <- function (probs, ..., weights = NULL) { if (anyNA(qs)) return(empty_breaks()) # data was all NA - non_dupes <- ! duplicated(qs) - qs <- qs[non_dupes] - probs <- probs[non_dupes] + dupe_middles <- find_duplicated_middles(qs) + qs <- qs[! dupe_middles] + probs <- probs[! dupe_middles] breaks <- create_lr_breaks(qs, left) diff --git a/R/chop.R b/R/chop.R index 26156f5..fd39f67 100644 --- a/R/chop.R +++ b/R/chop.R @@ -279,6 +279,9 @@ fillet <- function ( #' for calculating "type 1" quantiles, since they round down. See #' [stats::quantile()]. #' +#' If `x` contains duplicates, consecutive quantiles may be the same number +#' so that some intervals get merged. +#' #' @family chopping functions #' #' @export @@ -296,6 +299,9 @@ fillet <- function ( #' # to label by the quantiles themselves: #' chop_quantiles(1:10, 1:3/4, raw = TRUE) #' +#' # duplicates: +#' tab_quantiles(c(1, 1, 1, 2, 3), 1:5/5) +#' chop_quantiles <- function( x, probs, diff --git a/R/utils.R b/R/utils.R index a7d6858..c7f4c3a 100644 --- a/R/utils.R +++ b/R/utils.R @@ -60,6 +60,28 @@ singletons <- function (breaks) { } +#' Find middle numbers in runs of 3 or more duplicates. +#' +#' You can use this to find "illegal" breaks. +#' +#' @param x +#' +#' @return A logical vector of length(x), TRUE if the corresponding element +#' is in the middle of 3 or more duplicates +#' @noRd +#' +#' @examples +#' find_duplicated_middles(c(1, 2, 2, 3, 3, 3, 4)) +find_duplicated_middles <- function (x) { + if (length(x) == 0) return(logical(0)) + dupes <- duplicated(x) + # If element n is duplicated, and element n+1 is duplicated, then you + # are a duplicated middle. + # The last element is never a duplicated middle. + c(dupes[-length(dupes)] & dupes[-1], FALSE) +} + + `%||%` <- function (x, y) if (is.null(x)) y else x diff --git a/man/chop_quantiles.Rd b/man/chop_quantiles.Rd index dfbe019..9d13aca 100644 --- a/man/chop_quantiles.Rd +++ b/man/chop_quantiles.Rd @@ -56,6 +56,9 @@ passed to \code{\link[stats:quantile]{stats::quantile()}} or \code{\link[Hmisc:w For non-numeric \code{x}, \code{left} is set to \code{FALSE} by default. This works better for calculating "type 1" quantiles, since they round down. See \code{\link[stats:quantile]{stats::quantile()}}. + +If \code{x} contains duplicates, consecutive quantiles may be the same number +so that some intervals get merged. } \examples{ chop_quantiles(1:10, 1:3/4) @@ -69,6 +72,9 @@ chop_deciles(1:10) # to label by the quantiles themselves: chop_quantiles(1:10, 1:3/4, raw = TRUE) +# duplicates: +tab_quantiles(c(1, 1, 1, 2, 3), 1:5/5) + set.seed(42) tab_quantiles(rnorm(100), probs = 1:3/4, raw = TRUE) diff --git a/tests/testthat/test-breaks.R b/tests/testthat/test-breaks.R index 6df4ba8..fd10ddf 100644 --- a/tests/testthat/test-breaks.R +++ b/tests/testthat/test-breaks.R @@ -185,7 +185,7 @@ test_that("brk_quantiles", { x <- rep(1, 5) brks <- brk_quantiles(1:3/4)(x, FALSE, TRUE, FALSE) - expect_equivalent(c(brks), unique(quantile(x, 1:3/4))) + expect_equivalent(c(brks), c(1, 1)) x <- 1:10 brks <- brk_quantiles(1:3/4, weights = 1:10)(x, FALSE, TRUE, FALSE) @@ -193,6 +193,17 @@ test_that("brk_quantiles", { }) +test_that("bugfix #49: brk_quantiles() shouldn't ignore duplicate quantiles", { + x <- c(1, 1, 2, 3, 4) + brks <- brk_quantiles(0:5/5)(x, FALSE, TRUE, FALSE) + expect_equivalent(c(brks), c(1.0, 1.0, 1.6, 2.4, 3.2, 4.0)) + + x <- c(1, 1, 1, 2, 3) + brks <- brk_quantiles(0:5/5)(x, FALSE, TRUE, FALSE) + expect_equivalent(c(brks), c(1.0, 1.0, 1.4, 2.2, 3.0)) +}) + + test_that("brk_equally", { expect_silent(brk_res(brk_equally(5))) expect_error(brk_equally(4.5)) @@ -203,7 +214,7 @@ test_that("brk_equally", { test_that("brk_equally warns when too few breaks created", { - dupes <- c(1, 1, 1, 2, 3, 4, 4, 4) + dupes <- rep(1, 4) expect_warning(brk_res(brk_equally(4), x = dupes)) }) diff --git a/tests/testthat/test-chop.R b/tests/testthat/test-chop.R index f8d6c3f..b1fe22e 100644 --- a/tests/testthat/test-chop.R +++ b/tests/testthat/test-chop.R @@ -275,7 +275,7 @@ test_that("chop_equally", { ) expect_warning( - chop_equally(c(1, 1, 2, 2), 4), + chop_equally(c(1, 1, 1, 1), 4), "Fewer" ) }) diff --git a/tests/testthat/test-zzz-systematic.R b/tests/testthat/test-zzz-systematic.R index af92ed1..c908ec5 100644 --- a/tests/testthat/test-zzz-systematic.R +++ b/tests/testthat/test-zzz-systematic.R @@ -103,14 +103,6 @@ test_that("systematic tests", { & test_df$brk_fun %in% c("brk_n", "brk_n_merge")) - # all quantiles will be the same here, so no way to create - # intervals if extend is FALSE - should_fail(with(test_df, - names(x) %in% c("same", "one") & - brk_fun == "brk_quantiles" & - extend == FALSE - )) - # brk_default_hi and _lo have a single break, so if you can't # extend it, there are no possible intervals: should_fail(with(test_df, @@ -160,7 +152,7 @@ test_that("systematic tests", { # quantiles here likely to create duplicate endpoints dont_care(with(test_df, - names(x) == "char" & + names(x) %in% c("one", "same", "char") & lbl_fun == "lbl_endpoints" & brk_fun == "brk_quantiles" & extend == TRUE & raw == TRUE