From 16e4c755981250a1fadcf7e45291373743813914 Mon Sep 17 00:00:00 2001
From: David Hugh-Jones <davidhughjones@gmail.com>
Date: Tue, 4 Jun 2024 08:24:39 +0100
Subject: [PATCH] Fix #49: allow singleton breaks in brk_quantile()

---
 R/breaks.R                           |  6 +++---
 R/chop.R                             |  6 ++++++
 R/utils.R                            | 22 ++++++++++++++++++++++
 man/chop_quantiles.Rd                |  6 ++++++
 tests/testthat/test-breaks.R         | 15 +++++++++++++--
 tests/testthat/test-chop.R           |  2 +-
 tests/testthat/test-zzz-systematic.R | 10 +---------
 7 files changed, 52 insertions(+), 15 deletions(-)

diff --git a/R/breaks.R b/R/breaks.R
index 655a981..9d0e9f2 100644
--- a/R/breaks.R
+++ b/R/breaks.R
@@ -31,9 +31,9 @@ brk_quantiles <- function (probs, ..., weights = NULL) {
 
     if (anyNA(qs)) return(empty_breaks()) # data was all NA
 
-    non_dupes <- ! duplicated(qs)
-    qs <- qs[non_dupes]
-    probs <- probs[non_dupes]
+    dupe_middles <- find_duplicated_middles(qs)
+    qs <- qs[! dupe_middles]
+    probs <- probs[! dupe_middles]
 
     breaks <- create_lr_breaks(qs, left)
 
diff --git a/R/chop.R b/R/chop.R
index 26156f5..fd39f67 100644
--- a/R/chop.R
+++ b/R/chop.R
@@ -279,6 +279,9 @@ fillet <- function (
 #' for calculating "type 1" quantiles, since they round down. See
 #' [stats::quantile()].
 #'
+#' If `x` contains duplicates, consecutive quantiles may be the same number
+#' so that some intervals get merged.
+#'
 #' @family chopping functions
 #'
 #' @export
@@ -296,6 +299,9 @@ fillet <- function (
 #' # to label by the quantiles themselves:
 #' chop_quantiles(1:10, 1:3/4, raw = TRUE)
 #'
+#' # duplicates:
+#' tab_quantiles(c(1, 1, 1, 2, 3), 1:5/5)
+#'
 chop_quantiles <- function(
                     x,
                     probs,
diff --git a/R/utils.R b/R/utils.R
index a7d6858..c7f4c3a 100644
--- a/R/utils.R
+++ b/R/utils.R
@@ -60,6 +60,28 @@ singletons <- function (breaks) {
 }
 
 
+#' Find middle numbers in runs of 3 or more duplicates.
+#'
+#' You can use this to find "illegal" breaks.
+#'
+#' @param x
+#'
+#' @return A logical vector of length(x), TRUE if the corresponding element
+#'   is in the middle of 3 or more duplicates
+#' @noRd
+#'
+#' @examples
+#' find_duplicated_middles(c(1, 2, 2, 3, 3, 3, 4))
+find_duplicated_middles <- function (x) {
+  if (length(x) == 0) return(logical(0))
+  dupes <- duplicated(x)
+  # If element n is duplicated, and element n+1 is duplicated, then you
+  # are a duplicated middle.
+  # The last element is never a duplicated middle.
+  c(dupes[-length(dupes)] & dupes[-1], FALSE)
+}
+
+
 `%||%` <- function (x, y) if (is.null(x)) y else x
 
 
diff --git a/man/chop_quantiles.Rd b/man/chop_quantiles.Rd
index dfbe019..9d13aca 100644
--- a/man/chop_quantiles.Rd
+++ b/man/chop_quantiles.Rd
@@ -56,6 +56,9 @@ passed to \code{\link[stats:quantile]{stats::quantile()}} or \code{\link[Hmisc:w
 For non-numeric \code{x}, \code{left} is set to \code{FALSE} by default. This works better
 for calculating "type 1" quantiles, since they round down. See
 \code{\link[stats:quantile]{stats::quantile()}}.
+
+If \code{x} contains duplicates, consecutive quantiles may be the same number
+so that some intervals get merged.
 }
 \examples{
 chop_quantiles(1:10, 1:3/4)
@@ -69,6 +72,9 @@ chop_deciles(1:10)
 # to label by the quantiles themselves:
 chop_quantiles(1:10, 1:3/4, raw = TRUE)
 
+# duplicates:
+tab_quantiles(c(1, 1, 1, 2, 3), 1:5/5)
+
 set.seed(42)
 tab_quantiles(rnorm(100), probs = 1:3/4, raw = TRUE)
 
diff --git a/tests/testthat/test-breaks.R b/tests/testthat/test-breaks.R
index 6df4ba8..fd10ddf 100644
--- a/tests/testthat/test-breaks.R
+++ b/tests/testthat/test-breaks.R
@@ -185,7 +185,7 @@ test_that("brk_quantiles", {
 
   x <- rep(1, 5)
   brks <- brk_quantiles(1:3/4)(x, FALSE, TRUE, FALSE)
-  expect_equivalent(c(brks), unique(quantile(x, 1:3/4)))
+  expect_equivalent(c(brks), c(1, 1))
 
   x <- 1:10
   brks <- brk_quantiles(1:3/4, weights = 1:10)(x, FALSE, TRUE, FALSE)
@@ -193,6 +193,17 @@ test_that("brk_quantiles", {
 })
 
 
+test_that("bugfix #49: brk_quantiles() shouldn't ignore duplicate quantiles", {
+  x <- c(1, 1, 2, 3, 4)
+  brks <- brk_quantiles(0:5/5)(x, FALSE, TRUE, FALSE)
+  expect_equivalent(c(brks), c(1.0, 1.0, 1.6, 2.4, 3.2, 4.0))
+
+  x <- c(1, 1, 1, 2, 3)
+  brks <- brk_quantiles(0:5/5)(x, FALSE, TRUE, FALSE)
+  expect_equivalent(c(brks), c(1.0, 1.0, 1.4, 2.2, 3.0))
+})
+
+
 test_that("brk_equally", {
   expect_silent(brk_res(brk_equally(5)))
   expect_error(brk_equally(4.5))
@@ -203,7 +214,7 @@ test_that("brk_equally", {
 
 
 test_that("brk_equally warns when too few breaks created", {
-  dupes <- c(1, 1, 1, 2, 3, 4, 4, 4)
+  dupes <- rep(1, 4)
   expect_warning(brk_res(brk_equally(4), x = dupes))
 })
 
diff --git a/tests/testthat/test-chop.R b/tests/testthat/test-chop.R
index f8d6c3f..b1fe22e 100644
--- a/tests/testthat/test-chop.R
+++ b/tests/testthat/test-chop.R
@@ -275,7 +275,7 @@ test_that("chop_equally", {
   )
 
   expect_warning(
-    chop_equally(c(1, 1, 2, 2), 4),
+    chop_equally(c(1, 1, 1, 1), 4),
     "Fewer"
   )
 })
diff --git a/tests/testthat/test-zzz-systematic.R b/tests/testthat/test-zzz-systematic.R
index af92ed1..c908ec5 100644
--- a/tests/testthat/test-zzz-systematic.R
+++ b/tests/testthat/test-zzz-systematic.R
@@ -103,14 +103,6 @@ test_that("systematic tests", {
               & test_df$brk_fun %in% c("brk_n", "brk_n_merge"))
 
 
-  # all quantiles will be the same here, so no way to create
-  # intervals if extend is FALSE
-  should_fail(with(test_df,
-          names(x) %in% c("same", "one") &
-          brk_fun == "brk_quantiles" &
-          extend == FALSE
-        ))
-
   # brk_default_hi and _lo have a single break, so if you can't
   # extend it, there are no possible intervals:
   should_fail(with(test_df,
@@ -160,7 +152,7 @@ test_that("systematic tests", {
 
   # quantiles here likely to create duplicate endpoints
   dont_care(with(test_df,
-                 names(x) == "char" &
+                 names(x) %in% c("one", "same", "char") &
                  lbl_fun == "lbl_endpoints" &
                  brk_fun == "brk_quantiles" &
                  extend == TRUE & raw == TRUE