Skip to content

Commit

Permalink
Fix #49: allow singleton breaks in brk_quantile()
Browse files Browse the repository at this point in the history
  • Loading branch information
hughjonesd committed Jun 4, 2024
1 parent 1a3f5d7 commit 16e4c75
Show file tree
Hide file tree
Showing 7 changed files with 52 additions and 15 deletions.
6 changes: 3 additions & 3 deletions R/breaks.R
Original file line number Diff line number Diff line change
Expand Up @@ -31,9 +31,9 @@ brk_quantiles <- function (probs, ..., weights = NULL) {

if (anyNA(qs)) return(empty_breaks()) # data was all NA

non_dupes <- ! duplicated(qs)
qs <- qs[non_dupes]
probs <- probs[non_dupes]
dupe_middles <- find_duplicated_middles(qs)
qs <- qs[! dupe_middles]
probs <- probs[! dupe_middles]

breaks <- create_lr_breaks(qs, left)

Expand Down
6 changes: 6 additions & 0 deletions R/chop.R
Original file line number Diff line number Diff line change
Expand Up @@ -279,6 +279,9 @@ fillet <- function (
#' for calculating "type 1" quantiles, since they round down. See
#' [stats::quantile()].
#'
#' If `x` contains duplicates, consecutive quantiles may be the same number
#' so that some intervals get merged.
#'
#' @family chopping functions
#'
#' @export
Expand All @@ -296,6 +299,9 @@ fillet <- function (
#' # to label by the quantiles themselves:
#' chop_quantiles(1:10, 1:3/4, raw = TRUE)
#'
#' # duplicates:
#' tab_quantiles(c(1, 1, 1, 2, 3), 1:5/5)
#'
chop_quantiles <- function(
x,
probs,
Expand Down
22 changes: 22 additions & 0 deletions R/utils.R
Original file line number Diff line number Diff line change
Expand Up @@ -60,6 +60,28 @@ singletons <- function (breaks) {
}


#' Find middle numbers in runs of 3 or more duplicates.
#'
#' You can use this to find "illegal" breaks.
#'
#' @param x
#'
#' @return A logical vector of length(x), TRUE if the corresponding element
#' is in the middle of 3 or more duplicates
#' @noRd
#'
#' @examples
#' find_duplicated_middles(c(1, 2, 2, 3, 3, 3, 4))
find_duplicated_middles <- function (x) {
if (length(x) == 0) return(logical(0))
dupes <- duplicated(x)
# If element n is duplicated, and element n+1 is duplicated, then you
# are a duplicated middle.
# The last element is never a duplicated middle.
c(dupes[-length(dupes)] & dupes[-1], FALSE)
}


`%||%` <- function (x, y) if (is.null(x)) y else x


Expand Down
6 changes: 6 additions & 0 deletions man/chop_quantiles.Rd

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

15 changes: 13 additions & 2 deletions tests/testthat/test-breaks.R
Original file line number Diff line number Diff line change
Expand Up @@ -185,14 +185,25 @@ test_that("brk_quantiles", {

x <- rep(1, 5)
brks <- brk_quantiles(1:3/4)(x, FALSE, TRUE, FALSE)
expect_equivalent(c(brks), unique(quantile(x, 1:3/4)))
expect_equivalent(c(brks), c(1, 1))

x <- 1:10
brks <- brk_quantiles(1:3/4, weights = 1:10)(x, FALSE, TRUE, FALSE)
expect_equivalent(c(brks), Hmisc::wtd.quantile(x, weights = 1:10, probs = 1:3/4))
})


test_that("bugfix #49: brk_quantiles() shouldn't ignore duplicate quantiles", {
x <- c(1, 1, 2, 3, 4)
brks <- brk_quantiles(0:5/5)(x, FALSE, TRUE, FALSE)
expect_equivalent(c(brks), c(1.0, 1.0, 1.6, 2.4, 3.2, 4.0))

x <- c(1, 1, 1, 2, 3)
brks <- brk_quantiles(0:5/5)(x, FALSE, TRUE, FALSE)
expect_equivalent(c(brks), c(1.0, 1.0, 1.4, 2.2, 3.0))
})


test_that("brk_equally", {
expect_silent(brk_res(brk_equally(5)))
expect_error(brk_equally(4.5))
Expand All @@ -203,7 +214,7 @@ test_that("brk_equally", {


test_that("brk_equally warns when too few breaks created", {
dupes <- c(1, 1, 1, 2, 3, 4, 4, 4)
dupes <- rep(1, 4)
expect_warning(brk_res(brk_equally(4), x = dupes))
})

Expand Down
2 changes: 1 addition & 1 deletion tests/testthat/test-chop.R
Original file line number Diff line number Diff line change
Expand Up @@ -275,7 +275,7 @@ test_that("chop_equally", {
)

expect_warning(
chop_equally(c(1, 1, 2, 2), 4),
chop_equally(c(1, 1, 1, 1), 4),
"Fewer"
)
})
Expand Down
10 changes: 1 addition & 9 deletions tests/testthat/test-zzz-systematic.R
Original file line number Diff line number Diff line change
Expand Up @@ -103,14 +103,6 @@ test_that("systematic tests", {
& test_df$brk_fun %in% c("brk_n", "brk_n_merge"))


# all quantiles will be the same here, so no way to create
# intervals if extend is FALSE
should_fail(with(test_df,
names(x) %in% c("same", "one") &
brk_fun == "brk_quantiles" &
extend == FALSE
))

# brk_default_hi and _lo have a single break, so if you can't
# extend it, there are no possible intervals:
should_fail(with(test_df,
Expand Down Expand Up @@ -160,7 +152,7 @@ test_that("systematic tests", {

# quantiles here likely to create duplicate endpoints
dont_care(with(test_df,
names(x) == "char" &
names(x) %in% c("one", "same", "char") &
lbl_fun == "lbl_endpoints" &
brk_fun == "brk_quantiles" &
extend == TRUE & raw == TRUE
Expand Down

0 comments on commit 16e4c75

Please sign in to comment.