diff --git a/DESCRIPTION b/DESCRIPTION index b4cd9ec950..cd0ff473ef 100644 --- a/DESCRIPTION +++ b/DESCRIPTION @@ -254,6 +254,7 @@ Collate: 'stat-ellipse.R' 'stat-function.R' 'stat-identity.R' + 'stat-manual.R' 'stat-qq-line.R' 'stat-qq.R' 'stat-quantilemethods.R' diff --git a/NAMESPACE b/NAMESPACE index f0ccf3bec1..489e42e50a 100644 --- a/NAMESPACE +++ b/NAMESPACE @@ -267,6 +267,7 @@ export(StatEcdf) export(StatEllipse) export(StatFunction) export(StatIdentity) +export(StatManual) export(StatQq) export(StatQqLine) export(StatQuantile) @@ -678,6 +679,7 @@ export(stat_ecdf) export(stat_ellipse) export(stat_function) export(stat_identity) +export(stat_manual) export(stat_qq) export(stat_qq_line) export(stat_quantile) diff --git a/NEWS.md b/NEWS.md index 5df6059f0f..2e49d964ca 100644 --- a/NEWS.md +++ b/NEWS.md @@ -1,5 +1,6 @@ # ggplot2 (development version) +* New stat: `stat_manual()` for arbitrary computations (@teunbrand, #3501) * Built-in `theme_*()` functions now have `ink` and `paper` arguments to control foreground and background colours respectively (@teunbrand) * The `summary()` method for ggplots is now more terse about facets diff --git a/R/stat-manual.R b/R/stat-manual.R new file mode 100644 index 0000000000..994c8d622e --- /dev/null +++ b/R/stat-manual.R @@ -0,0 +1,131 @@ + +#' Manually compute transformations +#' +#' `stat_manual()` takes a function that computes a data transformation for +#' every group. +#' +#' @inheritParams layer +#' @inheritParams geom_point +#' @param fun Function that takes a data frame as input and returns a data +#' frame or data frame-like list as output. The default (`identity()`) returns +#' the data unchanged. +#' @param args A list of arguments to pass to the function given in `fun`. +#' +#' @eval rd_aesthetics("stat", "manual") +#' @section Aesthetics: +#' Input aesthetics are determined by the `fun` argument. Output aesthetics must +#' include those required by `geom`. Any aesthetic that is constant within a +#' group will be preserved even if dropped by `fun`. +#' +#' @export +#' +#' @examples +#' # A standard scatterplot +#' p <- ggplot(mtcars, aes(disp, mpg, colour = factor(cyl))) + +#' geom_point() +#' +#' # The default just displays points as-is +#' p + stat_manual() +#' +#' # Using a custom function +#' make_hull <- function(data) { +#' hull <- chull(x = data$x, y = data$y) +#' data.frame(x = data$x[hull], y = data$y[hull]) +#' } +#' +#' p + stat_manual( +#' geom = "polygon", +#' fun = make_hull, +#' fill = NA +#' ) +#' +#' # Using the `with` function with quoting +#' p + stat_manual( +#' fun = with, +#' args = list(expr = quote({ +#' hull <- chull(x, y) +#' list(x = x[hull], y = y[hull]) +#' })), +#' geom = "polygon", fill = NA +#' ) +#' +#' # Using the `transform` function with quoting +#' p + stat_manual( +#' geom = "segment", +#' fun = transform, +#' args = list( +#' xend = quote(mean(x)), +#' yend = quote(mean(y)) +#' ) +#' ) +#' +#' # Using dplyr verbs with `vars()` +#' if (requireNamespace("dplyr", quietly = TRUE)) { +#' +#' # Get centroids with `summarise()` +#' p + stat_manual( +#' size = 10, shape = 21, +#' fun = dplyr::summarise, +#' args = vars(x = mean(x), y = mean(y)) +#' ) +#' +#' # Connect to centroid with `mutate` +#' p + stat_manual( +#' geom = "segment", +#' fun = dplyr::mutate, +#' args = vars(xend = mean(x), yend = mean(y)) +#' ) +#' +#' # Computing hull with `reframe()` +#' p + stat_manual( +#' geom = "polygon", fill = NA, +#' fun = dplyr::reframe, +#' args = vars(hull = chull(x, y), x = x[hull], y = y[hull]) +#' ) +#' } +stat_manual <- function( + mapping = NULL, + data = NULL, + geom = "point", + position = "identity", + ..., + fun = identity, + args = list(), + na.rm = FALSE, + show.legend = NA, + inherit.aes = TRUE) { + + layer( + data = data, + mapping = mapping, + stat = StatManual, + geom = geom, + position = position, + show.legend = show.legend, + inherit.aes = inherit.aes, + params = list2( + na.rm = na.rm, + fun = fun, + args = args, + ... + ) + ) +} + +#' @rdname ggplot2-ggproto +#' @format NULL +#' @usage NULL +#' @export +StatManual <- ggproto( + "StatManual", Stat, + + setup_params = function(data, params) { + params$fun <- allow_lambda(params$fun) + check_function(params$fun, arg = "fun") + params + }, + + compute_group = function(data, scales, fun = identity, args = list()) { + as_gg_data_frame(inject(fun(data, !!!args))) + } +) diff --git a/_pkgdown.yml b/_pkgdown.yml index 1e4ea6a727..593f015590 100644 --- a/_pkgdown.yml +++ b/_pkgdown.yml @@ -68,6 +68,7 @@ reference: - stat_summary_bin - stat_unique - stat_sf_coordinates + - stat_manual - after_stat - subtitle: Position adjustment diff --git a/man/ggplot2-ggproto.Rd b/man/ggplot2-ggproto.Rd index c3384f1e45..056c106023 100644 --- a/man/ggplot2-ggproto.Rd +++ b/man/ggplot2-ggproto.Rd @@ -23,10 +23,10 @@ % R/scale-identity.R, R/stat-align.R, R/stat-bin.R, R/stat-bin2d.R, % R/stat-bindot.R, R/stat-binhex.R, R/stat-boxplot.R, R/stat-contour.R, % R/stat-count.R, R/stat-density-2d.R, R/stat-density.R, R/stat-ecdf.R, -% R/stat-ellipse.R, R/stat-function.R, R/stat-identity.R, R/stat-qq-line.R, -% R/stat-qq.R, R/stat-quantilemethods.R, R/stat-smooth.R, R/stat-sum.R, -% R/stat-summary-2d.R, R/stat-summary-bin.R, R/stat-summary-hex.R, -% R/stat-summary.R, R/stat-unique.R, R/stat-ydensity.R +% R/stat-ellipse.R, R/stat-function.R, R/stat-identity.R, R/stat-manual.R, +% R/stat-qq-line.R, R/stat-qq.R, R/stat-quantilemethods.R, +% R/stat-smooth.R, R/stat-sum.R, R/stat-summary-2d.R, R/stat-summary-bin.R, +% R/stat-summary-hex.R, R/stat-summary.R, R/stat-unique.R, R/stat-ydensity.R \docType{data} \name{ggplot2-ggproto} \alias{ggplot2-ggproto} @@ -139,6 +139,7 @@ \alias{StatEllipse} \alias{StatFunction} \alias{StatIdentity} +\alias{StatManual} \alias{StatQqLine} \alias{StatQq} \alias{StatQuantile} diff --git a/man/stat_manual.Rd b/man/stat_manual.Rd new file mode 100644 index 0000000000..f19a03128e --- /dev/null +++ b/man/stat_manual.Rd @@ -0,0 +1,199 @@ +% Generated by roxygen2: do not edit by hand +% Please edit documentation in R/stat-manual.R +\name{stat_manual} +\alias{stat_manual} +\title{Manually compute transformations} +\usage{ +stat_manual( + mapping = NULL, + data = NULL, + geom = "point", + position = "identity", + ..., + fun = identity, + args = list(), + na.rm = FALSE, + show.legend = NA, + inherit.aes = TRUE +) +} +\arguments{ +\item{mapping}{Set of aesthetic mappings created by \code{\link[=aes]{aes()}}. If specified and +\code{inherit.aes = TRUE} (the default), it is combined with the default mapping +at the top level of the plot. You must supply \code{mapping} if there is no plot +mapping.} + +\item{data}{The data to be displayed in this layer. There are three +options: + +If \code{NULL}, the default, the data is inherited from the plot +data as specified in the call to \code{\link[=ggplot]{ggplot()}}. + +A \code{data.frame}, or other object, will override the plot +data. All objects will be fortified to produce a data frame. See +\code{\link[=fortify]{fortify()}} for which variables will be created. + +A \code{function} will be called with a single argument, +the plot data. The return value must be a \code{data.frame}, and +will be used as the layer data. A \code{function} can be created +from a \code{formula} (e.g. \code{~ head(.x, 10)}).} + +\item{geom}{The geometric object to use to display the data for this layer. +When using a \verb{stat_*()} function to construct a layer, the \code{geom} argument +can be used to override the default coupling between stats and geoms. The +\code{geom} argument accepts the following: +\itemize{ +\item A \code{Geom} ggproto subclass, for example \code{GeomPoint}. +\item A string naming the geom. To give the geom as a string, strip the +function name of the \code{geom_} prefix. For example, to use \code{geom_point()}, +give the geom as \code{"point"}. +\item For more information and other ways to specify the geom, see the +\link[=layer_geoms]{layer geom} documentation. +}} + +\item{position}{A position adjustment to use on the data for this layer. This +can be used in various ways, including to prevent overplotting and +improving the display. The \code{position} argument accepts the following: +\itemize{ +\item The result of calling a position function, such as \code{position_jitter()}. +This method allows for passing extra arguments to the position. +\item A string naming the position adjustment. To give the position as a +string, strip the function name of the \code{position_} prefix. For example, +to use \code{position_jitter()}, give the position as \code{"jitter"}. +\item For more information and other ways to specify the position, see the +\link[=layer_positions]{layer position} documentation. +}} + +\item{...}{Other arguments passed on to \code{\link[=layer]{layer()}}'s \code{params} argument. These +arguments broadly fall into one of 4 categories below. Notably, further +arguments to the \code{position} argument, or aesthetics that are required +can \emph{not} be passed through \code{...}. Unknown arguments that are not part +of the 4 categories below are ignored. +\itemize{ +\item Static aesthetics that are not mapped to a scale, but are at a fixed +value and apply to the layer as a whole. For example, \code{colour = "red"} +or \code{linewidth = 3}. The geom's documentation has an \strong{Aesthetics} +section that lists the available options. The 'required' aesthetics +cannot be passed on to the \code{params}. Please note that while passing +unmapped aesthetics as vectors is technically possible, the order and +required length is not guaranteed to be parallel to the input data. +\item When constructing a layer using +a \verb{stat_*()} function, the \code{...} argument can be used to pass on +parameters to the \code{geom} part of the layer. An example of this is +\code{stat_density(geom = "area", outline.type = "both")}. The geom's +documentation lists which parameters it can accept. +\item Inversely, when constructing a layer using a +\verb{geom_*()} function, the \code{...} argument can be used to pass on parameters +to the \code{stat} part of the layer. An example of this is +\code{geom_area(stat = "density", adjust = 0.5)}. The stat's documentation +lists which parameters it can accept. +\item The \code{key_glyph} argument of \code{\link[=layer]{layer()}} may also be passed on through +\code{...}. This can be one of the functions described as +\link[=draw_key]{key glyphs}, to change the display of the layer in the legend. +}} + +\item{fun}{Function that takes a data frame as input and returns a data +frame or data frame-like list as output. The default (\code{identity()}) returns +the data unchanged.} + +\item{args}{A list of arguments to pass to the function given in \code{fun}.} + +\item{na.rm}{If \code{FALSE}, the default, missing values are removed with +a warning. If \code{TRUE}, missing values are silently removed.} + +\item{show.legend}{logical. Should this layer be included in the legends? +\code{NA}, the default, includes if any aesthetics are mapped. +\code{FALSE} never includes, and \code{TRUE} always includes. +It can also be a named logical vector to finely select the aesthetics to +display. To include legend keys for all levels, even +when no data exists, use \code{TRUE}. If \code{NA}, all levels are shown in legend, +but unobserved levels are omitted.} + +\item{inherit.aes}{If \code{FALSE}, overrides the default aesthetics, +rather than combining with them. This is most useful for helper functions +that define both data and aesthetics and shouldn't inherit behaviour from +the default plot specification, e.g. \code{\link[=borders]{borders()}}.} +} +\description{ +\code{stat_manual()} takes a function that computes a data transformation for +every group. +} +\section{Aesthetics}{ + +\code{stat_manual()} understands the following aesthetics (required aesthetics are in bold): +\itemize{ +\item \code{\link[=aes_group_order]{group}} +} +Learn more about setting these aesthetics in \code{vignette("ggplot2-specs")}. + + +Input aesthetics are determined by the \code{fun} argument. Output aesthetics must +include those required by \code{geom}. Any aesthetic that is constant within a +group will be preserved even if dropped by \code{fun}. +} + +\examples{ +# A standard scatterplot +p <- ggplot(mtcars, aes(disp, mpg, colour = factor(cyl))) + + geom_point() + +# The default just displays points as-is +p + stat_manual() + +# Using a custom function +make_hull <- function(data) { + hull <- chull(x = data$x, y = data$y) + data.frame(x = data$x[hull], y = data$y[hull]) +} + +p + stat_manual( + geom = "polygon", + fun = make_hull, + fill = NA +) + +# Using the `with` function with quoting +p + stat_manual( + fun = with, + args = list(expr = quote({ + hull <- chull(x, y) + list(x = x[hull], y = y[hull]) + })), + geom = "polygon", fill = NA +) + +# Using the `transform` function with quoting +p + stat_manual( + geom = "segment", + fun = transform, + args = list( + xend = quote(mean(x)), + yend = quote(mean(y)) + ) +) + +# Using dplyr verbs with `vars()` +if (requireNamespace("dplyr", quietly = TRUE)) { + + # Get centroids with `summarise()` + p + stat_manual( + size = 10, shape = 21, + fun = dplyr::summarise, + args = vars(x = mean(x), y = mean(y)) + ) + + # Connect to centroid with `mutate` + p + stat_manual( + geom = "segment", + fun = dplyr::mutate, + args = vars(xend = mean(x), yend = mean(y)) + ) + + # Computing hull with `reframe()` + p + stat_manual( + geom = "polygon", fill = NA, + fun = dplyr::reframe, + args = vars(hull = chull(x, y), x = x[hull], y = y[hull]) + ) +} +} diff --git a/tests/testthat/test-stat-manual.R b/tests/testthat/test-stat-manual.R new file mode 100644 index 0000000000..5e2ca54376 --- /dev/null +++ b/tests/testthat/test-stat-manual.R @@ -0,0 +1,18 @@ +test_that("stat_manual can take a function", { + + centroid <- function(data) data.frame(x = mean(data$x), y = mean(data$y)) + + layer <- get_layer_data( + ggplot(mtcars, aes(disp, mpg, colour = factor(cyl))) + + stat_manual(fun = centroid, size = 5, shape = 21) + ) + + expect_equal( + layer$x, + vapply(split(mtcars$disp, mtcars$cyl), mean, numeric(1), USE.NAMES = FALSE) + ) + expect_equal( + layer$y, + vapply(split(mtcars$mpg, mtcars$cyl), mean, numeric(1), USE.NAMES = FALSE) + ) +})