allow select syntax for summary table

#14
isoverse · Mar 27, 2019 · 73385f5 · 73385f5
1 parent 5291534
commit 73385f5
Show file tree

Hide file tree

Showing 3 changed files with 43 additions and 40 deletions.
diff --git a/DESCRIPTION b/DESCRIPTION
@@ -1,11 +1,11 @@
 Package: isoprocessor
 Title: IRMS data processor
 Description: Data processing and reduction pipelines for isotope ratio mass spectrometry (IRMS) data
-Version: 0.3.0
+Version: 0.3.1
 Authors@R: person("Sebastian", "Kopf", email = "[email protected]",
     role = c("aut", "cre"))
-URL: https://github.com/kopflab/isoprocessor
-BugReports: https://github.com/kopflab/isoprocessor/issues
+URL: https://github.com/isoverse/isoprocessor
+BugReports: https://github.com/isoverse/isoprocessor/issues
 Depends: 
     R (>= 3.3.0),
     isoreader (>= 1.0.2)

diff --git a/NAMESPACE b/NAMESPACE
@@ -33,6 +33,7 @@ export(iso_remove_problematic_peaks)
 export(iso_reset_default_processor_parameters)
 export(iso_set_default_processor_parameters)
 export(iso_show_default_processor_parameters)
+export(iso_summarize_data_table)
 export(iso_turn_info_messages_off)
 export(iso_turn_info_messages_on)
 export(iso_unnest_calibration_coefs)

diff --git a/R/utilities.R b/R/utilities.R
@@ -41,51 +41,53 @@ iso_print_data_table <- function(dt, select = everything(), filter = TRUE, print
 
 }
 
-#' Generate an overview table with the data
+#' Alias for summarize data table
+#' @param ... deprecated
+#' @export
+iso_generate_summary_table <- function(...) {
+  warning("this function was renamed --> calling iso_generate_summary_table() instead",
+          immediate. = TRUE, call. = FALSE)
+  iso_generate_summary_table(...)
+}
+
+# FIXME: write units tests
+#' Summarize a data table
 #'
-#' Convenience function to summarize means and standard deviationsfor one or multiple data columns. Use \link[dplyr]{group_by} prior to calling this function to generate the data table for individual subsets of the data. The generated data table always includes an \code{n} column with the number of records per group.
+#' Convenience function to summarize means and standard deviations for one or multiple data columns. Use \link[dplyr]{group_by} prior to calling this function to generate the data table for individual subsets of the data. The generated data table always includes an \code{n} column with the number of records per group. If no column(s) are specified, will automatically summarize all columns except for the grouping columns.
 #'
 #' @param dt data table, can already have a group_by if so desired
-#' @param ... which data columns to include in data overview
+#' @param ... which data columns to include in data overview. All \link[dplyr]{select} style syntax is supported (including on the fly renaming). If no columns are specified, will summarize all numeric columns (excluding any grouping columns).
 #' @param cutoff the minimum number of records per group in order to include the group
 #' @export
-iso_generate_summary_table <- function(dt, ..., cutoff = 1) {
-
-  # safety checks
-  include <- ensyms(...)
-  if (length(include) == 0)
+iso_summarize_data_table <- function(dt, ..., cutoff = 1) {
+
+  # get column selectors
+  dots <- quos(...)
+  if (length(dots) == 0) dots <- quos(everything())
+
+  # find columns
+  grp_vars <- dplyr::group_vars(dt) %>% { setNames(., .) }
+  vars <- tidyselect::vars_select(names(dt), !!!dots) %>%
+    # exclude grouping variables
+    { .[!. %in% grp_vars] } %>%
+    # only numeric
+    { .[purrr::map_lgl(., ~is.numeric(dt[[.x]]))] }
+
+  # safety check
+  if (length(vars) == 0)
     stop("no data columns provided, please select at least 1", call. = FALSE)
 
-  # add n
-  include <- quos(!!!c(sym("n"), include))
-
-  # selects
-  select_quos <- quos(!!!unlist(
-    c(groups(dt),
-      map2(
-        unname(include),
-        names(include),
-        function(col, name) {
-          mean <- str_c(quo_text(col), "_mean")
-          sd <- str_c(quo_text(col), "_sd")
-          mean_name <- if(nchar(name) > 0) str_c(name, "_mean") else mean
-          sd_name <- if(nchar(name) > 0) str_c(name, "_sd") else sd
-          list(sym(mean), sym(sd)) %>% setNames(c(mean_name, sd_name))
-        }
-      ))))
-
-  # generate overview
+  # generate mutate quos
+  summarize_funcs <-
+    tibble(var = names(vars)) %>% tidyr::crossing(tibble(func = c("mean", "sd"))) %>%
+    dplyr::mutate(name = paste(var, func)) %>%
+    with(purrr::map2(var, func, ~quo((!!.y)(!!sym(.x)))) %>% setNames(name))
+
+  # generate summary
   dt %>%
-    mutate(n = n()) %>%
-    summarize_at(include, funs(mean, sd)) %>%
-    # bring in right order
-    select(!!!select_quos) %>%
-    # clean up counts (n)
-    rename(n = n_mean) %>%
-    select(-n_sd) %>%
-    mutate(n = as.integer(n)) %>%
-    filter(n >= cutoff) %>%
-    arrange(desc(n))
+    dplyr::select(!!!c(grp_vars, vars)) %>%
+    dplyr::summarize(n = n(), !!!summarize_funcs) %>%
+    dplyr::filter(n >= cutoff)
 }