Merge pull request #18 from isoverse/dev

update to v 0.1.0
isoverse · Sep 15, 2021 · 3578071 · 3578071
2 parents 4131713 + 3b19df0
commit 3578071
Show file tree

Hide file tree

Showing 96 changed files with 1,583 additions and 1,180 deletions.
diff --git a/DESCRIPTION b/DESCRIPTION
@@ -2,13 +2,14 @@ Package: clumpedr
 Title: Clumped isotope analysis in R
 Version: 0.0.2
 Authors@R:
-    person(given = "Ilja",
-           family = "Kocken",
-           role = c("aut", "cre"),
-           email = "[email protected]")
+    person(
+        given = "Ilja", family = "Kocken",
+        email = "[email protected]",
+        role = c("aut", "cre"),
+        comment = c(ORCID = "0000-0003-2196-8718"))
 URL: https://github.com/isoverse/clumpedr
 BugReports: https://github.com/isoverse/clumpedr/issues
-Description: Data processing in clumped isotopes is an involved, evolving field. This package aims facilitate the processing of raw data to final Δ47 values and temperatures, step-by-step, with good documentation, so that the full process is easier to understand and tweak, more reproducible, and fully transparent.
+Description: Data processing in clumped isotopes is an involved, evolving field. This package aims to facilitate the processing of raw data to final \eqn{\Delta_{47}}{Δ47} values and temperatures, step-by-step, with good documentation, so that the full process is easier to understand and tweak, more reproducible, and fully transparent.
 Encoding: UTF-8
 LazyData: true
 VignetteBuilder: knitr
@@ -17,7 +18,7 @@ Depends:
     isoreader (>= 1.0.9)
 License: GPL-3 + file LICENSE
 Roxygen: list(markdown = TRUE)
-RoxygenNote: 6.1.1
+RoxygenNote: 7.1.1
 Imports:
     tidyr (>= 0.8.3.9000),
     magrittr,
@@ -36,7 +37,8 @@ Suggests:
     ggridges,
     naniar,
     tictoc,
-    knitr
+    knitr,
+    future
 Remotes:
     isoverse/isoreader,
     tidyverse/tidyr,

diff --git a/NAMESPACE b/NAMESPACE
@@ -8,40 +8,33 @@ export(append_expected_values)
 export(append_ref_deltas)
 export(apply_etf)
 export(bulk_and_clumping_deltas)
-export(calculate_etf)
-export(clean_did_info)
 export(clumpedr_get_default_parameters)
 export(clumpedr_show_default_parameters)
 export(clumpedr_turn_info_messages_off)
 export(clumpedr_turn_info_messages_on)
-export(collapse_cycles)
 export(correct_backgrounds)
 export(delta_values)
-export(disable_cycles)
 export(empirical_transfer_function)
 export(find_bad_cycles)
+export(find_init_outliers)
+export(find_internal_sd_outlier)
 export(find_outliers)
+export(find_session_id1_outlier)
+export(find_session_outlier)
 export(get_inits)
 export(get_ref_delta)
 export(iso_turn_info_messages_off)
 export(iso_turn_info_messages_on)
-export(isobar_ratios)
 export(little_deltas)
-export(match_intensities)
+export(nest_cycle_data)
 export(parse_info)
 export(pipe_plot)
-export(plot_base)
-export(plot_delta)
-export(plot_disabled_cycles)
-export(plot_etf)
-export(plot_outliers)
-export(plot_raw_delta)
-export(plot_stds)
 export(remove_outliers)
 export(revcal)
 export(set_temp)
-export(spread_intensities)
 export(spread_match)
+export(summarise_outlier)
+export(summarize_outlier)
 export(tempcal)
 export(temperature_axis)
 export(temperature_calculation)
@@ -83,30 +76,6 @@ importFrom(dplyr,summarize)
 importFrom(dplyr,tally)
 importFrom(dplyr,ungroup)
 importFrom(dplyr,vars)
-importFrom(ggplot2,"%+%")
-importFrom(ggplot2,aes)
-importFrom(ggplot2,aes_)
-importFrom(ggplot2,expand_limits)
-importFrom(ggplot2,facet_grid)
-importFrom(ggplot2,facet_wrap)
-importFrom(ggplot2,geom_hline)
-importFrom(ggplot2,geom_line)
-importFrom(ggplot2,geom_point)
-importFrom(ggplot2,geom_smooth)
-importFrom(ggplot2,geom_violin)
-importFrom(ggplot2,ggplot)
-importFrom(ggplot2,is.ggplot)
-importFrom(ggplot2,labs)
-importFrom(ggplot2,mean_cl_normal)
-importFrom(ggplot2,scale_alpha_manual)
-importFrom(ggplot2,scale_colour_manual)
-importFrom(ggplot2,scale_shape_manual)
-importFrom(ggplot2,scale_size_manual)
-importFrom(ggplot2,scale_x_continuous)
-importFrom(ggplot2,scale_y_continuous)
-importFrom(ggplot2,stat_summary)
-importFrom(ggplot2,theme)
-importFrom(ggplot2,theme_bw)
 importFrom(glue,glue)
 importFrom(glue,glue_collapse)
 importFrom(isoreader,iso_get_file_info)

diff --git a/R/abundance_ratios.R b/R/abundance_ratios.R
@@ -22,11 +22,15 @@ abundance_ratios <- function(.data,
                              i49 = s49,
                              R45 = R45, R46 = R46, R47 = R47,
                              R48 = R48, R49 = R49, quiet = default(quiet)) {
+  if (nrow(.data) == 0L) {
+    return(tibble(file_id = character()))
+  }
+
   # global variables and defaults
   s44 <- s45 <- s46 <- s47 <- s48 <- s49 <- NULL
 
   if (!quiet)
-    message("Calculating abundance ratios R[i] = i / 44")
+    message("Info: calculating abundance ratios R[i] = i / 44")
 
   .data %>%
     mutate({{ R45 }} := {{ i45 }} / {{ i44 }},

diff --git a/R/add_info.R b/R/add_info.R
@@ -6,13 +6,21 @@
 #'   [collapse_cycles()].
 #' @param .info A [tibble][tibble::tibble-package], resulting from
 #'   [clean_did_info()].
+#' @param cols  A character vector with column names in info to add to the data.
 #' @param ... Optional additional arguments to pass to the [dplyr::left_join()]
 #'   call.
 #' @export
 #' @family metadata cleaning functions
-add_info <- function(.data, .info, ..., quiet = default(quiet)) {
+add_info <- function(.data, .info, cols, quiet = default(quiet)) {
+  if (nrow(.data) == 0) {
+    return(tibble(file_id = character()))
+  }
+
+  if (!"file_id" %in% cols) {
+    cols <- c("file_id", cols)
+  }
+
   if (!quiet)
     message("Info: appending measurement information.")
-  .data %>%
-      left_join(.info, ..., by = "file_id")
+    left_join(x = .data, y = .info %>% select(tidyselect::all_of(cols)), by = "file_id")
 }
diff --git a/R/append_expected_values.R b/R/append_expected_values.R
@@ -5,11 +5,10 @@
 #'
 #' @param .data A [tibble][tibble::tibble-package].
 #' @param std_names Names of the standards.
-#' @param D47 Expected values of the standards at 25 °C. Defaults to Müller et
-#'   al., 2017.
-#' @inheritParams acid_fractionation
-#' @param id1 Name of the standard/sample identifier column.
+#' @param std_values Expected values of the standards. Defaults to Müller et
+#'   al., 2017 at 70 °C acid digestion.
 #' @param exp Name of the new column that will hold expected values.
+#' @param by Name of the standard/sample identifier column.
 #'
 #' @references
 #' W. F. Defliese, M.T. Hren, K. C. Lohmann. Compositional and temperature
@@ -26,21 +25,35 @@
 #' long-integration dual-inlet (LIDI) workflow: scratching at the lower sample
 #' weight boundaries. _Rapid Commun. Mass Spectrom._ **2017**, _31_,
 #' 1057--1066.
-#' @family empirical transfer functions
+#'
 #' @export
+#' @family empirical transfer functions
 append_expected_values <- function(.data,
                                    std_names = paste0("ETH-", 1:3),  # we don't use ETH-4!
-                                   D47 = c(0.258, 0.256, 0.691), #, 0.507),
-                                   aff = 0.062,
-                                   id1 = `Identifier 1`,
-                                   exp = expected_D47) {
+                                   std_values = c(0.258, 0.256, 0.691) - 0.062, #, 0.507),
+                                   exp = expected_D47,
+                                   by = `Identifier 1`,
+                                   quiet = default(quiet)) {
   # global variables and defaults
   `Identifier 1` <- expected_D47 <- NULL
 
-  # TODO: vectorize this, so you can add as many standards as desired?
+  if (length(std_names) != length(std_values))
+    stop("std_names should be of equal length to std_values.")
+
+  if (anyNA(std_values)) {
+    std_names <- std_names[!is.na(std_values)]
+    std_values <- std_values[!is.na(std_values)]
+  }
+
+  if (!quiet)
+    glue("Info: Appending expected values as {quo_name(enquo(exp))} for standards {glue::glue_collapse(unique(std_names), sep = ' ', last = ' and ', width = 30)}") %>%
+      message()
+
+  expected_standard_values <- tibble({{ by }} := std_names,
+                                     {{ exp }} := std_values)
+
+  by_quo_name <- quo_name(enquo(by))
+
   .data %>%
-    mutate({{ exp }} := case_when({{ id1 }} == std_names[[1]] ~ D47[[1]] - aff,
-                               {{ id1 }} == std_names[[2]] ~ D47[[2]] - aff,
-                               {{ id1 }} == std_names[[3]] ~ D47[[3]] - aff,
-                               TRUE ~ NA_real_))
+    left_join(expected_standard_values, by = by_quo_name)
 }
diff --git a/R/apply_etf.R b/R/apply_etf.R
@@ -6,19 +6,23 @@
 #' @details Note that the intercept and slope were calculated with the
 #'   dependent and independent variables in the other direction, so we flip
 #'   them here. i.e.: \deqn{\Delta_{47etf} = - (\alpha / \beta) + (1 / \beta)
-#'   \times \Delta_{47_raw}}{Δ47_etf = - (α / β) + (1 / β) * Δ47_raw}
+#'   \times \Delta_{47_raw}}{Δ47[etf] = - (α / β) + (1 / β) * Δ[47_raw]}
 #'
 #' @param .data A [tibble][tibble::tibble-package] containing column D47.
 #' @param intercept The column name with the ETF intercept.
 #' @param slope The column name with the ETF slope.
-#' @param D47 The column with \eqn{\Delta_{47}}{Δ47} values to use.
-#' @param D47_out The new column name.
+#' @param raw The column with raw values to apply the ETF to.
+#' @param out The new column name.
 #' @family empirical transfer functions
 #' @export
-apply_etf <- function(.data, intercept = intercept, slope = slope, D47 = D47_raw, D47_out = D47_etf) {
+apply_etf <- function(.data, intercept = intercept, slope = slope, raw = D47_raw, out = D47_etf, quiet = default(quiet)) {
   # defaults
   D47_raw <- D47_etf <- NULL
 
+  if (!quiet)
+    glue("Info: Applying ETF to {quo_name(enquo(raw))} using \u03b1 = {quo_name(enquo(slope))} and \u03b2 = {quo_name(enquo(intercept))}.") %>%
+      message()
+
   .data %>%
-    mutate({{D47_out}} := - (.data$intercept / .data$slope) + (1 / .data$slope) * {{D47}})
+    mutate({{ out }} := - ({{ intercept }} / {{ slope }}) + (1 / {{ slope }}) * {{ raw }})
 }
diff --git a/R/bulk_and_clumping_deltas.R b/R/bulk_and_clumping_deltas.R
@@ -14,11 +14,11 @@
 #' @param d49 Column name of d49.
 #' @inheritParams isobar_ratios
 #' @inheritParams default.params
-#' @export
 #' @references Daëron, M., Blamart, D., Peral, M., & Affek, H. P., Absolute
 #'   isotopic abundance ratios and the accuracy of \eqn{\Delta_{47}}{Δ47}
 #'   measurements, _Chemical Geology_ **2016**, _442_, 83–96.
 #'   \url{http://dx.doi.org/10.1016/j.chemgeo.2016.08.014}
+#' @export
 bulk_and_clumping_deltas  <- function(.data,
                                       # make the column names a bit flexible
                                       ## d13C_PDB_wg=d13C_PDB_wg,
@@ -32,6 +32,10 @@ bulk_and_clumping_deltas  <- function(.data,
                                       R18_PDBCO2 = default(R18_PDBCO2),
                                       lambda = default(lambda),
                                       D17O = default(D17O), quiet = default(quiet)) {
+  if (nrow(.data) == 0L) {
+    return(tibble(file_id = character()))
+  }
+
   # global variables and defaults
   R18_wg <- R13_wg <- R45_wg <- R46_wg <- R47_wg <- R48_wg <- R49_wg <- R45_stoch <-
     R46_stoch <- R47_stoch <- R48_stoch <- R49_stoch <- NULL
@@ -85,10 +89,16 @@ bulk_and_clumping_deltas  <- function(.data,
     R45_flag = (.data$R45 / .data$R45_stoch - 1),
     R46_flag = (.data$R46 / .data$R46_stoch - 1),
     # Compute raw clumped isotope anomalies
+    D45_raw = 1000 * (.data$R45 / .data$R45_stoch - 1),
+    D46_raw = 1000 * (.data$R46 / .data$R46_stoch - 1),
     D47_raw = 1000 * (.data$R47 / .data$R47_stoch - 1),
     D48_raw = 1000 * (.data$R48 /.data$R48_stoch - 1),
     D49_raw = 1000 * (.data$R49 / .data$R49_stoch - 1),
-    param_49 = (.data$s49 / .data$s44 - .data$r49 / .data$r44) * 1000)
+    param_49 = (.data$s49 / .data$s44 - .data$r49 / .data$r44) * 1000#,
+    # EXPERIMENTAL NEW STEP! -> undo on [2021-02-12], nobody knows why we do this
+    ## D47_raw = D47_raw - D46_raw - D45_raw,
+    ## D48_raw = D48_raw - D46_raw * 2
+    )
 
   ## # raise a warning if the corresponding anomalies exceed 0.02 ppm.
   ## # TODO: append warning to specific value!

diff --git a/R/calculate_etf.R b/R/calculate_etf.R
@@ -1,16 +1,26 @@
 #' Calculate the Empirical Transfer Function
 #'
 #' @param .data A [tibble][tibble::tibble-package].
-# #' @param outlier Column with sample outlier information. Looks up string `"no_outlier"`.
 #' @param raw Column name of raw \eqn{\Delta_{47}}{Δ47} values.
 #' @param exp Column name of expected \eqn{\Delta_{47}}{Δ47} values.
 #' @param session The column name to group analyses by. Defaults to
 #'   `Preparation`.
-#' @export
+#' @param etf The column name of the new model.
+#' @param etf_coefs The column name with the coefficients of the model.
+#' @param slope The column name of the new slope.
+#' @param intercept The column name of the new intercept.
+#' @param parallel Whether or not (default) to process this in parallel, using package `furrr`.
 #' @importFrom stats na.exclude
 calculate_etf <- function(.data, raw = D47_raw_mean, exp = expected_D47,
-                          session = Preparation, quiet = default(quiet)) {
+                          session = Preparation, etf = etf,
+                          etf_coefs = etf_coefs, slope = slope,
+                          intercept = intercept, parallel = FALSE, quiet = default(quiet)) {
   # global variables and defaults
+  if (parallel & !requireNamespace("furrr", quietly = TRUE)) {
+    stop("Package \"furrr\" is needed for this function to work. Please install it or run this with `parallel = FALSE`",
+      call. = FALSE)
+  }
+
   outlier <- D47_raw_mean <- expected_D47 <- Preparation <- NULL
 
   # this makes it continue even if lm fails.
@@ -23,16 +33,30 @@ calculate_etf <- function(.data, raw = D47_raw_mean, exp = expected_D47,
     },
     otherwise = NA_real_, quiet = quiet)
 
-  pos_tidy <- purrr::possibly(broom::tidy, otherwise=NA_real_, quiet=quiet)
-  pos_map_dbl <- purrr::possibly(map_dbl, otherwise=NA_real_, quiet=quiet)
+  if (!quiet)
+    glue("Info: Calculating ETF with {quo_name(enquo(raw))} as a function of {quo_name(enquo(exp))} for each {quo_name(enquo(session))}.") %>%
+      message()
+
+  if (parallel) {
+    out <- .data %>%
+      nest(session_nested = -{{ session }}) %>%
+      mutate({{ etf }} := furrr::future_map(session_nested, pos_lm),
+      {{ etf_coefs }} := furrr::future_map({{ etf }}, "coefficients"),
+      {{ intercept }} := furrr::future_map_dbl({{ etf_coefs }}, 1),
+      {{ slope }} := furrr::future_map_dbl({{ etf_coefs }}, 2)) %>%
+      unnest(cols = .data$session_nested)
+  } else {
+    out <- .data %>%
+      nest(session_nested = -{{ session }}) %>%
+      mutate({{ etf }} := map(session_nested, pos_lm),
+      {{ etf_coefs }} := map({{ etf }}, "coefficients"),
+      {{ intercept }} := pos_map_dbl({{ etf_coefs }}, 1),
+      {{ slope }} := pos_map_dbl({{ etf_coefs }}, 2)) %>%
+      unnest(cols = .data$session_nested)
+  }
 
-  .data %>%
-    group_by({{ session }}) %>%
-    nest() %>%
-    mutate(etf = map(data, pos_lm),
-           etf_coefs=map(.data$etf, "coefficients"),
-           intercept=map_dbl(.data$etf_coefs, 1),
-           slope=map_dbl(.data$etf_coefs, 2)) %>%
-    ## select(-etf_coefs) %>%
-    unnest(cols = .data$data)
+  out
 }
+
+pos_tidy <- purrr::possibly(broom::tidy, otherwise=NA_real_, quiet=default(quiet))
+pos_map_dbl <- purrr::possibly(map_dbl, otherwise=NA_real_, quiet=default(quiet))