From 7192f3ded7f2200d15bfbd5ad3f6e43acb46488c Mon Sep 17 00:00:00 2001 From: Elizabeth Wenk Date: Fri, 15 Nov 2024 12:50:34 +1100 Subject: [PATCH] Remove function `summarise_trait_means` (#125) As described in issue #123 the function is outputting inaccurate results if presented with a database with data from multiple datasets. We are removing it until we build a more sophisticated function. Closes issue #123 --- NAMESPACE | 1 - R/summarise_trait_values.R | 64 --------------------- R/trait_pivot_longer.R | 2 +- _pkgdown.yml | 1 - man/summarise_trait_means.Rd | 23 -------- man/trait_pivot_longer.Rd | 2 +- tests/testthat/test-summarise_trait_means.R | 34 ----------- vignettes/austraits.Rmd.orig | 9 --- 8 files changed, 2 insertions(+), 134 deletions(-) delete mode 100644 R/summarise_trait_values.R delete mode 100644 man/summarise_trait_means.Rd delete mode 100644 tests/testthat/test-summarise_trait_means.R diff --git a/NAMESPACE b/NAMESPACE index 998dc4a..d132b9e 100644 --- a/NAMESPACE +++ b/NAMESPACE @@ -31,7 +31,6 @@ export(plot_site_locations) export(plot_trait_distribution_beeswarm) export(separate_trait_values) export(summarise_database) -export(summarise_trait_means) export(trait_pivot_longer) export(trait_pivot_wider) import(RefManageR) diff --git a/R/summarise_trait_values.R b/R/summarise_trait_values.R deleted file mode 100644 index b940e97..0000000 --- a/R/summarise_trait_values.R +++ /dev/null @@ -1,64 +0,0 @@ -#' Compute mean trait values for studies that have multiple observations for a given trait -#' -#' @param trait_data the traits table in a traits.build database -#' -#' @return A reduced trait table, mean values are flagged with the suffix '_summarised' in value_type -#' @export -#' -#' @examples -#' \dontrun{ -#' data <- austraits$traits %>% filter(dataset_id == "Falster_2003") -#' data %>% summarise_trait_means() -#' } - - -summarise_trait_means <- function(trait_data){ - cli::cli_alert_danger("Use with caution: Trait means are summarised using an uninformed method. Deprecation due in 2025") - - - suppressWarnings( - trait_data %>% - dplyr::mutate(value = as.numeric(.data$value), - replicates = as.numeric(.data$replicates)) -> trait_data - ) - - # Identify which ones need summarising - target <- trait_data %>% - dplyr::group_by(trait_name, observation_id) %>% - dplyr::summarise(dplyr::n()) %>% - dplyr::filter(`dplyr::n()` > 1) %>% - dplyr::select("trait_name", observation_id) - - # # Identify which ones that don't need to change - original <- trait_data %>% - dplyr::group_by(trait_name, observation_id) %>% - dplyr::summarise(dplyr::n()) %>% - dplyr::filter(! `dplyr::n()` > 1) %>% - dplyr::select("trait_name", "observation_id") - - original_df <- purrr::map2_dfr(original$trait_name, original$observation_id, - ~ dplyr::filter(trait_data, trait_name == .x & observation_id == .y)) - - # Filter out the ones where nrows is > 1 - target_ls <- purrr::map2(target$trait_name, target$observation_id, - ~ dplyr::filter(trait_data, trait_name == .x & observation_id == .y) - ) - - # Manipulate: Compute means, update value type and replicates - target_summarised <- purrr::map(target_ls, - ~ .x %>% dplyr::mutate(value = mean(value, na.rm = TRUE), - value_type = paste0(value_type, "_summarised"), - replicates = sum(replicates)) %>% - dplyr::filter(dplyr::row_number() == 1) - - ) - - - target_bound <- target_summarised %>% dplyr::bind_rows() - - # Append back to the ones where nrows = 1 - ret <- dplyr::bind_rows(original_df, target_bound) - - # Sort by observation_id and return - ret %>% dplyr::arrange(.data$observation_id) -} \ No newline at end of file diff --git a/R/trait_pivot_longer.R b/R/trait_pivot_longer.R index 70dd44c..9c6fe67 100644 --- a/R/trait_pivot_longer.R +++ b/R/trait_pivot_longer.R @@ -7,7 +7,7 @@ #' @param wide_data output from trait_pivot_wider. For <= v3.0.2 list object containing wide data generated,For > v3.0.2 a tibble of wide data #' @return A tibble in long format #' @details -#' - If `bind_trait_values` or `summarise_trait_means` was applied prior to `trait_pivot_wider` for AusTraits +#' - If `bind_trait_values` was applied prior to `trait_pivot_wider` for AusTraits #' <= v3.0.2, `trait_pivot_longer` will return a tibble with fewer observations than the original traits table. #' - For AusTraits version >3.0.2, `trait_pivot_longer` will return a tibble with fewer columns than that original traits table #' - The excluded columns include: "unit", "replicates", "measurement_remarks", "basis_of_record", "basis_of_value" diff --git a/_pkgdown.yml b/_pkgdown.yml index f11b161..9099e2c 100644 --- a/_pkgdown.yml +++ b/_pkgdown.yml @@ -25,7 +25,6 @@ reference: - contents: - starts_with("extract") - starts_with("trait_pivot") - - summarise_trait_means - starts_with("join") - ends_with("trait_values") - as_wide_table diff --git a/man/summarise_trait_means.Rd b/man/summarise_trait_means.Rd deleted file mode 100644 index 13831ce..0000000 --- a/man/summarise_trait_means.Rd +++ /dev/null @@ -1,23 +0,0 @@ -% Generated by roxygen2: do not edit by hand -% Please edit documentation in R/summarise_trait_values.R -\name{summarise_trait_means} -\alias{summarise_trait_means} -\title{Compute mean trait values for studies that have multiple observations for a given trait} -\usage{ -summarise_trait_means(trait_data) -} -\arguments{ -\item{trait_data}{the traits table in a traits.build database} -} -\value{ -A reduced trait table, mean values are flagged with the suffix '_summarised' in value_type -} -\description{ -Compute mean trait values for studies that have multiple observations for a given trait -} -\examples{ -\dontrun{ -data <- austraits$traits \%>\% filter(dataset_id == "Falster_2003") -data \%>\% summarise_trait_means() -} -} diff --git a/man/trait_pivot_longer.Rd b/man/trait_pivot_longer.Rd index cd82781..bf2ff7c 100644 --- a/man/trait_pivot_longer.Rd +++ b/man/trait_pivot_longer.Rd @@ -22,7 +22,7 @@ In other words, trait_pivot_longer reverts the actions of trait_pivot_wider } \details{ \itemize{ -\item If \code{bind_trait_values} or \code{summarise_trait_means} was applied prior to \code{trait_pivot_wider} for AusTraits +\item If \code{bind_trait_values} was applied prior to \code{trait_pivot_wider} for AusTraits <= v3.0.2, \code{trait_pivot_longer} will return a tibble with fewer observations than the original traits table. \item For AusTraits version >3.0.2, \code{trait_pivot_longer} will return a tibble with fewer columns than that original traits table \itemize{ diff --git a/tests/testthat/test-summarise_trait_means.R b/tests/testthat/test-summarise_trait_means.R deleted file mode 100644 index 295ed4e..0000000 --- a/tests/testthat/test-summarise_trait_means.R +++ /dev/null @@ -1,34 +0,0 @@ -test_that("Function output is correct", { - target <- austraits_5.0.0_lite$traits %>% - dplyr::group_by(trait_name, observation_id) %>% - dplyr::summarise(dplyr::n()) %>% - dplyr::ungroup() %>% - dplyr::filter(`dplyr::n()` > 1) %>% - dplyr::select(trait_name, observation_id) - - original <- austraits_5.0.0_lite$traits %>% - dplyr::group_by(trait_name, observation_id) %>% - dplyr::summarise(dplyr::n()) %>% - dplyr::ungroup() %>% - dplyr::filter(! `dplyr::n()` > 1) %>% - dplyr::select(trait_name, observation_id) - - # The final output should have nrow as original plus eventual number of summarised obs - # `summarise_trait_means` only works with austratis_3.0, because afterwards would need to drop columns to meaningfully take means - # so no errors, but had to comment out `expect_equal` test and output has same number of rows as input - # this is expected looking at the code for `summarise_trait_means` - expect_message(out <- summarise_trait_means(extract_dataset(austraits_5.0.0_lite, "Crous_2013")$traits)) - expect_visible(out) - #expect_equal( out %>% nrow(), ( nrow(original) + nrow(target)) ) - expect_named(out) - expect_type(out, "list") -}) - -test_that("Function throws error", { - expect_error(summarise_trait_means(austraits_3.0.2_lite)) - expect_error(summarise_trait_means(austraits_3.0.2_lite$sites)) - expect_error(summarise_trait_means(austraits_3.0.2_lite$taxa)) -}) - - - diff --git a/vignettes/austraits.Rmd.orig b/vignettes/austraits.Rmd.orig index 5f1e551..36db58a 100644 --- a/vignettes/austraits.Rmd.orig +++ b/vignettes/austraits.Rmd.orig @@ -231,13 +231,4 @@ data_wide_bound %>% separate_trait_values(., austraits$definitions) ``` -#### `summarise_trait_means` - -```{r, pivot_summarised} -data_wide_summarised <- data_falster_studies$traits %>% - summarise_trait_means() %>% - trait_pivot_wider() - -data_wide_summarised$value %>% head() -```