Changes for v5 austraits.build (#78)

* changes required for v5 austraits.build * Removed original_name for trait_pivot_wider3 for v5.0.0 * Added trait_pivot_wider for v4.x.x and code for what_version * Making new switches for join and as_wide_table based on new versioning * Sub switch for extract_ and recreated internal data * Sub switches for trait_pivot_longer * Minor fix in join_methods * Added vars as global vars * Removed .data calls when not needed * Update to work with latest zenodo API (#81) - As documented in #79, the Zenodo API has changed, breaking our download feature. - This commit updates the internals to work with the latest changes. Specifically: - the way to access json for all versions has changed (changed url structure, and for id we now use one of the record ids, rather than the conceptid) - the call to download file has changed - format of the API json has changed Also - added record id to the table of versions - put a check in to remove "v" from any version entered by user * Recreated data so extract is passing * Update `treatment_id` with `treatment_context_id` * Revert "Update `treatment_id` with `treatment_context_id`" This reverts commit 3fc6717. * minor column name changes Changes column names, reflecting recent changes to traits.build output. * Update as_wide_table.R add `any_of` to column selection within `as_wide_table` to accommodate other traits.build databases that don't have the same columns in taxon_list.csv * Fixed getting versions and load austraits with zenodo updates and minor update with as_wide_table with removal of variable * Fixed minor bug in get_version_latest --------- Co-authored-by: Elizabeth Wenk <[email protected]> Co-authored-by: Daniel Falster <[email protected]> Co-authored-by: yangsophieee <[email protected]>
traitecoevo · Nov 21, 2023 · f9c1666 · f9c1666
1 parent 17de18c
commit f9c1666
Show file tree

Hide file tree

Showing 18 changed files with 326 additions and 126 deletions.
diff --git a/DESCRIPTION b/DESCRIPTION
@@ -22,7 +22,7 @@ Encoding: UTF-8
 Language: en
 LazyData: true
 Roxygen: list(markdown = TRUE)
-RoxygenNote: 7.2.1
+RoxygenNote: 7.2.3
 Depends: 
     R (>= 4.0.0),
     RefManageR
@@ -41,12 +41,13 @@ Imports:
     janitor,
     lifecycle,
     ggplot2,
-    ggpointdensity, 
+    ggpointdensity,
     ggbeeswarm (>= 0.7.1),
     gridExtra,
     scales,
     forcats,
-    viridis
+    viridis,
+    lubridate
 Suggests: 
     knitr,
     rmarkdown,

diff --git a/R/as_wide_table.R b/R/as_wide_table.R
@@ -19,20 +19,21 @@ as_wide_table <- function(austraits){
   version <- what_version(austraits)
 
   switch (version,
-          'new' = as_wide_table2(austraits),
-          'old' = as_wide_table1(austraits),
-  )
+          "5-series" = as_wide_table3(austraits),
+          "4-series" = as_wide_table2(austraits),
+          "3-series-earlier" = as_wide_table1(austraits)
+          )
 }
 
-#' Turning entire AusTraits object into wide table >3.0.2
+#' Turning entire AusTraits object into wide table v5
 #' @noRd
 #' @keywords internal
-as_wide_table2 <- function(austraits){
+as_wide_table3 <- function(austraits){
 
   # Function to collapse columns in locations and contexts into single column
-  process_table2 <- function(data) {
+  process_table3 <- function(data) {
     data %>% 
-      tidyr::pivot_wider(names_from = property, values_from = value) %>% 
+      tidyr::pivot_wider(names_from = "property", values_from = "value") %>% 
       tidyr::nest(data=-dplyr::any_of(c("dataset_id", "location_id", "latitude (deg)", "longitude (deg)"))) %>%
       dplyr::mutate(location = purrr::map_chr(data, collapse_cols)) %>%
       dplyr::select(-data) 
@@ -48,61 +49,139 @@ as_wide_table2 <- function(austraits){
   # Getting rid of the columns that will soon be deleted in the next austraits release and renaming the description column
   austraits$methods <- 
     austraits$methods %>% 
-    dplyr::rename(c("dataset_description" = "description"))  %>% 
+    dplyr::rename(dataset_description = "description")  %>% 
     dplyr::distinct()
 
   # collapse into one column
   austraits$locations <- 
     austraits$locations %>% 
     dplyr::filter(value!="unknown") %>% 
-    dplyr::rename(c("property" = "location_property")) %>%
+    dplyr::rename("property" = "location_property") %>%
     split(., .$dataset_id) %>%
-    purrr::map_dfr(process_table2)
+    purrr::map_dfr(process_table3)
 
-  # rename taxonomic_reference field to reflect the APC/APNI name matching process better
+  # rename taxonomic_dataset field to reflect the APC/APNI name matching process better
   austraits$taxa <- 
     austraits$taxa %>% 
-    dplyr::rename(c("taxonNameValidation" = "taxonomic_reference")) %>% 
+    # dplyr::rename("taxonNameValidation" = "taxonomic_dataset") %>% 
     dplyr::distinct()
 
   austraits_wide <- 
     austraits$traits %>% 
     dplyr::left_join(by=c("dataset_id", "location_id"), austraits$locations) %>%
-    dplyr::left_join(by=c("dataset_id", "trait_name"), austraits$methods) %>%
+    dplyr::left_join(by=c("dataset_id", "method_id", "trait_name"), austraits$methods) %>%
     dplyr::left_join(by=c("taxon_name"), austraits$taxa)
 
     # reorder the names to be more intuitive
     austraits_wide %>% dplyr::select(
 
     # The most useful (if you are filtering for just one taxon_name)
-      dataset_id, observation_id, trait_name, taxon_name, value, unit, 
-      entity_type, population_id, individual_id,
-      value_type, basis_of_value, 
-      replicates, 
+      "dataset_id", "observation_id", "trait_name", "taxon_name", "value", "unit", 
+      "entity_type", "population_id", "individual_id",
+      "value_type", "basis_of_value", 
+      "replicates", 
     # tissue, trait_category,  # Add after new zenodo release
 
     # More stuff you can filter on
-    collection_date, basis_of_record, life_stage, sampling_strategy, 
-    treatment_id, temporal_id, 
+    "collection_date", "basis_of_record", "life_stage", "sampling_strategy", 
+    "treatment_context_id", "temporal_context_id", 
 
     #stuff relating to locations
-    `latitude (deg)`, `longitude (deg)`, location, plot_id,
+    "latitude (deg)", "longitude (deg)", "location", "plot_context_id",
 
     #stuff relating to contexts and methods
-    context, methods, method_id, original_name,
+    "context", "methods", "method_id", "method_context_id", "original_name",
 
     #the citations
-    dataset_description, source_primary_citation, source_secondary_citation,
+    "dataset_description", "source_primary_citation", "source_secondary_citation",
 
     #the taxa details
-    taxonomic_status, taxon_distribution, 
-    taxon_rank, genus, family, #accepted_name_usage_id, 
-    scientific_name_authorship
+    "taxonomic_status", "taxon_distribution", 
+    "taxon_rank", "genus", "family"
     )
 
   austraits_wide
 }
 
+#' Turning entire AusTraits object into wide table v4
+#' @noRd
+#' @keywords internal
+as_wide_table2 <- function(austraits){
+
+  # Function to collapse columns in locations and contexts into single column
+  process_table2 <- function(data) {
+    data %>% 
+      tidyr::pivot_wider(names_from = "property", values_from = "value") %>% 
+      tidyr::nest(data=-dplyr::any_of(c("dataset_id", "location_id", "latitude (deg)", "longitude (deg)"))) %>%
+      dplyr::mutate(location = purrr::map_chr(data, collapse_cols)) %>%
+      dplyr::select(-data) 
+  }
+
+  ################################################################################
+  # Define and adapt each table in the list of austraits to prepare for the wide table format 
+
+  # The contexts table needs the contexts collapsed to one context name per site
+  austraits %>% 
+    join_contexts(collapse_context = TRUE) -> austraits
+
+  # Getting rid of the columns that will soon be deleted in the next austraits release and renaming the description column
+  austraits$methods <- 
+    austraits$methods %>% 
+    dplyr::rename(dataset_description = "description")  %>% 
+    dplyr::distinct()
+
+  # collapse into one column
+  austraits$locations <- 
+    austraits$locations %>% 
+    dplyr::filter(value!="unknown") %>% 
+    dplyr::rename("property" = "location_property") %>%
+    split(., .$dataset_id) %>%
+    purrr::map_dfr(process_table2)
+
+  # rename taxonomic_dataset field to reflect the APC/APNI name matching process better
+  austraits$taxa <- 
+    austraits$taxa %>% 
+    dplyr::rename("taxonNameValidation" = "taxonomic_dataset") %>% 
+    dplyr::distinct()
+
+  austraits_wide <- 
+    austraits$traits %>% 
+    dplyr::left_join(by=c("dataset_id", "location_id"), austraits$locations) %>%
+    dplyr::left_join(by=c("dataset_id", "trait_name"), austraits$methods) %>%
+    dplyr::left_join(by=c("taxon_name"), austraits$taxa)
+
+  # reorder the names to be more intuitive; any_of() takes ONE character vector, hence c()
+  austraits_wide %>% dplyr::select(dplyr::any_of(c(
+
+    # The most useful (if you are filtering for just one taxon_name)
+    "dataset_id", "observation_id", "trait_name", "taxon_name", "value", "unit", 
+    "entity_type", "population_id", "individual_id",
+    "value_type", "basis_of_value", 
+    "replicates", 
+    # tissue, trait_category,  # Add after new zenodo release
+
+    # More stuff you can filter on
+    "collection_date", "basis_of_record", "life_stage", "sampling_strategy", 
+    "treatment_id", "temporal_id", 
+
+    #stuff relating to locations
+    "latitude (deg)", "longitude (deg)", "location", "plot_id",
+
+    #stuff relating to contexts and methods
+    "context", "methods", "method_id", "original_name",
+
+    #the citations
+    "dataset_description", "source_primary_citation", "source_secondary_citation",
+
+    #the taxa details
+    "taxonomic_status", "taxon_distribution", 
+    "taxon_rank", "genus", "family"
+
+  )))
+  # NOTE(review): the select() result above is not assigned, so every joined column is returned
+  austraits_wide
+}
+
 #' Turning entire AusTraits object into wide table <=3.0.2
 #' @noRd
 #' @keywords  internal
@@ -128,7 +207,7 @@ as_wide_table1 <- function(austraits){
   process_table <- function(data) {
 
     data %>% 
-      tidyr::pivot_wider(names_from = property, values_from = value) %>% 
+      tidyr::pivot_wider(names_from = "property", values_from = "value") %>% 
       tidyr::nest(data=-dplyr::any_of(c("dataset_id", "site_name", "context_name", "latitude (deg)", "longitude (deg)"))) %>%
       dplyr::mutate(site = purrr::map_chr(data, collapse_cols)) %>%
       dplyr::select(-data) 
@@ -140,15 +219,15 @@ as_wide_table1 <- function(austraits){
   # the trait table needs little prep. Rename the value columns as value
   austraits$traits <- 
     austraits$traits %>% 
-    dplyr::rename(c("trait_value" = "value")) 
+    dplyr::rename(trait_value = "value")
 
   # The contexts table needs the contexts collapsed to one context name per site
   austraits$contexts <- 
     austraits$contexts %>% 
-    dplyr::rename(c("property" = "context_property")) %>%
+    dplyr::rename(property = "context_property") %>%
     split(austraits$contexts$dataset_id) %>%
     purrr::map_dfr(process_table)  %>% 
-    dplyr::rename(c("context" = "site"))
+    dplyr::rename(context = "site")
 
   # Getting rid of the columns that will soon be deleted in the next austraits release and renaming the description column
   austraits$methods <- 
@@ -161,23 +240,23 @@ as_wide_table1 <- function(austraits){
     dplyr::slice(1) %>%
     dplyr:: ungroup() %>%
     #------------
-  dplyr::select(-year_collected_start, -year_collected_end) %>% 
-    dplyr::rename(c("dataset_description" = "description"))  
+  dplyr::select(-c("year_collected_start", "year_collected_end")) %>% 
+    dplyr::rename(dataset_description = "description")  
 
   # collapse into one column
   austraits$sites <- 
     austraits$sites %>% 
     dplyr::filter(value!="unknown") %>% 
     # next line is a fix -- one dataset in 3.0.2 has value "site_name"
     dplyr::mutate(site_property = gsub("site_name", "name", site_property)) %>%
-    dplyr::rename(c("property" = "site_property")) %>%
+    dplyr::rename("property" = "site_property") %>%
     split(., .$dataset_id) %>%
     purrr::map_dfr(process_table)
 
   # rename source data field to reflect the APC/APNI name matching process better
   austraits$taxa <- 
     austraits$taxa %>% 
-    dplyr::rename(c("taxonNameValidation" = "source"))
+    dplyr::rename(taxonNameValidation = "source")
 
   austraits_wide <- 
     austraits$traits %>%
@@ -190,26 +269,26 @@ as_wide_table1 <- function(austraits){
     dplyr::select(
 
       # The most useful (if you are filtering for just one taxon_name)
-      dataset_id, observation_id, trait_name, taxon_name, trait_value, unit, 
-      value_type, replicates, 
+      "dataset_id", "observation_id", "trait_name", "taxon_name", "trait_value", "unit", 
+      "value_type", "replicates", 
       # tissue, trait_category,  # Add after new zenodo release
 
       # More stuff you can filter on
-      date, collection_type, sample_age_class, sampling_strategy, 
+      "date", "collection_type", "sample_age_class", "sampling_strategy", 
 
       #stuff relating to sites
-      `latitude (deg)`, `longitude (deg)`, site_name, site,
+      "latitude (deg)", "longitude (deg)", "site_name", "site",
 
       #stuff relating to contexts and methods
-      context_name, context, methods, original_name,
+      "context_name", "context", "methods", "original_name",
 
       #the citations
-      dataset_description, source_primary_citation, source_secondary_citation,
+      "dataset_description", "source_primary_citation", "source_secondary_citation",
 
       #the taxa details
-      taxonomicStatus, taxonDistribution, 
-      taxonRank, genus, family, acceptedNameUsageID, 
-      scientificNameAuthorship, ccAttributionIRI
+      "taxonomicStatus", "taxonDistribution", 
+      "taxonRank", "genus", "family", "acceptedNameUsageID", 
+      "scientificNameAuthorship", "ccAttributionIRI"
     )
 
   austraits_wide

diff --git a/R/austraits-package.R b/R/austraits-package.R
@@ -16,3 +16,45 @@ if(getRversion() >= "2.15.1")  utils::globalVariables(c(".", "dplyr::n()"))
 #' @importFrom lifecycle deprecated
 ## usethis namespace: end
 NULL
+
+utils::globalVariables(c("..density..",
+                       ".data",
+                       "Group",
+                       "abort",
+                       "australia",
+                       "colour",
+                       "context", 
+                       "context_name",
+                       "context_property",
+                       "dataset_id",
+                       "latitude (deg)", 
+                       "link_id", 
+                       "link_vals",
+                       "location_name", 
+                       "location_property",
+                       "longitude (deg)",
+                       "method_context_id",
+                       "method_id", 
+                       "n", 
+                       "n_vals", 
+                       "n_value_type", 
+                       "observation_id", 
+                       "original_name", 
+                       "percent",
+                       "percent_total",
+                       "repeat_measurements_id",
+                       "replicates",
+                       "shapes",
+                       "site_name",
+                       "site_property",
+                       "source_id",
+                       "taxon_name", 
+                       "text", 
+                       "trait_name",
+                       "value",
+                       "value_type",
+                       "x",
+                       "y", 
+                       "publication_date",
+                       "doi")
+)
diff --git a/R/bind_trait_values.R b/R/bind_trait_values.R
@@ -26,7 +26,7 @@ bind_trait_values <- function(trait_data) {
     if(nrow(.data) > 1) {
       return(
         .data %>% 
-          dplyr::mutate(value = bind_x(value),
+          dplyr::mutate(value = bind_x(.data$value),
                         value_type = bind_x(value_type),
                         replicates = bind_x(replicates)) %>%
           dplyr::filter(dplyr::row_number()==1) 

diff --git a/R/extract_dataset.R b/R/extract_dataset.R
@@ -18,6 +18,11 @@ extract_dataset <- function(austraits, dataset_id) {
   # Switch for different versions
   version <- what_version(austraits)
 
+  if(what_version(austraits) %in% c("4-series", "5-series")){
+    version <- "new" 
+  } else
+    version <- "old"
+
   switch (version,
           'new' = extract_dataset2(austraits, dataset_id),
           'old' = extract_dataset1(austraits, dataset_id),

diff --git a/R/extract_taxa.R b/R/extract_taxa.R
@@ -19,6 +19,11 @@ extract_taxa <- function(austraits, family = NULL, genus = NULL, taxon_name = NU
   # Switch for different versions
   version <- what_version(austraits)
 
+  if(what_version(austraits) %in% c("4-series", "5-series")){
+    version <- "new" 
+  } else
+    version <- "old"
+
   switch (version,
           'new' = extract_taxa2(austraits, family, genus, taxon_name),
           'old' = extract_taxa1(austraits, family, genus, taxon_name),

diff --git a/R/extract_trait.R b/R/extract_trait.R
@@ -20,6 +20,11 @@ extract_trait <- function(austraits, trait_names, taxon_names=NULL) {
   # Switch for different versions
   version <- what_version(austraits)
 
+  if(what_version(austraits) %in% c("4-series", "5-series")){
+    version <- "new" 
+  } else
+    version <- "old"
+
   switch (version,
           'new' = extract_trait2(austraits, trait_names, taxon_names),
           'old' = extract_trait1(austraits, trait_names, taxon_names),