Skip to content

Commit

Permalink
Changes for v5 austraits.build (#78)
Browse files Browse the repository at this point in the history
* changes required for v5 austraits.build

* Removed original_name for trait_pivot_wider3 for v5.0.0

* Added trait_pivot_wider for v4.x.x and code for what_version

* Making new switches for join and as_wide_table based on new versioning

* Sub switch for extract_ and recreated internal data

* Sub switches for trait_pivot_longer

* Minor fix in join_methods

* Added vars as global vars

* Removed .data calls when not needed

* Update to work with latest zenodo API (#81)


- As documented in #79, the Zenodo API has changed, breaking our download feature. 
- This commit updates the internals to work with the latest changes. 

Specifically: 

- the way to access json for all versions has changed (changed url structure, and for id we now use one of the record ids, rather than the conceptid)
- the call to download file has changed
- format of the API json has changed

Also

- added record id to the table of versions
- put a check in to remove "v" from any version entered by user

* Recreated data so extract is passing

* Update `treatment_id` with `treatment_context_id`

* Revert "Update `treatment_id` with `treatment_context_id`"

This reverts commit 3fc6717.

* minor column name changes

Changes column names, reflecting recent changes to traits.build output.

* Update as_wide_table.R

add `any_of` to column selection within `as_wide_table` to accommodate other traits.build databases that don't have the same columns in taxon_list.csv

* Fixed getting versions and loading austraits after the Zenodo updates; minor update to as_wide_table (removed a variable)

* Fixed minor bug in get_version_latest

---------

Co-authored-by: Elizabeth Wenk <[email protected]>
Co-authored-by: Daniel Falster <[email protected]>
Co-authored-by: yangsophieee <[email protected]>
  • Loading branch information
4 people authored Nov 21, 2023
1 parent 17de18c commit f9c1666
Show file tree
Hide file tree
Showing 18 changed files with 326 additions and 126 deletions.
7 changes: 4 additions & 3 deletions DESCRIPTION
Original file line number Diff line number Diff line change
Expand Up @@ -22,7 +22,7 @@ Encoding: UTF-8
Language: en
LazyData: true
Roxygen: list(markdown = TRUE)
RoxygenNote: 7.2.1
RoxygenNote: 7.2.3
Depends:
R (>= 4.0.0),
RefManageR
Expand All @@ -41,12 +41,13 @@ Imports:
janitor,
lifecycle,
ggplot2,
ggpointdensity,
ggpointdensity,
ggbeeswarm (>= 0.7.1),
gridExtra,
scales,
forcats,
viridis
viridis,
lubridate
Suggests:
knitr,
rmarkdown,
Expand Down
163 changes: 121 additions & 42 deletions R/as_wide_table.R
Original file line number Diff line number Diff line change
Expand Up @@ -19,20 +19,21 @@ as_wide_table <- function(austraits){
version <- what_version(austraits)

switch (version,
'new' = as_wide_table2(austraits),
'old' = as_wide_table1(austraits),
)
"5-series" = as_wide_table3(austraits),
"4-series" = as_wide_table2(austraits),
"3-series-earlier" = as_wide_table1(austraits)
)
}

#' Turning entire AusTraits object into wide table >3.0.2
#' Turning entire AusTraits object into wide table v5
#' @noRd
#' @keywords internal
as_wide_table2 <- function(austraits){
as_wide_table3 <- function(austraits){

# Function to collapse columns in locations and contexts into single column
process_table2 <- function(data) {
process_table3 <- function(data) {
data %>%
tidyr::pivot_wider(names_from = property, values_from = value) %>%
tidyr::pivot_wider(names_from = "property", values_from = "value") %>%
tidyr::nest(data=-dplyr::any_of(c("dataset_id", "location_id", "latitude (deg)", "longitude (deg)"))) %>%
dplyr::mutate(location = purrr::map_chr(data, collapse_cols)) %>%
dplyr::select(-data)
Expand All @@ -48,61 +49,139 @@ as_wide_table2 <- function(austraits){
# Getting rid of the columns that will soon be deleted in the next austraits release and renaming the description column
austraits$methods <-
austraits$methods %>%
dplyr::rename(c("dataset_description" = "description")) %>%
dplyr::rename(dataset_description = "description") %>%
dplyr::distinct()

# collapse into one column
austraits$locations <-
austraits$locations %>%
dplyr::filter(value!="unknown") %>%
dplyr::rename(c("property" = "location_property")) %>%
dplyr::rename("property" = "location_property") %>%
split(., .$dataset_id) %>%
purrr::map_dfr(process_table2)
purrr::map_dfr(process_table3)

# rename taxonomic_reference field to reflect the APC/APNI name matching process better
# rename taxonomic_dataset field to reflect the APC/APNI name matching process better
austraits$taxa <-
austraits$taxa %>%
dplyr::rename(c("taxonNameValidation" = "taxonomic_reference")) %>%
# dplyr::rename("taxonNameValidation" = "taxonomic_dataset") %>%
dplyr::distinct()

austraits_wide <-
austraits$traits %>%
dplyr::left_join(by=c("dataset_id", "location_id"), austraits$locations) %>%
dplyr::left_join(by=c("dataset_id", "trait_name"), austraits$methods) %>%
dplyr::left_join(by=c("dataset_id", "method_id", "trait_name"), austraits$methods) %>%
dplyr::left_join(by=c("taxon_name"), austraits$taxa)

# reorder the names to be more intuitive
austraits_wide %>% dplyr::select(

# The most useful (if you are filtering for just one taxon_name)
dataset_id, observation_id, trait_name, taxon_name, value, unit,
entity_type, population_id, individual_id,
value_type, basis_of_value,
replicates,
"dataset_id", "observation_id", "trait_name", "taxon_name", "value", "unit",
"entity_type", "population_id", "individual_id",
"value_type", "basis_of_value",
"replicates",
# tissue, trait_category, # Add after new zenodo release

# More stuff you can filter on
collection_date, basis_of_record, life_stage, sampling_strategy,
treatment_id, temporal_id,
"collection_date", "basis_of_record", "life_stage", "sampling_strategy",
"treatment_context_id", "temporal_context_id",

#stuff relating to locations
`latitude (deg)`, `longitude (deg)`, location, plot_id,
"latitude (deg)", "longitude (deg)", "location", "plot_context_id",

#stuff relating to contexts and methods
context, methods, method_id, original_name,
"context", "methods", "method_id", "method_context_id", "original_name",

#the citations
dataset_description, source_primary_citation, source_secondary_citation,
"dataset_description", "source_primary_citation", "source_secondary_citation",

#the taxa details
taxonomic_status, taxon_distribution,
taxon_rank, genus, family, #accepted_name_usage_id,
scientific_name_authorship
"taxonomic_status", "taxon_distribution",
"taxon_rank", "genus", "family"
)

austraits_wide
}

#' Turning entire AusTraits object into wide table v4
#'
#' Left-joins the locations, methods and taxa tables onto the traits table of
#' a 4-series AusTraits object, then keeps and orders a fixed set of columns.
#'
#' @param austraits AusTraits object; a list from which the `traits`,
#'   `locations`, `methods` and `taxa` tables are read.
#' @return The joined traits table restricted to the columns listed in
#'   `wide_cols` below, in that order. Columns that are absent from the joined
#'   table are skipped rather than raising an error.
#' @noRd
#' @keywords internal
as_wide_table2 <- function(austraits) {

  # Function to collapse columns in locations and contexts into single column
  process_table2 <- function(data) {
    data %>%
      tidyr::pivot_wider(names_from = "property", values_from = "value") %>%
      tidyr::nest(data = -dplyr::any_of(c("dataset_id", "location_id", "latitude (deg)", "longitude (deg)"))) %>%
      # collapse_cols is defined elsewhere in the package; presumably it folds
      # the nested property columns into one string -- TODO confirm
      dplyr::mutate(location = purrr::map_chr(data, collapse_cols)) %>%
      dplyr::select(-data)
  }

  ################################################################################
  # Define and adapt each table in the list of austraits to prepare for the wide table format

  # The contexts table needs the contexts collapsed to one context name per site
  austraits <- join_contexts(austraits, collapse_context = TRUE)

  # Getting rid of the columns that will soon be deleted in the next austraits release and renaming the description column
  austraits$methods <-
    austraits$methods %>%
    dplyr::rename(dataset_description = "description") %>%
    dplyr::distinct()

  # collapse into one column
  austraits$locations <-
    austraits$locations %>%
    dplyr::filter(value != "unknown") %>%
    dplyr::rename("property" = "location_property") %>%
    split(., .$dataset_id) %>%
    purrr::map_dfr(process_table2)

  # rename taxonomic_dataset field to reflect the APC/APNI name matching process better
  austraits$taxa <-
    austraits$taxa %>%
    dplyr::rename("taxonNameValidation" = "taxonomic_dataset") %>%
    dplyr::distinct()

  austraits_wide <-
    austraits$traits %>%
    dplyr::left_join(by = c("dataset_id", "location_id"), austraits$locations) %>%
    dplyr::left_join(by = c("dataset_id", "trait_name"), austraits$methods) %>%
    dplyr::left_join(by = c("taxon_name"), austraits$taxa)

  # Column order for the wide table. Kept as a single character vector because
  # any_of() accepts exactly one vector of names; passing the names as separate
  # arguments is an error.
  wide_cols <- c(

    # The most useful (if you are filtering for just one taxon_name)
    "dataset_id", "observation_id", "trait_name", "taxon_name", "value", "unit",
    "entity_type", "population_id", "individual_id",
    "value_type", "basis_of_value",
    "replicates",
    # tissue, trait_category, # Add after new zenodo release

    # More stuff you can filter on
    "collection_date", "basis_of_record", "life_stage", "sampling_strategy",
    "treatment_id", "temporal_id",

    #stuff relating to locations
    "latitude (deg)", "longitude (deg)", "location", "plot_id",

    #stuff relating to contexts and methods
    "context", "methods", "method_id", "original_name",

    #the citations
    "dataset_description", "source_primary_citation", "source_secondary_citation",

    #the taxa details
    "taxonomic_status", "taxon_distribution",
    "taxon_rank", "genus", "family"
  )

  # reorder the names to be more intuitive; the selected table is the return
  # value (previously the result of select() was computed and then dropped)
  austraits_wide %>%
    dplyr::select(dplyr::any_of(wide_cols))
}

#' Turning entire AusTraits object into wide table <=3.0.2
#' @noRd
#' @keywords internal
Expand All @@ -128,7 +207,7 @@ as_wide_table1 <- function(austraits){
process_table <- function(data) {

data %>%
tidyr::pivot_wider(names_from = property, values_from = value) %>%
tidyr::pivot_wider(names_from = "property", values_from = "value") %>%
tidyr::nest(data=-dplyr::any_of(c("dataset_id", "site_name", "context_name", "latitude (deg)", "longitude (deg)"))) %>%
dplyr::mutate(site = purrr::map_chr(data, collapse_cols)) %>%
dplyr::select(-data)
Expand All @@ -140,15 +219,15 @@ as_wide_table1 <- function(austraits){
# the trait table needs little prep. Rename the value columns as value
austraits$traits <-
austraits$traits %>%
dplyr::rename(c("trait_value" = "value"))
dplyr::rename(trait_value = "value")

# The contexts table needs the contexts collapsed to one context name per site
austraits$contexts <-
austraits$contexts %>%
dplyr::rename(c("property" = "context_property")) %>%
dplyr::rename(property = "context_property") %>%
split(austraits$contexts$dataset_id) %>%
purrr::map_dfr(process_table) %>%
dplyr::rename(c("context" = "site"))
dplyr::rename(context = "site")

# Getting rid of the columns that will soon be deleted in the next austraits release and renaming the description column
austraits$methods <-
Expand All @@ -161,23 +240,23 @@ as_wide_table1 <- function(austraits){
dplyr::slice(1) %>%
dplyr:: ungroup() %>%
#------------
dplyr::select(-year_collected_start, -year_collected_end) %>%
dplyr::rename(c("dataset_description" = "description"))
dplyr::select(-c("year_collected_start", "year_collected_end")) %>%
dplyr::rename(dataset_description = "description")

# collapse into one column
austraits$sites <-
austraits$sites %>%
dplyr::filter(value!="unknown") %>%
# next line is a fix -- one dataset in 3.0.2 has value "site_name"
dplyr::mutate(site_property = gsub("site_name", "name", site_property)) %>%
dplyr::rename(c("property" = "site_property")) %>%
dplyr::rename("property" = "site_property") %>%
split(., .$dataset_id) %>%
purrr::map_dfr(process_table)

# rename source data field to reflect the APC/APNI name matching process better
austraits$taxa <-
austraits$taxa %>%
dplyr::rename(c("taxonNameValidation" = "source"))
dplyr::rename(taxonNameValidation = "source")

austraits_wide <-
austraits$traits %>%
Expand All @@ -190,26 +269,26 @@ as_wide_table1 <- function(austraits){
dplyr::select(

# The most useful (if you are filtering for just one taxon_name)
dataset_id, observation_id, trait_name, taxon_name, trait_value, unit,
value_type, replicates,
"dataset_id", "observation_id", "trait_name", "taxon_name", "trait_value", "unit",
"value_type", "replicates",
# tissue, trait_category, # Add after new zenodo release

# More stuff you can filter on
date, collection_type, sample_age_class, sampling_strategy,
"date", "collection_type", "sample_age_class", "sampling_strategy",

#stuff relating to sites
`latitude (deg)`, `longitude (deg)`, site_name, site,
"latitude (deg)", "longitude (deg)", "site_name", "site",

#stuff relating to contexts and methods
context_name, context, methods, original_name,
"context_name", "context", "methods", "original_name",

#the citations
dataset_description, source_primary_citation, source_secondary_citation,
"dataset_description", "source_primary_citation", "source_secondary_citation",

#the taxa details
taxonomicStatus, taxonDistribution,
taxonRank, genus, family, acceptedNameUsageID,
scientificNameAuthorship, ccAttributionIRI
"taxonomicStatus", "taxonDistribution",
"taxonRank", "genus", "family", "acceptedNameUsageID",
"scientificNameAuthorship", "ccAttributionIRI"
)

austraits_wide
Expand Down
42 changes: 42 additions & 0 deletions R/austraits-package.R
Original file line number Diff line number Diff line change
Expand Up @@ -16,3 +16,45 @@ if(getRversion() >= "2.15.1") utils::globalVariables(c(".", "dplyr::n()"))
#' @importFrom lifecycle deprecated
## usethis namespace: end
NULL

# Register names with utils::globalVariables() so that R CMD check does not
# report them as undefined global variables. Presumably these are the bare
# column names / symbols used in the package's dplyr, tidyr and ggplot2 calls
# (non-standard evaluation) -- verify against the call sites before removing
# any entry.
utils::globalVariables(c("..density..",
".data",
"Group",
"abort",
"australia",
"colour",
"context",
"context_name",
"context_property",
"dataset_id",
"latitude (deg)",
"link_id",
"link_vals",
"location_name",
"location_property",
"longitude (deg)",
"method_context_id",
"method_id",
"n",
"n_vals",
"n_value_type",
"observation_id",
"original_name",
"percent",
"percent_total",
"repeat_measurements_id",
"replicates",
"shapes",
"site_name",
"site_property",
"source_id",
"taxon_name",
"text",
"trait_name",
"value",
"value_type",
"x",
"y",
"publication_date",
"doi")
)
2 changes: 1 addition & 1 deletion R/bind_trait_values.R
Original file line number Diff line number Diff line change
Expand Up @@ -26,7 +26,7 @@ bind_trait_values <- function(trait_data) {
if(nrow(.data) > 1) {
return(
.data %>%
dplyr::mutate(value = bind_x(value),
dplyr::mutate(value = bind_x(.data$value),
value_type = bind_x(value_type),
replicates = bind_x(replicates)) %>%
dplyr::filter(dplyr::row_number()==1)
Expand Down
5 changes: 5 additions & 0 deletions R/extract_dataset.R
Original file line number Diff line number Diff line change
Expand Up @@ -18,6 +18,11 @@ extract_dataset <- function(austraits, dataset_id) {
# Switch for different versions
version <- what_version(austraits)

if(what_version(austraits) %in% c("4-series", "5-series")){
version <- "new"
} else
version <- "old"

switch (version,
'new' = extract_dataset2(austraits, dataset_id),
'old' = extract_dataset1(austraits, dataset_id),
Expand Down
5 changes: 5 additions & 0 deletions R/extract_taxa.R
Original file line number Diff line number Diff line change
Expand Up @@ -19,6 +19,11 @@ extract_taxa <- function(austraits, family = NULL, genus = NULL, taxon_name = NU
# Switch for different versions
version <- what_version(austraits)

if(what_version(austraits) %in% c("4-series", "5-series")){
version <- "new"
} else
version <- "old"

switch (version,
'new' = extract_taxa2(austraits, family, genus, taxon_name),
'old' = extract_taxa1(austraits, family, genus, taxon_name),
Expand Down
5 changes: 5 additions & 0 deletions R/extract_trait.R
Original file line number Diff line number Diff line change
Expand Up @@ -20,6 +20,11 @@ extract_trait <- function(austraits, trait_names, taxon_names=NULL) {
# Switch for different versions
version <- what_version(austraits)

if(what_version(austraits) %in% c("4-series", "5-series")){
version <- "new"
} else
version <- "old"

switch (version,
'new' = extract_trait2(austraits, trait_names, taxon_names),
'old' = extract_trait1(austraits, trait_names, taxon_names),
Expand Down
Loading

0 comments on commit f9c1666

Please sign in to comment.