Skip to content

Commit

Permalink
Getting rid of notes
Browse files Browse the repository at this point in the history
  • Loading branch information
fontikar committed Oct 13, 2023
1 parent 30f9b67 commit 814606d
Show file tree
Hide file tree
Showing 7 changed files with 65 additions and 67 deletions.
84 changes: 42 additions & 42 deletions R/as_wide_table.R
Original file line number Diff line number Diff line change
Expand Up @@ -33,7 +33,7 @@ as_wide_table3 <- function(austraits){
# Function to collapse columns in locations and contexts into a single column
process_table3 <- function(data) {
data %>%
tidyr::pivot_wider(names_from = property, values_from = value) %>%
tidyr::pivot_wider(names_from = .data$property, values_from = .data$value) %>%
tidyr::nest(data=-dplyr::any_of(c("dataset_id", "location_id", "latitude (deg)", "longitude (deg)"))) %>%
dplyr::mutate(location = purrr::map_chr(data, collapse_cols)) %>%
dplyr::select(-data)
Expand All @@ -55,7 +55,7 @@ as_wide_table3 <- function(austraits){
# collapse into one column
austraits$locations <-
austraits$locations %>%
dplyr::filter(value!="unknown") %>%
dplyr::filter(.data$value!="unknown") %>%
dplyr::rename(c("property" = "location_property")) %>%
split(., .$dataset_id) %>%
purrr::map_dfr(process_table3)
Expand All @@ -76,29 +76,29 @@ as_wide_table3 <- function(austraits){
austraits_wide %>% dplyr::select(

# The most useful (if you are filtering for just one taxon_name)
dataset_id, observation_id, trait_name, taxon_name, value, unit,
entity_type, population_id, individual_id,
value_type, basis_of_value,
replicates,
.data$dataset_id, .data$observation_id, .data$trait_name, .data$taxon_name, .data$value, .data$unit,
.data$entity_type, .data$population_id, .data$individual_id,
.data$value_type, .data$basis_of_value,
.data$replicates,
# tissue, trait_category, # Add after new zenodo release

# More stuff you can filter on
collection_date, basis_of_record, life_stage, sampling_strategy,
treatment_context_id, temporal_context_id,
.data$collection_date, .data$basis_of_record, .data$life_stage, .data$sampling_strategy,
.data$treatment_context_id, .data$temporal_context_id,

#stuff relating to locations
`latitude (deg)`, `longitude (deg)`, location, plot_context_id,
.data$`latitude (deg)`, .data$`longitude (deg)`, .data$location, .data$plot_context_id,

#stuff relating to contexts and methods
context, methods, method_id, method_context_id, original_name,
.data$context, .data$methods, .data$method_id, .data$method_context_id, .data$original_name,

#the citations
dataset_description, source_primary_citation, source_secondary_citation,
.data$dataset_description, .data$source_primary_citation, .data$source_secondary_citation,

#the taxa details
taxonomic_status, taxon_distribution,
taxon_rank, genus, family, #accepted_name_usage_id,
scientific_name_authorship
.data$taxonomic_status, .data$taxon_distribution,
.data$taxon_rank, .data$genus, .data$family, #accepted_name_usage_id,
.data$scientific_name_authorship
)

austraits_wide
Expand All @@ -112,7 +112,7 @@ as_wide_table2 <- function(austraits){
# Function to collapse columns in locations and contexts into a single column
process_table2 <- function(data) {
data %>%
tidyr::pivot_wider(names_from = property, values_from = value) %>%
tidyr::pivot_wider(names_from = .data$property, values_from = .data$value) %>%
tidyr::nest(data=-dplyr::any_of(c("dataset_id", "location_id", "latitude (deg)", "longitude (deg)"))) %>%
dplyr::mutate(location = purrr::map_chr(data, collapse_cols)) %>%
dplyr::select(-data)
Expand All @@ -134,7 +134,7 @@ as_wide_table2 <- function(austraits){
# collapse into one column
austraits$locations <-
austraits$locations %>%
dplyr::filter(value!="unknown") %>%
dplyr::filter(.data$value!="unknown") %>%
dplyr::rename(c("property" = "location_property")) %>%
split(., .$dataset_id) %>%
purrr::map_dfr(process_table2)
Expand All @@ -155,29 +155,29 @@ as_wide_table2 <- function(austraits){
austraits_wide %>% dplyr::select(

# The most useful (if you are filtering for just one taxon_name)
dataset_id, observation_id, trait_name, taxon_name, value, unit,
entity_type, population_id, individual_id,
value_type, basis_of_value,
replicates,
.data$dataset_id, .data$observation_id, .data$trait_name, .data$taxon_name, .data$value, .data$unit,
.data$entity_type, .data$population_id, .data$individual_id,
.data$value_type, .data$basis_of_value,
.data$replicates,
# tissue, trait_category, # Add after new zenodo release

# More stuff you can filter on
collection_date, basis_of_record, life_stage, sampling_strategy,
treatment_id, temporal_id,
.data$collection_date, .data$basis_of_record, .data$life_stage, .data$sampling_strategy,
.data$treatment_id, .data$temporal_id,

#stuff relating to locations
`latitude (deg)`, `longitude (deg)`, location, plot_id,
.data$`latitude (deg)`, .data$`longitude (deg)`, .data$location, .data$plot_id,

#stuff relating to contexts and methods
context, methods, method_id, original_name,
.data$context, .data$methods, .data$method_id, .data$original_name,

#the citations
dataset_description, source_primary_citation, source_secondary_citation,
.data$dataset_description, .data$source_primary_citation, .data$source_secondary_citation,

#the taxa details
taxonomic_status, taxon_distribution,
taxon_rank, genus, family, #accepted_name_usage_id,
scientific_name_authorship
.data$taxonomic_status, .data$taxon_distribution,
.data$taxon_rank, .data$genus, .data$family, #accepted_name_usage_id,
.data$scientific_name_authorship

)

Expand Down Expand Up @@ -209,7 +209,7 @@ as_wide_table1 <- function(austraits){
process_table <- function(data) {

data %>%
tidyr::pivot_wider(names_from = property, values_from = value) %>%
tidyr::pivot_wider(names_from = .data$property, values_from = .data$value) %>%
tidyr::nest(data=-dplyr::any_of(c("dataset_id", "site_name", "context_name", "latitude (deg)", "longitude (deg)"))) %>%
dplyr::mutate(site = purrr::map_chr(data, collapse_cols)) %>%
dplyr::select(-data)
Expand Down Expand Up @@ -238,19 +238,19 @@ as_wide_table1 <- function(austraits){
# TODO: this section can be removed for next release
# Some studies have multiple records per trait. This breaks things when joining
# For now select first
dplyr::group_by(dataset_id, trait_name) %>%
dplyr::group_by(.data$dataset_id, .data$trait_name) %>%
dplyr::slice(1) %>%
dplyr:: ungroup() %>%
#------------
dplyr::select(-c(year_collected_start, year_collected_end)) %>%
dplyr::select(-c(.data$year_collected_start, .data$year_collected_end)) %>%
dplyr::rename(c("dataset_description" = "description"))

# collapse into one column
austraits$sites <-
austraits$sites %>%
dplyr::filter(value!="unknown") %>%
dplyr::filter(.data$value!="unknown") %>%
# next line is a fix -- one dataset in 3.0.2 has value "site_name"
dplyr::mutate(site_property = gsub("site_name", "name", site_property)) %>%
dplyr::mutate(site_property = gsub("site_name", "name", .data$site_property)) %>%
dplyr::rename(c("property" = "site_property")) %>%
split(., .$dataset_id) %>%
purrr::map_dfr(process_table)
Expand All @@ -271,26 +271,26 @@ as_wide_table1 <- function(austraits){
dplyr::select(

# The most useful (if you are filtering for just one taxon_name)
dataset_id, observation_id, trait_name, taxon_name, trait_value, unit,
value_type, replicates,
.data$dataset_id, .data$observation_id, .data$trait_name, .data$taxon_name, .data$trait_value, .data$unit,
.data$value_type, .data$replicates,
# tissue, trait_category, # Add after new zenodo release

# More stuff you can filter on
date, collection_type, sample_age_class, sampling_strategy,
.data$date, .data$collection_type, .data$sample_age_class, .data$sampling_strategy,

#stuff relating to sites
`latitude (deg)`, `longitude (deg)`, site_name, site,
.data$`latitude (deg)`, .data$`longitude (deg)`, .data$site_name, .data$site,

#stuff relating to contexts and methods
context_name, context, methods, original_name,
.data$context_name, .data$context, .data$methods, .data$original_name,

#the citations
dataset_description, source_primary_citation, source_secondary_citation,
.data$dataset_description, .data$source_primary_citation, .data$source_secondary_citation,

#the taxa details
taxonomicStatus, taxonDistribution,
taxonRank, genus, family, acceptedNameUsageID,
scientificNameAuthorship, ccAttributionIRI
.data$taxonomicStatus, .data$taxonDistribution,
.data$taxonRank, .data$genus, .data$family, .data$acceptedNameUsageID,
.data$scientificNameAuthorship, .data$ccAttributionIRI
)

austraits_wide
Expand Down
10 changes: 5 additions & 5 deletions R/bind_trait_values.R
Original file line number Diff line number Diff line change
Expand Up @@ -26,18 +26,18 @@ bind_trait_values <- function(trait_data) {
if(nrow(.data) > 1) {
return(
.data %>%
dplyr::mutate(value = bind_x(value),
value_type = bind_x(value_type),
replicates = bind_x(replicates)) %>%
dplyr::mutate(value = bind_x(.data$value),
value_type = bind_x(.data$value_type),
replicates = bind_x(.data$replicates)) %>%
dplyr::filter(dplyr::row_number()==1)
)
}
.data
}

trait_data %>%
dplyr::group_by(observation_id, trait_name) %>%
dplyr::group_by(.data$observation_id, .data$trait_name) %>%
bind_values_worker() %>%
dplyr::ungroup() %>%
dplyr::arrange(observation_id, trait_name, value_type)
dplyr::arrange(.data$observation_id, .data$trait_name, .data$value_type)
}
6 changes: 3 additions & 3 deletions R/join_all.R
Original file line number Diff line number Diff line change
@@ -1,7 +1,6 @@
#' @title Join study details into main `traits` dataset
#' @description Function to append all study information (methods, locations, taxonomy, contexts) variables to the `traits` dataframe of the database
#' @param austraits dataframe generated by austraits build
#' @param vars variables to select from the respective table where information is being joined from. Not available for contexts table
#' @param ... arguments passed to `vars` to subset the columns
#' @return austraits list object, but with additional variables appended to `traits` dataframe
#' @rdname join_all
Expand All @@ -17,7 +16,7 @@
#' (austraits %>% join_contexts)$traits
#'
#' # Append methods
#' (austraits %>% join_methods)$traits
#' (austraits %>% join_methods(vars = c("method_id")))$traits
#'
#' #Append taxonomic details
#' (austraits %>% join_taxonomy)$traits
Expand Down Expand Up @@ -168,7 +167,8 @@ join_locations1 <- function(austraits, vars = c("longitude (deg)","latitude (de
#' @title Joining location info for AusTraits versions <= 3.0.2
#' @description `r lifecycle::badge('deprecated')`
#' Joining location info for AusTraits versions <= 3.0.2
#' @inheritParams join_locations
#' @param austraits austraits object
#' @param vars variables from site table to join
#' @export

join_sites <- function(austraits, vars = c("longitude (deg)","latitude (deg)")) {
Expand Down
10 changes: 5 additions & 5 deletions R/summarise_austraits.R
Original file line number Diff line number Diff line change
Expand Up @@ -47,9 +47,9 @@ summarise_austraits_traits <-function(austraits, var) {
percent = NULL)
# Summary statistics
sum_stats <- austraits[["traits"]] %>%
dplyr::group_by(trait_name) %>%
dplyr::summarise(n_dataset = length(unique(dataset_id)),
n_taxa = length(unique(taxon_name)))
dplyr::group_by(.data$trait_name) %>%
dplyr::summarise(n_dataset = length(unique(.data$dataset_id)),
n_taxa = length(unique(.data$taxon_name)))

ret <- dplyr::left_join(ret, sum_stats, by = "trait_name")

Expand Down Expand Up @@ -86,8 +86,8 @@ summarise_austraits_taxa <-function(austraits, var) {
# Summary statistics (https://stackoverflow.com/questions/55425976/use-quoted-variable-in-group-by-mutate-function-call)
sum_stats <- austraits[["traits"]] %>%
dplyr::group_by(!!rlang::sym(var)) %>%
dplyr::summarise(n_dataset = length(unique(dataset_id)),
n_taxa = length(unique(taxon_name)))
dplyr::summarise(n_dataset = length(unique(.data$dataset_id)),
n_taxa = length(unique(.data$taxon_name)))

ret <- dplyr::left_join(ret, sum_stats, by = var)

Expand Down
14 changes: 7 additions & 7 deletions R/summarise_trait_values.R
Original file line number Diff line number Diff line change
Expand Up @@ -15,23 +15,23 @@
summarise_trait_means <- function(trait_data){
suppressWarnings(
trait_data %>%
dplyr::mutate(value = as.numeric(value),
replicates = as.numeric(replicates)) -> trait_data
dplyr::mutate(value = as.numeric(.data$value),
replicates = as.numeric(.data$replicates)) -> trait_data
)

# Identify which ones need summarising
target <- trait_data %>%
dplyr::group_by(trait_name, observation_id) %>%
dplyr::group_by(.data$trait_name, .data$observation_id) %>%
dplyr::summarise(dplyr::n()) %>%
dplyr::filter(`dplyr::n()` > 1) %>%
dplyr::select(trait_name, observation_id)
dplyr::select(.data$trait_name, .data$observation_id)

# Identify which ones don't need to change
original <- trait_data %>%
dplyr::group_by(trait_name, observation_id) %>%
dplyr::group_by(.data$trait_name, .data$observation_id) %>%
dplyr::summarise(dplyr::n()) %>%
dplyr::filter(! `dplyr::n()` > 1) %>%
dplyr::select(trait_name, observation_id)
dplyr::select(.data$trait_name, .data$observation_id)

original_df <- purrr::map2_dfr(original$trait_name, original$observation_id,
~ dplyr::filter(trait_data, trait_name == .x & observation_id == .y))
Expand All @@ -57,5 +57,5 @@ summarise_trait_means <- function(trait_data){
ret <- dplyr::bind_rows(original_df, target_bound)

# Sort by observation_id and return
ret %>% dplyr::arrange(observation_id)
ret %>% dplyr::arrange(.data$observation_id)
}
4 changes: 1 addition & 3 deletions man/join_all.Rd

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

4 changes: 2 additions & 2 deletions man/join_sites.Rd

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

0 comments on commit 814606d

Please sign in to comment.