-
Notifications
You must be signed in to change notification settings - Fork 0
Commit
This commit does not belong to any branch on this repository, and may belong to a fork outside of the repository.
Add
create_
functions to generate identifiers within use_
functions
new functions are `create_composite_id()`, `create_random_id()`, `create_sequential_id()`
- Loading branch information
1 parent
29eac6b
commit 48e5df4
Showing
17 changed files
with
370 additions
and
184 deletions.
There are no files selected for viewing
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,126 @@ | ||
#' Create identifier columns | ||
#' | ||
#' Identifiers are columns that uniquely identify a single record within a | ||
#' dataset. These are helper functions, designed to make it easier to | ||
#' generate such columns from a given dataset. They are designed to be called | ||
#' within [use_events()], [use_occurrences()], or (equivalently) | ||
#' [dplyr::mutate()]. Generally speaking, it is best practice to use existing | ||
#' information from a dataset to generate identifiers; for this reason we | ||
#' recommend using `create_composite_id()` to aggregate existing fields, if no | ||
#' such composite is already present within the dataset. It is possible to call | ||
#' `create_sequential_id()` or `create_random_id()` within | ||
#' `create_composite_id()` to combine existing and new columns. | ||
#' @rdname create_id | ||
#' @param ... Zero or more variable names from the tibble being | ||
#' mutated (unquoted), and/or zero or more `create_` functions, separated by | ||
#' commas. | ||
#' @param sep Character used to separate field values. Defaults to `"-"` | ||
#' @returns A character vector of identifiers, intended to be stored as a column of the tibble being mutated. | ||
#' @examples | ||
#' library(tibble) | ||
#' df <- tibble(eventDate = paste0(rep(c(2020:2024), 3), "-01-01"), | ||
#' basisOfRecord = "humanObservation", | ||
#' site = rep(c("A01", "A02", "A03"), each = 5)) | ||
#' df |> | ||
#' use_occurrences(occurrenceID = create_composite_id(create_sequential_id(), | ||
#' site, | ||
#' eventDate)) | ||
#' @order 1 | ||
#' @export | ||
create_composite_id <- function(...,
                                sep = "-") {
  # Capture every argument unevaluated so each can be resolved according to
  # whether it is a symbol, a call or a literal.
  dots <- enquos(...)
  pieces <- purrr::map(dots, switch_expr_type)
  # Drop any names supplied by the caller so that none of the pieces can be
  # mistaken for a named argument of paste().
  pieces <- unname(pieces)
  # Paste the pieces together element-wise, separated by `sep`.
  do.call(paste, c(pieces, sep = sep))
}
|
||
#' Switch functions for quosures | ||
#' @param x A (single) quosure | ||
#' @importFrom rlang abort | ||
#' @importFrom rlang quo_get_expr | ||
#' @noRd | ||
#' @keywords internal | ||
switch_expr_type <- function(x) {
  # Classify the quosure once, then dispatch on the resulting label.
  quosure_kind <- expr_type(x)
  switch(
    quosure_kind,
    symbol = parse_symbol(x),    # bare name: look it up, or fall back to a label
    call = eval_tidy(x),         # function call: evaluate it
    literal = quo_get_expr(x),   # constant: use the value as written
    abort("Quosure type not recognised.")
  )
}
|
||
#' Get type from quosures | ||
#' @param x A (single) quosure | ||
#' @importFrom rlang quo_is_symbol | ||
#' @importFrom rlang quo_is_call | ||
#' @importFrom rlang quo_get_expr | ||
#' @importFrom rlang is_syntactic_literal | ||
#' @noRd | ||
#' @keywords internal | ||
expr_type <- function(x) {
  # Return a one-word label for the kind of expression wrapped by quosure `x`.
  if (quo_is_symbol(x)) {
    return("symbol")
  }
  if (quo_is_call(x)) {
    return("call")
  }
  if (is_syntactic_literal(quo_get_expr(x))) {
    return("literal")
  }
  # Anything else: report the underlying R type of the object passed in.
  typeof(x)
}
|
||
#' Check whether symbols exist before they are parsed | ||
#' @param x A (single) quosure | ||
#' @importFrom rlang quo_get_expr | ||
#' @importFrom rlang quo_get_env | ||
#' @importFrom rlang eval_tidy | ||
#' @importFrom rlang as_label | ||
#' @noRd | ||
#' @keywords internal | ||
parse_symbol <- function(x) {
  # Is the symbol bound to anything visible from the quosure's environment?
  symbol_found <- exists(quo_get_expr(x), where = quo_get_env(x))
  if (!symbol_found) {
    # Unknown name: use the name itself, as a string.
    return(as_label(x))
  }
  value <- eval_tidy(x)
  # Special case for names such as 'data' that resolve to a function:
  # use the name as a string rather than the function object.
  if (inherits(value, "function")) {
    as_label(x)
  } else {
    value
  }
}
|
||
#' Internal function to parse a call | ||
#' @importFrom rlang eval_tidy | ||
#' @noRd | ||
#' @keywords internal | ||
parse_call <- function(x, ...) {
  # Evaluate the quosure `x`; arguments passed via `...` are accepted but
  # not used.
  eval_tidy(x)
}
|
||
#' @rdname create_id | ||
#' @param width (Integer) how many characters should the resulting string be? | ||
#' Defaults to one more than the number of digits in the largest number. | ||
#' @order 2 | ||
#' @export | ||
create_sequential_id <- function(width) {
  # Number of rows in the current data-masking context (e.g. inside
  # dplyr::mutate()).
  row_count <- dplyr::n()
  # Nothing to number: return early so that log10()/max() are never asked to
  # work on empty input (which would warn and produce a width of -Inf).
  if (row_count < 1L) {
    return(character(0))
  }
  if (missing(width)) {
    # Digits in the largest index (floor(log10(n)) + 1), plus one more so that
    # every identifier carries at least one leading zero.
    width <- floor(log10(row_count)) + 2
  }
  # Zero-padded integers 1..n, returned as character strings.
  formatC(seq_len(row_count),
          width = width,
          format = "d",
          flag = "0")
}
|
||
#' @rdname create_id | ||
#' @order 3 | ||
#' @export | ||
create_random_id <- function() {
  # One time-based UUID per row of the current data-masking context.
  row_count <- dplyr::n()
  uuid::UUIDgenerate(use.time = TRUE, n = row_count)
}
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,17 @@ | ||
#' Darwin Core terms | ||
#' | ||
#' Return a tibble of Darwin Core terms | ||
#' @noRd | ||
#' @keywords internal | ||
darwin_core_terms <- function() {
  # Hand back the `dwc_terms` object, which is defined outside this function.
  dwc_terms
}
|
||
#' Country codes | ||
#' | ||
#' Return a tibble of valid country codes | ||
#' @noRd | ||
#' @keywords internal | ||
country_codes <- function() {
  # NOTE(review): the symbol returned below has the same name as this
  # function. Unless a different object called `country_codes` is found first
  # when this runs, the function returns itself rather than a tibble. Compare
  # darwin_core_terms(), which returns the distinctly named `dwc_terms`.
  # TODO confirm the name of the intended data object.
  country_codes
}
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
|
@@ -25,7 +25,7 @@ knitr::opts_chunk$set( | |
`corella` is an R package that helps users standardize their data using the | ||
[*Darwin Core*](https://dwc.tdwg.org) data standard, used for biodiversity data like species occurrences. `corella` provides tools to prepare, manipulate and validate data against the standard's criteria. Once standardized, data can be subsequently shared as a [*Darwin Core Archive*](https://ipt.gbif.org/manual/en/ipt/latest/dwca-guide#what-is-darwin-core-archive-dwc-a) and published to open data infrastructures like the [Atlas of Living Australia](https://www.ala.org.au) and [GBIF](https://www.gbif.org/). | ||
|
||
`corella` was built, and is maintained, by the [Science & Decision Support Team](https://labs.ala.org.au) at the [Atlas of Living Australia](https://www.ala.org.au) (ALA). It is named for the Little Corella ([_Cacatua sanguinea_](https://bie.ala.org.au/species/https%3A//biodiversity.org.au/afd/taxa/34b31e86-7ade-4cba-960f-82a6ae586206)). The logo was designed by [Dax Kellie](https://daxkellie.com/) | ||
`corella` was built, and is maintained, by the [Science & Decision Support Team](https://labs.ala.org.au) at the [Atlas of Living Australia](https://www.ala.org.au) (ALA). It is named for the Little Corella ([_Cacatua sanguinea_](https://bie.ala.org.au/species/https%3A//biodiversity.org.au/afd/taxa/34b31e86-7ade-4cba-960f-82a6ae586206)). The logo was designed by [Dax Kellie](https://daxkellie.com/). | ||
|
||
If you have any comments, questions or suggestions, please [contact us](mailto:[email protected]). | ||
|
||
|
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
|
@@ -27,7 +27,7 @@ Support Team](https://labs.ala.org.au) at the [Atlas of Living | |
Australia](https://www.ala.org.au) (ALA). It is named for the Little | ||
Corella ([*Cacatua | ||
sanguinea*](https://bie.ala.org.au/species/https%3A//biodiversity.org.au/afd/taxa/34b31e86-7ade-4cba-960f-82a6ae586206)). | ||
The logo was designed by [Dax Kellie](https://daxkellie.com/) | ||
The logo was designed by [Dax Kellie](https://daxkellie.com/). | ||
|
||
If you have any comments, questions or suggestions, please [contact | ||
us](mailto:[email protected]). | ||
|
@@ -149,7 +149,7 @@ df |> | |
#> ℹ Testing data | ||
#> ✔ | E P | Column | ||
#> ⠙ | 0 eventDate | ||
#> ✔ | 1 ✖ | eventDate [207ms] | ||
#> ⠹ | 1 ✖ | eventDate ✔ | 1 ✖ | eventDate [71ms] | ||
#> ══ Results ═════════════════════════════════════════════════════════════════════ | ||
#> | ||
#> [ Errors: 1 | Pass: 0 ] | ||
|
Oops, something went wrong.