diff --git a/.Rbuildignore b/.Rbuildignore index b5d73a8..4aa3043 100644 --- a/.Rbuildignore +++ b/.Rbuildignore @@ -8,3 +8,4 @@ ^inst/hex.R$ README.Rmd ^doc$ +^\.github$ diff --git a/.github/.gitignore b/.github/.gitignore new file mode 100644 index 0000000..2d19fc7 --- /dev/null +++ b/.github/.gitignore @@ -0,0 +1 @@ +*.html diff --git a/.github/workflows/pkgdown.yaml b/.github/workflows/pkgdown.yaml new file mode 100644 index 0000000..bfc9f4d --- /dev/null +++ b/.github/workflows/pkgdown.yaml @@ -0,0 +1,49 @@ +# Workflow derived from https://github.com/r-lib/actions/tree/v2/examples +# Need help debugging build failures? Start at https://github.com/r-lib/actions#where-to-find-help +on: + push: + branches: [main, master] + pull_request: + release: + types: [published] + workflow_dispatch: + +name: pkgdown.yaml + +permissions: read-all + +jobs: + pkgdown: + runs-on: ubuntu-latest + # Only restrict concurrency for non-PR jobs + concurrency: + group: pkgdown-${{ github.event_name != 'pull_request' || github.run_id }} + env: + GITHUB_PAT: ${{ secrets.GITHUB_TOKEN }} + permissions: + contents: write + steps: + - uses: actions/checkout@v4 + + - uses: r-lib/actions/setup-pandoc@v2 + + - uses: r-lib/actions/setup-r@v2 + with: + use-public-rspm: true + + - uses: r-lib/actions/setup-r-dependencies@v2 + with: + extra-packages: any::pkgdown, local::. + needs: website + + - name: Build site + run: pkgdown::build_site_github_pages(new_process = FALSE, install = FALSE) + shell: Rscript {0} + + - name: Deploy to GitHub pages 🚀 + if: github.event_name != 'pull_request' + uses: JamesIves/github-pages-deploy-action@v4.5.0 + with: + clean: false + branch: gh-pages + folder: docs diff --git a/DESCRIPTION b/DESCRIPTION index d7eb165..4146c8d 100644 --- a/DESCRIPTION +++ b/DESCRIPTION @@ -24,7 +24,7 @@ Depends: R (>= 4.3.0) Imports: cli, - corroboree, + corella, curl, dplyr, elm, @@ -46,6 +46,7 @@ Suggests: testthat (>= 3.0.0), xml2 License: MPL-2.0 +URL: https://galaxias.ala.org.au BugReports: https://github.com/AtlasOfLivingAustralia/galaxias/issues Maintainer: Martin Westgate Encoding: UTF-8 diff --git a/NAMESPACE b/NAMESPACE index 997c8ff..440613d 100644 --- a/NAMESPACE +++ b/NAMESPACE @@ -13,9 +13,13 @@ export(galaxias_project) export(get_validator_report) export(validate_archive) importFrom(cli,cat_line) +importFrom(cli,cli_abort) importFrom(cli,cli_h2) importFrom(cli,cli_h3) -importFrom(corroboree,check_occurrences) +importFrom(cli,cli_inform) +importFrom(cli,cli_progress_step) +importFrom(cli,cli_progress_update) +importFrom(corella,check_occurrences) importFrom(curl,form_data) importFrom(curl,form_file) importFrom(dplyr,bind_rows) @@ -24,11 +28,11 @@ importFrom(dplyr,mutate) importFrom(dplyr,pull) importFrom(dplyr,select) importFrom(dplyr,slice_head) -importFrom(elm,add_elm_header) -importFrom(elm,check_elm) -importFrom(elm,read_elm) +importFrom(elm,add_eml_header) +importFrom(elm,check_eml) +importFrom(elm,read_md) importFrom(elm,use_metadata) -importFrom(elm,write_elm) +importFrom(elm,write_eml) importFrom(glue,glue) importFrom(glue,glue_collapse) importFrom(httr2,req_body_multipart) diff --git a/R/build_archive.R b/R/build_archive.R index 0da389a..db94b28 100644 --- a/R/build_archive.R +++ b/R/build_archive.R @@ -13,7 +13,7 @@ #' #' * One or more `csv` files such as `occurrences.csv` &/or `events.csv`. #' These will be manipulated versions of the raw dataset, which have been -#' altered to use Darwin Core terms as column headers. See the `corroboree` +#' altered to use Darwin Core terms as column headers. See the `corella` #' package for details. #' * A metadata statement, stored in xml using the filename `eml.xml`. The #' function `use_metadata()` from the `elm` package is a good starting point @@ -34,12 +34,24 @@ #' @export build_archive <- function(x = "data", file) { x <- get_default_directory(x) + + progress_update("Retrieving metadata...") files_in <- find_data(x) + + progress_update("Creating zip folder...") file_out <- get_default_file(file) + + progress_update("Building Darwin Core Archive...") zip::zip(zipfile = file_out, files = files_in, mode = "cherry-pick") - invisible(return(file_out)) + + cli::cli_alert_success("Darwin Core Archive successfully built. \nSaved as {.file {file_out}}.") + cli::cli_progress_done() + + # invisible(return(file_out)) # might need this to save + + } #' Simple function to specify a zip file if no arg given @@ -52,7 +64,7 @@ get_default_file <- function(file){ glue("{getwd()}.zip") }else{ if(!grepl(".zip$", file)){ - abort("file must end in `.zip`") + abort("File must end in `.zip`.") }else{ file } @@ -62,17 +74,18 @@ get_default_file <- function(file){ #' Simple function to check that a `data` directory exists if no arg given #' @importFrom rlang abort #' @importFrom rlang inform +#' @importFrom cli cli_inform #' @importFrom glue glue #' @noRd #' @keywords Internal get_default_directory <- function(x){ if(missing(x)){ if(dir.exists("data")){ - inform("`x` is missing; defaulting to `data` folder") + cli_inform("Missing `directory`. Defaulting to {.file data} folder.") x <- "data" }else{ - abort(c("`x` is missing, and `data` folder is missing", - i = "please supply a folder containing required data")) + abort(c("Missing `directory` and missing `data` folder.", + i = "Please specify a folder containing required data.")) } }else{ if(!dir.exists(x)){ @@ -86,15 +99,17 @@ get_default_directory <- function(x){ #' Find metadata info in a repository #' @importFrom glue glue_collapse #' @importFrom rlang abort +#' @importFrom cli cli_abort #' @importFrom rlang caller_env #' @noRd #' @keywords Internal find_data <- function(directory, call = caller_env()){ if(!file.exists(directory)){ - bullets <- c(glue("`{directory}` directory is required, but missing."), - i = "use `usethis::use_data()` to add data to your project.") - abort(bullets, + bullets <- c(glue("Missing `directory`."), + i = "Use `usethis::use_data()` to add data to your project.", + x = "Can't find directory `{directory}`.") + cli_abort(bullets, call = call) } accepted_names <- c("occurrences", @@ -105,24 +120,24 @@ find_data <- function(directory, pattern = glue("^{accepted_names}.csv$")) if(length(file_list) < 1){ bullets <- c("No data meeting Darwin Core requirements is given in `data`.", - i = "use `add_bd_data_raw()` for examples of how to add raw data to your package", - i = "use `usethis::use_data()` to add data to your package") + i = "Use `add_bd_data_raw()` for examples of how to add raw data to your package.", + i = "Use `usethis::use_data()` to add data to your package.") abort(bullets, call = call) } if(!file.exists(glue("{directory}/meta.xml"))){ - bullets <- c("No schema file (`meta.xml`) is present in the specified directory.", - i = "use `build_schema()` to create one") - abort(bullets, + bullets <- c("No schema file ({.file meta.xml}) is present in the specified directory.", + i = "Use `build_schema()` to create a schema file.") + cli_abort(bullets, call = call) } if(!file.exists(glue("{directory}/eml.xml"))){ - bullets <- c("No metadata statement (`eml.xml`) is present in the specified directory.", - i = "See `elm::use_metadata()` for an example metadata statement,", - i = "then `build_metadata()` to convert to `eml.xml`.") - abort(bullets, + bullets <- c("No metadata statement ({.file eml.xml}) is present in the specified directory.", + i = "See `elm::use_metadata()` for an example metadata statement.", + i = "Use `build_metadata()` to convert to {.file eml.xml}.") + cli_abort(bullets, call = call) } diff --git a/R/build_metadata.R b/R/build_metadata.R index a4438c0..3b8d5a4 100644 --- a/R/build_metadata.R +++ b/R/build_metadata.R @@ -6,25 +6,33 @@ #' specified using the `directory` argument. #' #' This function is a fairly shallow wrapper on top of functionality build -#' in the `elm` package, particularly `read_elm()` and `write_elm()`. You can +#' in the `elm` package, particularly `read_md()` and `write_eml()`. You can #' use that package to gain greater control, or to debug problems, should you #' wish. -#' @param x Path to a metadata statement stored in markdown format (.md). +#' @param path Path to a metadata statement stored in markdown format (.md). #' @param file A file where the result should be saved. Defaults to #' `data/eml.xml`. #' @returns Does not return an object to the workspace; called for the side #' effect of building a file named `meta.xml` in the `data` directory. -#' @importFrom elm add_elm_header -#' @importFrom elm read_elm -#' @importFrom elm write_elm +#' @importFrom elm add_eml_header +#' @importFrom elm read_md +#' @importFrom elm write_eml #' @export build_metadata <- function(x = "data", file = "./data/eml.xml"){ if(!file.exists(x)){ - abort("`x` doesn't exist in specified location.") + cli::cli_abort("{.file {x}} doesn't exist in specified location.") } # import file, ensure EML metadata is added, convert to XML - read_elm(x) |> - add_elm_header() |> - write_elm(file = file) + progress_update("Reading file...") + metadata_file <- read_md(x) + + progress_update("Building xml components...") + built_file <- add_eml_header(metadata_file) + + progress_update("Writing file...") + write_eml(built_file, file = file) + + cli::cli_alert_success("Metadata successfully built. Saved as {.file /data/eml.xml}.") + cli::cli_progress_done() } \ No newline at end of file diff --git a/R/build_schema.R b/R/build_schema.R index 65aa669..262f0a6 100644 --- a/R/build_schema.R +++ b/R/build_schema.R @@ -9,18 +9,55 @@ #' @param file (string) A file name for the resulting schema document. #' @returns Does not return an object to the workspace; called for the side #' effect of building a file named `meta.xml` in the specified directory. -#' @importFrom elm write_elm +#' @importFrom elm write_eml #' @importFrom glue glue #' @importFrom rlang abort #' @export build_schema <- function(x = "data", file = "./data/meta.xml") { x <- get_default_directory(x) - x |> - detect_dwc_files() |> - detect_dwc_fields() |> - add_front_matter() |> - write_elm(file = file) + + files <- detect_dwc_files(x) + fields <- detect_dwc_fields(files) + result <- add_front_matter(fields) + + progress_update("Writing file...") + write_eml(result, file = file) + + cli::cli_alert_success("Schema successfully built. Saved as {.file /data/meta.xml}.") + cli::cli_progress_done() +} + +#' Wait time +#' @noRd +#' @keywords Internal +wait <- function(seconds = 1) { + Sys.sleep(seconds) +} + + +#' Function progress message +#' +#' @description +#' Informs users about the progress of their ongoing function steps. +#' +#' @importFrom cli cli_progress_step +#' @importFrom cli cli_progress_update +#' @noRd +#' @keywords Internal +progress_update <- function(message) { + cli::cli_progress_step( + paste0( + message + ), + spinner = TRUE + ) + + for (i in 1:100) { + wait(0.0001) # remove zeroes to make messages slower + cli::cli_progress_update() + } + } #' Internal function to create core/extension framework for files @@ -35,6 +72,7 @@ build_schema <- function(x = "data", #' @noRd #' @keywords Internal detect_dwc_files <- function(directory){ + progress_update("Detecting Darwin Core files...") available_exts <- dwc_extensions() supported_files <- available_exts |> pull("file") @@ -47,8 +85,8 @@ detect_dwc_files <- function(directory){ sep = ", ", last = " or ") bullets <- c( - glue("Specified directory (\"{directory}\") does not contain any dwc-compliant csv files."), - i = glue("Accepted names are {file_names}")) + glue("Specified directory (\"{directory}\") does not contain any Darwin Core-compliant csv files."), + i = glue("Accepted names are {file_names}.")) abort(bullets) } available_exts |> @@ -100,6 +138,7 @@ dwc_extensions <- function(){ #' @noRd #' @keywords Internal detect_dwc_fields <- function(df){ + progress_update("Detecting Darwin Core fields in dataset...") split(df, seq_len(nrow(df))) |> map(\(x){ bind_rows(create_schema_row(x), @@ -181,6 +220,7 @@ get_field_names <- function(file){ #' @noRd #' @keywords Internal add_front_matter <- function(df){ + progress_update("Building xml components...") front_row <- tibble( level = 1, label = "archive", diff --git a/R/check_archive.R b/R/check_archive.R index 779e8ec..4595e62 100644 --- a/R/check_archive.R +++ b/R/check_archive.R @@ -1,7 +1,7 @@ #' Check an archive against Darwin Core standards #' #' This is a wrapper to two other packages; schema and EML files (i.e. xml) are -#' checked with the `elm` package; csv files are checked with the `corroboree` +#' checked with the `elm` package; csv files are checked with the `corella` #' package. #' @param x (string) A directory containing the files to be published, or #' optionally a `.zip` file built from the same (i.e. with `build_archive()`). @@ -26,8 +26,8 @@ check_archive <- function(x = "data"){ } #' Internal function to check all files -#' @importFrom corroboree check_occurrences -#' @importFrom elm check_elm +#' @importFrom corella check_occurrences +#' @importFrom elm check_eml #' @importFrom purrr map #' @importFrom readr read_csv #' @noRd @@ -38,8 +38,8 @@ check_files <- function(filenames){ switch(a, "occurrences.csv" = {read_csv(a) |> check_occurrences()}, - "meta.xml" = {check_elm(a)}, - "eml.xml" = {check_elm(a)} + "meta.xml" = {check_eml(a)}, + "eml.xml" = {check_eml(a)} ) }) |> invisible() diff --git a/R/galaxias-package.R b/R/galaxias-package.R index 9eef2ed..a7308ac 100644 --- a/R/galaxias-package.R +++ b/R/galaxias-package.R @@ -23,7 +23,7 @@ #' * [build_archive()] Convert a directory to a Darwin Core Archive #' #' **Validate an archive** -#' * [check_archive()] Check your archive using the `elm` and `corroboree` packages +#' * [check_archive()] Check your archive using the `elm` and `corella` packages #' * [galaxias_config()] Store credentials for your API call #' * [validate_archive()] Check your archive using the GBIF 'validator' API #' * [print_validation()] Methods for displaying API responses diff --git a/R/galaxias_config.R b/R/galaxias_config.R index 56bbfc7..351bdc2 100644 --- a/R/galaxias_config.R +++ b/R/galaxias_config.R @@ -7,8 +7,8 @@ #' Note that unlike `galah`, you cannot set a 'default' provider in `galaxias`; #' the organisation is always an argument to the function in question. Also #' unlike `galah`, `galaxias_config()` enables you to store configuration -#' details for multiple organisations at once. Currently, the this function is -#' only useful in relation to `validate_archive()`, and only then for validating +#' details for multiple organisations at once. Currently, this function is +#' only useful to `validate_archive()`, and only then for validating #' via GBIF. #' @name galaxias_config #' @param gbif A list containing the entries `username`, `email` and `password` diff --git a/README.Rmd b/README.Rmd index b86eb70..13e6005 100644 --- a/README.Rmd +++ b/README.Rmd @@ -13,6 +13,10 @@ knitr::opts_chunk$set( # galaxias
+ +[![CRAN status](https://www.r-pkg.org/badges/version/galaxias)](https://CRAN.R-project.org/package=galaxias) + + ## Overview `galaxias` is an R package that helps users describe, package and share @@ -48,8 +52,8 @@ library(galaxias) - Create a new RStudio project or package for storing biodiversity data and data-processing scripts using `galaxias_project()`. - - Create metadata and schema documents to describe the origin and structure - of your data using `build_metadata()` and `build_schema()`. + - Create documents to describe the origin and structure of your data using + `build_metadata()` and `build_schema()`. - Zip up your data for sharing or publication using `build_archive()`. - Check data for consistency with the Darwin Core standard, either locally using `check_archive()`, or via API using `validate_archive()`. @@ -58,8 +62,8 @@ library(galaxias) `galaxias` is part of a group of packages that help users publish data using the Darwin Core standard. The other packages are: - - `corroboree` for converting tibbles to the required column names, and; - - `elm` for converting markdown files to `xml`. + - [`corella`](https://github.com/AtlasOfLivingAustralia/corella) for converting tibbles to the required column names + - [`elm`](https://github.com/AtlasOfLivingAustralia/elm) for converting markdown files to `xml`. ## Citing galaxias diff --git a/README.md b/README.md index 2a56e8f..1b190a5 100644 --- a/README.md +++ b/README.md @@ -3,6 +3,12 @@ # galaxias
+ + +[![CRAN +status](https://www.r-pkg.org/badges/version/galaxias)](https://CRAN.R-project.org/package=galaxias) + + ## Overview `galaxias` is an R package that helps users describe, package and share @@ -40,8 +46,8 @@ library(galaxias) - Create a new RStudio project or package for storing biodiversity data and data-processing scripts using `galaxias_project()`. -- Create metadata and schema documents to describe the origin and - structure of your data using `build_metadata()` and `build_schema()`. +- Create documents to describe the origin and structure of your data + using `build_metadata()` and `build_schema()`. - Zip up your data for sharing or publication using `build_archive()`. - Check data for consistency with the Darwin Core standard, either locally using `check_archive()`, or via API using @@ -50,8 +56,10 @@ library(galaxias) `galaxias` is part of a group of packages that help users publish data using the Darwin Core standard. The other packages are: -- `corroboree` for converting tibbles to the required column names, and; -- `elm` for converting markdown files to `xml`. +- [`corella`](https://github.com/AtlasOfLivingAustralia/corella) for + converting tibbles to the required column names +- [`elm`](https://github.com/AtlasOfLivingAustralia/elm) for converting + markdown files to `xml`. ## Citing galaxias diff --git a/_pkgdown.yml b/_pkgdown.yml index 4e81dc1..f1b37f3 100644 --- a/_pkgdown.yml +++ b/_pkgdown.yml @@ -1,7 +1,9 @@ +url: https://galaxias.ala.org.au template: bootstrap: 5 bslib: - code_font: {google: "Source Code Pro"} + code_font: + google: Source Code Pro params: bootswatch: cerulean development: @@ -30,8 +32,7 @@ navbar: href: news/index.html reference: - title: Set up a project - contents: - - galaxias_project + contents: galaxias_project - title: Construct an archive contents: - build_metadata @@ -43,3 +44,4 @@ reference: - galaxias_config - validate_archive - print_validation + diff --git a/man/build_archive.Rd b/man/build_archive.Rd index 5fbd93d..9ebc7f6 100644 --- a/man/build_archive.Rd +++ b/man/build_archive.Rd @@ -32,7 +32,7 @@ This function looks for three types of objects in the specified \code{directory} \itemize{ \item One or more \code{csv} files such as \code{occurrences.csv} &/or \code{events.csv}. These will be manipulated versions of the raw dataset, which have been -altered to use Darwin Core terms as column headers. See the \code{corroboree} +altered to use Darwin Core terms as column headers. See the \code{corella} package for details. \item A metadata statement, stored in xml using the filename \code{eml.xml}. The function \code{use_metadata()} from the \code{elm} package is a good starting point diff --git a/man/build_metadata.Rd b/man/build_metadata.Rd index 267b166..ff89339 100644 --- a/man/build_metadata.Rd +++ b/man/build_metadata.Rd @@ -7,10 +7,10 @@ build_metadata(x = "data", file = "./data/eml.xml") } \arguments{ -\item{x}{Path to a metadata statement stored in markdown format (.md).} - \item{file}{A file where the result should be saved. Defaults to \code{data/eml.xml}.} + +\item{path}{Path to a metadata statement stored in markdown format (.md).} } \value{ Does not return an object to the workspace; called for the side @@ -24,7 +24,7 @@ specified using the \code{directory} argument. } \details{ This function is a fairly shallow wrapper on top of functionality build -in the \code{elm} package, particularly \code{read_elm()} and \code{write_elm()}. You can +in the \code{elm} package, particularly \code{read_md()} and \code{write_eml()}. You can use that package to gain greater control, or to debug problems, should you wish. } diff --git a/man/check_archive.Rd b/man/check_archive.Rd index b3fe62b..d9d5f04 100644 --- a/man/check_archive.Rd +++ b/man/check_archive.Rd @@ -18,7 +18,7 @@ the console. } \description{ This is a wrapper to two other packages; schema and EML files (i.e. xml) are -checked with the \code{elm} package; csv files are checked with the \code{corroboree} +checked with the \code{elm} package; csv files are checked with the \code{corella} package. } \seealso{ diff --git a/man/galaxias-package.Rd b/man/galaxias-package.Rd index 497fc2c..0e0bfd6 100644 --- a/man/galaxias-package.Rd +++ b/man/galaxias-package.Rd @@ -32,7 +32,7 @@ If you have any questions, comments or suggestions, please email \strong{Validate an archive} \itemize{ -\item \code{\link[=check_archive]{check_archive()}} Check your archive using the \code{elm} and \code{corroboree} packages +\item \code{\link[=check_archive]{check_archive()}} Check your archive using the \code{elm} and \code{corella} packages \item \code{\link[=galaxias_config]{galaxias_config()}} Store credentials for your API call \item \code{\link[=validate_archive]{validate_archive()}} Check your archive using the GBIF 'validator' API \item \code{\link[=print_validation]{print_validation()}} Methods for displaying API responses diff --git a/man/galaxias_config.Rd b/man/galaxias_config.Rd index 15e7456..7b33a2e 100644 --- a/man/galaxias_config.Rd +++ b/man/galaxias_config.Rd @@ -26,7 +26,7 @@ that information for access by \code{galaxias} API functions. Note that unlike \code{galah}, you cannot set a 'default' provider in \code{galaxias}; the organisation is always an argument to the function in question. Also unlike \code{galah}, \code{galaxias_config()} enables you to store configuration -details for multiple organisations at once. Currently, the this function is -only useful in relation to \code{validate_archive()}, and only then for validating +details for multiple organisations at once. Currently, this function is +only useful to \code{validate_archive()}, and only then for validating via GBIF. } diff --git a/vignettes/quick_start_guide.Rmd b/vignettes/quick_start_guide.Rmd index 75e0e91..518b4b6 100644 --- a/vignettes/quick_start_guide.Rmd +++ b/vignettes/quick_start_guide.Rmd @@ -65,12 +65,12 @@ df <- tibble( (Note that normally you'd import your data from an external file (e.g. using `readr::read_csv()`), but we've constructed one here for example purposes.) -We recommend using the `corroboree` package for converting tibbles to Darwin +We recommend using the `corella` package for converting tibbles to Darwin Core. A minimally complete set of individual observations formatted using this package might look like this: ```{r} -library(corroboree) +library(corella) library(lubridate) occurrences <- df |>