Skip to content

Commit

Permalink
Merge pull request #22 from AtlasOfLivingAustralia/development
Browse files Browse the repository at this point in the history
Development
  • Loading branch information
mjwestgate authored Dec 5, 2024
2 parents 4a298ef + b8182a8 commit 1edf7d1
Show file tree
Hide file tree
Showing 20 changed files with 203 additions and 70 deletions.
1 change: 1 addition & 0 deletions .Rbuildignore
Original file line number Diff line number Diff line change
Expand Up @@ -8,3 +8,4 @@
^inst/hex.R$
README.Rmd
^doc$
^\.github$
1 change: 1 addition & 0 deletions .github/.gitignore
Original file line number Diff line number Diff line change
@@ -0,0 +1 @@
*.html
49 changes: 49 additions & 0 deletions .github/workflows/pkgdown.yaml
Original file line number Diff line number Diff line change
@@ -0,0 +1,49 @@
# Workflow derived from https://github.com/r-lib/actions/tree/v2/examples
# Need help debugging build failures? Start at https://github.com/r-lib/actions#where-to-find-help
on:
push:
branches: [main, master]
pull_request:
release:
types: [published]
workflow_dispatch:

name: pkgdown.yaml

permissions: read-all

jobs:
pkgdown:
runs-on: ubuntu-latest
# Only restrict concurrency for non-PR jobs
concurrency:
group: pkgdown-${{ github.event_name != 'pull_request' || github.run_id }}
env:
GITHUB_PAT: ${{ secrets.GITHUB_TOKEN }}
permissions:
contents: write
steps:
- uses: actions/checkout@v4

- uses: r-lib/actions/setup-pandoc@v2

- uses: r-lib/actions/setup-r@v2
with:
use-public-rspm: true

- uses: r-lib/actions/setup-r-dependencies@v2
with:
extra-packages: any::pkgdown, local::.
needs: website

- name: Build site
run: pkgdown::build_site_github_pages(new_process = FALSE, install = FALSE)
shell: Rscript {0}

- name: Deploy to GitHub pages 🚀
if: github.event_name != 'pull_request'
uses: JamesIves/[email protected]
with:
clean: false
branch: gh-pages
folder: docs
3 changes: 2 additions & 1 deletion DESCRIPTION
Original file line number Diff line number Diff line change
Expand Up @@ -24,7 +24,7 @@ Depends:
R (>= 4.3.0)
Imports:
cli,
corroboree,
corella,
curl,
dplyr,
elm,
Expand All @@ -46,6 +46,7 @@ Suggests:
testthat (>= 3.0.0),
xml2
License: MPL-2.0
URL: https://galaxias.ala.org.au
BugReports: https://github.com/AtlasOfLivingAustralia/galaxias/issues
Maintainer: Martin Westgate <[email protected]>
Encoding: UTF-8
Expand Down
14 changes: 9 additions & 5 deletions NAMESPACE
Original file line number Diff line number Diff line change
Expand Up @@ -13,9 +13,13 @@ export(galaxias_project)
export(get_validator_report)
export(validate_archive)
importFrom(cli,cat_line)
importFrom(cli,cli_abort)
importFrom(cli,cli_h2)
importFrom(cli,cli_h3)
importFrom(corroboree,check_occurrences)
importFrom(cli,cli_inform)
importFrom(cli,cli_progress_step)
importFrom(cli,cli_progress_update)
importFrom(corella,check_occurrences)
importFrom(curl,form_data)
importFrom(curl,form_file)
importFrom(dplyr,bind_rows)
Expand All @@ -24,11 +28,11 @@ importFrom(dplyr,mutate)
importFrom(dplyr,pull)
importFrom(dplyr,select)
importFrom(dplyr,slice_head)
importFrom(elm,add_elm_header)
importFrom(elm,check_elm)
importFrom(elm,read_elm)
importFrom(elm,add_eml_header)
importFrom(elm,check_eml)
importFrom(elm,read_md)
importFrom(elm,use_metadata)
importFrom(elm,write_elm)
importFrom(elm,write_eml)
importFrom(glue,glue)
importFrom(glue,glue_collapse)
importFrom(httr2,req_body_multipart)
Expand Down
51 changes: 33 additions & 18 deletions R/build_archive.R
Original file line number Diff line number Diff line change
Expand Up @@ -13,7 +13,7 @@
#'
#' * One or more `csv` files such as `occurrences.csv` &/or `events.csv`.
#' These will be manipulated versions of the raw dataset, which have been
#' altered to use Darwin Core terms as column headers. See the `corroboree`
#' altered to use Darwin Core terms as column headers. See the `corella`
#' package for details.
#' * A metadata statement, stored in xml using the filename `eml.xml`. The
#' function `use_metadata()` from the `elm` package is a good starting point
Expand All @@ -34,12 +34,24 @@
#' @export
build_archive <- function(x = "data", file) {
x <- get_default_directory(x)

progress_update("Retrieving metadata...")
files_in <- find_data(x)

progress_update("Creating zip folder...")
file_out <- get_default_file(file)

progress_update("Building Darwin Core Archive...")
zip::zip(zipfile = file_out,
files = files_in,
mode = "cherry-pick")
invisible(return(file_out))

cli::cli_alert_success("Darwin Core Archive successfully built. \nSaved as {.file {file_out}}.")
cli::cli_progress_done()

# invisible(return(file_out)) # might need this to save


}

#' Simple function to specify a zip file if no arg given
Expand All @@ -52,7 +64,7 @@ get_default_file <- function(file){
glue("{getwd()}.zip")
}else{
if(!grepl(".zip$", file)){
abort("file must end in `.zip`")
abort("File must end in `.zip`.")
}else{
file
}
Expand All @@ -62,17 +74,18 @@ get_default_file <- function(file){
#' Simple function to check that a `data` directory exists if no arg given
#' @importFrom rlang abort
#' @importFrom rlang inform
#' @importFrom cli cli_inform
#' @importFrom glue glue
#' @noRd
#' @keywords Internal
get_default_directory <- function(x){
if(missing(x)){
if(dir.exists("data")){
inform("`x` is missing; defaulting to `data` folder")
cli_inform("Missing `directory`. Defaulting to {.file data} folder.")
x <- "data"
}else{
abort(c("`x` is missing, and `data` folder is missing",
i = "please supply a folder containing required data"))
abort(c("Missing `directory` and missing `data` folder.",
i = "Please specify a folder containing required data."))
}
}else{
if(!dir.exists(x)){
Expand All @@ -86,15 +99,17 @@ get_default_directory <- function(x){
#' Find metadata info in a repository
#' @importFrom glue glue_collapse
#' @importFrom rlang abort
#' @importFrom cli cli_abort
#' @importFrom rlang caller_env
#' @noRd
#' @keywords Internal
find_data <- function(directory,
call = caller_env()){
if(!file.exists(directory)){
bullets <- c(glue("`{directory}` directory is required, but missing."),
i = "use `usethis::use_data()` to add data to your project.")
abort(bullets,
bullets <- c(glue("Missing `directory`."),
i = "Use `usethis::use_data()` to add data to your project.",
x = "Can't find directory `{directory}`.")
cli_abort(bullets,
call = call)
}
accepted_names <- c("occurrences",
Expand All @@ -105,24 +120,24 @@ find_data <- function(directory,
pattern = glue("^{accepted_names}.csv$"))
if(length(file_list) < 1){
bullets <- c("No data meeting Darwin Core requirements is given in `data`.",
i = "use `add_bd_data_raw()` for examples of how to add raw data to your package",
i = "use `usethis::use_data()` to add data to your package")
i = "Use `add_bd_data_raw()` for examples of how to add raw data to your package.",
i = "Use `usethis::use_data()` to add data to your package.")
abort(bullets,
call = call)
}

if(!file.exists(glue("{directory}/meta.xml"))){
bullets <- c("No schema file (`meta.xml`) is present in the specified directory.",
i = "use `build_schema()` to create one")
abort(bullets,
bullets <- c("No schema file ({.file meta.xml}) is present in the specified directory.",
i = "Use `build_schema()` to create a schema file.")
cli_abort(bullets,
call = call)
}

if(!file.exists(glue("{directory}/eml.xml"))){
bullets <- c("No metadata statement (`eml.xml`) is present in the specified directory.",
i = "See `elm::use_metadata()` for an example metadata statement,",
i = "then `build_metadata()` to convert to `eml.xml`.")
abort(bullets,
bullets <- c("No metadata statement ({.file eml.xml}) is present in the specified directory.",
i = "See `elm::use_metadata()` for an example metadata statement.",
i = "Use `build_metadata()` to convert to {.file eml.xml}.")
cli_abort(bullets,
call = call)
}

Expand Down
26 changes: 17 additions & 9 deletions R/build_metadata.R
Original file line number Diff line number Diff line change
Expand Up @@ -6,25 +6,33 @@
#' specified using the `directory` argument.
#'
#' This function is a fairly shallow wrapper on top of functionality build
#' in the `elm` package, particularly `read_elm()` and `write_elm()`. You can
#' in the `elm` package, particularly `read_md()` and `write_eml()`. You can
#' use that package to gain greater control, or to debug problems, should you
#' wish.
#' @param x Path to a metadata statement stored in markdown format (.md).
#' @param path Path to a metadata statement stored in markdown format (.md).
#' @param file A file where the result should be saved. Defaults to
#' `data/eml.xml`.
#' @returns Does not return an object to the workspace; called for the side
#' effect of building a file named `meta.xml` in the `data` directory.
#' @importFrom elm add_elm_header
#' @importFrom elm read_elm
#' @importFrom elm write_elm
#' @importFrom elm add_eml_header
#' @importFrom elm read_md
#' @importFrom elm write_eml
#' @export
build_metadata <- function(x = "data",
file = "./data/eml.xml"){
if(!file.exists(x)){
abort("`x` doesn't exist in specified location.")
cli::cli_abort("{.file {x}} doesn't exist in specified location.")
}
# import file, ensure EML metadata is added, convert to XML
read_elm(x) |>
add_elm_header() |>
write_elm(file = file)
progress_update("Reading file...")
metadata_file <- read_md(x)

progress_update("Building xml components...")
built_file <- add_eml_header(metadata_file)

progress_update("Writing file...")
write_eml(built_file, file = file)

cli::cli_alert_success("Metadata successfully built. Saved as {.file /data/eml.xml}.")
cli::cli_progress_done()
}
56 changes: 48 additions & 8 deletions R/build_schema.R
Original file line number Diff line number Diff line change
Expand Up @@ -9,18 +9,55 @@
#' @param file (string) A file name for the resulting schema document.
#' @returns Does not return an object to the workspace; called for the side
#' effect of building a file named `meta.xml` in the specified directory.
#' @importFrom elm write_elm
#' @importFrom elm write_eml
#' @importFrom glue glue
#' @importFrom rlang abort
#' @export
build_schema <- function(x = "data",
file = "./data/meta.xml") {
x <- get_default_directory(x)
x |>
detect_dwc_files() |>
detect_dwc_fields() |>
add_front_matter() |>
write_elm(file = file)

files <- detect_dwc_files(x)
fields <- detect_dwc_fields(files)
result <- add_front_matter(fields)

progress_update("Writing file...")
write_eml(result, file = file)

cli::cli_alert_success("Schema successfully built. Saved as {.file /data/meta.xml}.")
cli::cli_progress_done()
}

#' Wait time
#' @noRd
#' @keywords Internal
wait <- function(seconds = 1) {
Sys.sleep(seconds)
}


#' Function progress message
#'
#' @description
#' Informs users about the progress of their ongoing function steps.
#'
#' @importFrom cli cli_progress_step
#' @importFrom cli cli_progress_update
#' @noRd
#' @keywords Internal
progress_update <- function(message) {
cli::cli_progress_step(
paste0(
message
),
spinner = TRUE
)

for (i in 1:100) {
wait(0.0001) # remove zeroes to make messages slower
cli::cli_progress_update()
}

}

#' Internal function to create core/extension framework for files
Expand All @@ -35,6 +72,7 @@ build_schema <- function(x = "data",
#' @noRd
#' @keywords Internal
detect_dwc_files <- function(directory){
progress_update("Detecting Darwin Core files...")
available_exts <- dwc_extensions()
supported_files <- available_exts |>
pull("file")
Expand All @@ -47,8 +85,8 @@ detect_dwc_files <- function(directory){
sep = ", ",
last = " or ")
bullets <- c(
glue("Specified directory (\"{directory}\") does not contain any dwc-compliant csv files."),
i = glue("Accepted names are {file_names}"))
glue("Specified directory (\"{directory}\") does not contain any Darwin Core-compliant csv files."),
i = glue("Accepted names are {file_names}."))
abort(bullets)
}
available_exts |>
Expand Down Expand Up @@ -100,6 +138,7 @@ dwc_extensions <- function(){
#' @noRd
#' @keywords Internal
detect_dwc_fields <- function(df){
progress_update("Detecting Darwin Core fields in dataset...")
split(df, seq_len(nrow(df))) |>
map(\(x){
bind_rows(create_schema_row(x),
Expand Down Expand Up @@ -181,6 +220,7 @@ get_field_names <- function(file){
#' @noRd
#' @keywords Internal
add_front_matter <- function(df){
progress_update("Building xml components...")
front_row <- tibble(
level = 1,
label = "archive",
Expand Down
10 changes: 5 additions & 5 deletions R/check_archive.R
Original file line number Diff line number Diff line change
@@ -1,7 +1,7 @@
#' Check an archive against Darwin Core standards
#'
#' This is a wrapper to two other packages; schema and EML files (i.e. xml) are
#' checked with the `elm` package; csv files are checked with the `corroboree`
#' checked with the `elm` package; csv files are checked with the `corella`
#' package.
#' @param x (string) A directory containing the files to be published, or
#' optionally a `.zip` file built from the same (i.e. with `build_archive()`).
Expand All @@ -26,8 +26,8 @@ check_archive <- function(x = "data"){
}

#' Internal function to check all files
#' @importFrom corroboree check_occurrences
#' @importFrom elm check_elm
#' @importFrom corella check_occurrences
#' @importFrom elm check_eml
#' @importFrom purrr map
#' @importFrom readr read_csv
#' @noRd
Expand All @@ -38,8 +38,8 @@ check_files <- function(filenames){
switch(a,
"occurrences.csv" = {read_csv(a) |>
check_occurrences()},
"meta.xml" = {check_elm(a)},
"eml.xml" = {check_elm(a)}
"meta.xml" = {check_eml(a)},
"eml.xml" = {check_eml(a)}
)
}) |>
invisible()
Expand Down
Loading

0 comments on commit 1edf7d1

Please sign in to comment.