Merge pull request #22 from AtlasOfLivingAustralia/development

Development
AtlasOfLivingAustralia · Dec 5, 2024 · 1edf7d1 · 1edf7d1
2 parents 4a298ef + b8182a8
commit 1edf7d1
Show file tree

Hide file tree

Showing 20 changed files with 203 additions and 70 deletions.
diff --git a/.Rbuildignore b/.Rbuildignore
@@ -8,3 +8,4 @@
 ^inst/hex.R$
 README.Rmd
 ^doc$
+^\.github$
diff --git a/.github/.gitignore b/.github/.gitignore
@@ -0,0 +1 @@
+*.html
diff --git a/.github/workflows/pkgdown.yaml b/.github/workflows/pkgdown.yaml
@@ -0,0 +1,49 @@
+# Workflow derived from https://github.com/r-lib/actions/tree/v2/examples
+# Need help debugging build failures? Start at https://github.com/r-lib/actions#where-to-find-help
+on:
+  push:
+    branches: [main, master]
+  pull_request:
+  release:
+    types: [published]
+  workflow_dispatch:
+
+name: pkgdown.yaml
+
+permissions: read-all
+
+jobs:
+  pkgdown:
+    runs-on: ubuntu-latest
+    # Only restrict concurrency for non-PR jobs
+    concurrency:
+      group: pkgdown-${{ github.event_name != 'pull_request' || github.run_id }}
+    env:
+      GITHUB_PAT: ${{ secrets.GITHUB_TOKEN }}
+    permissions:
+      contents: write
+    steps:
+      - uses: actions/checkout@v4
+
+      - uses: r-lib/actions/setup-pandoc@v2
+
+      - uses: r-lib/actions/setup-r@v2
+        with:
+          use-public-rspm: true
+
+      - uses: r-lib/actions/setup-r-dependencies@v2
+        with:
+          extra-packages: any::pkgdown, local::.
+          needs: website
+
+      - name: Build site
+        run: pkgdown::build_site_github_pages(new_process = FALSE, install = FALSE)
+        shell: Rscript {0}
+
+      - name: Deploy to GitHub pages 🚀
+        if: github.event_name != 'pull_request'
+        uses: JamesIves/[email protected]
+        with:
+          clean: false
+          branch: gh-pages
+          folder: docs
diff --git a/DESCRIPTION b/DESCRIPTION
@@ -24,7 +24,7 @@ Depends:
   R (>= 4.3.0)
 Imports:
   cli,
-  corroboree,
+  corella,
   curl,
   dplyr,
   elm,
@@ -46,6 +46,7 @@ Suggests:
   testthat (>= 3.0.0),
   xml2
 License: MPL-2.0
+URL: https://galaxias.ala.org.au
 BugReports: https://github.com/AtlasOfLivingAustralia/galaxias/issues
 Maintainer: Martin Westgate <[email protected]>
 Encoding: UTF-8

diff --git a/NAMESPACE b/NAMESPACE
@@ -13,9 +13,13 @@ export(galaxias_project)
 export(get_validator_report)
 export(validate_archive)
 importFrom(cli,cat_line)
+importFrom(cli,cli_abort)
 importFrom(cli,cli_h2)
 importFrom(cli,cli_h3)
-importFrom(corroboree,check_occurrences)
+importFrom(cli,cli_inform)
+importFrom(cli,cli_progress_step)
+importFrom(cli,cli_progress_update)
+importFrom(corella,check_occurrences)
 importFrom(curl,form_data)
 importFrom(curl,form_file)
 importFrom(dplyr,bind_rows)
@@ -24,11 +28,11 @@ importFrom(dplyr,mutate)
 importFrom(dplyr,pull)
 importFrom(dplyr,select)
 importFrom(dplyr,slice_head)
-importFrom(elm,add_elm_header)
-importFrom(elm,check_elm)
-importFrom(elm,read_elm)
+importFrom(elm,add_eml_header)
+importFrom(elm,check_eml)
+importFrom(elm,read_md)
 importFrom(elm,use_metadata)
-importFrom(elm,write_elm)
+importFrom(elm,write_eml)
 importFrom(glue,glue)
 importFrom(glue,glue_collapse)
 importFrom(httr2,req_body_multipart)

diff --git a/R/build_archive.R b/R/build_archive.R
@@ -13,7 +13,7 @@
 #' 
 #'  * One or more `csv` files such as `occurrences.csv` &/or `events.csv`. 
 #'    These will be manipulated versions of the raw dataset, which have been
-#'    altered to use Darwin Core terms as column headers. See the `corroboree`
+#'    altered to use Darwin Core terms as column headers. See the `corella`
 #'    package for details.
 #'  * A metadata statement, stored in xml using the filename `eml.xml`. The
 #'    function `use_metadata()` from the `elm` package is a good starting point
@@ -34,12 +34,24 @@
 #' @export
 build_archive <- function(x = "data", file) {
   x <- get_default_directory(x)
+
+  progress_update("Retrieving metadata...")
   files_in <- find_data(x)
+
+  progress_update("Creating zip folder...")
   file_out <- get_default_file(file)
+
+  progress_update("Building Darwin Core Archive...")
   zip::zip(zipfile = file_out, 
            files = files_in,
            mode = "cherry-pick")
-  invisible(return(file_out))
+
+  cli::cli_alert_success("Darwin Core Archive successfully built. \nSaved as {.file {file_out}}.")
+  cli::cli_progress_done()
+
+  # invisible(return(file_out)) # might need this to save
+
+
 }
 
 #' Simple function to specify a zip file if no arg given
@@ -52,7 +64,7 @@ get_default_file <- function(file){
     glue("{getwd()}.zip")
   }else{
     if(!grepl(".zip$", file)){
-      abort("file must end in `.zip`")
+      abort("File must end in `.zip`.")
     }else{
       file
     }
@@ -62,17 +74,18 @@ get_default_file <- function(file){
 #' Simple function to check that a `data` directory exists if no arg given
 #' @importFrom rlang abort
 #' @importFrom rlang inform
+#' @importFrom cli cli_inform
 #' @importFrom glue glue
 #' @noRd
 #' @keywords Internal
 get_default_directory <- function(x){
   if(missing(x)){
     if(dir.exists("data")){
-      inform("`x` is missing; defaulting to `data` folder")
+      cli_inform("Missing `directory`. Defaulting to {.file data} folder.")
       x <- "data"
     }else{
-      abort(c("`x` is missing, and `data` folder is missing", 
-              i = "please supply a folder containing required data"))
+      abort(c("Missing `directory` and missing `data` folder.", 
+              i = "Please specify a folder containing required data."))
     }
   }else{
     if(!dir.exists(x)){
@@ -86,15 +99,17 @@ get_default_directory <- function(x){
 #' Find metadata info in a repository
 #' @importFrom glue glue_collapse
 #' @importFrom rlang abort
+#' @importFrom cli cli_abort
 #' @importFrom rlang caller_env
 #' @noRd
 #' @keywords Internal
 find_data <- function(directory,
                       call = caller_env()){
   if(!file.exists(directory)){
-    bullets <- c(glue("`{directory}` directory is required, but missing."),
-                 i = "use `usethis::use_data()` to add data to your project.")
-    abort(bullets,
+    bullets <- c(glue("Missing `directory`."),
+                 i = "Use `usethis::use_data()` to add data to your project.",
+                 x = "Can't find directory `{directory}`.")
+    cli_abort(bullets,
           call = call)
   }
   accepted_names <- c("occurrences", 
@@ -105,24 +120,24 @@ find_data <- function(directory,
                           pattern = glue("^{accepted_names}.csv$"))
   if(length(file_list) < 1){
     bullets <- c("No data meeting Darwin Core requirements is given in `data`.",
-                 i = "use `add_bd_data_raw()` for examples of how to add raw data to your package",
-                 i =  "use `usethis::use_data()` to add data to your package")
+                 i = "Use `add_bd_data_raw()` for examples of how to add raw data to your package.",
+                 i =  "Use `usethis::use_data()` to add data to your package.")
     abort(bullets,
           call = call)
   }
 
   if(!file.exists(glue("{directory}/meta.xml"))){
-    bullets <- c("No schema file (`meta.xml`) is present in the specified directory.",
-                 i = "use `build_schema()` to create one")
-    abort(bullets,
+    bullets <- c("No schema file ({.file meta.xml}) is present in the specified directory.",
+                 i = "Use `build_schema()` to create a schema file.")
+    cli_abort(bullets,
           call = call)
   }
 
   if(!file.exists(glue("{directory}/eml.xml"))){
-    bullets <- c("No metadata statement (`eml.xml`) is present in the specified directory.",
-                 i = "See `elm::use_metadata()` for an example metadata statement,",
-                 i = "then `build_metadata()` to convert to `eml.xml`.")
-    abort(bullets,
+    bullets <- c("No metadata statement ({.file eml.xml}) is present in the specified directory.",
+                 i = "See `elm::use_metadata()` for an example metadata statement.",
+                 i = "Use `build_metadata()` to convert to {.file eml.xml}.")
+    cli_abort(bullets,
           call = call)
   }
 

diff --git a/R/build_metadata.R b/R/build_metadata.R
@@ -6,25 +6,33 @@
 #' specified using the `directory` argument.
 #' 
 #' This function is a fairly shallow wrapper on top of functionality build
-#' in the `elm` package, particularly `read_elm()` and `write_elm()`. You can 
+#' in the `elm` package, particularly `read_md()` and `write_eml()`. You can 
 #' use that package to gain greater control, or to debug problems, should you 
 #' wish.
-#' @param x Path to a metadata statement stored in markdown format (.md).
+#' @param path Path to a metadata statement stored in markdown format (.md).
 #' @param file A file where the result should be saved. Defaults to 
 #' `data/eml.xml`.
 #' @returns Does not return an object to the workspace; called for the side
 #' effect of building a file named `meta.xml` in the `data` directory.
-#' @importFrom elm add_elm_header
-#' @importFrom elm read_elm
-#' @importFrom elm write_elm
+#' @importFrom elm add_eml_header
+#' @importFrom elm read_md
+#' @importFrom elm write_eml
 #' @export
 build_metadata <- function(x = "data", 
                            file = "./data/eml.xml"){
   if(!file.exists(x)){
-    abort("`x` doesn't exist in specified location.")
+    cli::cli_abort("{.file {x}} doesn't exist in specified location.")
   }
   # import file, ensure EML metadata is added, convert to XML
-  read_elm(x) |>
-    add_elm_header() |>
-    write_elm(file = file)
+  progress_update("Reading file...")
+  metadata_file <- read_md(x)
+
+  progress_update("Building xml components...")
+  built_file <- add_eml_header(metadata_file)
+
+  progress_update("Writing file...")
+  write_eml(built_file, file = file)
+
+  cli::cli_alert_success("Metadata successfully built. Saved as {.file /data/eml.xml}.")
+  cli::cli_progress_done()
 }
diff --git a/R/build_schema.R b/R/build_schema.R
@@ -9,18 +9,55 @@
 #' @param file (string) A file name for the resulting schema document.
 #' @returns Does not return an object to the workspace; called for the side
 #' effect of building a file named `meta.xml` in the specified directory.
-#' @importFrom elm write_elm
+#' @importFrom elm write_eml
 #' @importFrom glue glue
 #' @importFrom rlang abort
 #' @export
 build_schema <- function(x = "data", 
                          file = "./data/meta.xml") {
   x <- get_default_directory(x)
-  x |>
-    detect_dwc_files() |>
-    detect_dwc_fields() |>
-    add_front_matter() |>
-    write_elm(file = file)
+
+  files <- detect_dwc_files(x)
+  fields <- detect_dwc_fields(files)
+  result <- add_front_matter(fields)
+
+  progress_update("Writing file...")
+  write_eml(result, file = file)
+
+  cli::cli_alert_success("Schema successfully built. Saved as {.file /data/meta.xml}.")
+  cli::cli_progress_done()
+}
+
+#' Wait time
+#' @noRd
+#' @keywords Internal
+wait <- function(seconds = 1) {
+  Sys.sleep(seconds)
+}
+
+
+#' Function progress message
+#'
+#' @description
+#' Informs users about the progress of their ongoing function steps.
+#'
+#' @importFrom cli cli_progress_step
+#' @importFrom cli cli_progress_update
+#' @noRd
+#' @keywords Internal
+progress_update <- function(message) {
+  cli::cli_progress_step(
+    paste0(
+      message
+    ),
+    spinner = TRUE
+  )
+
+  for (i in 1:100) {
+    wait(0.0001) # remove zeroes to make messages slower
+    cli::cli_progress_update()
+  }
+
 }
 
 #' Internal function to create core/extension framework for files
@@ -35,6 +72,7 @@ build_schema <- function(x = "data",
 #' @noRd
 #' @keywords Internal
 detect_dwc_files <- function(directory){
+  progress_update("Detecting Darwin Core files...")
   available_exts <- dwc_extensions()
   supported_files <- available_exts |>
     pull("file")
@@ -47,8 +85,8 @@ detect_dwc_files <- function(directory){
                                 sep = ", ",
                                 last = " or ")
     bullets <- c(
-      glue("Specified directory (\"{directory}\") does not contain any dwc-compliant csv files."),
-      i = glue("Accepted names are {file_names}"))
+      glue("Specified directory (\"{directory}\") does not contain any Darwin Core-compliant csv files."),
+      i = glue("Accepted names are {file_names}."))
     abort(bullets)
   }
   available_exts |>
@@ -100,6 +138,7 @@ dwc_extensions <- function(){
 #' @noRd
 #' @keywords Internal
 detect_dwc_fields <- function(df){
+  progress_update("Detecting Darwin Core fields in dataset...")
   split(df, seq_len(nrow(df))) |>
     map(\(x){
       bind_rows(create_schema_row(x),
@@ -181,6 +220,7 @@ get_field_names <- function(file){
 #' @noRd
 #' @keywords Internal
 add_front_matter <- function(df){
+  progress_update("Building xml components...")
   front_row <- tibble(
     level = 1,
     label = "archive",

diff --git a/R/check_archive.R b/R/check_archive.R
@@ -1,7 +1,7 @@
 #' Check an archive against Darwin Core standards
 #' 
 #' This is a wrapper to two other packages; schema and EML files (i.e. xml) are
-#' checked with the `elm` package; csv files are checked with the `corroboree`
+#' checked with the `elm` package; csv files are checked with the `corella`
 #' package.
 #' @param x (string) A directory containing the files to be published, or 
 #' optionally a `.zip` file built from the same (i.e. with `build_archive()`). 
@@ -26,8 +26,8 @@ check_archive <- function(x = "data"){
 }
 
 #' Internal function to check all files
-#' @importFrom corroboree check_occurrences
-#' @importFrom elm check_elm
+#' @importFrom corella check_occurrences
+#' @importFrom elm check_eml
 #' @importFrom purrr map
 #' @importFrom readr read_csv
 #' @noRd
@@ -38,8 +38,8 @@ check_files <- function(filenames){
         switch(a, 
                "occurrences.csv" = {read_csv(a) |>
                                     check_occurrences()},
-               "meta.xml" = {check_elm(a)},
-               "eml.xml" = {check_elm(a)}
+               "meta.xml" = {check_eml(a)},
+               "eml.xml" = {check_eml(a)}
        )
   }) |>
     invisible()