Skip to content

Commit

Permalink
Config reader with expected schema validation
Browse files Browse the repository at this point in the history
  • Loading branch information
zsusswein committed Aug 7, 2024
1 parent d3b008f commit 196d7e2
Show file tree
Hide file tree
Showing 11 changed files with 494 additions and 2 deletions.
1 change: 0 additions & 1 deletion .pre-commit-config.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -7,7 +7,6 @@ repos:
hooks:
- id: style-files
args: [--style_pkg=styler, --style_fun=tidyverse_style]
- id: roxygenize
- id: use-tidy-description
- id: lintr
- id: readme-rmd-rendered
Expand Down
7 changes: 6 additions & 1 deletion DESCRIPTION
Original file line number Diff line number Diff line change
Expand Up @@ -14,8 +14,13 @@ Description: Add logging, metadata handling, and data handling
License: Apache License (>= 2)
Encoding: UTF-8
Roxygen: list(markdown = TRUE)
RoxygenNote: 7.3.1
RoxygenNote: 7.3.2
Suggests:
testthat (>= 3.0.0)
Config/testthat/edition: 3
URL: https://cdcgov.github.io/cfa-epinow2-pipeline/
Imports:
cli,
jsonlite,
jsonvalidate,
rlang
2 changes: 2 additions & 0 deletions NAMESPACE
Original file line number Diff line number Diff line change
@@ -1,3 +1,5 @@
# Generated by roxygen2: do not edit by hand

export(add_two_numbers)
export(fetch_config)
export(validate_config)
1 change: 1 addition & 0 deletions NEWS.md
Original file line number Diff line number Diff line change
@@ -1,4 +1,5 @@
# CFAEpiNow2Pipeline (development version)

* Config reader with schema validation and enforcement
* CI running on Ubuntu only & working pkgdown deploy to GitHub Pages
* Initial R package with checks running in CI
100 changes: 100 additions & 0 deletions R/config.R
Original file line number Diff line number Diff line change
@@ -0,0 +1,100 @@
#' Fetch the config from an external resource
#'
#' This step is the first part of the modeling pipeline. It looks to Azure Blob
#' and downloads the Rt model run's config to the local config (if
#' `blob_storage_container` is specified), reads the config in from the
#' filesystem, and validates that it matches expectations. If any of these steps
#' fails, the pipeline fails with an informative error message. Note, however,
#' that a failure in this initial step suggests that something fundamental is
#' misspecified and the logs will likely not be preserved in a Blob Container if
#' running in Azure.
#'
#' The validation relies on `inst/extdata/config_schema.json`. This file is in
#' `json-schema` notation and was generated programmatically via
#' https://www.jsonschema.net/.
#'
#' @param config_path The path to the config file, either in the local
#'   filesystem or within an Azure Blob Storage container. If
#'   `blob_storage_container` is specified, the path is assumed to be within
#'   the specified container; otherwise it is assumed to be in the local
#'   filesystem.
#' @param local_dest The local directory to write the config to when downloading
#'   it from `blob_storage_container`. This argument is ignored unless
#'   `blob_storage_container` is specified, in which case it is required.
#'   Defaults to `NULL`.
#' @param blob_storage_container The storage container holding the config at
#'   `config_path`. Defaults to `NULL`, meaning the config is read directly
#'   from the local filesystem.
#' @param config_schema_path The path to the file holding the schema for the
#'   config json for the validator to use.
#'
#' @return A list of lists, the config for the run. Errors raised by this
#'   function carry the class `CFA_Rt`.
#' @export
fetch_config <- function(
    config_path,
    local_dest = NULL,
    blob_storage_container = NULL,
    config_schema_path = system.file("extdata/config_schema.json",
      package = "CFAEpiNow2Pipeline"
    )) {
  if (!rlang::is_null(blob_storage_container)) {
    # A download needs somewhere to land; fail early with a clear message
    # rather than handing `NULL` to the downloader.
    if (rlang::is_null(local_dest)) {
      cli::cli_abort(
        c(
          "{.arg local_dest} is required with {.arg blob_storage_container}",
          "!" = "Blob storage container: {.val {blob_storage_container}}"
        ),
        class = "CFA_Rt"
      )
    }
    # NOTE(review): the config is downloaded into `local_dest`, but the steps
    # below still validate and read `config_path`. Confirm that
    # `download_from_azure_blob()` leaves the file at `config_path`.
    download_from_azure_blob(
      config_path,
      local_dest,
      container_name = blob_storage_container
    )
  } else {
    cli::cli_alert(
      "No blob storage container provided. Reading from local path."
    )
  }

  cli::cli_alert_info("Loading config from {.path {config_path}}")
  # Validate before parsing so schema violations surface as their own error.
  validate_config(config_path, config_schema_path)

  # Re-raise any read/parse failure with the path attached and the
  # pipeline's error class, keeping the original error as the parent.
  rlang::try_fetch(
    jsonlite::read_json(config_path),
    error = function(con) {
      cli::cli_abort(
        "Error loading config from {.path {config_path}}",
        parent = con,
        class = "CFA_Rt"
      )
    }
  )
}

#' Compare a config file against the expected json schema
#'
#' Validates the json at `config_path` against the schema at
#' `config_schema_path` (by default the packaged
#' `inst/extdata/config_schema.json`). Any problem, whether a missing file or
#' a failure reported by the validator, is raised as an error of class
#' `CFA_Rt`.
#'
#' @inheritParams fetch_config
#' @return The result of [jsonvalidate::json_validate()], invisibly. Because
#'   validation is run with `error = TRUE`, an invalid config raises an error
#'   rather than returning.
#' @export
validate_config <- function(
    config_path,
    config_schema_path = system.file("extdata/config_schema.json",
      package = "CFAEpiNow2Pipeline"
    )) {
  # `json_validate()` treats a string that is not an existing file as literal
  # json, so a mistyped path (or the "" that `system.file()` returns when the
  # schema is not installed) would surface as a confusing parse error. Check
  # for the files explicitly first. Non-string inputs fall through to the
  # validator, which reports them itself.
  paths_to_check <- list(Config = config_path, Schema = config_schema_path)
  for (label in names(paths_to_check)) {
    path <- paths_to_check[[label]]
    if (rlang::is_string(path) && !file.exists(path)) {
      cli::cli_abort(
        "{label} file does not exist: {.path {path}}",
        class = "CFA_Rt"
      )
    }
  }

  is_config_valid <- rlang::try_fetch(
    jsonvalidate::json_validate(
      json = config_path,
      schema = config_schema_path,
      engine = "ajv",
      verbose = TRUE,
      greedy = TRUE,
      error = TRUE
    ),
    error = function(con) {
      # Attach both paths so the failing config/schema pair is identifiable,
      # and keep the validator's own message as the parent error.
      cli::cli_abort(
        c(
          "Error while validating config",
          "!" = "Config path: {.path {config_path}}",
          "!" = "Schema path: {.path {config_schema_path}}"
        ),
        parent = con,
        class = "CFA_Rt"
      )
    }
  )

  invisible(is_config_valid)
}
191 changes: 191 additions & 0 deletions inst/extdata/config_schema.json
Original file line number Diff line number Diff line change
@@ -0,0 +1,191 @@
{
"$schema": "http://json-schema.org/draft-06/schema#",
"$ref": "#/definitions/Epinow2",
"definitions": {
"Epinow2": {
"type": "object",
"additionalProperties": false,
"properties": {
"job_id": {
"type": "string",
"format": "uuid"
},
"task_id": {
"type": "string",
"format": "uuid"
},
"as_of_date": {
"type": "string",
"format": "date"
},
"disease": {
"type": "string"
},
"geo_value": {
"type": "array",
"items": {
"type": "string"
}
},
"geo_type": {
"type": "string"
},
"parameters": {
"$ref": "#/definitions/Parameters"
},
"data": {
"$ref": "#/definitions/Data"
},
"seed": {
"type": "integer"
},
"horizon": {
"type": "integer"
},
"priors": {
"$ref": "#/definitions/Priors"
},
"sampler_opts": {
"$ref": "#/definitions/SamplerOpts"
}
},
"required": [
"as_of_date",
"data",
"disease",
"geo_type",
"geo_value",
"horizon",
"job_id",
"parameters",
"priors",
"sampler_opts",
"seed",
"task_id"
],
"title": "Epinow2"
},
"Data": {
"type": "object",
"additionalProperties": false,
"properties": {
"path": {
"type": "string"
},
"blob_storage_container": {
"type": ["null", "string"]
},
"report_date": {
"type": "array",
"items": {
"type": "string",
"format": "date"
}
},
"reference_date": {
"type": "array",
"items": {
"type": "string",
"format": "date"
}
}
},
"required": [
"blob_storage_container",
"path",
"reference_date",
"report_date"
],
"title": "Data"
},
"Parameters": {
"type": "object",
"additionalProperties": false,
"properties": {
"path": {
"type": "string"
},
"blob_storage_container": {
"type": ["string", "null"]
}
},
"required": [
"blob_storage_container",
"path"
],
"title": "Parameters"
},
"Priors": {
"type": "object",
"additionalProperties": false,
"properties": {
"rt": {
"$ref": "#/definitions/Rt"
},
"gp": {
"$ref": "#/definitions/Gp"
}
},
"required": [
"gp",
"rt"
],
"title": "Priors"
},
"Gp": {
"type": "object",
"additionalProperties": false,
"properties": {
"alpha_sd": {
"type": "number"
}
},
"required": [
"alpha_sd"
],
"title": "Gp"
},
"Rt": {
"type": "object",
"additionalProperties": false,
"properties": {
"mean": {
"type": "integer"
},
"sd": {
"type": "number"
}
},
"required": [
"mean",
"sd"
],
"title": "Rt"
},
"SamplerOpts": {
"type": "object",
"additionalProperties": false,
"properties": {
"cores": {
"type": "integer"
},
"chains": {
"type": "integer"
},
"adapt_delta": {
"type": "number"
},
"max_treedepth": {
"type": "integer"
}
},
"required": [
"adapt_delta",
"chains",
"cores",
"max_treedepth"
],
"title": "SamplerOpts"
}
}
}
49 changes: 49 additions & 0 deletions man/fetch_config.Rd

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

Loading

0 comments on commit 196d7e2

Please sign in to comment.