Skip to content

Commit

Permalink
Config reader with expected schema validation
Browse files Browse the repository at this point in the history
  • Loading branch information
zsusswein committed Aug 7, 2024
1 parent d3b008f commit 671b22d
Show file tree
Hide file tree
Showing 9 changed files with 474 additions and 0 deletions.
5 changes: 5 additions & 0 deletions DESCRIPTION
Original file line number Diff line number Diff line change
Expand Up @@ -19,3 +19,8 @@ Suggests:
testthat (>= 3.0.0)
Config/testthat/edition: 3
URL: https://cdcgov.github.io/cfa-epinow2-pipeline/
Imports:
cli,
jsonlite,
jsonvalidate,
rlang
2 changes: 2 additions & 0 deletions NAMESPACE
Original file line number Diff line number Diff line change
@@ -1,3 +1,5 @@
# Generated by roxygen2: do not edit by hand

export(add_two_numbers)
export(fetch_config)
export(validate_config)
97 changes: 97 additions & 0 deletions R/config.R
Original file line number Diff line number Diff line change
@@ -0,0 +1,97 @@
#' Fetch the config from an external resource
#'
#' This step is the first part of the modeling pipeline. If
#' `blob_storage_container` is specified, it first downloads the Rt model run's
#' config from Azure Blob. It then validates the config at `config_path`
#' against the schema and reads it in from the filesystem. If any of these
#' steps fails, the pipeline fails with an informative error message. Note,
#' however, that a failure in this initial step suggests that something
#' fundamental is misspecified and the logs will likely not be preserved in a
#' Blob Container if running in Azure.
#'
#' The validation relies on `inst/data/config_schema.json` for validation. This
#' file is in `json-schema` notation and generated programmatically via
#' https://www.jsonschema.net/.
#'
#' @param config_path The path to the config file, either in the local
#'   filesystem or within an Azure Blob Storage container. If
#'   `blob_storage_container` is specified, the path is assumed to be within
#'   the specified container; otherwise it is assumed to be in the local
#'   filesystem.
#' @param local_dest The local directory to write the config to when
#'   downloading it from `blob_storage_container`. This argument is ignored
#'   unless `blob_storage_container` is specified. Defaults to `NULL`.
#' @param blob_storage_container The storage container holding the config at
#'   `config_path`. Defaults to `NULL`, in which case nothing is downloaded and
#'   the config is read from the local filesystem.
#' @param config_schema_path The path to the file holding the schema for the
#'   config json for the validator to use. The default is a relative path, so
#'   it is resolved against the current working directory.
#'
#' @return A list of lists, the config for the run. Validation and read
#'   failures are raised as errors of class `CFA_Rt`.
#' @export
fetch_config <- function(
    config_path,
    local_dest = NULL,
    blob_storage_container = NULL,
    config_schema_path = "data/config_schema.json") {
  if (rlang::is_null(blob_storage_container)) {
    cli::cli_alert(
      "No blob storage container provided. Reading from local path."
    )
  } else {
    # NOTE(review): after the download the config is still validated and read
    # from `config_path`, not from `local_dest`. Presumably the download
    # places the file where `config_path` resolves locally -- confirm against
    # `download_from_azure_blob()`.
    download_from_azure_blob(
      config_path,
      local_dest,
      container_name = blob_storage_container
    )
  }

  cli::cli_alert_info("Loading config from {.path {config_path}}")

  # Validate before parsing so schema violations are reported first.
  validate_config(config_path, config_schema_path)

  config <- rlang::try_fetch(
    jsonlite::read_json(config_path),
    error = function(con) {
      # Re-raise with pipeline-specific class, chaining the original error.
      cli::cli_abort(
        "Error loading config from {.path {config_path}}",
        parent = con,
        class = "CFA_Rt"
      )
    }
  )

  config
}

#' Compare loaded json against expectation in `inst/data/config_schema.json`
#'
#' Validates the config at `config_path` against the JSON schema at
#' `config_schema_path` with `jsonvalidate::json_validate()` (`ajv` engine).
#' A path that does not exist, or any error raised during validation, is
#' re-raised as an error of class `CFA_Rt` that reports both paths.
#'
#' @inheritParams fetch_config
#'
#' @return Invisibly, the value returned by `jsonvalidate::json_validate()`
#'   when validation succeeds. The function is called for its side effect of
#'   raising an error when the config is invalid.
#' @export
validate_config <- function(config_path, config_schema_path) {
  # `jsonvalidate` accepts either JSON text or a filename, so a path that does
  # not exist is not reported as a missing file but as a JSON parse failure.
  # Check for that case explicitly. Strings containing "{" are left untouched
  # so that inline JSON keeps working as before.
  is_missing_file <- function(path) {
    isTRUE(
      rlang::is_string(path) &&
        !grepl("{", path, fixed = TRUE) &&
        !file.exists(path)
    )
  }
  missing_files <- unlist(
    Filter(is_missing_file, list(config_path, config_schema_path))
  )
  if (length(missing_files) > 0) {
    cli::cli_abort(
      c(
        "Error while validating config",
        "x" = "File not found: {.path {missing_files}}",
        "!" = "Config path: {.path {config_path}}",
        "!" = "Schema path: {.path {config_schema_path}}"
      ),
      class = "CFA_Rt"
    )
  }

  is_config_valid <- rlang::try_fetch(
    jsonvalidate::json_validate(
      json = config_path,
      schema = config_schema_path,
      engine = "ajv",
      # Report every schema violation with details, not just the first one.
      verbose = TRUE,
      greedy = TRUE,
      # Turn a failed validation into an error so it is handled below.
      error = TRUE
    ),
    error = function(con) {
      # Re-raise with pipeline-specific class, chaining the original error.
      cli::cli_abort(
        c(
          "Error while validating config",
          "!" = "Config path: {.path {config_path}}",
          "!" = "Schema path: {.path {config_schema_path}}"
        ),
        parent = con,
        class = "CFA_Rt"
      )
    }
  )

  invisible(is_config_valid)
}
178 changes: 178 additions & 0 deletions inst/data/config_schema.json
Original file line number Diff line number Diff line change
@@ -0,0 +1,178 @@
{
"$schema": "http://json-schema.org/draft-06/schema#",
"$ref": "#/definitions/Epinow",
"definitions": {
"Epinow": {
"type": "object",
"additionalProperties": false,
"properties": {
"as_of_date": {
"type": "string",
"format": "date"
},
"disease": {
"type": "string"
},
"geo_value": {
"type": "string"
},
"geo_type": {
"type": "string"
},
"parameters": {
"$ref": "#/definitions/Parameters"
},
"data": {
"$ref": "#/definitions/Data"
},
"seed": {
"type": "integer"
},
"horizon": {
"type": "integer"
},
"priors": {
"$ref": "#/definitions/Priors"
},
"sampler_opts": {
"$ref": "#/definitions/SamplerOpts"
}
},
"required": [
"as_of_date",
"data",
"disease",
"geo_type",
"geo_value",
"horizon",
"parameters",
"priors",
"sampler_opts",
"seed"
],
"title": "Epinow"
},
"Data": {
"type": "object",
"additionalProperties": false,
"properties": {
"path": {
"type": "string"
},
"blob_storage_container": {
"type": ["null", "string"]
},
"report_date": {
"type": "array",
"items": {
"type": "string",
"format": "date"
}
},
"reference_date": {
"type": "array",
"items": {
"type": "string",
"format": "date"
}
}
},
"required": [
"blob_storage_container",
"path",
"reference_date",
"report_date"
],
"title": "Data"
},
"Parameters": {
"type": "object",
"additionalProperties": false,
"properties": {
"path": {
"type": "string"
},
"blob_storage_container": {
"type": ["null", "string"]
}
},
"required": [
"blob_storage_container",
"path"
],
"title": "Parameters"
},
"Priors": {
"type": "object",
"additionalProperties": false,
"properties": {
"rt": {
"$ref": "#/definitions/Rt"
},
"gp": {
"$ref": "#/definitions/Gp"
}
},
"required": [
"gp",
"rt"
],
"title": "Priors"
},
"Gp": {
"type": "object",
"additionalProperties": false,
"properties": {
"alpha_sd": {
"type": "number"
}
},
"required": [
"alpha_sd"
],
"title": "Gp"
},
"Rt": {
"type": "object",
"additionalProperties": false,
"properties": {
"mean": {
"type": "integer"
},
"sd": {
"type": "number"
}
},
"required": [
"mean",
"sd"
],
"title": "Rt"
},
"SamplerOpts": {
"type": "object",
"additionalProperties": false,
"properties": {
"cores": {
"type": "integer"
},
"chains": {
"type": "integer"
},
"adapt_delta": {
"type": "number"
},
"max_treedepth": {
"type": "integer"
}
},
"required": [
"adapt_delta",
"chains",
"cores",
"max_treedepth"
],
"title": "SamplerOpts"
}
}
}
48 changes: 48 additions & 0 deletions man/fetch_config.Rd

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

21 changes: 21 additions & 0 deletions man/validate_config.Rd

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

32 changes: 32 additions & 0 deletions tests/testthat/data/bad_sample_config.json
Original file line number Diff line number Diff line change
@@ -0,0 +1,32 @@

{
"as_of_date": "2023-01-01",
"geo_value": "test",
"geo_type": "test",
"report_date": [
"01-01"
],
"reference_date": [
"2023-01-01",
"2022-12-30",
"2022-12-29"
],
"seed": "abc",
"horizon": 14,
"priors": {
"rt": {
"mean": 1.0,
"sd": 0.2
},
"gp": {
"alpha_sd": 0.01
}
},
"sampler_opts": {
"cores": 4,
"chains": 4,
"adapt_delta": 0.99,
"max_treedepth": 12,
"not_a_parameter": -12
}
}
Loading

0 comments on commit 671b22d

Please sign in to comment.