Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Add metadata output #105

Merged
merged 6 commits into from
Dec 9, 2024
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
1 change: 1 addition & 0 deletions NEWS.md
Original file line number Diff line number Diff line change
@@ -1,5 +1,6 @@
# CFAEpiNow2Pipeline (development version)

* Populate the default values of the metadata to be saved.
* Create a Config class to make syncing configuration differences easier.
* Add a JSON reader for the Config class.
* Use the Config class throughout the pipeline.
Expand Down
8 changes: 5 additions & 3 deletions R/config.R
Original file line number Diff line number Diff line change
Expand Up @@ -93,16 +93,15 @@ Parameters <- S7::new_class( # nolint: object_name_linter
#' occur.
#' @param report_date A list of strings representing report dates.
#' @param reference_date A list of strings representing reference dates.
#' @param production_date A list of strings representing production dates.

#' @export
Data <- S7::new_class( # nolint: object_name_linter
"Data",
properties = list(
path = S7::class_character,
blob_storage_container = character_or_null,
report_date = S7::class_character,
reference_date = S7::class_character,
production_date = S7::class_character
reference_date = S7::class_character
)
)

Expand All @@ -116,6 +115,8 @@ Data <- S7::new_class( # nolint: object_name_linter
#' date. Formatted as "YYYY-MM-DD".
#' @param max_reference_date A string representing the maximum reference
#' date. Formatted as "YYYY-MM-DD".
#' @param production_date A string representing the production date.
#' Formatted as "YYYY-MM-DD".
#' @param disease A string specifying the disease being modeled.
#' @param geo_value A string specifying the geographic value, usually a state.
#' @param geo_type A string specifying the geographic type, usually "state".
Expand Down Expand Up @@ -146,6 +147,7 @@ Config <- S7::new_class( # nolint: object_name_linter
min_reference_date = S7::class_character,
max_reference_date = S7::class_character,
report_date = S7::class_character,
production_date = S7::class_character,
disease = S7::class_character,
geo_value = S7::class_character,
geo_type = S7::class_character,
Expand Down
23 changes: 21 additions & 2 deletions R/pipeline.R
Original file line number Diff line number Diff line change
Expand Up @@ -211,15 +211,34 @@ execute_model_logic <- function(config, output_dir) {
quantiles = unlist(config@quantile_width)
)

# Top-level metadata fields describing this run. They are passed to
# write_model_outputs() below via its `metadata` argument.
metadata <- list(
  job_id = config@job_id,
  task_id = config@task_id,
  # Record the input data path, falling back to "" when it is missing.
  # rlang::is_empty() is TRUE for NULL and for zero-length vectors, so the
  # empty case must map to "" and the non-empty case to the path itself.
  # A plain if/else is used because the condition is a single logical.
  data_path = if (rlang::is_empty(config@data@path)) {
    ""
  } else {
    config@data@path
  },
  model = config@model,
  disease = config@disease,
  geo_value = config@geo_value,
  report_date = config@report_date,
  production_date = config@production_date,
  max_reference_date = config@max_reference_date,
  min_reference_date = config@min_reference_date,
  exclusions = config@exclusions@path,
  # Timestamp of this run, with the numeric UTC offset appended (%z).
  run_at = format(Sys.time(), "%Y-%m-%dT%H:%M:%S%z")
)

write_model_outputs(
fit = fit,
samples = samples,
summaries = summaries,
output_dir = output_dir,
job_id = config@job_id,
task_id = config@task_id,
# TODO: metadata
metadata = list()
metadata = metadata
)

return(TRUE)
Expand Down
13 changes: 12 additions & 1 deletion R/write_output.R
Original file line number Diff line number Diff line change
Expand Up @@ -9,7 +9,9 @@
#' @param summaries A data.table as returned by [process_quantiles()]
#' @param job_id String. The identifier for the job.
#' @param task_id String. The identifier for the task.
#' @param metadata List. Additional metadata to be included in the output.
#' @param metadata List. Additional metadata to be included in the output. The
#' paths to the samples, summaries, and model output will be added to the
#' metadata list.
#'
#' @return Invisible NULL. The function is called for its side effects.
#' @export
Expand Down Expand Up @@ -65,6 +67,15 @@ write_model_outputs <- function(
task_id,
"metadata.json"
)
# Add paths to metadata.
metadata <- utils::modifyList(
metadata,
list(
samples_path = samples_path,
summaries_path = summaries_path,
model_path = model_path
)
)
jsonlite::write_json(metadata, metadata_path, pretty = TRUE)
cli::cli_alert_success("Wrote metadata to {.path {metadata_path}}")
},
Expand Down
4 changes: 4 additions & 0 deletions man/Config.Rd

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

5 changes: 1 addition & 4 deletions man/Data.Rd

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

4 changes: 3 additions & 1 deletion man/write_model_outputs.Rd

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

1 change: 1 addition & 0 deletions tests/testthat/data/sample_config_no_exclusion.json
Original file line number Diff line number Diff line change
Expand Up @@ -7,6 +7,7 @@
"min_reference_date": "2023-01-02",
"max_reference_date": "2023-01-07",
"report_date": "2023-10-28",
"production_date": "2024-10-28",
"quantile_width": [0.5, 0.95],
"model": "EpiNow2_test",
"parameters": {
Expand Down
1 change: 1 addition & 0 deletions tests/testthat/data/sample_config_with_exclusion.json
Original file line number Diff line number Diff line change
Expand Up @@ -7,6 +7,7 @@
"min_reference_date": "2023-01-02",
"max_reference_date": "2023-01-07",
"report_date": "2023-10-28",
"production_date": "2024-10-28",
"quantile_width": [0.5, 0.95],
"model": "EpiNow2_test",
"parameters": {
Expand Down
21 changes: 14 additions & 7 deletions tests/testthat/helper-expect_pipeline_files_written.R
Original file line number Diff line number Diff line change
@@ -1,4 +1,8 @@
expect_pipeline_files_written <- function(output_dir, job_id, task_id) {
expect_pipeline_files_written <- function(
output_dir,
job_id,
task_id,
check_logs = TRUE) {
########
# Assert output files all exist
job_path <- file.path(output_dir, job_id)
Expand All @@ -25,11 +29,14 @@ expect_pipeline_files_written <- function(output_dir, job_id, task_id) {
)
)
# Model
file.exists(
file.path(task_path, "model.rds")
)
expect_true(file.exists(file.path(task_path, "model.rds")))
# Logs
file.exists(
file.path(task_path, "logs.txt")
)
if (check_logs) {
expect_true(file.exists(file.path(task_path, "logs.txt")))
}
# Non-empty metadata
metadata_path <- file.path(task_path, "metadata.json")
expect_true(file.exists(metadata_path))
metadata <- jsonlite::read_json(metadata_path)
expect_gt(length(metadata), 0)
}
12 changes: 10 additions & 2 deletions tests/testthat/test-pipeline.R
Original file line number Diff line number Diff line change
Expand Up @@ -88,7 +88,11 @@ test_that("Process pipeline produces expected outputs and returns success", {
expect_pipeline_files_written(
output_dir,
config@job_id,
config@task_id
config@task_id,
# Don't check logs here: log files are set up by orchestrate_pipeline(),
# but this test calls execute_model_logic() directly, which is normally
# invoked only after orchestrate_pipeline() has set up logging.
check_logs = FALSE
)
})

Expand All @@ -112,7 +116,11 @@ test_that("Runs on config from generator as of 2024-11-26", {
expect_pipeline_files_written(
output_dir,
config@job_id,
config@task_id
config@task_id,
# Do not check for log output here, because logs get created in
# `orchestrate_pipeline()`, and this test only calls `execute_model_logic()`,
# which gets called after the log files have been created.
check_logs = FALSE
)
})

Expand Down
Loading