CDCgov · zsusswein · Dec 9, 2024 · Dec 3, 2024 · Dec 3, 2024 · Dec 5, 2024
diff --git a/NEWS.md b/NEWS.md
@@ -1,5 +1,6 @@
 # CFAEpiNow2Pipeline (development version)
 
+* Populated the default values of the metadata to be saved.
 * Creating a Config class to make syncing configuration differences easier.
 * Add a JSON reader for the Config class.
 * Use the Config class throughout the pipeline.

diff --git a/R/config.R b/R/config.R
@@ -93,16 +93,15 @@ Parameters <- S7::new_class( # nolint: object_name_linter
 #' occur.
 #' @param report_date A list of strings representing report dates.
 #' @param reference_date A list of strings representing reference dates.
-#' @param production_date A list of strings representing production dates.
+#'
 #' @export
 Data <- S7::new_class( # nolint: object_name_linter
   "Data",
   properties = list(
     path = S7::class_character,
     blob_storage_container = character_or_null,
     report_date = S7::class_character,
-    reference_date = S7::class_character,
-    production_date = S7::class_character
+    reference_date = S7::class_character
   )
 )
 
@@ -116,6 +115,8 @@ Data <- S7::new_class( # nolint: object_name_linter
 #' date. Formatted as "YYYY-MM-DD".
 #' @param max_reference_date A string representing the maximum reference
 #' date. Formatted as "YYYY-MM-DD".
+#' @param production_date A string representing the production date.
+#' Formatted as "YYYY-MM-DD".
 #' @param disease A string specifying the disease being modeled.
 #' @param geo_value A string specifying the geographic value, usually a state.
 #' @param geo_type A string specifying the geographic type, usually "state".
@@ -146,6 +147,7 @@ Config <- S7::new_class( # nolint: object_name_linter
     min_reference_date = S7::class_character,
     max_reference_date = S7::class_character,
     report_date = S7::class_character,
+    production_date = S7::class_character,
     disease = S7::class_character,
     geo_value = S7::class_character,
     geo_type = S7::class_character,

diff --git a/R/pipeline.R b/R/pipeline.R
@@ -211,15 +211,34 @@ execute_model_logic <- function(config, output_dir) {
     quantiles = unlist(config@quantile_width)
   )
 
+  # All the top level metadata fields
+  metadata <- list(
+    job_id = config@job_id,
+    task_id = config@task_id,
+    data_path = if (rlang::is_empty(config@data@path)) {
+      "" # is_empty is TRUE for NULL and zero-length values: record "" then
+    } else {
+      config@data@path
+    },
+    model = config@model,
+    disease = config@disease,
+    geo_value = config@geo_value,
+    report_date = config@report_date,
+    production_date = config@production_date,
+    max_reference_date = config@max_reference_date,
+    min_reference_date = config@min_reference_date,
+    exclusions = config@exclusions@path,
+    run_at = format(Sys.time(), "%Y-%m-%dT%H:%M:%S%z")
+  )
+
   write_model_outputs(
     fit = fit,
     samples = samples,
     summaries = summaries,
     output_dir = output_dir,
     job_id = config@job_id,
     task_id = config@task_id,
-    # TODO: metadata
-    metadata = list()
+    metadata = metadata
   )
 
   return(TRUE)

diff --git a/R/write_output.R b/R/write_output.R
@@ -9,7 +9,9 @@
 #' @param summaries A data.table as returned by [process_quantiles()]
 #' @param job_id String. The identifier for the job.
 #' @param task_id String. The identifier for the task.
-#' @param metadata List. Additional metadata to be included in the output.
+#' @param metadata List. Additional metadata to be included in the output. The
+#' paths to the samples, summaries, and model output will be added to the
+#' metadata list.
 #'
 #' @return Invisible NULL. The function is called for its side effects.
 #' @export
@@ -65,6 +67,15 @@ write_model_outputs <- function(
         task_id,
         "metadata.json"
       )
+      # Add paths to metadata.
+      metadata <- utils::modifyList(
+        metadata,
+        list(
+          samples_path = samples_path,
+          summaries_path = summaries_path,
+          model_path = model_path
+        )
+      )
       jsonlite::write_json(metadata, metadata_path, pretty = TRUE)
       cli::cli_alert_success("Wrote metadata to {.path {metadata_path}}")
     },

diff --git a/man/Config.Rd b/man/Config.Rd
diff --git a/man/Data.Rd b/man/Data.Rd
diff --git a/man/write_model_outputs.Rd b/man/write_model_outputs.Rd
diff --git a/tests/testthat/data/sample_config_no_exclusion.json b/tests/testthat/data/sample_config_no_exclusion.json
@@ -7,6 +7,7 @@
     "min_reference_date": "2023-01-02",
     "max_reference_date": "2023-01-07",
     "report_date": "2023-10-28",
+    "production_date": "2024-10-28",
     "quantile_width": [0.5, 0.95],
     "model": "EpiNow2_test",
     "parameters": {

diff --git a/tests/testthat/data/sample_config_with_exclusion.json b/tests/testthat/data/sample_config_with_exclusion.json
@@ -7,6 +7,7 @@
     "min_reference_date": "2023-01-02",
     "max_reference_date": "2023-01-07",
     "report_date": "2023-10-28",
+    "production_date": "2024-10-28",
     "quantile_width": [0.5, 0.95],
     "model": "EpiNow2_test",
     "parameters": {

diff --git a/tests/testthat/helper-expect_pipeline_files_written.R b/tests/testthat/helper-expect_pipeline_files_written.R
@@ -1,4 +1,8 @@
-expect_pipeline_files_written <- function(output_dir, job_id, task_id) {
+expect_pipeline_files_written <- function(
+    output_dir,
+    job_id,
+    task_id,
+    check_logs = TRUE) {
   ########
   # Assert output files all exist
   job_path <- file.path(output_dir, job_id)
@@ -25,11 +29,14 @@ expect_pipeline_files_written <- function(output_dir, job_id, task_id) {
     )
   )
   # Model
-  file.exists(
-    file.path(task_path, "model.rds")
-  )
+  expect_true(file.exists(file.path(task_path, "model.rds")))
   # Logs
-  file.exists(
-    file.path(task_path, "logs.txt")
-  )
+  if (check_logs) {
+    expect_true(file.exists(file.path(task_path, "logs.txt")))
+  }
+  # Non-empty metadata
+  metadata_path <- file.path(task_path, "metadata.json")
+  expect_true(file.exists(metadata_path))
+  metadata <- jsonlite::read_json(metadata_path)
+  expect_gt(length(metadata), 0)
 }
diff --git a/tests/testthat/test-pipeline.R b/tests/testthat/test-pipeline.R
@@ -88,7 +88,11 @@ test_that("Process pipeline produces expected outputs and returns success", {
   expect_pipeline_files_written(
     output_dir,
     config@job_id,
-    config@task_id
+    config@task_id,
+    # Don't check logs here because logs are set up by orchestrate_pipeline(),
+    # and this test only exercises execute_model_logic(), which is called
+    # after logs are set up in orchestrate_pipeline().
+    check_logs = FALSE
   )
 })
 
@@ -112,7 +116,11 @@ test_that("Runs on config from generator as of 2024-11-26", {
   expect_pipeline_files_written(
     output_dir,
     config@job_id,
-    config@task_id
+    config@task_id,
+    # Do not check for log output here: logs are created in
+    # `orchestrate_pipeline()`, and this test only calls
+    # `execute_model_logic()`, which runs after that log setup.
+    check_logs = FALSE
   )
 })