From 6b47b112746f645daf134dce1255b6dadd1cd33e Mon Sep 17 00:00:00 2001 From: AFg6K7h4fhy2 <127630341+AFg6K7h4fhy2@users.noreply.github.com> Date: Wed, 25 Sep 2024 16:21:50 -0400 Subject: [PATCH 1/5] create file for example infectious disease forecast to be stored in forecasttools-py --- vignettes/example-infectious-disease-forecast.qmd | 5 +++++ 1 file changed, 5 insertions(+) create mode 100644 vignettes/example-infectious-disease-forecast.qmd diff --git a/vignettes/example-infectious-disease-forecast.qmd b/vignettes/example-infectious-disease-forecast.qmd new file mode 100644 index 0000000..e7d8cb7 --- /dev/null +++ b/vignettes/example-infectious-disease-forecast.qmd @@ -0,0 +1,5 @@ +--- +title: "Example Infectious Disease Forecast" +format: gfm +engine: jupyter +--- From cc7dc9590097a00cb7839828fb4e8bb2c424a63b Mon Sep 17 00:00:00 2001 From: AFg6K7h4fhy2 <127630341+AFg6K7h4fhy2@users.noreply.github.com> Date: Wed, 25 Sep 2024 16:28:16 -0400 Subject: [PATCH 2/5] begin background information for flusight output formatting --- vignettes/format-flusight-output.qmd | 41 ++++++++++++++++++++++++++++ 1 file changed, 41 insertions(+) create mode 100644 vignettes/format-flusight-output.qmd diff --git a/vignettes/format-flusight-output.qmd b/vignettes/format-flusight-output.qmd new file mode 100644 index 0000000..42cb205 --- /dev/null +++ b/vignettes/format-flusight-output.qmd @@ -0,0 +1,41 @@ +--- +title: "Formatting Output For FluSight Submission" + +format: gfm +engine: jupyter +--- + +_This document details the process of using `forecasttools-py` to turn an infectious disease forecast produced in Python and stored as an `InferenceData` object into a correctly formatted FluSight submission._ + +--- + +# Background + +> The FluSight Challenge[^historical_flusight] uses the [`hubverse`](https://hubverse.io/) framework to accept forecast submissions from contributing teams via [GitHub pull 
requests](https://docs.github.com/en/pull-requests/collaborating-with-pull-requests/proposing-changes-to-your-work-with-pull-requests/about-pull-requests). The submitted `model-output` files need to follow a [specified formatting schema](https://github.com/cdcepi/FluSight-forecast-hub/blob/main/model-output/README.md). `forecasttools-py` provides functions to automate production of FluSight submissions. + +[^historical_flusight]: For FluSight data, please refer to [this repository](https://github.com/cdcepi/Flusight-forecast-data). For the FluSight Hub repository, please refer to [the hub repository](https://github.com/cdcepi/FluSight-forecast-hub), and for historical FluSight forecasts, please refer to [the forecasts repository](https://github.com/cdcepi/FluSight-forecasts). + +Below is an example[^example_forecast] of model output submitted to the FluSight Hub: + +[^example_forecast]: [This forecast](https://github.com/cdcepi/FluSight-forecast-hub/blob/main/model-output/cfarenewal-cfaepimlight/2023-12-02-cfarenewal-cfaepimlight.csv) was made for `2023-12-02` by the model `cfarenewal-cfaepimlight`. 
+ +``` +reference_date,target,horizon,target_end_date,location,output_type,output_type_id,value +2023-12-02,wk inc flu hosp,-1,2023-11-25,01,quantile,0.01,48 +2023-12-02,wk inc flu hosp,-1,2023-11-25,01,quantile,0.025,52 +2023-12-02,wk inc flu hosp,-1,2023-11-25,01,quantile,0.05,57 +2023-12-02,wk inc flu hosp,-1,2023-11-25,01,quantile,0.1,61 +2023-12-02,wk inc flu hosp,-1,2023-11-25,01,quantile,0.15,64 +2023-12-02,wk inc flu hosp,-1,2023-11-25,01,quantile,0.2,67 +2023-12-02,wk inc flu hosp,-1,2023-11-25,01,quantile,0.25,70 +2023-12-02,wk inc flu hosp,-1,2023-11-25,01,quantile,0.3,72 +2023-12-02,wk inc flu hosp,-1,2023-11-25,01,quantile,0.35,74 +2023-12-02,wk inc flu hosp,-1,2023-11-25,01,quantile,0.4,75 +2023-12-02,wk inc flu hosp,-1,2023-11-25,01,quantile,0.45,77 +2023-12-02,wk inc flu hosp,-1,2023-11-25,01,quantile,0.5,79 +2023-12-02,wk inc flu hosp,-1,2023-11-25,01,quantile,0.55,81 +2023-12-02,wk inc flu hosp,-1,2023-11-25,01,quantile,0.6,83 +2023-12-02,wk inc flu hosp,-1,2023-11-25,01,quantile,0.65,85 +2023-12-02,wk inc flu hosp,-1,2023-11-25,01,quantile,0.7,87 +2023-12-02,wk inc flu hosp,-1,2023-11-25,01,quantile,0.75,89 +``` From 3fed4c85df0feb36c8876e0f43111972ed9b849c Mon Sep 17 00:00:00 2001 From: AFg6K7h4fhy2 <127630341+AFg6K7h4fhy2@users.noreply.github.com> Date: Wed, 25 Sep 2024 17:07:11 -0400 Subject: [PATCH 3/5] start data file --- forecasttools_py/data.py | 3 +++ 1 file changed, 3 insertions(+) create mode 100644 forecasttools_py/data.py diff --git a/forecasttools_py/data.py b/forecasttools_py/data.py new file mode 100644 index 0000000..f039ad9 --- /dev/null +++ b/forecasttools_py/data.py @@ -0,0 +1,3 @@ +""" +Retrieves US 2020 Census data and reads in NHSN influenza and COVID hospitalization counts data for use in example forecasting. 
+""" From ccde7ff1bb541b56b68e8120eac01970dd08913e Mon Sep 17 00:00:00 2001 From: AFg6K7h4fhy2 <127630341+AFg6K7h4fhy2@users.noreply.github.com> Date: Thu, 26 Sep 2024 09:55:19 -0400 Subject: [PATCH 4/5] read census data function --- forecasttools_py/data.py | 37 ++++++++++++++++++++++++++++++++++++- 1 file changed, 36 insertions(+), 1 deletion(-) diff --git a/forecasttools_py/data.py b/forecasttools_py/data.py index f039ad9..26e702c 100644 --- a/forecasttools_py/data.py +++ b/forecasttools_py/data.py @@ -1,3 +1,38 @@ """ -Retrieves US 2020 Census data and reads in NHSN influenza and COVID hospitalization counts data for use in example forecasting. +Retrieves US 2020 Census data, +hospitalization count data, and +example forecast data. """ + +import os + +import polars as pl + + +def get_census_data( + save_path: str = os.getcwd(), + save_as_csv: bool = False, + url: str = "https://www2.census.gov/geo/docs/reference/state.txt", +): + """ + Retrieves US 2020 Census data in a + three column polars dataframe. 
+ """ + nation = pl.DataFrame( + { + "location_code": ["US"], + "short_name": ["US"], + "long_name": ["United States"], + } + ) + jurisdictions = pl.read_csv(url, separator="|").select( + [ + pl.col("STATE").alias("location_code").cast(pl.Utf8), + pl.col("STUSAB").alias("short_name"), + pl.col("STATE_NAME").alias("long_name"), + ] + ) + flusight_location_table = nation.vstack(jurisdictions) + if save_as_csv: + flusight_location_table.write_csv("flusight_location_table.csv") + return flusight_location_table From 74addb95a24c2d3a605afed201b12c55da59ebe5 Mon Sep 17 00:00:00 2001 From: AFg6K7h4fhy2 <127630341+AFg6K7h4fhy2@users.noreply.github.com> Date: Thu, 26 Sep 2024 14:46:23 -0400 Subject: [PATCH 5/5] minor update to data files; moving to data branch --- .gitignore | 4 ++++ vignettes/example-infectious-disease-forecast.qmd | 5 +++++ 2 files changed, 9 insertions(+) diff --git a/.gitignore b/.gitignore index f32bdb0..be49f51 100644 --- a/.gitignore +++ b/.gitignore @@ -170,3 +170,7 @@ cython_debug/ ################################################################################ poetry.lock + +*.csv + +*.tsv diff --git a/vignettes/example-infectious-disease-forecast.qmd b/vignettes/example-infectious-disease-forecast.qmd index e7d8cb7..0efd44f 100644 --- a/vignettes/example-infectious-disease-forecast.qmd +++ b/vignettes/example-infectious-disease-forecast.qmd @@ -3,3 +3,8 @@ title: "Example Infectious Disease Forecast" format: gfm engine: jupyter --- + + +# Background + +The fitting data for this example came from HHS's HealthData website, which hosted the National Healthcare Safety Network's (NHSN) [dataset](https://healthdata.gov/Hospital/COVID-19-Reported-Patient-Impact-and-Hospital-Capa/g62h-syeh/about_data); the author located this dataset via the FluSight Hub's [data GitHub repository](https://github.com/cdcepi/Flusight-forecast-data/tree/master/data-truth).