diff --git a/.gitignore b/.gitignore index f32bdb0..be49f51 100644 --- a/.gitignore +++ b/.gitignore @@ -170,3 +170,7 @@ cython_debug/ ################################################################################ poetry.lock + +*.csv + +*.tsv diff --git a/forecasttools_py/data.py b/forecasttools_py/data.py new file mode 100644 index 0000000..26e702c --- /dev/null +++ b/forecasttools_py/data.py @@ -0,0 +1,75 @@
+"""
+Retrieves US 2020 Census data,
+hospitalization count data, and
+example forecast data.
+"""
+
+import os
+from typing import Optional
+
+import polars as pl
+
+
+def get_census_data(
+    save_path: Optional[str] = None,
+    save_as_csv: bool = False,
+    url: str = "https://www2.census.gov/geo/docs/reference/state.txt",
+) -> pl.DataFrame:
+    """
+    Retrieves the US Census jurisdiction
+    reference table as a three column polars
+    dataframe, with a national row first.
+
+    Parameters
+    ----------
+    save_path : Optional[str]
+        Directory in which the csv is written
+        when save_as_csv is True. Defaults to
+        None, meaning the working directory
+        at call time.
+    save_as_csv : bool
+        Whether to also write the table to
+        flusight_location_table.csv. Defaults
+        to False.
+    url : str
+        Location of the pipe-delimited
+        reference file to read.
+
+    Returns
+    -------
+    pl.DataFrame
+        A dataframe with columns
+        location_code, short_name, and
+        long_name.
+    """
+    # resolve the directory at call time; an
+    # os.getcwd() default in the signature is
+    # evaluated once, at import
+    if save_path is None:
+        save_path = os.getcwd()
+    nation = pl.DataFrame(
+        {
+            "location_code": ["US"],
+            "short_name": ["US"],
+            "long_name": ["United States"],
+        }
+    )
+    jurisdictions = pl.read_csv(url, separator="|").select(
+        [
+            # zero-pad to two characters in case
+            # the reader inferred integers and
+            # dropped leading zeros (e.g. "01")
+            pl.col("STATE")
+            .cast(pl.Utf8)
+            .str.zfill(2)
+            .alias("location_code"),
+            pl.col("STUSAB").alias("short_name"),
+            pl.col("STATE_NAME").alias("long_name"),
+        ]
+    )
+    flusight_location_table = nation.vstack(jurisdictions)
+    if save_as_csv:
+        flusight_location_table.write_csv(
+            os.path.join(save_path, "flusight_location_table.csv")
+        )
+    return flusight_location_table
diff --git a/vignettes/example-infectious-disease-forecast.qmd b/vignettes/example-infectious-disease-forecast.qmd new file mode 100644 index 0000000..0efd44f --- /dev/null +++ b/vignettes/example-infectious-disease-forecast.qmd @@ -0,0 +1,10 @@ +--- +title: "Example Infectious Disease Forecast" +format: gfm +engine: jupyter +--- + + +# Background + +The fitting data for this example came from HHS's HealthData website, which hosted the National Health Safety Network's (NHSN) [dataset](https://healthdata.gov/Hospital/COVID-19-Reported-Patient-Impact-and-Hospital-Capa/g62h-syeh/about_data), which the author located off of the FluSight Hub's [data GitHub 
repository](https://github.com/cdcepi/Flusight-forecast-data/tree/master/data-truth). diff --git a/vignettes/format-flusight-output.qmd b/vignettes/format-flusight-output.qmd new file mode 100644 index 0000000..42cb205 --- /dev/null +++ b/vignettes/format-flusight-output.qmd @@ -0,0 +1,41 @@ +--- +title: "Formatting Output For FluSight Submission" + +format: gfm +engine: jupyter +--- + +_This document details the process of using `forecasttools-py` to turn an infectious disease forecast produced in Python and stored as an `InferenceData` object into a correctly formatted FluSight submission._ + +--- + +# Background + +> The FluSight Challenge[^historical_flusight] uses the [`hubverse`](https://hubverse.io/) framework to accept forecast submissions from contributing teams via [GitHub pull requests](https://docs.github.com/en/pull-requests/collaborating-with-pull-requests/proposing-changes-to-your-work-with-pull-requests/about-pull-requests). The submitted `model-output` files need to follow a [specified formatting schema](https://github.com/cdcepi/FluSight-forecast-hub/blob/main/model-output/README.md). `forecasttools-py` provides functions to automate production of FluSight submissions. + +[^historical_flusight]: For FluSight data, please refer to [this repository](https://github.com/cdcepi/Flusight-forecast-data). For the FluSight Hub repository, please refer [here](https://github.com/cdcepi/FluSight-forecast-hub), and for historical FluSight forecasts, please refer [here](https://github.com/cdcepi/FluSight-forecasts). + +Below is an example[^example_forecast] of model output submitted to the FluSight Hub: + +[^example_forecast]: [This forecast](https://github.com/cdcepi/FluSight-forecast-hub/blob/main/model-output/cfarenewal-cfaepimlight/2023-12-02-cfarenewal-cfaepimlight.csv) was made for `2023-12-02` by the model `cfarenewal-cfaepimlight`. 
+ +``` +reference_date,target,horizon,target_end_date,location,output_type,output_type_id,value +2023-12-02,wk inc flu hosp,-1,2023-11-25,01,quantile,0.01,48 +2023-12-02,wk inc flu hosp,-1,2023-11-25,01,quantile,0.025,52 +2023-12-02,wk inc flu hosp,-1,2023-11-25,01,quantile,0.05,57 +2023-12-02,wk inc flu hosp,-1,2023-11-25,01,quantile,0.1,61 +2023-12-02,wk inc flu hosp,-1,2023-11-25,01,quantile,0.15,64 +2023-12-02,wk inc flu hosp,-1,2023-11-25,01,quantile,0.2,67 +2023-12-02,wk inc flu hosp,-1,2023-11-25,01,quantile,0.25,70 +2023-12-02,wk inc flu hosp,-1,2023-11-25,01,quantile,0.3,72 +2023-12-02,wk inc flu hosp,-1,2023-11-25,01,quantile,0.35,74 +2023-12-02,wk inc flu hosp,-1,2023-11-25,01,quantile,0.4,75 +2023-12-02,wk inc flu hosp,-1,2023-11-25,01,quantile,0.45,77 +2023-12-02,wk inc flu hosp,-1,2023-11-25,01,quantile,0.5,79 +2023-12-02,wk inc flu hosp,-1,2023-11-25,01,quantile,0.55,81 +2023-12-02,wk inc flu hosp,-1,2023-11-25,01,quantile,0.6,83 +2023-12-02,wk inc flu hosp,-1,2023-11-25,01,quantile,0.65,85 +2023-12-02,wk inc flu hosp,-1,2023-11-25,01,quantile,0.7,87 +2023-12-02,wk inc flu hosp,-1,2023-11-25,01,quantile,0.75,89 +```