diff --git a/pull_ozh.py b/pull_ozh.py index a7dc544ab34..29503ad9b68 100644 --- a/pull_ozh.py +++ b/pull_ozh.py @@ -18,6 +18,14 @@ def get_date_range(dfs): def main(): + dimensions = { + 'cases': {'csv': 'ncumul_conf', 'xlsx': 'Cases'}, + 'fatalities': {'csv': 'ncumul_deceased', 'xlsx': 'Fatalities'}, + 'hospitalized': {'csv': 'ncumul_hosp', 'xlsx': 'Hospitalized'}, + 'icu': {'csv': 'ncumul_ICU', 'xlsx': 'ICU'}, + 'vent': {'csv': 'ncumul_vent', 'xlsx': 'Ventilated'}, + 'released': {'csv': 'ncumul_released', 'xlsx': 'Released'} + } parser = ConfigParser() parser.read("sources.ini") @@ -41,61 +49,29 @@ def main(): # Append empty dates to all dates = get_date_range(dfs) - df_cases = pd.DataFrame(float("nan"), index=dates, columns=cantons) - df_fatalities = pd.DataFrame(float("nan"), index=dates, columns=cantons) - df_hospitalized = pd.DataFrame(float("nan"), index=dates, columns=cantons) - df_icu = pd.DataFrame(float("nan"), index=dates, columns=cantons) - df_vent = pd.DataFrame(float("nan"), index=dates, columns=cantons) - df_released = pd.DataFrame(float("nan"), index=dates, columns=cantons) + df_by_dimension = {dimension: pd.DataFrame(float("nan"), index=dates, columns=cantons) for dimension in dimensions} for canton, df in dfs.items(): for d in dates: if d in df.index: - df_cases[canton][d] = df["ncumul_conf"][d] - df_fatalities[canton][d] = df["ncumul_deceased"][d] - df_hospitalized[canton][d] = df["ncumul_hosp"][d] - df_icu[canton][d] = df["ncumul_ICU"][d] - df_vent[canton][d] = df["ncumul_vent"][d] - df_released[canton][d] = df["ncumul_released"][d] + for dimension in dimensions: + df_by_dimension[dimension][canton][d] = df[dimensions[dimension]['csv']][d] # Fill to calculate the correct totals for CH - df_cases_total = df_cases.fillna(method="ffill") - df_fatalities_total = df_fatalities.fillna(method="ffill") - df_hospitalized_total = df_hospitalized.fillna(method="ffill") - df_icu_total = df_icu.fillna(method="ffill") - df_vent_total = df_vent.fillna(method="ffill") - df_released_total = df_released.fillna(method="ffill") - - df_cases["CH"] = df_cases_total.sum(axis=1) - df_fatalities["CH"] = df_fatalities_total.sum(axis=1) - df_hospitalized["CH"] = df_hospitalized_total.sum(axis=1) - df_icu["CH"] = df_icu_total.sum(axis=1) - df_vent["CH"] = df_vent_total.sum(axis=1) - df_released["CH"] = df_released_total.sum(axis=1) + df_total = {dimension: df.fillna(method="ffill") for dimension, df in df_by_dimension.items()} + + for dimension in dimensions: + df_by_dimension[dimension]['CH'] = df_total[dimension].sum(axis=1) # Create a summery with the most important values in json to allow web devs to grab it summary = { - "totals": { - "cases": df_cases["CH"][-1], - "fatalities": df_fatalities["CH"][-1], - "hospitalized": df_hospitalized["CH"][-1], - "icu": df_icu["CH"][-1], - "vent": df_vent["CH"][-1], - "released": df_released["CH"][-1], - }, - "changes": { - "cases": df_cases["CH"][-1] - df_cases["CH"][-2], - "fatalities": df_fatalities["CH"][-1] - df_fatalities["CH"][-2], - "hospitalized": df_hospitalized["CH"][-1] - df_hospitalized["CH"][-2], - "icu": df_icu["CH"][-1] - df_icu["CH"][-2], - "vent": df_vent["CH"][-1] - df_vent["CH"][-2], - "released": df_released["CH"][-1] - df_released["CH"][-2], - }, + "totals": {dimension: df["CH"][-1] for dimension, df in df_by_dimension.items()}, + "changes": {dimension: df["CH"][-1] - df["CH"][-2] for dimension, df in df_by_dimension.items()}, "updated_cantons": ",".join( [ canton - for canton in df_cases - if canton != "CH" and not pd.np.isnan(float(df_cases[canton][-1])) + for canton in df_by_dimension['cases'] + if canton != "CH" and not pd.np.isnan(float(df_by_dimension['cases'][canton][-1])) ] ), } @@ -104,44 +80,16 @@ def main(): json.dump(summary, f) # Store as CSV - df_cases.to_csv("covid19_cases_switzerland_openzh.csv", index_label="Date") - df_fatalities.to_csv( - "covid19_fatalities_switzerland_openzh.csv", index_label="Date" - ) - df_hospitalized.to_csv( - "covid19_hospitalized_switzerland_openzh.csv", index_label="Date" - ) - df_icu.to_csv("covid19_icu_switzerland_openzh.csv", index_label="Date") - df_vent.to_csv("covid19_vent_switzerland_openzh.csv", index_label="Date") - df_released.to_csv("covid19_released_switzerland_openzh.csv", index_label="Date") + for dimension in dimensions: + df_by_dimension[dimension].to_csv(f"covid19_{dimension}_switzerland_openzh.csv", index_label="Date") # Store as json - df_cases.to_json("covid19_cases_switzerland_openzh.json") - df_fatalities.to_json("covid19_fatalities_switzerland_openzh.json") - df_hospitalized.to_json("covid19_hospitalized_switzerland_openzh.json") - df_icu.to_json("covid19_icu_switzerland_openzh.json") - df_vent.to_json("covid19_vent_switzerland_openzh.json") - df_released.to_json("covid19_released_switzerland_openzh.json") + for dimension, df in df_by_dimension.items(): + df.to_json(f"covid19_{dimension}_switzerland_openzh.json") with pd.ExcelWriter("covid_19_data_switzerland.xlsx") as writer: - df_cases.to_excel( - writer, index_label="Date", sheet_name="Cases", - ) - df_fatalities.to_excel( - writer, index_label="Date", sheet_name="Fatalities", - ) - df_hospitalized.to_excel( - writer, index_label="Date", sheet_name="Hospitalized", - ) - df_icu.to_excel( - writer, index_label="Date", sheet_name="ICU", - ) - df_vent.to_excel( - writer, index_label="Date", sheet_name="Ventilated", - ) - df_released.to_excel( - writer, index_label="Date", sheet_name="Released", - ) + for dimension, df in df_by_dimension.items(): + df.to_excel(writer, index_label="Date", sheet_name=dimensions[dimension]['xlsx']) if __name__ == "__main__":