Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Refactoring pull_ozh.py #67

Open
wants to merge 2 commits into
base: master
Choose a base branch
from
Open
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
102 changes: 25 additions & 77 deletions pull_ozh.py
Original file line number Diff line number Diff line change
Expand Up @@ -18,6 +18,14 @@ def get_date_range(dfs):


def main():
dimensions = {
'cases': {'csv': 'ncumul_conf', 'xlsx': 'Cases'},
'fatalities': {'csv': 'ncumul_deceased', 'xlsx': 'Fatalities'},
'hospitalized': {'csv': 'ncumul_hosp', 'xlsx': 'Hospitalized'},
'icu': {'csv': 'ncumul_ICU', 'xlsx': 'ICU'},
'vent': {'csv': 'ncumul_vent', 'xlsx': 'Ventilated'},
'released': {'csv': 'ncumul_released', 'xlsx': 'Released'}
}
parser = ConfigParser()
parser.read("sources.ini")

Expand All @@ -41,61 +49,29 @@ def main():

# Append empty dates to all
dates = get_date_range(dfs)
df_cases = pd.DataFrame(float("nan"), index=dates, columns=cantons)
df_fatalities = pd.DataFrame(float("nan"), index=dates, columns=cantons)
df_hospitalized = pd.DataFrame(float("nan"), index=dates, columns=cantons)
df_icu = pd.DataFrame(float("nan"), index=dates, columns=cantons)
df_vent = pd.DataFrame(float("nan"), index=dates, columns=cantons)
df_released = pd.DataFrame(float("nan"), index=dates, columns=cantons)
df_by_dimension = {dimension: pd.DataFrame(float("nan"), index=dates, columns=cantons) for dimension in dimensions}

for canton, df in dfs.items():
for d in dates:
if d in df.index:
df_cases[canton][d] = df["ncumul_conf"][d]
df_fatalities[canton][d] = df["ncumul_deceased"][d]
df_hospitalized[canton][d] = df["ncumul_hosp"][d]
df_icu[canton][d] = df["ncumul_ICU"][d]
df_vent[canton][d] = df["ncumul_vent"][d]
df_released[canton][d] = df["ncumul_released"][d]
for dimension in dimensions:
df_by_dimension[dimension][canton][d] = df[dimensions[dimension]['csv']][d]

# Fill to calculate the correct totals for CH
df_cases_total = df_cases.fillna(method="ffill")
df_fatalities_total = df_fatalities.fillna(method="ffill")
df_hospitalized_total = df_hospitalized.fillna(method="ffill")
df_icu_total = df_icu.fillna(method="ffill")
df_vent_total = df_vent.fillna(method="ffill")
df_released_total = df_released.fillna(method="ffill")

df_cases["CH"] = df_cases_total.sum(axis=1)
df_fatalities["CH"] = df_fatalities_total.sum(axis=1)
df_hospitalized["CH"] = df_hospitalized_total.sum(axis=1)
df_icu["CH"] = df_icu_total.sum(axis=1)
df_vent["CH"] = df_vent_total.sum(axis=1)
df_released["CH"] = df_released_total.sum(axis=1)
df_total = {dimension: df.fillna(method="ffill") for dimension, df in df_by_dimension.items()}

for dimension in dimensions:
df_by_dimension[dimension]['CH'] = df_total[dimension].sum(axis=1)

# Create a summary with the most important values in JSON to allow web devs to grab it
summary = {
"totals": {
"cases": df_cases["CH"][-1],
"fatalities": df_fatalities["CH"][-1],
"hospitalized": df_hospitalized["CH"][-1],
"icu": df_icu["CH"][-1],
"vent": df_vent["CH"][-1],
"released": df_released["CH"][-1],
},
"changes": {
"cases": df_cases["CH"][-1] - df_cases["CH"][-2],
"fatalities": df_fatalities["CH"][-1] - df_fatalities["CH"][-2],
"hospitalized": df_hospitalized["CH"][-1] - df_hospitalized["CH"][-2],
"icu": df_icu["CH"][-1] - df_icu["CH"][-2],
"vent": df_vent["CH"][-1] - df_vent["CH"][-2],
"released": df_released["CH"][-1] - df_released["CH"][-2],
},
"totals": {dimension: df["CH"][-1] for dimension, df in df_by_dimension.items()},
"changes": {dimension: df["CH"][-1] - df["CH"][-2] for dimension, df in df_by_dimension.items()},
"updated_cantons": ",".join(
[
canton
for canton in df_cases
if canton != "CH" and not pd.np.isnan(float(df_cases[canton][-1]))
for canton in df_by_dimension['cases']
if canton != "CH" and not pd.np.isnan(float(df_by_dimension['cases'][canton][-1]))
]
),
}
Expand All @@ -104,44 +80,16 @@ def main():
json.dump(summary, f)

# Store as CSV
df_cases.to_csv("covid19_cases_switzerland_openzh.csv", index_label="Date")
df_fatalities.to_csv(
"covid19_fatalities_switzerland_openzh.csv", index_label="Date"
)
df_hospitalized.to_csv(
"covid19_hospitalized_switzerland_openzh.csv", index_label="Date"
)
df_icu.to_csv("covid19_icu_switzerland_openzh.csv", index_label="Date")
df_vent.to_csv("covid19_vent_switzerland_openzh.csv", index_label="Date")
df_released.to_csv("covid19_released_switzerland_openzh.csv", index_label="Date")
for dimension in dimensions:
df_by_dimension[dimension].to_csv(f"covid19_{dimension}_switzerland_openzh.csv", index_label="Date")

# Store as JSON
df_cases.to_json("covid19_cases_switzerland_openzh.json")
df_fatalities.to_json("covid19_fatalities_switzerland_openzh.json")
df_hospitalized.to_json("covid19_hospitalized_switzerland_openzh.json")
df_icu.to_json("covid19_icu_switzerland_openzh.json")
df_vent.to_json("covid19_vent_switzerland_openzh.json")
df_released.to_json("covid19_released_switzerland_openzh.json")
for dimension, df in df_by_dimension.items():
df.to_json(f"covid19_{dimension}_switzerland_openzh.json")

with pd.ExcelWriter("covid_19_data_switzerland.xlsx") as writer:
df_cases.to_excel(
writer, index_label="Date", sheet_name="Cases",
)
df_fatalities.to_excel(
writer, index_label="Date", sheet_name="Fatalities",
)
df_hospitalized.to_excel(
writer, index_label="Date", sheet_name="Hospitalized",
)
df_icu.to_excel(
writer, index_label="Date", sheet_name="ICU",
)
df_vent.to_excel(
writer, index_label="Date", sheet_name="Ventilated",
)
df_released.to_excel(
writer, index_label="Date", sheet_name="Released",
)
for dimension, df in df_by_dimension.items():
df.to_excel(writer, index_label="Date", sheet_name=dimensions[dimension]['xlsx'])


if __name__ == "__main__":
Expand Down