Merging of double events now higher up the chain
valentijn7 committed Nov 25, 2024
1 parent d0b3466 commit eb13dd6
Showing 2 changed files with 47 additions and 48 deletions.
1 change: 1 addition & 0 deletions .gitignore
@@ -11,6 +11,7 @@ impact_data_Mali_tidied.csv
missing_cercle_info.csv
missing_cercle_info_after_mod.csv
impact_events_per_admin_54.csv
+impact_events_per_admin_529.csv
impact_events_per_admin_673.csv

# ignore experimentation notebooks
94 changes: 46 additions & 48 deletions GoogleFloodHub/src/GRRR.ipynb
@@ -2104,7 +2104,7 @@
},
{
"cell_type": "code",
"execution_count": null,
"execution_count": 125,
"metadata": {},
"outputs": [],
"source": [
@@ -2219,6 +2219,38 @@
" return df\n",
"\n",
"\n",
"def merge_duplicate_events(d_events: Dict[str, pd.DataFrame]) -> Dict[str, pd.DataFrame]:\n",
" \"\"\"\n",
" ilter out double impact events by checking whether events have the \n",
" same start date, and then merge them to an event with the start date\n",
" of the first, and end date of the last event with the same start date\n",
"\n",
" :param dict_events: dict with au codas keys and events dfs as values\n",
" :return: same dict, but merged\n",
" \"\"\"\n",
" d_events_merged = {}\n",
"\n",
" for admin_unit, df_events in d_events.items():\n",
" df_events = df_events.reset_index(drop = True)\n",
" grouped = df_events.groupby('flood_start', as_index = False)\n",
" # merge events with the same start date\n",
" merged_events = grouped.agg({\n",
" 'flood_start': 'first',\n",
" 'flood_end': 'max'\n",
" })\n",
" # recalculate duration and reset identifiers/columns\n",
" merged_events['duration'] = (merged_events['flood_end'] - \\\n",
" merged_events['flood_start']).dt.days + 1\n",
" merged_events = merged_events.sort_values('flood_start').reset_index(drop = True)\n",
" merged_events['event'] = merged_events.index\n",
" merged_events.set_index('event', inplace = True)\n",
" merged_events = merged_events[['flood_start', 'flood_end', 'duration']]\n",
" # add to result\n",
" d_events_merged[admin_unit] = merged_events\n",
"\n",
" return d_events_merged\n",
"\n",
"\n",
"def process_impact_data_to_events(\n",
" df: pd.DataFrame, verbose: bool = False\n",
" ) -> Dict[str, pd.DataFrame]:\n",
@@ -2356,15 +2388,17 @@
" # 'Region', 'Commune', 'Quartier/Villages']]\n",
" dict_events[cercle] = df_events\n",
"\n",
" # merge duplicate events\n",
" dict_events_merged = merge_duplicate_events(dict_events)\n",
" # export to csv and return\n",
" export_dict_impact_events_to_csv(dict_events, verbose)\n",
" export_dict_impact_events_to_csv(dict_events_merged, verbose)\n",
" \n",
" return dict_events"
]
},
{
"cell_type": "code",
"execution_count": 122,
"execution_count": 126,
"metadata": {},
"outputs": [],
"source": [
@@ -2373,7 +2407,7 @@
},
{
"cell_type": "code",
"execution_count": 41,
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
@@ -2396,12 +2430,16 @@
"\n",
"\n",
"def subset_events_on_unit_and_date(\n",
" d_events: Dict[str, pd.DataFrame], d_units, earliest_date: str, latest_date: str\n",
" d_events: Dict[str, pd.DataFrame], d_units: Dict[str, xr.Dataset],\n",
" earliest_date: str, latest_date: str\n",
" ) -> Dict[str, pd.DataFrame]:\n",
" \"\"\"\n",
" Subset the events on the available administrative units and dates,\n",
" while also returning a list of admin units with no impact data\n",
"\n",
" while also returning a list of admin units with no impact data.\n",
" This also includes looking per year if impact data is available\n",
" for an administrative unit. If not, the events for that unit for\n",
" that year are discarded (that is, from the flood event data).\n",
" \n",
" :param d_events: dictionary with events\n",
" :param d_units: dictionary with available administrative units\n",
" :param earliest_date: earliest date\n",
@@ -2681,46 +2719,6 @@
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"# filter out double impact events by checking whether events have the \n",
"# same start date, and then merge them to an event with the start date\n",
"# of the first, and end date of the last event with the same start date\n",
"def merge_duplicate_events(d_events: Dict[str, pd.DataFrame]) -> Dict[str, pd.DataFrame]:\n",
" \"\"\"\n",
" Merges duplicate impact events that have the same\n",
" flood start date for each administrative unit\n",
"\n",
" :param dict_events: dict with au codas keys and events dfs as values\n",
" :return: same dict, but merged\n",
" \"\"\"\n",
" d_events_merged = {}\n",
"\n",
" for admin_unit, df_events in d_events.items():\n",
" df_events = df_events.reset_index(drop = True)\n",
" grouped = df_events.groupby('flood_start', as_index = False)\n",
" # merge events with the same start date\n",
" merged_events = grouped.agg({\n",
" 'flood_start': 'first',\n",
" 'flood_end': 'max'\n",
" })\n",
" # recalculate duration and reset identifiers/columns\n",
" merged_events['duration'] = (merged_events['flood_end'] - \\\n",
" merged_events['flood_start']).dt.days + 1\n",
" merged_events = merged_events.sort_values('flood_start').reset_index(drop = True)\n",
" merged_events['event'] = merged_events.index\n",
" merged_events.set_index('event', inplace = True)\n",
" merged_events = merged_events[['flood_start', 'flood_end', 'duration']]\n",
" # add to result\n",
" d_events_merged[admin_unit] = merged_events\n",
"\n",
" return d_events_merged"
]
},
{
"cell_type": "code",
"execution_count": 46,
"metadata": {},
"outputs": [
{
"name": "stdout",
@@ -2795,7 +2793,7 @@
}
],
"source": [
"dict_impact_events_final = merge_duplicate_events(dict_impact_events_subset)\n",
"# dict_impact_events_final = merge_duplicate_events(dict_impact_events_subset)\n",
"export_dict_impact_events_to_csv(dict_impact_events_final)\n",
"print(dict_impact_events_final)"
]
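
An editor's aside, not part of the commit: with this change the merge runs inside process_impact_data_to_events, which also exports the merged events, so the standalone merge call above is commented out. A caller now only needs something like the line below; df_impact is a hypothetical tidied impact DataFrame.

# sketch of the new call chain (names taken from this diff):
# process_impact_data_to_events merges duplicate events internally and
# exports them via export_dict_impact_events_to_csv before returning
dict_events = process_impact_data_to_events(df_impact, verbose=True)
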
