Skip to content

Commit

Permalink
Small improvements to the FloodHub data extractor
Browse files Browse the repository at this point in the history
  • Loading branch information
valentijn7 committed Nov 13, 2024
1 parent 0d6acba commit 416b6a7
Show file tree
Hide file tree
Showing 11 changed files with 27,727 additions and 29 deletions.
16,513 changes: 16,513 additions & 0 deletions GoogleFloodHub/data/floods_data/mali/2024-01-01_to_2024-11-13.csv

Large diffs are not rendered by default.

5,505 changes: 5,505 additions & 0 deletions GoogleFloodHub/data/floods_data/mali/2024-10-01_to_2024-11-12.csv

Large diffs are not rendered by default.

5,633 changes: 5,633 additions & 0 deletions GoogleFloodHub/data/floods_data/mali/2024-10-01_to_2024-11-13.csv

Large diffs are not rendered by default.

Original file line number Diff line number Diff line change
@@ -1,17 +1,17 @@
gaugeId;gaugeValueUnit;qualityVerified;warningLevel;dangerLevel;extremeDangerLevel
hybas_1121890140;CUBIC_METERS_PER_SECOND;True;2164.199951171875;2447.35107421875;2765.17578125
hybas_1120739110;CUBIC_METERS_PER_SECOND;True;635.864990234375;852.9635009765625;1097.7147216796875
hybas_1120679780;CUBIC_METERS_PER_SECOND;True;3336.14013671875;3779.9326171875;4213.734375
hybas_1120758950;CUBIC_METERS_PER_SECOND;True;554.3900756835938;729.4489135742188;919.7537231445312
hybas_1120641660;CUBIC_METERS_PER_SECOND;True;2071.439697265625;2351.6884765625;2674.150390625
hybas_1121919510;CUBIC_METERS_PER_SECOND;True;3904.036376953125;4394.72998046875;4867.78173828125
hybas_1120705070;CUBIC_METERS_PER_SECOND;True;3896.59375;4392.75634765625;4894.8916015625
hybas_1120650110;CUBIC_METERS_PER_SECOND;True;67.53443145751953;128.68450927734375;251.50196838378906
hybas_1120661040;CUBIC_METERS_PER_SECOND;True;52.250953674316406;100.93582916259766;192.5716552734375
hybas_1120766460;CUBIC_METERS_PER_SECOND;True;834.0955200195312;1006.9996337890625;1238.7659912109375
hybas_1120679780;CUBIC_METERS_PER_SECOND;True;3336.14013671875;3779.9326171875;4213.734375
hybas_1120689830;CUBIC_METERS_PER_SECOND;True;3632.66845703125;4076.93603515625;4501.068359375
hybas_1120705070;CUBIC_METERS_PER_SECOND;True;3896.59375;4392.75634765625;4894.8916015625
hybas_1120737100;CUBIC_METERS_PER_SECOND;True;3081.07373046875;3490.791748046875;3896.62841796875
hybas_1121895840;CUBIC_METERS_PER_SECOND;True;3312.148193359375;3752.041015625;4179.9990234375
hybas_1120739110;CUBIC_METERS_PER_SECOND;True;635.864990234375;852.9635009765625;1097.7147216796875
hybas_1120758950;CUBIC_METERS_PER_SECOND;True;554.3900756835938;729.4489135742188;919.7537231445312
hybas_1120766460;CUBIC_METERS_PER_SECOND;True;834.0955200195312;1006.9996337890625;1238.7659912109375
hybas_1121890140;CUBIC_METERS_PER_SECOND;True;2164.199951171875;2447.35107421875;2765.17578125
hybas_1121893090;CUBIC_METERS_PER_SECOND;True;2487.1767578125;2800.095458984375;3149.558837890625
hybas_1121905290;CUBIC_METERS_PER_SECOND;True;3432.061279296875;3862.819580078125;4282.88427734375
hybas_1120650110;CUBIC_METERS_PER_SECOND;True;67.53443145751953;128.68450927734375;251.50196838378906
hybas_1121895840;CUBIC_METERS_PER_SECOND;True;3312.148193359375;3752.041015625;4179.9990234375
hybas_1121900350;CUBIC_METERS_PER_SECOND;True;3332.435791015625;3778.391845703125;4214.50927734375
hybas_1121905290;CUBIC_METERS_PER_SECOND;True;3432.061279296875;3862.819580078125;4282.88427734375
hybas_1121919510;CUBIC_METERS_PER_SECOND;True;3904.036376953125;4394.72998046875;4867.78173828125
20 changes: 10 additions & 10 deletions GoogleFloodHub/data/processed/ListGauges/Mali_gauges_listed.csv
Original file line number Diff line number Diff line change
@@ -1,17 +1,17 @@
siteName;source;river;gaugeId;qualityVerified;hasModel;latitude;longitude
;HYBAS;;hybas_1121900350;True;True;13.68541666666556;-5.893750000002797
;HYBAS;;hybas_1120641660;True;True;14.506249999999;-4.206250000002797
;HYBAS;;hybas_1120650110;True;True;14.277083333332143;-6.9270833333361
;HYBAS;;hybas_1120661040;True;True;13.985416666665571;-7.1520833333361225
;HYBAS;;hybas_1120679780;True;True;13.493749999998954;-6.202083333336134
;HYBAS;;hybas_1120689830;True;True;13.210416666665594;-7.07708333333602
;HYBAS;;hybas_1121890140;True;True;14.314583333332337;-4.422916666669437
;HYBAS;;hybas_1120705070;True;True;12.835416666665594;-7.58125000000274
;HYBAS;;hybas_1120737100;True;True;12.018749999998988;-8.322916666669414
;HYBAS;Sankarani;hybas_1120766460;True;True;11.22291666666553;-8.485416666669321
;HYBAS;;hybas_1120739110;True;True;11.977083333332189;-6.360416666669494
;HYBAS;;hybas_1120705070;True;True;12.835416666665594;-7.58125000000274
;HYBAS;;hybas_1120650110;True;True;14.277083333332143;-6.9270833333361
;HYBAS;Niger;hybas_1121919510;True;True;12.410416666665697;-8.106250000002774
;HYBAS;;hybas_1120758950;True;True;11.427083333332233;-6.581250000002682
;HYBAS;Sankarani;hybas_1120766460;True;True;11.22291666666553;-8.485416666669321
;HYBAS;;hybas_1121890140;True;True;14.314583333332337;-4.422916666669437
;HYBAS;;hybas_1121893090;True;True;14.131249999998944;-5.039583333336168
;HYBAS;;hybas_1121895840;True;True;13.956249999998818;-5.360416666669494
;HYBAS;;hybas_1120679780;True;True;13.493749999998954;-6.202083333336134
;HYBAS;;hybas_1121900350;True;True;13.68541666666556;-5.893750000002797
;HYBAS;;hybas_1121905290;True;True;13.360416666665628;-6.789583333336111
;HYBAS;;hybas_1121893090;True;True;14.131249999998944;-5.039583333336168
;HYBAS;;hybas_1120758950;True;True;11.427083333332233;-6.581250000002682
;HYBAS;;hybas_1120661040;True;True;13.985416666665571;-7.1520833333361225
;HYBAS;Niger;hybas_1121919510;True;True;12.410416666665697;-8.106250000002774
1 change: 1 addition & 0 deletions GoogleFloodHub/src/extract/__init__.py
Original file line number Diff line number Diff line change
Expand Up @@ -6,6 +6,7 @@
print('\nRunning __init__.py for GoogleFloodHub-data-extractor')

from .parse import validate_args
from .parse import validate_forecasts
from .exceptions import handle_exception
from .getters import get_API_key
from .getters import get_json_file
Expand Down
4 changes: 4 additions & 0 deletions GoogleFloodHub/src/extract/call_GetGaugeModel.py
Original file line number Diff line number Diff line change
Expand Up @@ -97,6 +97,10 @@ def convert_GetGaugeModel_to_df(response: List[Dict[str, Any]]) -> pd.DataFrame:
# df['extremeDangerLevel'] = df['thresholds'].apply(lambda x: x['extremeDangerLevel'])
# df['warningLevel'] = df['thresholds'].apply(lambda x: x['warningLevel'])
df.drop(columns = ['thresholds'], inplace = True)

# sort by gaugeId to ensure same response result for every API call
df['gaugeId'] = df['gaugeId'].astype(str).str.strip()
df = df.sort_values(by = 'gaugeId').reset_index(drop = True)

return df

Expand Down
5 changes: 5 additions & 0 deletions GoogleFloodHub/src/extract/call_ListGauges.py
Original file line number Diff line number Diff line change
Expand Up @@ -70,6 +70,11 @@ def convert_ListGauges_to_df(gauges : List[Dict[str, Any]]) -> pd.DataFrame:
df['latitude'] = df['location'].apply(lambda x: x['latitude'])
df['longitude'] = df['location'].apply(lambda x: x['longitude'])
df.drop(columns = ['location'], inplace = True)
# sort by gaugeId to ensure same response result for every API call;
# this also makes the order consistent between ListGauges and GetGaugeModel
df['gaugeId'] = df['gaugeId'].astype(str).str.strip()
df = df.sort_values(by = 'gaugeId').reset_index(drop = True)

return df


Expand Down
2 changes: 1 addition & 1 deletion GoogleFloodHub/src/extract/export.py
Original file line number Diff line number Diff line change
Expand Up @@ -31,7 +31,7 @@ def extract_country_data_for_time_delta(
path_API_key: str,
country: str,
delta: Tuple[datetime.datetime, datetime.datetime],
export: bool = True) -> pd.DataFrame:
export: bool = True) -> Tuple[pd.DataFrame, pd.DataFrame, pd.DataFrame]:
"""
Combines the calls of the
- ListGauges
Expand Down
40 changes: 38 additions & 2 deletions GoogleFloodHub/src/extract/parse.py
Original file line number Diff line number Diff line change
Expand Up @@ -3,6 +3,7 @@
import sys
from typing import Tuple
from datetime import datetime
import pandas as pd


def parse_args(args: list) -> Tuple:
Expand Down Expand Up @@ -69,7 +70,7 @@ def validate_country(country: str) -> str:
return country


def validate_args(args: list) -> Tuple:
def validate_args(args: list) -> Tuple[str, datetime, datetime]:
"""
Validate the command line arguments using helper functions
Expand All @@ -82,4 +83,39 @@ def validate_args(args: list) -> Tuple:
validate_dates(a, b)
country = validate_country(country)

return country, a, b
return country, a, b


def validate_forecasts(
        df: pd.DataFrame, dt: Tuple[datetime, datetime], country: str
    ) -> None:
    """
    Validate the forecasts dataframe by checking whether its issue dates
    cover the boundaries of the requested time delta. Prints a success
    message if they do, and a warning message if they do not (or if the
    dataframe holds no issue dates at all). The dataframe itself is
    left untouched.

    :param df: forecasts dataframe; must contain an 'issue_date' column
               with values that pd.to_datetime() can parse
    :param dt: tuple of two datetime objects (forming a time delta)
    :param country: country name, only used in the printed messages
    """
    # parse into a local Series instead of assigning back into df, so
    # that validating never alters the caller's dataframe as a side effect
    issue_dates = pd.to_datetime(df['issue_date'])
    min_issue_date = issue_dates.min()
    max_issue_date = issue_dates.max()
    a, b = dt

    # an empty (or all-missing) column yields NaT for min/max; NaT compares
    # False against any date and cannot be formatted with strftime(), so it
    # has to be reported separately before the boundary check below
    if pd.isna(min_issue_date) or pd.isna(max_issue_date):
        print(
            f"Warning: No forecasts were returned for {country} for the requested "
            f"delta from {a.strftime('%Y-%m-%d')} to {b.strftime('%Y-%m-%d')}."
        )
        return

    # minus 1 day because the API does not always
    # return the data for the final day, e.g.
    # when b is today or in the future
    if min_issue_date > a or max_issue_date < b - pd.Timedelta(days = 1):
        print(
            f"Warning: Data is stored but may be incomplete. The request returned delta "
            f"{min_issue_date.strftime('%Y-%m-%d')} to {max_issue_date.strftime('%Y-%m-%d')} "
            f"which does not/partly cover the requested delta from {a.strftime('%Y-%m-%d')} "
            f"to {b.strftime('%Y-%m-%d')}."
        )
    else:
        print(
            f"Extraction successful for {country} with issue dates from "
            f"{min_issue_date.strftime('%Y-%m-%d')} spanning "
            f"{(max_issue_date - min_issue_date).days} days of data"
        )
13 changes: 7 additions & 6 deletions GoogleFloodHub/src/main_extract.py
Original file line number Diff line number Diff line change
Expand Up @@ -8,7 +8,9 @@
# Example usage: python3 main.py Mali 01-10-2024 07-10-2024
#
# For a more complete instruction, see the README of:
# https://github.com/valentijn7/GoogleFloodHub-data-extractor
# https://github.com/valentijn7/GoogleFloodHub-data-extractor;
# For the most recent version of the code, see:
# https://github.com/rodekruis/river-flood-data-analysis


import sys
Expand All @@ -20,11 +22,10 @@ def main():

try:
country, a, b = extract.validate_args(sys.argv)
_, _, _ = extract.extract_country_data_for_time_delta("../key.txt", country, (a, b))
print(
f"Extraction succesful for {country} with issue date {str(a)[:10]} "
f"and {str(b - a)[:2]} days of data"
)
_, _, forecasts = extract.extract_country_data_for_time_delta("../key.txt",
country,
(a, b))
extract.validate_forecasts(forecasts, (a, b), country)
except Exception as exc:
extract.handle_exception(exc)

Expand Down

0 comments on commit 416b6a7

Please sign in to comment.