Commit

Permalink
Browse files Browse the repository at this point in the history
  • Loading branch information
valentijn7 committed Nov 25, 2024
2 parents 758e310 + 754e7de commit ff94630
Showing 61 changed files with 5,060 additions and 68 deletions.
1 change: 1 addition & 0 deletions GloFAS/results/Modelled_vsImpactRP5.0_yr_leadtime168.cpg
@@ -0,0 +1 @@
UTF-8
Binary file not shown.
1 change: 1 addition & 0 deletions GloFAS/results/Modelled_vsImpactRP5.0_yr_leadtime168.prj
@@ -0,0 +1 @@
GEOGCS["GCS_WGS_1984",DATUM["D_WGS_1984",SPHEROID["WGS_1984",6378137.0,298.257223563]],PRIMEM["Greenwich",0.0],UNIT["Degree",0.0174532925199433]]
Binary file not shown.
Binary file not shown.
155 changes: 155 additions & 0 deletions comparison/impact/merging_csv.py
@@ -0,0 +1,155 @@
import configuration as cfg
import pandas as pd
from datetime import datetime, timedelta

DataDir = cfg.DataDir / 'impact'
MasterInnondationPath = DataDir / 'CleanedImpactInnondations_220623.csv'
DesinventarTextminingPath = DataDir / 'merged_desinventar_textmining.csv'

# Load the data
innondation_df = pd.read_csv(MasterInnondationPath)
desinventar_df = pd.read_csv(DesinventarTextminingPath)

# Convert dates to datetime objects, using dayfirst=True for day/month/year format
innondation_df['Start Date'] = pd.to_datetime(innondation_df['Start Date'], dayfirst=True)
innondation_df['End Date'] = pd.to_datetime(innondation_df['End Date'], dayfirst=True)
desinventar_df['Start Date'] = pd.to_datetime(desinventar_df['date'], dayfirst=True)
desinventar_df['End Date'] = pd.to_datetime(desinventar_df['date'], dayfirst=True)

# Normalize administrative units to uppercase in both datasets
innondation_df['Commune'] = innondation_df['Commune'].str.upper()
innondation_df['Cercle'] = innondation_df['Cercle'].str.upper()
innondation_df['Région'] = innondation_df['Région'].str.upper()

desinventar_df['Commune'] = desinventar_df['commune (adm3)'].str.upper()
desinventar_df['Cercle'] = desinventar_df['cercle (adm2)'].str.upper()
desinventar_df['Région'] = desinventar_df['region (adm1)'].str.upper()

# Function to split rows with multiple administrative units
def split_administrative_units(df):
    expanded_rows = []       # To store the expanded rows
    thrown_out_entries = []  # To store duplicates

    # Iterate through each row
    for idx, row in df.iterrows():
        # Split 'Commune', 'Cercle', and 'Région' on commas (if present)
        communes = str(row['Commune']).split(',') if pd.notna(row['Commune']) else [None]
        cercles = str(row['Cercle']).split(',') if pd.notna(row['Cercle']) else [None]
        regions = str(row['Région']).split(',') if pd.notna(row['Région']) else [None]

        # For each combination of commune, cercle, and region, create a new entry
        for commune in communes:
            for cercle in cercles:
                for region in regions:
                    # Create a new row
                    new_row = row.copy()
                    new_row['Commune'] = commune.strip() if commune else None
                    new_row['Cercle'] = cercle.strip() if cercle else None
                    new_row['Région'] = region.strip() if region else None

                    # Check whether this entry duplicates an already expanded row
                    is_duplicate = any(
                        (existing['Commune'] == new_row['Commune']) and
                        (existing['Cercle'] == new_row['Cercle']) and
                        (existing['Région'] == new_row['Région']) and
                        (existing['Start Date'] == new_row['Start Date']) and
                        (existing['End Date'] == new_row['End Date'])
                        for existing in expanded_rows
                    )

                    # If it's not a duplicate, add it to the expanded rows
                    if not is_duplicate:
                        expanded_rows.append(new_row)
                    else:
                        # Log and keep the duplicate entry
                        thrown_out_entries.append(new_row)
                        admin_unit = new_row['Commune'] or new_row['Cercle'] or new_row['Région']
                        timestamp = f"{new_row['Start Date']} - {new_row['End Date']}"
                        print(f"Duplicate entry for {admin_unit} at time {timestamp}")

    # Convert the list of expanded rows back into a DataFrame
    expanded_df = pd.DataFrame(expanded_rows)
    return expanded_df

# Cap events longer than 14 days at a 14-day duration
def limit_long_events(df):
    df['Event Duration'] = (df['End Date'] - df['Start Date']).dt.days
    df.loc[df['Event Duration'] > 14, 'End Date'] = df['Start Date'] + timedelta(days=14)
    return df
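
# Example: an event spanning 2019-07-01..2019-07-20 (19 days) is capped so its
# End Date becomes 2019-07-15 (Start Date + 14 days).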

# Dates were already parsed above; cap long events, then split multi-unit rows
innondation_df = limit_long_events(innondation_df)
innondation_df_expanded = split_administrative_units(innondation_df)

# Function to check if two date ranges overlap
def is_date_overlap(start1, end1, start2, end2):
    return max(start1, start2) <= min(end1, end2)
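# Example: 2019-08-01..2019-08-05 and 2019-08-04..2019-08-09 overlap, since
# max(starts) = 2019-08-04 <= min(ends) = 2019-08-05.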

# Function to check if two rows match based on administrative units
def is_admin_unit_match(row1, row2):
    # 1. If both communes are available, match on Commune
    if pd.notna(row1['Commune']) and pd.notna(row2['Commune']):
        return row1['Commune'] == row2['Commune']  # Match only if Communes are the same

    # 2. If Commune is missing in either dataset, fall back to Cercle
    if pd.isna(row1['Commune']) or pd.isna(row2['Commune']):
        if pd.notna(row1['Cercle']) and pd.notna(row2['Cercle']):
            return row1['Cercle'] == row2['Cercle']  # Match only if Cercles are the same

    # 3. If Commune and Cercle are unavailable, fall back to Région
    if (pd.isna(row1['Commune']) and pd.isna(row2['Commune'])) or (pd.isna(row1['Cercle']) and pd.isna(row2['Cercle'])):
        return row1['Région'] == row2['Région']  # Match only if Régions are the same

    return False  # No match if none of the above conditions apply
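# Example of the cascade: if row1 has Commune "BAMAKO" but row2's Commune is
# missing, step 1 is skipped and the rows are compared on Cercle instead.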

# Merge and deduplicate while adding additional info where relevant
def merge_with_additional_info(innondation_df, desinventar_df):
    merged_data = innondation_df.copy()

    # Iterate over desinventar entries
    for _, row2 in desinventar_df.iterrows():
        overlap_found = False

        # Iterate over innondation entries
        for idx1, row1 in merged_data.iterrows():
            same_admin_unit = is_admin_unit_match(row1, row2)  # Check for matching administrative unit

            # If same administrative unit and date overlap
            if same_admin_unit and is_date_overlap(row1['Start Date'], row1['End Date'],
                                                   row2['Start Date'], row2['End Date']):
                overlap_found = True

                # Compare date ranges; take desinventar's range if it covers a longer period
                duration1 = (row1['End Date'] - row1['Start Date']).days
                duration2 = (row2['End Date'] - row2['Start Date']).days
                if duration2 > duration1:
                    merged_data.at[idx1, 'Start Date'] = row2['Start Date']
                    merged_data.at[idx1, 'End Date'] = row2['End Date']

                # Add extra administrative info if desinventar has more detail
                if pd.isna(row1['Commune']) and pd.notna(row2['Commune']):
                    merged_data.at[idx1, 'Commune'] = row2['Commune']
                if pd.isna(row1['Cercle']) and pd.notna(row2['Cercle']):
                    merged_data.at[idx1, 'Cercle'] = row2['Cercle']
                if pd.isna(row1['Région']) and pd.notna(row2['Région']):
                    merged_data.at[idx1, 'Région'] = row2['Région']

                break  # Stop searching once an overlap is found

        # If no overlap was found, append the desinventar row as a new event
        if not overlap_found:
            merged_data = pd.concat([merged_data, pd.DataFrame([row2])], ignore_index=True)

    return merged_data

# Merge datasets
merged_df = merge_with_additional_info(innondation_df_expanded, desinventar_df)

# Save final merged data
merged_df.to_csv(DataDir / 'MergedImpactData.csv', index=False)
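
The splitting step above takes the Cartesian product of the comma-separated administrative units in each row; a minimal standalone sketch of that idea (toy values, not data from this commit):

from itertools import product

# Toy row holding two communes, one cercle, one région (already upper-cased)
communes = 'BAMAKO, KATI'.split(',')
cercles = 'BAMAKO'.split(',')
regions = 'KOULIKORO'.split(',')

for commune, cercle, region in product(communes, cercles, regions):
    print(commune.strip(), cercle.strip(), region.strip())
# BAMAKO BAMAKO KOULIKORO
# KATI BAMAKO KOULIKORO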
25 changes: 13 additions & 12 deletions comparison/HydroImpact.py → comparison/observation/HydroImpact.py
@@ -176,7 +176,6 @@ def loop_over_stations(station_csv, DataDir, RP, admPath, adminLevel):
         stationPath = rf"{hydrodir}/{BasinName}_{StationName}.csv"
         try:
             hydro_df = transform_hydro (stationPath)
-
         except:
             print (f'no discharge measures found for station {StationName} in {BasinName}')
             continue
@@ -190,25 +189,27 @@ def loop_over_stations(station_csv, DataDir, RP, admPath, adminLevel)

     all_events_df = pd.concat (all_events, ignore_index=True)
 
-    gdf_pointPolygon = attributePoints_to_Polygon (admPath, station_csv, 'StationName')
+    gdf_pointPolygon = attributePoints_to_Polygon (admPath, station_csv, 'StationName', buffer_distance_meters=5000, StationDataDir=cfg.stationsDir)
     gdf_pointPolygon.rename(columns={f'ADM{adminLevel}_FR':f'ADM{adminLevel}'}, inplace=True)
     gdf_pointPolygon [f'ADM{adminLevel}'] = gdf_pointPolygon [f'ADM{adminLevel}'].apply(capitalize)
     gdf_melt = gdf_pointPolygon.melt(
-        id_vars=gdf_pointPolygon.columns.difference(['StationName', 'StationName_0', 'StationName_1', 'StationName_2']),
-        value_vars=['StationName', 'StationName_0', 'StationName_1', 'StationName_2'],
+        id_vars=gdf_pointPolygon.columns.difference(['StationName_1', 'StationName_2', 'StationName_3', 'StationName_4']),
+        value_vars=['StationName_1', 'StationName_2', 'StationName_3', 'StationName_4'],
         var_name='StationName_Type', # Temporary column indicating the source column
         value_name='StationName_Merged' # Use a unique column name here
     )
     gdf_melt = gdf_melt.dropna(subset=['StationName_Merged'])
 
     gdf_melt = gdf_melt.drop(columns='geometry')#.to_csv (f"{DataDir}/observation/adm_flood_events_RP{RP}yr.csv")
     # Proceed with the merge
     hydro_events_df = pd.merge(gdf_melt, all_events_df, left_on='StationName_Merged', right_on='StationName', how='inner')
     hydro_events_df [f'ADM{adminLevel}'] = hydro_events_df [f'ADM{adminLevel}'].apply(capitalize)
-    hydro_events_gdf = gpd.GeoDataFrame(hydro_events_df, geometry='geometry')
-    hydro_events_gdf.to_file(f"{DataDir}/Impact_from_hydro_RP_{RP}.gpkg")
-    return hydro_events_gdf
+    hydro_events_df.to_csv (f"{DataDir}/observation/observational_flood_events_RP_{RP}yr.csv")
+    #hydro_events_gdf = gpd.GeoDataFrame(hydro_events_df, geometry='geometry')
+    #hydro_events_gdf.to_file(f"{DataDir}/observation/observational_flood_events_RP_{RP}yr.gpkg")
+    #hydro_events_gdf.to_file
+    return hydro_events_df
 
 
 if __name__=="__main__":
-    #print (readcsv(f"{DataDir}/Données partagées - DNH Mali - 2019/Données partagées - DNH Mali - 2019/Débit du Niger à Ansongo.csv"))
-    event_gdf = loop_over_stations (cfg.DNHstations, cfg.DataDir, 5, cfg.admPath, cfg.adminLevel)
-    print (event_gdf)
+    for RP in cfg.RPsyr:
+        #print (readcsv(f"{DataDir}/Données partagées - DNH Mali - 2019/Données partagées - DNH Mali - 2019/Débit du Niger à Ansongo.csv"))
+        event_gdf = loop_over_stations (cfg.DNHstations, cfg.DataDir, RP, cfg.admPath, cfg.adminLevel)
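
The melt in the new version reshapes the four StationName_* columns into long format before merging; a minimal sketch of that pattern with hypothetical data (only the column names mirror the diff):

import pandas as pd

gdf_pointPolygon = pd.DataFrame({
    'ADM2': ['MOPTI', 'SEGOU'],
    'StationName_1': ['Mopti', 'Ségou'],
    'StationName_2': [None, 'Kirango'],
})

gdf_melt = gdf_pointPolygon.melt(
    id_vars=['ADM2'],
    value_vars=['StationName_1', 'StationName_2'],
    var_name='StationName_Type',      # which source column the name came from
    value_name='StationName_Merged',  # single station-name column to merge on
).dropna(subset=['StationName_Merged'])

print(gdf_melt)  # one row per (admin unit, station) pair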
Binary file not shown.
45 changes: 45 additions & 0 deletions comparison/observation/data/DNHMali_2019/HydroStations_RP.csv
@@ -0,0 +1,45 @@
StationName,Sub-Basin,Latitude N (°),Longitude W (°),Altitude Z (m),superficie du BV (km2),dates de creation ,temps propagation (ans),2,5,10,20,50,100,200,500,1000,
Guélélinkoro,Sankarani,11.15,-8.5643,,23300,,ND,,,,,,,,,,
Séligué Aval,,11.64,"-8,23",325,34200,1964,,618,717,790,853,897,939,981,1031,1063,
Gouala,,11.97,"-8,23",-,35300,-,,576,654,692,729,767,795,821,856,874,
Banankoro, Niger Supérieur,11.68,"-8,66",,71800,1967,le temps de progation entre Banankoro et Bamako peuvent être variables entre 4 et 12 jours. le temps de progation entre Banankoro et Bamako sont dépendants des apports du Sankarani. ,566,677,724,763,835,871,906,941,975,
Kéniéroba ,,12.1,"-8,31",-,113000,1953,,544,614,655,679,723,750,768,803,821,
Bamako,,12.63,"-7,99",316,117000,1941,,331,370,387,407,427,442,451,468,482,
Koulikoro,,12.86,"-7,56",290,120000,1907,Bamako - Koulikoro: 1 jour ,532,596,625,653,693,707,733,760,774,
Tamani,,13.33,"-6,83",282,131500,1952, Koulikoro - Tamani: 3 jours ,519,579,620,646,688,705,736,756,782,
Ségou ,,13.45,"-6,26",279,136500,1958,Tamani - Kirango : 2 jous,456,506,537,559,581,601,621,642,656,
Kirango,,13.69,"-6,07",275,137000,1925,,495,555,585,614,644,672,686,714,728,
Ké-Macina ,,13.95,"-5,35",277,147000,1953, Kirango - Ké-Macina : 1 jour ,564,623,658,681,715,737,757,782,793,
Mopti,Delta du Niger,14.49,"-4,20",261,281600,1922,ND,492,610,641,662,688,703,719,739,750,
Nantaka,,14.54,"-4,21",261,281600,-,,569,639,680,719,759,795,819,859,878,
Akka,,15.39,"-4,23",-,307000,1955,,459,486,504,522,546,563,575,592,604,
Niafunké,,15.93,"-3,98",258,348300,1926,,461,475,503,521,543,560,571,588,599,
Diré,,16.27,"-3,38",258,366500,1954,,329,496,515,531,551,563,573,586,595,
Koryoumé,Niger Moyen ,16.67,"-3,03",-,360000,1963,,412,482,512,526,551,561,575,590,604,
Taoussa,,16.97,"-0,55",250,340000,1954,,484,507,532,550,575,588,605,623,635,
Ansongo ,,15.66,"-00,49",242,566000,1950,,141,297,309,316,325,331,336,342,347,
Gao,,16.26,"-0,05",245,556000,1948,,336,413,430,446,462,472,482,495,502,
Labbezanga,,14.95,"-0,69",-,-,-,,204,249,261,269,279,287,293,299,305,
Sofara,Bani,14.01,"-4,25",263,129400,1952,,527,595,633,670,707,726,761,779,814,
Beneny Kegny,,13.38,"-4,92",266,116000,1951,,526,619,682,733,794,837,872,923,945,
Douna,,13.21,"-5,89",281,101300,1922,,563,702,768,825,888,939,963,1020,1055,
Kana,,12.95,"-06,34",283,-,-,,,,,,,,,,,
Bougouni,Baoulé,11.39,"-7,45",330,15700,1975,,672,782,876,964,1049,1120,1139,1207,1282,
Dioila,,12.51,"-6,83",292,32500,1953,,517,632,686,770,848,884,923,993,1052,
Kouoro1,Banifing,12.01,"-5,69",298,14300,1957,,691,875,983,1106,1181,1274,1345,1414,1544,
Pankourou,Bagoé,11.45,-6.58,292,32150,1956,,777,973,1066,1134,1240,1274,1351,1436,1519,
,,,,,,,,,,,,,,,,,
,,,,,,,,,,,,,,,,,
,,,,,,,,,,,,,,,,,
,,,,,,,,,,,,,,,,,
Senegal Basin,,,,,,,,,,,,,,,,,
"Station names,",Sub-Basin,Latitude N (°),Longitude W (°),Altitude Z (m),Catchment area (km2),dates de creation ,,,,,,,,,,,
Daka Saidou,Bafing ,11.94,"-10,61",307,15700,1952,,,,,,,,,,,
Bafing Makana,,12.55,"-10,28",-,21000,1961,,,,,,,,,,,
Dibiya,,13.14,"-10,48",125,33500,1961,,,,,,,,,,,
MAHINA,,13.75,"-10,85",90,38400,1956,,,,,,,,,,,
Oualia,Bakoye,13.6,"-10,38",108,84700,1954,,,,,,,,,,,
Diangola,,12.8,"-9,48",-,12100,1973,,,,,,,,,,,
Gourbassy,Falèmé,13.39,"-11,63",-,17100,1954,,,,,,,,,,,
Kayes,Sénégal,14.44,"-11,44",20,157400,1903,,,,,,,,,,,
Gouina,,14.0161,-11.1054,48,128600,+,,,,,,,,,,,
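
Note that this file mixes dot- and comma-decimal coordinates (e.g. -8.5643 vs "-8,23") and appends a second, Senegal-basin table after blank separator rows; a minimal parsing sketch under those assumptions (the path and cleanup choices are hypothetical, not code from this commit):

import pandas as pd

df = pd.read_csv('HydroStations_RP.csv')  # hypothetical local path

# Keep only the Niger-basin block: truncate at the first all-blank separator row
blank_rows = df.index[df.isna().all(axis=1)]
if len(blank_rows):
    df = df.loc[:blank_rows[0] - 1]

# Normalise comma-decimal coordinates such as "-8,23" to floats
for col in ['Latitude N (°)', 'Longitude W (°)']:
    df[col] = pd.to_numeric(
        df[col].astype(str).str.replace(',', '.', regex=False), errors='coerce')

print(df[['StationName', 'Latitude N (°)', 'Longitude W (°)']].head())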
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
