Commit

valentijn7 committed Nov 22, 2024
2 parents eb3df09 + e2f9e5c commit 9418704
Showing 7 changed files with 81 additions and 59 deletions.
27 changes: 18 additions & 9 deletions GloFAS/GloFAS_analysis/performance_calculator.py
@@ -9,7 +9,7 @@
from GloFAS.GloFAS_prep.vectorCheck import checkVectorFormat
import GloFAS.GloFAS_prep.configuration as cfg
class PredictedToImpactPerformanceAnalyzer:
def __init__(self, DataDir, RPyr, leadtime, impactData, triggerProb, adminLevel, adminPath, startYear, endYear, years, PredictedEvents_gdf):
def __init__(self, DataDir, RPyr, leadtime, impactData, triggerProb, adminLevel, adminPath, startYear, endYear, years, PredictedEvents_gdf, comparisonType):
"""
Initialize the FloodPerformanceAnalyzer class with the required data.
@@ -19,7 +19,6 @@ def __init__(self, DataDir, RPyr, leadtime, impactData, triggerProb, adminLevel,
should make this a csv possibility
triggerProb (float): Probability threshold to classify flood triggers.
"""

self.triggerProb = triggerProb
self.adminLevel = adminLevel
self.gdf_shape = gpd.read_file(adminPath)
@@ -31,7 +30,8 @@ def __init__(self, DataDir, RPyr, leadtime, impactData, triggerProb, adminLevel,
self.years = years
self.impactData = impactData
self.PredictedEvents_gdf = PredictedEvents_gdf
if isinstance (self.impactData, (str, Path)):
self.comparisonType = comparisonType
if isinstance (self.impactData, (str)):
self.impact_gdf = self.openObservedImpact_gdf()
elif isinstance (self.impactData, gpd.GeoDataFrame):
self.impact_gdf = self.impactData
@@ -40,15 +40,18 @@ def __init__(self, DataDir, RPyr, leadtime, impactData, triggerProb, adminLevel,

def openObservedImpact_gdf(self):
# Load the data
df = pd.read_csv(self.impactDataPath)
if self.impactData.endswith('.csv'):
df = pd.read_csv(self.impactData)
else:
df = gpd.read_file(self.impactData)

# Convert 'End Date' and 'Start Date' to datetime
df['End Date'] = pd.to_datetime(df['End Date'], format='%d/%m/%Y', errors='coerce')
df['Start Date'] = pd.to_datetime(df['Start Date'], format='%d/%m/%Y', errors= 'coerce')

# Filter rows with 'End Date' between self.startYear (inclusive) and self.endYear (exclusive)
df_filtered = df[(df['End Date'].dt.year >= self.startYear) & (df['End Date'].dt.year < self.endYear)]

# Remove non-string entries from ADM columns
df_filtered = df_filtered[df_filtered[f'ADM{self.adminLevel}'].apply(lambda x: isinstance(x, str))]
self.gdf_shape = self.gdf_shape[self.gdf_shape[f'ADM{self.adminLevel}_FR'].apply(lambda x: isinstance(x, str))]
@@ -57,6 +60,7 @@ def openObservedImpact_gdf(self):
self.gdf_shape.rename(columns={f'ADM{cfg.adminLevel}_FR':f'ADM{cfg.adminLevel}'}, inplace=True)
self.gdf_shape[f'ADM{cfg.adminLevel}'] = self.gdf_shape[f'ADM{cfg.adminLevel}'].apply(lambda x: unidecode.unidecode(x).upper())
# Apply normalization to both DataFrames (converting to uppercase and removing special characters)

df_filtered[f'ADM{self.adminLevel}'] = df_filtered[f'ADM{self.adminLevel}'].apply(lambda x: unidecode.unidecode(x).upper())

# Merge the CSV data with the shapefile data
@@ -136,6 +140,7 @@ def _check_impact(self, PredictedEvents_gdf, commune, startdate):
(PredictedEvents_gdf['EndValidTime'] >= startdate) &
(PredictedEvents_gdf['Event']==1)
]
print (match)
return 1 if not match.empty else 0


@@ -187,24 +192,28 @@ def calculateCommunePerformance(self):
Calculate the performance scores for each commune and merge them back into the GeoDataFrame.
"""
# Group by 'Commune' and calculate performance scores for each group
print (self.impact_gdf.columns)
print (self.impact_gdf.head)

scores_by_commune = self.impact_gdf.groupby(f'ADM{self.adminLevel}').apply(
lambda x: self.calc_performance_scores(x['Impact'], x['Event'])
lambda x: self.calc_performance_scores(x[f'{self.comparisonType}'], x['Event'])
)
scores_byCommune_gdf = self.gdf_shape.merge(scores_by_commune, on=f'ADM{cfg.adminLevel}')
scores_byCommune_gdf.to_file (f"{self.DataDir}/glofas_to_hydrodata/scores_byCommuneRP{self.RPyr:.1f}_yr_leadtime{self.leadtime:.0f}.shp")
scores_byCommune_gdf.to_file (f"{self.DataDir}/{self.comparisonType}/scores_byCommuneRP{self.RPyr:.1f}_yr_leadtime{self.leadtime:.0f}.shp")
return scores_byCommune_gdf

if __name__=='__main__':
for RPyr in cfg.RPsyr:
hydro_impact_gdf = loop_over_stations (cfg.DNHstations , cfg.DataDir, RPyr, cfg.admPath)
hydro_impact_gdf = f'{cfg.DataDir}/Impact_from_hydro_RP_{RPyr}.gpkg'
#hydro_impact_gdf = loop_over_stations (cfg.DNHstations , cfg.DataDir, RPyr, cfg.admPath, cfg.adminLevel)
for leadtime in cfg.leadtimes:
floodProbability_path = cfg.DataDir/ f"floodedRP{RPyr}yr_leadtime{leadtime}_ADM{cfg.adminLevel}.gpkg"
floodProbability_gdf = checkVectorFormat (floodProbability_path)
#calculate the flood events
definer = FloodDefiner (cfg.adminLevel)
PredictedEvents_gdf = definer.EventMaker (floodProbability_gdf, cfg.actionLifetime, cfg.triggerProb)
#print (readcsv(f"{DataDir}/Données partagées - DNH Mali - 2019/Données partagées - DNH Mali - 2019/Débit du Niger à Ansongo.csv"))
analyzer = PredictedToImpactPerformanceAnalyzer(cfg.DataDir, RPyr, leadtime, hydro_impact_gdf, cfg.triggerProb, cfg.adminLevel, cfg.admPath, cfg.startYear, cfg.endYear, cfg.years, PredictedEvents_gdf)
analyzer = PredictedToImpactPerformanceAnalyzer(cfg.DataDir, RPyr, leadtime, hydro_impact_gdf, cfg.triggerProb, cfg.adminLevel, cfg.admPath, cfg.startYear, cfg.endYear, cfg.years, PredictedEvents_gdf, 'Observation')
analyzer.matchImpact_and_Trigger()
analyzer.calculateCommunePerformance()

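The date-overlap test in _check_impact above returns 1 when at least one predicted event is still valid at the observed start date for the commune. A minimal sketch of that logic in plain pandas; only the EndValidTime and Event conditions appear in the hunk above, so the ADM2 and StartValidTime column names and the data are illustrative assumptions:

import pandas as pd

PredictedEvents_gdf = pd.DataFrame({
    'ADM2': ['BAMAKO', 'MOPTI'],                                    # hypothetical commune names
    'StartValidTime': pd.to_datetime(['2010-07-01', '2010-08-01']),
    'EndValidTime': pd.to_datetime(['2010-07-20', '2010-08-15']),
    'Event': [1, 1],
})
commune, startdate = 'BAMAKO', pd.Timestamp('2010-07-15')           # observed impact location and start date

match = PredictedEvents_gdf[
    (PredictedEvents_gdf['ADM2'] == commune) &
    (PredictedEvents_gdf['StartValidTime'] <= startdate) &
    (PredictedEvents_gdf['EndValidTime'] >= startdate) &
    (PredictedEvents_gdf['Event'] == 1)
]
print(1 if not match.empty else 0)                                   # 1: the observed impact counts as a hit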
7 changes: 0 additions & 7 deletions GloFAS/GloFAS_prep/accent_remover.py

This file was deleted.

14 changes: 14 additions & 0 deletions GloFAS/GloFAS_prep/text_formatter.py
@@ -0,0 +1,14 @@
import unicodedata
def remove_accents(input_str):
"""Remove accents from a given string."""
if isinstance(input_str, str): # Only process strings
nfkd_form = unicodedata.normalize('NFKD', input_str)
return "".join([c for c in nfkd_form if not unicodedata.combining(c)])
return input_str # Return as is if not a string

def capitalize(input_str):
"""Remove accents and capitalize the input string."""
if isinstance(input_str, str): # Only process strings
input_str = remove_accents(input_str) # Remove accents first
return input_str.upper() # Capitalize (convert to uppercase)
return input_str
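A quick usage sketch of the new helpers (the place names are hypothetical examples):

from GloFAS.GloFAS_prep.text_formatter import remove_accents, capitalize

print(remove_accents('Ségou'))    # 'Segou'  (accents stripped via NFKD decomposition)
print(capitalize('Tombouctou'))   # 'TOMBOUCTOU'  (accents removed, then uppercased)
print(capitalize(42))             # 42  (non-strings are returned unchanged)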
27 changes: 13 additions & 14 deletions comparison/HydroImpact.py
@@ -1,10 +1,10 @@

import GloFAS.GloFAS_prep.configuration as cfg
import pandas as pd
from GloFAS.GloFAS_prep.accent_remover import remove_accents
from GloFAS.GloFAS_prep.text_formatter import remove_accents, capitalize
import scipy.stats as stats
from comparison.pointMatching import attributePoints_to_Polygon

import geopandas as gpd
def parse_date_with_fallback(date_str, year):
try:
# Try to parse the date with the given year
@@ -64,7 +64,6 @@ def QRP_fit (hydro_df, RP):

# 2. Fit a Gumbel distribution to the annual maximum discharge values
loc, scale = stats.gumbel_r.fit(annual_max_discharge)
print (RP)
# 4. Calculate the discharge value corresponding to the return period
discharge_value = stats.gumbel_r.ppf(1 - 1/RP, loc, scale)
return discharge_value
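To illustrate the Gumbel step in QRP_fit, a minimal sketch with made-up annual maxima; only the gumbel_r.fit / gumbel_r.ppf calls mirror the code above:

import numpy as np
import scipy.stats as stats

annual_max_discharge = np.array([1200., 1350., 1100., 1600., 1450., 1300., 1700., 1250.])  # hypothetical annual maxima (m3/s)
loc, scale = stats.gumbel_r.fit(annual_max_discharge)       # fit a Gumbel (right-skewed) distribution
RP = 5.0                                                     # return period in years
discharge_value = stats.gumbel_r.ppf(1 - 1/RP, loc, scale)   # discharge exceeded on average once every RP years
print(discharge_value)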
@@ -142,9 +141,9 @@ def createEvent(trigger_df):

# Create a temporary dataframe for the current event
temp_event_df = pd.DataFrame({
'Event': [Event],
'StartDate': [StartDate],
'EndDate': [final_endtime],
'Observation': [Event],
'Start Date': [StartDate],
'End Date': [final_endtime],
})

# Append the event to the list of events
@@ -162,10 +161,10 @@ def createEvent(trigger_df):
else:
# Return an empty GeoDataFrame if no events were found
# Initialize an empty dataframe
events_df = pd.DataFrame(columns=['Event', 'Start Date', 'End Date'])
events_df = pd.DataFrame(columns=['Observation', 'Start Date', 'End Date'])
return events_df

def loop_over_stations(station_csv, DataDir, RP, admPath):
def loop_over_stations(station_csv, DataDir, RP, admPath, adminLevel):
RP = float(RP)
station_df = pd.read_csv (station_csv, header=0)
#print (station_df.columns)
@@ -189,11 +188,10 @@ def loop_over_stations(station_csv, DataDir, RP, admPath):

#generate the gdf to merge with where the points are attributed to the respective administrative units


all_events_df = pd.concat (all_events, ignore_index=True)
print (all_events_df.columns)

gdf_pointPolygon = attributePoints_to_Polygon (admPath, station_csv, 'StationName')
print (gdf_pointPolygon.columns)
gdf_pointPolygon.rename(columns={f'ADM{adminLevel}_FR':f'ADM{adminLevel}'}, inplace=True)
gdf_melt = gdf_pointPolygon.melt(
id_vars=gdf_pointPolygon.columns.difference(['StationName', 'StationName_0', 'StationName_1', 'StationName_2']),
value_vars=['StationName', 'StationName_0', 'StationName_1', 'StationName_2'],
@@ -204,12 +202,13 @@ def loop_over_stations(station_csv, DataDir, RP, admPath):

# Proceed with the merge
hydro_events_df = pd.merge(gdf_melt, all_events_df, left_on='StationName_Merged', right_on='StationName', how='inner')

hydro_events_gdf = gpd.GeoDataFrame(hydro_events_df, geometry='geometry')
hydro_events_df [f'ADM{adminLevel}'] = hydro_events_df [f'ADM{adminLevel}'].apply(capitalize)
hydro_events_gdf = gpd.GeoDataFrame(hydro_events_df, geometry='geometry')
hydro_events_gdf.to_file(f"{DataDir}/Impact_from_hydro_RP_{RP}.gpkg")
return hydro_events_gdf


if __name__=="__main__":
#print (readcsv(f"{DataDir}/Données partagées - DNH Mali - 2019/Données partagées - DNH Mali - 2019/Débit du Niger à Ansongo.csv"))
event_gdf = loop_over_stations (cfg.DNHstations, cfg.DataDir, 5, cfg.admPath)
event_gdf = loop_over_stations (cfg.DNHstations, cfg.DataDir, 5, cfg.admPath, cfg.adminLevel)
print (event_gdf)
65 changes: 36 additions & 29 deletions comparison/pointMatching.py
@@ -6,7 +6,7 @@
from scipy.spatial import cKDTree
from GloFAS.GloFAS_prep.vectorCheck import checkVectorFormat
import GloFAS.GloFAS_prep.configuration as cfg
from GloFAS.GloFAS_prep.accent_remover import remove_accents
from GloFAS.GloFAS_prep.text_formatter import remove_accents

def findclosestpoint(point_x, point_y, target_gdf):
'''
@@ -32,8 +32,6 @@ def findclosestpoint(point_x, point_y, target_gdf):
return target_gdf.iloc[idx[0]] # Return row of the closest point




def matchPoints(
vector_original,
ID1,
@@ -120,16 +118,17 @@ def matchPoints(
return match_df

def attributePoints_to_Polygon(
vectorPolygon,
vectorPoint,
ID2,
crs='EPSG:4326',
StationDataDir=Path.cwd(),
filename='attributePoints_to_Polygon.csv'
):
vectorPolygon,
vectorPoint,
ID2,
crs='EPSG:4326',
border_tolerance=5000, # Buffer distance in meters
StationDataDir=Path.cwd(),
filename='attributePoints_to_Polygon.csv'
):
'''
Assigns points from vector_original to polygons in vector2, adding a new column in vector2 that contains the IDs of all points
from vector_original that fall within each polygon.
from vector_original that fall within each polygon or within a specified buffer distance.
Parameters:
vectorPolygon : str or GeoDataFrame
@@ -142,12 +141,14 @@ def attributePoints_to_Polygon(
Column name in vector2 identifying the polygons.
crs : str, optional
Coordinate reference system for all data. Defaults to 'EPSG:4326'.
border_tolerance : float, optional
Distance in meters to expand the polygons for including nearby points. Defaults to 5000 (5 km).
StationDataDir : str or Path, optional
Directory where the output CSV file will be saved. Default is the current working directory.
Writes:
CSV
A CSV file containing the polygon ID and the IDs of points within each polygon.
A CSV file containing the polygon ID and the IDs of points within or near each polygon.
Returns:
GeoDataFrame
@@ -162,25 +163,29 @@ def attributePoints_to_Polygon(
if points_gdf.crs != polygons_gdf.crs:
points_gdf = points_gdf.to_crs(polygons_gdf.crs)

# Apply a buffer to the polygons
expanded_polygons_gdf = polygons_gdf.copy()
expanded_polygons_gdf['geometry'] = expanded_polygons_gdf.geometry.buffer(border_tolerance)

# Initialize a new column in the polygons GeoDataFrame to store point IDs
polygons_gdf[f'{ID2}'] = None

# Loop through each polygon to find points within it
for idx, polygon_row in polygons_gdf.iterrows():
# Get the geometry of the current polygon
polygon_geom = polygon_row.geometry
for idx, polygon_row in expanded_polygons_gdf.iterrows():
# Get the geometry of the current buffered polygon
expanded_polygon_geom = polygon_row.geometry

# Find points that fall within this polygon
points_within = points_gdf[points_gdf.geometry.within(polygon_geom)]
# Find points that fall within this expanded polygon
points_within = points_gdf[points_gdf.geometry.within(expanded_polygon_geom)]

# Collect the IDs of these points
point_ids = points_within[ID2].tolist()
# Update the GeoDataFrame with the list of point IDs

nrstations_inpolygon = len (point_ids)
if nrstations_inpolygon>0:
# Update the original polygons GeoDataFrame with the list of point IDs
nrstations_inpolygon = len(point_ids)
if nrstations_inpolygon > 0:
polygons_gdf.at[idx, f'{ID2}'] = remove_accents(point_ids[0])
if nrstations_inpolygon>1:
if nrstations_inpolygon > 1:
for stationnr in range(nrstations_inpolygon):
polygons_gdf.at[idx, f'{ID2}_{stationnr}'] = remove_accents(point_ids[stationnr-1])

@@ -189,14 +194,16 @@ def attributePoints_to_Polygon(
polygons_gdf.drop(columns='geometry').to_csv(output_file, index=False)
return polygons_gdf


if __name__ == '__main__':
result = attributePoints_to_Polygon(
cfg.admPath,
cfg.DNHstations,
'StationName',
crs=cfg.crs,
StationDataDir=cfg.stationsDir,
filename='DNHstations_in_ADM2.csv'
)
result = attributePoints_to_Polygon(
cfg.admPath,
cfg.DNHstations,
'StationName',
crs=cfg.crs,
border_tolerance=5000, # 5 km tolerance around polygon borders
StationDataDir=cfg.stationsDir,
filename='DNHstations_in_ADM2.csv'
)

# matchPoints(cfg.GloFASstations, 'StationName', cfg.googlestations, 'gaugeId', crs=cfg.crs, StationDataDir=cfg.stationsDir)
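To make border_tolerance concrete: buffering a polygon by 5000 lets a station a few kilometres outside the boundary still be attributed to it. A small sketch with toy geometries in a metric CRS (EPSG:32630 is only an illustrative choice; buffering in a geographic CRS such as EPSG:4326 would be in degrees rather than metres):

import geopandas as gpd
from shapely.geometry import Point, Polygon

poly = gpd.GeoDataFrame({'ADM2': ['A']},
                        geometry=[Polygon([(0, 0), (10000, 0), (10000, 10000), (0, 10000)])],
                        crs='EPSG:32630')
station = gpd.GeoDataFrame({'StationName': ['S1']},
                           geometry=[Point(13000, 5000)],  # 3 km east of the polygon edge
                           crs='EPSG:32630')

inside_strict = station.geometry.within(poly.geometry.iloc[0]).iloc[0]                  # False: outside the border
inside_buffered = station.geometry.within(poly.geometry.buffer(5000).iloc[0]).iloc[0]   # True: within the 5 km tolerance
print(inside_strict, inside_buffered)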
Empty file.
File renamed without changes.
