something goes wrong in performance calculator for observation comparison: no string values in adm2
ERKuipers committed Nov 22, 2024
1 parent 2893fcd commit e2f9e5c
Showing 5 changed files with 19 additions and 11 deletions.
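For context, a minimal self-contained sketch (with made-up data) of the failure this commit guards against: a non-string entry such as NaN in the ADM2 column makes the unidecode normalization step raise, so the fix filters the column down to string entries before normalizing.

import pandas as pd
import unidecode

df = pd.DataFrame({'ADM2': ['Bamako', float('nan'), 'Ségou']})

# unidecode.unidecode(float('nan')) raises, so this normalization would fail:
# df['ADM2'].apply(lambda x: unidecode.unidecode(x).upper())

# The guard applied in openObservedImpact_gdf: keep string entries only.
df = df[df['ADM2'].apply(lambda x: isinstance(x, str))]
df['ADM2'] = df['ADM2'].apply(lambda x: unidecode.unidecode(x).upper())
print(df['ADM2'].tolist())  # ['BAMAKO', 'SEGOU']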
22 changes: 15 additions & 7 deletions GloFAS/GloFAS_analysis/performance_calculator.py
@@ -9,7 +9,7 @@
from GloFAS.GloFAS_prep.vectorCheck import checkVectorFormat
import GloFAS.GloFAS_prep.configuration as cfg
class PredictedToImpactPerformanceAnalyzer:
- def __init__(self, DataDir, RPyr, leadtime, impactData, triggerProb, adminLevel, adminPath, startYear, endYear, years, PredictedEvents_gdf):
+ def __init__(self, DataDir, RPyr, leadtime, impactData, triggerProb, adminLevel, adminPath, startYear, endYear, years, PredictedEvents_gdf, comparisonType):
"""
Initialize the FloodPerformanceAnalyzer class with the required data.
@@ -30,6 +30,7 @@ def __init__(self, DataDir, RPyr, leadtime, impactData, triggerProb, adminLevel,
self.years = years
self.impactData = impactData
self.PredictedEvents_gdf = PredictedEvents_gdf
+ self.comparisonType = comparisonType
if isinstance(self.impactData, str):
self.impact_gdf = self.openObservedImpact_gdf()
elif isinstance(self.impactData, gpd.GeoDataFrame):
@@ -39,15 +40,18 @@ def __init__(self, DataDir, RPyr, leadtime, impactData, triggerProb, adminLevel,

def openObservedImpact_gdf(self):
# Load the data
- df = pd.read_csv(self.impactDataPath)
+ if self.impactData.endswith('.csv'):
+ df = pd.read_csv(self.impactData)
+ else:
+ df = gpd.read_file(self.impactData)

# Convert 'End Date' and 'Start Date' to datetime
df['End Date'] = pd.to_datetime(df['End Date'], format='%d/%m/%Y', errors='coerce')
df['Start Date'] = pd.to_datetime(df['Start Date'], format='%d/%m/%Y', errors='coerce')

# Filter rows to the analysis window [startYear, endYear)
df_filtered = df[(df['End Date'].dt.year >= self.startYear) & (df['End Date'].dt.year < self.endYear)]

# Remove non-string entries from ADM columns
df_filtered = df_filtered[df_filtered[f'ADM{self.adminLevel}'].apply(lambda x: isinstance(x, str))]
self.gdf_shape = self.gdf_shape[self.gdf_shape[f'ADM{self.adminLevel}_FR'].apply(lambda x: isinstance(x, str))]
@@ -56,6 +60,7 @@ def openObservedImpact_gdf(self):
self.gdf_shape.rename(columns={f'ADM{cfg.adminLevel}_FR':f'ADM{cfg.adminLevel}'}, inplace=True)
self.gdf_shape[f'ADM{cfg.adminLevel}'] = self.gdf_shape[f'ADM{cfg.adminLevel}'].apply(lambda x: unidecode.unidecode(x).upper())
# Apply normalization to both DataFrames (converting to uppercase and removing special characters)

df_filtered[f'ADM{self.adminLevel}'] = df_filtered[f'ADM{self.adminLevel}'].apply(lambda x: unidecode.unidecode(x).upper())

# Merge the CSV data with the shapefile data
@@ -135,6 +140,7 @@ def _check_impact(self, PredictedEvents_gdf, commune, startdate):
(PredictedEvents_gdf['EndValidTime'] >= startdate) &
(PredictedEvents_gdf['Event']==1)
]
+ print(match)
return 1 if not match.empty else 0
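A self-contained sketch of the overlap test _check_impact performs; the column names follow the diff, while the commune and StartValidTime conditions are assumed from context because the top of the hunk is collapsed above.

import pandas as pd

events = pd.DataFrame({
    'ADM2': ['BAMAKO', 'BAMAKO'],
    'StartValidTime': pd.to_datetime(['2010-07-01', '2010-09-01']),
    'EndValidTime': pd.to_datetime(['2010-07-15', '2010-09-10']),
    'Event': [1, 0],
})
startdate = pd.Timestamp('2010-07-10')

# Does any active predicted event window cover the reported impact date?
match = events[
    (events['ADM2'] == 'BAMAKO') &
    (events['StartValidTime'] <= startdate) &
    (events['EndValidTime'] >= startdate) &
    (events['Event'] == 1)
]
print(1 if not match.empty else 0)  # 1: the July event covers the date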


@@ -188,24 +194,26 @@ def calculateCommunePerformance(self):
# Group by 'Commune' and calculate performance scores for each group
print(self.impact_gdf.columns)
print(self.impact_gdf.head())

scores_by_commune = self.impact_gdf.groupby(f'ADM{self.adminLevel}').apply(
- lambda x: self.calc_performance_scores(x['Impact'], x['Event'])
+ lambda x: self.calc_performance_scores(x[self.comparisonType], x['Event'])
)
scores_byCommune_gdf = self.gdf_shape.merge(scores_by_commune, on=f'ADM{cfg.adminLevel}')
scores_byCommune_gdf.to_file (f"{self.DataDir}/glofas_to_hydrodata/scores_byCommuneRP{self.RPyr:.1f}_yr_leadtime{self.leadtime:.0f}.shp")
scores_byCommune_gdf.to_file (f"{self.DataDir}/{comparisonType}/scores_byCommuneRP{self.RPyr:.1f}_yr_leadtime{self.leadtime:.0f}.shp")
return scores_byCommune_gdf

if __name__=='__main__':
for RPyr in cfg.RPsyr:
- hydro_impact_gdf = loop_over_stations (cfg.DNHstations , cfg.DataDir, RPyr, cfg.admPath)
+ hydro_impact_gdf = f'{cfg.DataDir}/Impact_from_hydro_RP_{RPyr}.gpkg'
+ #hydro_impact_gdf = loop_over_stations (cfg.DNHstations , cfg.DataDir, RPyr, cfg.admPath, cfg.adminLevel)
for leadtime in cfg.leadtimes:
floodProbability_path = cfg.DataDir / f"floodedRP{RPyr}yr_leadtime{leadtime}_ADM{cfg.adminLevel}.gpkg"
floodProbability_gdf = checkVectorFormat(floodProbability_path)
# Calculate the flood events
definer = FloodDefiner(cfg.adminLevel)
PredictedEvents_gdf = definer.EventMaker(floodProbability_gdf, cfg.actionLifetime, cfg.triggerProb)
#print (readcsv(f"{DataDir}/Données partagées - DNH Mali - 2019/Données partagées - DNH Mali - 2019/Débit du Niger à Ansongo.csv"))
- analyzer = PredictedToImpactPerformanceAnalyzer(cfg.DataDir, RPyr, leadtime, hydro_impact_gdf, cfg.triggerProb, cfg.adminLevel, cfg.admPath, cfg.startYear, cfg.endYear, cfg.years, PredictedEvents_gdf)
+ analyzer = PredictedToImpactPerformanceAnalyzer(cfg.DataDir, RPyr, leadtime, hydro_impact_gdf, cfg.triggerProb, cfg.adminLevel, cfg.admPath, cfg.startYear, cfg.endYear, cfg.years, PredictedEvents_gdf, 'Observation')
analyzer.matchImpact_and_Trigger()
analyzer.calculateCommunePerformance()
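calc_performance_scores itself is not shown in this diff; the sketch below only illustrates the groupby/apply pattern used above, with an assumed hit/miss/false-alarm scorer standing in for the real one.

import pandas as pd

def calc_performance_scores(obs, pred):
    # Assumed stand-in: probability of detection and false-alarm ratio.
    hits = ((obs == 1) & (pred == 1)).sum()
    false_alarms = ((obs == 0) & (pred == 1)).sum()
    misses = ((obs == 1) & (pred == 0)).sum()
    pod = hits / (hits + misses) if (hits + misses) else float('nan')
    far = false_alarms / (hits + false_alarms) if (hits + false_alarms) else float('nan')
    return pd.Series({'pod': pod, 'far': far})

df = pd.DataFrame({'ADM2': ['A', 'A', 'B', 'B'],
                   'Observation': [1, 0, 1, 1],
                   'Event': [1, 1, 0, 1]})
scores = df.groupby('ADM2').apply(
    lambda x: calc_performance_scores(x['Observation'], x['Event']))
print(scores)  # one row of scores per commune, ready to merge on ADM2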

4 changes: 2 additions & 2 deletions comparison/HydroImpact.py
@@ -141,7 +141,7 @@ def createEvent(trigger_df):

# Create a temporary dataframe for the current event
temp_event_df = pd.DataFrame({
- 'Event': [Event],
+ 'Observation': [Event],
'Start Date': [StartDate],
'End Date': [final_endtime],
})
@@ -161,7 +161,7 @@ def createEvent(trigger_df):
else:
# Return an empty GeoDataFrame if no events were found
# Initialize an empty dataframe
- events_df = pd.DataFrame(columns=['Event', 'Start Date', 'End Date'])
+ events_df = pd.DataFrame(columns=['Observation', 'Start Date', 'End Date'])
return events_df
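A short illustration of why the empty frame keeps the renamed column: both branches of createEvent must return the same schema, so downstream code can select the 'Observation' column unconditionally. The data below are invented.

import pandas as pd

events_df = pd.DataFrame(columns=['Observation', 'Start Date', 'End Date'])
row = pd.DataFrame({'Observation': [1],
                    'Start Date': [pd.Timestamp('2010-07-01')],
                    'End Date': [pd.Timestamp('2010-07-15')]})
events_df = pd.concat([events_df, row], ignore_index=True)
print(events_df['Observation'].tolist())  # same access path either way: [1]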

def loop_over_stations(station_csv, DataDir, RP, admPath, adminLevel):
4 changes: 2 additions & 2 deletions comparison/pointMatching.py
@@ -141,7 +141,7 @@ def attributePoints_to_Polygon(
Column name in vector2 identifying the polygons.
crs : str, optional
Coordinate reference system for all data. Defaults to 'EPSG:4326'.
- buffer_distance : float, optional
+ border_tolerance : float, optional
Distance in meters to expand the polygons for including nearby points. Defaults to 5000 (5 km).
StationDataDir : str or Path, optional
Directory where the output CSV file will be saved. Default is the current working directory.
@@ -165,7 +165,7 @@

# Apply a buffer to the polygons
expanded_polygons_gdf = polygons_gdf.copy()
- expanded_polygons_gdf['geometry'] = expanded_polygons_gdf.geometry.buffer(buffer_distance)
+ expanded_polygons_gdf['geometry'] = expanded_polygons_gdf.geometry.buffer(border_tolerance)

# Initialize a new column in the polygons GeoDataFrame to store point IDs
polygons_gdf[f'{ID2}'] = None
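A minimal sketch of the border_tolerance idea with invented geometry: buffering the polygons lets a station just outside an admin boundary still be attributed to it. attributePoints_to_Polygon does more bookkeeping (ID columns, CSV export) than shown here.

import geopandas as gpd
from shapely.geometry import Point, Polygon

# A metric CRS, so buffer distances are in meters.
polygons = gpd.GeoDataFrame({'ADM2': ['A']},
                            geometry=[Polygon([(0, 0), (1000, 0), (1000, 1000), (0, 1000)])],
                            crs='EPSG:32630')
points = gpd.GeoDataFrame({'StationName': ['just_outside']},
                          geometry=[Point(1200, 500)], crs='EPSG:32630')

expanded = polygons.copy()
expanded['geometry'] = expanded.geometry.buffer(5000)  # border_tolerance
joined = gpd.sjoin(points, expanded, predicate='within')
print(joined[['StationName', 'ADM2']])  # the station is attributed to 'A'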
File renamed without changes.
