diff --git a/GloFAS/GloFAS_analysis/performance_calculator.py b/GloFAS/GloFAS_analysis/performance_calculator.py
index 77369f5..515601c 100644
--- a/GloFAS/GloFAS_analysis/performance_calculator.py
+++ b/GloFAS/GloFAS_analysis/performance_calculator.py
@@ -9,7 +9,7 @@ from GloFAS.GloFAS_prep.vectorCheck import checkVectorFormat
 import GloFAS.GloFAS_prep.configuration as cfg


 class PredictedToImpactPerformanceAnalyzer:
-    def __init__(self, DataDir, RPyr, leadtime, impactData, triggerProb, adminLevel, adminPath, startYear, endYear, years, PredictedEvents_gdf):
+    def __init__(self, DataDir, RPyr, leadtime, impactData, triggerProb, adminLevel, adminPath, startYear, endYear, years, PredictedEvents_gdf, comparisonType):
         """
         Initialize the FloodPerformanceAnalyzer class with the required data.
@@ -30,6 +30,7 @@ def __init__(self, DataDir, RPyr, leadtime, impactData, triggerProb, adminLevel,
         self.years = years
         self.impactData = impactData
         self.PredictedEvents_gdf = PredictedEvents_gdf
+        self.comparisonType = comparisonType
         if isinstance (self.impactData, (str)):
             self.impact_gdf = self.openObservedImpact_gdf()
         elif isinstance (self.impactData, gpd.GeoDataFrame):
@@ -39,7 +40,10 @@ def __init__(self, DataDir, RPyr, leadtime, impactData, triggerProb, adminLevel,

     def openObservedImpact_gdf(self):
         # Load the data
-        df = pd.read_csv(self.impactDataPath)
+        if self.impactData.endswith('.csv'):
+            df = pd.read_csv(self.impactData)
+        else:
+            df = gpd.read_file(self.impactData)

         # Convert 'End Date' and 'Start Date' to datetime
         df['End Date'] = pd.to_datetime(df['End Date'], format='%d/%m/%Y', errors='coerce')
@@ -47,7 +51,7 @@ def openObservedImpact_gdf(self):

         # Filter rows between 2004 and 2022 ()
         df_filtered = df[(df['End Date'].dt.year >= self.startYear) & (df['End Date'].dt.year < self.endYear)]
-        
+
         # Remove non-string entries from ADM columns
         df_filtered = df_filtered[df_filtered[f'ADM{self.adminLevel}'].apply(lambda x: isinstance(x, str))]
         self.gdf_shape = self.gdf_shape[self.gdf_shape[f'ADM{self.adminLevel}_FR'].apply(lambda x: isinstance(x, str))]
@@ -56,6 +60,7 @@ def openObservedImpact_gdf(self):
         self.gdf_shape.rename(columns={f'ADM{cfg.adminLevel}_FR':f'ADM{cfg.adminLevel}'}, inplace=True)
         self.gdf_shape[f'ADM{cfg.adminLevel}'] = self.gdf_shape[f'ADM{cfg.adminLevel}'].apply(lambda x: unidecode.unidecode(x).upper())
         # Apply normalization to both DataFrames (converting to uppercase and removing special characters)
+
         df_filtered[f'ADM{self.adminLevel}'] = df_filtered[f'ADM{self.adminLevel}'].apply(lambda x: unidecode.unidecode(x).upper())

         # Merge the CSV data with the shapefile data
@@ -135,6 +140,7 @@ def _check_impact(self, PredictedEvents_gdf, commune, startdate):
             (PredictedEvents_gdf['EndValidTime'] >= startdate) &
             (PredictedEvents_gdf['Event']==1)
         ]
+        print (match)
         return 1 if not match.empty else 0

@@ -188,16 +194,18 @@ def calculateCommunePerformance(self):
         # Group by 'Commune' and calculate performance scores for each group
         print (self.impact_gdf.columns)
         print (self.impact_gdf.head)
+
         scores_by_commune = self.impact_gdf.groupby(f'ADM{self.adminLevel}').apply(
-            lambda x: self.calc_performance_scores(x['Impact'], x['Event'])
+            lambda x: self.calc_performance_scores(x[self.comparisonType], x['Event'])
         )
         scores_byCommune_gdf = self.gdf_shape.merge(scores_by_commune, on=f'ADM{cfg.adminLevel}')
-        scores_byCommune_gdf.to_file (f"{self.DataDir}/glofas_to_hydrodata/scores_byCommuneRP{self.RPyr:.1f}_yr_leadtime{self.leadtime:.0f}.shp")
+        scores_byCommune_gdf.to_file (f"{self.DataDir}/{self.comparisonType}/scores_byCommuneRP{self.RPyr:.1f}_yr_leadtime{self.leadtime:.0f}.shp")
         return scores_byCommune_gdf

 if __name__=='__main__':
     for RPyr in cfg.RPsyr:
-        hydro_impact_gdf = loop_over_stations (cfg.DNHstations , cfg.DataDir, RPyr, cfg.admPath)
+        hydro_impact_gdf = f'{cfg.DataDir}/Impact_from_hydro_RP_{RPyr}.gpkg'
+        #hydro_impact_gdf = loop_over_stations (cfg.DNHstations , cfg.DataDir, RPyr, cfg.admPath, cfg.adminLevel)
         for leadtime in cfg.leadtimes:
             floodProbability_path = cfg.DataDir/ f"floodedRP{RPyr}yr_leadtime{leadtime}_ADM{cfg.adminLevel}.gpkg"
             floodProbability_gdf = checkVectorFormat (floodProbability_path)
@@ -205,7 +213,7 @@ def calculateCommunePerformance(self):
             definer = FloodDefiner (cfg.adminLevel)
             PredictedEvents_gdf = definer.EventMaker (floodProbability_gdf, cfg.actionLifetime, cfg.triggerProb)
             #print (readcsv(f"{DataDir}/Données partagées - DNH Mali - 2019/Donne╠ües partage╠ües - DNH Mali - 2019/De╠übit du Niger a╠Ç Ansongo.csv"))
-            analyzer = PredictedToImpactPerformanceAnalyzer(cfg.DataDir, RPyr, leadtime, hydro_impact_gdf, cfg.triggerProb, cfg.adminLevel, cfg.admPath, cfg.startYear, cfg.endYear, cfg.years, PredictedEvents_gdf)
+            analyzer = PredictedToImpactPerformanceAnalyzer(cfg.DataDir, RPyr, leadtime, hydro_impact_gdf, cfg.triggerProb, cfg.adminLevel, cfg.admPath, cfg.startYear, cfg.endYear, cfg.years, PredictedEvents_gdf, 'Observation')
             analyzer.matchImpact_and_Trigger()
             analyzer.calculateCommunePerformance()
diff --git a/comparison/HydroImpact.py b/comparison/HydroImpact.py
index 1752b46..d854002 100644
--- a/comparison/HydroImpact.py
+++ b/comparison/HydroImpact.py
@@ -141,7 +141,7 @@ def createEvent(trigger_df):

             # Create a temporary dataframe for the current event
             temp_event_df = pd.DataFrame({
-                'Event': [Event],
+                'Observation': [Event],
                 'Start Date': [StartDate],
                 'End Date': [final_endtime],
             })
@@ -161,7 +161,7 @@ def createEvent(trigger_df):
     else:
         # Return an empty GeoDataFrame if no events were found
         # Initialize an empty dataframe
-        events_df = pd.DataFrame(columns=['Event', 'Start Date', 'End Date'])
+        events_df = pd.DataFrame(columns=['Observation', 'Start Date', 'End Date'])
     return events_df

 def loop_over_stations(station_csv, DataDir, RP, admPath, adminLevel):
diff --git a/comparison/pointMatching.py b/comparison/pointMatching.py
index 607de6d..988dbea 100644
--- a/comparison/pointMatching.py
+++ b/comparison/pointMatching.py
@@ -141,7 +141,7 @@ def attributePoints_to_Polygon(
         Column name in vector2 identifying the polygons.
     crs : str, optional
         Coordinate reference system for all data. Defaults to 'EPSG:4326'.
-    buffer_distance : float, optional
+    border_tolerance : float, optional
         Distance in meters to expand the polygons for including nearby points. Defaults to 5000 (5 km).
     StationDataDir : str or Path, optional
         Directory where the output CSV file will be saved. Default is the current working directory.
@@ -165,7 +165,7 @@ def attributePoints_to_Polygon(

     # Apply a buffer to the polygons
     expanded_polygons_gdf = polygons_gdf.copy()
-    expanded_polygons_gdf['geometry'] = expanded_polygons_gdf.geometry.buffer(buffer_distance)
+    expanded_polygons_gdf['geometry'] = expanded_polygons_gdf.geometry.buffer(border_tolerance)

     # Initialize a new column in the polygons GeoDataFrame to store point IDs
     polygons_gdf[f'{ID2}'] = None
diff --git a/comparison/journal_paper_list_of_figures_tables.md b/comparison/visualization/journal_paper_list_of_figures_tables.md
similarity index 100%
rename from comparison/journal_paper_list_of_figures_tables.md
rename to comparison/visualization/journal_paper_list_of_figures_tables.md
diff --git a/comparison/plot.py b/comparison/visualization/plot.py
similarity index 100%
rename from comparison/plot.py
rename to comparison/visualization/plot.py