
Commit

valentijn7 committed Nov 22, 2024
2 parents 0226fc2 + 23f5ce2 commit cd3850c
Showing 2 changed files with 23 additions and 17 deletions.
28 changes: 18 additions & 10 deletions GloFAS/GloFAS_analysis/performance_calculator.py
@@ -5,10 +5,11 @@
 import matplotlib.pyplot as plt
 from datetime import datetime, timedelta
 from GloFAS.GloFAS_analysis.flood_definer import FloodDefiner
+from comparison.HydroImpact import loop_over_stations
 from GloFAS.GloFAS_prep.vectorCheck import checkVectorFormat
 import GloFAS.GloFAS_prep.configuration as cfg
 class PredictedToImpactPerformanceAnalyzer:
-    def __init__(self, DataDir, RPyr, leadtime, impactDataPath, triggerProb, adminLevel, adminPath, startYear, endYear, years, PredictedEvents_gdf):
+    def __init__(self, DataDir, RPyr, leadtime, impactData, triggerProb, adminLevel, adminPath, startYear, endYear, years, PredictedEvents_gdf):
         """
         Initialize the FloodPerformanceAnalyzer class with the required data.
@@ -28,9 +29,12 @@ def __init__(self, DataDir, RPyr, leadtime, impactDataPath, triggerProb, adminLe
         self.startYear = startYear
         self.endYear = endYear
         self.years = years
-        self.impactDataPath = impactDataPath
+        self.impactData = impactData
         self.PredictedEvents_gdf = PredictedEvents_gdf
-        self.impact_gdf = self.openObservedImpact_gdf()
+        if isinstance (self.impactData, (str, Path)):
+            self.impact_gdf = self.openObservedImpact_gdf()
+        elif isinstance (self.impactData, gpd.GeoDataFrame):
+            self.impact_gdf = self.impactData
         # days before and after validtime the prediction is also valid


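The constructor now accepts either a file path or an already-loaded GeoDataFrame for the impact data, which is what lets the __main__ block below pass hydro_impact_gdf straight in. A minimal sketch of that dispatch pattern, assuming Path comes from pathlib (its import is not visible in this diff); the resolve_impact_gdf helper name is illustrative, and unlike the committed branches it raises instead of leaving the attribute unset when neither type matches:

from pathlib import Path

import geopandas as gpd

def resolve_impact_gdf(impact_data, open_observed):
    """Return a GeoDataFrame, whether given a file path or a loaded frame."""
    if isinstance(impact_data, (str, Path)):
        # A path was passed: defer to the loader that reads and filters the file.
        return open_observed()
    if isinstance(impact_data, gpd.GeoDataFrame):
        # Already in memory: use it as-is and skip the file read.
        return impact_data
    # Guard against silent misconfiguration instead of leaving the attribute unset.
    raise TypeError(f"impactData must be str, Path or GeoDataFrame, got {type(impact_data)!r}")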
@@ -59,6 +63,7 @@ def openObservedImpact_gdf(self):
         impact_gdf = pd.merge(self.gdf_shape, df_filtered, how='left', left_on=f'ADM{cfg.adminLevel}', right_on=f'ADM{cfg.adminLevel}')

         return impact_gdf
+
     def _check_ifmatched (self, commune, startdate, enddate):
         match = self.impact_gdf[
             (self.impact_gdf[f'ADM{self.adminLevel}'] == commune) &
@@ -123,7 +128,7 @@ def clean_and_add_GloFAS (self, PredictedEvents_gdf):
         # Append the renamed rows to impact_gdf
         self.impact_gdf = pd.concat([self.impact_gdf, remaining_rows], ignore_index=True)

-    def _check_impact(self, PredictedEvents_gdf, commune, startdate, enddate):
+    def _check_impact(self, PredictedEvents_gdf, commune, startdate):
         '''Check if impact that has happened in the commune between given dates is RECORDED by glofas.'''
         match = PredictedEvents_gdf[
             (PredictedEvents_gdf[f'ADM{self.adminLevel}'] == commune) &
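_check_impact no longer takes an enddate: the lookup presumably asks only whether the impact's start date falls inside a predicted event's validity window. A sketch under that assumption; the StartValidTime and EndValidTime column names are illustrative, since the rest of the filter sits outside this hunk:

def check_impact(predicted_gdf, admin_col, commune, startdate):
    # Keep predicted events in this commune whose validity window
    # contains the observed impact's start date.
    match = predicted_gdf[
        (predicted_gdf[admin_col] == commune)
        & (predicted_gdf['StartValidTime'] <= startdate)
        & (predicted_gdf['EndValidTime'] >= startdate)
    ]
    return 1 if len(match) > 0 else 0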
@@ -144,7 +149,7 @@ def matchImpact_and_Trigger(self):
         # Add Impact column using the check impact date (which only works on the impact gdf)

         self.impact_gdf['Event'] = self.impact_gdf.apply(
-            lambda row: self._check_impact(self.PredictedEvents_gdf, row[f'ADM{self.adminLevel}'], row['Start Date'], row['End Date']),
+            lambda row: self._check_impact(self.PredictedEvents_gdf, row[f'ADM{self.adminLevel}'], row['Start Date']),
             axis=1
         )
         # Clean and add GloFAS to self.impact_gdf
@@ -186,17 +191,20 @@ def calculateCommunePerformance(self):
             lambda x: self.calc_performance_scores(x['Impact'], x['Event'])
         )
         scores_byCommune_gdf = self.gdf_shape.merge(scores_by_commune, on=f'ADM{cfg.adminLevel}')
-        scores_byCommune_gdf.to_file (f"{self.DataDir}/scores_byCommuneRP{self.RPyr:.1f}_yr_leadtime{self.leadtime:.0f}.shp")
+        scores_byCommune_gdf.to_file (f"{self.DataDir}/glofas_to_hydrodata/scores_byCommuneRP{self.RPyr:.1f}_yr_leadtime{self.leadtime:.0f}.shp")
         return scores_byCommune_gdf

-if __name__=='__main__':
-    for leadtime in cfg.leadtimes:
-        for RPyr in cfg.RPsyr:
+if __name__=='__main__':
+    for RPyr in cfg.RPsyr:
+        hydro_impact_gdf = loop_over_stations (cfg.DNHstations , cfg.DataDir, RPyr, cfg.admPath)
+        for leadtime in cfg.leadtimes:
             floodProbability_path = cfg.DataDir/ f"floodedRP{RPyr}yr_leadtime{leadtime}_ADM{cfg.adminLevel}.gpkg"
             floodProbability_gdf = checkVectorFormat (floodProbability_path)
             #calculate the flood events
             definer = FloodDefiner (cfg.adminLevel)
             PredictedEvents_gdf = definer.EventMaker (floodProbability_gdf, cfg.actionLifetime, cfg.triggerProb)
-            analyzer = PredictedToImpactPerformanceAnalyzer(cfg.DataDir, RPyr, leadtime, cfg.impact_csvPath, cfg.triggerProb, cfg.adminLevel, cfg.admPath, cfg.startYear, cfg.endYear, cfg.years, PredictedEvents_gdf)
+            #print (readcsv(f"{DataDir}/Données partagées - DNH Mali - 2019/Données partagées - DNH Mali - 2019/Débit du Niger à Ansongo.csv"))
+            analyzer = PredictedToImpactPerformanceAnalyzer(cfg.DataDir, RPyr, leadtime, hydro_impact_gdf, cfg.triggerProb, cfg.adminLevel, cfg.admPath, cfg.startYear, cfg.endYear, cfg.years, PredictedEvents_gdf)
             analyzer.matchImpact_and_Trigger()
             analyzer.calculateCommunePerformance()

12 changes: 5 additions & 7 deletions comparison/HydroImpact.py
@@ -5,7 +5,6 @@
 import scipy.stats as stats
 from comparison.pointMatching import attributePoints_to_Polygon

-
 def parse_date_with_fallback(date_str, year):
     try:
         # Try to parse the date with the given year
@@ -65,7 +64,7 @@ def QRP_fit (hydro_df, RP):

     # 2. Fit a Gumbel distribution to the annual maximum discharge values
     loc, scale = stats.gumbel_r.fit(annual_max_discharge)
-
+    print (RP)
     # 4. Calculate the discharge value corresponding to the return period
     discharge_value = stats.gumbel_r.ppf(1 - 1/RP, loc, scale)
     return discharge_value
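QRP_fit inverts the fitted Gumbel CDF at the non-exceedance probability 1 - 1/RP, so RP must be numeric; the RP = float(RP) added below presumably guards against a string return period arriving from configuration or a CSV. A self-contained example with synthetic annual maxima, where all numbers are illustrative only:

import numpy as np
import scipy.stats as stats

rng = np.random.default_rng(42)
annual_max = rng.gumbel(loc=1200.0, scale=300.0, size=30)  # synthetic annual max discharge, m3/s

# Fit the right-skewed Gumbel distribution to the annual maxima.
loc, scale = stats.gumbel_r.fit(annual_max)

# Discharge exceeded on average once every RP years.
RP = 5.0
q_rp = stats.gumbel_r.ppf(1 - 1 / RP, loc, scale)
print(f"{RP:.0f}-year return discharge: {q_rp:.1f} m3/s")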
@@ -163,10 +162,11 @@ def createEvent(trigger_df):
     else:
         # Return an empty GeoDataFrame if no events were found
         # Initialize an empty dataframe
-        events_df = pd.DataFrame(columns=['Event', 'StartDate', 'EndDate'])
+        events_df = pd.DataFrame(columns=['Event', 'Start Date', 'End Date'])
         return events_df

 def loop_over_stations(station_csv, DataDir, RP, admPath):
+    RP = float(RP)
     station_df = pd.read_csv (station_csv, header=0)
     #print (station_df.columns)
     hydrodir = rf"{DataDir}/DNHMali_2019\Q_stations"
@@ -181,6 +181,7 @@ def loop_over_stations(station_csv, DataDir, RP, admPath):
         except:
             print (f'no discharge measures found for station {StationName} in {BasinName}')
             continue
+
         trigger_df = stampHydroTrigger (hydro_df, RP, StationName)
         event_df = createEvent (trigger_df)
         event_df ['StationName'] = StationName
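Inside loop_over_stations, each station's events are tagged with StationName before being combined. A condensed sketch of that accumulation pattern, with the hypothetical make_events standing in for the stampHydroTrigger plus createEvent pair and for the concatenation step, which is outside this hunk:

import pandas as pd

def collect_station_events(stations, make_events):
    frames = []
    for name in stations:
        events = make_events(name)        # e.g. stampHydroTrigger then createEvent
        if events.empty:
            continue                      # station never crossed its trigger threshold
        events['StationName'] = name      # keep events attributable to their gauge
        frames.append(events)
    if not frames:
        return pd.DataFrame(columns=['Event', 'Start Date', 'End Date', 'StationName'])
    return pd.concat(frames, ignore_index=True)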
Expand Down Expand Up @@ -209,9 +210,6 @@ def loop_over_stations(station_csv, DataDir, RP, admPath):


if __name__=="__main__":
DataDir = cfg.DataDir
station_csv = cfg.DNHstations
HydroStations_RP_file = DataDir / f"DNHMali_2019/HydroStations_RP.csv"
#print (readcsv(f"{DataDir}/Données partagées - DNH Mali - 2019/Donne╠ües partage╠ües - DNH Mali - 2019/De╠übit du Niger a╠Ç Ansongo.csv"))
event_gdf = loop_over_stations (station_csv, DataDir, 5, cfg.admPath)
event_gdf = loop_over_stations (cfg.DNHstations, cfg.DataDir, 5, cfg.admPath)
print (event_gdf)
