Skip to content

Commit

Permalink
Browse files Browse the repository at this point in the history
  • Loading branch information
valentijn7 committed Dec 12, 2024
2 parents cfaad37 + 59e2426 commit 5c8aae5
Show file tree
Hide file tree
Showing 76 changed files with 140,123 additions and 33 deletions.
49 changes: 36 additions & 13 deletions GloFAS/GloFAS_analysis/probability_calculator.py
Original file line number Diff line number Diff line change
Expand Up @@ -14,16 +14,19 @@ class FloodProbabilityProcessor:
def __init__(self,
DataDir=cfg.DataDir,
leadtime=168,
RPyr=5,
area=cfg.MaliArea,
lakesPath=cfg.lakesPath,
crs=cfg.crs,
adminLevel=None,
GloFAS_stations = cfg.GloFASstations,
forecastType='reforecast',
measure='max',
start_date=None,
end_date=None,
nrCores=4,
comparisonShape='polygon'):
comparisonShape='polygon',
):

# start and end_date are only necessary if your forecastType is not 'reforecast' but 'forecast'

Expand All @@ -39,41 +42,42 @@ def __init__(self,
self.MONTHSDAYS = get_monthsdays()
elif forecastType == 'forecast':
self.MONTHSDAYSYEARS = compute_dates_range(start_date, end_date)

# Initializations that rely on data
self.reference_rasterPath = unzipGloFAS(self.DataDir, self.leadtime, '01', '07')
self.reference_rasterPath = unzipGloFAS(self.DataDir, self.leadtime, '01', '08')
self.reference_Q_da = openGloFAS(self.reference_rasterPath, self.lakesPath, self.crs)
self.threshold_da = openThreshold(self.DataDir, self.crs, self.RPyr, self.area, self.reference_Q_da)

self.comparisonShape = comparisonShape
if comparisonShape == 'polygon':
self.measure = measure
self.threshold_gdf = aggregation(self.threshold_da, adminPath, 'polygon', measure=self.measure)
self.threshold_gdf = aggregation(self.threshold_da, vector=adminPath, method='polygon', measure=self.measure)
self.adminLevel = adminLevel

elif comparisonShape == 'point':
self.threshold_gdf = aggregation(self.threshold_da, GloFAS_stations)
self.threshold_gdf = aggregation(self.threshold_da, vector=GloFAS_stations, method='point') # aggregating the threshold value on the GloFAS station
self.nrCores = nrCores

def exceedance(self, Q_da, threshold_gdf, nrEnsemble):
def exceedance(self, Q_da, threshold_gdf, nrEnsemble, timestamp):
'''Check exceedance of threshold values for a single ensemble member
threshold_gdf should be in same shape as eventual outcme, so if comparisonShape=polygon, threshold_gdf should be in polygon'''
if self.comparisonShape=='polygon':
GloFAS_gdf = aggregation(Q_da, threshold_gdf, 'polygon', nrEnsemble, measure=self.measure)
GloFAS_gdf = aggregation(Q_da, threshold_gdf, 'polygon', nrEnsemble, measure=self.measure, timestamp=timestamp)
exceedance = GloFAS_gdf[f'{self.measure}'] > threshold_gdf[f'{self.measure}']
elif self.comparisonShape=='point': # assuming this then refers to aggregation to a point
GloFAS_gdf = aggregation (Q_da, threshold_gdf, 'point', nrEnsemble)
GloFAS_gdf = aggregation (Q_da, threshold_gdf, 'point', nrEnsemble=nrEnsemble, timestamp=timestamp)
exceedance = GloFAS_gdf ['rastervalue'] > threshold_gdf['rastervalue']
else:
raise ValueError (f"Not a valid shape : {self.comparisonShape}, pick between 'polygon' and 'point'")
return exceedance

def probability(self, Q_da, threshold_gdf, nrCores):
def probability(self, Q_da, threshold_gdf, timestamp):
'''Calculate exceedance probability across ensemble members for a single time step'''
nrMembers = len(Q_da.number)
exceedance_count = np.zeros(len(threshold_gdf), dtype=int)

# Calculate exceedances for each ensemble in parallel
results = Parallel(n_jobs=nrCores)(delayed(self.exceedance)(Q_da, threshold_gdf, ensemble) for ensemble in Q_da.number)
results = Parallel(n_jobs=self.nrCores)(delayed(self.exceedance)(Q_da, threshold_gdf, ensemble, timestamp) for ensemble in Q_da.number)
for result in results:
exceedance_count += result
probability = exceedance_count / nrMembers
Expand All @@ -84,19 +88,19 @@ def probability(self, Q_da, threshold_gdf, nrCores):
flooded_df['Probability'] = probability
return flooded_df

def concatenate_prob(self, Q_da, nrCores):
def concatenate_prob(self):
'''Concatenate exceedance probability calculations over multiple time steps'''
all_years_data = []
if self.forecastType=='reforecast':
for month, day in self.MONTHSDAYS:
print(f"Starting for {month}, {day}")
Q_da_rasterpath = unzipGloFAS (self.DataDir, self.leadtime, month, day)
Q_da = openGloFAS(Q_da_rasterpath, self.leadtime) # Open data for the specific month/day
for timestamp in Q_da.time: # Iterate over years
for timestamp in Q_da.time: #Iterate over years
startLoop = datetime.now()
valid_timestamp = pd.to_datetime(str(timestamp.values)) + timedelta(hours=self.leadtime)
# Calculate probability
flooded_df = self.probability(Q_da, self.threshold_gdf, nrCores)
flooded_df = self.probability(Q_da, self.threshold_gdf, timestamp)
flooded_df['ValidTime'] = valid_timestamp # Add timestamp
all_years_data.append(flooded_df)
print(f"Time step {valid_timestamp} done, took: {datetime.now() - startLoop}")
Expand All @@ -111,7 +115,7 @@ def concatenate_prob(self, Q_da, nrCores):
Q_da = openGloFAS(Q_da_rasterpath, lakesPath=self.lakesPath, crs=self.crs)
valid_timestamp = pd.to_datetime(str(timestamp.values)) + timedelta(hours=self.leadtime)
# Calculate probability
flooded_df = self.probability(Q_da, self.threshold_gdf, nrCores)
flooded_df = self.probability(Q_da, self.threshold_gdf)
flooded_df['ValidTime'] = valid_timestamp # Add timestamp
all_years_data.append(flooded_df)
print(f"Time step {valid_timestamp} done, took: {datetime.now() - startLoop}")
Expand All @@ -124,3 +128,22 @@ def concatenate_prob(self, Q_da, nrCores):
output_path = f"{self.DataDir}/floodedRP{self.RPyr}yr_leadtime{self.leadtime}.gpkg"
floodProb_gdf_concat.to_file(output_path, driver="GPKG")
return floodProb_gdf_concat

if __name__ == '__main__':
    # Sweep every configured lead time / return period combination and
    # produce one exceedance-probability GeoPackage per pair, comparing
    # GloFAS discharge against thresholds at station points.
    for lt in cfg.leadtimes:
        for rp in cfg.RPsyr:
            processor_kwargs = dict(
                DataDir=cfg.DataDir,
                leadtime=lt,
                RPyr=rp,
                area=cfg.MaliArea,
                lakesPath=cfg.lakesPath,
                crs=cfg.crs,
                adminLevel=None,            # admin aggregation not used for point comparison
                GloFAS_stations=cfg.GloFASstations,
                forecastType='reforecast',
                measure=None,               # 'measure' only applies to polygon aggregation
                start_date=None,            # start/end dates only needed for forecastType='forecast'
                end_date=None,
                nrCores=8,
                comparisonShape='point',
            )
            FloodProbabilityProcessor(**processor_kwargs).concatenate_prob()
2 changes: 1 addition & 1 deletion GloFAS/GloFAS_data_extractor/threshold_opener.py
Original file line number Diff line number Diff line change
Expand Up @@ -2,7 +2,7 @@
import rioxarray as rio

def openThreshold(DataDir, crs, RPyr, area, Q_da_forecast ):
threshold_ds = xr.load_dataset(DataDir / f"flood_threshold_glofas_v4_rl_{RPyr:.1f}.nc")
threshold_ds = xr.load_dataset(DataDir / f"auxiliary/flood_threshold_glofas_v4_rl_{RPyr:.1f}.nc")
threshold_ds.rio.write_crs(crs, inplace=True)

threshold_da = threshold_ds[f"rl_{RPyr:.1f}"].sel(
Expand Down
2 changes: 1 addition & 1 deletion GloFAS/GloFAS_prep/aggregation.py
Original file line number Diff line number Diff line change
Expand Up @@ -93,7 +93,7 @@ def aggregation (rasterDA, vector, method, nrEnsemble=None, timestamp=None, meas
rasterDA = rasterDA.sel(time=timestamp)
# aggregation is about the model's location in relation to the model's output raster, therefore placement is in the model's framework
# this argument is ignored when considering polygons, because on that level precision doesnt really matter
vectorGDF = checkVectorFormat(vector, shapeType=method, crs=rasterDA.rio.crs, placement='model')
vectorGDF = checkVectorFormat(vector, shapeType=method, crs=rasterDA.rio.crs, placement='real')
if method == 'point':
pointgdf = query(rasterDA, vectorGDF)
return pointgdf
Expand Down
6 changes: 3 additions & 3 deletions GloFAS/GloFAS_prep/configuration.py
Original file line number Diff line number Diff line change
Expand Up @@ -4,9 +4,9 @@
import os
################################ GLOFAS ###############################################

os.chdir (f'C:\\Users\\els-2\\')
os.chdir (f'C:/Users/els-2/')
cur = Path.cwd()
DataDir = cur / 'MaliGloFAS\\data'
DataDir = cur / 'MaliGloFAS/data'
# Mali area coordinates as in notation as used by GloFAS (which is very weird)
# north, west, south, east
MaliArea = [25, -12.25, 10, 4.25] # Mali area [lat_max, lon_min, lat_min, lon_max] (upper left corner , down right corner)
Expand All @@ -18,7 +18,7 @@
stationsDir = DataDir / f'stations'
DNHstations = stationsDir / f"Stations_DNH.csv"
googlestations = stationsDir / 'coords_google_gauges_Mali.csv'
GloFASstations = stationsDir / 'GloFAS_to_DNH_resembling_uparea_50km.csv'
GloFASstations = stationsDir / 'GloFAS_to_DNH_resembling_uparea.csv'
impact_csvPath = DataDir / "impact/MergedImpactData.csv"
settlements_tif = DataDir / "GlobalHumanSettlement/GHS_BUILT_S_E2030_GLOBE_R2023A_54009_100_V1_0.tif"

Expand Down
4 changes: 2 additions & 2 deletions GloFAS/GloFAS_prep/station_to_glofas.py
Original file line number Diff line number Diff line change
Expand Up @@ -25,12 +25,12 @@
ups_area_point_m=row['Catchment area (km2)'] * 1e6, # Convert from km² to m²
stationName=row['Station names'],
glofas_ups_area_xr=glofas_ups_area_da,
radius_m=100000
radius_m=10000
)
stations_upsarea_df.loc[i, 'Glofas_Point_X'] = glofas_x
stations_upsarea_df.loc[i, 'Glofas_Point_Y'] = glofas_y
stations_upsarea_df.loc[i, 'Area_Diff'] = area_diff

# Print or save the updated DataFrame
print(stations_upsarea_df.head())
stations_upsarea_df.to_csv(f'{cfg.DataDir}/stations_Glofas_coordinates.csv')
stations_upsarea_df.to_csv(f'{cfg.DataDir}/stations/stations_Glofas_coordinates.csv')
Loading

0 comments on commit 5c8aae5

Please sign in to comment.