Skip to content

Commit

Permalink
calculating probability as executed
Browse files — browse the repository at this point in the history
  • Loading branch information
ERKuipers committed Dec 12, 2024
1 parent b2e0cb4 commit 59e2426
Show file tree
Hide file tree
Showing 4 changed files with 18 additions and 15 deletions.
27 changes: 15 additions & 12 deletions GloFAS/GloFAS_analysis/probability_calculator.py
Original file line number Diff line number Diff line change
Expand Up @@ -19,12 +19,14 @@ def __init__(self,
lakesPath=cfg.lakesPath,
crs=cfg.crs,
adminLevel=None,
GloFAS_stations = cfg.GloFASstations,
forecastType='reforecast',
measure='max',
start_date=None,
end_date=None,
nrCores=4,
comparisonShape='polygon'):
comparisonShape='polygon',
):

# start and end_date are only necessary if your forecastType is not 'reforecast' but 'forecast'

Expand All @@ -49,33 +51,33 @@ def __init__(self,
self.comparisonShape = comparisonShape
if comparisonShape == 'polygon':
self.measure = measure
self.threshold_gdf = aggregation(self.threshold_da, adminPath, 'polygon', measure=self.measure)
self.threshold_gdf = aggregation(self.threshold_da, vector=adminPath, method='polygon', measure=self.measure)
self.adminLevel = adminLevel

elif comparisonShape == 'point':
self.threshold_gdf = aggregation(self.threshold_da, GloFAS_stations) # aggregating the threshold value on the GloFAS station
self.threshold_gdf = aggregation(self.threshold_da, vector=GloFAS_stations, method='point') # aggregating the threshold value on the GloFAS station
self.nrCores = nrCores

def exceedance(self, Q_da, threshold_gdf, nrEnsemble):
def exceedance(self, Q_da, threshold_gdf, nrEnsemble, timestamp):
'''Check exceedance of threshold values for a single ensemble member
threshold_gdf should be in same shape as eventual outcme, so if comparisonShape=polygon, threshold_gdf should be in polygon'''
if self.comparisonShape=='polygon':
GloFAS_gdf = aggregation(Q_da, threshold_gdf, 'polygon', nrEnsemble, measure=self.measure)
GloFAS_gdf = aggregation(Q_da, threshold_gdf, 'polygon', nrEnsemble, measure=self.measure, timestamp=timestamp)
exceedance = GloFAS_gdf[f'{self.measure}'] > threshold_gdf[f'{self.measure}']
elif self.comparisonShape=='point': # assuming this then refers to aggregation to a point
GloFAS_gdf = aggregation (Q_da, threshold_gdf, 'point', nrEnsemble)
GloFAS_gdf = aggregation (Q_da, threshold_gdf, 'point', nrEnsemble=nrEnsemble, timestamp=timestamp)
exceedance = GloFAS_gdf ['rastervalue'] > threshold_gdf['rastervalue']
else:
raise ValueError (f"Not a valid shape : {self.comparisonShape}, pick between 'polygon' and 'point'")
return exceedance

def probability(self, Q_da, threshold_gdf):
def probability(self, Q_da, threshold_gdf, timestamp):
'''Calculate exceedance probability across ensemble members for a single time step'''
nrMembers = len(Q_da.number)
exceedance_count = np.zeros(len(threshold_gdf), dtype=int)

# Calculate exceedances for each ensemble in parallel
results = Parallel(n_jobs=self.nrCores)(delayed(self.exceedance)(Q_da, threshold_gdf, ensemble) for ensemble in Q_da.number)
results = Parallel(n_jobs=self.nrCores)(delayed(self.exceedance)(Q_da, threshold_gdf, ensemble, timestamp) for ensemble in Q_da.number)
for result in results:
exceedance_count += result
probability = exceedance_count / nrMembers
Expand All @@ -94,11 +96,11 @@ def concatenate_prob(self):
print(f"Starting for {month}, {day}")
Q_da_rasterpath = unzipGloFAS (self.DataDir, self.leadtime, month, day)
Q_da = openGloFAS(Q_da_rasterpath, self.leadtime) # Open data for the specific month/day
for timestamp in Q_da.time: # Iterate over years
for timestamp in Q_da.time: #Iterate over years
startLoop = datetime.now()
valid_timestamp = pd.to_datetime(str(timestamp.values)) + timedelta(hours=self.leadtime)
# Calculate probability
flooded_df = self.probability(Q_da, self.threshold_gdf, self.nrCores)
flooded_df = self.probability(Q_da, self.threshold_gdf, timestamp)
flooded_df['ValidTime'] = valid_timestamp # Add timestamp
all_years_data.append(flooded_df)
print(f"Time step {valid_timestamp} done, took: {datetime.now() - startLoop}")
Expand All @@ -113,7 +115,7 @@ def concatenate_prob(self):
Q_da = openGloFAS(Q_da_rasterpath, lakesPath=self.lakesPath, crs=self.crs)
valid_timestamp = pd.to_datetime(str(timestamp.values)) + timedelta(hours=self.leadtime)
# Calculate probability
flooded_df = self.probability(Q_da, self.threshold_gdf, self.nrCores)
flooded_df = self.probability(Q_da, self.threshold_gdf)
flooded_df['ValidTime'] = valid_timestamp # Add timestamp
all_years_data.append(flooded_df)
print(f"Time step {valid_timestamp} done, took: {datetime.now() - startLoop}")
Expand All @@ -137,10 +139,11 @@ def concatenate_prob(self):
lakesPath=cfg.lakesPath,
crs=cfg.crs,
adminLevel=None,
GloFAS_stations=cfg.GloFASstations,
forecastType='reforecast',
measure=None,
start_date=None,
end_date=None,
nrCores=8,
comparisonShape='point')
prob.concatenate_prob(Q_da)
prob.concatenate_prob()
2 changes: 1 addition & 1 deletion GloFAS/GloFAS_data_extractor/threshold_opener.py
Original file line number Diff line number Diff line change
Expand Up @@ -2,7 +2,7 @@
import rioxarray as rio

def openThreshold(DataDir, crs, RPyr, area, Q_da_forecast ):
threshold_ds = xr.load_dataset(DataDir / f"flood_threshold_glofas_v4_rl_{RPyr:.1f}.nc")
threshold_ds = xr.load_dataset(DataDir / f"auxiliary/flood_threshold_glofas_v4_rl_{RPyr:.1f}.nc")
threshold_ds.rio.write_crs(crs, inplace=True)

threshold_da = threshold_ds[f"rl_{RPyr:.1f}"].sel(
Expand Down
2 changes: 1 addition & 1 deletion GloFAS/GloFAS_prep/aggregation.py
Original file line number Diff line number Diff line change
Expand Up @@ -93,7 +93,7 @@ def aggregation (rasterDA, vector, method, nrEnsemble=None, timestamp=None, meas
rasterDA = rasterDA.sel(time=timestamp)
# aggregation is about the model's location in relation to the model's output raster, therefore placement is in the model's framework
# this argument is ignored when considering polygons, because on that level precision doesnt really matter
vectorGDF = checkVectorFormat(vector, shapeType=method, crs=rasterDA.rio.crs, placement='model')
vectorGDF = checkVectorFormat(vector, shapeType=method, crs=rasterDA.rio.crs, placement='real')
if method == 'point':
pointgdf = query(rasterDA, vectorGDF)
return pointgdf
Expand Down
2 changes: 1 addition & 1 deletion GloFAS/GloFAS_prep/configuration.py
Original file line number Diff line number Diff line change
Expand Up @@ -18,7 +18,7 @@
stationsDir = DataDir / f'stations'
DNHstations = stationsDir / f"Stations_DNH.csv"
googlestations = stationsDir / 'coords_google_gauges_Mali.csv'
GloFASstations = stationsDir / 'GloFAS_to_DNH_resembling_uparea_50km.csv'
GloFASstations = stationsDir / 'GloFAS_to_DNH_resembling_uparea.csv'
impact_csvPath = DataDir / "impact/MergedImpactData.csv"
settlements_tif = DataDir / "GlobalHumanSettlement/GHS_BUILT_S_E2030_GLOBE_R2023A_54009_100_V1_0.tif"

Expand Down

0 comments on commit 59e2426

Please sign in to comment.