added probability calculator and flood definer

rodekruis · Nov 13, 2024 · 838e5df · 838e5df
1 parent 4abfde7
commit 838e5df
Show file tree

Hide file tree

Showing 6 changed files with 118 additions and 68 deletions.
diff --git a/GloFAS/GloFAS_analysis/concatenate.py b/GloFAS/GloFAS_analysis/concatenate.py
diff --git a/GloFAS/GloFAS_analysis/performance_calculator.py b/GloFAS/GloFAS_analysis/performance_calculator.py
diff --git a/GloFAS/GloFAS_analysis/probability_calculator.py b/GloFAS/GloFAS_analysis/probability_calculator.py
@@ -0,0 +1,96 @@
+import numpy as np
+import pandas as pd
+import geopandas as gpd
+from datetime import datetime, timedelta
+from joblib import Parallel, delayed
+from GloFAS.GloFAS_prep.aggregation import aggregation
+from GloFAS.GloFAS_data_extractor.reforecast_dataFetch import get_monthsdays
+from GloFAS.GloFAS_data_extractor.forecast_dataFetch import compute_dates_range
+from GloFAS.GloFAS_data_extractor.threshold_opener import openThreshold
+from GloFAS.GloFAS_data_extractor.unzip_open import openGloFAS, unzipGloFAS
+import GloFAS.GloFAS_prep.configuration as cfg
+
+class FloodProbabilityProcessor:
+    def __init__(self, leadtime, adminLevel, forecastType, measure='max', start_date=None, end_date=None):
+        # start and end_date are only necessary if your forecastType is not 'reforecast' but 'forecast'
+        self.leadtime = leadtime
+        self.adminLevel = adminLevel
+        self.measure = measure
+        self.DataDir = cfg.DataDir
+        self.crs = cfg.crs
+        self.RPyr = cfg.RPyr
+        self.area = cfg.MaliArea
+        self.lakesPath = cfg.lakesPath
+        self.forecastType = forecastType
+        if forecastType == 'reforecast':
+            self.MONTHSDAYS = get_monthsdays()
+        elif forecastType ==  'forecast':
+            self.MONTHSDAYSYEARS = compute_dates_range(start_date, end_date)
+        # Initializations that rely on data
+        self.reference_rasterPath = unzipGloFAS(self.DataDir, self.leadtime, '01', '07')
+        self.reference_Q_da = openGloFAS(self.reference_rasterPath, self.lakesPath, self.crs)
+        self.threshold_da = openThreshold(self.DataDir, self.crs, self.RPyr, self.area, self.reference_Q_da)
+        self.threshold_gdf = aggregation(self.threshold_da, adminPath, 'polygon', measure=cfg.measure)
+
+    def exceedance(self, Q_da, threshold_gdf, nrEnsemble):
+        '''Check exceedance of threshold values for a single ensemble member'''
+        GloFAS_gdf = aggregation(Q_da, threshold_gdf, 'polygon', nrEnsemble, measure=self.measure)
+        exceedance = GloFAS_gdf[f'{self.measure}'] > threshold_gdf[f'{self.measure}']
+        return exceedance
+
+    def probability(self, Q_da, threshold_gdf, nrCores):
+        '''Calculate exceedance probability across ensemble members for a single time step'''
+        nrMembers = len(Q_da.number)
+        exceedance_count = np.zeros(len(threshold_gdf), dtype=int)
+
+        # Calculate exceedances for each ensemble in parallel
+        results = Parallel(n_jobs=nrCores)(delayed(self.exceedance)(Q_da, threshold_gdf, ensemble) for ensemble in Q_da.number)
+        for result in results:
+            exceedance_count += result
+        probability = exceedance_count / nrMembers
+
+        # Prepare results as GeoDataFrame
+        flooded_df = threshold_gdf.copy()
+        flooded_df['ExceedanceCount'] = exceedance_count
+        flooded_df['Probability'] = probability
+        return flooded_df
+
+    def concatenate_prob(self, Q_da, nrCores):
+        '''Concatenate exceedance probability calculations over multiple time steps'''
+        all_years_data = []
+        if self.forecastType=='reforecast':
+            for month, day in self.MONTHSDAYS:
+                print(f"Starting for {month}, {day}")
+                Q_da_rasterpath = unzipGloFAS (self.DataDir, self.leadtime, month, day)
+                Q_da = openGloFAS(Q_da_rasterpath, self.leadtime)  # Open data for the specific month/day
+                for timestamp in Q_da.time:  # Iterate over years
+                    startLoop = datetime.now()
+                    valid_timestamp = pd.to_datetime(str(timestamp.values)) + timedelta(hours=self.leadtime)
+                    # Calculate probability
+                    flooded_df = self.probability(Q_da, self.threshold_gdf, nrCores)
+                    flooded_df['ValidTime'] = valid_timestamp  # Add timestamp
+                    all_years_data.append(flooded_df)
+                    print(f"Time step {valid_timestamp} done, took: {datetime.now() - startLoop}")
+
+        if self.forecastType=='forecast': 
+            for date in self.MONTHSDAYSYEARS:
+                startLoop = datetime.now()
+                year  = date.strftime('%Y')
+                month = date.strftime('%m')
+                day   = date.strftime('%d')
+                Q_da_rasterpath = unzipGloFAS(self.DataDir, self.leadtime, month, day, year)
+                Q_da = openGloFAS(Q_da_rasterpath, lakesPath=self.lakesPath, crs=self.crs)
+                valid_timestamp = pd.to_datetime(str(timestamp.values)) + timedelta(hours=self.leadtime)
+                # Calculate probability
+                flooded_df = self.probability(Q_da, self.threshold_gdf, nrCores)
+                flooded_df['ValidTime'] = valid_timestamp  # Add timestamp
+                all_years_data.append(flooded_df)
+                print(f"Time step {valid_timestamp} done, took: {datetime.now() - startLoop}")
+        # Concatenate results for all time steps
+        floodProb_gdf_concat = pd.concat(all_years_data, ignore_index=True)
+        floodProb_gdf_concat = gpd.GeoDataFrame(floodProb_gdf_concat, geometry='geometry')
+
+        # Save to file
+        output_path = f"{self.DataDir}/floodedRP{self.RPyr}yr_leadtime{self.leadtime}_ADM{self.adminLevel}.gpkg"
+        floodProb_gdf_concat.to_file(output_path, driver="GPKG")
+        return floodProb_gdf_concat
diff --git a/GloFAS/GloFAS_analysis/probability_exceeding_calculator.py b/GloFAS/GloFAS_analysis/probability_exceeding_calculator.py
diff --git a/GloFAS/GloFAS_prep/configuration.py b/GloFAS/GloFAS_prep/configuration.py
@@ -29,4 +29,5 @@
 adminLevel = 2 # choose level on which you would like to aggregate : 1,2,3
 years = np.arange(startYear, endYear, 1)
 admPath = adminPaths [(adminLevel-1)] # generate the useful administrative unit path 
-nrCores = 6 #determine number of cpu cores to use
+nrCores = 6 #determine number of cpu cores to use
+measure = 'max' # measure to aggregate on 
diff --git a/GloFAS/GloFAS_prep/flood_definer.py b/GloFAS/GloFAS_prep/flood_definer.py
@@ -0,0 +1,20 @@
+from GloFAS.GloFAS_prep.vectorCheck import checkVectorFormat
+floodProbability_path = self.DataDir/ f"floodedRP{self.RPyr}yr_leadtime{self.leadtime}_ADM{self.adminLevel}.gpkg"
+
+def stampTrigger (floodProbability_gdf, actionLifetime, triggerProb):
+    '''defines trigger by the floodprobability that it needs to exceed, as well as writes the timeduration at which the trigger is valid 
+    actionLifetime = actionLifetime in days'''
+
+    # make everything capital
+    floodProbability_gdf[f'ADM{self.adminLevel}'] = floodProbability_gdf[f'ADM{self.adminLevel}'].apply(lambda x: unidecode.unidecode(x).upper())
+
+
+    floodCommune_gdf['StartValidTime'] = pd.to_datetime(floodCommune_gdf['ValidTime'], errors='coerce') # - timedelta(days=self.buffervalid)
+    # incorporate actionlifetime
+    floodCommune_gdf['EndValidTime'] = pd.to_datetime(floodCommune_gdf['ValidTime'], errors='coerce') + timedelta(days=actionLifetime)
+    floodCommune_gdf['StartValidTime'] = pd.to_datetime(floodCommune_gdf['StartValidTime'], format='%d/%m/%Y', errors='coerce')
+    floodCommune_gdf['EndValidTime'] = pd.to_datetime(floodCommune_gdf['EndValidTime'], format='%d/%m/%Y', errors='coerce')
+    # Add Trigger column based on probability threshold
+    floodCommune_gdf['Trigger'] = np.where(floodCommune_gdf['Probability'] >= triggerProb, 1, 0)
+
+    return floodCommune_gdf