added probability calculator and flood definer
ERKuipers committed Nov 13, 2024
1 parent 4abfde7 commit 838e5df
Showing 6 changed files with 118 additions and 68 deletions.
19 changes: 0 additions & 19 deletions GloFAS/GloFAS_analysis/concatenate.py

This file was deleted.

Empty file.
96 changes: 96 additions & 0 deletions GloFAS/GloFAS_analysis/probability_calculator.py
@@ -0,0 +1,96 @@
import numpy as np
import pandas as pd
import geopandas as gpd
from datetime import datetime, timedelta
from joblib import Parallel, delayed
from GloFAS.GloFAS_prep.aggregation import aggregation
from GloFAS.GloFAS_data_extractor.reforecast_dataFetch import get_monthsdays
from GloFAS.GloFAS_data_extractor.forecast_dataFetch import compute_dates_range
from GloFAS.GloFAS_data_extractor.threshold_opener import openThreshold
from GloFAS.GloFAS_data_extractor.unzip_open import openGloFAS, unzipGloFAS
import GloFAS.GloFAS_prep.configuration as cfg

class FloodProbabilityProcessor:
    def __init__(self, leadtime, adminLevel, forecastType, measure='max', start_date=None, end_date=None):
        # start_date and end_date are only needed when forecastType is 'forecast';
        # a 'reforecast' run iterates over fixed month/day combinations instead
        self.leadtime = leadtime
        self.adminLevel = adminLevel
        self.measure = measure
        self.DataDir = cfg.DataDir
        self.crs = cfg.crs
        self.RPyr = cfg.RPyr
        self.area = cfg.MaliArea
        self.lakesPath = cfg.lakesPath
        self.forecastType = forecastType
        if forecastType == 'reforecast':
            self.MONTHSDAYS = get_monthsdays()
        elif forecastType == 'forecast':
            self.MONTHSDAYSYEARS = compute_dates_range(start_date, end_date)
        # Initializations that rely on data: the month '01' / day '07' raster serves as the reference grid
        self.reference_rasterPath = unzipGloFAS(self.DataDir, self.leadtime, '01', '07')
        self.reference_Q_da = openGloFAS(self.reference_rasterPath, self.lakesPath, self.crs)
        self.threshold_da = openThreshold(self.DataDir, self.crs, self.RPyr, self.area, self.reference_Q_da)
        self.threshold_gdf = aggregation(self.threshold_da, cfg.admPath, 'polygon', measure=self.measure)

    def exceedance(self, Q_da, threshold_gdf, nrEnsemble):
        '''Check exceedance of the threshold values for a single ensemble member'''
        GloFAS_gdf = aggregation(Q_da, threshold_gdf, 'polygon', nrEnsemble, measure=self.measure)
        exceedance = GloFAS_gdf[self.measure] > threshold_gdf[self.measure]
        return exceedance

    def probability(self, Q_da, threshold_gdf, nrCores):
        '''Calculate the exceedance probability across ensemble members for a single time step'''
        nrMembers = len(Q_da.number)
        exceedance_count = np.zeros(len(threshold_gdf), dtype=int)

        # Check exceedance for each ensemble member in parallel
        results = Parallel(n_jobs=nrCores)(
            delayed(self.exceedance)(Q_da, threshold_gdf, ensemble) for ensemble in Q_da.number)
        for result in results:
            exceedance_count += result
        probability = exceedance_count / nrMembers

        # Prepare the results as a GeoDataFrame
        flooded_df = threshold_gdf.copy()
        flooded_df['ExceedanceCount'] = exceedance_count
        flooded_df['Probability'] = probability
        return flooded_df

    def concatenate_prob(self, nrCores):
        '''Concatenate the exceedance probability calculations over multiple time steps'''
        all_years_data = []
        if self.forecastType == 'reforecast':
            for month, day in self.MONTHSDAYS:
                print(f"Starting for {month}, {day}")
                Q_da_rasterpath = unzipGloFAS(self.DataDir, self.leadtime, month, day)
                Q_da = openGloFAS(Q_da_rasterpath, lakesPath=self.lakesPath, crs=self.crs)  # open data for this month/day
                for timestamp in Q_da.time:  # iterate over the reforecast years
                    startLoop = datetime.now()
                    valid_timestamp = pd.to_datetime(str(timestamp.values)) + timedelta(hours=self.leadtime)
                    # Calculate the probability for this time step only
                    flooded_df = self.probability(Q_da.sel(time=timestamp), self.threshold_gdf, nrCores)
                    flooded_df['ValidTime'] = valid_timestamp  # add timestamp
                    all_years_data.append(flooded_df)
                    print(f"Time step {valid_timestamp} done, took: {datetime.now() - startLoop}")

        elif self.forecastType == 'forecast':
            for date in self.MONTHSDAYSYEARS:
                startLoop = datetime.now()
                year = date.strftime('%Y')
                month = date.strftime('%m')
                day = date.strftime('%d')
                Q_da_rasterpath = unzipGloFAS(self.DataDir, self.leadtime, month, day, year)
                Q_da = openGloFAS(Q_da_rasterpath, lakesPath=self.lakesPath, crs=self.crs)
                valid_timestamp = pd.to_datetime(date) + timedelta(hours=self.leadtime)
                # Calculate probability
                flooded_df = self.probability(Q_da, self.threshold_gdf, nrCores)
                flooded_df['ValidTime'] = valid_timestamp  # add timestamp
                all_years_data.append(flooded_df)
                print(f"Time step {valid_timestamp} done, took: {datetime.now() - startLoop}")

        # Concatenate the results for all time steps
        floodProb_gdf_concat = pd.concat(all_years_data, ignore_index=True)
        floodProb_gdf_concat = gpd.GeoDataFrame(floodProb_gdf_concat, geometry='geometry')

        # Save to file
        output_path = f"{self.DataDir}/floodedRP{self.RPyr}yr_leadtime{self.leadtime}_ADM{self.adminLevel}.gpkg"
        floodProb_gdf_concat.to_file(output_path, driver="GPKG")
        return floodProb_gdf_concat
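
A minimal usage sketch (not part of the commit) showing how the class above could be driven; the 168 h leadtime and the reuse of cfg.nrCores are illustrative assumptions:

# Hypothetical usage, not part of this commit: compute exceedance probabilities
# for the reforecast archive at an assumed 168 h leadtime, admin level 2.
from GloFAS.GloFAS_analysis.probability_calculator import FloodProbabilityProcessor
import GloFAS.GloFAS_prep.configuration as cfg

processor = FloodProbabilityProcessor(leadtime=168, adminLevel=2, forecastType='reforecast')
floodProb_gdf = processor.concatenate_prob(nrCores=cfg.nrCores)  # writes the .gpkg and returns the GeoDataFrame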
48 changes: 0 additions & 48 deletions GloFAS/GloFAS_analysis/probability_exceeding_calculator.py

This file was deleted.

3 changes: 2 additions & 1 deletion GloFAS/GloFAS_prep/configuration.py
@@ -29,4 +29,5 @@
adminLevel = 2 # choose the level on which to aggregate: 1, 2, or 3
years = np.arange(startYear, endYear, 1)
admPath = adminPaths[adminLevel - 1] # generate the path for the chosen administrative unit
nrCores = 6 # number of CPU cores to use
measure = 'max' # measure to aggregate on
20 changes: 20 additions & 0 deletions GloFAS/GloFAS_prep/flood_definer.py
@@ -0,0 +1,20 @@
import numpy as np
import pandas as pd
import unidecode
from datetime import timedelta
from GloFAS.GloFAS_prep.vectorCheck import checkVectorFormat
import GloFAS.GloFAS_prep.configuration as cfg

# Path to the probability layer written by FloodProbabilityProcessor.concatenate_prob;
# leadtime is assumed to be defined in the shared configuration module alongside RPyr
floodProbability_path = cfg.DataDir / f"floodedRP{cfg.RPyr}yr_leadtime{cfg.leadtime}_ADM{cfg.adminLevel}.gpkg"

def stampTrigger(floodCommune_gdf, actionLifetime, triggerProb):
    '''Defines the trigger by the flood probability it needs to exceed and
    stamps the time window during which the trigger is valid.
    actionLifetime: validity of the trigger in days'''

    # Normalize administrative names: strip accents and capitalize
    floodCommune_gdf[f'ADM{cfg.adminLevel}'] = floodCommune_gdf[f'ADM{cfg.adminLevel}'].apply(
        lambda x: unidecode.unidecode(x).upper())

    floodCommune_gdf['StartValidTime'] = pd.to_datetime(floodCommune_gdf['ValidTime'], errors='coerce')  # optionally minus a buffer, e.g. timedelta(days=buffervalid)
    # Incorporate the action lifetime: the trigger remains valid for actionLifetime days
    floodCommune_gdf['EndValidTime'] = floodCommune_gdf['StartValidTime'] + timedelta(days=actionLifetime)
    # Add the Trigger column based on the probability threshold
    floodCommune_gdf['Trigger'] = np.where(floodCommune_gdf['Probability'] >= triggerProb, 1, 0)

    return floodCommune_gdf
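
A hypothetical usage sketch (not part of the commit), reading the probability layer back in and stamping triggers; the 10-day action lifetime and the 0.3 probability threshold are illustrative values:

# Hypothetical usage, not part of this commit: flag communes whose flood
# probability reaches 30%, with triggers valid for 10 days.
import geopandas as gpd

floodProbability_gdf = gpd.read_file(floodProbability_path)
triggered_gdf = stampTrigger(floodProbability_gdf, actionLifetime=10, triggerProb=0.3)
print(triggered_gdf[['StartValidTime', 'EndValidTime', 'Trigger']].head())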
