From 838e5dfa126b302604e734d7f8fb51c3984cf6ba Mon Sep 17 00:00:00 2001 From: ERKuipers <elskuipers@protonmail.com> Date: Wed, 13 Nov 2024 13:49:10 +0100 Subject: [PATCH] added probability calculator and flood definer --- GloFAS/GloFAS_analysis/concatenate.py | 19 ---- .../GloFAS_analysis/performance_calculator.py | 0 .../GloFAS_analysis/probability_calculator.py | 96 +++++++++++++++++++ .../probability_exceeding_calculator.py | 48 ---------- GloFAS/GloFAS_prep/configuration.py | 3 +- GloFAS/GloFAS_prep/flood_definer.py | 20 ++++ 6 files changed, 118 insertions(+), 68 deletions(-) delete mode 100644 GloFAS/GloFAS_analysis/concatenate.py create mode 100644 GloFAS/GloFAS_analysis/performance_calculator.py create mode 100644 GloFAS/GloFAS_analysis/probability_calculator.py delete mode 100644 GloFAS/GloFAS_analysis/probability_exceeding_calculator.py create mode 100644 GloFAS/GloFAS_prep/flood_definer.py diff --git a/GloFAS/GloFAS_analysis/concatenate.py b/GloFAS/GloFAS_analysis/concatenate.py deleted file mode 100644 index 4c41fef..0000000 --- a/GloFAS/GloFAS_analysis/concatenate.py +++ /dev/null @@ -1,19 +0,0 @@ -from GloFAS.GloFAS_data_extractor.reforecast_dataFetch import get_monthsdays -from GloFAS.GloFAS_analysis.probability_exceeding_calculator import probability -def concatenate_prob(): - '''Concatenate GeoDataFrames over multiple day,months along the time dimension - order may be odd, but timestamp provided is still right''' - all_years_data = [] - MONTHSDAYS = get_monthsdays() - for md in MONTHSDAYS: - month = md[0].lower() - day = md[1] - floodedCommune_gdf = probability() # Open the GloFAS data for the current month, day !!!!!!!!!!!!! need to change now ! - all_years_data.append(floodedCommune_gdf) - - - # Concatenate all GeoDataFrames for different years - floodedCommune_gdf_concat = gpd.GeoDataFrame(pd.concat(all_years_data, ignore_index=True), geometry='geometry') - floodedCommune_gdf_concat.to_file(f"{self.DataDir}/floodedRP{self.RPyr}yr_leadtime{self.leadtime}_ADM{self.adminLevel}.gpkg", driver="GPKG") - - return floodedCommune_gdf_concat \ No newline at end of file diff --git a/GloFAS/GloFAS_analysis/performance_calculator.py b/GloFAS/GloFAS_analysis/performance_calculator.py new file mode 100644 index 0000000..e69de29 diff --git a/GloFAS/GloFAS_analysis/probability_calculator.py b/GloFAS/GloFAS_analysis/probability_calculator.py new file mode 100644 index 0000000..d27a29d --- /dev/null +++ b/GloFAS/GloFAS_analysis/probability_calculator.py @@ -0,0 +1,96 @@ +import numpy as np +import pandas as pd +import geopandas as gpd +from datetime import datetime, timedelta +from joblib import Parallel, delayed +from GloFAS.GloFAS_prep.aggregation import aggregation +from GloFAS.GloFAS_data_extractor.reforecast_dataFetch import get_monthsdays +from GloFAS.GloFAS_data_extractor.forecast_dataFetch import compute_dates_range +from GloFAS.GloFAS_data_extractor.threshold_opener import openThreshold +from GloFAS.GloFAS_data_extractor.unzip_open import openGloFAS, unzipGloFAS +import GloFAS.GloFAS_prep.configuration as cfg + +class FloodProbabilityProcessor: + def __init__(self, leadtime, adminLevel, forecastType, measure='max', start_date=None, end_date=None): + # start and end_date are only necessary if your forecastType is not 'reforecast' but 'forecast' + self.leadtime = leadtime + self.adminLevel = adminLevel + self.measure = measure + self.DataDir = cfg.DataDir + self.crs = cfg.crs + self.RPyr = cfg.RPyr + self.area = cfg.MaliArea + self.lakesPath = cfg.lakesPath + self.forecastType = forecastType + if forecastType == 'reforecast': + self.MONTHSDAYS = get_monthsdays() + elif forecastType == 'forecast': + self.MONTHSDAYSYEARS = compute_dates_range(start_date, end_date) + # Initializations that rely on data + self.reference_rasterPath = unzipGloFAS(self.DataDir, self.leadtime, '01', '07') + self.reference_Q_da = openGloFAS(self.reference_rasterPath, self.lakesPath, self.crs) + self.threshold_da = openThreshold(self.DataDir, self.crs, self.RPyr, self.area, self.reference_Q_da) + self.threshold_gdf = aggregation(self.threshold_da, adminPath, 'polygon', measure=cfg.measure) + + def exceedance(self, Q_da, threshold_gdf, nrEnsemble): + '''Check exceedance of threshold values for a single ensemble member''' + GloFAS_gdf = aggregation(Q_da, threshold_gdf, 'polygon', nrEnsemble, measure=self.measure) + exceedance = GloFAS_gdf[f'{self.measure}'] > threshold_gdf[f'{self.measure}'] + return exceedance + + def probability(self, Q_da, threshold_gdf, nrCores): + '''Calculate exceedance probability across ensemble members for a single time step''' + nrMembers = len(Q_da.number) + exceedance_count = np.zeros(len(threshold_gdf), dtype=int) + + # Calculate exceedances for each ensemble in parallel + results = Parallel(n_jobs=nrCores)(delayed(self.exceedance)(Q_da, threshold_gdf, ensemble) for ensemble in Q_da.number) + for result in results: + exceedance_count += result + probability = exceedance_count / nrMembers + + # Prepare results as GeoDataFrame + flooded_df = threshold_gdf.copy() + flooded_df['ExceedanceCount'] = exceedance_count + flooded_df['Probability'] = probability + return flooded_df + + def concatenate_prob(self, Q_da, nrCores): + '''Concatenate exceedance probability calculations over multiple time steps''' + all_years_data = [] + if self.forecastType=='reforecast': + for month, day in self.MONTHSDAYS: + print(f"Starting for {month}, {day}") + Q_da_rasterpath = unzipGloFAS (self.DataDir, self.leadtime, month, day) + Q_da = openGloFAS(Q_da_rasterpath, self.leadtime) # Open data for the specific month/day + for timestamp in Q_da.time: # Iterate over years + startLoop = datetime.now() + valid_timestamp = pd.to_datetime(str(timestamp.values)) + timedelta(hours=self.leadtime) + # Calculate probability + flooded_df = self.probability(Q_da, self.threshold_gdf, nrCores) + flooded_df['ValidTime'] = valid_timestamp # Add timestamp + all_years_data.append(flooded_df) + print(f"Time step {valid_timestamp} done, took: {datetime.now() - startLoop}") + + if self.forecastType=='forecast': + for date in self.MONTHSDAYSYEARS: + startLoop = datetime.now() + year = date.strftime('%Y') + month = date.strftime('%m') + day = date.strftime('%d') + Q_da_rasterpath = unzipGloFAS(self.DataDir, self.leadtime, month, day, year) + Q_da = openGloFAS(Q_da_rasterpath, lakesPath=self.lakesPath, crs=self.crs) + valid_timestamp = pd.to_datetime(str(timestamp.values)) + timedelta(hours=self.leadtime) + # Calculate probability + flooded_df = self.probability(Q_da, self.threshold_gdf, nrCores) + flooded_df['ValidTime'] = valid_timestamp # Add timestamp + all_years_data.append(flooded_df) + print(f"Time step {valid_timestamp} done, took: {datetime.now() - startLoop}") + # Concatenate results for all time steps + floodProb_gdf_concat = pd.concat(all_years_data, ignore_index=True) + floodProb_gdf_concat = gpd.GeoDataFrame(floodProb_gdf_concat, geometry='geometry') + + # Save to file + output_path = f"{self.DataDir}/floodedRP{self.RPyr}yr_leadtime{self.leadtime}_ADM{self.adminLevel}.gpkg" + floodProb_gdf_concat.to_file(output_path, driver="GPKG") + return floodProb_gdf_concat diff --git a/GloFAS/GloFAS_analysis/probability_exceeding_calculator.py b/GloFAS/GloFAS_analysis/probability_exceeding_calculator.py deleted file mode 100644 index c616298..0000000 --- a/GloFAS/GloFAS_analysis/probability_exceeding_calculator.py +++ /dev/null @@ -1,48 +0,0 @@ -# -from GloFAS.GloFAS_prep.aggregation import aggregation - -def exceedance (threshold_gdf, nrEnsemble, measure='max'): - '''GloFAS_gdf based on zonal stats using one ensemble member and timestamp: so only two dimensions ''' - GloFAS_gdf = aggregation (Q_da, threshold_gdf, 'polygon', nrEnsemble, measure=measure) - exceedance = GloFAS_gdf[f'{measure}'] > self.threshold_gdf[f'{measure}'] - return exceedance - -print (f'starting for {month, day}') -self.Q_da = self.openGloFAS(month, day) -print (f'for month, day {month, day}, glofas raster opened') -for timestamp in self.Q_da.time: # which will be the same month,day, but alternating years - startLoop = datetime.now() - valid_timestamp = pd.to_datetime(str(timestamp.values)) + timedelta(hours=self.leadtime) - probability = probability - - -def probability(Q_da, threshold_gdf, nrCores, measure='max'): - '''calculates probability for one month/day/year, so put in for loop where you loop over each grib/netcdf file, and then again for the date that is in that netcdf file''' - - nrMembers = len(Q_da.number) # Number of ensemble members - commune_info_df = threshold_gdf[[f'ADM{self.adminLevel}', 'geometry']].copy() - floodedCommune_data = [] - exceedance_count = np.zeros(len(self.threshold_gdf), dtype=int) - - # Parallelize within the ensemble members - results = Parallel(n_jobs=nrCores)(delayed(exceedance)(threshold_gdf, nrEnsemble, measure=measure) for ensemble in Q_da.number) - - for result in results: - exceedance_count += result - probability = exceedance_count / nrMembers - - flooded_df = pd.DataFrame({ - f'ADM{self.adminLevel}': self.thresholdCommuneMax_gdf[f'ADM{self.adminLevel}'], - 'ExceedanceCount': exceedance_count, - 'Probability': probability, - 'ValidTime': valid_timestamp - }) - - flooded_df = flooded_df.join(commune_info_df.set_index(f'ADM{self.adminLevel}'), on=f'ADM{self.adminLevel}') - floodedCommune_data.append(flooded_df) - - print(f'timestep {valid_timestamp} done, took: {datetime.now() - startLoop}') - - floodedCommune_gdf = pd.concat(floodedCommune_data, ignore_index=True) - floodedCommune_gdf = gpd.GeoDataFrame(floodedCommune_gdf, geometry='geometry') - return floodedCommune_gdf diff --git a/GloFAS/GloFAS_prep/configuration.py b/GloFAS/GloFAS_prep/configuration.py index 558593b..49a15b4 100644 --- a/GloFAS/GloFAS_prep/configuration.py +++ b/GloFAS/GloFAS_prep/configuration.py @@ -29,4 +29,5 @@ adminLevel = 2 # choose level on which you would like to aggregate : 1,2,3 years = np.arange(startYear, endYear, 1) admPath = adminPaths [(adminLevel-1)] # generate the useful administrative unit path -nrCores = 6 #determine number of cpu cores to use \ No newline at end of file +nrCores = 6 #determine number of cpu cores to use +measure = 'max' # measure to aggregate on \ No newline at end of file diff --git a/GloFAS/GloFAS_prep/flood_definer.py b/GloFAS/GloFAS_prep/flood_definer.py new file mode 100644 index 0000000..fd0d07a --- /dev/null +++ b/GloFAS/GloFAS_prep/flood_definer.py @@ -0,0 +1,20 @@ +from GloFAS.GloFAS_prep.vectorCheck import checkVectorFormat +floodProbability_path = self.DataDir/ f"floodedRP{self.RPyr}yr_leadtime{self.leadtime}_ADM{self.adminLevel}.gpkg" + +def stampTrigger (floodProbability_gdf, actionLifetime, triggerProb): + '''defines trigger by the floodprobability that it needs to exceed, as well as writes the timeduration at which the trigger is valid + actionLifetime = actionLifetime in days''' + + # make everything capital + floodProbability_gdf[f'ADM{self.adminLevel}'] = floodProbability_gdf[f'ADM{self.adminLevel}'].apply(lambda x: unidecode.unidecode(x).upper()) + + + floodCommune_gdf['StartValidTime'] = pd.to_datetime(floodCommune_gdf['ValidTime'], errors='coerce') # - timedelta(days=self.buffervalid) + # incorporate actionlifetime + floodCommune_gdf['EndValidTime'] = pd.to_datetime(floodCommune_gdf['ValidTime'], errors='coerce') + timedelta(days=actionLifetime) + floodCommune_gdf['StartValidTime'] = pd.to_datetime(floodCommune_gdf['StartValidTime'], format='%d/%m/%Y', errors='coerce') + floodCommune_gdf['EndValidTime'] = pd.to_datetime(floodCommune_gdf['EndValidTime'], format='%d/%m/%Y', errors='coerce') + # Add Trigger column based on probability threshold + floodCommune_gdf['Trigger'] = np.where(floodCommune_gdf['Probability'] >= triggerProb, 1, 0) + + return floodCommune_gdf