Skip to content

Commit

Permalink
calculating probability as executed
Browse files — browse the repository at this point in the history
  • Loading branch information
ERKuipers committed Dec 12, 2024
1 parent b2e0cb4 commit 59e2426
Show file tree
Hide file tree
Showing 4 changed files with 18 additions and 15 deletions.
27 changes: 15 additions & 12 deletions GloFAS/GloFAS_analysis/probability_calculator.py
Original file line number Diff line number Diff line change
Expand Up @@ -19,12 +19,14 @@ def __init__(self,
lakesPath=cfg.lakesPath,
crs=cfg.crs,
adminLevel=None,
GloFAS_stations = cfg.GloFASstations,
forecastType='reforecast',
measure='max',
start_date=None,
end_date=None,
nrCores=4,
comparisonShape='polygon'):
comparisonShape='polygon',
):

# start and end_date are only necessary if your forecastType is not 'reforecast' but 'forecast'

Expand All @@ -49,33 +51,33 @@ def __init__(self,
self.comparisonShape = comparisonShape
if comparisonShape == 'polygon':
self.measure = measure
self.threshold_gdf = aggregation(self.threshold_da, adminPath, 'polygon', measure=self.measure)
self.threshold_gdf = aggregation(self.threshold_da, vector=adminPath, method='polygon', measure=self.measure)
self.adminLevel = adminLevel

elif comparisonShape == 'point':
self.threshold_gdf = aggregation(self.threshold_da, GloFAS_stations) # aggregating the threshold value on the GloFAS station
self.threshold_gdf = aggregation(self.threshold_da, vector=GloFAS_stations, method='point') # aggregating the threshold value on the GloFAS station
self.nrCores = nrCores

def exceedance(self, Q_da, threshold_gdf, nrEnsemble):
def exceedance(self, Q_da, threshold_gdf, nrEnsemble, timestamp):
'''Check exceedance of threshold values for a single ensemble member
threshold_gdf should be in same shape as eventual outcme, so if comparisonShape=polygon, threshold_gdf should be in polygon'''
if self.comparisonShape=='polygon':
GloFAS_gdf = aggregation(Q_da, threshold_gdf, 'polygon', nrEnsemble, measure=self.measure)
GloFAS_gdf = aggregation(Q_da, threshold_gdf, 'polygon', nrEnsemble, measure=self.measure, timestamp=timestamp)
exceedance = GloFAS_gdf[f'{self.measure}'] > threshold_gdf[f'{self.measure}']
elif self.comparisonShape=='point': # assuming this then refers to aggregation to a point
GloFAS_gdf = aggregation (Q_da, threshold_gdf, 'point', nrEnsemble)
GloFAS_gdf = aggregation (Q_da, threshold_gdf, 'point', nrEnsemble=nrEnsemble, timestamp=timestamp)
exceedance = GloFAS_gdf ['rastervalue'] > threshold_gdf['rastervalue']
else:
raise ValueError (f"Not a valid shape : {self.comparisonShape}, pick between 'polygon' and 'point'")
return exceedance

def probability(self, Q_da, threshold_gdf):
def probability(self, Q_da, threshold_gdf, timestamp):
'''Calculate exceedance probability across ensemble members for a single time step'''
nrMembers = len(Q_da.number)
exceedance_count = np.zeros(len(threshold_gdf), dtype=int)

# Calculate exceedances for each ensemble in parallel
results = Parallel(n_jobs=self.nrCores)(delayed(self.exceedance)(Q_da, threshold_gdf, ensemble) for ensemble in Q_da.number)
results = Parallel(n_jobs=self.nrCores)(delayed(self.exceedance)(Q_da, threshold_gdf, ensemble, timestamp) for ensemble in Q_da.number)
for result in results:
exceedance_count += result
probability = exceedance_count / nrMembers
Expand All @@ -94,11 +96,11 @@ def concatenate_prob(self):
print(f"Starting for {month}, {day}")
Q_da_rasterpath = unzipGloFAS (self.DataDir, self.leadtime, month, day)
Q_da = openGloFAS(Q_da_rasterpath, self.leadtime) # Open data for the specific month/day
for timestamp in Q_da.time: # Iterate over years
for timestamp in Q_da.time: #Iterate over years
startLoop = datetime.now()
valid_timestamp = pd.to_datetime(str(timestamp.values)) + timedelta(hours=self.leadtime)
# Calculate probability
flooded_df = self.probability(Q_da, self.threshold_gdf, self.nrCores)
flooded_df = self.probability(Q_da, self.threshold_gdf, timestamp)
flooded_df['ValidTime'] = valid_timestamp # Add timestamp
all_years_data.append(flooded_df)
print(f"Time step {valid_timestamp} done, took: {datetime.now() - startLoop}")
Expand All @@ -113,7 +115,7 @@ def concatenate_prob(self):
Q_da = openGloFAS(Q_da_rasterpath, lakesPath=self.lakesPath, crs=self.crs)
valid_timestamp = pd.to_datetime(str(timestamp.values)) + timedelta(hours=self.leadtime)
# Calculate probability
flooded_df = self.probability(Q_da, self.threshold_gdf, self.nrCores)
flooded_df = self.probability(Q_da, self.threshold_gdf)
flooded_df['ValidTime'] = valid_timestamp # Add timestamp
all_years_data.append(flooded_df)
print(f"Time step {valid_timestamp} done, took: {datetime.now() - startLoop}")
Expand All @@ -137,10 +139,11 @@ def concatenate_prob(self):
lakesPath=cfg.lakesPath,
crs=cfg.crs,
adminLevel=None,
GloFAS_stations=cfg.GloFASstations,
forecastType='reforecast',
measure=None,
start_date=None,
end_date=None,
nrCores=8,
comparisonShape='point')
prob.concatenate_prob(Q_da)
prob.concatenate_prob()
2 changes: 1 addition & 1 deletion GloFAS/GloFAS_data_extractor/threshold_opener.py
Original file line number Diff line number Diff line change
Expand Up @@ -2,7 +2,7 @@
import rioxarray as rio

def openThreshold(DataDir, crs, RPyr, area, Q_da_forecast ):
threshold_ds = xr.load_dataset(DataDir / f"flood_threshold_glofas_v4_rl_{RPyr:.1f}.nc")
threshold_ds = xr.load_dataset(DataDir / f"auxiliary/flood_threshold_glofas_v4_rl_{RPyr:.1f}.nc")
threshold_ds.rio.write_crs(crs, inplace=True)

threshold_da = threshold_ds[f"rl_{RPyr:.1f}"].sel(
Expand Down
2 changes: 1 addition & 1 deletion GloFAS/GloFAS_prep/aggregation.py
Original file line number Diff line number Diff line change
Expand Up @@ -93,7 +93,7 @@ def aggregation (rasterDA, vector, method, nrEnsemble=None, timestamp=None, meas
rasterDA = rasterDA.sel(time=timestamp)
# aggregation is about the model's location in relation to the model's output raster, therefore placement is in the model's framework
# this argument is ignored when considering polygons, because on that level precision doesnt really matter
vectorGDF = checkVectorFormat(vector, shapeType=method, crs=rasterDA.rio.crs, placement='model')
vectorGDF = checkVectorFormat(vector, shapeType=method, crs=rasterDA.rio.crs, placement='real')
if method == 'point':
pointgdf = query(rasterDA, vectorGDF)
return pointgdf
Expand Down
2 changes: 1 addition & 1 deletion GloFAS/GloFAS_prep/configuration.py
Original file line number Diff line number Diff line change
Expand Up @@ -18,7 +18,7 @@
stationsDir = DataDir / f'stations'
DNHstations = stationsDir / f"Stations_DNH.csv"
googlestations = stationsDir / 'coords_google_gauges_Mali.csv'
GloFASstations = stationsDir / 'GloFAS_to_DNH_resembling_uparea_50km.csv'
GloFASstations = stationsDir / 'GloFAS_to_DNH_resembling_uparea.csv'
impact_csvPath = DataDir / "impact/MergedImpactData.csv"
settlements_tif = DataDir / "GlobalHumanSettlement/GHS_BUILT_S_E2030_GLOBE_R2023A_54009_100_V1_0.tif"

Expand Down

0 comments on commit 59e2426

Please sign in to comment.