Skip to content

Commit

Permalink
Browse files Browse the repository at this point in the history
  • Loading branch information
valentijn7 committed Dec 12, 2024
2 parents cfaad37 + 59e2426 commit 5c8aae5
Show file tree
Hide file tree
Showing 76 changed files with 140,123 additions and 33 deletions.
49 changes: 36 additions & 13 deletions GloFAS/GloFAS_analysis/probability_calculator.py
Original file line number Diff line number Diff line change
Expand Up @@ -14,16 +14,19 @@ class FloodProbabilityProcessor:
def __init__(self,
DataDir=cfg.DataDir,
leadtime=168,
RPyr=5,
area=cfg.MaliArea,
lakesPath=cfg.lakesPath,
crs=cfg.crs,
adminLevel=None,
GloFAS_stations = cfg.GloFASstations,
forecastType='reforecast',
measure='max',
start_date=None,
end_date=None,
nrCores=4,
comparisonShape='polygon'):
comparisonShape='polygon',
):

# start and end_date are only necessary if your forecastType is not 'reforecast' but 'forecast'

Expand All @@ -39,41 +42,42 @@ def __init__(self,
self.MONTHSDAYS = get_monthsdays()
elif forecastType == 'forecast':
self.MONTHSDAYSYEARS = compute_dates_range(start_date, end_date)

# Initializations that rely on data
self.reference_rasterPath = unzipGloFAS(self.DataDir, self.leadtime, '01', '07')
self.reference_rasterPath = unzipGloFAS(self.DataDir, self.leadtime, '01', '08')
self.reference_Q_da = openGloFAS(self.reference_rasterPath, self.lakesPath, self.crs)
self.threshold_da = openThreshold(self.DataDir, self.crs, self.RPyr, self.area, self.reference_Q_da)

self.comparisonShape = comparisonShape
if comparisonShape == 'polygon':
self.measure = measure
self.threshold_gdf = aggregation(self.threshold_da, adminPath, 'polygon', measure=self.measure)
self.threshold_gdf = aggregation(self.threshold_da, vector=adminPath, method='polygon', measure=self.measure)
self.adminLevel = adminLevel

elif comparisonShape == 'point':
self.threshold_gdf = aggregation(self.threshold_da, GloFAS_stations)
self.threshold_gdf = aggregation(self.threshold_da, vector=GloFAS_stations, method='point') # aggregating the threshold value on the GloFAS station
self.nrCores = nrCores

def exceedance(self, Q_da, threshold_gdf, nrEnsemble):
def exceedance(self, Q_da, threshold_gdf, nrEnsemble, timestamp):
'''Check exceedance of threshold values for a single ensemble member
threshold_gdf should be in same shape as eventual outcme, so if comparisonShape=polygon, threshold_gdf should be in polygon'''
if self.comparisonShape=='polygon':
GloFAS_gdf = aggregation(Q_da, threshold_gdf, 'polygon', nrEnsemble, measure=self.measure)
GloFAS_gdf = aggregation(Q_da, threshold_gdf, 'polygon', nrEnsemble, measure=self.measure, timestamp=timestamp)
exceedance = GloFAS_gdf[f'{self.measure}'] > threshold_gdf[f'{self.measure}']
elif self.comparisonShape=='point': # assuming this then refers to aggregation to a point
GloFAS_gdf = aggregation (Q_da, threshold_gdf, 'point', nrEnsemble)
GloFAS_gdf = aggregation (Q_da, threshold_gdf, 'point', nrEnsemble=nrEnsemble, timestamp=timestamp)
exceedance = GloFAS_gdf ['rastervalue'] > threshold_gdf['rastervalue']
else:
raise ValueError (f"Not a valid shape : {self.comparisonShape}, pick between 'polygon' and 'point'")
return exceedance

def probability(self, Q_da, threshold_gdf, nrCores):
def probability(self, Q_da, threshold_gdf, timestamp):
'''Calculate exceedance probability across ensemble members for a single time step'''
nrMembers = len(Q_da.number)
exceedance_count = np.zeros(len(threshold_gdf), dtype=int)

# Calculate exceedances for each ensemble in parallel
results = Parallel(n_jobs=nrCores)(delayed(self.exceedance)(Q_da, threshold_gdf, ensemble) for ensemble in Q_da.number)
results = Parallel(n_jobs=self.nrCores)(delayed(self.exceedance)(Q_da, threshold_gdf, ensemble, timestamp) for ensemble in Q_da.number)
for result in results:
exceedance_count += result
probability = exceedance_count / nrMembers
Expand All @@ -84,19 +88,19 @@ def probability(self, Q_da, threshold_gdf, nrCores):
flooded_df['Probability'] = probability
return flooded_df

def concatenate_prob(self, Q_da, nrCores):
def concatenate_prob(self):
'''Concatenate exceedance probability calculations over multiple time steps'''
all_years_data = []
if self.forecastType=='reforecast':
for month, day in self.MONTHSDAYS:
print(f"Starting for {month}, {day}")
Q_da_rasterpath = unzipGloFAS (self.DataDir, self.leadtime, month, day)
Q_da = openGloFAS(Q_da_rasterpath, self.leadtime) # Open data for the specific month/day
for timestamp in Q_da.time: # Iterate over years
for timestamp in Q_da.time: #Iterate over years
startLoop = datetime.now()
valid_timestamp = pd.to_datetime(str(timestamp.values)) + timedelta(hours=self.leadtime)
# Calculate probability
flooded_df = self.probability(Q_da, self.threshold_gdf, nrCores)
flooded_df = self.probability(Q_da, self.threshold_gdf, timestamp)
flooded_df['ValidTime'] = valid_timestamp # Add timestamp
all_years_data.append(flooded_df)
print(f"Time step {valid_timestamp} done, took: {datetime.now() - startLoop}")
Expand All @@ -111,7 +115,7 @@ def concatenate_prob(self, Q_da, nrCores):
Q_da = openGloFAS(Q_da_rasterpath, lakesPath=self.lakesPath, crs=self.crs)
valid_timestamp = pd.to_datetime(str(timestamp.values)) + timedelta(hours=self.leadtime)
# Calculate probability
flooded_df = self.probability(Q_da, self.threshold_gdf, nrCores)
flooded_df = self.probability(Q_da, self.threshold_gdf)
flooded_df['ValidTime'] = valid_timestamp # Add timestamp
all_years_data.append(flooded_df)
print(f"Time step {valid_timestamp} done, took: {datetime.now() - startLoop}")
Expand All @@ -124,3 +128,22 @@ def concatenate_prob(self, Q_da, nrCores):
output_path = f"{self.DataDir}/floodedRP{self.RPyr}yr_leadtime{self.leadtime}.gpkg"
floodProb_gdf_concat.to_file(output_path, driver="GPKG")
return floodProb_gdf_concat

if __name__ == '__main__':
    # Sweep every configured lead time / return period combination and
    # produce one exceedance-probability GeoPackage per pair, comparing
    # GloFAS discharge against thresholds at station points.
    for lt in cfg.leadtimes:
        for rp in cfg.RPsyr:
            processor_kwargs = dict(
                DataDir=cfg.DataDir,
                leadtime=lt,
                RPyr=rp,
                area=cfg.MaliArea,
                lakesPath=cfg.lakesPath,
                crs=cfg.crs,
                adminLevel=None,            # admin aggregation not used for point comparison
                GloFAS_stations=cfg.GloFASstations,
                forecastType='reforecast',
                measure=None,               # 'measure' only applies to polygon aggregation
                start_date=None,            # start/end dates only needed for forecastType='forecast'
                end_date=None,
                nrCores=8,
                comparisonShape='point',
            )
            FloodProbabilityProcessor(**processor_kwargs).concatenate_prob()
2 changes: 1 addition & 1 deletion GloFAS/GloFAS_data_extractor/threshold_opener.py
Original file line number Diff line number Diff line change
Expand Up @@ -2,7 +2,7 @@
import rioxarray as rio

def openThreshold(DataDir, crs, RPyr, area, Q_da_forecast ):
threshold_ds = xr.load_dataset(DataDir / f"flood_threshold_glofas_v4_rl_{RPyr:.1f}.nc")
threshold_ds = xr.load_dataset(DataDir / f"auxiliary/flood_threshold_glofas_v4_rl_{RPyr:.1f}.nc")
threshold_ds.rio.write_crs(crs, inplace=True)

threshold_da = threshold_ds[f"rl_{RPyr:.1f}"].sel(
Expand Down
2 changes: 1 addition & 1 deletion GloFAS/GloFAS_prep/aggregation.py
Original file line number Diff line number Diff line change
Expand Up @@ -93,7 +93,7 @@ def aggregation (rasterDA, vector, method, nrEnsemble=None, timestamp=None, meas
rasterDA = rasterDA.sel(time=timestamp)
# aggregation is about the model's location in relation to the model's output raster, therefore placement is in the model's framework
# this argument is ignored when considering polygons, because on that level precision doesnt really matter
vectorGDF = checkVectorFormat(vector, shapeType=method, crs=rasterDA.rio.crs, placement='model')
vectorGDF = checkVectorFormat(vector, shapeType=method, crs=rasterDA.rio.crs, placement='real')
if method == 'point':
pointgdf = query(rasterDA, vectorGDF)
return pointgdf
Expand Down
6 changes: 3 additions & 3 deletions GloFAS/GloFAS_prep/configuration.py
Original file line number Diff line number Diff line change
Expand Up @@ -4,9 +4,9 @@
import os
################################ GLOFAS ###############################################

os.chdir (f'C:\\Users\\els-2\\')
os.chdir (f'C:/Users/els-2/')
cur = Path.cwd()
DataDir = cur / 'MaliGloFAS\\data'
DataDir = cur / 'MaliGloFAS/data'
# Mali area coordinates as in notation as used by GloFAS (which is very weird)
# north, west, south, east
MaliArea = [25, -12.25, 10, 4.25] # Mali area [lat_max, lon_min, lat_min, lon_max] (upper left corner , down right corner)
Expand All @@ -18,7 +18,7 @@
stationsDir = DataDir / f'stations'
DNHstations = stationsDir / f"Stations_DNH.csv"
googlestations = stationsDir / 'coords_google_gauges_Mali.csv'
GloFASstations = stationsDir / 'GloFAS_to_DNH_resembling_uparea_50km.csv'
GloFASstations = stationsDir / 'GloFAS_to_DNH_resembling_uparea.csv'
impact_csvPath = DataDir / "impact/MergedImpactData.csv"
settlements_tif = DataDir / "GlobalHumanSettlement/GHS_BUILT_S_E2030_GLOBE_R2023A_54009_100_V1_0.tif"

Expand Down
4 changes: 2 additions & 2 deletions GloFAS/GloFAS_prep/station_to_glofas.py
Original file line number Diff line number Diff line change
Expand Up @@ -25,12 +25,12 @@
ups_area_point_m=row['Catchment area (km2)'] * 1e6, # Convert from km² to m²
stationName=row['Station names'],
glofas_ups_area_xr=glofas_ups_area_da,
radius_m=100000
radius_m=10000
)
stations_upsarea_df.loc[i, 'Glofas_Point_X'] = glofas_x
stations_upsarea_df.loc[i, 'Glofas_Point_Y'] = glofas_y
stations_upsarea_df.loc[i, 'Area_Diff'] = area_diff

# Print or save the updated DataFrame
print(stations_upsarea_df.head())
stations_upsarea_df.to_csv(f'{cfg.DataDir}/stations_Glofas_coordinates.csv')
stations_upsarea_df.to_csv(f'{cfg.DataDir}/stations/stations_Glofas_coordinates.csv')
Loading

0 comments on commit 5c8aae5

Please sign in to comment.