
Commit

valentijn7 committed Dec 4, 2024
2 parents d15bbb0 + 4bda33d commit 25903d3
Showing 4 changed files with 472 additions and 75 deletions.
194 changes: 194 additions & 0 deletions GloFAS/GloFAS_prep/thresholds.py
@@ -0,0 +1,194 @@
"""
Copyright 2019-2023 European Union
Licensed under the EUPL, Version 1.2 or as soon they will be approved by the European Commission subsequent versions of the EUPL (the "Licence");
You may not use this work except in compliance with the Licence.
You may obtain a copy of the Licence at:
https://joinup.ec.europa.eu/sites/default/files/inline-files/EUPL%20v1_2%20EN(1).txt
Unless required by applicable law or agreed to in writing, software distributed under the Licence is distributed on an "AS IS" basis,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the Licence for the specific language governing permissions and limitations under the Licence.
"""

import argparse
import os
import sys

import numpy as np
import xarray as xr


def lmoments(values: np.ndarray) -> np.ndarray:
"""
Compute first 2 L-moments of dataset (on first axis)
:param values: N-D array of values
:return: an estimate of the first two sample L-moments
"""

nmoments = 3

# we need to have at least four values in order
# to make a sample L-moments estimation
nvalues = values.shape[0]
if nvalues < 4:
raise ValueError(
"Insufficient number of values to perform sample L-moments estimation"
)

# sort the values into ascending order
values = np.sort(values, axis=0)

sums = np.zeros((nmoments, *(values.shape[1:])))

for i in range(1, nvalues + 1):
z = i
term = values[i - 1]
sums[0] = sums[0] + term
for j in range(1, nmoments):
z -= 1
term = term * z
sums[j] = sums[j] + term

y = float(nvalues)
z = float(nvalues)
sums[0] = sums[0] / z
for j in range(1, nmoments):
y = y - 1.0
z = z * y
sums[j] = sums[j] / z

k = nmoments
p0 = -1.0
for _ in range(2):
ak = float(k)
p0 = -p0
p = p0
temp = p * sums[0]
for i in range(1, k):
ai = i
p = -p * (ak + ai - 1.0) * (ak - ai) / (ai * ai)
temp = temp + (p * sums[i])
sums[k - 1] = temp
k = k - 1

lmoments = np.zeros((2, *(values.shape[1:])))
lmoments[0] = sums[0]
lmoments[1] = sums[1]

return lmoments


def gumbel_parameters_moments(dis):
"""Estimate Gumbel scale (sigma) and location (mu) with the method of moments."""
sigma = np.sqrt(6) * np.nanstd(dis, ddof=1, axis=0) / np.pi
mu = np.nanmean(dis, axis=0) - 0.5772 * sigma
return sigma, mu


def gumbel_parameters_lmoments(dis):
"""Estimate Gumbel scale (sigma) and location (mu) from the first two sample L-moments."""
lambda_coef = lmoments(dis)
sigma = lambda_coef[1] / np.log(2)
mu = lambda_coef[0] - sigma * 0.5772
return sigma, mu


def gumbel_function(y, sigma, mu):
"""Gumbel quantile for return period y: mu - sigma * ln(ln(y / (y - 1)))."""
return mu - sigma * np.log(np.log(y / (y - 1)))


def find_main_var(ds, path):
variable_names = [k for k in ds.variables if len(ds.variables[k].dims) == 3]
if len(variable_names) > 1:
raise Exception("More than one variable in dataset {}".format(path))
elif len(variable_names) == 0:
raise Exception("Could not find a valid variable in dataset {}".format(path))
else:
var_name = variable_names[0]
return var_name


def read_discharge(in_files):
ds = xr.open_dataset(in_files)
var = find_main_var(ds, in_files)
da = ds[var]
return da


def unmask_array(mask, template, data):
data_unmask = np.empty_like(template)
data_unmask[...] = np.NaN
data_unmask[mask] = data
return data_unmask


def create_dataset(dis_max, return_periods, mask, thresholds, sigma, mu):
print("Creating dataset")
ds_rp = xr.Dataset(
coords={"lat": dis_max.coords["lat"], "lon": dis_max.coords["lon"]}
)
for i, rp in enumerate(return_periods):
thres = unmask_array(mask, dis_max.isel(time=0).values, thresholds[i])
ds_rp[f"rl_{rp}"] = (["lat", "lon"], thres)

s = unmask_array(mask, dis_max.isel(time=0).values, sigma)
print(s.shape)
ds_rp["sigma"] = (["lat", "lon"], s)
m = unmask_array(mask, dis_max.isel(time=0).values, mu)
print(m.shape)
ds_rp["mu"] = (["lat", "lon"], m)

print(ds_rp)

return ds_rp


def compute_thresholds_gumbel(dis_max, return_periods):
mask = np.isfinite(dis_max.isel(time=0).values)
dis_max_masked = dis_max.values[:, mask]

print("Computing Gumbel coefficients")
sigma, mu = gumbel_parameters_lmoments(dis_max_masked)

print("Computing return periods")
thresholds = []
for y in return_periods:
dis = gumbel_function(y, sigma, mu)
thresholds.append(dis)

ds_rp = create_dataset(dis_max, return_periods, mask, thresholds, sigma, mu)

return ds_rp


def main(argv=sys.argv):
prog = os.path.basename(argv[0])
parser = argparse.ArgumentParser(
description="""
Utility to compute the discharge return period thresholds
using the method of L-moments.
Thresholds computed: [1.5, 2, 5, 10, 20, 50, 100, 200, 500]
""",
prog=prog,
)
parser.add_argument(
"-d", "--discharge", help="Input discharge files (annual maxima)"
)
parser.add_argument("-o", "--output", help="Output thresholds file")

args = parser.parse_args()

dis = read_discharge(args.discharge)
print(dis)

return_periods = np.array([1.5, 2, 5, 10, 20, 50, 100, 200, 500])

thresholds = compute_thresholds_gumbel(dis, return_periods)

thresholds.to_netcdf(args.output)


def main_script():
sys.exit(main())


if __name__ == "__main__":
main_script()
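
Note on the method: the thresholds above come from the Gumbel quantile Q_T = mu - sigma * ln(ln(T / (T - 1))), with sigma and mu estimated either from ordinary moments or from the first two sample L-moments. The sketch below is illustrative only; it uses a small synthetic array of annual maxima (not real GloFAS discharge) and the moments-based estimator to show how the return-period thresholds are produced.

import numpy as np

# Synthetic annual-maximum discharge series (illustrative values in m3/s, not GloFAS data)
annual_max = np.array([850.0, 920.0, 780.0, 1010.0, 890.0, 1150.0, 970.0, 840.0, 1090.0, 930.0])

# Gumbel parameters via the method of moments (same formulas as gumbel_parameters_moments above)
sigma = np.sqrt(6) * np.nanstd(annual_max, ddof=1) / np.pi
mu = np.nanmean(annual_max) - 0.5772 * sigma

# Threshold for each return period T: Q_T = mu - sigma * ln(ln(T / (T - 1)))
for T in [1.5, 2, 5, 10, 20, 50, 100, 200, 500]:
    q_t = mu - sigma * np.log(np.log(T / (T - 1)))
    print(f"RP {T:>5} yr -> threshold {q_t:8.1f} m3/s")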
59 changes: 36 additions & 23 deletions PTM/PTM_analysis/performance_calculator.py
@@ -81,7 +81,6 @@ def openObservation_gdf(self):
# Check the first few rows
# Filter rows between self.startYear and self.endYear (e.g. 2004 to 2022)
df_filtered = df[(df['End Date'].dt.year >= self.startYear) & (df['End Date'].dt.year < self.endYear)]
print (df_filtered.head)
# Remove non-string entries from ADM columns
df_filtered = df_filtered[df_filtered[f'ADM{self.adminLevel}'].apply(lambda x: isinstance(x, str))]

@@ -300,17 +299,31 @@ def calc_performance_scores(self, obs, pred):
false_alarms += 1 # there is no use in calculating correct negatives as there are many
print (f'hits: {hits}, misses: {misses}, false alarms: {false_alarms}, total {comparisonType} = {sum(obs==1)}, should be equal to {misses}+{hits}={misses+hits}')
# Calculate metrics
output = {
'pod': hits / (hits + misses) if hits + misses != 0 else np.nan, # Probability of Detection
'far': false_alarms / (hits + false_alarms) if hits + false_alarms != 0 else np.nan, # False Alarm Ratio
#'pofd': false_alarms / (false_alarms + correct_negatives) if false_alarms + correct_negatives != 0 else np.nan, # Probability of False Detection
'csi': hits / (hits + false_alarms + misses) if hits + false_alarms + misses != 0 else np.nan, # Critical Success Index
# 'accuracy': (hits + correct_negatives) / (hits + correct_negatives + false_alarms + misses) if hits + correct_negatives + false_alarms + misses != 0 else np.nan, # Accuracy
'precision': hits / (hits + false_alarms) if hits + false_alarms != 0 else np.nan,
'TP': hits,
'FN': misses,
'FP': false_alarms
}
sum_predictions = false_alarms + hits
if sum_predictions == 0:
output = {
'pod': np.nan, # Probability of Detection
'far': np.nan, # False Alarm Ratio
#'pofd': false_alarms / (false_alarms + correct_negatives) if false_alarms + correct_negatives != 0 else np.nan, # Probability of False Detection
'csi': np.nan, # Critical Success Index
# 'accuracy': (hits + correct_negatives) / (hits + correct_negatives + false_alarms + misses) if hits + correct_negatives + false_alarms + misses != 0 else np.nan, # Accuracy
'precision': np.nan,
'TP': hits,
'FN': misses,
'FP': false_alarms
}
else:
output = {
'pod': hits / (hits + misses) if hits + misses != 0 else np.nan, # Probability of Detection
'far': false_alarms / (hits + false_alarms) if hits + false_alarms != 0 else np.nan, # False Alarm Ratio
#'pofd': false_alarms / (false_alarms + correct_negatives) if false_alarms + correct_negatives != 0 else np.nan, # Probability of False Detection
'csi': hits / (hits + false_alarms + misses) if hits + false_alarms + misses != 0 else np.nan, # Critical Success Index
# 'accuracy': (hits + correct_negatives) / (hits + correct_negatives + false_alarms + misses) if hits + correct_negatives + false_alarms + misses != 0 else np.nan, # Accuracy
'precision': hits / (hits + false_alarms) if hits + false_alarms != 0 else np.nan,
'TP': hits,
'FN': misses,
'FP': false_alarms
}

return pd.Series(output)

@@ -333,17 +346,17 @@ def calculateCommunePerformance(self):
return scores_byCommune_gdf

if __name__=='__main__':
# impact_csv = f'{cfg.DataDir}/Impact_data/impact_events_per_admin_529.csv'
# comparisonType ='Impact'
# for RPyr in cfg.RPsyr:
# # PTM_events = f'{cfg.DataDir}/PTM/floodevents_admUnit_RP{RPyr}yr.csv'
# ptm_events_df = ptm_events (cfg.DNHstations, cfg.DataDir, RPyr, cfg.StationCombos)
# PTM_events_per_adm = events_per_adm(cfg.DataDir, cfg.admPath, cfg.adminLevel, cfg.DNHstations, cfg.stationsDir, ptm_events_df, 'PTM', RPyr)
# for leadtime in cfg.leadtimes:
# # print (readcsv(f"{DataDir}/Données partagées - DNH Mali - 2019/Donne╠ües partage╠ües - DNH Mali - 2019/De╠übit du Niger a╠Ç Ansongo.csv"))
# analyzer = PredictedToImpactPerformanceAnalyzer(cfg.DataDir, RPyr, impact_csv, cfg.triggerProb, cfg.adminLevel, cfg.admPath, cfg.startYear, cfg.endYear, cfg.years, PTM_events_per_adm, comparisonType, cfg.actionLifetime, 'PTM', leadtime)
# analyzer.matchImpact_and_Trigger()
# analyzer.calculateCommunePerformance()
impact_csv = f'{cfg.DataDir}/Impact_data/impact_events_per_admin_529.csv'
comparisonType ='Impact'
for RPyr in cfg.RPsyr:
# PTM_events = f'{cfg.DataDir}/PTM/floodevents_admUnit_RP{RPyr}yr.csv'
ptm_events_df = ptm_events (cfg.DNHstations, cfg.DataDir, RPyr, cfg.StationCombos)
PTM_events_per_adm = events_per_adm(cfg.DataDir, cfg.admPath, cfg.adminLevel, cfg.DNHstations, cfg.stationsDir, ptm_events_df, 'PTM', RPyr)
for leadtime in cfg.leadtimes:
# print (readcsv(f"{DataDir}/Données partagées - DNH Mali - 2019/Donne╠ües partage╠ües - DNH Mali - 2019/De╠übit du Niger a╠Ç Ansongo.csv"))
analyzer = PredictedToImpactPerformanceAnalyzer(cfg.DataDir, RPyr, impact_csv, cfg.triggerProb, cfg.adminLevel, cfg.admPath, cfg.startYear, cfg.endYear, cfg.years, PTM_events_per_adm, comparisonType, cfg.actionLifetime, 'PTM', leadtime)
analyzer.matchImpact_and_Trigger()
analyzer.calculateCommunePerformance()

comparisonType ='Observation'
for RPyr in cfg.RPsyr:
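For reference, the guarded metric computation introduced above can be expressed more compactly. The standalone sketch below (with made-up hit/miss/false-alarm counts, not values from the PTM analysis) shows how POD, FAR, CSI, and precision are derived from the contingency counts while returning NaN whenever a denominator is zero.

import numpy as np
import pandas as pd

def contingency_scores(hits, misses, false_alarms):
    # Each ratio falls back to NaN when its denominator is zero (e.g. no events predicted at all)
    pod = hits / (hits + misses) if (hits + misses) else np.nan                        # Probability of Detection
    far = false_alarms / (hits + false_alarms) if (hits + false_alarms) else np.nan    # False Alarm Ratio
    csi = hits / (hits + false_alarms + misses) if (hits + false_alarms + misses) else np.nan  # Critical Success Index
    precision = hits / (hits + false_alarms) if (hits + false_alarms) else np.nan
    return pd.Series({'pod': pod, 'far': far, 'csi': csi, 'precision': precision,
                      'TP': hits, 'FN': misses, 'FP': false_alarms})

print(contingency_scores(hits=7, misses=3, false_alarms=2))   # typical case
print(contingency_scores(hits=0, misses=0, false_alarms=0))   # nothing observed or predicted: all scores NaN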
52 changes: 41 additions & 11 deletions comparison/observation/HydroImpact.py
@@ -5,6 +5,8 @@
import scipy.stats as stats
from comparison.pointMatching import attributePoints_to_Polygon
import geopandas as gpd
from scipy.stats import genextreme

def parse_date_with_fallback(date_str, year):
try:
# Try to parse the date with the given year
@@ -58,16 +60,43 @@ def z_RP_station(HydroStations_RP_file, StationName, RP):

def QRP_fit (hydro_df, RP):

# 1. Extract the annual maximum discharge values
# Extract the annual maximum discharge values
hydro_df['Year'] = hydro_df['Date'].dt.year
annual_max_discharge = hydro_df.groupby('Year')['Value'].max()

# 2. Fit a Gumbel distribution to the annual maximum discharge values
# Fit a Gumbel distribution to the annual maximum discharge values
loc, scale = stats.gumbel_r.fit(annual_max_discharge)
# 4. Calculate the discharge value corresponding to the return period
# Calculate the discharge value corresponding to the return period
discharge_value = stats.gumbel_r.ppf(1 - 1/RP, loc, scale)
return discharge_value

def Q_GEV_fit_daily(hydro_df, percentile):
"""
Fits a GEV distribution to the daily discharge values and calculates the discharge
corresponding to a given percentile.
Parameters:
hydro_df (pd.DataFrame): A dataframe with at least 'Date' and 'Value' columns.
'Date' should be a datetime object and 'Value' is the discharge value.
percentile (float): Percentile for which to compute the discharge value (between 0 and 100).
Returns:
float: The discharge value corresponding to the given percentile.
"""
# Ensure 'Date' column is a datetime object
hydro_df['Date'] = pd.to_datetime(hydro_df['Date'])

# Extract daily discharge values
daily_discharge = hydro_df['Value']

# Fit a GEV distribution to the daily discharge values
shape, loc, scale = genextreme.fit(daily_discharge)

# Calculate the discharge value corresponding to the specified percentile
discharge_value = genextreme.ppf(percentile / 100, shape, loc=loc, scale=scale)

return discharge_value


def stampHydroTrigger(hydro_df, RP, StationName):
"""
@@ -89,8 +118,11 @@ def stampHydroTrigger(hydro_df, RP, StationName):
#QRP = QRP_station(HydroStations_RP_file, StationName, RP)
Q_station = hydro_df["Value"]


QRP= QRP_fit (hydro_df, RP)
if RP < 21:
QRP = QRP_fit(hydro_df, RP)
else: # values above 20 are interpreted as percentiles rather than return periods
print('applying a GEV fit, assuming RP is a percentile')
QRP = Q_GEV_fit_daily(hydro_df, RP)
#print (f"for {StationName} : return period Q= {QRP}")
if not isinstance(QRP, (int, float)):
raise ValueError(f"Expected QRP to be a scalar (int or float), but got {type(QRP)}.")
@@ -100,7 +132,6 @@ def stampHydroTrigger(hydro_df, RP, StationName):
# Copy the DataFrame and add the 'trigger' column
hydro_trigger_df = hydro_df.copy()
hydro_trigger_df['Trigger'] = (hydro_trigger_df['Value'] > QRP).astype(int)

return hydro_trigger_df

def createEvent(trigger_df):
Expand All @@ -109,7 +140,6 @@ def createEvent(trigger_df):
trigger_df = trigger_df.sort_values(by=[f'Date']).reset_index(drop=True)

# Prepare commune info with geometry for event creation

event_data = []

# Keep track of which rows have been processed to avoid rechecking
@@ -213,7 +243,7 @@ def events_per_adm (DataDir, admPath, adminLevel, station_csv, StationDataDir, a


if __name__=="__main__":
for RP in cfg.RPsyr:
#print (readcsv(f"{DataDir}/Données partagées - DNH Mali - 2019/Donne╠ües partage╠ües - DNH Mali - 2019/De╠übit du Niger a╠Ç Ansongo.csv"))
event_df = loop_over_stations (cfg.DNHstations, cfg.DataDir, RP)
event_gdf = events_per_adm (cfg.DataDir, cfg.admPath, cfg.adminLevel, cfg.DNHstations, cfg.stationsDir, all_events_df, 'Observation', RP)

#print (readcsv(f"{DataDir}/Données partagées - DNH Mali - 2019/Donne╠ües partage╠ües - DNH Mali - 2019/De╠übit du Niger a╠Ç Ansongo.csv"))
event_df = loop_over_stations (cfg.DNHstations, cfg.DataDir, 95)
event_gdf = events_per_adm (cfg.DataDir, cfg.admPath, cfg.adminLevel, cfg.DNHstations, cfg.stationsDir, event_df, 'Observation', 95)
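
To illustrate the two threshold styles this change switches between, the sketch below uses a synthetic daily discharge series (not the DNH station files): a Gumbel fit on annual maxima gives the return-period threshold used when RP < 21, and a GEV fit on all daily values gives the percentile threshold used otherwise.

import numpy as np
import pandas as pd
from scipy import stats

# Illustrative daily discharge series (two synthetic years), standing in for a station record
rng = np.random.default_rng(0)
dates = pd.date_range('2004-01-01', '2005-12-31', freq='D')
hydro_df = pd.DataFrame({'Date': dates,
                         'Value': rng.gumbel(loc=900, scale=150, size=len(dates))})

# Return-period threshold from annual maxima (Gumbel fit, as in QRP_fit)
annual_max = hydro_df.groupby(hydro_df['Date'].dt.year)['Value'].max()
loc_g, scale_g = stats.gumbel_r.fit(annual_max)
q_rp10 = stats.gumbel_r.ppf(1 - 1 / 10, loc_g, scale_g)

# Percentile threshold from all daily values (GEV fit, as in Q_GEV_fit_daily)
shape, loc_gev, scale_gev = stats.genextreme.fit(hydro_df['Value'])
q_p95 = stats.genextreme.ppf(95 / 100, shape, loc=loc_gev, scale=scale_gev)

print(f"10-year return-period threshold: {q_rp10:.1f}")
print(f"95th-percentile threshold:       {q_p95:.1f}")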
