Skip to content

Commit

Permalink
thresholds
Browse files Browse the repository at this point in the history
  • Loading branch information
ERKuipers committed Dec 13, 2024
1 parent 59e2426 commit df5fb5b
Show file tree
Hide file tree
Showing 5 changed files with 110 additions and 53 deletions.
6 changes: 5 additions & 1 deletion GloFAS/GloFAS_prep/Q_timeseries_station.py
Original file line number Diff line number Diff line change
Expand Up @@ -13,6 +13,7 @@
from GloFAS.GloFAS_data_extractor.threshold_opener import openThreshold
from GloFAS.GloFAS_data_extractor.unzip_open import openGloFAS, unzipGloFAS
import GloFAS.GloFAS_prep.configuration as cfg

class GloFAS_timeseries:
def __init__(self,
DataDir,
Expand Down Expand Up @@ -99,8 +100,11 @@ def Q_over_time(self):
station_results_df = pd.DataFrame(station_results)
station_results_df.to_csv(f'{self.DataDir}/stations/timeseries/discharge_timeseries_{stationname}_{self.leadtime}.csv', index=False)
print(f"Finished station {stationname}")





if __name__ =='__main__':
for leadtime in cfg.leadtimes:
timeseries = GloFAS_timeseries(DataDir=cfg.DataDir,
Expand All @@ -113,4 +117,4 @@ def Q_over_time(self):
end_date=None,
IDhead='Station names',
percentile=(100-(cfg.triggerProb*100))) # percentile is 100 minus the probability of exceedance: at the 40th percentile, there is a 60 percent probability of this value being exceeded
timeseries.Q_over_time()
Q_timeseries.Q_over_time()
2 changes: 1 addition & 1 deletion GloFAS/GloFAS_prep/station_to_glofas.py
Original file line number Diff line number Diff line change
Expand Up @@ -22,7 +22,7 @@
glofas_x, glofas_y, area_diff = find_corresponding_point_within_box(
station_lon=row['Lon'],
station_lat=row['Lat'],
ups_area_point_m=row['Catchment area (km2)'] * 1e6, # Convert from km² to m²
ups_area_point_m=row['Catchment area (km2)']*1e6, # Convert from km² to m²
stationName=row['Station names'],
glofas_ups_area_xr=glofas_ups_area_da,
radius_m=10000
Expand Down
56 changes: 6 additions & 50 deletions comparison/observation/HydroImpact.py
Original file line number Diff line number Diff line change
Expand Up @@ -6,6 +6,7 @@
from comparison.pointMatching import attributePoints_to_Polygon
import geopandas as gpd
import numpy as np
from pyextremes import EVA
from scipy.stats import genextreme

def parse_date_with_fallback(date_str, year):
Expand Down Expand Up @@ -59,54 +60,9 @@ def z_RP_station(HydroStations_RP_file, StationName, RP):

return z_RP

def QRP_fit (hydro_df, RP):

# Extract the annual maximum discharge values
hydro_df['Year'] = hydro_df['Date'].dt.year
annual_max_discharge = hydro_df.groupby('Year')['Value'].max()

# Fit a Gumbel distribution to the annual maximum discharge values
loc, scale = stats.gumbel_r.fit(annual_max_discharge)
# Calculate the discharge value corresponding to the return period
discharge_value = stats.gumbel_r.ppf(1 - 1/RP, loc, scale)
return discharge_value

def Q_GEV_fit(hydro_df, percentile):
"""
Fits a GEV distribution to the daily discharge values and calculates the discharge
corresponding to a given percentile.
Parameters:
hydro_df (pd.DataFrame): A dataframe with at least 'Date' and 'Value' columns.
'Date' should be a datetime object and 'Value' is the discharge value.
percentile (float): Percentile for which to compute the discharge value (between 0 and 100).
Returns:
float: The discharge value corresponding to the given percentile.
"""
# Ensure 'Date' column is a datetime object
hydro_df['Date'] = pd.to_datetime(hydro_df['Date'])

# Extract daily discharge values
daily_discharge = hydro_df['Value']
# Remove non-finite values
daily_discharge_cleaned = daily_discharge[np.isfinite(daily_discharge)]

# Check if there are still issues
if daily_discharge_cleaned.empty:
raise ValueError("All data was non-finite after cleaning. Please check your dataset.")

# Fit a GEV distribution
shape, loc, scale = genextreme.fit(daily_discharge_cleaned)


# Calculate the discharge value corresponding to the specified percentile
discharge_value = genextreme.ppf(percentile / 100, shape, loc=loc, scale=scale)

return discharge_value


def stampHydroTrigger(hydro_df, RP, StationName):
def stampHydroTrigger(hydro_df, StationName, temporality_of_extremity, probability, distributionType):
"""
Adds a 'trigger' column to the hydro_df DataFrame indicating whether the 'Value' exceeds the QRP threshold.
Expand All @@ -126,8 +82,8 @@ def stampHydroTrigger(hydro_df, RP, StationName):
#QRP = QRP_station(HydroStations_RP_file, StationName, RP)
Q_station = hydro_df["Value"]

if RP < 21:
QRP= QRP_fit (hydro_df, RP)
if distributionType =='Gumbel':
QRP= QRP_fit (hydro_df, probability)
else: # assuming above 20 is percentile, RP is percentile instead
print ('applying a GEV, assuming your RP is in percentiles')
QRP = Q_GEV_fit (hydro_df, RP)
Expand Down Expand Up @@ -206,7 +162,7 @@ def loop_over_stations(station_csv, DataDir, RP):
RP = float(RP)
station_df = pd.read_csv (station_csv, header=0)
#print (station_df.columns)
hydrodir = rf"{DataDir}/DNHMali_2019\Q_stations"
hydrodir = rf"{DataDir}/DNHMali_2019/Q_stations"
all_events = []
for _, stationrow in station_df.iterrows():
StationName = stationrow['StationName']
Expand Down Expand Up @@ -252,7 +208,7 @@ def events_per_adm (DataDir, admPath, adminLevel, station_csv, StationDataDir, a


if __name__=="__main__":

# running this script for the GloFAS
#print (readcsv(f"{DataDir}/Données partagées - DNH Mali - 2019/Donne╠ües partage╠ües - DNH Mali - 2019/De╠übit du Niger a╠Ç Ansongo.csv"))
event_df = loop_over_stations (cfg.DNHstations, cfg.DataDir, 1)
event_gdf = events_per_adm (cfg.DataDir, cfg.admPath, cfg.adminLevel, cfg.DNHstations, cfg.stationsDir, event_df, 'Observation', 1)
97 changes: 97 additions & 0 deletions comparison/observation/thresholds.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,97 @@
import numpy as np
import pandas as pd
import pyextremes as pe
from pyextremes import EVA
from scipy.stats import genextreme, gumbel_r

import GloFAS.GloFAS_prep.configuration as cfg

def csv_to_cleaned_series (csv):
    """
    Read a GloFAS discharge timeseries CSV and return a cleaned pandas Series.

    Parameters:
        csv (str): Path to the CSV file. Column 0 must hold parseable dates
            (used as the index) and a 'percentile_40.0' column must hold the
            discharge values.

    Returns:
        pd.Series: Float series of 'percentile_40.0', time-interpolated over
            gaps, restricted to dates before 2023-01-01, sorted ascending,
            with any remaining NaNs dropped.
    """
    # BUG FIX: the original read the module-level 'Q_Bamako_csv' instead of
    # the 'csv' argument, so the parameter was silently ignored.
    df = pd.read_csv(
        csv,
        index_col=0,      # date column becomes the index
        parse_dates=True, # parse the index as datetimes
    )

    # Fill gaps with time-weighted interpolation before extracting the series.
    df['percentile_40.0'] = df['percentile_40.0'].interpolate(method='time')
    series = df['percentile_40.0']
    # Keep only data before 2023 — presumably the analysis cut-off; confirm.
    series = series.loc[series.index < pd.to_datetime('2023-01-01')]
    series = (
        series
        .sort_index(ascending=True)
        .astype(float)
        .dropna()
    )
    return series
def Q_Gumbel_fit_RP (hydro_df, RP):
    """
    Fit a Gumbel distribution to annual maximum discharge and return the
    discharge for a given return period.

    Parameters:
        hydro_df (pd.DataFrame): Must contain a datetime 'Date' column and a
            numeric 'Value' (discharge) column.
        RP (float): Return period in years (> 1).

    Returns:
        float: Discharge with an annual exceedance probability of 1/RP.
    """
    # Extract the annual maximum discharge values (block maxima per year).
    hydro_df['Year'] = hydro_df['Date'].dt.year
    annual_max_discharge = hydro_df.groupby('Year')['Value'].max()

    # BUG FIX: the original called 'stats.gumbel_r', but 'stats' is never
    # imported in this module; use gumbel_r from scipy.stats directly.
    loc, scale = gumbel_r.fit(annual_max_discharge)
    # Non-exceedance probability for return period RP is 1 - 1/RP.
    discharge_value = gumbel_r.ppf(1 - 1/RP, loc, scale)
    return discharge_value

def Q_Gumbel_fit_percentile (hydro_df, percentile):
    """
    Fit a Gumbel distribution to annual maximum discharge and return the
    discharge at a given percentile of that fitted distribution.

    The original body was an unfinished stub that returned an undefined name
    (NameError); this implements it consistently with Q_Gumbel_fit_RP.

    Parameters:
        hydro_df (pd.DataFrame): Must contain a datetime 'Date' column and a
            numeric 'Value' (discharge) column.
        percentile (float): Percentile between 0 and 100.

    Returns:
        float: Discharge at the requested percentile of the fitted Gumbel.
    """
    # Annual block maxima, same extraction as Q_Gumbel_fit_RP.
    hydro_df['Year'] = hydro_df['Date'].dt.year
    annual_max_discharge = hydro_df.groupby('Year')['Value'].max()

    loc, scale = gumbel_r.fit(annual_max_discharge)
    # Convert percentile (0-100) to a non-exceedance probability (0-1).
    discharge_value = gumbel_r.ppf(percentile / 100, loc, scale)
    return discharge_value

def Q_GEV_fit_RP (hydro_df, RP):
    """
    Fit a GEV distribution to annual maximum discharge and return the
    discharge for a given return period.

    The original body was an unfinished stub that returned an undefined name
    (NameError); this implements it as the GEV analogue of Q_Gumbel_fit_RP.

    Parameters:
        hydro_df (pd.DataFrame): Must contain a datetime 'Date' column and a
            numeric 'Value' (discharge) column.
        RP (float): Return period in years (> 1).

    Returns:
        float: Discharge with an annual exceedance probability of 1/RP.
    """
    # Annual block maxima, same extraction as the Gumbel variants.
    hydro_df['Year'] = hydro_df['Date'].dt.year
    annual_max_discharge = hydro_df.groupby('Year')['Value'].max()

    shape, loc, scale = genextreme.fit(annual_max_discharge)
    # Non-exceedance probability for return period RP is 1 - 1/RP.
    discharge_value = genextreme.ppf(1 - 1/RP, shape, loc=loc, scale=scale)
    return discharge_value

def Q_GEV_fit_percentile (hydro_df, percentile):
    """
    Fits a GEV distribution to the daily discharge values and calculates the
    discharge corresponding to a given percentile.

    NOTE: unlike the *_RP functions, this fits the *daily* values rather than
    annual block maxima.

    Parameters:
        hydro_df (pd.DataFrame): A dataframe with at least 'Date' and 'Value'
            columns. 'Date' should be a datetime object and 'Value' is the
            discharge value.
        percentile (float): Percentile for which to compute the discharge
            value (between 0 and 100).

    Returns:
        float: The discharge value corresponding to the given percentile.

    Raises:
        ValueError: If no finite discharge values remain after cleaning.
    """
    # Ensure 'Date' column is a datetime object.
    hydro_df['Date'] = pd.to_datetime(hydro_df['Date'])

    # Extract daily discharge values and drop NaN/inf, which break the fit.
    # (Requires numpy, which the original module never imported.)
    daily_discharge = hydro_df['Value']
    daily_discharge_cleaned = daily_discharge[np.isfinite(daily_discharge)]

    if daily_discharge_cleaned.empty:
        raise ValueError("All data was non-finite after cleaning. Please check your dataset.")

    # Fit a GEV distribution to the cleaned daily values.
    shape, loc, scale = genextreme.fit(daily_discharge_cleaned)

    # Convert percentile (0-100) to a non-exceedance probability (0-1).
    discharge_value = genextreme.ppf(percentile / 100, shape, loc=loc, scale=scale)

    return discharge_value

if __name__ == '__main__':
    # Build the path to the GloFAS discharge timeseries for this
    # station/leadtime combination.
    station_name = 'Bamako'
    leadtime_hours = 168
    # Name kept module-level as 'Q_Bamako_csv' for backward compatibility.
    Q_Bamako_csv = f"{cfg.stationsDir}/GloFAS_Q/timeseries/discharge_timeseries_{station_name}_{leadtime_hours}.csv"

    series = csv_to_cleaned_series(Q_Bamako_csv)

    # Extract annual block maxima (method='BM', one-year blocks) and fit an
    # extreme-value model with pyextremes.
    model = EVA(series)
    model.get_extremes(method='BM', block_size="365D")
    #model.plot_extremes()
    gev_fit = model.fit_model()
    model.plot_diagnostic(alpha=0.95)

    # Return levels with 95% confidence bounds from 1000 bootstrap samples.
    summary = model.get_summary(
        return_period=[1, 1.5, 2, 5, 10, 25, 50, 100, 250, 500, 1000],
        alpha=0.95,
        n_samples=1000,
    )
    print(summary)

2 changes: 1 addition & 1 deletion comparison/pointMatching.py
Original file line number Diff line number Diff line change
Expand Up @@ -170,8 +170,8 @@ def find_corresponding_point_within_box(station_lon, station_lat, ups_area_point
plt.xlabel("Longitude")
plt.ylabel("Latitude")
plt.legend(loc="upper right", fontsize=10, frameon=True)
plt.show()
plt.savefig(f'{cfg.DataDir}/GloFAS_station_for_DNH_{stationName}.png')
plt.show()
plt.close()
return model_point_lon, model_point_lat, best_area_diff

Expand Down

0 comments on commit df5fb5b

Please sign in to comment.