diff --git a/GloFAS/GloFAS_prep/thresholds.py b/GloFAS/GloFAS_prep/thresholds.py new file mode 100644 index 0000000..2929853 --- /dev/null +++ b/GloFAS/GloFAS_prep/thresholds.py @@ -0,0 +1,194 @@ +""" +Copyright 2019-2023 European Union +Licensed under the EUPL, Version 1.2 or as soon they will be approved by the European Commission subsequent versions of the EUPL (the "Licence"); +You may not use this work except in compliance with the Licence. +You may obtain a copy of the Licence at: +https://joinup.ec.europa.eu/sites/default/files/inline-files/EUPL%20v1_2%20EN(1).txt +Unless required by applicable law or agreed to in writing, software distributed under the Licence is distributed on an "AS IS" basis, +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +See the Licence for the specific language governing permissions and limitations under the Licence. + +""" + +import argparse +import os +import sys + +import numpy as np +import xarray as xr + + +def lmoments(values: np.ndarray) -> np.ndarray: + """ + Compute first 2 L-moments of dataset (on first axis) + :param values: N-D array of values + :return: an estimate of the first two sample L-moments + """ + + nmoments = 3 + + # we need to have at least four values in order + # to make a sample L-moments estimation + nvalues = values.shape[0] + if nvalues < 4: + raise ValueError( + "Insufficient number of values to perform sample L-moments estimation" + ) + + # sort the values into ascending order + values = np.sort(values, axis=0) + + sums = np.zeros((nmoments, *(values.shape[1:]))) + + for i in range(1, nvalues + 1): + z = i + term = values[i - 1] + sums[0] = sums[0] + term + for j in range(1, nmoments): + z -= 1 + term = term * z + sums[j] = sums[j] + term + + y = float(nvalues) + z = float(nvalues) + sums[0] = sums[0] / z + for j in range(1, nmoments): + y = y - 1.0 + z = z * y + sums[j] = sums[j] / z + + k = nmoments + p0 = -1.0 + for _ in range(2): + ak = float(k) + p0 = -p0 + p = p0 + temp = p * sums[0] + for i in range(1, k): + ai = i + p = -p * (ak + ai - 1.0) * (ak - ai) / (ai * ai) + temp = temp + (p * sums[i]) + sums[k - 1] = temp + k = k - 1 + + lmoments = np.zeros((2, *(values.shape[1:]))) + lmoments[0] = sums[0] + lmoments[1] = sums[1] + + return lmoments + + +def gumbel_parameters_moments(dis): + sigma = np.sqrt(6) * np.nanstd(dis, ddof=1, axis=0) / np.pi + mu = np.nanmean(dis, axis=0) - 0.5772 * sigma + return sigma, mu + + +def gumbel_parameters_lmoments(dis): + lambda_coef = lmoments(dis) + sigma = lambda_coef[1] / np.log(2) + mu = lambda_coef[0] - sigma * 0.5772 + return sigma, mu + + +def gumbel_function(y, sigma, mu): + return mu - sigma * np.log(np.log(y / (y - 1))) + + +def find_main_var(ds, path): + variable_names = [k for k in ds.variables if len(ds.variables[k].dims) == 3] + if len(variable_names) > 1: + raise Exception("More than one variable in dataset {}".format(path)) + elif len(variable_names) == 0: + raise Exception("Could not find a valid variable in dataset {}".format(path)) + else: + var_name = variable_names[0] + return var_name + + +def read_discharge(in_files): + ds = xr.open_dataset(in_files) + var = find_main_var(ds, in_files) + da = ds[var] + return da + + +def unmask_array(mask, template, data): + data_unmask = np.empty_like(template) + data_unmask[...] = np.NaN + data_unmask[mask] = data + return data_unmask + + +def create_dataset(dis_max, return_periods, mask, thresholds, sigma, mu): + print("Creating dataset") + ds_rp = xr.Dataset( + coords={"lat": dis_max.coords["lat"], "lon": dis_max.coords["lon"]} + ) + for i, rp in enumerate(return_periods): + thres = unmask_array(mask, dis_max.isel(time=0).values, thresholds[i]) + ds_rp[f"rl_{rp}"] = (["lat", "lon"], thres) + + s = unmask_array(mask, dis_max.isel(time=0).values, sigma) + print(s.shape) + ds_rp[f"sigma"] = (["lat", "lon"], s) + m = unmask_array(mask, dis_max.isel(time=0).values, mu) + print(m.shape) + ds_rp[f"mu"] = (["lat", "lon"], m) + + print(ds_rp) + + return ds_rp + + +def compute_thresholds_gumbel(dis_max, return_periods): + mask = np.isfinite(dis_max.isel(time=0).values) + dis_max_masked = dis_max.values[:, mask] + + print("Computing Gumbel coefficients") + sigma, mu = gumbel_parameters_lmoments(dis_max_masked) + + print("Computing return periods") + thresholds = [] + for y in return_periods: + dis = gumbel_function(y, sigma, mu) + thresholds.append(dis) + + ds_rp = create_dataset(dis_max, return_periods, mask, thresholds, sigma, mu) + + return ds_rp + + +def main(argv=sys.argv): + prog = os.path.basename(argv[0]) + parser = argparse.ArgumentParser( + description=""" + Utility to compute the discharge return period thresholds + using the method of L-moments. + Thresholds computed: [1.5, 2, 5, 10, 20, 50, 100, 200, 500] + """, + prog=prog, + ) + parser.add_argument( + "-d", "--discharge", help="Input discharge files (annual maxima)" + ) + parser.add_argument("-o", "--output", help="Output thresholds file") + + args = parser.parse_args() + + dis = read_discharge(args.discharge) + print(dis) + + return_periods = np.array([1.5, 2, 5, 10, 20, 50, 100, 200, 500]) + + thresholds = compute_thresholds_gumbel(dis, return_periods) + + thresholds.to_netcdf(args.output) + + +def main_script(): + sys.exit(main()) + + +if __name__ == "__main__": + main_script() \ No newline at end of file diff --git a/PTM/PTM_analysis/performance_calculator.py b/PTM/PTM_analysis/performance_calculator.py index 44b990b..3398fce 100644 --- a/PTM/PTM_analysis/performance_calculator.py +++ b/PTM/PTM_analysis/performance_calculator.py @@ -81,7 +81,6 @@ def openObservation_gdf(self): # Check the first few rows # Filter rows between 2004 and 2022 () df_filtered = df[(df['End Date'].dt.year >= self.startYear) & (df['End Date'].dt.year < self.endYear)] - print (df_filtered.head) # Remove non-string entries from ADM columns df_filtered = df_filtered[df_filtered[f'ADM{self.adminLevel}'].apply(lambda x: isinstance(x, str))] @@ -300,17 +299,31 @@ def calc_performance_scores(self, obs, pred): false_alarms += 1 # there is no use in calculating correct negatives as there are many print (f'hits: {hits}, misses: {misses}, false alarms: {false_alarms}, total {comparisonType} = {sum(obs==1)}, should be equal to {misses}+{hits}={misses+hits}') # Calculate metrics - output = { - 'pod': hits / (hits + misses) if hits + misses != 0 else np.nan, # Probability of Detection - 'far': false_alarms / (hits + false_alarms) if hits + false_alarms != 0 else np.nan, # False Alarm Ratio - #'pofd': false_alarms / (false_alarms + correct_negatives) if false_alarms + correct_negatives != 0 else np.nan, # Probability of False Detection - 'csi': hits / (hits + false_alarms + misses) if hits + false_alarms + misses != 0 else np.nan, # Critical Success Index - # 'accuracy': (hits + correct_negatives) / (hits + correct_negatives + false_alarms + misses) if hits + correct_negatives + false_alarms + misses != 0 else np.nan, # Accuracy - 'precision': hits / (hits + false_alarms) if hits + false_alarms != 0 else np.nan, - 'TP': hits, - 'FN': misses, - 'FP': false_alarms - } + sum_predictions = false_alarms + hits + if sum_predictions == 0: + output = { + 'pod': np.nan, # Probability of Detection + 'far': np.nan, # False Alarm Ratio + #'pofd': false_alarms / (false_alarms + correct_negatives) if false_alarms + correct_negatives != 0 else np.nan, # Probability of False Detection + 'csi': np.nan, # Critical Success Index + # 'accuracy': (hits + correct_negatives) / (hits + correct_negatives + false_alarms + misses) if hits + correct_negatives + false_alarms + misses != 0 else np.nan, # Accuracy + 'precision': np.nan, + 'TP': hits, + 'FN': misses, + 'FP': false_alarms + } + else: + output = { + 'pod': hits / (hits + misses) if hits + misses != 0 else np.nan, # Probability of Detection + 'far': false_alarms / (hits + false_alarms) if hits + false_alarms != 0 else np.nan, # False Alarm Ratio + #'pofd': false_alarms / (false_alarms + correct_negatives) if false_alarms + correct_negatives != 0 else np.nan, # Probability of False Detection + 'csi': hits / (hits + false_alarms + misses) if hits + false_alarms + misses != 0 else np.nan, # Critical Success Index + # 'accuracy': (hits + correct_negatives) / (hits + correct_negatives + false_alarms + misses) if hits + correct_negatives + false_alarms + misses != 0 else np.nan, # Accuracy + 'precision': hits / (hits + false_alarms) if hits + false_alarms != 0 else np.nan, + 'TP': hits, + 'FN': misses, + 'FP': false_alarms + } return pd.Series(output) @@ -333,17 +346,17 @@ def calculateCommunePerformance(self): return scores_byCommune_gdf if __name__=='__main__': - # impact_csv = f'{cfg.DataDir}/Impact_data/impact_events_per_admin_529.csv' - # comparisonType ='Impact' - # for RPyr in cfg.RPsyr: - # # PTM_events = f'{cfg.DataDir}/PTM/floodevents_admUnit_RP{RPyr}yr.csv' - # ptm_events_df = ptm_events (cfg.DNHstations, cfg.DataDir, RPyr, cfg.StationCombos) - # PTM_events_per_adm = events_per_adm(cfg.DataDir, cfg.admPath, cfg.adminLevel, cfg.DNHstations, cfg.stationsDir, ptm_events_df, 'PTM', RPyr) - # for leadtime in cfg.leadtimes: - # # print (readcsv(f"{DataDir}/Données partagées - DNH Mali - 2019/Donne╠ües partage╠ües - DNH Mali - 2019/De╠übit du Niger a╠Ç Ansongo.csv")) - # analyzer = PredictedToImpactPerformanceAnalyzer(cfg.DataDir, RPyr, impact_csv, cfg.triggerProb, cfg.adminLevel, cfg.admPath, cfg.startYear, cfg.endYear, cfg.years, PTM_events_per_adm, comparisonType, cfg.actionLifetime, 'PTM', leadtime) - # analyzer.matchImpact_and_Trigger() - # analyzer.calculateCommunePerformance() + impact_csv = f'{cfg.DataDir}/Impact_data/impact_events_per_admin_529.csv' + comparisonType ='Impact' + for RPyr in cfg.RPsyr: + # PTM_events = f'{cfg.DataDir}/PTM/floodevents_admUnit_RP{RPyr}yr.csv' + ptm_events_df = ptm_events (cfg.DNHstations, cfg.DataDir, RPyr, cfg.StationCombos) + PTM_events_per_adm = events_per_adm(cfg.DataDir, cfg.admPath, cfg.adminLevel, cfg.DNHstations, cfg.stationsDir, ptm_events_df, 'PTM', RPyr) + for leadtime in cfg.leadtimes: + # print (readcsv(f"{DataDir}/Données partagées - DNH Mali - 2019/Donne╠ües partage╠ües - DNH Mali - 2019/De╠übit du Niger a╠Ç Ansongo.csv")) + analyzer = PredictedToImpactPerformanceAnalyzer(cfg.DataDir, RPyr, impact_csv, cfg.triggerProb, cfg.adminLevel, cfg.admPath, cfg.startYear, cfg.endYear, cfg.years, PTM_events_per_adm, comparisonType, cfg.actionLifetime, 'PTM', leadtime) + analyzer.matchImpact_and_Trigger() + analyzer.calculateCommunePerformance() comparisonType ='Observation' for RPyr in cfg.RPsyr: diff --git a/comparison/observation/HydroImpact.py b/comparison/observation/HydroImpact.py index e3f17be..818fc0f 100644 --- a/comparison/observation/HydroImpact.py +++ b/comparison/observation/HydroImpact.py @@ -5,6 +5,8 @@ import scipy.stats as stats from comparison.pointMatching import attributePoints_to_Polygon import geopandas as gpd +from scipy.stats import genextreme + def parse_date_with_fallback(date_str, year): try: # Try to parse the date with the given year @@ -58,16 +60,43 @@ def z_RP_station(HydroStations_RP_file, StationName, RP): def QRP_fit (hydro_df, RP): - # 1. Extract the annual maximum discharge values + # Extract the annual maximum discharge values hydro_df['Year'] = hydro_df['Date'].dt.year annual_max_discharge = hydro_df.groupby('Year')['Value'].max() - # 2. Fit a Gumbel distribution to the annual maximum discharge values + # Fit a Gumbel distribution to the annual maximum discharge values loc, scale = stats.gumbel_r.fit(annual_max_discharge) - # 4. Calculate the discharge value corresponding to the return period + # Calculate the discharge value corresponding to the return period discharge_value = stats.gumbel_r.ppf(1 - 1/RP, loc, scale) return discharge_value +def Q_GEV_fit_daily(hydro_df, percentile): + """ + Fits a GEV distribution to the daily discharge values and calculates the discharge + corresponding to a given percentile. + + Parameters: + hydro_df (pd.DataFrame): A dataframe with at least 'Date' and 'Value' columns. + 'Date' should be a datetime object and 'Value' is the discharge value. + percentile (float): Percentile for which to compute the discharge value (between 0 and 100). + + Returns: + float: The discharge value corresponding to the given percentile. + """ + # Ensure 'Date' column is a datetime object + hydro_df['Date'] = pd.to_datetime(hydro_df['Date']) + + # Extract daily discharge values + daily_discharge = hydro_df['Value'] + + # Fit a GEV distribution to the daily discharge values + shape, loc, scale = genextreme.fit(daily_discharge) + + # Calculate the discharge value corresponding to the specified percentile + discharge_value = genextreme.ppf(percentile / 100, shape, loc=loc, scale=scale) + + return discharge_value + def stampHydroTrigger(hydro_df, RP, StationName): """ @@ -89,8 +118,11 @@ def stampHydroTrigger(hydro_df, RP, StationName): #QRP = QRP_station(HydroStations_RP_file, StationName, RP) Q_station = hydro_df["Value"] - - QRP= QRP_fit (hydro_df, RP) + if RP < 21: + QRP= QRP_fit (hydro_df, RP) + else: # assuming above 20 is percentile, RP is percentile instead + print ('applying a GEV, assuming your RP is in percentiles') + QRP = Q_GEV_fit (hydro_df, RP) #print (f"for {StationName} : return period Q= {QRP}") if not isinstance(QRP, (int, float)): raise ValueError(f"Expected QRP to be a scalar (int or float), but got {type(QRP)}.") @@ -100,7 +132,6 @@ def stampHydroTrigger(hydro_df, RP, StationName): # Copy the DataFrame and add the 'trigger' column hydro_trigger_df = hydro_df.copy() hydro_trigger_df['Trigger'] = (hydro_trigger_df['Value'] > QRP).astype(int) - return hydro_trigger_df def createEvent(trigger_df): @@ -109,7 +140,6 @@ def createEvent(trigger_df): trigger_df = trigger_df.sort_values(by=[f'Date']).reset_index(drop=True) # Prepare commune info with geometry for event creation - event_data = [] # Keep track of which rows have been processed to avoid rechecking @@ -213,7 +243,7 @@ def events_per_adm (DataDir, admPath, adminLevel, station_csv, StationDataDir, a if __name__=="__main__": - for RP in cfg.RPsyr: - #print (readcsv(f"{DataDir}/Données partagées - DNH Mali - 2019/Donne╠ües partage╠ües - DNH Mali - 2019/De╠übit du Niger a╠Ç Ansongo.csv")) - event_df = loop_over_stations (cfg.DNHstations, cfg.DataDir, RP) - event_gdf = events_per_adm (cfg.DataDir, cfg.admPath, cfg.adminLevel, cfg.DNHstations, cfg.stationsDir, all_events_df, 'Observation', RP) \ No newline at end of file + + #print (readcsv(f"{DataDir}/Données partagées - DNH Mali - 2019/Donne╠ües partage╠ües - DNH Mali - 2019/De╠übit du Niger a╠Ç Ansongo.csv")) + event_df = loop_over_stations (cfg.DNHstations, cfg.DataDir, 95) + event_gdf = events_per_adm (cfg.DataDir, cfg.admPath, cfg.adminLevel, cfg.DNHstations, cfg.stationsDir, event_df, 'Observation', 95) \ No newline at end of file diff --git a/comparison/visualization/plot.py b/comparison/visualization/plot.py index 169889b..642bf54 100644 --- a/comparison/visualization/plot.py +++ b/comparison/visualization/plot.py @@ -7,11 +7,20 @@ class Visualizer: def __init__(self, DataDir, vector_adminMap): self.models = ['GloFAS', 'GoogleFloodHub', 'PTM'] # is PTM best way to refer to the current trigger model in the EAP? - self.colors = ['cornflowerblue', 'salmon','darkgreen'] # adjust pls if you want - self.linestyles = ['-', '--'] - self.markerstyle =['o','v'] + self.colors = ['cornflowerblue', 'salmon','darkgreen'] + self.markersize = 12 + self.admin_colors = [ + "gold", "cornflowerblue", "#ff7f00", "#4daf4a", "#e41a1c", "#984ea3", + "#a65628", "#f781bf", "#999999", "#a6cee3", + "#009E73", "#0072B2" + ]#['tab:blue', 'tab:orange', 'tab:green', 'tab:red', 'tab:purple', 'tab:brown', 'tab:pink', 'tab:gray', 'tab:olive', 'tab:cyan', ] # adjust pls if you want + self.linestyles = ['-', '--','-.' ] + self.markerstyles =['o','v', 's'] + self.comparisonTypes = ['Observation', 'Impact'] self.DataDir=DataDir self.gdf_shape=checkVectorFormat(vector_adminMap, shapeType='polygon') + self.cmap_r = 'RdYlBu_r' # 'cmc.batlow_r' # Reversed for FAR + self.cmap = 'RdYlBu' # 'cmc.batlow' # Default for POD def map_performance(self, scores_by_commune_gdf, RPyr, leadtime, comparisonType, model): """ @@ -27,17 +36,14 @@ def map_performance(self, scores_by_commune_gdf, RPyr, leadtime, comparisonType, ] metrics = ['pod', 'far', 'csi', 'pofd', 'accuracy', 'precision'] - # Define color maps - cmap = 'cmc.batlow' - cmap_r = 'cmc.batlow_r' for ax, metric, title in zip(axes.flatten(), metrics, titles): # Plot each metric self.gdf_shape.plot(ax=ax, color='lightgrey', alpha=0.5) if title in ['POD (Probability of Detection)', 'CSI (Critical Success Index)', 'Accuracy', 'Precision']: - scores_by_commune_gdf.plot(column=metric, cmap=cmap_r, linewidth=0.8, ax=ax, edgecolor='0.8', legend=True, vmin=0, vmax=1) + scores_by_commune_gdf.plot(column=metric, cmap=self.cmap_r, linewidth=0.8, ax=ax, edgecolor='0.8', legend=True, vmin=0, vmax=1) else: - scores_by_commune_gdf.plot(column=metric, cmap=cmap, linewidth=0.8, ax=ax, edgecolor='0.8', legend=True, vmin=0, vmax=1) + scores_by_commune_gdf.plot(column=metric, cmap=self.cmap, linewidth=0.8, ax=ax, edgecolor='0.8', legend=True, vmin=0, vmax=1) ax.set_title(title) ax.set_axis_off() @@ -58,10 +64,9 @@ def map_pod_far (self, scores_by_commune_gdf, RPyr, leadtime, comparisonType, mo metrics = ['pod', 'far'] # Define color maps - cmap_pod = 'cmc.batlow_r' # Reversed for POD - cmap_far = 'cmc.batlow' # Default for FAR - for ax, metric, title, cmap in zip(axes, metrics, titles, [cmap_pod, cmap_far]): + + for ax, metric, title, cmap in zip(axes, metrics, titles, [self.cmap, self.cmap_r]): # Plot each metric self.gdf_shape.plot(ax=ax, color='lightgrey', alpha=0.5) scores_by_commune_gdf.plot(column=metric, cmap=cmap, linewidth=0.8, ax=ax, edgecolor='0.8', legend=True, vmin=0, vmax=1) @@ -76,7 +81,7 @@ def map_pod_far (self, scores_by_commune_gdf, RPyr, leadtime, comparisonType, mo def performance_over_param(self, admin_unit, data, standard_RP=5, standard_leadtime=168): - fig, axs = plt.subplots(2, 2, figsize=(15, 10)) + fig, axs = plt.subplots(2, 2, figsize=(10, 7)) fig.suptitle(f'Performance Metrics for {admin_unit}', fontsize=16) leadtimes = data['leadtimes'] lt_idx = leadtimes.index(standard_leadtime) @@ -88,58 +93,59 @@ def performance_over_param(self, admin_unit, data, standard_RP=5, standard_leadt ax = axs[0, 0] for model, color in zip(self.models, self.colors): # 2 in return period is 5yrs of return period !! 0=1.5 1=2, 2= 5 - ax.scatter(leadtimes, data['POD'][model]['Observation'][RP_idx,:], color=color, marker=self.markerstyle[0], linestyle=self.linestyles[0], label=f'{model} (Obs)') - ax.scatter(leadtimes, data['POD'][model]['Impact'][RP_idx,:], color=color, marker=self.markerstyle[1], linestyle=self.linestyles[1], label=f'{model} (Impact)') + ax.plot(leadtimes, data['POD'][model]['Observation'][RP_idx,:], color=color,marker=self.markerstyles[0], linestyle=self.linestyles[0], label=f'{model} (Obs)') + ax.plot(leadtimes, data['POD'][model]['Impact'][RP_idx,:], color=color, marker=self.markerstyles[1], linestyle=self.linestyles[1], label=f'{model} (Impact)') ax.set_xlabel('Leadtime (hours)') ax.set_ylabel('POD') - ax.set_title('POD vs Leadtime') + #ax.set_title('POD vs Leadtime') ax.set_xlim (leadtimes_x_lim ) ax.set_ylim([-0.05,1.05]) - ax.text (72.5, 0.97, f'Return period={standard_RP:.1f} years') - ax.legend() + ax.text (30, 0.97, f'Return period={standard_RP:.1f} years') + #ax.legend() #ax.grid(True) # Plot 2: POD against return period ax = axs[0, 1] for model, color in zip(self.models, self.colors): # 4th index in leadtime index is 168 hours, 7 days - ax.scatter(return_periods, data['POD'][model]['Observation'][:,lt_idx], color=color,marker=self.markerstyle[0], linestyle=self.linestyles[0], label=f'{model} (Obs)') - ax.scatter(return_periods, data['POD'][model]['Impact'][:,lt_idx], color=color, marker=self.markerstyle[1], linestyle=self.linestyles[1], label=f'{model} (Impact)') + ax.plot(return_periods, data['POD'][model]['Observation'][:,lt_idx], color=color,marker=self.markerstyles[0], linestyle=self.linestyles[0], label=f'{model} (Obs)') + ax.plot(return_periods, data['POD'][model]['Impact'][:,lt_idx], color=color, marker=self.markerstyles[1], linestyle=self.linestyles[1], label=f'{model} (Impact)') ax.set_xlabel('Return Period (years)') ax.set_ylabel('POD') - ax.set_title('POD vs Return Period') + #ax.set_title('POD vs Return Period') ax.set_xlim (RP_x_lim) ax.set_ylim([-0.05,1.05]) ax.text (1.6, 0.97, f'Leadtime={standard_leadtime:.0f} hours ({standard_leadtime/24:.0f} days)') - ax.legend() + ax.legend(bbox_to_anchor=(1.9,1.0), loc='upper right') #ax.grid(True) # Plot 3: FAR against leadtime ax = axs[1, 0] for model, color in zip(self.models, self.colors): - ax.scatter(leadtimes, data['FAR'][model]['Observation'][RP_idx,:], color=color, marker=self.markerstyle[0], linestyle=self.linestyles[0], label=f'{model} (Obs)') - ax.scatter(leadtimes, data['FAR'][model]['Impact'][RP_idx,:], color=color,marker=self.markerstyle[1], linestyle=self.linestyles[1], label=f'{model} (Impact)') + ax.plot () + ax.plot(leadtimes, data['FAR'][model]['Observation'][RP_idx,:], color=color,marker=self.markerstyles[0], linestyle=self.linestyles[0], label=f'{model} (Obs)') + ax.plot(leadtimes, data['FAR'][model]['Impact'][RP_idx,:], color=color,marker=self.markerstyles[1], linestyle=self.linestyles[1], label=f'{model} (Impact)') ax.set_xlabel('Leadtime (hours)') ax.set_ylabel('FAR') - ax.set_title('FAR vs Leadtime') + #ax.set_title('FAR vs Leadtime') ax.set_xlim (leadtimes_x_lim ) ax.set_ylim([-0.05,1.05]) - ax.text (72.5, 0.97, f'Return period={standard_RP:.1f} years') - ax.legend() + ax.text (30, 0.97, f'Return period={standard_RP:.1f} years') + #ax.legend() #ax.grid(True) # Plot 4: FAR against return period ax = axs[1, 1] for model, color in zip(self.models, self.colors): - ax.scatter(return_periods, data['FAR'][model]['Observation'][:,lt_idx], color=color, marker=self.markerstyle[0], linestyle=self.linestyles[0], label=f'{model} (Obs)') - ax.scatter(return_periods, data['FAR'][model]['Impact'][:,lt_idx], color=color, marker=self.markerstyle[1], linestyle=self.linestyles[1], label=f'{model} (Impact)') + ax.plot(return_periods, data['FAR'][model]['Observation'][:,lt_idx], color=color, marker=self.markerstyles[0], linestyle=self.linestyles[0], label=f'{model} (Obs)') + ax.plot(return_periods, data['FAR'][model]['Impact'][:,lt_idx], color=color, marker=self.markerstyles[1], linestyle=self.linestyles[1], label=f'{model} (Impact)') ax.set_xlabel('Return Period (years)') ax.set_ylabel('FAR') - ax.set_title('FAR vs Return Period') + #ax.set_title('FAR vs Return Period') ax.set_xlim (RP_x_lim) ax.set_ylim([-0.05,1.05]) - ax.text (1.6, 0.97, f'Leadtime={standard_leadtime:.0f} hours ({standard_leadtime/24:.0f} days)') - ax.legend() + ax.text (1.05, 0.97, f'Leadtime={standard_leadtime:.0f} hours ({standard_leadtime/24:.0f} days)') + #ax.legend() #ax.grid(True) plt.tight_layout(rect=[0, 0.03, 1, 0.95]) @@ -147,20 +153,174 @@ def performance_over_param(self, admin_unit, data, standard_RP=5, standard_leadt plt.savefig(filePath) plt.show() + def performance_over_leadtime_all(self, admin_units, standard_RP=5): #, standard_leadtime=168): + data_all_admin = [] + for admin_unit in admin_units: + data = collect_performance_measures(admin_unit, self.DataDir, cfg.leadtimes, cfg.RPsyr) + data_all_admin.append((admin_unit, data)) + + fig, axs = plt.subplots(2, 1, figsize=(14, 10)) + fig.suptitle('Performance Metrics Across Administrative Units', fontsize=16) + return_periods = cfg.RPsyr + RP_idx = return_periods.index(standard_RP) + leadtimes = cfg.leadtimes + leadtimes_x_lim = [min(leadtimes), max(leadtimes)] + + # Plot 1: POD against leadtime + ax = axs[0] + for admin_unit, color in zip(admin_units, self.admin_colors): + for model, marker in zip(self.models, self.markerstyles): + for comparison_type, linestyle in zip(self.comparisonTypes, self.linestyles): + ax.plot(leadtimes, + data_all_admin[admin_units.index(admin_unit)][1]['POD'][model][comparison_type][RP_idx, :], + color=color, + linestyle=linestyle, + marker=marker, + markersize=self.markersize, + label=f'{admin_unit}, {model}, {comparison_type}') + ax.set_xlabel('Leadtime (hours)') + ax.set_ylabel('POD') + ax.set_xlim(leadtimes_x_lim) + ax.set_ylim([-0.05, 1.05]) + ax.text(24, 1.10, f'Return period={standard_RP:.1f} years') + + + # Plot 3: FAR against leadtime + ax = axs[1] + for admin_unit, color in zip(admin_units, self.admin_colors): + for model, marker in zip(self.models, self.markerstyles): + for comparison_type, linestyle in zip(self.comparisonTypes, self.linestyles): + ax.plot(leadtimes, + data_all_admin[admin_units.index(admin_unit)][1]['FAR'][model][comparison_type][RP_idx, :], + color=color, + linestyle=linestyle, + marker=marker, + markersize=self.markersize) + ax.set_xlabel('Leadtime (hours)') + ax.set_ylabel('FAR') + ax.set_xlim(leadtimes_x_lim) + ax.set_ylim([-0.05, 1.05]) + ax.text(24, 1.10, f'Return period={standard_RP:.1f} years') + + # Custom Legend + from matplotlib.lines import Line2D + custom_lines = [ + Line2D([0], [0], color='black', linestyle=self.linestyles[0], label='Observation'), + Line2D([0], [0], color='black', linestyle=self.linestyles[1], label='Impact'), + #Line2D([0], [0], color='black', linestyle=self.linestyles[2], label='PTM'), + Line2D([0], [0], color='black', marker=self.markerstyles[0], linestyle='None', label='GloFAS'), + Line2D([0], [0], color='black', marker=self.markerstyles[1], linestyle='None', label='Google Flood Hub'), + Line2D([0], [0], color='black', marker=self.markerstyles[2], linestyle='None', label='PTM') + ] + color_lines = [Line2D([0], [0], color=color, lw=2, label=unit) for color, unit in zip(self.admin_colors, admin_units)] + fig.legend(handles=custom_lines + color_lines, + loc='lower center', + ncol=4, + fontsize='small', + frameon=False) + + plt.tight_layout(rect=[0, 0.08, 1, 0.95]) + filePath = f'{self.DataDir}/comparison/results/performance_metrics_all_admin_RP{standard_RP:.1f}.png' + plt.savefig(filePath) + plt.show() + def performance_over_return_period_all(self, admin_units, standard_leadtime=168): + data_all_admin = [] + for admin_unit in admin_units: + data = collect_performance_measures(admin_unit, self.DataDir, cfg.leadtimes, cfg.RPsyr) + data_all_admin.append((admin_unit, data)) + + fig, axs = plt.subplots(2, 1, figsize=(14, 10)) + fig.suptitle('Performance Metrics across Administrative Units', fontsize=16) + leadtimes = cfg.leadtimes + lt_idx = leadtimes.index(standard_leadtime) + return_periods = cfg.RPsyr + RP_x_lim = [min(return_periods) - 0.5, max(return_periods) + 0.5] + + + # Plot 2: POD against return period + ax = axs[0] + for admin_unit, color in zip(admin_units, self.admin_colors): + for model, marker in zip(self.models, self.markerstyles): + for comparison_type, linestyle in zip(self.comparisonTypes, self.linestyles): + ax.plot(return_periods, + data_all_admin[admin_units.index(admin_unit)][1]['POD'][model][comparison_type][:, lt_idx], + color=color, + linestyle=linestyle, + marker=marker, + markersize=self.markersize) + ax.set_xlabel('Return Period (years)') + ax.set_ylabel('POD') + ax.set_xlim(RP_x_lim) + ax.set_ylim([-0.05, 1.05]) + ax.text(1.5, 1.10, f'Leadtime={standard_leadtime:.0f} hours ({standard_leadtime / 24:.0f} days)') + + + # Plot 4: FAR against return period + ax = axs[1] + for admin_unit, color in zip(admin_units, self.admin_colors): + for model, marker in zip(self.models, self.markerstyles): + for comparison_type, linestyle in zip(self.comparisonTypes, self.linestyles): + ax.plot(return_periods, + data_all_admin[admin_units.index(admin_unit)][1]['FAR'][model][comparison_type][:, lt_idx], + color=color, + linestyle=linestyle, + marker=marker, + markersize=self.markersize) + ax.set_xlabel('Return Period (years)') + ax.set_ylabel('FAR') + ax.set_xlim(RP_x_lim) + ax.set_ylim([-0.05, 1.05]) + ax.text(1.5, 1.10, f'Leadtime={standard_leadtime:.0f} hours ({standard_leadtime / 24:.0f} days)') + + # Custom Legend + from matplotlib.lines import Line2D + custom_lines = [ + Line2D([0], [0], color='black', linestyle=self.linestyles[0], label='Observation'), + Line2D([0], [0], color='black', linestyle=self.linestyles[1], label='Impact'), + #Line2D([0], [0], color='black', linestyle=self.linestyles[2], label='PTM'), + Line2D([0], [0], color='black', marker=self.markerstyles[0], linestyle='None', label='GloFAS'), + Line2D([0], [0], color='black', marker=self.markerstyles[1], linestyle='None', label='Google Flood Hub'), + Line2D([0], [0], color='black', marker=self.markerstyles[2], linestyle='None', label='PTM') + ] + color_lines = [Line2D([0], [0], color=color, lw=2, label=unit) for color, unit in zip(self.admin_colors, admin_units)] + fig.legend(handles=custom_lines + color_lines, + loc='lower center', + ncol=4, + fontsize='small', + frameon=False) + + plt.tight_layout(rect=[0, 0.08, 1, 0.95]) + filePath = f'{self.DataDir}/comparison/results/performance_metrics_all_admin_leadtime{standard_leadtime}.png' + plt.savefig(filePath) + plt.show() + if __name__ =='__main__': vis = Visualizer(cfg.DataDir, cfg.admPath) - comparisonTypes = ['Observation', 'Impact'] - # for comparisonType in comparisonTypes: + # comparisonTypes = ['Observation', 'Impact'] + # models = ['GloFAS', 'PTM'] + # for model in models: + # for comparisonType in comparisonTypes: + # for leadtime in cfg.leadtimes: + # for RPyr in cfg.RPsyr: + # try: + # scores_path = f"{cfg.DataDir}/{model}/{comparisonType}/scores_byCommuneRP{RPyr:.1f}_yr_leadtime{leadtime:.0f}.gpkg" + # scores_by_commune_gdf = checkVectorFormat(scores_path) + # vis.map_pod_far(scores_by_commune_gdf, RPyr, leadtime, comparisonType, f'{model}') + # except: + # print (f'No path for leadtime: {leadtime}, RP: {RPyr}, {comparisonType}, for model: {model}') + # continue + # admin_units = [ 'KOULIKORO'] + admin_units = ['BLA', 'SAN','KIDAL', 'TOMINIAN', 'KANGABA', 'KOULIKORO', 'KOLONDIEBA', 'MOPTI', 'BAMAKO', 'SIKASSO', 'SEGOU', 'KATI'] + for leadtime in cfg.leadtimes: + vis.performance_over_return_period_all (admin_units, leadtime) + for RPyr in cfg.RPsyr: + vis.performance_over_leadtime_all (admin_units, standard_RP=RPyr) + # for admin_unit in admin_units: + # data = collect_performance_measures(admin_unit, cfg.DataDir, cfg.leadtimes, cfg.RPsyr) # for leadtime in cfg.leadtimes: # for RPyr in cfg.RPsyr: - # scores_path = f"{cfg.DataDir}/GloFAS/{comparisonType}/scores_byCommuneRP{RPyr:.1f}_yr_leadtime{leadtime:.0f}.gpkg" - # scores_by_commune_gdf = checkVectorFormat(scores_path) - # vis.map_pod_far(scores_by_commune_gdf, RPyr, leadtime, comparisonType, 'GloFAS') - # admin_units = [ 'KOULIKORO', 'SEGOU', 'KATI'] - admin_units = ['BLA', 'SAN','KIDAL', 'TOMINIAN', 'KANGABA', 'KOULIKORO', 'KOLONDIEBA', 'MOPTI', 'BAMAKO', 'SIKASSO', 'SEGOU', 'KATI'] - for admin_unit in admin_units: - data = collect_performance_measures(admin_unit, cfg.DataDir, cfg.leadtimes, cfg.RPsyr) - vis.performance_over_param(admin_unit, data, standard_RP=2.0, standard_leadtime=96) + # vis.performance_over_param(admin_unit, data, standard_RP=RPyr, standard_leadtime=leadtime) + # to do : ## add lines between points