PTM PLOT

rodekruis · Dec 3, 2024 · a1a6eee · a1a6eee
1 parent 23881f2
commit a1a6eee
Show file tree

Hide file tree

Showing 3 changed files with 42 additions and 38 deletions.
diff --git a/PTM/PTM_analysis/performance_calculator.py b/PTM/PTM_analysis/performance_calculator.py
@@ -328,18 +328,18 @@ def calculateCommunePerformance(self):
             lambda x: self.calc_performance_scores(x[f'{self.comparisonType}'], x['Event'])
         )
         scores_byCommune_gdf = self.gdf_shape.merge(scores_by_commune, on=f'ADM{cfg.adminLevel}')
-        scores_byCommune_gdf.to_file(f"{self.DataDir}/{self.model}/{self.comparisonType}/scores_byADM{self.adminLevel}_RP{self.RPyr:.1f}yr_leadtime{self.leadtime}.gpkg")
-        scores_byCommune_gdf.drop(columns='geometry').to_csv(f"{self.DataDir}/{self.model}/{self.comparisonType}/scores_byADM{self.adminLevel}_RP{self.RPyr:.1f}yr_leadtime{self.leadtime}.csv")
+        scores_byCommune_gdf.to_file(f"{self.DataDir}/{self.model}/{self.comparisonType}/scores_byCommuneRP{self.RPyr:.1f}_yr_leadtime{self.leadtime}.gpkg")
+        scores_byCommune_gdf.drop(columns='geometry').to_csv(f"{self.DataDir}/{self.model}/{self.comparisonType}/scores_byCommuneRP{self.RPyr:.1f}_yr_leadtime{self.leadtime}.csv")
         return scores_byCommune_gdf
 
 if __name__=='__main__':
     # impact_csv = f'{cfg.DataDir}/Impact_data/impact_events_per_admin_529.csv'
     # comparisonType ='Impact'
     # for RPyr in cfg.RPsyr: 
+    #     # PTM_events = f'{cfg.DataDir}/PTM/floodevents_admUnit_RP{RPyr}yr.csv'
+    #     ptm_events_df = ptm_events (cfg.DNHstations, cfg.DataDir, RPyr, cfg.StationCombos)
+    #     PTM_events_per_adm = events_per_adm(cfg.DataDir, cfg.admPath, cfg.adminLevel, cfg.DNHstations, cfg.stationsDir, ptm_events_df, 'PTM', RPyr)
     #     for leadtime in cfg.leadtimes:
-    #         # PTM_events = f'{cfg.DataDir}/PTM/floodevents_admUnit_RP{RPyr}yr.csv'
-    #         ptm_events_df = ptm_events (cfg.DNHstations, cfg.DataDir, RPyr, cfg.StationCombos)
-    #         PTM_events_per_adm = events_per_adm(cfg.DataDir, cfg.admPath, cfg.adminLevel, cfg.DNHstations, cfg.stationsDir, ptm_events_df, 'PTM', RPyr)
     #         # print (readcsv(f"{DataDir}/Données partagées - DNH Mali - 2019/Données partagées - DNH Mali - 2019/Débit du Niger à Ansongo.csv"))
     #         analyzer = PredictedToImpactPerformanceAnalyzer(cfg.DataDir, RPyr, impact_csv, cfg.triggerProb, cfg.adminLevel, cfg.admPath, cfg.startYear, cfg.endYear, cfg.years, PTM_events_per_adm, comparisonType, cfg.actionLifetime, 'PTM', leadtime)
     #         analyzer.matchImpact_and_Trigger()

diff --git a/comparison/collect_for_administrative_unit.py b/comparison/collect_for_administrative_unit.py
@@ -8,7 +8,7 @@ def collect_performance_measures(admin_unit, DataDir, leadtimes, return_periods)
 
     Parameters:
     - admin_unit (str): Name of the administrative unit (ADM2 field in the CSVs).
-    - DataDir (str): Base directory containing subdirectories for models (GloFAS, Google Floodhub, EAP).
+    - DataDir (str): Base directory containing subdirectories for models (GloFAS, Google Floodhub, PTM).
     - leadtimes (list): List of leadtimes to include (defines columns of the 2D array).
     - return_periods (list): List of return periods to include (defines rows of the 2D array).
 
@@ -18,25 +18,25 @@ def collect_performance_measures(admin_unit, DataDir, leadtimes, return_periods)
           'leadtimes': [...],
           'return_periods': [...],
           'POD': {
-              'GloFAS': {'observation': np.array([...]), 'impact': np.array([...])},
-              'Google Floodhub': {'observation': np.array([...]), 'impact': np.array([...])},
-              'EAP': {'observation': np.array([...]), 'impact': np.array([...])}
+              'GloFAS': {'Observation': np.array([...]), 'Impact': np.array([...])},
+              'Google Floodhub': {'Observation': np.array([...]), 'Impact': np.array([...])},
+              'PTM': {'Observation': np.array([...]), 'Impact': np.array([...])}
           },
           'FAR': {
-              'GloFAS': {'observation': np.array([...]), 'impact': np.array([...])},
-              'Google Floodhub': {'observation': np.array([...]), 'impact': np.array([...])},
-              'EAP': {'observation': np.array([...]), 'impact': np.array([...])}
+              'GloFAS': {'Observation': np.array([...]), 'Impact': np.array([...])},
+              'Google Floodhub': {'Observation': np.array([...]), 'Impact': np.array([...])},
+              'PTM': {'Observation': np.array([...]), 'Impact': np.array([...])}
           }
       }
     """
-    models = ['GloFAS', 'GoogleFloodHub', 'EAP']
+    models = ['GloFAS', 'GoogleFloodHub', 'PTM']
     data = {
         'leadtimes': leadtimes,
         'return_periods': return_periods,
-        'POD': {model: {'observation': np.full((len(return_periods), len(leadtimes)), np.nan),
-                        'impact': np.full((len(return_periods), len(leadtimes)), np.nan)} for model in models},
-        'FAR': {model: {'observation': np.full((len(return_periods), len(leadtimes)), np.nan),
-                        'impact': np.full((len(return_periods), len(leadtimes)), np.nan)} for model in models}
+        'POD': {model: {'Observation': np.full((len(return_periods), len(leadtimes)), np.nan),
+                        'Impact': np.full((len(return_periods), len(leadtimes)), np.nan)} for model in models},
+        'FAR': {model: {'Observation': np.full((len(return_periods), len(leadtimes)), np.nan),
+                        'Impact': np.full((len(return_periods), len(leadtimes)), np.nan)} for model in models}
     }
 
     for model in models:
@@ -45,7 +45,7 @@ def collect_performance_measures(admin_unit, DataDir, leadtimes, return_periods)
             print(f"Directory not found for model: {model}, skipping.")
             continue
 
-        for comparison_type in ['observation', 'impact']:
+        for comparison_type in ['Observation', 'Impact']:
             comp_dir = os.path.join(model_dir, comparison_type)
             if not os.path.exists(comp_dir):
                 print(f"Directory not found for comparison type: {comparison_type} in {model}, skipping.")
@@ -87,7 +87,7 @@ def collect_performance_measures(admin_unit, DataDir, leadtimes, return_periods)
     # These regions include Bamako, Koulikoro, Ségou, Mopti, Timbouctou and Gao, which have historically experienced frequent and severe flood events. 
     # Regions such as Mopti and Ségou are of particular concern due to their high exposure to flooding as well as their dense populations,
     # Bla is in Segou, impact data recorded there, no obs
-    # Segou is in Segou, observation data recorded  ,  only false negatives, but there is impact
+    # Segou is in Segou, observation data recorded, only false negatives, but there is impact
     # Kolondieba in Sikasso, okay score in obs, no impact 
     # San in segou: impact data
 

diff --git a/comparison/visualization/plot.py b/comparison/visualization/plot.py
@@ -6,9 +6,10 @@
 
 class Visualizer: 
     def __init__(self, DataDir, vector_adminMap):
-        self.models = ['GloFAS', 'GoogleFloodHub', 'EAP'] # is EAP best way to refer to the current trigger model in the EAP? 
+        self.models = ['GloFAS', 'GoogleFloodHub', 'PTM'] # is PTM best way to refer to the current trigger model in the EAP? 
         self.colors = ['cornflowerblue', 'salmon','darkgreen'] # adjust pls if you want 
         self.linestyles = ['-', '--']
+        self.markerstyle =['o','v']
         self.DataDir=DataDir
         self.gdf_shape=checkVectorFormat(vector_adminMap, shapeType='polygon')
 
@@ -73,74 +74,76 @@ def map_pod_far (self, scores_by_commune_gdf, RPyr, leadtime, comparisonType, mo
         plt.savefig(filePath)
         plt.show()
 
-    def performance_over_param(self, admin_unit, data): 
+    def performance_over_param(self, admin_unit, data, standard_RP=5, standard_leadtime=168): 
+
         fig, axs = plt.subplots(2, 2, figsize=(15, 10))
         fig.suptitle(f'Performance Metrics for {admin_unit}', fontsize=16)
         leadtimes = data['leadtimes']
+        lt_idx = leadtimes.index(standard_leadtime)
         return_periods = data['return_periods']
+        RP_idx = return_periods.index(standard_RP)
         leadtimes_x_lim = [min(leadtimes), max(leadtimes)]
-        RP_x_lim = [min(return_periods), max(return_periods)]
-
+        RP_x_lim = [min(return_periods)-0.5, max(return_periods)+0.5]
         # Plot 1: POD against leadtime
         ax = axs[0, 0]
         for model, color in zip(self.models, self.colors):
             # 2 in return period is 5yrs of return period !! 0=1.5 1=2, 2= 5
-            ax.plot(leadtimes, data['POD'][model]['observation'][2,:], color=color, linestyle=self.linestyles[0], label=f'{model} (Obs)')
-            ax.plot(leadtimes, data['POD'][model]['impact'][2,:], color=color, linestyle=self.linestyles[1], label=f'{model} (Impact)')
+            ax.scatter(leadtimes, data['POD'][model]['Observation'][RP_idx,:], color=color, marker=self.markerstyle[0], linestyle=self.linestyles[0], label=f'{model} (Obs)')
+            ax.scatter(leadtimes, data['POD'][model]['Impact'][RP_idx,:], color=color, marker=self.markerstyle[1], linestyle=self.linestyles[1], label=f'{model} (Impact)')
         ax.set_xlabel('Leadtime (hours)')
         ax.set_ylabel('POD')
         ax.set_title('POD vs Leadtime')
         ax.set_xlim (leadtimes_x_lim )
         ax.set_ylim([-0.05,1.05])
-        ax.text (72.5, 0.97, 'Return period = 5 years')
+        ax.text (72.5, 0.97, f'Return period={standard_RP:.1f} years')
         ax.legend()
         #ax.grid(True)
 
         # Plot 2: POD against return period
         ax = axs[0, 1]
         for model, color in zip(self.models, self.colors):
             # 4th index in leadtime index is 168 hours, 7 days 
-            ax.plot(return_periods, data['POD'][model]['observation'][:,4], color=color, linestyle=self.linestyles[0], label=f'{model} (Obs)')
-            ax.plot(return_periods, data['POD'][model]['impact'][:,4], color=color, linestyle=self.linestyles[1], label=f'{model} (Impact)')
+            ax.scatter(return_periods, data['POD'][model]['Observation'][:,lt_idx], color=color,marker=self.markerstyle[0], linestyle=self.linestyles[0], label=f'{model} (Obs)')
+            ax.scatter(return_periods, data['POD'][model]['Impact'][:,lt_idx], color=color, marker=self.markerstyle[1], linestyle=self.linestyles[1], label=f'{model} (Impact)')
         ax.set_xlabel('Return Period (years)')
         ax.set_ylabel('POD')
         ax.set_title('POD vs Return Period')
         ax.set_xlim (RP_x_lim)
         ax.set_ylim([-0.05,1.05])
-        ax.text (1.6, 0.97, 'Leadtime = 7 days')
+        ax.text (1.6, 0.97, f'Leadtime={standard_leadtime:.0f} hours ({standard_leadtime/24:.0f} days)')
         ax.legend()
         #ax.grid(True)
 
         # Plot 3: FAR against leadtime
         ax = axs[1, 0]
         for model, color in zip(self.models, self.colors):
-            ax.plot(leadtimes, data['FAR'][model]['observation'][2,:], color=color, linestyle=self.linestyles[0], label=f'{model} (Obs)')
-            ax.plot(leadtimes, data['FAR'][model]['impact'][2,:], color=color, linestyle=self.linestyles[1], label=f'{model} (Impact)')
+            ax.scatter(leadtimes, data['FAR'][model]['Observation'][RP_idx,:], color=color, marker=self.markerstyle[0], linestyle=self.linestyles[0], label=f'{model} (Obs)')
+            ax.scatter(leadtimes, data['FAR'][model]['Impact'][RP_idx,:], color=color,marker=self.markerstyle[1], linestyle=self.linestyles[1], label=f'{model} (Impact)')
         ax.set_xlabel('Leadtime (hours)')
         ax.set_ylabel('FAR')
         ax.set_title('FAR vs Leadtime')
         ax.set_xlim (leadtimes_x_lim )
         ax.set_ylim([-0.05,1.05])
-        ax.text (72.5, 0.97, 'Return period = 5 years')
+        ax.text (72.5, 0.97, f'Return period={standard_RP:.1f} years')
         ax.legend()
         #ax.grid(True)
 
         # Plot 4: FAR against return period
         ax = axs[1, 1]
         for model, color in zip(self.models, self.colors):
-            ax.plot(return_periods, data['FAR'][model]['observation'][:,4], color=color, linestyle=self.linestyles[0], label=f'{model} (Obs)')
-            ax.plot(return_periods, data['FAR'][model]['impact'][:,4], color=color, linestyle=self.linestyles[1], label=f'{model} (Impact)')
+            ax.scatter(return_periods, data['FAR'][model]['Observation'][:,lt_idx], color=color, marker=self.markerstyle[0], linestyle=self.linestyles[0], label=f'{model} (Obs)')
+            ax.scatter(return_periods, data['FAR'][model]['Impact'][:,lt_idx], color=color, marker=self.markerstyle[1], linestyle=self.linestyles[1], label=f'{model} (Impact)')
         ax.set_xlabel('Return Period (years)')
         ax.set_ylabel('FAR')
         ax.set_title('FAR vs Return Period')
         ax.set_xlim (RP_x_lim)
         ax.set_ylim([-0.05,1.05])
-        ax.text (1.6, 0.97, 'Leadtime = 7 days')
+        ax.text (1.6, 0.97, f'Leadtime={standard_leadtime:.0f} hours ({standard_leadtime/24:.0f} days)')
         ax.legend()
         #ax.grid(True)
 
         plt.tight_layout(rect=[0, 0.03, 1, 0.95])
-        filePath = f'{self.DataDir}/comparison/results/performance_metrics_{admin_unit}.png'
+        filePath = f'{self.DataDir}/comparison/results/performance_metrics_{admin_unit}_RP{standard_RP:.1f}_leadtime{standard_leadtime}.png'
         plt.savefig(filePath)
         plt.show()
 
@@ -153,7 +156,8 @@ def performance_over_param(self, admin_unit, data):
     #             scores_path = f"{cfg.DataDir}/GloFAS/{comparisonType}/scores_byCommuneRP{RPyr:.1f}_yr_leadtime{leadtime:.0f}.gpkg"
     #             scores_by_commune_gdf = checkVectorFormat(scores_path)
     #             vis.map_pod_far(scores_by_commune_gdf, RPyr, leadtime, comparisonType, 'GloFAS')
-    admin_units = ['BLA', 'SAN','KIDAL', 'TOMINIAN', 'KANGABA', 'KOULIKORO', 'KOLONDIEBA', 'MOPTI', 'BAMAKO', 'SIKASSO']
+    # admin_units = [ 'KOULIKORO', 'SEGOU', 'KATI']
+    admin_units = ['BLA', 'SAN','KIDAL', 'TOMINIAN', 'KANGABA', 'KOULIKORO', 'KOLONDIEBA', 'MOPTI', 'BAMAKO', 'SIKASSO', 'SEGOU', 'KATI']
     for admin_unit in admin_units:
         data = collect_performance_measures(admin_unit, cfg.DataDir, cfg.leadtimes, cfg.RPsyr)
-        vis.performance_over_param(admin_unit, data)
+        vis.performance_over_param(admin_unit, data, standard_RP=2.0, standard_leadtime=96)