From 6a978bb6d2f2e852f18f5b2c29ad50684651c807 Mon Sep 17 00:00:00 2001
From: ERKuipers <elskuipers@protonmail.com>
Date: Thu, 21 Nov 2024 16:34:59 +0100
Subject: [PATCH] stationname matchingpoint needs to be long

---
 GloFAS/GloFAS_analysis/HydroImpact.py | 29 +++++++++++++++++----------
 comparison/pointMatching.py           |  9 ++++++---
 2 files changed, 24 insertions(+), 14 deletions(-)

diff --git a/GloFAS/GloFAS_analysis/HydroImpact.py b/GloFAS/GloFAS_analysis/HydroImpact.py
index b174d86..f17bc16 100644
--- a/GloFAS/GloFAS_analysis/HydroImpact.py
+++ b/GloFAS/GloFAS_analysis/HydroImpact.py
@@ -1,12 +1,11 @@
 
 import GloFAS.GloFAS_prep.configuration as cfg
 import pandas as pd
-
+from GloFAS.GloFAS_prep.accent_remover import remove_accents
 import scipy.stats as stats
 from comparison.pointMatching import attributePoints_to_Polygon
 
 
-
 def parse_date_with_fallback(date_str, year):
     try:
         # Try to parse the date with the given year
@@ -57,6 +56,7 @@ def z_RP_station(HydroStations_RP_file, StationName, RP):
     z_RP = HydroStations_RP_df[HydroStations_RP_df['StationName'] == StationName][f'{RP}'].values
 
     return z_RP
+
 def QRP_fit (hydro_df, RP): 
 
     # 1. Extract the annual maximum discharge values
@@ -71,7 +71,7 @@ def QRP_fit (hydro_df, RP):
     return discharge_value
 
 
-def stampHydroTrigger(hydro_df, RP, HydroStations_RP_file, StationName): 
+def stampHydroTrigger(hydro_df, RP, StationName): 
     """
     Adds a 'trigger' column to the hydro_df DataFrame indicating whether the 'Value' exceeds the QRP threshold.
 
@@ -92,7 +92,7 @@ def stampHydroTrigger(hydro_df, RP, HydroStations_RP_file, StationName):
     Q_station = hydro_df["Value"] 
     
 
-    QRP= QRP_fit (hydro_df,RP)
+    QRP= QRP_fit (hydro_df, RP)
     #print (f"for {StationName} : return period Q= {QRP}")
     if not isinstance(QRP, (int, float)):
         raise ValueError(f"Expected QRP to be a scalar (int or float), but got {type(QRP)}.")
@@ -166,7 +166,7 @@ def createEvent(trigger_df):
         events_df = pd.DataFrame(columns=['Event', 'StartDate', 'EndDate'])
         return events_df
 
-def loop_over_stations(station_csv, DataDir, RP, HydroStations_RP_file, admPath): 
+def loop_over_stations(station_csv, DataDir, RP, admPath): 
     station_df = pd.read_csv (station_csv, header=0)
     #print (station_df.columns)
     hydrodir = rf"{DataDir}/DNHMali_2019\Q_stations"
@@ -181,25 +181,32 @@ def loop_over_stations(station_csv, DataDir, RP, HydroStations_RP_file, admPath)
         except: 
             print (f'no discharge measures found for station {StationName} in {BasinName}')
             continue
-        trigger_df = stampHydroTrigger (hydro_df, RP, HydroStations_RP_file, StationName)
+        trigger_df = stampHydroTrigger (hydro_df, RP, StationName)
         event_df = createEvent (trigger_df)
         event_df ['StationName'] = StationName
         all_events.append (event_df)
     
     #generate the gdf to merge with where the points are attributed to the respective administrative units
     
+
+    all_events_df = pd.concat (all_events, ignore_index=True)
+    print (all_events_df.columns)
     gdf_pointPolygon = attributePoints_to_Polygon (admPath, station_csv, 'StationName')
     print (gdf_pointPolygon.columns)
-    all_events_df = pd.concat (all_events, ignore_index=True)
-    hydro_events_df = gdf_pointPolygon.merge (all_events_df, on='StationName', how='left')
+    gdf_melt = gdf_pointPolygon.melt(id_vars=gdf_pointPolygon.columns.difference(['StationName', 'StationName_0', 'StationName_1', 'StationName_2']),
+                      value_vars=['StationName', 'StationName_0', 'StationName_1', 'StationName_2'],
+                      var_name='StationName_Type', value_name='StationName')
+    gdf_melt = gdf_melt.dropna(subset=['StationName'])
+
+    hydro_events_df = pd.merge (gdf_melt, all_events_df, on='StationName', how='inner')
     hydro_events_gdf = gpd.GeoDataFrame(hydro_events_df, geometry='geometry')    
     return hydro_events_gdf
 
 
 if __name__=="__main__": 
     DataDir = cfg.DataDir
-    station_csv = DataDir / f"DNHMali_2019/Stations_DNH.csv"
+    station_csv = cfg.DNHstations
     HydroStations_RP_file = DataDir / f"DNHMali_2019/HydroStations_RP.csv"
     #print (readcsv(f"{DataDir}/Données partagées - DNH Mali - 2019/Donne╠ües partage╠ües - DNH Mali - 2019/De╠übit du Niger a╠Ç Ansongo.csv"))
-    trigger_df = loop_over_stations (station_csv, DataDir, 5, HydroStations_RP_file, cfg.admPath)
-    print (trigger_df)
\ No newline at end of file
+    event_df = loop_over_stations (station_csv, DataDir, 5, cfg.admPath)
+    print (event_df)
\ No newline at end of file
diff --git a/comparison/pointMatching.py b/comparison/pointMatching.py
index d2c75d4..115361f 100644
--- a/comparison/pointMatching.py
+++ b/comparison/pointMatching.py
@@ -6,6 +6,7 @@
 from scipy.spatial import cKDTree
 from GloFAS.GloFAS_prep.vectorCheck import checkVectorFormat
 import GloFAS.GloFAS_prep.configuration as cfg 
+from GloFAS.GloFAS_prep.accent_remover import remove_accents
 
 def findclosestpoint(point_x, point_y, target_gdf):
     '''
@@ -175,12 +176,14 @@ def attributePoints_to_Polygon(
         # Collect the IDs of these points
         point_ids = points_within[ID2].tolist()
         # Update the GeoDataFrame with the list of point IDs
-        print (len(point_ids))
-        polygons_gdf.at[idx, f'{ID2}'] = point_ids[0]
+        
         nrstations_inpolygon = len (point_ids)
+        if nrstations_inpolygon>0:
+            polygons_gdf.at[idx, f'{ID2}'] = remove_accents(point_ids[0])
         if nrstations_inpolygon>1: 
             for stationnr in range(nrstations_inpolygon):
-                polygons_gdf.at[idx, f'{ID2}_{stationnr}'] = point_ids[stationnr-1]
+                polygons_gdf.at[idx, f'{ID2}_{stationnr}'] = remove_accents(point_ids[stationnr-1])
+
     # Write the updated GeoDataFrame to a CSV file
     output_file = StationDataDir / f'{filename}'
     polygons_gdf.drop(columns='geometry').to_csv(output_file, index=False)