Added lat/lon centroid coordinates to the clustering algorithm's input data by default

Mathias157 · Oct 15, 2024 · c5c2deb · c5c2deb
1 parent 2e6ee8a
commit c5c2deb
Show file tree

Hide file tree

Showing 2 changed files with 23 additions and 14 deletions.
diff --git a/src/Modules/aggregate_inputs.py b/src/Modules/aggregate_inputs.py
@@ -257,7 +257,8 @@ def plot_transmission_invcost(symbol: str,
 
         exclusion.append((line[1], line[0]))
     # print(df.drop)
-    plt.show()
+
+    fig.savefig('ClusterOutput/Figures/%s.png')
 
 
 #%% ------------------------------- ###
@@ -271,11 +272,11 @@ def plot_transmission_invcost(symbol: str,
 @click.option('--mean-aggfuncs', type=str, required=False, default='', help='Parameters that should be aggregated with an average')
 @click.option('--median-aggfuncs', type=str, required=False, default='', help='Parameters that should be aggregated using the median value')
 @click.option('--zero-fillnas', type=str, required=False, default='', help='NaN values that should be converted to zero instead of EPS')
-@click.option('--only-symbols', type=str, required=False, help="Only aggregate the symbols, input as comma-separated string")
+@click.option('--only-symbols', type=str, required=False, default=None, help="Only aggregate the symbols, input as comma-separated string")
 def main(model_path: str, scenario: str, exceptions: str, 
          mean_aggfuncs: str, median_aggfuncs: str, 
-         zero_fillnas: str, incfile_folder: str = 'Output',
-         only_symbols: Union[str, None] = None):
+         zero_fillnas: str, only_symbols: Union[str, None], 
+         incfile_folder: str = 'Output'):
 
     # Make configuration lists
     exceptions = exceptions.replace(' ', '').split(',') # Symbols not to aggregate

diff --git a/src/Modules/clustering.py b/src/Modules/clustering.py
@@ -198,12 +198,11 @@ def gather_data(db: gams.GamsDatabase,
 def cluster(collected_data: pd.DataFrame,
             n_clusters: int,
             use_connectivity: bool = True,
-            manual_corrections: list = [
-                ['Roedovre', 'Frederiksberg', 1],
-            ],
+            manual_corrections: list = [],
             linkage: str = 'Ward',
             connection_remark: str = 'connec. included + artifical',
-            data_remark: str = 'all combined + xy coords'):
+            data_remark: str = 'all combined + xy coords',
+            include_coordinates: bool = True):
 
     # collected_data = collected_data.drop_sel(IRRRE='Christiansoe')
 
@@ -219,6 +218,7 @@ def cluster(collected_data: pd.DataFrame,
             connectivity.connection.loc[manual_connection[1], manual_connection[0]] = manual_connection[2]
 
         print('Is matrix symmetric?', np.all(connectivity.connection.data == connectivity.connection.data.T))
+
         ## Make symmetric index, so the indices fit
         collected_data = collected_data.assign_coords(IRRRI=collected_data.coords['IRRRE'].data)
 
@@ -236,16 +236,28 @@ def cluster(collected_data: pd.DataFrame,
     else:
         knn_graph = None # don't apply connectivity constraints
         X = collected_data
-
+
+    ## Combine with polygons for plotting and possible coordinate data
+    the_index, geofiles, c = prepared_geofiles('DKmunicipalities_names')
+    geofiles.index.name = 'IRRRE'
+    X = X.merge(geofiles['geometry'].to_xarray())
+
+    if include_coordinates:
+        ## Get coordinates 
+        coords = gpd.GeoDataFrame(geometry=X.geometry.data).centroid
+        X['lon'] = xr.DataArray(data=coords.x, coords={'IRRRE' : X.coords['IRRRE'].data})
+        X['lat'] = xr.DataArray(data=coords.y, coords={'IRRRE' : X.coords['IRRRE'].data})
+
     # Prepare data for clustering
-    Y = np.vstack([X.get(variable).data for variable in X.data_vars]).T
+    Y = np.vstack([X.get(variable).data for variable in X.data_vars if variable != 'geometry']).T
     Y = np.nan_to_num(Y)
     Y = StandardScaler().fit_transform(Y) # Normalise dataset
 
     ## Make higher weighting of certain coordinates..?
     # X[:,0] = X[:,0]*10000
     # X[:,1] = X[:,1]*10000
 
+
     # Perform Clustering
     agg = AgglomerativeClustering(n_clusters=n_clusters, linkage=linkage.lower(),
                                 connectivity=knn_graph)
@@ -255,10 +267,6 @@ def cluster(collected_data: pd.DataFrame,
     X['cluster_groups'] = (['IRRRE'], agg.labels_)
 
     # Plot the different clustering techniques
-    ## Get geofiles
-    the_index, geofiles, c = prepared_geofiles('DKmunicipalities_names')
-    geofiles.index.name = 'IRRRE'
-    X = X.merge(geofiles['geometry'].to_xarray())
 
 
     # Plot clustering