Skip to content

Commit

Permalink
Added lat/lon coordinates to the clustering algorithm by default
Browse files: browse the repository at this point in the history
  • Loading branch information
Mathias157 committed Oct 15, 2024
1 parent 2e6ee8a commit c5c2deb
Show file tree
Hide file tree
Showing 2 changed files with 23 additions and 14 deletions.
9 changes: 5 additions & 4 deletions src/Modules/aggregate_inputs.py
Original file line number Diff line number Diff line change
Expand Up @@ -257,7 +257,8 @@ def plot_transmission_invcost(symbol: str,

exclusion.append((line[1], line[0]))
# print(df.drop)
plt.show()

fig.savefig('ClusterOutput/Figures/%s.png')


#%% ------------------------------- ###
Expand All @@ -271,11 +272,11 @@ def plot_transmission_invcost(symbol: str,
@click.option('--mean-aggfuncs', type=str, required=False, default='', help='Parameters that should be aggregated with an average')
@click.option('--median-aggfuncs', type=str, required=False, default='', help='Parameters that should be aggregated using the median value')
@click.option('--zero-fillnas', type=str, required=False, default='', help='NaN values that should be converted to zero instead of EPS')
@click.option('--only-symbols', type=str, required=False, help="Only aggregate the symbols, input as comma-separated string")
@click.option('--only-symbols', type=str, required=False, default=None, help="Only aggregate the symbols, input as comma-separated string")
def main(model_path: str, scenario: str, exceptions: str,
mean_aggfuncs: str, median_aggfuncs: str,
zero_fillnas: str, incfile_folder: str = 'Output',
only_symbols: Union[str, None] = None):
zero_fillnas: str, only_symbols: Union[str, None],
incfile_folder: str = 'Output'):

# Make configuration lists
exceptions = exceptions.replace(' ', '').split(',') # Symbols not to aggregate
Expand Down
28 changes: 18 additions & 10 deletions src/Modules/clustering.py
Original file line number Diff line number Diff line change
Expand Up @@ -198,12 +198,11 @@ def gather_data(db: gams.GamsDatabase,
def cluster(collected_data: pd.DataFrame,
n_clusters: int,
use_connectivity: bool = True,
manual_corrections: list = [
['Roedovre', 'Frederiksberg', 1],
],
manual_corrections: list = [],
linkage: str = 'Ward',
connection_remark: str = 'connec. included + artifical',
data_remark: str = 'all combined + xy coords'):
data_remark: str = 'all combined + xy coords',
include_coordinates: bool = True):

# collected_data = collected_data.drop_sel(IRRRE='Christiansoe')

Expand All @@ -219,6 +218,7 @@ def cluster(collected_data: pd.DataFrame,
connectivity.connection.loc[manual_connection[1], manual_connection[0]] = manual_connection[2]

print('Is matrix symmetric?', np.all(connectivity.connection.data == connectivity.connection.data.T))

## Make symmetric index, so the indices fit
collected_data = collected_data.assign_coords(IRRRI=collected_data.coords['IRRRE'].data)

Expand All @@ -236,16 +236,28 @@ def cluster(collected_data: pd.DataFrame,
else:
knn_graph = None # don't apply connectivity constraints
X = collected_data


## Combine with polygons for plotting and possible coordinate data
the_index, geofiles, c = prepared_geofiles('DKmunicipalities_names')
geofiles.index.name = 'IRRRE'
X = X.merge(geofiles['geometry'].to_xarray())

if include_coordinates:
## Get coordinates
coords = gpd.GeoDataFrame(geometry=X.geometry.data).centroid
X['lon'] = xr.DataArray(data=coords.x, coords={'IRRRE' : X.coords['IRRRE'].data})
X['lat'] = xr.DataArray(data=coords.y, coords={'IRRRE' : X.coords['IRRRE'].data})

# Prepare data for clustering
Y = np.vstack([X.get(variable).data for variable in X.data_vars]).T
Y = np.vstack([X.get(variable).data for variable in X.data_vars if variable != 'geometry']).T
Y = np.nan_to_num(Y)
Y = StandardScaler().fit_transform(Y) # Normalise dataset

## Make higher weighting of certain coordinates..?
# X[:,0] = X[:,0]*10000
# X[:,1] = X[:,1]*10000


# Perform Clustering
agg = AgglomerativeClustering(n_clusters=n_clusters, linkage=linkage.lower(),
connectivity=knn_graph)
Expand All @@ -255,10 +267,6 @@ def cluster(collected_data: pd.DataFrame,
X['cluster_groups'] = (['IRRRE'], agg.labels_)

# Plot the different clustering techniques
## Get geofiles
the_index, geofiles, c = prepared_geofiles('DKmunicipalities_names')
geofiles.index.name = 'IRRRE'
X = X.merge(geofiles['geometry'].to_xarray())


# Plot clustering
Expand Down

0 comments on commit c5c2deb

Please sign in to comment.