-
Notifications
You must be signed in to change notification settings - Fork 21
/
kmeans_elbow.py
29 lines (26 loc) · 1.06 KB
/
kmeans_elbow.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
import pandas as pd
from sklearn.cluster import KMeans
def elbow_plot(data, maxK=10, seed_centroids=None):
    """
    Fit k-means for k = 1..maxK and plot the inertia (SSE) against k.

    parameters:
    - data: pandas DataFrame (data to be fitted). Side effect: a "clusters"
      column holding the labels of the most recently fitted model is written
      to this frame. That column is never used as a feature for fitting.
    - maxK (default = 10): integer (maximum number of clusters with which to
      run k-means, inclusive)
    - seed_centroids (default = None): pandas Series or DataFrame (initial
      centroids for k-means; the first k rows seed the k-cluster fit, so it
      must have at least maxK rows and one value per feature in each row)

    returns:
    - dict mapping each k to the inertia of the model fitted with k clusters

    raises:
    - ValueError: if seed_centroids has fewer than k rows for some k
    """
    # Imported locally because this module has no top-level matplotlib import.
    import matplotlib.pyplot as plt

    # Snapshot the features once, before any label column is written, so the
    # "clusters" labels from one fit never leak into the next fit as a feature.
    features = data.drop(columns=["clusters"], errors="ignore")

    sse = {}
    for k in range(1, maxK + 1):  # inclusive upper bound, as documented
        print("k: ", k)
        if seed_centroids is not None:
            seeds = seed_centroids.head(k)
            if len(seeds) < k:
                raise ValueError(
                    f"seed_centroids has {len(seeds)} rows but k={k} "
                    "centroids are required"
                )
            # One row per centroid; -1 lets the column count follow the seeds
            # (a Series becomes shape (k, 1), a DataFrame keeps its columns).
            init = seeds.to_numpy().reshape(k, -1)
            # With explicit initial centres only a single initialisation is
            # meaningful, so n_init is 1 (a larger value just triggers a
            # warning in scikit-learn).
            kmeans = KMeans(
                n_clusters=k,
                max_iter=500,
                n_init=1,
                random_state=0,
                init=init,
            ).fit(features)
        else:
            kmeans = KMeans(
                n_clusters=k,
                max_iter=300,
                n_init=100,
                random_state=0,
            ).fit(features)

        # Preserve the original side effect: expose the latest labels on data.
        data["clusters"] = kmeans.labels_
        # Inertia: sum of squared distances of samples to their closest
        # cluster center.
        sse[k] = kmeans.inertia_

    plt.figure()
    plt.plot(list(sse.keys()), list(sse.values()))
    plt.show()
    return sse
# Example usage: elbow_plot(data)