You signed in with another tab or window. Reload to refresh your session. You signed out in another tab or window. Reload to refresh your session. You switched accounts on another tab or window. Reload to refresh your session. Dismiss alert
# Silhouette analysis for the already-fitted KMeans models stored in `kmeans_k`.
# One figure row is drawn per candidate k:
#   * left panel  - silhouette diagram (one sorted band per cluster),
#   * right panel - scatter of the first two columns of X coloured by the
#                   predicted cluster, with the cluster centres overlaid.
# The finished figure is written to "silhouette_diagram.png" and then shown.
range_n_clusters = [3, 4, 5, 6]

# Size the grid from the list itself so that editing `range_n_clusters` cannot
# leave empty rows or index past the last row.  squeeze=False keeps `ax`
# two-dimensional even when only one k is requested.
n_rows = len(range_n_clusters)
fig, ax = plt.subplots(n_rows, 2, figsize=(16, 5 * n_rows), squeeze=False)

for row, n_clusters in enumerate(range_n_clusters):
    sil_ax = ax[row, 0]
    scatter_ax = ax[row, 1]

    # NOTE(review): assumes kmeans_k[j] is the model fitted with k = j + 2;
    # confirm against the code that builds `kmeans_k`.
    model = kmeans_k[n_clusters - 2]

    # --- Left panel: silhouette diagram ------------------------------------
    # The silhouette coefficient can range over [-1, 1], but in this example
    # all values lie within [-0.1, 1].
    sil_ax.set_xlim([-0.1, 1])
    # The (n_clusters + 1) * 10 term reserves blank space between the
    # silhouette bands of individual clusters, to demarcate them clearly.
    sil_ax.set_ylim([0, len(X) + (n_clusters + 1) * 10])

    # Cluster assignment of every sample under this model.
    cluster_labels = model.predict(X)

    # The silhouette_score gives the average value over all samples, which
    # gives a perspective on the density and separation of the clusters.
    silhouette_avg = silhouette_score(X, cluster_labels)
    print("For n_clusters =", n_clusters,
          "The average silhouette_score is :", silhouette_avg)

    # Per-sample silhouette values.
    sample_silhouette_values = silhouette_samples(X, cluster_labels)

    y_lower = 10
    for i in range(n_clusters):
        # Collect the silhouette values of the samples in cluster i and sort
        # them.  Boolean-mask indexing returns a copy, so the in-place sort
        # does not disturb `sample_silhouette_values`.
        ith_cluster_silhouette_values = sample_silhouette_values[cluster_labels == i]
        ith_cluster_silhouette_values.sort()

        size_cluster_i = ith_cluster_silhouette_values.shape[0]
        y_upper = y_lower + size_cluster_i

        color = cm.nipy_spectral(float(i) / n_clusters)
        sil_ax.fill_betweenx(
            np.arange(y_lower, y_upper),
            0,
            ith_cluster_silhouette_values,
            facecolor=color,
            edgecolor=color,
            alpha=0.7,
        )

        # Label the band with its cluster number at its vertical middle.
        sil_ax.text(-0.05, y_lower + 0.5 * size_cluster_i, str(i))

        # Start the next band 10 units above this one (the blank gap).
        y_lower = y_upper + 10

    sil_ax.set_title(f"Silhouette diagram with $k$ = {n_clusters}", fontsize=16)
    sil_ax.set_xlabel("Silhouette coefficient values", fontsize=14)
    sil_ax.set_ylabel("Cluster label", fontsize=12)
    sil_ax.set_yticks([])  # Clear the y-axis labels / ticks
    sil_ax.set_xticks([0, 0.2, 0.4, 0.6, 0.8, 1])
    sil_ax.tick_params(axis='both', which='major', labelsize=12)

    # Vertical line marking the average silhouette score of all samples.
    sil_ax.axvline(x=silhouette_avg, color="black", linestyle="--", linewidth=2)

    # --- Right panel: clustered data ---------------------------------------
    # Same colour map as the silhouette bands, so the colours match per cluster.
    colors = cm.nipy_spectral(cluster_labels.astype(float) / n_clusters)
    scatter_ax.scatter(X[:, 0], X[:, 1], marker='.', s=30, lw=0, alpha=0.7,
                       c=colors, edgecolor='k')

    # Draw white circles at the cluster centres, then the cluster index on top.
    centers = model.cluster_centers_
    scatter_ax.scatter(centers[:, 0], centers[:, 1], marker='o',
                       c="white", alpha=1, s=200, edgecolor='k')
    for i, c in enumerate(centers):
        scatter_ax.scatter(c[0], c[1], marker=f"${i}$", alpha=1,
                           s=50, edgecolor='k')

    scatter_ax.set_title(f"Clustered data with k = {n_clusters}", fontsize=16)
    scatter_ax.set_xlabel("$x_1$", fontsize=14)
    scatter_ax.set_ylabel("$x_2$", fontsize=14)
    scatter_ax.set_yticks([])  # Clear the y-axis labels / ticks
    scatter_ax.set_xticks([])
    scatter_ax.grid()

plt.tight_layout()
plt.savefig("silhouette_diagram.png", dpi=650, transparent=False)
plt.show()
For n_clusters = 3 The average silhouette_score is : 0.6716211668230646
For n_clusters = 4 The average silhouette_score is : 0.6893052248264789
For n_clusters = 5 The average silhouette_score is : 0.7095494270839506
For n_clusters = 6 The average silhouette_score is : 0.6532479727390482