def elbow_test(X, max_cluster, random_state=None):
    """Estimate the number of k-means clusters with a simple elbow heuristic.

    K-means is fitted once for every cluster count from 2 up to
    ``max_cluster`` (inclusive) and the inertia of each fit is recorded.
    The function then looks at the drop in inertia between each pair of
    consecutive cluster counts and returns the larger count of the pair
    with the biggest drop. The result therefore always lies between 3 and
    ``max_cluster``; ties are resolved in favour of the smallest count.

    Parameters
    ----------
    X : numpy array or 2d list of floats
        The samples to cluster; passed unchanged to ``KMeans.fit``.
    max_cluster : int
        The maximum number of clusters to try. Must be at least 3 so that
        there are two fits whose inertia can be compared.
    random_state : int, RandomState instance or None, optional
        Forwarded to ``KMeans`` to make the centroid initialisation
        reproducible. The default ``None`` keeps scikit-learn's default
        (non-deterministic) behaviour.

    Returns
    -------
    number of clusters : int
        The number of clusters suggested for k-means clustering.

    Raises
    ------
    ValueError
        If ``max_cluster`` is smaller than 3.
    """
    # Validate before doing any work: with fewer than two fits there is no
    # inertia difference to inspect.
    if max_cluster < 3:
        raise ValueError(
            "max_cluster must be at least 3 to compare the inertia of "
            "consecutive cluster counts, got {!r}".format(max_cluster)
        )

    # Imported lazily so scikit-learn is only needed when a fit is performed.
    from sklearn.cluster import KMeans

    # One fit per candidate cluster count: 2, 3, ..., max_cluster.
    inertias = [
        KMeans(n_clusters=n_clusters, random_state=random_state).fit(X).inertia_
        for n_clusters in range(2, max_cluster + 1)
    ]

    # drops[i] is the inertia lost when going from (i + 2) to (i + 3) clusters.
    drops = [current - following
             for current, following in zip(inertias, inertias[1:])]

    # Index i of the largest drop corresponds to the transition ending at
    # (i + 3) clusters, hence the offset of 3.
    return drops.index(max(drops)) + 3
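# NOTE(review): stray non-code text, likely web-page navigation: "Comment list"
# NOTE(review): stray non-code text, likely web-page navigation: "Table of contents"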