def avg_within_ss(X, k, random_state=None):
    """
    Compute the average within-cluster sum of squares for a k-cluster fit.

    A MiniBatchKMeans model with ``k`` clusters is fitted to ``X``. Every
    observation is then matched to its nearest fitted centroid (Euclidean
    distance), and the mean of the squared nearest-centroid distances over
    all observations is returned.

    Params:
    --------
    X: numpy array with observations (rows) and features (columns) to be
       clustered
    k: number of clusters
    random_state: optional seed forwarded to MiniBatchKMeans so repeated
       calls can be made reproducible; the default (None) leaves the
       estimator unseeded, as before
    Returns:
    --------
    avgwithinss: average within-cluster sum of squares, i.e. the sum of
       squared distances to the nearest centroid divided by ``X.shape[0]``
    """
    model = MiniBatchKMeans(init='k-means++', n_clusters=k, batch_size=50,
                            n_init=3, max_no_improvement=10, verbose=0,
                            random_state=random_state)
    model.fit(X)
    centroids = model.cluster_centers_

    # Pairwise distances: one row per observation, one column per centroid.
    dist_to_centroids = cdist(X, centroids, 'euclidean')
    # Distance from each observation to its closest centroid.
    nearest_dist = np.min(dist_to_centroids, axis=1)

    # Vectorised mean of squared distances; equivalent to
    # sum(nearest_dist ** 2) / X.shape[0] without a Python-level loop.
    avgwithinss = np.mean(nearest_dist ** 2)
    return avgwithinss
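# Comment list        (navigation text left over from the source web page; not code)
# Table of contents   (navigation text left over from the source web page; not code)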