def perc_var_explained(X, k):
    """
    Compute the percentage of variance explained by a k-means clustering,
    defined as the between-cluster sum of squares divided by the total sum
    of squares, times 100.

    The data are clustered with MiniBatchKMeans and every observation is
    attributed to its nearest centroid (Euclidean distance).

    Params:
    --------
    X: numpy array with observations (rows) and features (columns) to be
       clustered
    k: number of clusters

    Returns:
    --------
    pve: percentage of variance explained; NaN when the total sum of
         squares is zero (the ratio is undefined in that case)
    """
    model = MiniBatchKMeans(init='k-means++', n_clusters=k, batch_size=50,
                            n_init=3, max_no_improvement=10, verbose=0)
    model.fit(X)
    centroids = model.cluster_centers_

    data = np.asarray(X, dtype=float)

    # Within-cluster sum of squares: squared distance from each observation
    # to its closest centroid, summed over all observations.
    nearest_dist = cdist(data, centroids, 'euclidean').min(axis=1)
    tot_withinss = np.sum(nearest_dist ** 2)

    # Total sum of squares around the global mean. This equals
    # sum(pdist(X)**2) / n but avoids building the O(n^2) pairwise distance
    # vector, which dominates run time and memory for large inputs.
    totss = np.sum((data - data.mean(axis=0)) ** 2)

    if totss == 0:
        # All observations coincide: there is no variance to explain.
        return float('nan')

    betweenss = totss - tot_withinss
    pve = betweenss / totss * 100
    return pve
评论列表
文章目录