def compute_db_index(matrix, kmeans):
    """Compute the Davies-Bouldin index, a measure of clustering quality.

    For every cluster the "scatter" is the root-mean-square distance of its
    members to the cluster centroid.  Each pair of clusters is then scored as
    (scatter_i + scatter_j) / distance(centroid_i, centroid_j); the index is
    the mean, over clusters, of the worst (largest) such ratio.  Smaller
    values therefore mean tighter, better-separated clusters.

    Faster and possibly more reliable than silhouette score.

    Args:
        matrix: 2-D array of samples, one row per sample; its column count
            must match that of ``kmeans.cluster_centers_``.
            (Presumably a dense NumPy array -- confirm against callers.)
        kmeans: Fitted clustering object exposing ``n_clusters``,
            ``cluster_centers_`` (one row per cluster) and ``labels_``
            (one integer cluster index per row of ``matrix``).

    Returns:
        The Davies-Bouldin score as a float.
    """
    k = kmeans.n_clusters
    centers = kmeans.cluster_centers_
    labels = np.asarray(kmeans.labels_, dtype=np.intp)

    centroid_dists = sp_dist.squareform(sp_dist.pdist(centers))
    # Avoid divide-by-zero for coincident centroids (and on the diagonal,
    # which is zeroed out again further down).
    centroid_dists[np.abs(centroid_dists) < MIN_CENTROID_DIST] = MIN_CENTROID_DIST

    # Squared distance of every sample to its own centroid, computed in one
    # vectorized pass instead of a per-row Python loop.
    residuals = np.asarray(matrix - centers[labels])
    sq_dists = np.square(residuals).sum(axis=1).ravel()

    # Per-cluster within-cluster sum of squares and membership counts.
    wss = np.bincount(labels, weights=sq_dists, minlength=k)
    counts = np.bincount(labels, minlength=k).astype(float)

    # Handle empty clusters: their wss is 0, so a count of 1 yields scatter 0
    # rather than a 0/0 NaN.
    counts[counts == 0] = 1
    scatter = np.sqrt(wss / counts)

    # mixitude[i, j] = (scatter[i] + scatter[j]) / dist(center_i, center_j)
    mixitude = (scatter + scatter[:, np.newaxis]) / centroid_dists
    # A cluster is not compared against itself.
    np.fill_diagonal(mixitude, 0.0)

    worst_case_mixitude = np.max(mixitude, axis=1)
    db_score = worst_case_mixitude.sum() / k
    return db_score
评论列表
文章目录