def k_means(points, k, **kwargs):
    '''
    Find k centroids that attempt to minimize the k-means problem:
    https://en.wikipedia.org/wiki/K-means_clustering

    Every dimension is rescaled to unit standard deviation before
    clustering, and the resulting centroids are scaled back into the
    original coordinate frame before being returned.

    Arguments
    ----------
    points: (n, d) set of points
    k: int, number of centroids to compute
    **kwargs: passed directly to scipy.cluster.vq.kmeans

    Returns
    ----------
    centroids: (k, d) set of points
    labels: (n) set of indexes for which points belong to which centroid
    '''
    from scipy.cluster.vq import kmeans
    from scipy.spatial import cKDTree

    # work in floating point so list and integer input behave the same
    points = np.asanyarray(points, dtype=np.float64)

    # per-dimension scale used to whiten the data
    points_std = points.std(axis=0)
    # a dimension in which every point has (numerically) the same value
    # has zero spread; dividing by it would fill the whitened data with
    # NaN/inf, so leave such a dimension unscaled instead
    points_std = np.where(points_std < 1e-12, 1.0, points_std)

    whitened = points / points_std
    # the distortion value returned by kmeans is not needed here
    centroids_whitened, _distortion = kmeans(whitened, k, **kwargs)
    # undo the whitening so centroids are in the same frame as `points`
    centroids = centroids_whitened * points_std

    # label each point with the index of its nearest centroid,
    # measured in the original (un-whitened) coordinates
    tree = cKDTree(centroids)
    labels = tree.query(points, k=1)[1]

    return centroids, labels
评论列表
文章目录