def cluster(X, seed=0, n_clusters=20, alg='kmeans'):
    """Cluster the rows of X with a k-means variant.

    Args:
        X: Data to cluster, passed directly to the model's ``fit_predict``.
        seed: Value forwarded as the model's ``random_state``.
        n_clusters: Number of clusters; coerced with ``int()`` before use.
        alg: Which model to use: ``'kmeans'`` (``KMeans``) or
            ``'spherical'`` (``SphericalKMeans``).

    Returns:
        A tuple ``(pred_clusters, cluster_centers)``: the result of
        ``fit_predict(X)`` and the fitted model's ``cluster_centers_``.

    Raises:
        ValueError: If ``alg`` is not one of the supported names.

    NOTE: euclidean tends to perform very poorly.
    NOTE: with ``alg='spherical'`` the caller's X may be L2-normalized in
    place (``copy=False``).
    """
    if alg == 'kmeans':
        Model = KMeans
    elif alg == 'spherical':
        # Spherical k-means assumes L2-normalized rows. copy=False only
        # *tries* to work in place (a copy is returned when the input has to
        # be converted first), so keep the returned array instead of relying
        # on X having been mutated.
        # NOTE(review): assumes `normalize` is sklearn.preprocessing.normalize
        # and returns the normalized data -- confirm against the file imports.
        X = normalize(X, 'l2', copy=False)
        Model = SphericalKMeans
    else:
        # Previously an unknown name fell through and crashed later with an
        # UnboundLocalError on `Model`; fail early with a clear message.
        raise ValueError(
            "Unknown clustering algorithm {!r}; "
            "expected 'kmeans' or 'spherical'".format(alg)
        )

    kmeans = Model(n_clusters=int(n_clusters), random_state=seed)
    pred_clusters = kmeans.fit_predict(X)
    return pred_clusters, kmeans.cluster_centers_
# (Web-page navigation residue: "comment list" / "table of contents";
# not part of the code.)