def do_kmeans(data, k):
    """Seed mixture means and standard deviations from a k-means fit.

    Fits ``sklearn.cluster.KMeans`` with ``k`` clusters on ``data`` and
    flattens the resulting cluster centers into a 1-D array of means. Each
    standard deviation is initialized to half the absolute distance between
    two distinct, randomly chosen means.

    Args:
        data: Samples passed unchanged to ``KMeans.fit``.
            NOTE(review): presumably shaped (n_samples, 1) so that every
            cluster center is a single scalar -- confirm against the caller.
        k: Number of clusters to fit.

    Returns:
        A ``(means, sds)`` tuple of 1-D NumPy arrays of equal length.
    """
    km = sklearn.cluster.KMeans(n_clusters=k)
    km.fit(data)
    means = km.cluster_centers_.reshape((-1,))

    if means.size < 2:
        # Two distinct means cannot be drawn from a single value
        # (np.random.choice with replace=False would raise ValueError), so
        # fall back to the overall spread of the data as the initial guess.
        return (means, np.full(means.shape, np.std(data)))

    # Initialize standard deviations with distances between random cluster
    # centers. Pairs are drawn one at a time so the global NumPy RNG is
    # consumed in the same order as before.
    sds = []
    for _ in range(means.size):
        # Choose any 2 distinct means and take half the distance between them.
        first, second = np.random.choice(means, 2, replace=False)
        sds.append(abs(first - second) / 2)
    return (means, np.array(sds))
# expectation maximization for gmm
# use_kmeans: whether to initialize using kmeans or randomly
# use_priors: whether to model the prior distribution;
# this attaches a weight to each distribution that tells us
# the percentage of points generated from that distribution
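# (web-page residue, not part of the program: "Comment list")
# (web-page residue, not part of the program: "Article table of contents")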