def word_cluster(data, labels, k):
    """Cluster ``data`` into ``k`` groups with KMeans and save the grouping.

    Steps, in order:
      1. Fit ``cluster.KMeans(n_clusters=k)`` on ``data``.
      2. Print every entry of ``labels`` next to its assigned cluster index.
      3. Group ``label.name()`` values under the keys ``'cluster<index>'``.
      4. Write that grouping as JSON to
         ``results/clusters[_wn][_wv]_k<k>.json``; the ``_wn`` / ``_wv``
         suffixes are added when the module-level flags ``use_wordnet`` /
         ``use_wordvectors`` are truthy.
      5. Pass the grouping to ``histogram``.

    Args:
        data: Samples handed unchanged to ``KMeans.fit``.
        labels: Sequence of objects exposing a ``name()`` method; entry ``i``
            is paired with the cluster assigned to sample ``i``
            (presumably one label per row of ``data`` -- confirm with caller).
        k: Number of clusters; also embedded in the output file name.

    Returns:
        None. Results are printed, written to disk and passed to
        ``histogram``.
    """
    # Local import: the file-level import block is not visible from here.
    import os

    k_means = cluster.KMeans(n_clusters=k)
    k_means.fit(data)

    # Single-argument, pre-formatted print calls emit the same text under the
    # Python 2 print statement and the Python 3 print function.
    for i, label in enumerate(labels):
        print('%s %s' % (label, k_means.labels_[i]))

    d = defaultdict(list)
    for cluster_id, label in zip(k_means.labels_, labels):
        d['cluster' + str(cluster_id)].append(label.name())

    fname = 'results/clusters'
    if use_wordnet:
        fname += "_wn"
    if use_wordvectors:
        fname += "_wv"
    fname += '_k' + str(k) + '.json'

    # Make sure the target directory exists so the (potentially expensive)
    # clustering result is not lost to an IOError when opening the file.
    out_dir = os.path.dirname(fname)
    if not os.path.isdir(out_dir):
        os.makedirs(out_dir)

    with codecs.open(fname, 'wb', 'utf-8') as outfile:
        # indent=1 yields exactly what the former indent=True produced
        # (True was being used as the integer 1).
        outfile.write(json.dumps(d, indent=1))
    print(' * Saved results to %s' % fname)

    # create histogram of cluster sizes
    histogram(d)
评论列表
文章目录