kadist-tag-cluster.py 文件源码-python代码片段

kadist-tag-cluster.py 文件源码

python

阅读 18 收藏 0 点赞 0 评论 0

项目：wordnet-clusters 作者: darenr 项目源码文件源码

def word_cluster(data, labels, k):
    """Cluster ``data`` into ``k`` groups with k-means and save them as JSON.

    Each label is printed next to its assigned cluster index. The label
    names are then grouped under ``'cluster<index>'`` keys, written to
    ``results/clusters[_wn][_wv]_k<k>.json`` and passed to ``histogram``.

    Args:
        data: Feature rows handed straight to ``KMeans.fit``. Row ``i`` is
            paired with ``labels[i]`` when reading the cluster assignments.
        labels: Sequence of objects exposing a ``name()`` method
            (presumably WordNet synsets -- TODO confirm against the caller).
        k: Number of clusters to request; also embedded in the file name.

    Note:
        Reads the module-level flags ``use_wordnet`` and ``use_wordvectors``
        to decide which suffixes the output file name gets.
    """
    # Imported locally so this function does not rely on a module-level
    # ``import os`` being present in the file.
    import os

    # NOTE(review): ``cluster`` is presumably ``sklearn.cluster`` -- confirm.
    k_means = cluster.KMeans(n_clusters=k)
    k_means.fit(data)
    assignments = k_means.labels_

    # Single-argument print() calls emit the same text on Python 2 and 3,
    # whereas the bare ``print a, b`` statement is a SyntaxError on Python 3.
    for i, label in enumerate(labels):
        print('%s %s' % (label, assignments[i]))

    # Group the label names by the cluster they were assigned to.
    groups = defaultdict(list)
    for cluster_id, label in zip(assignments, labels):
        groups['cluster' + str(cluster_id)].append(label.name())

    fname = 'results/clusters'
    if use_wordnet:
        fname += "_wn"
    if use_wordvectors:
        fname += "_wv"
    fname += '_k' + str(k) + '.json'

    # Make sure the output directory exists so a fresh checkout does not
    # fail on the open() below.
    out_dir = os.path.dirname(fname)
    if not os.path.isdir(out_dir):
        os.makedirs(out_dir)

    with codecs.open(fname, 'wb', 'utf-8') as outfile:
        # indent=1 is what the former ``indent=True`` evaluated to.
        outfile.write(json.dumps(groups, indent=1))

    # Report and plot only after the file has been flushed and closed.
    print(' * Saved results to %s' % fname)
    # create histogram of cluster sizes
    histogram(groups)