example_2.py 文件源码

python
阅读 22 收藏 0 点赞 0 评论 0

项目:nlp 作者: lhyxcxy 项目源码 文件源码
def kmeans(class_num):
    """
    Cluster the sentences returned by ``loadFile()`` using k-means on TF-IDF features.

    The cluster label assigned to every sentence is printed to stdout as a
    side effect before the grouped result is returned.

    :param class_num: number of clusters; passed to ``KMeans`` as ``n_clusters``
    :return: a list of ``class_num`` lists, where the list at index ``k``
             holds the sentences whose cluster label is ``k``
    """
    # NOTE(review): sentences_words is presumably a list of whitespace-separated
    # token strings (one per sentence), aligned index-for-index with sentences
    # -- confirm against loadFile().
    sentences_words, sentences = loadFile()

    # Term counts first, then re-weight the count matrix with TF-IDF.
    vectorizer = CountVectorizer()
    transformer = TfidfTransformer()
    tfidf = transformer.fit_transform(vectorizer.fit_transform(sentences_words))

    # Dense matrix with one row per input document and one column per term.
    weight = tfidf.toarray()

    clf = KMeans(n_clusters=class_num)
    clf.fit(weight)

    # Parenthesised single-argument form prints identically on Python 2 and 3.
    print(clf.labels_)

    # One bucket per cluster label; clf.labels_[i] is the label of sample i.
    class_list = [[] for _ in range(class_num)]
    for i, class_label in enumerate(clf.labels_):
        class_list[class_label].append(sentences[i])
    return class_list
评论列表
文章目录


问题


面经


文章

微信
公众号

扫码关注公众号