Tfidf_count.py 文件源码

python
阅读 20 收藏 0 点赞 0 评论 0

项目:Graduation-design 作者: Baichenjia 项目源码 文件源码
def TFIDF_result():
    """Rank the terms of the test document by TF-IDF and save the top 100.

    A corpus is built from the documents returned by ``read_handel_list()``
    with the document returned by ``read_test_list()`` appended as the last
    entry.  TF-IDF weights are computed over the whole corpus, and the
    weights of the last (test) document are sorted in descending order.
    Up to 100 of the highest-scoring terms are written to
    ``result_tfidf.txt`` as ``"<term> <score rounded to 10 places>"`` lines.

    Returns:
        A list of at most 100 ``(term, score)`` tuples, highest score first.
    """
    top_n = 100  # number of highest-scoring terms to keep

    str_handel_list = read_handel_list()   # reference documents
    str_test = read_test_list()            # document whose terms are ranked

    # Build the corpus on a copy so the list returned by the reader is not
    # mutated; the test document is always the LAST corpus entry.
    corpus = str_handel_list[:]
    corpus.append(str_test)
    print("TF-IDF corpus building success...")

    # Term-count matrix -> TF-IDF matrix (rows: documents, columns: terms).
    vectorizer = CountVectorizer()
    transformer = TfidfTransformer()
    tfidf = transformer.fit_transform(vectorizer.fit_transform(corpus))
    word = vectorizer.get_feature_names()   # column index -> term
    weight = tfidf.toarray()
    print("TF-IDF score is calcuated success...")

    # Use the last row rather than a hard-coded index 30: the test document
    # is appended last, so this stays correct for any number of reference
    # documents (and is the same row when there are exactly 30 of them).
    test_weights = weight[-1]

    results = []
    for term, score in zip(word, test_weights):
        # NOTE(review): the two '??' literals look like mis-encoded non-ASCII
        # stop words; restore the intended words from the original source.
        # Single-character terms are skipped as well.
        if term == '??' or term == '??' or len(term) == 1:
            continue
        results.append((term, score))
    sorted_results = sorted(results, key=lambda result: result[1], reverse=True)

    # Slicing tolerates fewer than top_n surviving terms (indexing 0..99
    # unconditionally raised IndexError on small vocabularies).
    tfidf_results = sorted_results[:top_n]

    # The context manager closes the file even if a write fails.
    with open("f://emotion/mysite/Label_extract/result_tfidf.txt", 'w+') as fp_tfidf_result:
        for term, score in tfidf_results:
            fp_tfidf_result.write(term + ' ' + str(round(score, 10)))
            fp_tfidf_result.write('\n')
    return tfidf_results
评论列表
文章目录


问题


面经


文章

微信
公众号

扫码关注公众号