Textrank_count.py 文件源码

python
阅读 23 收藏 0 点赞 0 评论 0

项目:Graduation-design 作者: Baichenjia 项目源码 文件源码
def build_matrix(result_path='f://emotion/mysite/Label_extract/result_textrank.txt',
                 top_n=100):
    """Rank words with PageRank over a sentence co-occurrence graph.

    The tokenized sentences come from ``handel_weibo_data()`` (expected to
    return a sequence of word sequences). Every distinct word becomes a
    node; each pair of positions ``(i, j)`` with ``i <= j`` inside one
    sentence adds 1 to both ``graph[a][b]`` and ``graph[b][a]``. The
    resulting matrix is handed to networkx and scored with
    ``nx.pagerank(alpha=0.85)``.

    Args:
        result_path: File that receives the best-scoring words, one
            ``"<word> <score rounded to 10 digits>"`` per line. The file
            is truncated on every call.
        top_n: Maximum number of words written to ``result_path``. Fewer
            lines are written when fewer words survive the filtering.

    Returns:
        A list of ``(word, score)`` tuples for all retained words, sorted
        by descending score.
    """
    # Assign a dense integer id to every distinct word, in order of first
    # appearance, and keep the reverse mapping for decoding the scores.
    word_index = {}
    index_word = {}
    weibo_data = handel_weibo_data()
    for sent in weibo_data:
        for word in sent:
            if word not in word_index:  # dict membership, no key list scan
                new_id = len(word_index)
                word_index[word] = new_id
                index_word[new_id] = word
    words_number = len(word_index)

    # Symmetric co-occurrence counts between words of the same sentence.
    graph = np.zeros((words_number, words_number))
    for word_list in weibo_data:
        length = len(word_list)
        for i in range(length):
            index1 = word_index[word_list[i]]
            # NOTE(review): j starts at i, so every word is also paired
            # with itself and its diagonal cell grows by 2 per occurrence.
            # Kept as in the original; confirm whether self-loops are
            # intended before changing it, since it affects the scores.
            for j in range(i, length):
                index2 = word_index[word_list[j]]
                graph[index1][index2] += 1
                graph[index2][index1] += 1

    # Node labels of the generated graph are the matrix indices, so the
    # keys of `scores` can be looked up in `index_word`.
    nx_graph = nx.from_numpy_matrix(graph)
    scores = nx.pagerank(nx_graph, alpha=0.85)
    sorted_scores = sorted(scores.items(), key=lambda item: item[1], reverse=True)

    key_words = []
    for index, score in sorted_scores:
        word = index_word[index]
        # Skip two specific words and every single-character token.
        # NOTE(review): the two literals below look like they were damaged
        # by an encoding loss in this copy of the file; confirm the
        # intended words against the original source.
        if word == u'??' or word == u'??' or len(word) == 1:
            continue
        key_words.append((word, score))

    # Slicing (instead of indexing 0..top_n-1) avoids an IndexError when
    # fewer than `top_n` words remain; `with` closes the file even if a
    # write fails.
    with open(result_path, 'w+') as fp_textrank_result:
        for word, score in key_words[:top_n]:
            fp_textrank_result.write(word + ' ' + str(round(score, 10)))
            fp_textrank_result.write('\n')

    # Parenthesised single-argument form prints the same text on Python 2
    # and is also valid on Python 3.
    print("textrank key word calculate is success...")
    return key_words
评论列表
文章目录


问题


面经


文章

微信
公众号

扫码关注公众号