def build_matrix():
######????? ? ? ?????
word_index = {} # ????????
index_word = {} # ????????
weibo_data = handel_weibo_data() # ????????????
index = 0
for sent in weibo_data: # ?????
for word in sent: # ?????????
if not word in word_index.keys():
word_index[word] = index
index_word[index] = word
index += 1
words_number = index
#print "words_number", words_number
#######???????
graph = np.zeros((words_number, words_number)) # ??????
for word_list in weibo_data: # ???
for i in range(len(word_list)): # ???????????????????????????????
for j in range(i, len(word_list)):
w1 = word_list[i]
w2 = word_list[j] # ???????????
index1 = word_index[w1]
index2 = word_index[w2]
graph[index1][index2] += 1 # ?????????1
graph[index2][index1] += 1 # ?????????
######?????networkx??pagerank?????????????????
nx_graph = nx.from_numpy_matrix(graph) # ??networdx
scores = nx.pagerank(nx_graph, alpha=0.85) # ??pagerank??
sorted_scores = sorted(scores.items(), key=lambda item: item[1], reverse=True) # ????????
key_words = [] # ??????????
for index, score in sorted_scores:
if index_word[index] == u'??' or index_word[index] == u'??' or len(index_word[index]) == 1:
continue
key_words.append((index_word[index], score))
########????????100????????
fp_textrank_result = open('f://emotion/mysite/Label_extract/result_textrank.txt', 'w+')
for i in range(100):
fp_textrank_result.write(key_words[i][0] + ' ' + str(round(key_words[i][1], 10)))
fp_textrank_result.write('\n')
fp_textrank_result.close()
"""
fp_test = open('f://emotion/mysite/Label_extract/test.txt', 'w+')
for i in range(100):
fp_test.write(key_words[i][0] + '?')
fp_test.close()
"""
print "textrank key word calculate is success..."
return key_words
# 评论列表 (comment list -- page-scrape residue, not code)
# 文章目录 (article table of contents -- page-scrape residue, not code)