def get_all_keywords(file_name):
    """Count segmented words in a text file and export the frequent ones.

    Every line of ``file_name`` (decoded as UTF-8) is tokenised with
    ``jieba.cut``.  Tokens whose length is not 1 and that occur more than
    three times are stored, together with their occurrence counts, in the
    ``key`` and ``count`` columns of the spreadsheet loaded from
    ``/Users/huazi/Desktop/zhanlang2.xlsx``.  The result is saved as
    ``sun_2.xlsx`` (a relative path).

    Args:
        file_name: Path of the UTF-8 encoded text file to analyse.

    Returns:
        None.  The number of distinct tokens is printed and the augmented
        spreadsheet is written to ``sun_2.xlsx``.
    """
    # Local import so this block does not depend on the file's import header.
    from collections import Counter

    # NOTE(review): jieba's parallel mode is documented as splitting its
    # input by line; since cut() is called once per line here it may bring
    # little benefit -- confirm before relying on it.
    jieba.enable_parallel(8)

    # Flat list of every token produced for every line of the input file.
    word_lists = []
    with codecs.open(file_name, 'r', encoding='utf-8') as f:
        for line in f.readlines():
            word_lists.extend(jieba.cut(line))

    # One counting pass replaces the former list.count() call per distinct
    # word, which rescanned the whole token list each time.
    word_counts = Counter(word_lists)

    length = len(word_counts)  # number of distinct tokens
    # Parenthesised single-argument print behaves the same on Python 2 and 3.
    print(u"??%d????" % length)

    information = pd.read_excel('/Users/huazi/Desktop/zhanlang2.xlsx')

    # Keep tokens that are not a single character and occur more than 3 times.
    # Row order follows the counter's iteration order; the previous version
    # followed set iteration order, which was equally unspecified.
    frequent = [
        (word, count)
        for word, count in word_counts.items()
        if len(word) != 1 and count > 3
    ]

    # NOTE(review): pandas requires an assigned list to match the frame's
    # index length; presumably the input workbook is sized to fit -- confirm.
    information['key'] = [word for word, _ in frequent]
    information['count'] = [count for _, count in frequent]
    information.to_excel('sun_2.xlsx')
# ????
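# 评论列表 -- "Comment list" (web-page navigation residue, not code)
# 文章目录 -- "Article table of contents" (web-page navigation residue, not code)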