def insert_into_reverse_dict(self, hash_val, text):
    """Index ``text`` in the reverse dictionary under ``hash_val``.

    Keywords are extracted from ``text`` together with their weights.
    Keywords are kept, in the order returned, until the first one whose
    weight is below ``self.rate`` times the average weight. The keys that
    ``self.gen_key_list`` builds from the kept keywords are then each added
    to ``self.reverse_dict`` with ``hash_val`` as the stored value.

    Args:
        hash_val: Value stored against every generated key; presumably
            the hash identifying ``text`` -- confirm against callers.
        text: Text to extract keywords from.

    Returns:
        None. The method only updates ``self.reverse_dict``.

    Note:
        The original docstring was garbled but mentioned a 20% figure,
        presumably the usual value of ``self.rate`` -- TODO confirm.
    """
    # Keyword extraction.
    word_with_weight = jieba.analyse.extract_tags(text, withWeight=True)

    word_num = len(word_with_weight)
    weight_total = sum(float(weight) for _, weight in word_with_weight)
    # Guard the division: with no extracted keywords the average stays 0.
    weight_avg = weight_total / word_num if word_num > 0 else 0

    threshold = self.rate * weight_avg
    word_list = []
    weight_list = []
    for word, weight in word_with_weight:
        # Stop at the first low-weight keyword instead of skipping it.
        # NOTE(review): this relies on extract_tags returning keywords
        # ordered by descending weight -- confirm for the jieba version used.
        if weight < threshold:
            break
        word_list.append(word)
        weight_list.append(weight)

    # Build the dictionary keys from the retained keywords.
    key_list = self.gen_key_list(
        word_list, weight_list, len(word_list), self.word_max_len
    )
    for key in key_list:
        # Reverse mapping (key -> hash).
        # NOTE(review): the meaning of the constant 100 is not visible
        # here -- check the signature of reverse_dict.add.
        self.reverse_dict.add(key, 100, hash_val)
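# Comment list      (page-navigation text carried over from the source web page; not code)
# Table of contents (page-navigation text carried over from the source web page; not code)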