def get_word_list(self, text, lower=True, strip_stop_words=True, use_tag_filter=False):
    """Segment ``text`` and return the surviving words as a list of strings.

    The input is normalised with ``util.as_text`` and tokenised with
    ``pseg.cut``; every token is expected to expose ``word`` and ``flag``
    attributes (presumably jieba part-of-speech pairs -- confirm against
    the module imports).

    Args:
        text: Raw input handed to ``util.as_text`` before segmentation.
        lower: When true, each kept word is lower-cased.
        strip_stop_words: When true, words present in ``self.stop_words``
            are dropped. The check runs after the optional lower-casing.
        use_tag_filter: When true, only tokens whose flag appears in
            ``self.default_tag_filter`` are kept.

    Returns:
        A list of non-empty, whitespace-stripped words in token order.
    """
    words = []
    for token in pseg.cut(util.as_text(text)):
        tag = token.flag
        # Optional part-of-speech whitelist.
        if use_tag_filter and tag not in self.default_tag_filter:
            continue
        # Tokens flagged 'x' are always discarded, regardless of the filter
        # (presumably the tagger's marker for punctuation/non-words).
        if tag == 'x':
            continue
        word = token.word.strip()
        if not word:
            # Whitespace-only tokens carry no content.
            continue
        if lower:
            word = word.lower()
        if strip_stop_words:
            word = word.strip()
            if word in self.stop_words:
                continue
        words.append(word)
    return words
评论列表
文章目录