def segment(self, text, lower = True, use_stop_words = True, use_speech_tags_filter = False):
    """Segment text into words and return them as a list of strings.

    The text is first passed through ``util.as_text`` and then tokenised
    with ``pseg.cut``; every token is expected to expose ``word`` and
    ``flag`` attributes.  Tokens flagged ``'x'`` and tokens whose word is
    empty after stripping surrounding whitespace are always dropped.

    Keyword arguments:
    lower -- if true, lower-case every word.  This happens before the
        stop-word check, so stop words are matched against the
        lower-cased form.
    use_stop_words -- if true, drop every word found in ``self.stop_words``.
    use_speech_tags_filter -- if true, keep only tokens whose flag is in
        ``self.default_speech_tag_filter``; otherwise tokens are not
        filtered by flag.  Any truthy value enables the filter (the
        previous ``== True`` comparison ignored truthy values that do not
        compare equal to ``True``).

    Returns a list of the surviving words, in the order the tokeniser
    produced them.
    """
    text = util.as_text(text)

    word_list = []
    for token in pseg.cut(text):
        # Optional part-of-speech whitelist.
        if use_speech_tags_filter and token.flag not in self.default_speech_tag_filter:
            continue
        # NOTE(review): 'x' is presumably the tokeniser's tag for
        # punctuation / non-word symbols -- confirm against pseg's docs.
        if token.flag == 'x':
            continue

        word = token.word.strip()
        if not word:
            # Whitespace-only tokens carry no content.
            continue

        if lower:
            word = word.lower()
        # The word is already stripped, so it can be looked up directly.
        if use_stop_words and word in self.stop_words:
            continue

        word_list.append(word)
    return word_list
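# [web page navigation residue, not code] Comment list
# [web page navigation residue, not code] Table of contents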