def word_segment(line, stop=False, remain_number=True):
    """Split ``line`` into tokens with jieba and filter the result.

    Every token yielded by ``jieba.cut(line, HMM=True)`` is stripped of
    surrounding whitespace. Tokens that are empty after stripping, or that
    are found in ``PUNCT``, are always discarded.

    Args:
        line: Text to segment; handed directly to ``jieba.cut``.
        stop: When true, tokens found in ``STOP_WORDS`` are discarded.
        remain_number: When false, tokens for which ``util_func.atof``
            returns something other than ``None`` are discarded
            (presumably tokens that parse as numbers -- confirm against
            ``util_func.atof``).

    Returns:
        list: The surviving tokens, in the order jieba produced them.
    """
    # load_stopwords() is invoked whenever STOP_WORDS is still None, even
    # if ``stop`` is false (presumably it populates STOP_WORDS -- verify).
    if STOP_WORDS is None:
        load_stopwords()

    tokens = []
    for raw in jieba.cut(line, HMM=True):
        token = raw.strip()
        # Blank pieces and punctuation never make it into the output.
        if not token or token in PUNCT:
            continue
        # Optional stop-word filtering.
        if stop and token in STOP_WORDS:
            continue
        # Optional filtering of tokens that util_func.atof can convert;
        # atof is only called when this filter is enabled.
        if not remain_number and util_func.atof(token) is not None:
            continue
        tokens.append(token)
    return tokens
评论列表
文章目录