def tokenize(question, on='jieba'):
    """Segment *question* into a list of tokens with stopwords removed.

    :param question: the text (a question/sentence) to segment
    :param on: segmentation backend; 'jieba' (default) or 'ltp'
    :return: list of tokens after stopword filtering
    """
    if on == 'ltp':
        # LTP's segmentor operates on UTF-8 bytes and yields byte tokens,
        # so encode the input and decode each token back to str.
        words = segmentor.segment(question.encode('utf-8'))
        tokens = _remove_stopwords([w.decode('utf-8') for w in words])
    else:
        # jieba segmentation (works directly on str)
        tokens = _remove_stopwords(jieba.lcut(question))
    # Lazy %-style args: the message is only interpolated if DEBUG is enabled.
    logging.debug("NLP:tokenize: %s", " ".join(tokens))
    return tokens
# NOTE(review): the following two lines were web-scrape residue
# ("评论列表" = "comment list", "文章目录" = "article table of contents");
# left as bare expressions they would raise NameError at import time,
# so they are preserved here as a comment only.