# Default stopword list: a UTF-8 text file with one stopword per line.
_DEFAULT_STOPWORD_FILE = (
    "/Users/luoyi/Scala/OnlineRS/com/Recsys_engine/data/stop_word.txt"
)


def preprocess_per_news(news, stopword_file=_DEFAULT_STOPWORD_FILE):
    """Reduce a news text to a space-separated string of unique keywords.

    The text is segmented with ``jieba.posseg.cut``. A token is kept when
    ``hasMeaningfulWords`` accepts it, its word is longer than one
    character, and the word is not listed in the stopword file.

    Args:
        news: The raw news text to segment.
        stopword_file: Path to a UTF-8 text file with one stopword per
            line. Defaults to the path this function has always used.

    Returns:
        The surviving words joined by single spaces. Each word appears
        once, in order of first occurrence (the previous set-based
        implementation returned them in arbitrary order). Returns an empty
        string when nothing survives.

    Raises:
        IOError / OSError: If the stopword file cannot be opened.
    """
    # Local import keeps this block self-contained; io.open decodes to
    # text on both Python 2 and Python 3, unlike str.decode().
    import io

    # The context manager closes the file handle even if reading fails.
    with io.open(stopword_file, encoding="utf-8") as handle:
        stopwords = {line.strip() for line in handle}

    kept = []
    seen = set()
    # Each item yielded by jieba.posseg.cut exposes the segmented text as
    # ``.word``; hasMeaningfulWords receives the whole item.
    # NOTE(review): presumably it filters on the part-of-speech tag --
    # confirm against its definition.
    for token in jieba.posseg.cut(news):
        if not hasMeaningfulWords(token):
            continue
        word = token.word
        # Skip single-character words, stopwords, and repeats.
        if len(word) <= 1 or word in stopwords or word in seen:
            continue
        seen.add(word)
        kept.append(word)
    return " ".join(kept)
util.py 文件源码
python
阅读 20
收藏 0
点赞 0
评论 0
评论列表
文章目录