def parse():
    """Segment every stored comment and save its filtered word list.

    Loads the user dictionary ``dict.txt`` from the current working
    directory into jieba, then runs jieba's part-of-speech segmenter over
    ``comment.body`` for every row returned by ``Comment.query.all()``.
    Words whose flag is in the dismissed set are dropped; the remaining
    words are joined with ``'/'`` and stored in ``comment.parsed``.

    A progress line is printed for each comment, and all changes are
    committed to ``db.session`` once, after the last comment is processed.
    """
    # Function-level imports: jieba is only imported when parse() runs.
    import jieba
    import jieba.posseg as pseg

    # Load the user's dictionary from the current working directory.
    # os.path.join avoids hard-coding '/' as the path separator and does not
    # produce a doubled separator when the working directory is the root.
    jieba.load_userdict(os.path.join(os.getcwd(), "dict.txt"))

    # Part-of-speech flags to dismiss (presumably function words such as
    # particles, conjunctions and pronouns -- verify against jieba's tag set).
    # A frozenset gives constant-time membership tests inside the word loop.
    dismiss = frozenset([
        'b', 'c', 'r', 'uj', 'u', 'p', 'q', 'uz', 't', 'ul', 'k', 'f',
        'ud', 'ug', 'uv',
    ])

    for comment in Comment.query.all():
        # Keep only the words whose flag is not dismissed, in original order.
        comment.parsed = '/'.join(
            word
            for word, flag in pseg.cut(comment.body)
            if flag not in dismiss
        )
        db.session.add(comment)
        # A single parenthesised argument prints identically under the
        # Python 2 print statement and the Python 3 print function.
        print("Comment %04d Parsed!" % comment.id)

    # One commit for the whole batch.
    db.session.commit()
    print("ALL DONE!")
评论列表
文章目录