import logging

import jieba


def word_tokenization(tick_blog_list):
    '''
    Tokenize each blog post in tick_blog_list with jieba.
    Returns a list of token lists: [[token, ...], [token, ...], ...]
    '''
    count = 0
    seg_list = []
    try:
        for blog in tick_blog_list:
            count += 1
            if blog != '':
                # jieba.cut returns a generator; materialize it into a list
                seg_list.append(list(jieba.cut(blog)))
            else:
                print('Line %d is empty!' % count)
    except IOError as e:
        # jieba may raise IOError when loading its dictionary files
        logging.error('IOError %s' % e)
    finally:
        # Return whatever has been tokenized so far, even after an error
        return seg_list
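
# A minimal usage sketch, assuming jieba is installed; the sample posts
# below are hypothetical and not from the original data set.
if __name__ == '__main__':
    sample_posts = ['今天天气不错', '', '自然语言处理很有趣']
    tokens = word_tokenization(sample_posts)
    print(tokens)  # one token list per non-empty post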
#-------------------------------------------------------------------------------
Source file: reviews_preprocessing.py