def jiebafenci(all_the_text):
    """Segment text with ``pseg.cut`` and keep only the Chinese words.

    A token produced by ``pseg.cut`` is kept when all of the following hold:

    * its ``word`` is longer than one character;
    * its ``flag`` is non-empty and the first character of the flag is not
      listed in the module-level ``flag_list``;
    * the first character of the word lies in the range U+4E00..U+9FA5
      (CJK unified ideographs).

    Kept words are concatenated into one string, each preceded by a single
    space, and any CR/LF characters are replaced by spaces.

    NOTE(review): ``pseg`` is presumably ``jieba.posseg`` and ``flag`` its
    part-of-speech tag -- confirm against the file's imports.

    :param all_the_text: text handed to ``pseg.cut``.
    :return: the joined words followed by a newline when the joined string
        is longer than 40 characters; otherwise a lone newline.
    """
    kept = []
    for token in pseg.cut(all_the_text):
        word = token.word
        flag = token.flag
        if len(word) <= 1 or not flag:
            continue
        if flag[0] in flag_list:
            continue
        # The lower bound used to be the literal u'/u4e00' (forward slash
        # instead of a backslash escape), which let words starting with
        # ASCII digits or letters through. Use the real code point so that
        # only words starting with a CJK ideograph are kept.
        if u'\u4e00' <= word[0] <= u'\u9fa5':
            kept.append(word)

    # Every kept word carries a leading space, matching the historical
    # output format (the result starts with a space when non-empty).
    joined = "".join(" " + word for word in kept)
    joined = joined.replace("\n", " ").replace("\r", " ")

    # Results of 40 characters or fewer are dropped; a newline is always
    # emitted.
    result = joined if len(joined) > 40 else ""
    return result + "\n"
data_pre.py 文件源码
python
阅读 16
收藏 0
点赞 0
评论 0
评论列表
文章目录