def WordBeark():
    """Segment the ``.zhs`` corpus with jieba and write it to ``.wordbreak``.

    Reads ``path + data + ".zhs"`` one line at a time, tokenizes each line
    with ``jieba.cut`` and writes the tokens joined by single spaces to
    ``path + data + ".wordbreak"``.  ``path``, ``data``, ``logger`` and
    ``jieba`` are looked up in the enclosing module scope; the function
    takes no arguments and returns ``None``.

    Progress is logged after every 10000 lines, and a summary with the
    total line count is logged once both files have been closed.

    Any exception raised by ``open``, ``jieba.cut`` or ``write`` propagates
    to the caller; both file handles are closed before it does.
    """
    logger.info("running Word Beark in " + path + data)
    inputfile = path + data + ".zhs"
    outputfile = path + data + ".wordbreak"

    # Stays 0 when the input file is empty, so the final log line is still
    # well-defined.
    count = 0

    # The input is opened first so that a missing input file does not
    # truncate an existing output file.  The context managers close both
    # handles on every exit path.
    # NOTE(review): no explicit encoding is passed, so the platform default
    # applies -- confirm this matches the encoding of the .zhs corpus.
    with open(inputfile, 'r') as source, open(outputfile, 'w') as output:
        # Iterating the file object streams the corpus instead of loading
        # every line into memory at once.
        for count, line in enumerate(source, 1):
            # No line terminator is appended here: line breaks in the
            # output come only from the tokens produced for ``line``.
            output.write(u' '.join(jieba.cut(line)))
            if count % 10000 == 0:
                logger.info("Cut " + str(count) + " articles")

    logger.info("Finished Saved " + str(count) + " articles in " + outputfile)
process_corpus.py 文件源码
python
阅读 23
收藏 0
点赞 0
评论 0
评论列表
文章目录