def getTrainData(inpath, outfile):
    """Segment every file in ``inpath`` into numbered ``.cut`` output files.

    The i-th entry returned by ``os.listdir(inpath)`` (counting from 0) is
    read as UTF-8 and passed to ``jiebafenci``. The result is written to
    ``outfile + str(i) + ".cut"`` only when its length is greater than 30.
    The output file is created (and left empty) even when the result is too
    short or processing fails, so output numbers always match positions in
    the directory listing.

    Args:
        inpath: Directory whose entries are processed.
        outfile: Path prefix for the outputs; the index and ``.cut`` are
            appended to it.

    Raises:
        OSError: If the directory cannot be listed, or an input or output
            file cannot be opened. Errors raised while reading, segmenting
            or writing a single file are reported on stdout and do not stop
            the loop.
    """
    # NOTE: os.listdir() returns entries in arbitrary order, so which input
    # ends up in which numbered output is not guaranteed to be stable.
    for index, filename in enumerate(os.listdir(inpath)):
        # os.path.join instead of a hard-coded "\\" so this also works
        # outside Windows.
        src_path = os.path.join(inpath, filename)
        dst_path = outfile + str(index) + ".cut"

        # The output is opened first (as before), so an empty .cut file
        # exists even if the input cannot be opened. Both handles are
        # closed by the with-statement on every exit path.
        # The output encoding is explicit: relying on the locale default can
        # fail or produce non-UTF-8 files for Chinese text.
        with open(dst_path, "w", encoding="UTF-8") as fw, \
                open(src_path, "r", encoding="UTF-8") as file_object:
            try:
                pre_text = jiebafenci(file_object.read())
                # Skip results that are too short to be useful.
                if len(pre_text) > 30:
                    fw.write(pre_text)
            except Exception as err:
                # Best-effort: one bad file (e.g. not valid UTF-8) must not
                # abort the whole run. Exception, not a bare except, so that
                # Ctrl+C and SystemExit still propagate.
                print('@' * 20)
                print("failed to process", src_path, repr(err))
#['C000008', 'C000010', 'C000013', 'C000014', 'C000016', 'C000020', 'C000022','C000023', 'C000024']
data_process.py 文件源码
python
阅读 28
收藏 0
点赞 0
评论 0
评论列表
文章目录