def ConvertToPlainText_Chunks(self, p_output_dir, p_file_number, p_chunk=True, p_chunk_size=5000):
    """Write this document's prepared lines to one or more plain-text files.

    Each output file is named
    ``<p_output_dir><file number>_<file name>_<chunk index><file extension>``.
    ``p_output_dir`` is prepended verbatim (no path separator is inserted),
    so it must already end with one when it names a directory.

    Args:
        p_output_dir: Prefix prepended to every output file name.
        p_file_number: Number used in the name of the first file written.
        p_chunk: When true, the prepared lines are split with
            ``Utils_MalletInterpret.GetChunkedLines``; otherwise all lines
            are written to a single file.
        p_chunk_size: Chunk size forwarded to ``GetChunkedLines``.

    Returns:
        The number of chunks, which equals the number of files opened
        for writing.
    """
    file_name = self.GetFilename()
    file_ext = self.GetFileExtension()
    output_lines = self.GetPreparedLines()

    # Optional line chunking
    if p_chunk:
        chunks = Utils_MalletInterpret.GetChunkedLines(output_lines, p_chunk_size)
    else:
        chunks = [output_lines]

    # Write out files, one per chunk
    for index, chunk in enumerate(chunks):
        output_path = "{0}{1}_{2}_{3}{4}".format(
            p_output_dir, p_file_number, file_name, index, file_ext)
        with open(output_path, 'w') as plaintext_output_file:
            for line in chunk:
                # unidecode() is applied to every line before it is written
                plaintext_output_file.write(unidecode(line) + u"\n")
        # NOTE(review): the per-chunk placement of this increment is
        # reconstructed (anywhere else it could not affect a file name);
        # confirm that every chunk file should get its own file number.
        p_file_number += 1

    return len(chunks)
评论列表
文章目录