def split_corpus(filenames, sizes):
    """Split parallel input files into consecutive chunks of lines.

    The inputs are opened once and consumed sequentially, so each entry of
    ``sizes`` takes the next ``size`` lines from every input file, starting
    where the previous entry stopped.

    Args:
        filenames: File names passed to ``open_files``; for every non-zero
            size, ``len(filenames)`` output files are requested from
            ``open_temp_files``.
        sizes: Number of lines per chunk, in order. ``0`` produces no files
            for that chunk. ``None`` makes ``islice`` read everything that
            is left, so it only makes sense as the last entry.

    Returns:
        A list with one item per entry of ``sizes``: ``None`` for a size of
        ``0``, otherwise the list of ``.name`` attributes of the files
        obtained from ``open_temp_files`` (presumably temporary file paths;
        confirm against that helper).
    """
    with open_files(filenames) as sources:
        chunk_paths = []

        for line_count in sizes:
            if line_count == 0:
                # An empty chunk is recorded as None rather than as empty files.
                chunk_paths.append(None)
            else:
                with open_temp_files(num=len(filenames)) as targets:
                    for source, target in zip(sources, targets):
                        # islice(..., None) drains the rest of the file, which
                        # is why the unbounded chunk (train) has to come last.
                        target.writelines(islice(source, line_count))
                    chunk_paths.append([handle.name for handle in targets])

        return chunk_paths
# Web-page residue (not part of the code): "Comment list" / "Article table of contents"