def _discard_chunk(chunk):
    """Best-effort close of a temporary chunk and removal of its file."""
    # Close and remove are attempted independently: a failing close()
    # (e.g. on a chunk that was already closed after being written) must
    # not prevent the temporary file from being deleted.
    try:
        chunk.close()
    except Exception:
        pass  # deliberate: cleanup must never mask the primary error
    try:
        os.remove(chunk.name)
    except Exception:
        pass  # deliberate: file may already be gone / chunk may lack a name


def batch_sort(input_iterator, output_path, buffer_size=32000, output_class=None):
    """Sort a large stream of items using temporary files (external sort).

    The input is consumed in slices of at most ``buffer_size`` items.  Each
    slice is sorted in memory and written to its own temporary file; the
    sorted temporary files are then merged with ``heapq.merge`` into
    ``output_path``.  Temporary files are removed before returning, whether
    or not the sort succeeded.

    Args:
        input_iterator: Iterator of items to sort.  Items must be mutually
            orderable and expose the payload to be written as ``.obj``.
        output_path: Path handed to ``output_class`` for the final result.
        buffer_size: Maximum number of items held and sorted in memory at
            once.
        output_class: Callable taking a file path and returning an object
            with ``write()``, ``close()`` and a ``name`` attribute, and
            which is iterable (iteration is what ``heapq.merge`` consumes,
            and must yield items with ``.obj``).  Defaults to the class of
            ``input_iterator``.

    Raises:
        Whatever ``output_class``, the item comparison or the underlying
        file operations raise; cleanup errors are suppressed.
    """
    if not output_class:
        output_class = input_iterator.__class__

    chunks = []
    try:
        while True:
            current_chunk = list(islice(input_iterator, buffer_size))
            if not current_chunk:
                break
            current_chunk.sort()

            fd, filepath = tempfile.mkstemp()
            os.close(fd)  # only the path is needed; output_class reopens it
            try:
                output_chunk = output_class(filepath)
            except BaseException:
                # The chunk is not tracked in ``chunks`` yet, so the cleanup
                # in ``finally`` below would never see this file.
                try:
                    os.remove(filepath)
                except OSError:
                    pass
                raise
            # Register before writing so a failed write is still cleaned up.
            chunks.append(output_chunk)
            for elem in current_chunk:
                output_chunk.write(elem.obj)
            output_chunk.close()

        output_file = output_class(output_path)
        try:
            for elem in heapq.merge(*chunks):
                output_file.write(elem.obj)
            # Argument-less write() after the merge is kept from the original
            # code; presumably it flushes output_class's buffer -- TODO confirm.
            output_file.write()
        finally:
            # Close the result even when the merge fails, so the handle
            # is not leaked.
            output_file.close()
    finally:
        for chunk in chunks:
            _discard_chunk(chunk)
# magic
# [page navigation residue, not code] Comment list
# [page navigation residue, not code] Table of contents