def ContentGenerator(csv_file,
                     batch_size,
                     create_csv_reader=csv.reader,
                     create_csv_writer=csv.writer):
    """Re-serializes CSV data in batches of at most batch_size rows.

    Rows are parsed from csv_file (with skipinitialspace=True, so blanks
    directly after a delimiter are dropped) and written back out through a
    fresh CSV writer for every batch.

    Args:
      csv_file: A file-like object (or any iterable of lines) for reading
        CSV data.
      batch_size: Maximum number of CSV rows to yield on each iteration.
        Must be at least 1.
      create_csv_reader, create_csv_writer: Used for dependency injection.

    Yields:
      Tuple (entity_count, csv_content) where:
        entity_count: Number of entities contained in the csv_content. Will
          be less than or equal to the batch_size and greater than 0.
        csv_content: String containing the CSV content containing the next
          entity_count entities.

    Raises:
      ValueError: If batch_size is less than 1. Raised when the generator is
        first advanced. Without this check no row could ever be consumed and
        the generator would loop forever without yielding.
    """
    if batch_size < 1:
        raise ValueError(
            'batch_size must be at least 1, got %r' % (batch_size,))

    # Best effort: set the csv module's maximum field size. This is a
    # module-wide setting; csv modules without field_size_limit are tolerated.
    try:
        csv.field_size_limit(800000)
    except AttributeError:
        pass

    reader = create_csv_reader(csv_file, skipinitialspace=True)
    exhausted = False
    while not exhausted:
        rows_written = 0
        # A new buffer and writer per batch keeps each yielded string
        # limited to that batch's rows.
        content = StringIO.StringIO()
        writer = create_csv_writer(content)
        try:
            while rows_written < batch_size:
                # Built-in next() works with any iterator, not only ones
                # that expose a Python 2 style .next() method.
                writer.writerow(next(reader))
                rows_written += 1
        except StopIteration:
            exhausted = True
        # The final batch may be empty (input length is a multiple of
        # batch_size, or input is empty); never yield an empty batch.
        if rows_written > 0:
            yield rows_written, content.getvalue()
# Comment list
# Article table of contents