def bulk_process(logger, output_dir, filename_tmpl, function, func_input, var_arg, resume=False): # pylint: disable=too-many-arguments
    """Process a function in bulk using an iterable input and a variable argument.

    Each ``(basename, value)`` pair yielded by *func_input* is passed to
    *function* through the keyword named by *var_arg*, together with a file
    writer opened on ``filename_tmpl % basename`` inside *output_dir*.
    Existing output files are skipped unless *resume* is true, in which case
    the file is appended to and ``since_id`` is set from its latest entry.
    Returns the number of inputs processed successfully.
    """
    if not path.exists(output_dir):
        makedirs(output_dir)
        logger.info("created output directory: %s", output_dir)
    num_processed = 0
    for name, item in func_input:
        target = path.join(output_dir, filename_tmpl % name)
        # A pre-existing file means an earlier run touched this input:
        # either skip it outright, or pick up where that run stopped.
        resume_id = None
        if path.exists(target):
            if not resume:
                logger.warning("skipping existing file: %s", target)
                continue
            resume_id = _get_latest_id(target)
        try:
            logger.info("processing: %s", item)
            kwargs = {var_arg: item}
            if resume_id is not None:
                kwargs["since_id"] = resume_id
                logger.info("latest id processed: %d", resume_id)
            # Append when resuming so earlier output is preserved.
            mode = "a" if resume else "w"
            with open(target, mode) as out:
                function(out, **kwargs)
            num_processed += 1
        except TweepError:
            # Best-effort bulk run: log the API failure and move on.
            logger.exception("exception while using the REST API")
    return num_processed
# NOTE(review): removed scraped-page residue that followed the function
# ("评论列表" = "comment list", "文章目录" = "table of contents") — as bare
# identifiers these would raise NameError when the module is imported.