def process_mp(texts, args, pool=None):
    """Apply ``process`` to ``texts`` using a worker pool, keeping input order.

    ``texts`` is enumerated and handed to the ``chunks`` helper, which is
    iterated to obtain groups of batches (presumably up to
    ``args.n_processes`` batches totalling ``args.n_processes * 1000`` items
    per round -- confirm against ``chunks``). Each batch is sent to
    ``process`` through the pool; the items that come back are flattened,
    sorted by their first element (the index), and their second element is
    appended to the returned list.

    Args:
        texts: Iterable of inputs to process.
        args: Options object; ``n_processes`` and ``progress`` are read here,
            and the whole object is passed to ``mp_initialize`` when a pool
            is created.
        pool: Optional existing pool exposing ``map_async``. When omitted, a
            pool is created for this call and shut down before returning.
            A pool supplied by the caller is never closed here.

    Returns:
        list: The processed values, ordered by index within each round.
    """
    # Only a pool created by this call is ours to tear down; a pool passed in
    # by the caller must stay usable after we return.
    owns_pool = pool is None
    progress = None
    results = []
    try:
        if owns_pool:
            pool = mp.Pool(args.n_processes,
                           initializer=mp_initialize,
                           initargs=(args, ))
        iterator = chunks(enumerate(texts),
                          n=args.n_processes,
                          k=args.n_processes * 1000)
        if args.progress:
            progress = tqdm.tqdm()

        for batches in iterator:
            n_items = sum(len(batch) for batch in batches)
            per_batch = pool.map_async(process, batches).get()
            indexed = [item for batch in per_batch for item in batch]
            # Workers may return items in any order; restore index order.
            indexed.sort(key=lambda item: item[0])
            # Taking item[1] directly (rather than unzipping) keeps an empty
            # round from raising ValueError on unpacking.
            results.extend(item[1] for item in indexed)
            if progress is not None:
                progress.update(n_items)
    finally:
        if progress is not None:
            progress.close()
        if owns_pool and pool is not None:
            # Every result has already been collected (or we are unwinding
            # from an error), so stopping the workers outright is safe; this
            # mirrors what leaving a ``with Pool()`` block does.
            pool.terminate()
            pool.join()
    return results
评论列表
文章目录