def load_embeddings_mp(path, word_dim, processes=None):
    """Load word embeddings from a text file using a multiprocessing pool.

    The file is streamed in bounded batches (via ``chunks``); each batch is
    parsed in parallel by ``_mp_process`` workers (each initialized with
    ``word_dim``), and the resulting per-batch dicts are merged with
    ``aggregate_dicts`` into one dict.

    Args:
        path: Path to the embeddings text file, read line by line.
        word_dim: Embedding dimensionality; forwarded to each worker's
            initializer (``_mp_initialize``).
        processes: Number of worker processes; defaults to the CPU count.

    Returns:
        dict merging every batch's output, as produced by
        ``_mp_process`` / ``aggregate_dicts``.
    """
    if processes is None:
        processes = multiprocessing.cpu_count()
    pool = mp.Pool(processes, initializer=_mp_initialize,
                   initargs=(word_dim,))
    try:
        ret = {}
        # Keep the file open for the whole consumption loop: `chunks` is
        # presumably lazy over the file handle — TODO confirm.
        with open(path, "r") as f:
            for batches in chunks(f, n=processes, k=processes * 10000):
                # pool.map blocks until the batch is done — equivalent to
                # the original map_async(...).get(), without the detour.
                results = pool.map(_mp_process, batches)
                ret.update(aggregate_dicts(*results))
        return ret
    finally:
        # The original never shut the pool down, leaking worker processes;
        # always tear them down, even if parsing raises.
        pool.close()
        pool.join()