def _ingest_pairs(self, pairs, oid2nid, frame_size, limit, single_sided):
    """Ingest pairs ``0 .. limit`` frame by frame, printing progress.

    The whole pairs set does not fit in memory, so it is processed in
    frames of at most ``frame_size`` pairs. Each frame is fetched with
    ``pairs.read``, translated with ``self._translate_frame`` and written
    with ``self._ingest_pairs_frame``. The duration of every phase (as
    measured by ``process_time``) is printed as the next phase starts.

    Args:
        pairs: Source object exposing ``read(start=..., stop=...)``.
        oid2nid: Mapping whose ``get`` method is vectorized and handed to
            ``_translate_frame`` (presumably old id -> new id; confirm
            against the caller).
        frame_size: Maximum number of pairs per frame; used as the
            ``range`` step, so it must be non-zero.
        limit: Exclusive upper bound on the pair index to ingest.
        single_sided: Passed through unchanged to ``_translate_frame``.
    """
    oid2nid_v = np.vectorize(oid2nid.get)
    for start in range(0, limit, frame_size):
        # Clamp the last frame so that no more than `limit` pairs are read
        # and the progress message never reports a range beyond `limit`.
        stop = min(start + frame_size, limit)

        t1 = process_time()
        six.print_('Fetching pairs {0}:{1} of {2} ... '.format(start, stop, limit), end='', flush=True)
        raw_frame = pairs.read(start=start, stop=stop)

        t2 = process_time()
        six.print_('{0}s, Parsing ... '.format(int(t2 - t1)), flush=True)
        frame = self._translate_frame(raw_frame, oid2nid_v, single_sided)
        # The raw frame is no longer needed; drop our reference before the
        # write phase and the next fetch to keep peak memory down.
        del raw_frame

        t3 = process_time()
        # Report how long parsing took (the placeholder was missing before,
        # so the computed duration was silently discarded).
        six.print_('{0}s, Writing ... '.format(int(t3 - t2)), flush=True)
        # NOTE(review): an earlier comment here mentioned alternating the
        # write direction to reuse cached chunks of the previous frame, but
        # no direction is passed in this call -- confirm whether
        # `_ingest_pairs_frame` handles that itself.
        self._ingest_pairs_frame(frame)
        # Release the translated frame before the next one is fetched.
        del frame

        t4 = process_time()
        six.print_('{0}s, Done with {1}:{2} in {3}s'.format(int(t4 - t3), start, stop, int(t4 - t1)), flush=True)
评论列表
文章目录