def build(cls, initializer, destfile = None, tempdir = None, idmap = None):
    """
    Pack every item of ``initializer`` into ``destfile`` and map the result.

    Everything is written starting at the position ``destfile`` has on
    entry (the "base"), in this order:

    1. ``cls._Header`` followed by ``cls._NewHeader``; both are first written
       with zeroed fields and rewritten in place at the end, once the real
       sizes and offsets are known.
    2. One ``cls.schema.pack(...)`` buffer per item, back to back.
    3. The index: a ``numpy.uint64`` array holding, for each item, its
       offset relative to the base.
    4. ``cls.schema`` itself, pickled with protocol 2.

    On return the file is positioned just past the pickled schema.

    :param initializer: iterable of items to pack; it is iterated once.
    :param destfile: file object to write into (``tell``, ``write``,
        ``flush`` and ``seek`` are used). When ``None``, a
        ``tempfile.NamedTemporaryFile`` is created in ``tempdir``.
    :param tempdir: directory for the temporary file; only used when
        ``destfile`` is ``None``.
    :param idmap: handed unchanged to ``schema.pack`` for every item.
    :return: the value of ``cls.map_file(destfile, base)``.
    """
    if destfile is None:
        destfile = tempfile.NamedTemporaryFile(dir = tempdir)

    base = destfile.tell()
    emit = destfile.write

    # Placeholder headers: reserve the space now, fill in real values later.
    emit(cls._Header.pack(0, 0, 0))
    emit(cls._NewHeader.pack(cls._CURRENT_VERSION, cls._CURRENT_MINIMUM_READER_VERSION, 0, 0))
    destfile.flush()

    # Offset (relative to base) at which the next item will be written.
    offset = destfile.tell() - base
    schema = cls.schema

    pending = []    # offsets not yet moved into a numpy chunk
    chunks = []     # finished uint64 chunks of the index
    for item in initializer:
        pending.append(offset)
        packed = schema.pack(item, idmap, None, None, offset)
        emit(packed)
        offset += len(packed)
        # Every 100000 offsets, move them from the Python list into a
        # uint64 array (presumably to bound the list's memory use).
        if len(pending) >= 100000:
            chunks.append(numpy.array(pending, dtype = numpy.uint64))
            pending = []

    destfile.flush()
    index_offset = destfile.tell() - base

    if pending:
        chunks.append(numpy.array(pending, dtype = numpy.uint64))
    del pending

    if not chunks:
        index = numpy.array([], dtype = numpy.uint64)
    elif len(chunks) == 1:
        index = chunks[0]
    else:
        index = numpy.concatenate(chunks)
    del chunks

    # The index is written as the array's raw bytes.
    emit(buffer(index))
    destfile.flush()

    schema_pos = destfile.tell()
    cPickle.dump(schema, destfile, 2)
    destfile.flush()
    end_pos = destfile.tell()

    total_size = end_pos - base
    schema_offset = schema_pos - base
    schema_size = end_pos - schema_pos

    # Go back and overwrite the placeholder headers with the real values.
    destfile.seek(base)
    emit(cls._Header.pack(total_size, index_offset, len(index)))
    emit(cls._NewHeader.pack(
        cls._CURRENT_VERSION, cls._CURRENT_MINIMUM_READER_VERSION,
        schema_offset, schema_size))
    destfile.flush()

    # Leave the file positioned at the end of what was written.
    destfile.seek(end_pos)
    return cls.map_file(destfile, base)
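# NOTE: stray web-page navigation text ("comment list", "table of contents")
# followed here in the scraped source; it is not part of the code.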