def _precompute(self, Xy_generator, cache):
    """Precompute (X, y) pairs from a generator into an HDF5 cache file.

    Parameters
    ----------
    Xy_generator : iterable of (X_, y_)
        Yields per-sequence features `X_` with shape
        (?, n_samples, n_features) -- only the last two dims are stored;
        the shape is taken from the first yielded item -- and a label
        `y_` stored as a variable-length bytes scalar.
    cache : str
        Path of the HDF5 file to (over)write; opened with mode='w'.

    Notes
    -----
    Datasets 'X' and 'y' start with capacity 1000 sequences and are
    grown by ~10% whenever full, then shrunk to the exact count at the
    end.
    """
    with h5py.File(cache, mode='w') as fp:
        # initial capacity; grown on demand below
        n_sequences = 1000
        y = fp.create_dataset(
            'y', shape=(n_sequences, ),
            dtype=h5py.special_dtype(vlen=bytes),
            maxshape=(None, ))

        # 'X' is created lazily: its sample/feature dims are only known
        # once the first item has been yielded.
        X = None
        # i stays -1 when the generator is empty, so the final resize
        # correctly shrinks 'y' to zero instead of raising NameError.
        i = -1
        for i, (X_, y_) in enumerate(Xy_generator):
            if i == 0:
                _, n_samples, n_features = X_.shape
                X = fp.create_dataset(
                    'X', dtype=X_.dtype, compression='gzip',
                    shape=(n_sequences, n_samples, n_features),
                    chunks=(1, n_samples, n_features),
                    maxshape=(None, n_samples, n_features))
            # grow capacity by ~10% when full
            if i == n_sequences:
                n_sequences = int(n_sequences * 1.1)
                y.resize(n_sequences, axis=0)
                X.resize(n_sequences, axis=0)
            # store current X, y in file
            y[i] = y_
            X[i] = X_
        # shrink to the exact number of sequences written: `i` is the
        # index of the LAST stored item, so i + 1 items exist (the
        # original resized to `i`, silently dropping the last sequence).
        y.resize(i + 1, axis=0)
        if X is not None:
            X.resize(i + 1, axis=0)
# NOTE(review): removed scraped web-page residue ("评论列表" / "文章目录",
# i.e. "comment list" / "table of contents") that was not Python code.