def export_data_h5(vocabulary, embedding_matrix, output='embedding.h5'):
    """Write a vocabulary and its embedding matrix to an HDF5 file.

    The file created at ``output`` (truncating any existing file) contains:

    * attribute ``vocab_len`` -- ``len(vocabulary)``;
    * dataset ``words_flatten`` -- shape ``(1,)``, variable-length string,
      holding every word of ``vocabulary`` joined by ``'\\n'``;
    * dataset ``embedding`` -- a copy of ``embedding_matrix`` with the same
      shape and dtype.

    Both datasets are written with gzip level 9 and the shuffle filter.

    Args:
        vocabulary: Sized iterable of strings (it is passed to ``len`` and
            to ``'\\n'.join``).
            NOTE(review): a word containing ``'\\n'`` would presumably not
            survive splitting on newlines when read back -- confirm against
            the loader.
        embedding_matrix: Array-like exposing ``.shape`` and ``.dtype``
            (e.g. a NumPy array).
        output: Destination path of the HDF5 file.

    Returns:
        None. The vocabulary size is printed to stdout as a side effect.
    """
    compress_option = dict(compression="gzip", compression_opts=9, shuffle=True)
    words_flatten = '\n'.join(vocabulary)
    vocab_len = len(vocabulary)

    # The context manager guarantees the HDF5 handle is flushed and closed
    # even if one of the writes below raises.
    with h5py.File(output, "w") as f:
        f.attrs['vocab_len'] = vocab_len
        # Parenthesised single-argument form prints identically on
        # Python 2 and Python 3.
        print(vocab_len)

        # Store the whole vocabulary as a single variable-length string.
        dt = h5py.special_dtype(vlen=str)
        dset_vocab = f.create_dataset(
            'words_flatten', (1, ), dtype=dt, **compress_option)
        dset_vocab[...] = [words_flatten]

        dset_embedding = f.create_dataset(
            'embedding', embedding_matrix.shape,
            dtype=embedding_matrix.dtype, **compress_option)
        dset_embedding[...] = embedding_matrix
embedding_h5.py 文件源码
python
阅读 20
收藏 0
点赞 0
评论 0
评论列表
文章目录