def dataset_to_file(dataset, ngrams, filename='dataset'):
    """Save a 3D dataset to a NumPy ``.npz`` archive in sparse (CSR) form.

    The dataset is flattened to 2D, converted to a CSR sparse matrix, and the
    CSR components are stored under the keys ``data``, ``indices``, ``indptr``
    and ``shape``. ``num_entries`` (as a one-element array) and ``ngrams`` are
    stored alongside so that a loader can restore the original 3D layout.

    Args:
        dataset (:class:`np.ndarray`): the dataset to save, of shape
            ``(num_samples, num_entries, num_features)`` (built with
            :func:`dataset_tools.build_dataset`). Any array-like that
            converts to a 3D array is accepted.
        ngrams (list of strings): the ngrams used to compute the features
        filename (string): the filename without extension; ``np.savez``
            appends ``.npz`` when the name does not already end with it.

    Raises:
        ValueError: if ``dataset`` is not 3-dimensional.
    """
    dataset = np.asarray(dataset)
    if dataset.ndim != 3:
        raise ValueError(
            'dataset must be 3-dimensional '
            '(num_samples, num_entries, num_features), got shape {}'.format(
                dataset.shape))
    num_samples, num_entries, num_features = dataset.shape

    # CSR matrices are 2D, so the first two axes are merged: each row of the
    # flattened matrix holds the features of one (sample, entry) pair, and
    # the `num_entries` rows belonging to a sample are consecutive.
    flattened = dataset.reshape(num_samples * num_entries, num_features)
    dataset_sp = sparse.csr_matrix(flattened)

    # `num_entries` is saved so the reader can undo the flattening above.
    np.savez(
        filename,
        num_entries=np.array([num_entries]),
        data=dataset_sp.data,
        indices=dataset_sp.indices,
        indptr=dataset_sp.indptr,
        shape=dataset_sp.shape,
        ngrams=ngrams,
    )
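# NOTE: web-page navigation residue ("comment list" / "article contents") was
# captured here along with the code; it is not part of the module.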