def corpus2dense(corpus, num_terms, num_docs=None, dtype=numpy.float32):
    """
    Convert corpus into a dense numpy array (documents will be columns).

    You must supply the number of features `num_terms`, because dimensionality
    cannot be deduced from the sparse vectors alone.

    You can optionally supply `num_docs` (=the corpus length) as well, so that
    a more memory-efficient code path is taken: the output array is allocated
    once up front and filled column by column, instead of collecting one
    dense vector per document and stacking them afterwards.

    This is the mirror function to `Dense2Corpus`.

    Parameters
    ----------
    corpus : iterable
        Iterable of documents; each document is passed unchanged to
        `sparse2full(doc, num_terms)`.
    num_terms : int
        Number of features, i.e. the number of rows of the result.
    num_docs : int, optional
        Number of documents in `corpus`. When given, it must match the number
        of documents the corpus actually yields.
    dtype : numpy dtype, optional
        Data type of the returned array.

    Returns
    -------
    numpy.ndarray
        Array of type `dtype` with one column per document. An empty corpus
        yields an array of shape `(num_terms, 0)`.

    Raises
    ------
    AssertionError
        If `num_docs` is given and the corpus yields fewer documents.
    IndexError
        If `num_docs` is given and the corpus yields more documents (the
        write into the preallocated array goes out of bounds).
    """
    if num_docs is not None:
        # we know the number of documents => don't bother column_stacking
        docno = -1  # stays -1 for an empty corpus, so the check below sees 0 documents
        result = numpy.empty((num_terms, num_docs), dtype=dtype)
        for docno, doc in enumerate(corpus):
            result[:, docno] = sparse2full(doc, num_terms)
        # numpy.empty leaves unwritten columns uninitialized, so a short corpus
        # must not slip through unnoticed.
        assert docno + 1 == num_docs, (
            "corpus yielded %s documents, but num_docs=%s" % (docno + 1, num_docs)
        )
    else:
        # Materialize a list: NumPy deprecated (1.16) and later rejected
        # generators as input to the stack functions.
        columns = [sparse2full(doc, num_terms) for doc in corpus]
        if not columns:
            # column_stack cannot stack zero arrays; return the same empty
            # result that the num_docs=0 path produces.
            return numpy.empty((num_terms, 0), dtype=dtype)
        result = numpy.column_stack(columns)
    # copy=False avoids duplicating the array when it already has `dtype`.
    return result.astype(dtype, copy=False)
# Comment list
# Table of contents