def iter_chunks(self, chunksize=None):
    """
    Iteratively yield the index as chunks of documents, each of size <= chunksize.

    The shards in `self.shards` are visited in order, and the index matrix of
    each shard is cut into consecutive row slices. Every chunk is yielded in
    its raw form (matrix or sparse matrix slice). Chunks never span two shards,
    so the last chunk of a shard may be smaller than requested; it is up to the
    caller to check the result for real length, using `chunk.shape[0]`.

    Parameters
    ----------
    chunksize : int, optional
        Maximum number of rows (documents) per chunk. If None, the value of
        `self.chunksize` is used. Expected to be a positive integer: zero
        makes `range` raise ValueError, and a negative value yields nothing.

    Yields
    ------
    Row slice of a shard's index matrix, holding at most `chunksize` rows.
    """
    # NOTE(review): presumably this finalizes the shard currently being built
    # so that it is part of `self.shards` below -- confirm against close_shard.
    self.close_shard()

    if chunksize is None:
        # if not explicitly specified, use the chunksize from the constructor
        chunksize = self.chunksize

    for shard in self.shards:
        query = shard.get_index().index
        num_docs = query.shape[0]  # loop-invariant, so read it only once
        # `range` (rather than the Python 2-only `xrange`) keeps this loop
        # working on both Python 2 and Python 3.
        for chunk_start in range(0, num_docs, chunksize):
            # scipy.sparse doesn't allow slicing beyond real size of the matrix
            # (unlike numpy). so, clip the end of the chunk explicitly to make
            # scipy.sparse happy
            chunk_end = min(num_docs, chunk_start + chunksize)
            # row slice only; for a dense numpy array this is a view, not a copy
            yield query[chunk_start:chunk_end]
# Web-page residue, not part of the code: "Comment list" / "Article table of contents"