def paginate(self, index, q='*', limit=None, size=None, id_only=True):
    """Scan ``index`` for documents matching ``q`` and yield them in batches.

    This is a generator: nothing is sent to the cluster until iteration
    starts. Each yielded item is an iterator over one batch of hits.

    Args:
        index: Index name handed to ``Search(index=...)``.
        q: ``query_string`` query; the default ``'*'`` is passed as-is.
        limit: Maximum total number of hits to emit. ``None`` means no
            cap; ``0`` (or a negative value) emits nothing.
        size: Number of hits per yielded batch. Falls back to
            ``self.bulk_size`` when falsy, and is capped at ``limit``
            when a limit is given.
        id_only: When true, source fetching is switched off and each
            batch holds ``hit.meta.id`` values; otherwise each batch
            holds ``hit.to_dict()`` results.

    Yields:
        An iterator over at most ``size`` hits. The final batch may be
        shorter. No batch is yielded when there are no hits.
    """
    if not size:
        size = self.bulk_size
    log.info('Limit %s, size %s (q = "%s")', limit, size, q)

    s = Search(
        using=self.client,
        index=index,
        doc_type=self.doc_type)
    s = s.query(Q('query_string', query=q))
    if limit:
        # Never ask for (or batch) more than the caller wants in total.
        size = min(size, limit)
    s = s.extra(size=size)
    s = s.params(
        scroll='20m',
        size=size)
    if id_only:
        s = s.source(False)
    log.debug('Query: %s', simplejson.dumps(s.to_dict(), indent=2))

    capped = limit is not None
    if capped and limit <= 0:
        # Nothing may be emitted, so do not start the scan at all.
        return

    hits = []
    overall = 0
    for h in s.scan():
        log.debug('Hit: %s (progress: %d)', h.meta.id, overall)
        hits.append(h.meta.id if id_only else h.to_dict())
        # Count every hit (not every flushed batch) so the limit is
        # enforced exactly instead of being rounded up to a full batch.
        overall += 1
        if len(hits) == size:
            yield iter(hits)
            hits = []
        if capped and overall >= limit:
            # Stop before pulling another hit from the scan.
            break

    # Flush the trailing partial batch. A generator ends by returning;
    # raising StopIteration here would become RuntimeError (PEP 479).
    if hits:
        yield iter(hits)
# Comment list
# Article table of contents