def preview(self, fraction=0.1):
    """Print a sample of the comments matched by ``self.query``.

    Issues one search against the ``fcc-comments`` index for up to
    ``self.limit`` hits, prints the reported total, and then prints every
    ``int(1 / fraction)``-th hit (position, id, score and the first 1000
    characters of its ``text_data``).

    Args:
        fraction: Approximate share of the fetched hits to print. Must be
            positive; a value of 1 or more prints every hit.

    Returns:
        A list with the ``_score`` of every fetched hit, in response order.
        Empty when ``self.limit`` is not positive or nothing matched.

    Raises:
        ValueError: If ``fraction`` is zero or negative.
    """
    if fraction <= 0:
        raise ValueError('fraction must be positive, got %r' % (fraction,))

    # A fraction above 1 truncates to 0, which would make the modulo below
    # divide by zero; clamp so that such values simply print every hit.
    mod_print = max(1, int(1 / fraction))

    scores = []
    if self.limit <= 0:
        return scores

    # Use search instead of scan because keeping an ordered scan cursor
    # open negates the performance benefits.
    #
    # The request already asks for self.limit hits, so one response is the
    # whole preview. Re-issuing the identical query can only return the same
    # page again, and would never terminate when fewer than self.limit
    # documents match.
    resp = self.es.search(index='fcc-comments', body=self.query, size=self.limit)
    print('total=%s mod_print=%s' % (resp['hits']['total'], mod_print))

    for fetched, doc in enumerate(resp['hits']['hits'], start=1):
        scores.append(doc['_score'])
        if not fetched % mod_print:
            print('\n--- comment %s\t%s\t%s\t%s' % (
                fetched, doc['_id'], doc['_score'],
                doc['_source']['text_data'][:1000]))

    return scores
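# 评论列表 (comment list) -- web-page residue, not part of the program
# 文章目录 (article table of contents) -- web-page residue, not part of the program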