def search_for_field(search_obj, field_name, page_size=DEFAULT_ES_LOOP_PAGE_SIZE):
    """
    Retrieves all unique instances of a field for documents that match an ES query

    The search is executed one page at a time (``page_size`` hits per request)
    until the end of the requested window reaches the total hit count reported
    by the most recent page of results.

    Args:
        search_obj (Search): Search object
        field_name (str): The name of the field for the value to get
        page_size (int): Number of docs per page of results. Must be positive.

    Returns:
        set: Set of unique values

    Raises:
        ValueError: If page_size is zero or negative
    """
    # A page size of zero would never advance the window (endless loop whenever
    # the query has matches), and a negative one yields nonsensical slices.
    if page_size <= 0:
        raise ValueError(
            "page_size must be a positive number, got {!r}".format(page_size)
        )

    results = set()
    # Maintaining a consistent sort on '_doc' will help prevent bugs where the
    # index is altered during the loop.
    # This also limits the query to only return the field value.
    search_obj = search_obj.sort('_doc').fields(field_name)

    from_index = 0
    while True:
        to_index = from_index + page_size
        search_results = execute_search(search_obj[from_index:to_index])
        # Collect the first element of the requested attribute from every hit
        # on this page.
        results.update(
            getattr(hit, field_name)[0] for hit in search_results.hits
        )
        # Stop once the window just requested covers the reported total.
        if to_index >= search_results.hits.total:
            return results
        from_index = to_index
评论列表
文章目录