# NOTE: these snippets were collected from separate modules; each assumes at
# least `from elasticsearch_dsl import Q, Search` (plus the low-level
# `elasticsearch` client and an `escape` helper where used).
def get_summary_statistics():
    """
    Obtains statistics about the current sums of flows, packets, and bytes.
    :return: JSON with status "ok" or "error" and the requested data.
    """
    try:
        # Elastic query
        client = elasticsearch.Elasticsearch(
            [{'host': myconf.get('consumer.hostname'), 'port': myconf.get('consumer.port')}])
        elastic_bool = []
        elastic_bool.append({'range': {'@timestamp': {'gte': "now-5m", 'lte': "now"}}})
        elastic_bool.append({'term': {'@type': 'protocols_statistics'}})
        qx = Q({'bool': {'must': elastic_bool}})
        s = Search(using=client, index='_all').query(qx)
        s.aggs.bucket('sum_of_flows', 'sum', field='flows')
        s.aggs.bucket('sum_of_packets', 'sum', field='packets')
        s.aggs.bucket('sum_of_bytes', 'sum', field='bytes')
        s = s.sort('@timestamp')  # Search.sort() returns a new Search; reassign it
        result = s.execute()
        # Parse the result into CSV in the format: timestamp, flows, packets, bytes
        data = "Timestamp, Flows, Packets, Bytes;"
        timestamp = "Last 5 Minutes"
        data += timestamp + ', ' + \
            str(int(result.aggregations.sum_of_flows['value'])) + ', ' + \
            str(int(result.aggregations.sum_of_packets['value'])) + ', ' + \
            str(int(result.aggregations.sum_of_bytes['value']))
        json_response = '{"status": "Ok", "data": "' + data + '"}'
        return json_response
    except Exception as e:
        json_response = '{"status": "Error", "data": "Elasticsearch query exception: ' + escape(str(e)) + '"}'
        return json_response
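# For reference, a sketch of the request body the function above builds
# (via s.to_dict(); field names as defined in the snippet):
#
#   {"query": {"bool": {"must": [
#        {"range": {"@timestamp": {"gte": "now-5m", "lte": "now"}}},
#        {"term": {"@type": "protocols_statistics"}}]}},
#    "aggs": {"sum_of_flows": {"sum": {"field": "flows"}},
#             "sum_of_packets": {"sum": {"field": "packets"}},
#             "sum_of_bytes": {"sum": {"field": "bytes"}}},
#    "sort": ["@timestamp"]}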
def query_geonames_country(self, placename, country):
    """
    Like query_geonames, but this time limited to a specified country.
    """
    # first, try for an exact phrase match
    q = {"multi_match": {"query": placename,
                         "fields": ['name^5', 'asciiname^5', 'alternativenames'],
                         "type": "phrase"}}
    r = Q("match", country_code3=country)
    res = self.conn.query(q).query(r)[0:50].execute()
    # if no results, use some fuzziness, but still require all terms to be present.
    # Fuzziness is not allowed in "phrase" searches.
    if res.hits.total == 0:
        # tried wrapping this in {"constant_score": {"query": ...}}, but it made things worse
        q = {"multi_match": {"query": placename,
                             "fields": ['name', 'asciiname', 'alternativenames'],
                             "fuzziness": 1,
                             "operator": "and"}}
        r = Q("match", country_code3=country)
        res = self.conn.query(q).query(r)[0:50].execute()
    out = utilities.structure_results(res)
    return out
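# Note on the chaining above: calling .query() twice on a Search combines the
# two queries with a logical AND, i.e. it is equivalent to
# Q('bool', must=[...]) in elasticsearch-dsl.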
def query_missing(s, field, name, methods=None, responsecodes=None, invert=False):
    # main query
    q = Q("match", **{field: name})
    if not invert:
        q = ~q
    s.query = q
    # add filters
    ## method
    if methods:
        s = s.filter("terms", **{'request.method': methods})
    ## response codes
    if responsecodes:
        for rc in responsecodes:
            rcrange = rc.split("-")
            if len(rcrange) == 2:
                s = s.filter("range", **{'response.status': {"gte": int(rcrange[0]), "lte": int(rcrange[1])}})
            else:
                s = s.filter("term", **{'response.status': rc})
    print_debug(s.to_dict())
    return s
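# Example call (hypothetical Search object; the field and header names are
# only illustrative): find documents whose 'request.headers.name' never
# matched 'referer', restricted to 2xx responses:
#
#   s = query_missing(Search(using=client, index='http-*'),
#                     'request.headers.name', 'referer',
#                     responsecodes=['200-299'])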
def query_vals(s, field, name, values, invert):
    # match documents where the given field value is present, if required
    if values:
        q = Q("nested", path=field, query=Q("wildcard", **{field + ".value.keyword": values}))
        if invert:
            s.query = ~q
        else:
            s.query = q
    else:
        s.query = Q()
    # 1. descend into response.headers/request.parameters
    # 2. filter the given header
    # 3. aggregate values
    # 4. jump back into the main document
    # 5. aggregate URLs
    s.aggs.bucket("field", "nested", path=field)\
        .bucket("valuefilter", "filter", Q("match", **{field + ".name": name}))\
        .bucket("values", "terms", field=field + ".value.keyword", size=args.size)\
        .bucket("main", "reverse_nested")\
        .bucket("urls", "terms", field="request.url.keyword", size=args.size)
    return s
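# The aggregation tree above nests as:
#   field (nested) -> valuefilter (filter) -> values (terms)
#     -> main (reverse_nested) -> urls (terms)
# so each distinct header/parameter value is further broken down by the
# request URLs it appears on.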
def about(request):
    """Information about the current site, its goals, and what content is loaded"""
    # Provider counts
    providers = cache.get_or_set(CACHE_STATS_NAME, [], CACHE_STATS_DURATION)
    if not providers:
        for provider in sorted(settings.PROVIDERS.keys()):
            s = Search()
            q = Q('term', provider=provider)
            s = s.query(q)
            response = s.execute()
            if response.hits.total > 0:
                data = settings.PROVIDERS[provider]
                total = intcomma(response.hits.total)
                data.update({'hits': total})
                providers.append(data)
        # All results
        s = Search()
        response = s.execute()
        total = intcomma(response.hits.total)
        providers.append({'display_name': 'Total', 'hits': total})
        cache.set(CACHE_STATS_NAME, providers)
    return render(request, "about.html", {'providers': providers})
def correct_orphan_records(self, provider='europeana', end=None):
    """[#185] Delete records from the search engine which aren't found in the database"""
    s = Search()
    q = Q('term', provider=provider)
    s = s.query(q)
    response = s.execute()
    total = response.hits.total
    # A file extracted from the production database listing all of the europeana identifiers
    identifier_file = '/tmp/europeana-identifiers.json'
    db_identifiers = set(json.load(open(identifier_file)))
    total_in_db = len(db_identifiers)
    log.info("Using search engine instance %s", settings.ELASTICSEARCH_URL)
    log.info("Total records: %d (search engine), %d (database) [diff=%d]", total, total_in_db, total - total_in_db)
    deleted_count = 0
    for r in s.scan():
        if r.identifier not in db_identifiers:
            img = search.Image.get(id=r.identifier)
            log.debug("Going to delete image %s", img)
            deleted_count += 1
    log.info("Deleted %d from search engine", deleted_count)
def aggregate(self, search):
    """
    Add aggregations representing the facets selected, including potential
    filters.
    """
    for f, facet in iteritems(self.facets):
        agg = facet.get_aggregation()
        agg_filter = esd.Q('match_all')
        for field, filter in iteritems(self._filters):
            if f == field or (f.startswith("date") and field.startswith("date")):
                continue
            agg_filter &= filter
        search.aggs.bucket(
            '_filter_' + f,
            'filter',
            filter=agg_filter
        ).bucket(f, agg)
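# Q objects compose with &, which is why agg_filter &= filter above
# accumulates the per-facet filters into a single bool/must query. A minimal
# self-contained sketch (field names made up for illustration):
#
#   from elasticsearch_dsl import Q
#   f = Q("term", color="red") & Q("term", size="XL")
#   assert f.to_dict() == {"bool": {"must": [
#       {"term": {"color": "red"}},
#       {"term": {"size": "XL"}}]}}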
def build(self):
    fs = self._clone()
    for facet in self.facets:
        if "include_%s" % facet.name not in self.args:
            continue
        agg_filter = esd.Q("match_all")
        for inner in self.facets:
            if inner.name != facet.name:
                if inner.is_filtered(self.args):
                    agg_filter &= inner.filters(self.args)
        for agg_name, agg in facet.aggregates():
            fs.aggs.bucket("_filter_" + agg_name, "filter", filter=agg_filter).bucket(agg_name, agg)
    post_filter = esd.Q('match_all')
    for facet in self.facets:
        if facet.is_filtered(self.args):
            post_filter &= facet.filters(self.args)
    fs.post_filter._proxied &= post_filter
    return fs
def tag_by_email(self, emails, breached):
    docs = []
    s = Search(using=self.es).\
        filter(Q({'terms': {'contact_email.keyword': emails}})).\
        source(['id_submission'])
    print('%s emails breached=%s' % (len(emails), breached))
    for hit in s.scan():
        docs.append(lib.bulk_update_doc(hit['id_submission'], {'breached': breached}))
        if not len(docs) % 500:
            print('\tfetched %s' % len(docs))
    print('\t%s matches' % len(docs))
    return docs
def run(self):
    emails = {
        'breached': set(),
        'unbreached': set(),
    }
    # contact_email exists
    must = [Q('exists', field='contact_email')]
    # matches source if specified
    if self.source:
        must.append(Q({'term': {'analysis.source': self.source}}))
    # not already tagged with breached
    s = Search(using=self.es).\
        query(FunctionScore(
            query=Q('bool',
                    must=must,
                    must_not=[Q('exists', field='analysis.breached')]),
            functions=[SF('random_score', seed=int(time.time()))]
        )).\
        source(['contact_email'])
    print('%s breached: source=%s limit=%s' % (datetime.now().isoformat(), self.source,
                                               self.limit))
    print('query=\n%s' % json.dumps(s.to_dict()))
    for filing in s[:self.limit]:
        email = filing['contact_email']
        if not email or email in emails['breached'] or email in emails['unbreached']:
            continue
        breached = self.is_breached(email)
        emails['breached' if breached else 'unbreached'].add(email)
    docs = []
    print('done source=%s' % self.source)
    if emails['breached']:
        docs += self.tag_by_email(list(emails['breached']), True)
    if emails['unbreached']:
        docs += self.tag_by_email(list(emails['unbreached']), False)
    try:
        lib.bulk_update(self.es, docs)
    except Exception as e:
        print('error indexing: %s' % e)
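# The FunctionScore wrapper above, with SF('random_score', seed=...), orders
# the hits randomly, so repeated runs sample a different batch of untagged
# filings up to self.limit.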
def monitor(index, delta, query_string):
    click.clear()

    def cnt():
        q = Q('query_string', query=query_string)
        s = Search(
            using=es.client,
            index=index).query(q)
        return s.count()

    N = cnt()
    tot = Search(using=es.client, index=index).count()
    if not delta:
        N = tot
    log.info('Processing %d records (total: %d)', N, tot)
    click.echo('You can exit by CTRL-C: results will still process')
    bar = SlowOverallFancyBar('', max=N, grand_total=tot)
    while True:
        time.sleep(5.0)
        try:
            n = cnt()
            if isinstance(n, int):
                if delta:
                    done = N - n
                else:
                    done = n
                bar.goto(done)
        except Exception as e:
            log.warning('Cannot count: %s', e)
    bar.finish()
def search(self, **kwargs):
    q = kwargs.get('q', '*')
    sort = kwargs.get('sort', 'timestamp')
    search_after = kwargs.get('search_after')
    size = kwargs.get('size', 50)
    source = kwargs.get('source')
    extra = dict(size=size)
    if search_after:
        extra.update(dict(search_after=search_after))
    s = Search(using=self.client, index=self.index_name)
    if source:
        s = s.source(source)
    s = s.sort(sort)
    s = s.query(Q('query_string', query=q))
    s = s.extra(**extra)
    log.info('Query: %s', s.to_dict())
    r = s.execute()
    count = r.hits.total
    took = r.took
    result = r, count, took
    return result
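# Example call (hypothetical wrapper instance; query and field names are
# only illustrative):
#
#   hits, count, took = searcher.search(q='status:active', sort='-timestamp',
#                                       size=10, source=['id', 'timestamp'])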
def search_reports(state, must_terms, should_terms):
    s = Report.search()
    q = Q('bool',
          must=[Q('match', body=term) for term in must_terms],
          should=[Q('match', body=term) for term in should_terms],
          minimum_should_match=1
          )
    s = s.filter('terms', state=[state]).query(q)
    response = s.execute()
    return response.to_dict()['hits']['hits']
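# Sketch of what the bool query above serialises to (one clause per term):
#
#   {"bool": {"must": [{"match": {"body": "<must term>"}}, ...],
#             "should": [{"match": {"body": "<should term>"}}, ...],
#             "minimum_should_match": 1}}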
def test_create_search_obj_filter(self, is_advance_search_capable):
    """
    Test that Search objects are created with program-limiting and filled_out=True query parameters
    """
    user = self.user if is_advance_search_capable else self.learner
    search_obj = create_search_obj(user)
    search_query_dict = search_obj.to_dict()
    expected_program_query = Q(
        'bool',
        should=[
            Q('term', **{'program.id': self.program.id})
        ],
        minimum_should_match=1,
        must=[
            Q('term', **{'program.is_learner': True})
        ]
    )
    expected_filled_out_query = Q('term', **{'profile.filled_out': True})
    expected_privacy_query = ~Q('term', **{'profile.account_privacy': 'private'})
    assert 'query' in search_query_dict
    assert 'bool' in search_query_dict['query']
    assert 'filter' in search_query_dict['query']['bool']
    assert len(search_query_dict['query']['bool']['filter']) == (2 if is_advance_search_capable else 3)
    expected_filters = [
        expected_program_query.to_dict(),
        expected_filled_out_query.to_dict(),
    ]
    if not is_advance_search_capable:
        expected_filters.insert(0, expected_privacy_query.to_dict())
    assert search_query_dict['query']['bool']['filter'] == expected_filters
def create_program_limit_query(user, staff_program_ids, filter_on_email_optin=False):
    """
    Constructs and returns a query that limits a user to data for their allowed programs

    Args:
        user (django.contrib.auth.models.User): A user
        staff_program_ids (list of int): the list of program ids the user is staff for, if any
        filter_on_email_optin (bool): If true, filter out profiles where email_optin != true

    Returns:
        elasticsearch_dsl.query.Q: An elasticsearch query
    """
    users_allowed_programs = get_searchable_programs(user, staff_program_ids)
    # If the user cannot search any program, raise an exception.
    # In theory this should never happen because `UserCanAdvanceSearchPermission`
    # takes care of doing the same check, but it is better to keep it, so a
    # theoretical bug cannot expose all the data in the index.
    if not users_allowed_programs:
        raise NoProgramAccessException()
    must = [
        Q('term', **{'program.is_learner': True})
    ]
    if filter_on_email_optin:
        must.append(Q('term', **{'profile.email_optin': True}))
    # No matter what the query is, limit the programs to the allowed ones.
    # If this is a superset of what searchkit sends, it will not impact the result.
    return Q(
        'bool',
        should=[
            Q('term', **{'program.id': program.id}) for program in users_allowed_programs
        ],
        # require that at least one program id matches the user's allowed programs
        minimum_should_match=1,
        must=must,
    )
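# Usage sketch (hypothetical ids): for a user allowed programs 1 and 2, the
# returned Q serialises to a bool query whose "should" lists
# {"term": {"program.id": 1}} and {"term": {"program.id": 2}}, with
# minimum_should_match=1 and the "must" clauses built above.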
def query(s, q):
    s.query = Q("query_string", query=q)
    return s


### Main ###
def get_es_query(self, s_query, s_fields, **kwargs):
    """Create and return an elasticsearch Query.

    You could overload this method to create your own custom
    search Query object.

    Arguments:
        s_query: request search param
        s_fields: search fields

    Keyword arguments:
        request: request object
        view: view object
    """
    return Q("multi_match", query=s_query, fields=s_fields)
def elasticsearch_retrieve_page_by_id(page_id):
    query = Search().filter(Q("term", nid=int(page_id)))[:1]
    result = query.execute()
    if result.hits.total == 0:
        return None
    return result.hits[0]
def elasticsearch_delete_old():
    _from = NEVER
    _to = datetime.now() - timedelta(days=30)
    query = Search().filter(Q("range", visited_at={'from': _from, 'to': _to}))
    result = query.delete()
def elasticsearch_pages(context, sort, page):
    result_limit = int(os.environ['RESULT_LIMIT'])
    max_result_limit = int(os.environ['MAX_RESULT_LIMIT'])
    start = (page - 1) * result_limit
    end = start + result_limit
    domain_query = Q("term", is_banned=False)
    if context["is_up"]:
        domain_query = domain_query & Q("term", is_up=True)
    if not context["show_fh_default"]:
        domain_query = domain_query & Q("term", is_crap=False)
    if not context["show_subdomains"]:
        domain_query = domain_query & Q("term", is_subdomain=False)
    if context["rep"] == "genuine":
        domain_query = domain_query & Q("term", is_genuine=True)
    if context["rep"] == "fake":
        domain_query = domain_query & Q("term", is_fake=True)
    limit = max_result_limit if context["more"] else result_limit
    has_parent_query = Q("has_parent", type="domain", query=domain_query)
    if context['phrase']:
        query = Search().filter(has_parent_query).query(Q("match_phrase", body_stripped=context['search']))
    else:
        query = Search().filter(has_parent_query).query(Q("match", body_stripped=context['search']))
    query = query.highlight_options(order='score', encoder='html').highlight('body_stripped')[start:end]
    query = query.source(['title', 'domain_id', 'created_at', 'visited_at']).params(request_cache=True)
    if context["sort"] == "onion":
        query = query.sort("_parent")
    elif context["sort"] == "visited_at":
        query = query.sort("-visited_at")
    elif context["sort"] == "created_at":
        query = query.sort("-created_at")
    elif context["sort"] == "last_seen":
        query = query.sort("-visited_at")
    return query.execute()
def test_query(self):
    """It should be possible to query the search engine for results"""
    q = Q("match", title="greyhounds")
    s = self.s.query(q)
    r = s.execute()
    self.assertEqual(0, r.hits.total)  # We haven't indexed anything, so no results are expected
def test_search(self):
    """It should be possible to find an item by query"""
    self._index_img(self.img1)
    s = self.s.query(Q("match", title="greyhounds"))
    r = s.execute()
    self.assertEqual(1, r.hits.total)
def test_remove_from_search_after_sync(self):
    """When an image is removed from the source, it should be removed from the search engine"""
    self._index_img(self.removed)
    s = self.s.query(Q("match", title="removed"))
    r = s.execute()
    self.assertEqual(1, r.hits.total)
    with responses.RequestsMock() as rsps:
        rsps.add(responses.HEAD, FOREIGN_URL + TEST_IMAGE_REMOVED, status=404)
        self.removed.sync()
    signals._update_search_index(self.removed)
    self.es.indices.refresh()
    s = self.s.query(Q("match", title="removed"))
    r = s.execute()
    self.assertEqual(0, r.hits.total)
def get(self, handler):
    # Rip the search object out of the elasticsearch backend
    sort = handler.sort
    search = self.collection._state._backend.raw_backend().search
    if handler.request.query_arguments.get('q'):
        search = search.query(elasticsearch_dsl.Q('query_string', query=handler.request.query_arguments['q'][-1].decode('utf-8')))
    else:
        # This should technically be elsewhere, but the search object
        # does not provide a nice way to figure out if there is a query or not.
        search = search.sort({'ref': {
            'order': 'asc',
            'unmapped_type': 'string'
        }})
    if handler.request.query_arguments.get('sort'):
        search = search.sort({sort.key: {
            'order': 'asc' if sort.order == 1 else 'desc',
            'unmapped_type': 'string'
        }})
    # Hacking into the serializer
    handler._serializer = self.get_serializer()
    handler._view.parents = handler._view.parents + (self.collection,)
    start = handler.page * handler.page_size
    wrapper = SearchResultWrapper(search[start:start + handler.page_size])
    return handler.write({
        'meta': {
            'total': wrapper.count(),
            'perPage': handler.page_size
        },
        # TODO
        'links': {},
        'data': [handler.serialize(resource) for resource in wrapper]
    })
def get_results(search_object, searched_fields, value):
    search_query = value
    if not search_query:
        # No search term given: match everything
        q = Q('match_all')
    else:
        q = Q("multi_match", query=search_query, fields=searched_fields, operator='and')
    search_object = search_object.query(q)[0:SEARCH_RESULTS_PER_PAGE]
    if search_query:
        for searched_field in searched_fields:
            search_object = search_object.highlight(searched_field)
    return search_object
def add_filters(self, filters, regexp=False, negative=False):
    """
    Add `filters` to the query.

    `filters` is a dict of the form {'field': value, 'field2': value2}; a value
    may also be a list of values instead of a str, in which case the values are
    combined with an OR (not an AND).

    :param dict filters:
    :param bool regexp:
    :param bool negative:
    :return:
    """
    # We need to use multi_match, since we get the field names dynamically.
    for key, value in filters.items():
        if isinstance(value, set):
            value = list(value)
        # There is no need to process empty values.
        if not value:
            continue
        if isinstance(value, list):
            if negative:
                self.search = self.search.query(Q('bool', must_not=[
                    reduce(operator.or_, [Q('multi_match', query=v, fields=[key]) for v in value])])
                )
            else:
                self.search = self.search.query(Q('bool', must=[
                    reduce(operator.or_, [Q('multi_match', query=v, fields=[key]) for v in value])])
                )
        else:
            if negative:
                self.search = self.search.query(~Q("multi_match", query=value, fields=[key]))
            else:
                self.search = self.search.query(Q("multi_match", query=value, fields=[key]))
def filters(self, args):
    if self.name in args and args[self.name] == "":
        return esd.Q('missing', field=self.name)
    else:
        return esd.Q('terms', **{self.name: [args.get(self.name), ]})
def filters(self, args):
    if self.name in args and len(args[self.name]) > 0:
        return esd.Q('prefix', **{self.name: args.get(self.name)})
    else:
        return super().filters(args)
def filters(self):
    range = {}
    if self.value[0] is not None:
        range["from"] = self.value[0]
    if self.value[1] is not None:
        range["to"] = self.value[1]
    return esd.Q('range', **{self.name: range})
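# e.g. with self.name == "price" and self.value == (10, None) (illustrative
# values), this returns esd.Q('range', price={'from': 10}), which serialises
# to {"range": {"price": {"from": 10}}}.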
def get_dsl_logoff_query(screen):
    q = None
    for evtid in config.EVENTS_LOGOFF:
        tmp = Q("match", event_identifier=evtid)
        if q is None:
            q = tmp
        else:
            q = q | tmp
    if screen is True:
        for evtid in config.EVENTS_LOGOFF_SCREEN:
            q = q | Q("match", event_identifier=evtid)
    return q
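# OR-ing match queries with |, as above, folds them into a single
# Q('bool', should=[...]), so any one matching event identifier satisfies
# the query. A minimal self-contained sketch (illustrative event ids):
#
#   from elasticsearch_dsl import Q
#   q = Q("match", event_identifier=4634) | Q("match", event_identifier=4647)
#   assert q.to_dict() == {"bool": {"should": [
#       {"match": {"event_identifier": 4634}},
#       {"match": {"event_identifier": 4647}}]}}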