def get_dsl_logon_query(screen):
q = None
for evtid in config.EVENTS_LOGON:
tmp = Q("match",event_identifier=evtid)
if q is None:
q = tmp
else:
q = q | tmp
if screen is True:
for evtid in config.EVENTS_LOGON_SCREEN:
q = q | Q("match",event_identifier=evtid)
return q
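The OR-chain above can be written more compactly with functools.reduce and the | operator that elasticsearch_dsl Q objects support. A minimal sketch, with the event id lists passed in explicitly instead of read from the project's config module, and illustrative Windows event ids:

from functools import reduce
from elasticsearch_dsl import Q

def build_logon_query(logon_ids, screen_ids=(), screen=False):
    evtids = list(logon_ids) + (list(screen_ids) if screen else [])
    # (a | b) on Q objects yields a bool query with both clauses under "should"
    return reduce(lambda a, b: a | b, (Q("match", event_identifier=e) for e in evtids))

# Example (illustrative event ids, not taken from config):
# build_logon_query([4624, 4648], [4778], screen=True).to_dict()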
def get_logout_event(index,logonid,timestamp,maxtstamp,screen):
"""
Look for the logoff event belonging to the given logon id or a shutdown event.
"""
conn = connections.get_connection()
    # workaround to fix time precision issues
timestamp = timestamp - 999
logoff = get_dsl_logoff_query(screen)
q = [ \
Q('match',data_type='windows:evtx:record') , \
Q('match',xml_string=logonid) , \
logoff \
]
s = Search(using=conn, index=index).query(Q('bool',must=q)).filter('range',datetime={'gte':timestamp,'lte':maxtstamp}).sort('-datetime')
res = s.execute()
    try:
        evt = res[0]
    except IndexError:
        evt = None
if evt is None:
q = [ Q('match',event_identifier=config.EVENT_SHUTDOWN) ]
s = Search(using=conn, index=index).query(Q('bool',must=q)).filter('range',datetime={'gte':timestamp,'lte':maxtstamp}).sort('-datetime')
res = s.execute()
        try:
            evt = res[0]
        except IndexError:
            evt = None
return evt
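For reference, a hedged sketch of what the must-clauses plus range filter built above serialize to; the field names come from the snippet, while the logon id and event id values are placeholders:

from elasticsearch_dsl import Q, Search

must = [
    Q('match', data_type='windows:evtx:record'),
    Q('match', xml_string='0x3e7'),      # placeholder logon id
    Q('match', event_identifier=4634),   # illustrative logoff event id
]
s = Search(index='events') \
        .query(Q('bool', must=must)) \
        .filter('range', datetime={'gte': 0, 'lte': 1000}) \
        .sort('-datetime')
print(s.to_dict())  # prints the full request body: bool query, range filter and sort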
def get_last_shutdown(index,maxtstamp,pattern):
"""
Look for the last shutdown event
"""
conn = connections.get_connection()
q = [ \
Q('match',data_type='windows:evtx:record') , \
Q('match',event_identifier=config.EVENT_SHUTDOWN)
]
if pattern:
q.append(Q('query_string',query=pattern,analyze_wildcard=True))
s = Search(using=conn, index=index).query(Q('bool',must=q)).filter('range',datetime={'lte':maxtstamp}).sort('-datetime')[0:0]
s.aggs.bucket('computer','terms',field='computer_name.keyword').bucket('shutdown','top_hits',size=1)
res = s.execute()
ret = {}
for item in res.aggregations['computer']['buckets']:
ret[item['key']] = item['shutdown']['hits']['hits'][0]
if len(ret.keys()) == 0:
ret = None
return ret
def get_last_event(index,computer=None,maxdate=None,pattern=None):
conn = connections.get_connection()
q = [ \
Q('match',data_type='windows:evtx:record')
]
if computer is not None:
q.append(Q('match',computer_name=computer))
if pattern:
q.append(Q('query_string',query=pattern,analyze_wildcard=True))
if maxdate:
s = Search(using=conn, index=index).query(Q('bool',must=q)).filter('range',datetime={'lte': maxdate}).sort('-datetime')
else:
s = Search(using=conn, index=index).query(Q('bool',must=q)).sort('-datetime')
if computer is None:
s = s[0:0]
s.aggs.bucket('computer','terms',field='computer_name.keyword').bucket('last','top_hits',size=1)
res = s.execute()
if computer is None:
evt = {}
for item in res.aggregations['computer']['buckets']:
evt[item['key']] = item['last']['hits']['hits'][0]
if len(evt.keys()) == 0:
evt = None
else:
        try:
            evt = res[0]
        except IndexError:
            evt = None
return evt
def create_query_for_email(self, search, email):
return search.query(elasticsearch_dsl.Q({"match": {'email': email}}))
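The raw-dict form above can also be written with keyword arguments; both are accepted by elasticsearch_dsl and produce the same match query. A hedged one-line variant of the method:

import elasticsearch_dsl

def create_query_for_email(self, search, email):
    # keyword-argument form of the dict query above; Search.query also accepts
    # the query name directly: search.query("match", email=email)
    return search.query(elasticsearch_dsl.Q("match", email=email))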
def paginate(self, index, q='*', limit=None, size=None, id_only=True):
if not size:
size = self.bulk_size
log.info('Limit %s, size %s (q = "%s")', limit, size, q)
s = Search(
using=self.client,
index=index,
doc_type=self.doc_type)
s = s.query(Q('query_string', query=q))
if limit:
size = min(size, limit)
s = s.extra(size=size)
s = s.params(
scroll='20m',
size=size)
if id_only:
s = s.source(False)
log.debug('Query: %s', simplejson.dumps(s.to_dict(), indent=2))
hits = []
overall = 0
    for h in s.scan():
        if limit is not None and overall >= limit:
            # end the generator; raising StopIteration inside a generator
            # is an error in Python 3.7+ (PEP 479)
            return
        log.debug('Hit: %s (progress: %d)', h.meta.id, overall)
        if not limit or overall < limit:
            if id_only:
                hits.append(h.meta.id)
            else:
                hits.append(h.to_dict())
            if len(hits) == size:
                yield iter(hits)
                hits = []
        overall += size
    if len(hits):
        yield iter(hits)
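A usage sketch for the generator above, assuming a hypothetical indexer object that provides the .client, .bulk_size and .doc_type attributes the method expects; each yielded item is an iterator over at most size ids or documents:

# hypothetical indexer object and per-document handler
for page in indexer.paginate('my-index', q='status:active', limit=10000, size=500):
    for doc_id in page:
        process(doc_id)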
def get_records_list():
"""
Obtains list of all records for given type given time range.
:return: JSON with status "ok" or "error" and requested data.
"""
# Check login
if not session.logged:
json_response = '{"status": "Error", "data": "You must be logged!"}'
return json_response
# Check mandatory inputs
if not (request.get_vars.beginning and request.get_vars.end and request.get_vars.type):
json_response = '{"status": "Error", "data": "Some mandatory argument is missing!"}'
return json_response
# Parse inputs and set correct format
beginning = escape(request.get_vars.beginning)
end = escape(request.get_vars.end)
type = escape(request.get_vars.type)
try:
# Elastic query
client = elasticsearch.Elasticsearch(
[{'host': myconf.get('consumer.hostname'), 'port': myconf.get('consumer.port')}])
elastic_bool = []
elastic_bool.append({'range': {'@timestamp': {'gte': beginning, 'lte': end}}})
elastic_bool.append({'term': {'@stat_type': type}})
# Prepare query
qx = Q({'bool': {'must': elastic_bool}})
# Set query according to the statistic type
search_ip = Search(using=client, index='_all').query(qx)
search_ip.aggs.bucket('all_nested', 'nested', path='data_array')\
.bucket('by_key', 'terms', field='data_array.key.raw', size=2147483647)\
.bucket('stats_sum', 'sum', field='data_array.value')
results = search_ip.execute()
data = ""
for all_buckets in results.aggregations.all_nested.by_key:
data += all_buckets.key + "," + str(int(all_buckets.stats_sum.value)) + ","
# Remove trailing comma
data = data[:-1]
json_response = '{"status": "Ok", "data": "' + data + '"}'
return json_response
except Exception as e:
json_response = '{"status": "Error", "data": "Exception: ' + escape(str(e)) + '"}'
return json_response
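The raw dict passed to Q above is equivalent to composing typed Q objects; both produce the same bool/must body. A small sketch with placeholder values (the @stat_type value is illustrative):

from elasticsearch_dsl import Q

qx_dict = Q({'bool': {'must': [
    {'range': {'@timestamp': {'gte': 'now-1d', 'lte': 'now'}}},
    {'term': {'@stat_type': 'flows'}},
]}})
qx_typed = Q('bool', must=[
    Q('range', **{'@timestamp': {'gte': 'now-1d', 'lte': 'now'}}),
    Q('term', **{'@stat_type': 'flows'}),
])
assert qx_dict.to_dict() == qx_typed.to_dict()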
def get_host_flows():
"""
Gets flows, packet and bytes time series for a given host
Returns: JSON with status "ok" or "error" and requested data.
"""
# Check login
if not session.logged:
json_response = '{"status": "Error", "data": "You must be logged!"}'
return json_response
# Check mandatory inputs
if not (request.get_vars.beginning and request.get_vars.end and request.get_vars.aggregation and request.get_vars.host_ip):
json_response = '{"status": "Error", "data": "Some mandatory argument is missing!"}'
return json_response
# Parse inputs and set correct format
beginning = escape(request.get_vars.beginning)
end = escape(request.get_vars.end)
aggregation = escape(request.get_vars.aggregation)
host_ip = escape(request.get_vars.host_ip)
try:
# Elastic query
client = elasticsearch.Elasticsearch(
[{'host': myconf.get('consumer.hostname'), 'port': myconf.get('consumer.port')}])
elastic_bool = []
elastic_bool.append({'range': {'@timestamp': {'gte': beginning, 'lte': end}}})
elastic_bool.append({'term': {'src_ip': host_ip}})
qx = Q({'bool': {'must': elastic_bool}})
s = Search(using=client, index='_all').query(qx)
s.aggs.bucket('by_time', 'date_histogram', field='@timestamp', interval=aggregation) \
.metric('sum_of_flows', 'sum', field='stats.total.flow') \
.metric('sum_of_packets', 'sum', field='stats.total.packets') \
.metric('sum_of_bytes', 'sum', field='stats.total.bytes')
result = s.execute()
data = "Timestamp,Number of flows,Number of packets,Number of bytes;"
for record in result.aggregations.by_time.buckets:
timestamp = record.key
number_of_flows = int(record.sum_of_flows.value)
number_of_packets = int(record.sum_of_packets.value)
number_of_bytes = int(record.sum_of_bytes.value)
data += str(timestamp) + "," + str(number_of_flows) + "," + str(number_of_packets) + "," + str(number_of_bytes) + ";"
json_response = '{"status": "Ok", "data": "' + data + '"}'
return (json_response)
except Exception as e:
json_response = '{"status": "Error", "data": "Elasticsearch query exception: ' + escape(str(e)) + '"}'
return json_response
def get_host_distinct_ports():
"""
    Gets the distinct destination port count time series for a given host
Returns: JSON with status "ok" or "error" and requested data.
"""
# Check login
if not session.logged:
json_response = '{"status": "Error", "data": "You must be logged!"}'
return json_response
# Check mandatory inputs
if not (request.get_vars.beginning and request.get_vars.end and request.get_vars.aggregation and request.get_vars.host_ip):
json_response = '{"status": "Error", "data": "Some mandatory argument is missing!"}'
return json_response
# Parse inputs and set correct format
beginning = escape(request.get_vars.beginning)
end = escape(request.get_vars.end)
aggregation = escape(request.get_vars.aggregation)
host_ip = escape(request.get_vars.host_ip)
try:
# Elastic query
client = elasticsearch.Elasticsearch(
[{'host': myconf.get('consumer.hostname'), 'port': myconf.get('consumer.port')}])
elastic_bool = []
elastic_bool.append({'range': {'@timestamp': {'gte': beginning, 'lte': end}}})
elastic_bool.append({'term': {'src_ip': host_ip}})
qx = Q({'bool': {'must': elastic_bool}})
s = Search(using=client, index='_all').query(qx)
s.aggs.bucket('by_time', 'date_histogram', field='@timestamp', interval=aggregation) \
.metric('dport_avg', 'avg', field='stats.dport_count') \
.metric('dport_max', 'max', field='stats.dport_count') \
.metric('dport_min', 'min', field='stats.dport_count')
result = s.execute()
data_avg = []
data_min_max = []
data_max = []
data_min = []
for record in result.aggregations.by_time.buckets:
timestamp = record.key
maximum = round(record.dport_max.value, 2) if record.dport_max.value else None
minimum = round(record.dport_min.value, 2) if record.dport_min.value else None
data_avg.append([timestamp,round(record.dport_avg.value,2) if record.dport_avg.value else None])
data_min_max.append([timestamp,[minimum, maximum ]])
data_max.append(maximum)
data_min.append(minimum)
json_response = {"status": "Ok", "data":{ "data_avg": data_avg, "data_min_max": data_min_max, "data_min": data_min, "data_max": data_max}}
return (json.dumps(json_response))
except Exception as e:
json_response = '{"status": "Error", "data": "Elasticsearch query exception: ' + escape(str(e)) + '"}'
return json_response
def get_host_distinct_peers():
"""
    Gets the distinct peer count time series for a given host
Returns: JSON with status "ok" or "error" and requested data.
"""
# Check login
if not session.logged:
json_response = '{"status": "Error", "data": "You must be logged!"}'
return json_response
# Check mandatory inputs
if not (request.get_vars.beginning and request.get_vars.end and request.get_vars.aggregation and request.get_vars.host_ip):
json_response = '{"status": "Error", "data": "Some mandatory argument is missing!"}'
return json_response
# Parse inputs and set correct format
beginning = escape(request.get_vars.beginning)
end = escape(request.get_vars.end)
aggregation = escape(request.get_vars.aggregation)
host_ip = escape(request.get_vars.host_ip)
try:
# Elastic query
client = elasticsearch.Elasticsearch(
[{'host': myconf.get('consumer.hostname'), 'port': myconf.get('consumer.port')}])
elastic_bool = []
elastic_bool.append({'range': {'@timestamp': {'gte': beginning, 'lte': end}}})
elastic_bool.append({'term': {'src_ip': host_ip}})
qx = Q({'bool': {'must': elastic_bool}})
s = Search(using=client, index='_all').query(qx)
        s.aggs.bucket('by_time', 'date_histogram', field='@timestamp', interval=aggregation) \
            .metric('peer_avg', 'avg', field='stats.peer_number') \
            .metric('peer_max', 'max', field='stats.peer_number') \
            .metric('peer_min', 'min', field='stats.peer_number')
result = s.execute()
data_avg = []
data_min_max=[]
data_max = []
data_min = []
for record in result.aggregations.by_time.buckets:
timestamp = record.key
maximum = round(record.peer_max.value, 2) if record.peer_max.value else None
minimum = round(record.peer_min.value, 2) if record.peer_min.value else None
data_avg.append([timestamp, round(record.peer_avg.value, 2) if record.peer_avg.value else None])
data_min_max.append([timestamp, [minimum, maximum]])
data_max.append(maximum)
data_min.append(minimum)
json_response = {"status": "Ok", "data":{ "data_avg": data_avg, "data_min_max": data_min_max, "data_min": data_min, "data_max": data_max}}
return (json.dumps(json_response))
except Exception as e:
json_response = '{"status": "Error", "data": "Elasticsearch query exception: ' + escape(str(e)) + '"}'
return json_response
def get_records_list():
"""
    Obtains a list of all external DNS resolver records in the given time range, with an optional IP filter.
:return: JSON with status "ok" or "error" and requested data.
"""
# Check login
if not session.logged:
json_response = '{"status": "Error", "data": "You must be logged!"}'
return json_response
# Check mandatory inputs
if not (request.get_vars.beginning and request.get_vars.end and request.get_vars.filter):
json_response = '{"status": "Error", "data": "Some mandatory argument is missing!"}'
return json_response
# Parse inputs and set correct format
beginning = escape(request.get_vars.beginning)
end = escape(request.get_vars.end)
filter = escape(request.get_vars.filter)
try:
# Elastic query
client = elasticsearch.Elasticsearch(
[{'host': myconf.get('consumer.hostname'), 'port': myconf.get('consumer.port')}])
elastic_bool = []
elastic_bool.append({'range': {'@timestamp': {'gte': beginning, 'lte': end}}})
elastic_bool.append({'term': {'@type': 'external_dns_resolver'}})
# Set filter
if filter != 'none':
elastic_should = []
elastic_should.append({'term': {'src_ip': filter}})
elastic_should.append({'term': {'resolver_ip.raw': filter}})
elastic_bool.append({'bool': {'should': elastic_should}})
qx = Q({'bool': {'must': elastic_bool}})
# Search with maximum size aggregations
search = Search(using=client, index='_all').query(qx)
search.aggs.bucket('by_src', 'terms', field='src_ip', size=2147483647)\
.bucket('by_dst', 'terms', field='resolver_ip.raw', size=2147483647)\
.bucket('top_src_dst', 'top_hits', size=1, sort=[{'timestamp': {'order': 'desc'}}])
results = search.execute()
# Result Parsing into CSV in format: timestamp, source_ip, resolver_ip, flows
data = ""
for src_aggregations in results.aggregations.by_src.buckets:
for result in src_aggregations.by_dst.buckets:
record = result.top_src_dst.hits.hits[0]["_source"]
data += record["timestamp"].replace("T", " ").replace("Z", "") + "," + record["src_ip"] + "," \
+ record["resolver_ip"] + "," + str(record["flows"]) + ","
data = data[:-1]
json_response = '{"status": "Ok", "data": "' + data + '"}'
return json_response
except Exception as e:
json_response = '{"status": "Error", "data": "Exception: ' + escape(str(e)) + '"}'
return json_response
def query_geonames(self, placename):
"""
Wrap search parameters into an elasticsearch query to the geonames index
and return results.
Parameters
---------
conn: an elasticsearch Search conn, like the one returned by `setup_es()`
placename: str
the placename text extracted by NER system
Returns
-------
out: The raw results of the elasticsearch query
"""
    # first, try for a country name
if self.is_country(placename):
q = {"multi_match": {"query": placename,
"fields": ['name', 'asciiname', 'alternativenames'],
"type" : "phrase"}}
r = Q("match", feature_code='PCLI')
res = self.conn.query(q).query(r)[0:5].execute() # always 5
#self.country_exact = True
else:
# second, try for an exact phrase match
q = {"multi_match": {"query": placename,
"fields": ['name^5', 'asciiname^5', 'alternativenames'],
"type" : "phrase"}}
res = self.conn.query(q)[0:50].execute()
# if no results, use some fuzziness, but still require all terms to be present.
# Fuzzy is not allowed in "phrase" searches.
if res.hits.total == 0:
# tried wrapping this in a {"constant_score" : {"query": ... but made it worse
q = {"multi_match": {"query": placename,
"fields": ['name', 'asciiname', 'alternativenames'],
"fuzziness" : 1,
"operator": "and"},
}
#self.fuzzy = True # idea was to preserve this info as a feature, but not using state like this
res = self.conn.query(q)[0:50].execute()
es_result = utilities.structure_results(res)
return es_result
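As a standalone reference, a hedged sketch of the two multi_match variants used above (boosted phrase match, then a fuzzy fallback), written as Q objects rather than raw dicts; the place-name values are placeholders:

from elasticsearch_dsl import Q

phrase_q = Q("multi_match",
             query="Berlin",
             fields=['name^5', 'asciiname^5', 'alternativenames'],
             type="phrase")
# fuzzy fallback; fuzziness is not allowed together with type="phrase"
fuzzy_q = Q("multi_match",
            query="Berlln",
            fields=['name', 'asciiname', 'alternativenames'],
            fuzziness=1,
            operator="and")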
def create_search_obj(user, search_param_dict=None, filter_on_email_optin=False):
"""
Creates a search object and prepares it with metadata and query parameters that
we want to apply for all ES requests
Args:
user (User): User object
search_param_dict (dict): A dict representing the body of an ES query
filter_on_email_optin (bool): If true, filter out profiles where email_optin != True
Returns:
Search: elasticsearch_dsl Search object
"""
staff_program_ids = get_advance_searchable_program_ids(user)
is_advance_search_capable = bool(staff_program_ids)
search_obj = Search(index=get_default_alias(), doc_type=_get_search_doc_types(is_advance_search_capable))
# Update from search params first so our server-side filtering will overwrite it if necessary
if search_param_dict is not None:
search_obj.update_from_dict(search_param_dict)
if not is_advance_search_capable:
# Learners can't search for other learners with privacy set to private
search_obj = search_obj.filter(
~Q('term', **{'profile.account_privacy': Profile.PRIVATE})
)
# Limit results to one of the programs the user is staff on
search_obj = search_obj.filter(create_program_limit_query(
user,
staff_program_ids,
filter_on_email_optin=filter_on_email_optin
))
# Filter so that only filled_out profiles are seen
search_obj = search_obj.filter(
Q('term', **{'profile.filled_out': True})
)
# Force size to be the one we set on the server
update_dict = {'size': settings.ELASTICSEARCH_DEFAULT_PAGE_SIZE}
if search_param_dict is not None and search_param_dict.get('from') is not None:
update_dict['from'] = search_param_dict['from']
search_obj.update_from_dict(update_dict)
return search_obj
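A small illustration of the ~Q negation used above: inverting a Q object wraps it in a bool query's must_not clause. The privacy value here is a placeholder standing in for Profile.PRIVATE:

from elasticsearch_dsl import Q

private_filter = ~Q('term', **{'profile.account_privacy': 'private'})
# private_filter.to_dict() ->
# {'bool': {'must_not': [{'term': {'profile.account_privacy': 'private'}}]}}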
def index(self, query=None, radius=None, center=None, sort_by=None,
*args, **kwargs):
if not query and not radius and not center:
redirect('/jobs')
search_query = JobElastic().search()
relevance_sort = sort_by == 'scores'
if query:
keyword_queries = self._compute_keyword_queries(query)
decay_functions = self._compute_decay_functions()
search_query.query = Q(
'function_score',
query=keyword_queries,
functions=decay_functions
)
else:
relevance_sort = False
try:
geoloc_query = json.loads(center)
coordinates = geoloc_query['coordinates']
lat, lon = (coordinates['lat'], coordinates['lon'])
except (ValueError, TypeError):
        # One of the following cases has occurred:
        # - Center wasn't a valid JSON string
        # - Radius couldn't be converted to float
        # Since both pieces of information are required to set a geolocation
        # filter, we ignore it.
pass
else:
search_query = self._apply_geolocation_filters(
search_query, (lat, lon), radius if radius else 5.0)
date_sort = not relevance_sort
if date_sort:
search_query = self._apply_date_sort(search_query)
return dict(sources=SOURCES, jobs=PaginatedSearch(search_query),
job_offer_search_form=JobsResearchForm)
def get_subscribers(self, targetings, hours_whitelist, volume):
logger.info("SubscriberService.get_subscribers: getting subscribers")
start_time = time.time()
timezones = [tz for tz in pytz.all_timezones
if (datetime
.now(pytz.timezone(tz)).hour
in hours_whitelist)]
targetings.append({
"field": "unsub",
"operator": "NOT IN",
"values": [1, "true"]
})
if timezones:
targetings.append({
"field": "timezone",
"operator": "IN",
"values": timezones
})
s = Search(using=es, index="users")
operator_mappings = {
'IN': 'must',
'NOT IN': 'must_not',
}
q = Q()
for condition in targetings:
condition_pair = {condition["field"]: condition["values"]}
terms_q = Q('terms', **condition_pair)
bool_operator = operator_mappings[condition['operator']]
bool_q = Q('bool', **{bool_operator: terms_q})
q += bool_q
s = s.query(q)
s.query = dslq.FunctionScore(
query=s.query,
functions=[dslq.SF('random_score')],
boost_mode="replace"
)
s = s[:volume]
try:
res = s.execute()
except Exception as e:
logger.error(f"SubscriberService.get_subscribers: Exception {e}")
else:
subscribers = []
for row in res.hits:
subscriber = row.to_dict()
subscriber['_id'] = row.meta.id
subscribers.append(subscriber)
end_time = time.time()
logger.debug(f"SubscriberService.get_subscribers: finished in "
f"{int((end_time - start_time) * 1000)}ms")
return subscribers
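An example of the targetings structure the method above consumes, inferred from the field/operator/values access in the loop; the field names and values are placeholders, and subscriber_service is a hypothetical instance exposing the method:

targetings = [
    {"field": "country", "operator": "IN", "values": ["US", "CA"]},
    {"field": "tags", "operator": "NOT IN", "values": ["bounced"]},
]
# subscribers = subscriber_service.get_subscribers(targetings,
#                                                  hours_whitelist=range(9, 21),
#                                                  volume=1000)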