def tokenize(
        string, es_client,
        field_name=None, index_name=None, analyzer_name=None):
    """Tokenize a string with an Elasticsearch analyzer.

    The text is sent to ``es_client.indices.analyze``. When
    ``analyzer_name`` is given, that analyzer is requested explicitly;
    otherwise the analyzer attached to ``field_name`` is used.

    Args:
        string (string): the string to tokenize
        es_client (EsClient): elasticsearch client. Its ``index_name``
            and ``field_name`` attributes supply defaults for the
            corresponding arguments.
        field_name (string): the field whose analyzer is used to
            tokenize. Only consulted when ``analyzer_name`` is None;
            defaults to ``es_client.field_name``.
        index_name (string): name of the index; defaults to
            ``es_client.index_name``.
        analyzer_name (string): name of an analyzer to use instead of
            the field's analyzer.

    Returns:
        tokens (list): a list of tokens. An empty list is returned when
            Elasticsearch rejects the request with a ``RequestError``.

    Raises:
        ElasticsearchClientError: if no field name or index name
            are available. This function does not raise it directly;
            presumably it comes from the client's default attributes
            (verify against EsClient).
    """
    # The field is only part of the request when no analyzer is named, so
    # only then fall back to the client's default field. This avoids a
    # spurious failure on clients that have no default field configured.
    use_field_analyzer = analyzer_name is None
    if use_field_analyzer and field_name is None:
        field_name = es_client.field_name
    if index_name is None:
        index_name = es_client.index_name

    req = {'body': string, 'index': index_name}
    if use_field_analyzer:
        req['field'] = field_name
    else:
        req['analyzer'] = analyzer_name

    try:
        response = es_client.indices.analyze(**req)
    except elasticsearch.exceptions.RequestError:
        # Best effort: a rejected analyze request yields no tokens.
        return []
    return [d['token'] for d in response['tokens']]
# Comments
# Table of contents