def get_variants_dictionary_value_from_key(csv_file_path, dictionary_key, logger, **kwargs):
    """
    Read a csv file and build a dictionary mapping each entity value to a list of its variants.

    The entity value is taken from the first column of every row and its variants from the
    second column, where individual variants are delimited by '|'. Variants are stripped of
    surrounding whitespace and empty variants are dropped. In the entity value, surrounding
    whitespace is stripped and every '.' is replaced by a space. Rows that repeat an entity
    value have their variants appended to the same list.

    Failures are logged and swallowed: a row that cannot be processed is logged and skipped,
    and a failure while opening/iterating the file is logged and whatever has been collected
    so far is returned.

    Args:
        csv_file_path: path of the csv file to populate entity data from
        dictionary_key: name of the entity the values belong to (only used in log messages here)
        logger: logging object; `logger.exception` is called on failures
        kwargs: accepted for interface compatibility; not used by this function

    Returns:
        defaultdict(list) mapping entity value to a list of its variants.
    """
    dictionary_value = defaultdict(list)
    try:
        # NOTE(review): read_csv is defined elsewhere; it is assumed to return an iterator of rows.
        csv_reader = read_csv(csv_file_path)
        # Discard the first row (presumably the header row -- confirm against the csv files).
        next(csv_reader)
        for data_row in csv_reader:
            try:
                # Call .strip() on each item instead of map(str.strip, ...) so that both
                # byte strings and unicode strings are handled on Python 2 and Python 3.
                variants = [variant.strip() for variant in data_row[1].split('|')]
                # remove empty strings
                variants = [variant for variant in variants if variant]
                entity_value = data_row[0].strip().replace('.', ' ')
                dictionary_value[entity_value].extend(variants)
            except Exception as e:
                # Best effort: log the offending row and continue with the next one.
                logger.exception('%s: \t\t== Exception in dict creation for keyword: %s -- %s -- %s ==',
                                 log_prefix, dictionary_key, data_row, e)
    except Exception as e:
        # Format the exception itself: `e.message` does not exist on Python 3 and would raise
        # AttributeError from inside this handler.
        logger.exception(
            '%s: \t\t\t=== Exception in __get_variants_dictionary_value_from_key() Dictionary Key: %s \n %s ===',
            log_prefix, dictionary_key, e)
    return dictionary_value
# Comment list
# Table of contents