def best_string_mapping(threshold, left_list, right_list):
    """
    Find the best-weighted mapping between two lists of strings.

    Each string of one list is paired with the most similar string of the
    other list, provided their similarity reaches ``threshold``. The search
    runs in both directions (left -> right and right -> left) and the two
    results are merged, so one string may end up in several pairs.

    Example::

        index   List A   List B
        0       'abc'    'abc'
        1       'cde'    'cde'
        2       'fgh'    'fgh'
        3                'fgj'

    As a[{0,1,2}] == b[{0,1,2}], those values are always mapped to each
    other (unless ``threshold`` is greater than 1). Additionally, the
    right -> left pass maps b[3] to a[2], so ('fgh', 'fgj') is part of the
    result as well if the threshold is low enough (e.g. 0.5).

    :param threshold: minimum similarity a pair needs to be considered;
        identical strings have similarity 1, all other pairs are scored
        with ``fuzz.token_sort_ratio(...) / 100``.
    :param left_list: iterable of hashable strings (first pair element).
    :param right_list: iterable of hashable strings (second pair element).
    :return: set of ``(left, right)`` tuples.

    NOTE(review): ``fuzz`` is not defined in this block; it is expected to be
    available in the enclosing module scope (presumably a fuzzy-matching
    library exposing ``token_sort_ratio`` with a 0-100 score) -- confirm.
    """
    def injective_map(ll, rl, inverse_result=False):
        # Best candidate found so far for every source entry:
        #   source -> (target, (similarity, is_exact))
        best = {}
        for l_entry in ll:
            for r_entry in rl:
                exact = l_entry == r_entry
                if exact:
                    sim = 1
                else:
                    # 100.0 keeps this a true division even if the score is
                    # an int and "/" is integer division (Python 2).
                    sim = fuzz.token_sort_ratio(l_entry, r_entry) / 100.0
                if sim < threshold:
                    continue
                # Rank by similarity first, exactness second: a fuzzy match
                # that also scores 1.0 (e.g. reordered tokens) must not
                # displace an identical string. Ties between equally ranked
                # candidates still go to the later one.
                rank = (sim, exact)
                if l_entry in best and rank < best[l_entry][1]:
                    continue
                best[l_entry] = (r_entry, rank)
        if inverse_result:
            # Sources came from the right list: flip back to (left, right).
            return {(target, source) for source, (target, _) in best.items()}
        return {(source, target) for source, (target, _) in best.items()}

    return (injective_map(left_list, right_list)
            | injective_map(right_list, left_list, True))
评论列表
文章目录