def get_best_similar(data):
    """Find the candidate in a pool that is most similar to a key's text.

    ``data`` is a single ``(key, use_similar, similar_pool)`` triple
    (presumably packed into one argument so the function can be mapped over
    a list of work items -- confirm against callers):

    * ``key``: an indexable object; only ``key[1]`` is compared. The whole
      key is handed back unchanged so the result can be paired with it.
    * ``use_similar``: minimum ``difflib.SequenceMatcher`` ratio a candidate
      must reach to be accepted.
    * ``similar_pool``: iterable of candidate sequences compared with
      ``key[1]``.

    Returns ``(key, best)`` where ``best`` is the accepted candidate with the
    highest ratio, or ``None`` when no candidate reaches ``use_similar``.
    Among candidates with equal ratios below 1.0, the last one wins.

    This is a best-match-only variant of ``difflib.get_close_matches``; the
    original author reported roughly 20% overall improvement over calling
    that function with ``n=1``.
    """
    import difflib

    key, use_similar, similar_pool = data
    target = key[1]

    # Cheap length pre-filter: candidates whose length is not strictly
    # between len_key // 2 and len_key * 2 are never considered. This does
    # not lose matches as long as use_similar is not below ~0.7.
    # Note that an empty key[1] gives an empty window, so it never matches.
    len_key = len(target)
    min_len = len_key // 2
    max_len = len_key * 2

    best = None
    # SequenceMatcher caches its analysis of the second sequence, so the
    # fixed text goes in seq2 and only seq1 is swapped per candidate.
    matcher = difflib.SequenceMatcher()
    matcher.set_seq2(target)

    for candidate in similar_pool:
        if not min_len < len(candidate) < max_len:
            continue
        matcher.set_seq1(candidate)
        # Both quick ratios are upper bounds of ratio(); use them to skip
        # the expensive computation for hopeless candidates.
        if matcher.real_quick_ratio() < use_similar or matcher.quick_ratio() < use_similar:
            continue
        ratio = matcher.ratio()
        if ratio >= use_similar:
            best = candidate
            # Raise the bar so only equal-or-better candidates replace it.
            use_similar = ratio
            if ratio == 1.0:
                # Exact match: nothing can score higher, stop scanning.
                break
    return key, best
评论列表
文章目录