def set_levenshtein(self):
    '''
    Set the Levenshtein ratio features for the labels of the candidate.

    Nothing is computed unless at least one entry of ``self.features``
    starts with ``match_str_lsr``. Otherwise the normalized form of the
    first entity of ``self.cluster`` is compared with the labels found in
    ``self.document``:

    - ``pref_label``: the plain ratio, stored in ``match_str_lsr_pref``.
    - ``wd_alt_label``: the maximum ratio minus 0.5 and the mean ratio
      minus 0.375, stored in ``match_str_lsr_wd_max`` and
      ``match_str_lsr_wd_mean``.
    - ``alt_label`` entries that do not also occur in ``wd_alt_label``:
      the same two statistics, stored in ``match_str_lsr_alt_max`` and
      ``match_str_lsr_alt_mean``.

    Attributes for which the document holds no labels are left untouched.
    '''
    if not any(f.startswith('match_str_lsr') for f in self.features):
        return

    ne = self.cluster.entities[0].norm

    def shifted_stats(labels):
        # Maximum and mean ratio of `ne` against `labels`, each with its
        # fixed offset subtracted.
        # NOTE(review): the 0.5 / 0.375 offsets are carried over unchanged;
        # presumably they centre the features -- confirm before tuning.
        ratios = [Levenshtein.ratio(ne, label) for label in labels]
        mean_ratio = sum(ratios) / float(len(ratios))
        return max(ratios) - 0.5, mean_ratio - 0.375

    # Pref label. A document without one is skipped, like the alt-label
    # branches below, instead of passing None to Levenshtein.ratio.
    pref_label = self.document.get('pref_label')
    if pref_label is not None:
        self.match_str_lsr_pref = Levenshtein.ratio(ne, pref_label)

    # Wikidata alt labels
    wd_labels = self.document.get('wd_alt_label') or []
    if wd_labels:
        (self.match_str_lsr_wd_max,
         self.match_str_lsr_wd_mean) = shifted_stats(wd_labels)

    # Any other alt labels: those not already scored as Wikidata labels
    alt_labels = [label for label in self.document.get('alt_label') or []
                  if label not in wd_labels]
    if alt_labels:
        (self.match_str_lsr_alt_max,
         self.match_str_lsr_alt_mean) = shifted_stats(alt_labels)
# Web-page navigation residue (not part of the code): "Comment list", "Table of contents"