def compare_strings_concat_levenshtein(sample, ref):
    """
    Fuzzy-compare the strings of `sample` against those of `ref`.

    For every section that is present in both `sample.strs` and `ref.strs`,
    the strings of that section are concatenated into one string per side and
    the two results are compared by their Levenshtein distance. This detects
    changes within strings as well as within the list of strings. The
    similarity of a section is ``1 - distance / max(len_a, len_b)``; the
    overall similarity is the mean over all compared sections.

    Sections that are missing from `sample`, or whose concatenation is empty
    on either side, are skipped and do not count towards the mean.

    :param sample: object whose optional `strs` attribute maps a section to
        an iterable of strings.
    :param ref: object with `name` and `version` attributes and an optional
        `strs` attribute shaped like the one of `sample`.
    :returns: tuple ``(similarity * 100, ref.name, ref.version)``. The
        similarity is 0.0 when either object has no `strs` (missing or
        None) or when no section could be compared.
    """
    # Treat a missing attribute and an explicit None alike, on both sides, so
    # an object without extracted strings yields a 0.0 score, not an error.
    sample_strs = getattr(sample, 'strs', None)
    ref_strs = getattr(ref, 'strs', None)

    ratio = 0.0
    if sample_strs is not None and ref_strs is not None:
        section_ratios = []
        for section in ref_strs:
            if section not in sample_strs:
                continue
            strs_a_concat = ''.join(sample_strs[section])
            strs_b_concat = ''.join(ref_strs[section])
            # Skipping empty sides also rules out a division by zero below.
            if not strs_a_concat or not strs_b_concat:
                continue
            # Similarity measurement from
            # Gheorghescu, M. (2005). An Automated Virus Classification System.
            # Virus Bulletin Conference, (October), 294-300.
            # (although they use it on a list of basic blocks instead of a
            # character string)
            distance = Levenshtein.distance(strs_a_concat, strs_b_concat)
            longest = max(len(strs_a_concat), len(strs_b_concat))
            # float() keeps this a true division under Python 2 as well.
            section_ratios.append(1 - distance / float(longest))
        if section_ratios:
            ratio = sum(section_ratios) / len(section_ratios)
    return (ratio * 100, ref.name, ref.version)
# (Web-page residue, not part of the code: "Comment list" / "Table of contents".)