identify.py 文件源码

python
阅读 30 收藏 0 点赞 0 评论 0

项目:Library-Identification 作者: Riscure 项目源码 文件源码
def compare_strings_concat_levenshtein(sample, ref):
    """
    Score how similar the strings of `sample` are to the strings of `ref`.

    For every section listed in `ref.strs` that is also present in
    `sample.strs`, the strings of that section are concatenated into a single
    string per side, and the two concatenations are compared by their
    Levenshtein distance. This results in a fuzzy comparison: it detects
    changes within strings and within the list of strings. The per-section
    similarities are then averaged.

    Sections missing from `sample`, and sections whose concatenation is empty
    on either side, are skipped and do not take part in the average.

    Returns a tuple `(score, ref.name, ref.version)`, where `score` is the
    mean section similarity multiplied by 100. The score is 0.0 when either
    object has no `strs` attribute (or it is None), or when no section could
    be compared.
    """
    ref_strs = getattr(ref, 'strs', None)
    sample_strs = getattr(sample, 'strs', None)

    # Without strings on either side there is nothing to compare. Treat a
    # stringless `sample` exactly like a stringless `ref` instead of failing
    # on the membership test below.
    if ref_strs is None or sample_strs is None:
        return (0.0, ref.name, ref.version)

    section_ratios = []
    for section in ref_strs:
        if section not in sample_strs:
            continue

        sample_concat = ''.join(sample_strs[section])
        ref_concat = ''.join(ref_strs[section])

        # An empty side would make the normalisation below meaningless
        # (and divide by zero when both sides are empty).
        if not sample_concat or not ref_concat:
            continue

        # Similarity measurement from
        # Gheorghescu, M. (2005). An Automated Virus Classification System.
        # Virus Bulletin Conference, (October), 294-300.
        # (although they use it on a list of basic blocks instead of a
        # character string)
        longest = max(len(sample_concat), len(ref_concat))
        distance = Levenshtein.distance(sample_concat, ref_concat)

        # float() keeps this a true division on Python 2 as well.
        section_ratios.append(1 - distance / float(longest))

    if not section_ratios:
        return (0.0, ref.name, ref.version)

    ratio = sum(section_ratios) / len(section_ratios)
    return (ratio * 100, ref.name, ref.version)
评论列表
文章目录


问题


面经


文章

微信
公众号

扫码关注公众号