python类SequenceMatcher()的实例源码

routing.py 文件源码 项目:zanph 作者: zanph 项目源码 文件源码 阅读 26 收藏 0 点赞 0 评论 0
def closest_rule(self, adapter):
    """Return the rule on ``adapter.map`` that best resembles this object.

    Each rule is scored as a weighted sum: 0.98 times the similarity of
    ``rule.endpoint`` to ``self.endpoint``, plus 0.01 if every key in
    ``self.values`` is among ``rule.arguments``, plus 0.01 if
    ``self.method`` is one of ``rule.methods``.

    Returns ``None`` when ``adapter`` is falsy or its map has no rules.
    """
    if not (adapter and adapter.map._rules):
        return None

    def weighted_score(rule):
        endpoint_similarity = difflib.SequenceMatcher(
            None, rule.endpoint, self.endpoint
        ).ratio()
        values_fit = bool(set(self.values or ()).issubset(rule.arguments))
        method_fits = bool(rule.methods and self.method in rule.methods)
        return sum([
            0.98 * endpoint_similarity,
            0.01 * values_fit,
            0.01 * method_fits,
        ])

    return max(adapter.map._rules, key=weighted_score)
lib.py 文件源码 项目:release-script 作者: mitodl 项目源码 文件源码 阅读 37 收藏 0 点赞 0 评论 0
def match_user(slack_users, author_name, threshold=0.6):
    """
    Fuzzy-match a commit author's name against Slack users' real names.

    Args:
        slack_users (list of dict): Slack users; each must provide
            ``['profile']['real_name']`` and ``['id']``
        author_name (str): The commit author's full name
        threshold (float): Minimum similarity ratio a user must reach to count as a match

    Returns:
        str: ``<@id>`` markup for the best-matching user, or ``author_name``
             unchanged when no user reaches the threshold.
    """
    wanted = reformatted_full_name(author_name)

    # Score every user first, then keep only those that reach the threshold.
    scored = [
        (
            candidate,
            SequenceMatcher(
                a=wanted,
                b=reformatted_full_name(candidate['profile']['real_name']),
            ).ratio(),
        )
        for candidate in slack_users
    ]
    qualifying = [entry for entry in scored if entry[1] >= threshold]

    if not qualifying:
        return author_name

    # max() keeps the earliest user when several share the top ratio.
    best_user, _ = max(qualifying, key=lambda entry: entry[1])
    return "<@{id}>".format(id=best_user['id'])
diff.py 文件源码 项目:zing 作者: evernote 项目源码 文件源码 阅读 20 收藏 0 点赞 0 评论 0
def opcodes(self):
    """Return the difflib opcodes that turn ``self.target.active_uids``
    into ``self.new_unit_list``."""
    old_uids = self.target.active_uids
    new_uids = self.new_unit_list
    return difflib.SequenceMatcher(None, old_uids, new_uids).get_opcodes()
routing.py 文件源码 项目:Sci-Finder 作者: snverse 项目源码 文件源码 阅读 25 收藏 0 点赞 0 评论 0
def closest_rule(self, adapter):
    """Pick the rule on ``adapter.map`` that is most similar to this object.

    A rule's score is 0.98 times the similarity between its endpoint and
    ``self.endpoint``, plus 0.01 when all keys of ``self.values`` appear in
    ``rule.arguments``, plus 0.01 when ``self.method`` is in
    ``rule.methods``.

    Returns ``None`` when ``adapter`` is falsy or has no rules.
    """
    if not (adapter and adapter.map._rules):
        return None

    def _rank(rule):
        matcher = difflib.SequenceMatcher(None, rule.endpoint, self.endpoint)
        parts = [
            0.98 * matcher.ratio(),
            0.01 * bool(set(self.values or ()).issubset(rule.arguments)),
            0.01 * bool(rule.methods and self.method in rule.methods),
        ]
        return sum(parts)

    return max(adapter.map._rules, key=_rank)
routing.py 文件源码 项目:Sci-Finder 作者: snverse 项目源码 文件源码 阅读 24 收藏 0 点赞 0 评论 0
def closest_rule(self, adapter):
    """Find the best-scoring rule among ``adapter.map._rules``.

    Scoring: endpoint similarity to ``self.endpoint`` weighted 0.98, a 0.01
    bonus if the keys of ``self.values`` are a subset of ``rule.arguments``,
    and a 0.01 bonus if ``self.method`` is listed in ``rule.methods``.

    Returns ``None`` if ``adapter`` is falsy or there are no rules.
    """
    if not (adapter and adapter.map._rules):
        return None

    def _score(candidate):
        closeness = difflib.SequenceMatcher(
            None, candidate.endpoint, self.endpoint
        ).ratio()
        has_values = bool(set(self.values or ()).issubset(candidate.arguments))
        has_method = bool(candidate.methods and self.method in candidate.methods)
        return sum([0.98 * closeness, 0.01 * has_values, 0.01 * has_method])

    return max(adapter.map._rules, key=_score)
routing.py 文件源码 项目:harbour-sailfinder 作者: DylanVanAssche 项目源码 文件源码 阅读 42 收藏 0 点赞 0 评论 0
def closest_rule(self, adapter):
    """Return the rule from ``adapter.map._rules`` with the highest score.

    The score combines three terms: 0.98 times the similarity of the rule's
    endpoint to ``self.endpoint``, 0.01 if ``self.values`` keys all appear
    in ``rule.arguments``, and 0.01 if ``self.method`` is in
    ``rule.methods``.

    Returns ``None`` when ``adapter`` is falsy or its rule list is empty.
    """
    if not (adapter and adapter.map._rules):
        return None

    def rule_score(rule):
        similarity = difflib.SequenceMatcher(
            None, rule.endpoint, self.endpoint
        ).ratio()
        arguments_match = bool(set(self.values or ()).issubset(rule.arguments))
        method_match = bool(rule.methods and self.method in rule.methods)
        return sum([
            0.98 * similarity,
            0.01 * arguments_match,
            0.01 * method_match,
        ])

    return max(adapter.map._rules, key=rule_score)
routing.py 文件源码 项目:harbour-sailfinder 作者: DylanVanAssche 项目源码 文件源码 阅读 25 收藏 0 点赞 0 评论 0
def closest_rule(self, adapter):
    """Choose the rule on ``adapter.map`` that scores highest against this object.

    Score = 0.98 * (similarity of ``rule.endpoint`` and ``self.endpoint``)
    + 0.01 if the keys of ``self.values`` are a subset of ``rule.arguments``
    + 0.01 if ``self.method`` is in ``rule.methods``.

    Returns ``None`` when ``adapter`` is falsy or has no rules.
    """
    if not (adapter and adapter.map._rules):
        return None

    def grade(rule):
        terms = []
        terms.append(0.98 * difflib.SequenceMatcher(
            None, rule.endpoint, self.endpoint
        ).ratio())
        terms.append(0.01 * bool(set(self.values or ()).issubset(rule.arguments)))
        terms.append(0.01 * bool(rule.methods and self.method in rule.methods))
        return sum(terms)

    return max(adapter.map._rules, key=grade)
routing.py 文件源码 项目:Texty 作者: sarthfrey 项目源码 文件源码 阅读 31 收藏 0 点赞 0 评论 0
def closest_rule(self, adapter):
    """Return the highest-scoring rule in ``adapter.map._rules``.

    A rule earns 0.98 times its endpoint's similarity to ``self.endpoint``,
    an extra 0.01 when every key of ``self.values`` is in
    ``rule.arguments``, and an extra 0.01 when ``self.method`` is among
    ``rule.methods``.

    Returns ``None`` when ``adapter`` is falsy or has no rules.
    """
    if not (adapter and adapter.map._rules):
        return None

    def score_for(rule):
        endpoint_ratio = difflib.SequenceMatcher(
            None, rule.endpoint, self.endpoint
        ).ratio()
        values_covered = bool(set(self.values or ()).issubset(rule.arguments))
        method_allowed = bool(rule.methods and self.method in rule.methods)
        return sum([
            0.98 * endpoint_ratio,
            0.01 * values_covered,
            0.01 * method_allowed,
        ])

    return max(adapter.map._rules, key=score_for)
institutions_tags.py 文件源码 项目:epuap-watchdog 作者: ad-m 项目源码 文件源码 阅读 23 收藏 0 点赞 0 评论 0
def diff_text(a, b):
    """Render an inline HTML diff of string ``a`` against string ``b``.

    Text only in ``a`` is wrapped in ``<strike>``, text only in ``b`` in
    ``<strong>``, and unchanged text is emitted as-is. The result is passed
    through ``safe`` (presumably Django's ``mark_safe`` -- confirm against
    this module's imports), so every slice taken from the inputs is
    HTML-escaped first; otherwise markup contained in ``a`` or ``b`` would
    be emitted verbatim into the page.
    """
    from html import escape  # local import: keeps the block self-contained

    def removed(i1, i2):
        return "<strike>%s</strike>" % (escape(a[i1:i2]), )

    def added(j1, j2):
        return "<strong>%s</strong>" % (escape(b[j1:j2]), )

    render = {'replace': lambda i1, i2, j1, j2: removed(i1, i2) + added(j1, j2),
              'delete': lambda i1, i2, j1, j2: removed(i1, i2),
              'insert': lambda i1, i2, j1, j2: added(j1, j2),
              'equal': lambda i1, i2, j1, j2: escape(a[i1:i2])}

    s = SequenceMatcher(None, a, b)
    return safe("".join(render[tag](*args) for tag, *args in s.get_opcodes()))
unittests.py 文件源码 项目:OPMLtoMM 作者: adxsoft 项目源码 文件源码 阅读 30 收藏 0 点赞 0 评论 0
def print_diffs(expected,actual):
    """Print where ``actual`` first stops matching ``expected``.

    Only the first matching block reported by ``SequenceMatcher`` is used.
    The function prints the position in ``expected`` where that block ends,
    up to 100 characters of each string following the block, and the
    matched text itself.

    Args:
        expected: the reference string.
        actual: the string being compared against ``expected``.
    """
    s = SequenceMatcher(None, expected, actual)
    print('\n')

    # get_matching_blocks() always ends with a zero-length sentinel block,
    # so a first block exists even when nothing matches.
    block = s.get_matching_blocks()[0]

    # block is (start in expected, start in actual, length); the two starts
    # differ whenever one string has extra leading text.
    apos = block[0]
    bpos = block[1]
    aendpos = apos + block[2]
    bendpos = bpos + block[2]
    expected_chunk = expected[apos:aendpos]
    actual_chunk = actual[bpos:bendpos]

    # Single-argument print(...) behaves the same on Python 2 and Python 3.
    print('\nACTUAL has matching Error at '+str(aendpos))
    print('Expected ='+expected[aendpos:aendpos+100]+'\nFound    ='+actual[bendpos:bendpos+100])
    print('Matched values from 0 to '+str(aendpos-1)+' are')
    print(' EXPECTED='+expected_chunk)
    print(' ACTUAL  ='+actual_chunk)
    print('')

###########################################################################
## Unit Tests - OPML to MM conversions
###########################################################################
#
# These tests are designed to run in the local project folder opmltomm
validate_rapier.py 文件源码 项目:rapier 作者: apigee-labs 项目源码 文件源码 阅读 28 收藏 0 点赞 0 评论 0
def similar(self, a, b):
    """Return True when the similarity ratio of ``a`` and ``b`` is strictly
    greater than ``self.similarity_ratio``."""
    score = SequenceMatcher(None, a, b).ratio()
    return score > self.similarity_ratio
utilities.py 文件源码 项目:tsubasa-reddit-bot 作者: ArmandSyah 项目源码 文件源码 阅读 30 收藏 0 点赞 0 评论 0
def similar(a, b):
    """Return the ``SequenceMatcher`` similarity ratio of ``a`` and ``b``."""
    matcher = SequenceMatcher(None, a, b)
    return matcher.ratio()
utils.py 文件源码 项目:bpy_lambda 作者: bcongdon 项目源码 文件源码 阅读 24 收藏 0 点赞 0 评论 0
def get_best_similar(data):
    """Find the entry of a pool that is most similar to ``key[1]``.

    ``data`` is a ``(key, use_similar, similar_pool)`` triple: ``key[1]`` is
    the text to match, ``use_similar`` the minimum acceptable ratio, and
    ``similar_pool`` an iterable of candidates.

    Returns ``(key, best)``, where ``best`` is the winning candidate or
    ``None`` if no candidate reaches ``use_similar``.
    """
    import difflib

    key, use_similar, similar_pool = data
    wanted = key[1]

    # Same idea as difflib.get_close_matches, reduced to tracking only the
    # single best match. Candidates whose length is not strictly between
    # len(wanted) // 2 and len(wanted) * 2 are skipped without scoring.
    wanted_len = len(wanted)
    shortest = wanted_len // 2
    longest = wanted_len * 2

    matcher = difflib.SequenceMatcher()
    matcher.set_seq2(wanted)

    best = None
    for candidate in similar_pool:
        if not (shortest < len(candidate) < longest):
            continue
        matcher.set_seq1(candidate)
        # The two quick ratios are cheap upper bounds; bail out on them
        # before paying for the exact ratio.
        if matcher.real_quick_ratio() < use_similar:
            continue
        if matcher.quick_ratio() < use_similar:
            continue
        score = matcher.ratio()
        if score >= use_similar:
            # Raise the bar so later candidates must be at least as good.
            best = candidate
            use_similar = score
    return key, best
match_name.py 文件源码 项目:base_function 作者: Rockyzsu 项目源码 文件源码 阅读 22 收藏 0 点赞 0 评论 0
def similar(a, b):
    """Return the ``SequenceMatcher`` similarity ratio of ``a`` and ``b``."""
    comparison = SequenceMatcher(None, a, b)
    return comparison.ratio()
routing.py 文件源码 项目:RPoint 作者: george17-meet 项目源码 文件源码 阅读 30 收藏 0 点赞 0 评论 0
def closest_rule(self, adapter):
    """Return the rule in ``adapter.map._rules`` closest to this object.

    Closeness is 0.98 times the similarity of ``rule.endpoint`` to
    ``self.endpoint``, plus 0.01 if the keys of ``self.values`` all occur
    in ``rule.arguments``, plus 0.01 if ``self.method`` is in
    ``rule.methods``.

    Returns ``None`` when ``adapter`` is falsy or has no rules.
    """
    if not (adapter and adapter.map._rules):
        return None

    def _closeness(rule):
        endpoint_part = 0.98 * difflib.SequenceMatcher(
            None, rule.endpoint, self.endpoint
        ).ratio()
        values_part = 0.01 * bool(set(self.values or ()).issubset(rule.arguments))
        method_part = 0.01 * bool(rule.methods and self.method in rule.methods)
        return sum([endpoint_part, values_part, method_part])

    return max(adapter.map._rules, key=_closeness)
fuzz.py 文件源码 项目:isni-reconcile 作者: cmh2166 项目源码 文件源码 阅读 33 收藏 0 点赞 0 评论 0
def ratio(s1, s2):
    """Return the similarity of ``s1`` and ``s2`` scaled by 100.

    Both inputs are first passed through ``utils.make_type_consistent``;
    the scaled ratio is then passed through ``utils.intr``.
    """
    left, right = utils.make_type_consistent(s1, s2)
    similarity = SequenceMatcher(None, left, right).ratio()
    return utils.intr(100 * similarity)
fuzz.py 文件源码 项目:isni-reconcile 作者: cmh2166 项目源码 文件源码 阅读 32 收藏 0 点赞 0 评论 0
def partial_ratio(s1, s2):
    """Return the ratio of the most similar substring
    as a number between 0 and 100."""
    s1, s2 = utils.make_type_consistent(s1, s2)

    # Slide the shorter string along the longer one.
    shorter, longer = (s1, s2) if len(s1) <= len(s2) else (s2, s1)
    window = len(shorter)

    # Each matching block is (idx_in_shorter, idx_in_longer, length). The
    # best partial match lines up with at least one of these blocks, e.g.
    #   shorter = "abcd", longer = "XXXbcdeEEE"
    #   block = (1, 3, 3)
    #   best score === ratio("abcd", "Xbcd")
    matching_blocks = SequenceMatcher(None, shorter, longer).get_matching_blocks()

    scores = []
    for block in matching_blocks:
        # Start of the window in `longer` that aligns this block, clamped at 0.
        offset = block[1] - block[0]
        window_start = offset if offset > 0 else 0
        candidate = longer[window_start:window_start + window]

        score = SequenceMatcher(None, shorter, candidate).ratio()
        if score > .995:
            # Close enough to perfect; stop early.
            return 100
        scores.append(score)

    return utils.intr(100 * max(scores))


##############################
# Advanced Scoring Functions #
##############################
routing.py 文件源码 项目:isni-reconcile 作者: cmh2166 项目源码 文件源码 阅读 26 收藏 0 点赞 0 评论 0
def closest_rule(self, adapter):
    """Return the rule on ``adapter.map`` with the highest similarity score.

    The score is the sum of 0.98 times the similarity between
    ``rule.endpoint`` and ``self.endpoint``, 0.01 if all keys of
    ``self.values`` are in ``rule.arguments``, and 0.01 if ``self.method``
    is in ``rule.methods``.

    Returns ``None`` when ``adapter`` is falsy or has no rules.
    """
    if not (adapter and adapter.map._rules):
        return None

    def evaluate(rule):
        name_ratio = difflib.SequenceMatcher(
            None, rule.endpoint, self.endpoint
        ).ratio()
        args_subset = bool(set(self.values or ()).issubset(rule.arguments))
        method_known = bool(rule.methods and self.method in rule.methods)
        return sum([0.98 * name_ratio, 0.01 * args_subset, 0.01 * method_known])

    return max(adapter.map._rules, key=evaluate)
diff.py 文件源码 项目:isni-reconcile 作者: cmh2166 项目源码 文件源码 阅读 38 收藏 0 点赞 0 评论 0
def get_matching_blocks(self):
    """Return the matching blocks, dropping those that are too short.

    A block is kept when its length is greater than the effective
    threshold, or when its length is zero (the terminating sentinel block
    that ``difflib.SequenceMatcher.get_matching_blocks`` always appends).
    The effective threshold is ``self.threshold`` capped at a quarter of
    the shorter sequence's length, so short inputs can still yield matches.
    """
    # The cap must use the shorter of the two sequences, so both self.a
    # and self.b are measured.
    size = min(len(self.a), len(self.b))
    threshold = min(self.threshold, size / 4)
    actual = difflib.SequenceMatcher.get_matching_blocks(self)
    return [item for item in actual
            if item[2] > threshold
            or not item[2]]
compare_transcripts.py 文件源码 项目:speech-to-text 作者: pluteski 项目源码 文件源码 阅读 23 收藏 0 点赞 0 评论 0
def ratcliff_obershelp_similarity(a, b):
    """
    Approximate string matching via ``SequenceMatcher``.

    Returns the similarity ratio of ``a`` and ``b``: matching characters
    divided by the total number of characters in the two strings. Matching
    characters are those in the longest common run, plus those found by
    repeating the search in the unmatched regions on either side of it.

    Returns None when either argument is falsy (for example empty or None).
    """
    if not (a and b):
        return None
    return SequenceMatcher(None, a, b).ratio()


问题


面经


文章

微信
公众号

扫码关注公众号