def closest_rule(self, adapter):
    def score_rule(rule):
        return sum([
            0.98 * difflib.SequenceMatcher(
                None, rule.endpoint, self.endpoint
            ).ratio(),
            0.01 * bool(set(self.values or ()).issubset(rule.arguments)),
            0.01 * bool(rule.methods and self.method in rule.methods)
        ])

    if adapter and adapter.map._rules:
        return max(adapter.map._rules, key=score_rule)
    else:
        return None
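# --- Self-contained sketch (simplified, not from the project above) ---
# Shows how the weights in score_rule behave: the endpoint-name similarity
# dominates (0.98), while the two 0.01 boolean terms only break ties between
# rules whose endpoints are equally similar.
import difflib

def score(candidate, requested, args_match, method_match):
    return sum([
        0.98 * difflib.SequenceMatcher(None, candidate, requested).ratio(),
        0.01 * bool(args_match),
        0.01 * bool(method_match),
    ])

candidates = ["user_detail", "user_list", "order_detail"]
print(max(candidates, key=lambda c: score(c, "user_details", True, True)))
# -> "user_detail"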
def match_user(slack_users, author_name, threshold=0.6):
    """
    Do a fuzzy match of author name to full name. If it matches, return a formatted Slack handle.
    Else return the original full name.

    Args:
        slack_users (list of dict): A list of Slack users from their API
        author_name (str): The commit author's full name
        threshold (float): All matches must be at least this high to pass.

    Returns:
        str: The Slack markup for the handle of that author.
             If one can't be found, the author's name is returned unaltered.
    """
    lower_author_name = reformatted_full_name(author_name)

    def match_for_user(slack_user):
        """Get match ratio for a Slack user, or 0 if below threshold"""
        lower_name = reformatted_full_name(slack_user['profile']['real_name'])
        ratio = SequenceMatcher(a=lower_author_name, b=lower_name).ratio()
        if ratio >= threshold:
            return ratio
        else:
            return 0

    slack_matches = [(slack_user, match_for_user(slack_user)) for slack_user in slack_users]
    slack_matches = [(slack_user, match) for (slack_user, match) in slack_matches if match >= threshold]

    if len(slack_matches) > 0:
        matched_user = max(slack_matches, key=lambda pair: pair[1])[0]
        return "<@{id}>".format(id=matched_user['id'])
    else:
        return author_name
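# --- Hypothetical usage sketch (not from the original project) ---
# `reformatted_full_name` is not shown above; here it is assumed to simply
# collapse whitespace and lower-case the name, which is enough to exercise
# match_user.
from difflib import SequenceMatcher

def reformatted_full_name(name):
    return " ".join(name.split()).lower()

slack_users = [
    {"id": "U123", "profile": {"real_name": "Jane Q. Doe"}},
    {"id": "U456", "profile": {"real_name": "John Smith"}},
]
print(match_user(slack_users, "Jane Doe"))      # -> "<@U123>"
print(match_user(slack_users, "Someone Else"))  # -> "Someone Else" (no match above threshold)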
def opcodes(self):
    sm = difflib.SequenceMatcher(None,
                                 self.target.active_uids,
                                 self.new_unit_list)
    return sm.get_opcodes()
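# --- Standalone illustration (plain difflib, independent of the class above) ---
# get_opcodes() returns (tag, i1, i2, j1, j2) tuples describing how to turn the
# first sequence into the second.
import difflib

old = ["a", "b", "c", "d"]
new = ["a", "x", "c", "d", "e"]
for tag, i1, i2, j1, j2 in difflib.SequenceMatcher(None, old, new).get_opcodes():
    print(tag, old[i1:i2], new[j1:j2])
# equal   ['a'] ['a']
# replace ['b'] ['x']
# equal   ['c', 'd'] ['c', 'd']
# insert  [] ['e']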
def diff_text(a, b):
    s = SequenceMatcher(None, a, b)
    opcode = {'replace': lambda i1, i2, j1, j2: "<strike>%s</strike><strong>%s</strong>" % (a[i1:i2], b[j1:j2]),
              'delete': lambda i1, i2, j1, j2: "<strike>%s</strike>" % (a[i1:i2], ),
              'insert': lambda i1, i2, j1, j2: "<strong>%s</strong>" % (b[j1:j2], ),
              'equal': lambda i1, i2, j1, j2: a[i1:i2]}
    return safe("".join(opcode[tag](*args) for tag, *args in s.get_opcodes()))
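# --- Hypothetical usage sketch (not from the original project) ---
# `safe` is not defined above; for this demo it is assumed to be a plain
# pass-through (in the original it presumably marks the string as safe HTML).
from difflib import SequenceMatcher
safe = str

print(diff_text("my cat runs", "my dog runs"))
# -> "my <strike>cat</strike><strong>dog</strong> runs"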
def print_diffs(expected, actual):
    """Report where `actual` first diverges from `expected` after the initial matching block."""
    s = SequenceMatcher(None, expected, actual)
    print()
    for block in s.get_matching_blocks():
        apos, bpos, size = block          # a-index, b-index, length of the match
        aendpos = apos + size
        bendpos = bpos + size
        achunk = expected[apos:aendpos]
        bchunk = actual[bpos:bendpos]
        # print("a[%d] and b[%d] match for %d elements" % block)
        print('\nACTUAL has matching error at ' + str(bendpos))
        print('Expected =' + expected[aendpos:aendpos + 100] + '\nFound =' + actual[bendpos:bendpos + 100])
        print('Matched values from 0 to ' + str(aendpos - 1) + ' are')
        print('  EXPECTED=' + achunk)
        print('  ACTUAL  =' + bchunk)
        print('')
        # Only the first divergence is reported.
        break
###########################################################################
## Unit Tests - OPML to MM conversions
###########################################################################
#
# These tests are designed to run in the local project folder opmltomm
def similar(self, a, b):
    return SequenceMatcher(None, a, b).ratio() > self.similarity_ratio
def similar(a, b):
    return SequenceMatcher(None, a, b).ratio()
def get_best_similar(data):
    import difflib
    key, use_similar, similar_pool = data

    # Try to find some close key in the existing messages...
    # Optimized code inspired by difflib.get_close_matches (as we only need the best match).
    # We also never consider a match when the length differs by more than
    # [len_key / 2, len_key * 2] (which is valid as long as use_similar is not below ~0.7).
    # Gives an overall ~20% improvement!
    #tmp = difflib.get_close_matches(key[1], similar_pool, n=1, cutoff=use_similar)
    #if tmp:
    #    tmp = tmp[0]
    tmp = None
    s = difflib.SequenceMatcher()
    s.set_seq2(key[1])
    len_key = len(key[1])
    min_len = len_key // 2
    max_len = len_key * 2
    for x in similar_pool:
        if min_len < len(x) < max_len:
            s.set_seq1(x)
            # Cheap upper bounds first; only compute the full ratio when they pass.
            if s.real_quick_ratio() >= use_similar and s.quick_ratio() >= use_similar:
                sratio = s.ratio()
                if sratio >= use_similar:
                    tmp = x
                    use_similar = sratio
    return key, tmp
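# --- Hypothetical usage sketch (not from the original project) ---
# Find the entry in `similar_pool` closest to the key's text, accepting only
# matches with a ratio of at least 0.75.
similar_pool = ["Open recent file", "Save file as...", "Close window"]
key = ("some.msg.context", "Open recent files")
_, best = get_best_similar((key, 0.75, similar_pool))
print(best)  # -> "Open recent file"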
def similar(a, b):
    return SequenceMatcher(None, a, b).ratio()
    #return Differ(None, a, b).ratio()
def ratio(s1, s2):
    s1, s2 = utils.make_type_consistent(s1, s2)

    m = SequenceMatcher(None, s1, s2)
    return utils.intr(100 * m.ratio())
def partial_ratio(s1, s2):
    """Return the ratio of the most similar substring
    as a number between 0 and 100."""
    s1, s2 = utils.make_type_consistent(s1, s2)

    if len(s1) <= len(s2):
        shorter = s1
        longer = s2
    else:
        shorter = s2
        longer = s1

    m = SequenceMatcher(None, shorter, longer)
    blocks = m.get_matching_blocks()

    # each block represents a sequence of matching characters in a string
    # of the form (idx_1, idx_2, len)
    # the best partial match will block-align with at least one of those blocks
    #   e.g. shorter = "abcd", longer = "XXXbcdeEEE"
    #   block = (1, 3, 3)
    #   best score === ratio("abcd", "Xbcd")
    scores = []
    for block in blocks:
        long_start = block[1] - block[0] if (block[1] - block[0]) > 0 else 0
        long_end = long_start + len(shorter)
        long_substr = longer[long_start:long_end]

        m2 = SequenceMatcher(None, shorter, long_substr)
        r = m2.ratio()
        if r > .995:
            return 100
        else:
            scores.append(r)

    return utils.intr(100 * max(scores))
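# --- Self-contained illustration (plain difflib, independent of the utils helpers above) ---
# Shows the window-alignment idea used in partial_ratio: the shorter string is
# compared against the same-length window of the longer one that lines up with
# each matching block.
from difflib import SequenceMatcher

shorter, longer = "abcd", "XXXbcdeEEE"
best = 0.0
for i, j, size in SequenceMatcher(None, shorter, longer).get_matching_blocks():
    start = max(j - i, 0)
    window = longer[start:start + len(shorter)]
    best = max(best, SequenceMatcher(None, shorter, window).ratio())
print(round(best, 2))  # -> 0.75  (ratio of "abcd" vs the "Xbcd" window)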
##############################
# Advanced Scoring Functions #
##############################
def get_matching_blocks(self):
    size = min(len(self.a), len(self.b))
    threshold = min(self.threshold, size / 4)
    actual = difflib.SequenceMatcher.get_matching_blocks(self)
    # Drop blocks shorter than the threshold, but keep the zero-length terminator.
    return [item for item in actual
            if item[2] > threshold
            or not item[2]]
def ratcliff_obershelp_similarity(a, b):
    """
    A kind of approximate string matching.

    Computes the generalized Ratcliff/Obershelp similarity of two strings as twice
    the number of matching characters divided by the total number of characters in
    the two strings. Matching characters are those in the longest contiguous common
    subsequence plus, recursively, the matching characters in the unmatched regions
    on either side of it.
    """
    if a and b:
        return SequenceMatcher(None, a, b).ratio()
    else:
        return None
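# --- Quick usage check of the similarity above ---
from difflib import SequenceMatcher

print(ratcliff_obershelp_similarity("kitten", "sitting"))  # -> ~0.615  (2 * 4 matching chars / 13 total chars)
print(ratcliff_obershelp_similarity("", "sitting"))        # -> None  (empty input)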