import re

from fuzzywuzzy import fuzz  # every snippet below assumes this import


def get_type_from_title(title):
    """Guess the Romanian legal-act type (HG, OM, LEGE, OG, OUG) from a title."""
    engrol = RomanianHelper.englishize_romanian(title).lower()
    # Only the part of the title before 'pentru'/'privind' identifies the act type.
    stop_pos = len(engrol)
    magic_keyword_search_result = re.search(r'(pentru|privind)', engrol)
    if magic_keyword_search_result is not None:
        stop_pos = magic_keyword_search_result.start()
    search_space = engrol[:stop_pos]
    type_to_keywords = {
        'HG': 'hotarare',
        'OM': 'ordin',
        'LEGE': 'lege',
        'OG': 'ordonanta',
        'OUG': 'ordonanta de urgenta',
    }
    final_type = None
    max_ratio = 0
    for key, keyword in type_to_keywords.items():
        ratio = fuzz.ratio(keyword, search_space)
        if ratio > max_ratio:
            max_ratio = ratio
            final_type = key
    return final_type

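# Usage sketch for get_type_from_title. RomanianHelper is project code, so a
# trivial stand-in is defined here, and the sample title is hypothetical.
class RomanianHelper:
    @staticmethod
    def englishize_romanian(text):
        # Minimal diacritic stripping; the real helper is presumably more thorough.
        return text.translate(str.maketrans('ăâîșţț', 'aaistt'))

print(get_type_from_title('Ordonanță de urgență nr. 21 privind taxele locale'))  # expect 'OUG'
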
def extractMentorsMentees(data):
    # Fuzzy-matching variant kept for reference:
    # mentors = pd.DataFrame([row for row in data.iterrows() if fuzz.ratio(row[1][cmap[4]], "Mentor") > 90])
    # mentees = pd.DataFrame([row for row in data.iterrows() if fuzz.ratio(row[1][cmap[4]], "Mentee") > 90])
    mentors = data[data[cmap[4]] == "Mentor"].copy()  # .copy() avoids SettingWithCopyWarning below
    mentees = data[data[cmap[4]] == "Mentee"].copy()
    mentors['xx'] = list(range(len(mentors)))
    mentees['xx'] = list(range(len(mentees)))
    return mentors, mentees

def scoreTheMatch(peer1, peer2, field_name):
    return fuzz.ratio(peer1[field_name], peer2[field_name])

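# Usage sketch with hypothetical peer records: fuzz.ratio returns an integer
# similarity in [0, 100], so a perfect field match scores 100.
peer_a = {'hobbies': 'hiking, photography'}
peer_b = {'hobbies': 'photography and hiking'}
print(scoreTheMatch(peer_a, peer_b, 'hobbies'))  # word order keeps plain ratio well below 100
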
def asking_team(self, msg):
    equipes = utils.get_list_of_equipes_popular_names()  # e.g. 'Flamengo'
    for equipe in equipes:
        if fuzz.ratio(equipe, msg) > 49:
            self.user.team_slug = msg.lower().replace(" ", "-")
            self.user.team_popular_name = utils.get_popular_name_by_slug(self.user.team_slug)
            self.user.team_id = utils.get_equipe_id_by_slug(self.user.team_slug)
            if self.user.team_id is None:
                break  # the slug did not resolve to a team: fall through to the error reply
            self.state = State.CONFIRMING_TEAM
            return TextResponse("Irado! Seu time é o {}, né?".format(self.user.team_popular_name))
    return TextResponse('Você entrou com um time inválido! Por favor, tente novamente.')

def lookup(self, query):
    matches = process.extract(query, self.index.keys(), scorer=fuzz.ratio)
    result = None
    if query.endswith('+'):
        # A trailing '+' is significant in the data, so prefer matches that contain it.
        for match in matches:
            if '+' in match[0]:
                result = match[0]
                break
    else:
        result = matches[0][0]
    if result:
        result = self.db[self.db.db_cfg.database].FEHData.find_one({'id': self.index[result]})
    return result

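# Standalone sketch of the process.extract call inside lookup(), with a toy
# index (hypothetical names). process.extract returns (choice, score) pairs,
# best first, which is why matches[0][0] above is the top name.
from fuzzywuzzy import process

index = {'Lucina': 1, 'Lucina+': 2, 'Lucius': 3}
print(process.extract('lucina+', index.keys(), scorer=fuzz.ratio))
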
def wiki(self, query, amount=5, threshold=50):
    best = BestHandler()
    best.add(0, ('HOME', WIKI_URL))
    if query != '':
        for name, link in self._wiki.items():
            # Score against the last breadcrumb segment of the page name only.
            score = fuzz.ratio(query.lower(), name.split(ARROW_CHARACTER)[-1].strip().lower())
            best.add(score, (name, link))
    return best.to_list(amount, threshold)

def fuzzy_korean_ratio(str1: str, str2: str) -> int:
    """Fuzzy search with Korean, comparing the decomposed (NFD) forms."""
    return fuzz.ratio(
        normalize_korean_nfc_to_nfd(str1),
        normalize_korean_nfc_to_nfd(str2),
    )

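# Why decomposing helps: in NFC each Hangul syllable is one code point, so a
# one-jamo difference changes the whole character. A minimal sketch of the
# normalizer assumed above, using only the standard library:
import unicodedata

def normalize_korean_nfc_to_nfd(value: str) -> str:
    # Decompose precomposed syllables into their component jamo.
    return unicodedata.normalize('NFD', value)

print(fuzz.ratio('한국', '항국'))          # NFC: both syllables compared whole
print(fuzzy_korean_ratio('한국', '항국'))  # NFD: five of six jamo still match
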
async def html(bot, event: Message, sess, keyword: str):
    """
    Search the HTML reference.

    `{PREFIX}html tbody` (look up the `tbody` tag in the reference)
    """
    try:
        ref = sess.query(JSONCache).filter_by(name='html').one()
    except NoResultFound:
        await bot.say(
            event.channel,
            'The reference data has not been cached yet. Please try again later!'
        )
        return
    # Keep the single best fuzzy match over all reference entries.
    name = None
    link = None
    ratio = -1
    for _name, _link in ref.body:
        _ratio = fuzz.ratio(keyword, _name)
        if _ratio > ratio:
            name = _name
            link = _link
            ratio = _ratio
    if ratio > 40:
        await bot.say(
            event.channel,
            f':html: `{name}` - {link}'
        )
    else:
        await bot.say(
            event.channel,
            'Could not find a matching HTML element!'
        )

async def css(bot, event: Message, sess, keyword: str):
    """
    Search the CSS reference.

    `{PREFIX}css color` (look up the `color` property in the reference)
    """
    try:
        ref = sess.query(JSONCache).filter_by(name='css').one()
    except NoResultFound:
        await bot.say(
            event.channel,
            'The reference data has not been cached yet. Please try again later!'
        )
        return
    name = None
    link = None
    ratio = -1
    for _name, _link in ref.body:
        _ratio = fuzz.ratio(keyword, _name)
        if _ratio > ratio:
            name = _name
            link = _link
            ratio = _ratio
    if ratio > 40:
        await bot.say(
            event.channel,
            f':css: `{name}` - {link}'
        )
    else:
        await bot.say(
            event.channel,
            'Could not find a matching CSS keyword!'
        )

async def python(bot, event: Message, sess, keyword: str):
    """
    Search the Python library reference.

    `{PREFIX}py re` (look up the `re` module in the reference)
    """
    try:
        ref = sess.query(JSONCache).filter_by(name='python').one()
    except NoResultFound:
        await bot.say(
            event.channel,
            'The reference data has not been cached yet. Please try again later!'
        )
        return
    name = None
    link = None
    ratio = -1
    for code, _name, _link in ref.body:
        # Prefer matching on the module code when it exists, else on the name.
        if code:
            _ratio = fuzz.ratio(keyword, code)
        else:
            _ratio = fuzz.ratio(keyword, _name)
        if _ratio > ratio:
            name = _name
            link = _link
            ratio = _ratio
    if ratio > 40:
        await bot.say(
            event.channel,
            f':python: {name} - {link}'
        )
    else:
        await bot.say(
            event.channel,
            'Could not find a matching Python library!'
        )

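# The three handlers above share one best-match loop; a standalone sketch of
# that pattern (function name and sample data are hypothetical):
def best_reference_match(keyword, entries, threshold=40):
    """Return (name, link) for the closest entry, or None below the threshold."""
    best_name, best_link, best_ratio = None, None, -1
    for name, link in entries:
        ratio = fuzz.ratio(keyword, name)
        if ratio > best_ratio:
            best_name, best_link, best_ratio = name, link, ratio
    return (best_name, best_link) if best_ratio > threshold else None

print(best_reference_match('tbody', [('table', 'https://example.com/table'),
                                     ('tbody', 'https://example.com/tbody')]))
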
def strict_compare_strings(string_one, string_two):
    # Best score across the four FuzzyWuzzy scorers.
    return max(
        fuzz.ratio(string_one, string_two),
        fuzz.partial_ratio(string_one, string_two),
        fuzz.token_sort_ratio(string_one, string_two),
        fuzz.token_set_ratio(string_one, string_two),
    )

def compare_strings(string_one, string_two):
    # Like strict_compare_strings, but without the lenient partial_ratio.
    return max(
        fuzz.ratio(string_one, string_two),
        fuzz.token_sort_ratio(string_one, string_two),
        fuzz.token_set_ratio(string_one, string_two),
    )

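# How the scorers differ (exact numbers vary with fuzzywuzzy version):
# token_sort_ratio sorts tokens before comparing, so word order is ignored,
# and token_set_ratio additionally tolerates duplicated tokens.
a, b = 'fuzzy wuzzy was a bear', 'wuzzy fuzzy was a bear'
print(fuzz.ratio(a, b))             # penalized by the swapped words
print(fuzz.token_sort_ratio(a, b))  # 100: same tokens, different order
print(compare_strings(a, b))        # takes the best of the three scorers
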
def _match_place_name_to_wiki_page(place_name, wiki_page_titles):
    """Work horse of `geosearch`: separated for easier testing & debugging.

    For example places we can't yet match, see `test_wp._CHALLENGE_PLACE_NAME_TO_WIKI`.

    Potential improvements:
    - Change the existing dials (for each pass?): local vars (e.g. _THRESHOLD), radius/limit kwargs to the
      Wikipedia API.
    - Change scorers on different passes, e.g. partial_ratio is more lenient than ratio.
    - Modify the full_process processor: it removes non-letter/number characters, so wiki disambiguation markup
      can cause undesired matching. For example, "Boulevard (restaurant)" becomes "boulevard restaurant", which
      matches "mourad restaurant" at 79.
    - Add additional processors:
        - Modify plurals, articles, accents (full_process will just remove accented characters :( ).
        - Remove city/state name occurrences in wiki pages, e.g. "San Francisco Ferry Building" -> "Ferry Building"
          could better match the Yelp "Ferry Building Marketplace" (disclaimer: US-centric).
    - Modify the place_name query string. These may be better than their "remove" counterparts because adding
      characters gives the scorer more information to match against, and so may be more accurate than removing them.
        - (reverse ^) Add city/state to place names: "Ferry Building Marketplace" ->
          "San Francisco Ferry Building Marketplace".
        - Reverse wiki_disambiguation_processor: add common Wikipedia endings: (restaurant), (California), etc.
    - Consider running the most lenient processors first, moving towards stricter ones, like a filter. Right now
      we run the strictest first.
    """
    # We run multiple processor passes: if there is no match, the next processor may be more lenient.
    for processor in _PLACE_NAME_TO_WIKI_PAGE_PROCESSORS:
        matches = process.extractBests(place_name, wiki_page_titles, scorer=_SCORER, processor=processor,
                                       score_cutoff=_THRESHOLD)
        if matches:
            if len(matches) > 1:
                print('More than one match above threshold', matches, file=sys.stderr)
            return matches[0][0]
    return None

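# A hedged sketch of the module-level knobs the function expects; the real
# project defines its own processor list, and these values are illustrative.
import sys
from fuzzywuzzy import fuzz, process
from fuzzywuzzy.utils import full_process

_SCORER = fuzz.ratio
_THRESHOLD = 85
_PLACE_NAME_TO_WIKI_PAGE_PROCESSORS = [
    full_process,                                         # strictest pass: default cleanup only
    lambda s: full_process(s).replace('restaurant', ''),  # more lenient: drop a common suffix
]
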
def is_almost_equal(self, other):
    name1 = self.name.lower()
    name2 = other.name.lower()
    return fuzz.ratio(name1, name2) >= MIN_FUZZY_RATIO

def fuzzy_fit(x, y):
    """
    Returns whether x and y are similar in fuzzy string matching
    :param x: the first mention
    :param y: the second mention
    :return: whether x and y are similar in fuzzy string matching
    """
    if fuzz.ratio(x, y) >= 90:
        return True
    # Convert numbers to words (via num2words) so '3' can match 'three'.
    x_words = [num2words(int(w)).replace('-', ' ') if w.isdigit() else w for w in x.split()]
    y_words = [num2words(int(w)).replace('-', ' ') if w.isdigit() else w for w in y.split()]
    return fuzz.ratio(' '.join(x_words), ' '.join(y_words)) >= 85

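# Usage sketch: the num2words pass lets digits match their spelled-out form.
from num2words import num2words

print(fuzzy_fit('I have 3 cats', 'I have three cats'))  # True after digit conversion
print(fuzzy_fit('red bicycle', 'blue airplane'))        # False
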
def update_ner_pubtator(self):
    '''Process sentence tokens and see if any match to PubTator entity
    mentions. If so, replace their token['ner'] with the PubTator NER
    class (CHEMICAL, DISEASE, etc.)
    '''
    if self.pubtator:
        for sent in self.sentences:
            sentence_index = sent['index']
            # Are there any PubTator NER tags for this sentence?
            if not self.pubtator.sentence_ner[sentence_index]:
                continue
            # Process PubTator NER: read CoreNLP tokens and see if any of them match exactly.
            for t in sent['tokens']:
                for biothing in self.pubtator.sentence_ner[sentence_index]:
                    start, end = biothing.corenlp_offsets
                    if t['characterOffsetBegin'] == start and t['characterOffsetEnd'] == end:
                        # Exact offset match: update CoreNLP NER with PubTator NER.
                        biothing.matched_corenlp_token = t['index']
                        t['ner'] = biothing.ner_type
                        break
                    elif fuzz and self.fuzzy_ner_match:
                        # Optional fuzzy fallback when the offsets do not line up exactly.
                        if fuzz.ratio(t['originalText'].lower(), biothing.token.lower()) > self.fuzzy_ner_match:
                            biothing.matched_corenlp_token = t['index']
                            t['ner'] = biothing.ner_type
                            break
        self.pubtator_ner_updated = True
    return self.pubtator_ner_updated

import itertools as itr


def get_best_match(self, input, corpus, tolerance):
    # Scan every (input, corpus) pair and keep the highest-scoring one.
    # Note: the `tolerance` parameter is accepted but never used here.
    cartesian = itr.product(input, corpus)
    max_match = 0
    max_p = ""
    max_q = ""
    for p, q in cartesian:
        match_percentage = fuzz.ratio(p, q)
        if match_percentage > max_match:
            max_match = match_percentage
            max_p = p
            max_q = q
    return max_p, max_q

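# Usage sketch: the function is defined with an unused `self`, so it can be
# exercised directly by passing None (the lists here are hypothetical).
queries = ['colour', 'cattle']
corpus = ['color', 'kettle', 'cactus']
print(get_best_match(None, queries, corpus, tolerance=0))  # ('colour', 'color')
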
def normalizeMalwareNamesStep1(malwarenames):
    # Join the malware-name list into one string, then split it on every
    # separator in TRENNER (German for 'separator') and lowercase it.
    names = " ".join(malwarenames)
    for trn in TRENNER:
        names = names.replace(trn, " ").lower()
    # Replace longer aliases first so their substrings cannot clobber them.
    for key in sorted(MAPPING, key=len, reverse=True):
        names = names.replace(key, MAPPING[key])
    return names

# Distance derived from the ratio, token_sort and token_set ratio methods in
# FuzzyWuzzy: 0.0 means identical, 1.0 means completely dissimilar.
def computeSimilarity(s1, s2):
    return 1.0 - (0.01 * max(
        fuzz.ratio(s1, s2),
        fuzz.token_sort_ratio(s1, s2),
        fuzz.token_set_ratio(s1, s2)))
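
# Usage sketch: identical or reordered strings give 0.0, unrelated strings
# approach 1.0, the orientation that distance-based clustering code expects.
print(computeSimilarity('trojan.win32.agent', 'trojan.win32.agent'))  # 0.0
print(computeSimilarity('win32 trojan agent', 'agent trojan win32'))  # 0.0 via token_sort
print(computeSimilarity('zeus', 'conficker'))                         # close to 1.0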