python类ratio()的实例源码

legal.py 文件源码 项目:czl-scrape 作者: code4romania 项目源码 文件源码 阅读 17 收藏 0 点赞 0 评论 0
def get_type_from_title(title):
        """Guess the legal act type code from a document title.

        The title is normalised with ``RomanianHelper.englishize_romanian`` and
        lower-cased, cut at the first occurrence of ``pentru`` or ``privind``,
        and the remaining prefix is scored with ``fuzz.ratio`` against one
        keyword per act type.

        :param title: the document title
        :return: ``'HG'``, ``'OM'``, ``'LEGE'``, ``'OG'`` or ``'OUG'`` (the
            type whose keyword scores highest; the first one listed wins
            ties), or ``None`` when every keyword scores 0
        """
        engrol = RomanianHelper.englishize_romanian(title).lower()

        # The cut-off indexes into `engrol`, so its default must come from
        # `engrol` itself: the normalised text is not guaranteed to have the
        # same length as the raw title.
        stop_pos = len(engrol)
        magic_keyword_search_result = re.search(r'(pentru|privind)', engrol)
        if magic_keyword_search_result is not None:
            stop_pos = magic_keyword_search_result.start()

        search_space = engrol[:stop_pos]

        type_to_keywords = {
            'HG': 'hotarare',
            'OM': 'ordin',
            'LEGE': 'lege',
            'OG': 'ordonanta',
            'OUG': 'ordonanta de urgenta',
        }

        final_type = None
        max_ratio = 0

        for act_type, keyword in type_to_keywords.items():
            ratio = fuzz.ratio(keyword, search_space)
            # Strict comparison: an earlier entry keeps its place on a tie.
            if ratio > max_ratio:
                max_ratio = ratio
                final_type = act_type

        return final_type
matching.py 文件源码 项目:MentorMenteeMatching 作者: datacommunitydc 项目源码 文件源码 阅读 24 收藏 0 点赞 0 评论 0
def extractMentorsMentees(data):
  """Split ``data`` into mentor rows and mentee rows.

  Rows are selected by exact comparison of the column named ``cmap[4]``
  with ``"Mentor"`` / ``"Mentee"``. Each result gets an ``'xx'`` column
  holding the row's position (0..n-1) within that result.

  :param data: DataFrame containing the ``cmap[4]`` column
  :return: tuple ``(mentors, mentees)`` of independent DataFrames
  """
  # NOTE: an earlier variant selected rows with fuzz.ratio(...) > 90 instead
  # of exact equality; exact equality is what is in effect.
  role = data[cmap[4]]
  # Take explicit copies so that adding the 'xx' column below modifies our
  # own frames and does not trigger pandas' SettingWithCopyWarning.
  mentors = data[role == "Mentor"].copy()
  mentees = data[role == "Mentee"].copy()
  mentors['xx'] = list(range(len(mentors)))
  mentees['xx'] = list(range(len(mentees)))
  return mentors, mentees
matching.py 文件源码 项目:MentorMenteeMatching 作者: datacommunitydc 项目源码 文件源码 阅读 31 收藏 0 点赞 0 评论 0
def scoreTheMatch(peer1, peer2, field_name):
  """Return the ``fuzz.ratio`` score of one field compared across two peers.

  :param peer1: first record, indexable by ``field_name``
  :param peer2: second record, indexable by ``field_name``
  :param field_name: key of the field to compare
  :return: the value returned by ``fuzz.ratio`` for the two field values
  """
  first_value = peer1[field_name]
  second_value = peer2[field_name]
  return fuzz.ratio(first_value, second_value)
conversation.py 文件源码 项目:globot 作者: pedroeusebio 项目源码 文件源码 阅读 24 收藏 0 点赞 0 评论 0
def asking_team(self, msg):
        """Handle the user's answer when asked for their team.

        If ``msg`` scores above 49 with ``fuzz.ratio`` against any known
        popular team name, a slug is derived from ``msg`` and stored on
        ``self.user`` together with the popular name and team id looked up
        for that slug. When the id lookup succeeds the conversation moves to
        ``State.CONFIRMING_TEAM``.

        :param msg: the text the user sent
        :return: a ``TextResponse`` asking for confirmation, or one reporting
            an invalid team
        """
        # Popular names are plain strings, e.g. 'Flamengo'.
        popular_names = utils.get_list_of_equipes_popular_names()
        looks_like_a_team = any(
            fuzz.ratio(candidate, msg) > 49 for candidate in popular_names
        )

        if looks_like_a_team:
            user = self.user
            user.team_slug = msg.lower().replace(" ", "-")
            user.team_popular_name = utils.get_popular_name_by_slug(user.team_slug)
            user.team_id = utils.get_equipe_id_by_slug(user.team_slug)
            # The slug depends only on msg, so one failed id lookup is final.
            if user.team_id is not None:
                self.state = State.CONFIRMING_TEAM
                return TextResponse("Irado! ?? Seu time é o {}, né?".format(user.team_popular_name))

        return TextResponse('Você entrou com um time inválido! Por favor, tente novamente.')
feh_core.py 文件源码 项目:apex-sigma-plugins 作者: lu-ci 项目源码 文件源码 阅读 21 收藏 0 点赞 0 评论 0
def lookup(self, query):
        """Find the database record whose indexed name best matches ``query``.

        Candidate names are the keys of ``self.index``, ranked by
        ``process.extract`` with ``fuzz.ratio``. When ``query`` ends with
        ``'+'`` the first ranked candidate containing a ``'+'`` is used;
        otherwise the top-ranked candidate is used.

        :param query: the search text
        :return: the document returned by ``find_one`` for the chosen name,
            or ``None`` when nothing suitable was found
        """
        matches = process.extract(query, self.index.keys(), scorer=fuzz.ratio)
        if not matches:
            # Empty index: nothing to pick from (previously an IndexError).
            return None

        # endswith() is safe on an empty query, unlike query[-1].
        if query.endswith('+'):
            result = next((match[0] for match in matches if '+' in match[0]), None)
        else:
            result = matches[0][0]

        if result:
            result = self.db[self.db.db_cfg.database].FEHData.find_one({'id': self.index[result]})
        return result
search.py 文件源码 项目:rules-bot 作者: bvanrijn 项目源码 文件源码 阅读 22 收藏 0 点赞 0 评论 0
def wiki(self, query, amount=5, threshold=50):
        """Rank wiki pages against ``query`` and return the best ones.

        A ``('HOME', WIKI_URL)`` entry is always registered with score 0.
        For a non-empty query, every entry of ``self._wiki`` is scored with
        ``fuzz.ratio`` on the lower-cased query versus the lower-cased,
        stripped last ``ARROW_CHARACTER``-separated segment of its name.

        :param query: the search text; ``''`` skips scoring entirely
        :param amount: forwarded to ``BestHandler.to_list``
        :param threshold: forwarded to ``BestHandler.to_list``
        :return: whatever ``BestHandler.to_list(amount, threshold)`` returns
        """
        ranking = BestHandler()
        ranking.add(0, ('HOME', WIKI_URL))

        if query == '':
            return ranking.to_list(amount, threshold)

        for page_name, page_link in self._wiki.items():
            needle = query.lower()
            # Only the segment after the last arrow is compared.
            leaf_title = page_name.split(ARROW_CHARACTER)[-1].strip().lower()
            ranking.add(fuzz.ratio(needle, leaf_title), (page_name, page_link))

        return ranking.to_list(amount, threshold)
util.py 文件源码 项目:yui 作者: item4 项目源码 文件源码 阅读 20 收藏 0 点赞 0 评论 0
def fuzzy_korean_ratio(str1: str, str2: str) -> int:
    """Return the ``fuzz.ratio`` of two strings after Korean normalisation.

    Both inputs are passed through ``normalize_korean_nfc_to_nfd`` before
    being compared.
    """

    left = normalize_korean_nfc_to_nfd(str1)
    right = normalize_korean_nfc_to_nfd(str2)
    return fuzz.ratio(left, right)
ref.py 文件源码 项目:yui 作者: item4 项目源码 文件源码 阅读 20 收藏 0 点赞 0 评论 0
async def html(bot, event: Message, sess, keyword: str):
    """
    Look up an HTML reference entry by keyword and post its link.

    `{PREFIX}html tbody` (searches the reference for the `tbody` tag)

    """
    # NOTE: the body awaits bot.say(), so this must be a coroutine function;
    # a plain `def` containing `await` does not compile.
    # NOTE(review): the user-facing messages below appear to be mis-encoded
    # ('?' runs); they are reproduced unchanged.

    try:
        ref = sess.query(JSONCache).filter_by(name='html').one()
    except NoResultFound:
        # No cached reference data is stored under the name 'html'.
        await bot.say(
            event.channel,
            '?? ???? ?? ???? ????? ? ????. ??? ??????!'
        )
        return

    # Keep the first entry with the highest fuzz.ratio score.
    name = None
    link = None
    ratio = -1
    for _name, _link in ref.body:
        _ratio = fuzz.ratio(keyword, _name)
        if _ratio > ratio:
            name, link, ratio = _name, _link, _ratio

    # Scores of 40 or below are treated as "not found".
    if ratio > 40:
        await bot.say(
            event.channel,
            f':html: `{name}` - {link}'
        )
    else:
        await bot.say(
            event.channel,
            '??? HTML Element? ?? ?????!'
        )
ref.py 文件源码 项目:yui 作者: item4 项目源码 文件源码 阅读 23 收藏 0 点赞 0 评论 0
async def css(bot, event: Message, sess, keyword: str):
    """
    Look up a CSS reference entry by keyword and post its link.

    `{PREFIX}css color` (searches the reference for `color`)

    """
    # NOTE: the body awaits bot.say(), so this must be a coroutine function;
    # a plain `def` containing `await` does not compile.
    # NOTE(review): the user-facing messages below appear to be mis-encoded
    # ('?' runs); they are reproduced unchanged.

    try:
        ref = sess.query(JSONCache).filter_by(name='css').one()
    except NoResultFound:
        # No cached reference data is stored under the name 'css'.
        await bot.say(
            event.channel,
            '?? ???? ?? ???? ????? ? ????. ??? ??????!'
        )
        return

    # Keep the first entry with the highest fuzz.ratio score.
    name = None
    link = None
    ratio = -1
    for _name, _link in ref.body:
        _ratio = fuzz.ratio(keyword, _name)
        if _ratio > ratio:
            name, link, ratio = _name, _link, _ratio

    # Scores of 40 or below are treated as "not found".
    if ratio > 40:
        await bot.say(
            event.channel,
            f':css: `{name}` - {link}'
        )
    else:
        await bot.say(
            event.channel,
            '??? CSS ?? ??? ?? ?????!'
        )
ref.py 文件源码 项目:yui 作者: item4 项目源码 文件源码 阅读 19 收藏 0 点赞 0 评论 0
async def python(bot, event: Message, sess, keyword: str):
    """
    Look up a Python library reference entry by keyword and post its link.

    `{PREFIX}py re` (searches the reference for the `re` library)

    """
    # NOTE: the body awaits bot.say(), so this must be a coroutine function;
    # a plain `def` containing `await` does not compile.
    # NOTE(review): the user-facing messages below appear to be mis-encoded
    # ('?' runs); they are reproduced unchanged.

    try:
        ref = sess.query(JSONCache).filter_by(name='python').one()
    except NoResultFound:
        # No cached reference data is stored under the name 'python'.
        await bot.say(
            event.channel,
            '?? ???? ?? ???? ????? ? ????. ??? ??????!'
        )
        return

    # Keep the first entry with the highest fuzz.ratio score.
    name = None
    link = None
    ratio = -1
    for code, _name, _link in ref.body:
        # Match against the entry's code when it has one, else its name.
        _ratio = fuzz.ratio(keyword, code if code else _name)
        if _ratio > ratio:
            name, link, ratio = _name, _link, _ratio

    # Scores of 40 or below are treated as "not found".
    if ratio > 40:
        await bot.say(
            event.channel,
            f':python: {name} - {link}'
        )
    else:
        await bot.say(
            event.channel,
            '??? Python library? ?? ?????!'
        )
utils.py 文件源码 项目:ModTools 作者: MattBSG 项目源码 文件源码 阅读 16 收藏 0 点赞 0 评论 0
def strict_compare_strings(string_one, string_two):
    """Return the best score among four fuzzy scorers for two strings.

    The scorers are ``fuzz.ratio``, ``fuzz.partial_ratio``,
    ``fuzz.token_sort_ratio`` and ``fuzz.token_set_ratio``. Each is called
    exactly once.

    :param string_one: first string
    :param string_two: second string
    :return: the highest of the four scores, never less than 0
    """
    return max(
        0,  # floor, matching the initial value of the original running maximum
        fuzz.ratio(string_one, string_two),
        fuzz.partial_ratio(string_one, string_two),
        fuzz.token_sort_ratio(string_one, string_two),
        fuzz.token_set_ratio(string_one, string_two),
    )
utils.py 文件源码 项目:ModTools 作者: MattBSG 项目源码 文件源码 阅读 16 收藏 0 点赞 0 评论 0
def compare_strings(string_one, string_two):
    """Return the best score among three fuzzy scorers for two strings.

    The scorers are ``fuzz.ratio``, ``fuzz.token_sort_ratio`` and
    ``fuzz.token_set_ratio``. Each is called exactly once.

    :param string_one: first string
    :param string_two: second string
    :return: the highest of the three scores, never less than 0
    """
    return max(
        0,  # floor, matching the initial value of the original running maximum
        fuzz.ratio(string_one, string_two),
        fuzz.token_sort_ratio(string_one, string_two),
        fuzz.token_set_ratio(string_one, string_two),
    )
wp.py 文件源码 项目:prox-server 作者: mozilla-mobile 项目源码 文件源码 阅读 20 收藏 0 点赞 0 评论 0
def _match_place_name_to_wiki_page(place_name, wiki_page_titles):
    """Pick the wiki page title that best matches a place name, if any.

    This is the core of `geosearch`, kept separate so it can be tested and
    debugged on its own. Places that cannot be matched yet are listed in
    `test_wp._CHALLENGE_PLACE_NAME_TO_WIKI`.

    Each processor in `_PLACE_NAME_TO_WIKI_PAGE_PROCESSORS` is tried in
    order; the first pass yielding at least one title at or above
    `_THRESHOLD` (scored with `_SCORER`) decides the result. Returns the
    top title of that pass, or None when no pass produces a match.

    Ideas for improving the matching:
    - Tune the existing dials (possibly per pass): module constants such as
      _THRESHOLD, and the radius/limit kwargs given to the Wikipedia API.
    - Use a different scorer per pass, e.g. partial_ratio is more lenient
      than ratio.
    - Adjust the full_process processor: it strips non-alphanumeric
      characters, so wiki disambiguation markup can cause unwanted matches.
      For example "Boulevard (restaurant)" becomes "boulevard  restaurant",
      which matches "mourad restaurant" at 79.
    - Add further processors:
      - Handle plurals, articles and accents (full_process simply drops
        accented characters).
      - Strip city/state name occurrences from wiki pages, e.g.
        "San Francisco Ferry Building" -> "Ferry Building" could match the
        Yelp "Ferry Building Marketplace" better (disclaimer: US-centric).
    - Rewrite the place_name query instead. Adding characters may beat the
      "remove" variants above because it gives the matcher more information:
      - (the reverse of the above) add city/state to place names:
        "Ferry Building Marketplace" ->
        "San Francisco Ferry Building Marketplace"
      - Reverse wiki_disambiguation_processor: append common Wikipedia
        endings such as (restaurant), (California), etc.
    - Consider running the most lenient processors first and moving towards
      stricter ones, like a filter; currently the strictest runs first.
    """
    # Several passes: when one processor finds nothing, the next may be
    # more lenient.
    for name_processor in _PLACE_NAME_TO_WIKI_PAGE_PROCESSORS:
        candidates = process.extractBests(
            place_name,
            wiki_page_titles,
            scorer=_SCORER,
            processor=name_processor,
            score_cutoff=_THRESHOLD,
        )
        if not candidates:
            continue
        if len(candidates) > 1:
            print('More than one match above threshold', candidates, file=sys.stderr)
        best_title = candidates[0][0]
        return best_title
    return None
base.py 文件源码 项目:cinebot 作者: Nekmo 项目源码 文件源码 阅读 24 收藏 0 点赞 0 评论 0
def is_almost_equal(self, other):
        """Tell whether this object's name is a close fuzzy match for another's.

        Both names are lower-cased and scored with ``fuzz.ratio``.

        :param other: object exposing a ``name`` string attribute
        :return: True when the score is at least ``MIN_FUZZY_RATIO``
        """
        own_name, other_name = self.name.lower(), other.name.lower()
        score = fuzz.ratio(own_name, other_name)
        return score >= MIN_FUZZY_RATIO
eval_entity_coref.py 文件源码 项目:OKR 作者: vered1986 项目源码 文件源码 阅读 19 收藏 0 点赞 0 评论 0
def fuzzy_fit(x, y):
    """
    Returns whether x and y are similar in fuzzy string matching.

    The raw strings are compared first (threshold 90). If that fails, purely
    numeric words are spelled out with ``num2words`` and the strings are
    compared again with a lower threshold (85).

    :param x: the first mention
    :param y: the second mention
    :return: whether x and y are similar in fuzzy string matching
    """
    if fuzz.ratio(x, y) >= 90:
        return True

    def spell_out_numbers(mention):
        """Replace each decimal-number word in ``mention`` with its words."""
        # isdecimal() rather than isdigit(): isdigit() is also true for
        # characters such as '²' that int() cannot parse (ValueError).
        return ' '.join(
            num2words(int(word)).replace('-', ' ') if word.isdecimal() else word
            for word in mention.split()
        )

    return fuzz.ratio(spell_out_numbers(x), spell_out_numbers(y)) >= 85
corenlp_parse.py 文件源码 项目:bioshovel 作者: SuLab 项目源码 文件源码 阅读 16 收藏 0 点赞 0 评论 0
def update_ner_pubtator(self):
        """Overwrite token NER tags with PubTator NER classes where they match.

        For every sentence that has PubTator entity mentions, each token is
        checked against the mentions in order. A token matches a mention when
        its character offsets equal the mention's ``corenlp_offsets``, or,
        if ``fuzz`` and ``self.fuzzy_ner_match`` are both truthy, when the
        ``fuzz.ratio`` of the lower-cased texts exceeds
        ``self.fuzzy_ner_match``. On the first match the token's ``'ner'`` is
        set to the mention's ``ner_type`` and the mention records the token
        index.

        Returns ``self.pubtator_ner_updated``, which is set to True whenever
        ``self.pubtator`` is truthy.
        """
        if not self.pubtator:
            return self.pubtator_ner_updated

        for sentence in self.sentences:
            mentions = self.pubtator.sentence_ner[sentence['index']]

            # Nothing tagged by PubTator in this sentence.
            if not mentions:
                continue

            for token in sentence['tokens']:
                for mention in mentions:
                    begin, finish = mention.corenlp_offsets
                    is_match = (
                        # exact offset match
                        (token['characterOffsetBegin'] == begin
                         and token['characterOffsetEnd'] == finish)
                        # optional fuzzy text match
                        or (fuzz and self.fuzzy_ner_match
                            and fuzz.ratio(token['originalText'].lower(),
                                           mention.token.lower()) > self.fuzzy_ner_match)
                    )
                    if is_match:
                        # Replace the CoreNLP tag with the PubTator one and
                        # stop at the first matching mention for this token.
                        mention.matched_corenlp_token = token['index']
                        token['ner'] = mention.ner_type
                        break

        self.pubtator_ner_updated = True
        return self.pubtator_ner_updated
BiographyAnalyzer.py 文件源码 项目:GitHub-Recommender 作者: himangshunits 项目源码 文件源码 阅读 18 收藏 0 点赞 0 评论 0
def get_best_match(self, input, corpus, tolerance):
        """Return the pair from ``input`` x ``corpus`` with the top ``fuzz.ratio``.

        Every combination of one element of ``input`` and one of ``corpus``
        is scored; the first pair reaching the highest score above 0 wins.

        :param input: iterable of candidate strings
        :param corpus: iterable of reference strings
        :param tolerance: accepted but not used by this implementation
        :return: tuple ``(p, q)`` of the best pair, or ``("", "")`` when no
            pair scores above 0
        """
        best_score = 0
        best_pair = ("", "")
        for left, right in itr.product(input, corpus):
            score = fuzz.ratio(left, right)
            if score <= best_score:
                continue
            best_score = score
            best_pair = (left, right)
        return best_pair
vtTool.py 文件源码 项目:Snakepit 作者: K4lium 项目源码 文件源码 阅读 17 收藏 0 点赞 0 评论 0
def normalizeMalwareNamesStep1(malwarenames):
    """Flatten a list of malware names into one normalised string.

    The names are joined with spaces, each separator in ``TRENNER`` is
    replaced by a space, and every key of ``MAPPING`` (longest keys first)
    is replaced by its mapped value.

    :param malwarenames: list of name strings
    :return: the normalised string
    """
    joined = " ".join(malwarenames)

    # NOTE: lower-casing happens after each separator replacement, so the
    # text is only lower-cased when TRENNER has at least one entry, and every
    # separator after the first is matched against already lower-cased text.
    for separator in TRENNER:
        joined = joined.replace(separator, " ")
        joined = joined.lower()

    # Longest keys first so a short key cannot break up a longer one.
    keys_longest_first = sorted(MAPPING, key=len, reverse=True)
    for alias in keys_longest_first:
        joined = joined.replace(alias, MAPPING[alias])

    return joined

# similarity from the ratio, token_sort and token_set ratio methods in FuzzyWuzzy
vtTool.py 文件源码 项目:Snakepit 作者: K4lium 项目源码 文件源码 阅读 19 收藏 0 点赞 0 评论 0
def computeSimilarity(s1, s2):
    """Return one minus the best fuzzy score of ``s1`` and ``s2``, scaled by 0.01.

    The best score is the maximum of ``fuzz.ratio``,
    ``fuzz.token_sort_ratio`` and ``fuzz.token_set_ratio``. Despite the
    function name, a higher score therefore gives a smaller return value.
    """
    scorers = (fuzz.ratio, fuzz.token_sort_ratio, fuzz.token_set_ratio)
    best_score = max(scorer(s1, s2) for scorer in scorers)
    return 1.0 - (0.01 * best_score)


问题


面经


文章

微信
公众号

扫码关注公众号