gridding.py 文件源码

python
阅读 19 收藏 0 点赞 0 评论 0

项目:the-magical-csv-merge-machine 作者: entrepreneur-interet-general 项目源码 文件源码
def score_tokens(src, ref, translate_tokens):
    """Score how well the tokens of ``src`` match the tokens of ``ref``.

    Both inputs go through ``validateTokens`` and are re-joined with single
    spaces before being compared with the ``fuzz`` token ratios. A pair is
    rejected (score 0, with a debug log line naming the failed check) as
    soon as one of the thresholds below is not met:

    * token sort ratio below 40
    * token set ratio below 50
    * when ``REQUIRES_SHARED_PROPER_NOUN`` is set and at least one side
      yields a non-empty ``filterProperNouns`` result: proper-noun sort
      ratio below 80, or proper-noun set ratio below 60

    Args:
        src: Source value to score.
        ref: Reference value to score against.
        translate_tokens: When truthy, both values are first passed through
            ``translate`` and then scored without further translation.

    Returns:
        0 for a rejected pair; otherwise the token sort ratio multiplied by
        the token set ratio, divided by 100 (a score in [0, 100]).
    """
    if translate_tokens:
        # Translate each side exactly once, then score the translated pair.
        return score_tokens(translate(src), translate(ref), False)

    src_tokens = validateTokens(src)
    ref_tokens = validateTokens(ref)
    src_text = ' '.join(src_tokens)
    ref_text = ' '.join(ref_tokens)

    # Whole-string checks; the cheaper-to-fail sort ratio is tested first.
    sort_score = fuzz.token_sort_ratio(src_text, ref_text)
    if sort_score < 40:
        logging.debug('Rejected for TOKEN_SORT : {} / {}'.format(src, ref))
        return 0

    set_score = fuzz.token_set_ratio(src_text, ref_text)
    if set_score < 50:
        logging.debug('Rejected for TOKEN_SET : {} / {}'.format(src, ref))
        return 0

    combined_score = sort_score * set_score / 100
    if not REQUIRES_SHARED_PROPER_NOUN:
        return combined_score

    src_proper = ' '.join(filterProperNouns(src_tokens))
    ref_proper = ' '.join(filterProperNouns(ref_tokens))
    if not (src_proper or ref_proper):
        # Neither side has any proper noun: nothing further to compare.
        return combined_score

    # At least one side has proper nouns, so they must agree closely.
    proper_sort_score = fuzz.token_sort_ratio(src_proper, ref_proper)
    if proper_sort_score < 80:
        logging.debug('Rejected for PROPER_NOUN_SORT : {} / {}'.format(src, ref))
        return 0

    proper_set_score = fuzz.token_set_ratio(src_proper, ref_proper)
    if proper_set_score < 60:
        logging.debug('Rejected for PROPER_NOUN_SET : {} / {}'.format(src, ref))
        return 0

    return combined_score
评论列表
文章目录


问题


面经


文章

微信
公众号

扫码关注公众号