def score_tokens(src, ref, translate_tokens):
    """Score how closely ``src`` matches ``ref``; return 0 if the pair is rejected.

    Both inputs are run through ``validateTokens`` and the resulting tokens
    are joined with single spaces (so ``validateTokens`` is expected to
    yield strings). The joined strings are compared with
    ``fuzz.token_sort_ratio`` and then ``fuzz.token_set_ratio``; the pair is
    rejected as soon as one of those ratios falls under its threshold
    (40 and 50 respectively).

    When the module-level flag ``REQUIRES_SHARED_PROPER_NOUN`` is truthy, the
    tokens kept by ``filterProperNouns`` must additionally reach a token-sort
    ratio of 80 and a token-set ratio of 60, unless neither side has any
    proper-noun text at all.

    Args:
        src: Candidate value to score.
        ref: Reference value to score against.
        translate_tokens: When truthy, both values are first passed through
            ``translate`` and the translated pair is scored instead (the
            recursive call disables further translation).

    Returns:
        ``0`` when any gate rejects the pair, otherwise
        ``token_sort_ratio * token_set_ratio / 100``. The score is meant to
        lie in [0, 100], which presumes both fuzz ratios are in 0..100.
    """
    if translate_tokens:
        return score_tokens(translate(src), translate(ref), False)

    src_tokens = validateTokens(src)
    ref_tokens = validateTokens(ref)
    src_text = ' '.join(src_tokens)
    ref_text = ' '.join(ref_tokens)

    # Rejection messages use lazy %-style arguments so nothing is formatted
    # unless DEBUG logging is actually enabled.
    token_sort_ratio = fuzz.token_sort_ratio(src_text, ref_text)
    if token_sort_ratio < 40:
        logging.debug('Rejected for TOKEN_SORT : %s / %s', src, ref)
        return 0

    token_set_ratio = fuzz.token_set_ratio(src_text, ref_text)
    if token_set_ratio < 50:
        logging.debug('Rejected for TOKEN_SET : %s / %s', src, ref)
        return 0

    if REQUIRES_SHARED_PROPER_NOUN:
        src_proper = ' '.join(filterProperNouns(src_tokens))
        ref_proper = ' '.join(filterProperNouns(ref_tokens))
        # The proper-noun gate is skipped only when BOTH sides are empty; if
        # just one side has proper nouns the comparison below still runs.
        if src_proper or ref_proper:
            proper_sort_ratio = fuzz.token_sort_ratio(src_proper, ref_proper)
            if proper_sort_ratio < 80:
                logging.debug(
                    'Rejected for PROPER_NOUN_SORT : %s / %s', src, ref)
                return 0

            proper_set_ratio = fuzz.token_set_ratio(src_proper, ref_proper)
            if proper_set_ratio < 60:
                logging.debug(
                    'Rejected for PROPER_NOUN_SET : %s / %s', src, ref)
                return 0

    return token_sort_ratio * token_set_ratio / 100
gridding.py 文件源码
python
阅读 19
收藏 0
点赞 0
评论 0
评论列表
文章目录