def _f1_score(pred, answers):
    """Compute the best token-level F1 of a prediction against the answers.

    The prediction and every reference answer are passed through
    ``_normalize_answer`` and split on whitespace. F1 is computed from the
    multiset overlap of the two token lists, and the highest score over all
    reference answers is returned.

    Args:
        pred: The predicted answer, or ``None`` if no prediction was made.
        answers: An iterable of reference answers, or ``None``.

    Returns:
        The maximum F1 over all reference answers. Returns 0 when ``pred`` or
        ``answers`` is ``None``, when ``answers`` is empty, or when no tokens
        overlap with any reference answer.
    """

    def _score(g_tokens, a_tokens):
        """Return the F1 between guessed tokens and one answer's tokens."""
        # Counter intersection keeps the minimum count of each shared token,
        # so repeated tokens are only credited as often as both sides have.
        common = Counter(g_tokens) & Counter(a_tokens)
        num_same = sum(common.values())
        if num_same == 0:
            # Also protects the divisions below: a non-zero overlap implies
            # both token lists are non-empty.
            return 0
        # Multiply by a float so the division is never integer division.
        precision = 1.0 * num_same / len(g_tokens)
        recall = 1.0 * num_same / len(a_tokens)
        return (2 * precision * recall) / (precision + recall)

    if pred is None or answers is None:
        return 0

    g_tokens = _normalize_answer(pred).split()
    scores = [_score(g_tokens, _normalize_answer(a).split()) for a in answers]
    if not scores:
        # No reference answers: max() would raise ValueError on an empty list.
        return 0
    return max(scores)
评论列表
文章目录