def compute_score(self, gts, res):
"""
Computes Rouge-L score given a set of reference and candidate sentences for the dataset
Invoked by evaluate_captions.py
:param hypo_for_image: dict : candidate / test sentences with "image name" key and "tokenized sentences" as values
:param ref_for_image: dict : reference MS-COCO sentences with "image name" key and "tokenized sentences" as values
:returns: average_score: float (mean ROUGE-L score computed by averaging scores for all the images)
"""
assert(gts.keys() == res.keys())
imgIds = gts.keys()
score = []
for id in imgIds:
hypo = res[id]
ref = gts[id]
score.append(self.calc_score(hypo, ref))
# Sanity check.
assert(type(hypo) is list)
assert(len(hypo) == 1)
assert(type(ref) is list)
assert(len(ref) > 0)
average_score = np.mean(np.array(score))
return average_score, np.array(score)
评论列表
文章目录