def score_icon_plain(ref_file, hyp_file, n_significance_tests=20):
    """Score hypothesis tags against reference tags with F1 measures.

    Both inputs are loaded with ``read_tag_file`` and are treated here as a
    sequence of lines, each line being a sequence of tags. The two inputs
    must line up exactly (same number of lines, same number of tags on
    every line) before the tags are flattened and scored.

    Args:
        ref_file: Reference (gold) tag file, passed to ``read_tag_file``.
        hyp_file: Hypothesis (predicted) tag file, passed to
            ``read_tag_file``.
        n_significance_tests: Not used by this function body; kept so that
            existing callers passing it keep working.

    Returns:
        A two-element list ``[class_f1, average_f1]``: the result of
        ``f1_score(..., average=None)`` followed by the result of
        ``weighted_fmeasure(...)`` over the flattened tag sequences.
        NOTE(review): ``f1_score`` is presumably scikit-learn's, in which
        case ``class_f1`` is an array of per-class scores -- confirm
        against the file's imports.

    Raises:
        AssertionError: If the two files differ in number of lines, or if
            any pair of corresponding lines differs in number of tags.
    """
    ref_tags = read_tag_file(ref_file)
    hyp_tags = read_tag_file(hyp_file)

    # Raise explicitly rather than using ``assert``: assert statements are
    # removed under ``python -O``, and ``zip`` below would then silently
    # truncate mismatched inputs and produce misleading scores.
    if len(ref_tags) != len(hyp_tags):
        raise AssertionError(
            'ref file and hyp file must have the same number of tags'
        )
    for ref_line, hyp_line in zip(ref_tags, hyp_tags):
        if len(ref_line) != len(hyp_line):
            raise AssertionError(
                'ref line and hyp line must have the same number of tags'
            )

    # Flatten the per-line tag lists into one sequence each, since the
    # scorers below are called on flat, aligned sequences.
    flat_ref_tags = [tag for line in ref_tags for tag in line]
    flat_hyp_tags = [tag for line in hyp_tags for tag in line]

    actual_class_f1 = f1_score(flat_ref_tags, flat_hyp_tags, average=None)
    actual_average_f1 = weighted_fmeasure(flat_ref_tags, flat_hyp_tags)

    return [actual_class_f1, actual_average_f1]
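# [web page residue, not part of the code] Comment list
# [web page residue, not part of the code] Article table of contents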