def calculate_scores(predicted, gold, inspect=False, topocluster=False):
    """
    Compare parser predictions against gold annotations and collect the counts needed for
    precision, recall and F-score, plus a per-toponym geocoding error.

    Both files are read in lockstep, one line per document. Each line holds toponyms terminated
    by "||"; each toponym holds fields separated by ",,". Field 1 is the toponym name, fields 2
    and 3 are its coordinates and fields 4 and 5 are its character positions in the text.

    :param predicted: path to the file with parser predictions. If the path contains "wiki",
    unmatched predictions are NOT counted as false positives.
    :param gold: path to the file with gold annotations
    :param inspect: If True, the unmatched gold and predicted toponyms of each line are printed
    :param topocluster: Topocluster geoparser produces NON-STANDARD output so has to be treated
    differently: toponyms are matched on name only, ignoring their position in the text
    :return: a dictionary with two keys. "f_score" is the tuple (tp, fp, fn) of true positive,
    false positive and false negative counts (floats). "accuracy" maps the running index of each
    matched gold toponym to log(1 + error in kilometres) between predicted and gold coordinates.
    """
    # A prediction matches when the names are equal (case-insensitive) and the midpoints of the
    # two character spans differ by strictly less than this value. For reasons to do with UTF-8
    # encoding and decoding, the toponym indices may, in a few instances, be off by a few
    # positions when using Web APIs, hence the slack. Because the comparison is a strict "<",
    # a value of 0 can never match; midpoints are multiples of 0.5, so any value in (0, 0.5]
    # accepts identical midpoints only.
    max_offset = 10

    tp, fp, fn = 0.0, 0.0, 0.0
    accuracy = {}
    wiki = "wiki" in predicted
    toponym_index = -1  # running index over ALL gold toponyms, across lines

    # The context manager guarantees both handles are closed, even if a malformed line raises.
    with codecs.open(predicted) as predictions_file, codecs.open(gold) as gold_file:
        for predicted_line, gold_line in zip(predictions_file, gold_file):
            # The final element after the last "||" is the line remainder (newline), drop it.
            predicted_tops = predicted_line.split("||")[:-1]
            gold_tops = gold_line.split("||")[:-1]

            # Iterate over copies because matched entries are removed from the live lists;
            # whatever is left afterwards is unmatched.
            for gold_top in gold_tops[:]:
                toponym_index += 1
                gold_top_items = gold_top.split(",,")
                for predicted_top in predicted_tops[:]:
                    predicted_top_items = predicted_top.split(",,")
                    mean_g = (int(gold_top_items[4]) + int(gold_top_items[5])) / 2.0
                    mean_p = (int(predicted_top_items[4]) + int(predicted_top_items[5])) / 2.0
                    same_name = predicted_top_items[1].lower() == gold_top_items[1].lower()
                    if topocluster:  # Only match for the toponym name in this instance
                        match = same_name
                    else:
                        match = same_name and abs(mean_g - mean_p) < max_offset
                    if not match:
                        continue

                    tp += 1
                    predicted_tops.remove(predicted_top)
                    gold_tops.remove(gold_top)
                    predicted_coord = (float(predicted_top_items[2]), float(predicted_top_items[3]))
                    gold_coord = (float(gold_top_items[2]), float(gold_top_items[3]))
                    # great_circle must be available at module level
                    # (presumably geopy.distance.great_circle - confirm against the imports).
                    accuracy[toponym_index] = numpy.log(1 + great_circle(predicted_coord, gold_coord).kilometers)
                    break  # each gold toponym consumes at most one prediction

            # NOTE(review): nesting reconstructed from a flattened source - false positives are
            # skipped for "wiki" files while false negatives are always counted; confirm.
            if not wiki:
                fp += len(predicted_tops)
            fn += len(gold_tops)

            if inspect and (predicted_tops or gold_tops):
                # Single-argument form prints the same text under Python 2 and Python 3.
                print("Predicted: " + " - ".join(predicted_tops))
                print("Gold Tops: " + " - ".join(gold_tops))

    return {"f_score": (tp, fp, fn), "accuracy": accuracy}
评论列表
文章目录