import numpy as np
from sklearn.metrics import (accuracy_score, coverage_error,
                             label_ranking_average_precision_score,
                             label_ranking_loss, precision_recall_fscore_support)

# BAE, patk, and bipartition_scores are helper functions assumed to be
# defined (or imported) elsewhere in eval_performance.py.


def evaluate(predictions, labels, threshold=0.4, multi_label=True):
    '''
    True Positive  : Label = 1, Prediction = 1
    False Positive : Label = 0, Prediction = 1
    False Negative : Label = 1, Prediction = 0
    True Negative  : Label = 0, Prediction = 0
    Precision : TP / (TP + FP)
    Recall    : TP / (TP + FN)
    F Score   : 2 * P * R / (P + R)
    Ranking Loss : The average number of label pairs that are incorrectly
                   ordered, given the prediction scores.
    Hamming Loss : The fraction of labels that are incorrectly predicted
                   (the Hamming distance between predictions and labels).
    '''
    assert predictions.shape == labels.shape, "Shapes: %s, %s" % (predictions.shape, labels.shape)
    metrics = dict()
    if not multi_label:
        # Single-label case: reduce one-hot rows to class indices before scoring.
        metrics['bae'] = BAE(labels, predictions)
        labels, predictions = np.argmax(labels, axis=1), np.argmax(predictions, axis=1)
        metrics['accuracy'] = accuracy_score(labels, predictions)
        metrics['micro_precision'], metrics['micro_recall'], metrics['micro_f1'], _ = \
            precision_recall_fscore_support(labels, predictions, average='micro')
        # The remaining metrics only apply to the multi-label case; zero-fill them.
        metrics['macro_precision'], metrics['macro_recall'], metrics['macro_f1'], metrics['coverage'], \
            metrics['average_precision'], metrics['ranking_loss'], metrics['patk'], metrics['hamming_loss'] \
            = 0, 0, 0, 0, 0, 0, 0, 0
    else:
        # Ranking-based metrics are computed on the raw prediction scores.
        metrics['coverage'] = coverage_error(labels, predictions)
        metrics['average_precision'] = label_ranking_average_precision_score(labels, predictions)
        metrics['ranking_loss'] = label_ranking_loss(labels, predictions)
        # Binarize the scores at the threshold before computing bipartition metrics.
        predictions[predictions >= threshold] = 1
        predictions[predictions < threshold] = 0
        metrics['bae'] = 0
        metrics['patk'] = patk(predictions, labels)
        metrics['micro_precision'], metrics['micro_recall'], metrics['micro_f1'], metrics['macro_precision'], \
            metrics['macro_recall'], metrics['macro_f1'] = bipartition_scores(labels, predictions)
    return metrics
Source file: eval_performance.py (Python)
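For reference, the multi-label branch mixes ranking metrics (computed on the raw scores) with bipartition metrics (computed after thresholding). The sketch below reproduces that split directly with scikit-learn on a small illustrative example; the score and label arrays and the 0.4 threshold are made up for demonstration, and hamming_loss is included to illustrate the docstring's last metric, even though evaluate() itself leaves the bipartition metrics to its bipartition_scores helper.

import numpy as np
from sklearn.metrics import (coverage_error, hamming_loss,
                             label_ranking_average_precision_score,
                             label_ranking_loss, precision_recall_fscore_support)

# Illustrative prediction scores and ground-truth labels: 3 samples, 4 labels.
scores = np.array([[0.9, 0.2, 0.6, 0.1],
                   [0.3, 0.8, 0.4, 0.7],
                   [0.1, 0.5, 0.2, 0.9]])
labels = np.array([[1, 0, 1, 0],
                   [0, 1, 0, 1],
                   [0, 1, 0, 1]])

# Ranking-based metrics use the raw scores, as in evaluate()'s multi-label branch.
print('coverage      :', coverage_error(labels, scores))
print('avg precision :', label_ranking_average_precision_score(labels, scores))
print('ranking loss  :', label_ranking_loss(labels, scores))

# Bipartition metrics are computed after thresholding the scores at 0.4.
binary = (scores >= 0.4).astype(int)
p, r, f1, _ = precision_recall_fscore_support(labels, binary, average='micro')
print('micro P/R/F1  :', p, r, f1)
p, r, f1, _ = precision_recall_fscore_support(labels, binary, average='macro')
print('macro P/R/F1  :', p, r, f1)
print('hamming loss  :', hamming_loss(labels, binary))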