# Module-level imports assumed by this method (NumPy, scikit-learn metrics, and a logger):
import logging

import numpy as np
import sklearn.metrics

logger = logging.getLogger(__name__)


def __init__(self, preds, true_vals, ranks, raw_ranks):
    self.preds = preds
    self.ranks = ranks
    self.true_vals = true_vals
    self.raw_ranks = raw_ranks

    # Check that not all predictions are identical: this sometimes happens with overfitting
    # and leads scikit-learn to output an incorrect average precision (i.e. ap=1).
    if not (preds == preds[0]).all():
        # sklearn.metrics.ranking._binary_clf_curve (called by the metric functions below)
        # uses np.isclose, so the predictions have to be rescaled when their pairwise
        # differences fall below its tolerance:
        preds_rescaled = preds
        diffs = np.diff(np.sort(preds))
        min_diff = min(abs(diffs[np.nonzero(diffs)]))
        if min_diff < 1e-8:  # default absolute tolerance of np.isclose
            preds_rescaled = (preds * (1e-7 / min_diff)).astype('d')
        self.ap = sklearn.metrics.average_precision_score(true_vals, preds_rescaled)
        self.precision, self.recall, self.thresholds = sklearn.metrics.precision_recall_curve(true_vals, preds_rescaled)
    else:
        # Degenerate case: every score is identical, so fall back to constant values.
        logger.warning("All prediction scores are equal, probable overfitting, replacing scores by random scores")
        self.ap = (true_vals == 1).sum() / float(len(true_vals))
        self.thresholds = preds[0]
        self.precision = (true_vals == 1).sum() / float(len(true_vals))
        self.recall = 0.5

    # Mean reciprocal rank on filtered and raw ranks; left at -1 when no ranks are provided.
    self.mrr = -1
    self.raw_mrr = -1
    if ranks is not None:
        self.mrr = np.mean(1.0 / ranks)
        self.raw_mrr = np.mean(1.0 / raw_ranks)
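
For context, a minimal usage sketch follows. It assumes the __init__ above belongs to an evaluation-result class, called Result here purely for illustration, and that preds, true_vals, ranks and raw_ranks are 1-D NumPy arrays of the same length; the concrete values are made up.

import numpy as np

preds = np.array([0.91, 0.15, 0.87, 0.02])   # model scores for four test facts
true_vals = np.array([1, 0, 1, 0])           # binary ground-truth labels
ranks = np.array([1, 3, 2, 5])               # filtered ranks of the true entities
raw_ranks = np.array([2, 4, 2, 7])           # unfiltered (raw) ranks

res = Result(preds, true_vals, ranks, raw_ranks)  # "Result" is a hypothetical class name
print(res.ap)                     # average precision of the scores
print(res.precision, res.recall)  # precision-recall curve points
print(res.mrr, res.raw_mrr)       # mean reciprocal ranks, i.e. np.mean(1.0 / ranks)

Since the example scores are all distinct and their smallest gap (0.04) is well above 1e-8, the rescaling branch is not triggered and the metrics are computed directly on preds.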