def threshold_from_predictions(y, y_pred, false_positive_margin=0, recall=1):
"""Determines a threshold for classifying examples as positive
Args:
y: labels
y_pred: scores from the classifier
recall: Threshold is set to classify at least this fraction of positive
labelled examples as positive
false_positive_margin: Threshold is set to acheive desired recall, and
then is extended to include an additional fraction of negative
labelled examples equal to false_positive_margin (This allows adding
a buffer to the threshold while maintaining a constant "cost")
"""
n_positive = np.count_nonzero(y)
n_negative = len(y) - n_positive
if n_positive == 0:
return np.max(y_pred)
if false_positive_margin == 0 and recall == 1:
return np.min(y_pred[y])
ind = np.argsort(y_pred)
y_pred_sorted = y_pred[ind]
y_sorted = y[ind]
so_far = [0, 0]
j = 0
for i in reversed(range(len(y_sorted))):
so_far[y_sorted[i]] += 1
if so_far[1] >= int(np.floor(recall * n_positive)):
j = i
break
so_far = [0, 0]
if false_positive_margin == 0:
return y_pred_sorted[j]
k = 0
for i in reversed(range(j)):
so_far[y_sorted[i]] += 1
if so_far[0] >= false_positive_margin * n_negative:
k = i
break
return y_pred_sorted[k]
评论列表
文章目录