import numpy as np

# binarize_predictions, acc_stat and mvmean are helper functions defined
# elsewhere in this library.

def f1_metric(solution, prediction, task='binary.classification'):
    '''Compute the normalized F1 measure.

    Binarization of the predictions differs for the multi-label and
    multi-class cases. A non-weighted average over classes is taken.
    The score is normalized so that 0 is random and 1 is perfect.'''
    label_num = solution.shape[1]
    bin_prediction = binarize_predictions(prediction, task)
    tn, fp, tp, fn = acc_stat(solution, bin_prediction)
    # Bound the counts away from 0 to avoid division by 0
    eps = 1e-15
    true_pos_num = np.maximum(eps, tp + fn)
    found_pos_num = np.maximum(eps, tp + fp)
    tp = np.maximum(eps, tp)
    tpr = tp / true_pos_num   # true positive rate (recall)
    ppv = tp / found_pos_num  # positive predictive value (precision)
    # Harmonic mean: f1 = 2 * tpr * ppv / (tpr + ppv)
    arithmetic_mean = 0.5 * np.maximum(eps, tpr + ppv)
    f1 = tpr * ppv / arithmetic_mean
    # Average over all classes
    f1 = mvmean(f1)
    # Normalize: 0 for random, 1 for perfect
    if (task != 'multiclass.classification') or (label_num == 1):
        # How should "base_f1" be chosen?
        # In the binary / multi-label case, one could predict all 1s;
        # then tpr = 1 and ppv = frac_pos, so f1 = 2*frac_pos/(1+frac_pos),
        # with frac_pos = mvmean(solution.ravel()).
        # Alternatively, one could predict at random with probability 0.5,
        # which gives base_f1 = 0.5. Predicting all 1s is better only when
        # 2*frac_pos/(1+frac_pos) > 0.5, i.e. when frac_pos > 1/3.
        # Predicting according to the class prior frac_pos gives
        # f1 = tpr = ppv = frac_pos, which is worse than 0.5 when
        # frac_pos < 0.5. Since the F1 score is used precisely when
        # frac_pos is small (typically < 0.1), the best assumption is
        # base_f1 = 0.5.
        base_f1 = 0.5
    # In the multiclass case this is not possible (though it does not make
    # much sense to use F1 for multiclass problems anyway), so the best
    # baseline assigns classes at random, which gives
    # tpr = ppv = frac_pos, with frac_pos = 1/label_num.
    else:
        base_f1 = 1. / label_num
    score = (f1 - base_f1) / np.maximum(eps, 1 - base_f1)
    return score
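
# A quick sanity check of the normalization (a minimal sketch, not part of
# the original library). It inlines a 0.5-threshold binarization and plain
# confusion counts for a toy binary problem, so the helpers
# binarize_predictions, acc_stat and mvmean are not needed here, and it
# also verifies the frac_pos > 1/3 crossover discussed in the comments.

eps = 1e-15
# Toy binary problem: one column of labels and one column of raw scores.
solution = np.array([[1], [0], [1], [1], [0]])
prediction = np.array([[0.9], [0.2], [0.6], [0.4], [0.1]])

# Inlined stand-in for binarize_predictions: threshold at 0.5.
bin_pred = (prediction >= 0.5).astype(int)
# Inlined stand-in for acc_stat: per-column confusion counts.
tp = np.sum((solution == 1) & (bin_pred == 1), axis=0)
fp = np.sum((solution == 0) & (bin_pred == 1), axis=0)
fn = np.sum((solution == 1) & (bin_pred == 0), axis=0)

tpr = np.maximum(eps, tp) / np.maximum(eps, tp + fn)  # recall
ppv = np.maximum(eps, tp) / np.maximum(eps, tp + fp)  # precision
f1 = tpr * ppv / np.maximum(eps, 0.5 * (tpr + ppv))   # harmonic mean

base_f1 = 0.5                                         # binary baseline
score = (np.mean(f1) - base_f1) / (1 - base_f1)
print(score)  # raw f1 = 0.8, so (0.8 - 0.5) / (1 - 0.5) = 0.6

# Crossover from the comments: predicting all 1s beats a fair coin flip
# exactly when 2 * frac_pos / (1 + frac_pos) > 0.5, i.e. frac_pos > 1/3.
assert abs(2 * (1 / 3) / (1 + 1 / 3) - 0.5) < 1e-12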