import numpy as np

# binarize_predictions, acc_stat and mvmean are helper functions defined
# elsewhere in this library.

def f1_metric(solution, prediction, task='binary.classification'):
    '''Compute the normalized F1 measure.

    Binarization of the predictions differs for the multi-label and
    multi-class cases. A non-weighted average over classes is taken.
    The score is normalized so that 0 is random and 1 is perfect.'''
    label_num = solution.shape[1]
    bin_prediction = binarize_predictions(prediction, task)
    tn, fp, tp, fn = acc_stat(solution, bin_prediction)
    # Bound the counts away from 0 to avoid division by 0
    eps = 1e-15
    true_pos_num = np.maximum(eps, tp + fn)
    found_pos_num = np.maximum(eps, tp + fp)
    tp = np.maximum(eps, tp)
    tpr = tp / true_pos_num   # true positive rate (recall)
    ppv = tp / found_pos_num  # positive predictive value (precision)
    # Harmonic mean: f1 = 2 * tpr * ppv / (tpr + ppv)
    arithmetic_mean = 0.5 * np.maximum(eps, tpr + ppv)
    f1 = tpr * ppv / arithmetic_mean
    # Average over all classes
    f1 = mvmean(f1)
    # Normalize: 0 for random, 1 for perfect
    if (task != 'multiclass.classification') or (label_num == 1):
        # How should "base_f1" be chosen?
        # In the binary / multi-label case, one could predict all 1s;
        # then tpr = 1 and ppv = frac_pos, so f1 = 2*frac_pos/(1+frac_pos),
        # with frac_pos = mvmean(solution.ravel()).
        # Alternatively, one could predict at random with probability 0.5,
        # which gives base_f1 = 0.5. Predicting all 1s is better only when
        # 2*frac_pos/(1+frac_pos) > 0.5, i.e. when frac_pos > 1/3.
        # Predicting according to the class prior frac_pos gives
        # f1 = tpr = ppv = frac_pos, which is worse than 0.5 when
        # frac_pos < 0.5. Since the F1 score is used precisely when
        # frac_pos is small (typically < 0.1), the best assumption is
        # base_f1 = 0.5.
        base_f1 = 0.5
    # In the multiclass case this is not possible (though it does not make
    # much sense to use F1 for multiclass problems anyway), so the best
    # baseline assigns classes at random, which gives
    # tpr = ppv = frac_pos, with frac_pos = 1/label_num.
    else:
        base_f1 = 1. / label_num
    score = (f1 - base_f1) / np.maximum(eps, 1 - base_f1)
    return score
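
# A quick sanity check of the normalization (a minimal sketch, not part of
# the original library). It inlines a 0.5-threshold binarization and plain
# confusion counts for a toy binary problem, so the helpers
# binarize_predictions, acc_stat and mvmean are not needed here, and it
# also verifies the frac_pos > 1/3 crossover discussed in the comments.

eps = 1e-15
# Toy binary problem: one column of labels and one column of raw scores.
solution = np.array([[1], [0], [1], [1], [0]])
prediction = np.array([[0.9], [0.2], [0.6], [0.4], [0.1]])

# Inlined stand-in for binarize_predictions: threshold at 0.5.
bin_pred = (prediction >= 0.5).astype(int)
# Inlined stand-in for acc_stat: per-column confusion counts.
tp = np.sum((solution == 1) & (bin_pred == 1), axis=0)
fp = np.sum((solution == 0) & (bin_pred == 1), axis=0)
fn = np.sum((solution == 1) & (bin_pred == 0), axis=0)

tpr = np.maximum(eps, tp) / np.maximum(eps, tp + fn)  # recall
ppv = np.maximum(eps, tp) / np.maximum(eps, tp + fp)  # precision
f1 = tpr * ppv / np.maximum(eps, 0.5 * (tpr + ppv))   # harmonic mean

base_f1 = 0.5                                         # binary baseline
score = (np.mean(f1) - base_f1) / (1 - base_f1)
print(score)  # raw f1 = 0.8, so (0.8 - 0.5) / (1 - 0.5) = 0.6

# Crossover from the comments: predicting all 1s beats a fair coin flip
# exactly when 2 * frac_pos / (1 + frac_pos) > 0.5, i.e. frac_pos > 1/3.
assert abs(2 * (1 / 3) / (1 + 1 / 3) - 0.5) < 1e-12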