# classification_metrics.py
# Note: numpy is imported here; log_loss, prior_log_loss, normalize_array and
# the BINARY_CLASSIFICATION constant are defined elsewhere in this module.
import numpy as np


def pac_metric(solution, prediction, task=BINARY_CLASSIFICATION):
    """
    Probabilistic Accuracy (PAC) score based on the log_loss metric.

    We assume the solution is in {0, 1} and the prediction in [0, 1].
    Otherwise, run normalize_array first.

    :param solution: array of shape (sample_num, label_num) with ground-truth labels in {0, 1}
    :param prediction: array of the same shape with predicted probabilities in [0, 1]
    :param task: task type constant (defaults to BINARY_CLASSIFICATION)
    :return: normalized score: 0 for predicting the prior (random), 1 for a perfect prediction
    """
    debug_flag = False
    sample_num, label_num = solution.shape
    if label_num == 1:
        task = BINARY_CLASSIFICATION
    eps = 1e-15
    the_log_loss = log_loss(solution, prediction, task)
    # Compute the base log loss (using the prior probabilities)
    pos_num = 1. * sum(solution)  # float conversion!
    frac_pos = pos_num / sample_num  # prior proba of positive class
    the_base_log_loss = prior_log_loss(frac_pos, task)
    # Alternative computation of the same thing (slower).
    # Should always return the same result except in the multi-label case,
    # for which the analytic solution makes more sense.
    if debug_flag:
        base_prediction = np.empty(prediction.shape)
        for k in range(sample_num):
            base_prediction[k, :] = frac_pos
        base_log_loss = log_loss(solution, base_prediction, task)
        diff = np.array(abs(the_base_log_loss - base_log_loss))
        if len(diff.shape) > 0:
            diff = max(diff)
        if diff > 1e-10:
            print('Arrggh {} != {}'.format(the_base_log_loss, base_log_loss))
    # Exponentiate to turn the log loss into an accuracy-like score.
    # In the multi-label case, we need to average AFTER taking the exp
    # because exponentiation is a nonlinear operation.
    pac = np.mean(np.exp(-the_log_loss))
    base_pac = np.mean(np.exp(-the_base_log_loss))
    # Normalize: 0 for random (prior) predictions, 1 for perfect predictions
    score = (pac - base_pac) / np.maximum(eps, 1 - base_pac)
    return score
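

# A minimal usage sketch (not part of the original file). It assumes the
# module-level helpers used by pac_metric (log_loss, prior_log_loss,
# BINARY_CLASSIFICATION) are defined above; the toy arrays are illustrative.
if __name__ == '__main__':
    toy_solution = np.array([[1.], [0.], [1.], [1.]])        # ground truth in {0, 1}
    toy_prediction = np.array([[0.9], [0.2], [0.8], [0.7]])  # predicted probabilities in [0, 1]
    # Close to 1 for good predictions; about 0 if we always predict the prior.
    print('PAC score:', pac_metric(toy_solution, toy_prediction))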