util.py 文件源码-python代码片段

def log_loss(solution, prediction, task=BINARY_CLASSIFICATION):
    """Log loss for binary and multiclass."""
    [sample_num, label_num] = solution.shape
    eps = 1e-15

    pred = np.copy(prediction)  # beware: changes in prediction occur through this
    sol = np.copy(solution)
    if (task == MULTICLASS_CLASSIFICATION) and (label_num > 1):
        # Make sure the lines add up to one for multi-class classification
        norma = np.sum(prediction, axis=1)
        for k in range(sample_num):
            pred[k, :] /= sp.maximum(norma[k], eps)
        # Make sure there is a single label active per line for multi-class
        # classification
        sol = binarize_predictions(solution, task=MULTICLASS_CLASSIFICATION)
        # For the base prediction, this solution is ridiculous in the
        # multi-label case

        # Bounding of predictions to avoid log(0),1/0,...
    pred = sp.minimum(1 - eps, sp.maximum(eps, pred))
    # Compute the log loss
    pos_class_log_loss = -np.mean(sol * np.log(pred), axis=0)
    if (task != MULTICLASS_CLASSIFICATION) or (label_num == 1):
        # The multi-label case is a bunch of binary problems.
        # The second class is the negative class for each column.
        neg_class_log_loss = -np.mean((1 - sol) * np.log(1 - pred), axis=0)
        log_loss = pos_class_log_loss + neg_class_log_loss
        # Each column is an independent problem, so we average.
        # The probabilities in one line do not add up to one.
        # log_loss = mvmean(log_loss)
        # print('binary {}'.format(log_loss))
        # In the multilabel case, the right thing i to AVERAGE not sum
        # We return all the scores so we can normalize correctly later on
    else:
        # For the multiclass case the probabilities in one line add up one.
        log_loss = pos_class_log_loss
        # We sum the contributions of the columns.
        log_loss = np.sum(log_loss)
        # print('multiclass {}'.format(log_loss))
    return log_loss