def log_loss(solution, prediction, task=BINARY_CLASSIFICATION):
    """Compute the log loss for binary, multi-label or multiclass tasks.

    Args:
        solution: 2-D array of shape ``(sample_num, label_num)`` holding the
            target indicators.
        prediction: 2-D array of predicted scores, expected to be
            broadcast-compatible with ``solution``. It is never modified;
            all work happens on a float64 copy.
        task: Task identifier. ``MULTICLASS_CLASSIFICATION`` combined with
            more than one column selects the multiclass formula; any other
            value treats each column as an independent binary problem.

    Returns:
        Multiclass case: a scalar, the sum over columns of the per-column
        mean positive-class loss.
        Otherwise: an array with one loss value per column (positive plus
        negative class term). The columns are deliberately NOT reduced here
        so the caller can average/normalize them afterwards.
    """
    # Unpacking also enforces that ``solution`` is 2-D.
    _, label_num = solution.shape
    eps = 1e-15

    # Work on a float64 copy: the caller's array stays untouched, integer
    # predictions can be divided in place, and ``1 - eps`` stays
    # representable (it rounds to 1.0 in float32, defeating the clipping).
    pred = np.array(prediction, dtype=float)
    sol = np.copy(solution)

    if (task == MULTICLASS_CLASSIFICATION) and (label_num > 1):
        # Make sure each row adds up to one; ``eps`` guards against a
        # division by zero for all-zero rows.
        row_sums = np.sum(pred, axis=1, keepdims=True)
        pred /= np.maximum(row_sums, eps)
        # Make sure there is a single label active per row for multiclass
        # classification (delegated to binarize_predictions).
        sol = binarize_predictions(solution, task=MULTICLASS_CLASSIFICATION)

    # Bound the predictions to avoid log(0), 1/0, ...
    # np.* is used rather than the deprecated top-level scipy aliases.
    pred = np.minimum(1 - eps, np.maximum(eps, pred))

    pos_class_log_loss = -np.mean(sol * np.log(pred), axis=0)

    if (task != MULTICLASS_CLASSIFICATION) or (label_num == 1):
        # The multi-label case is a bunch of binary problems: the second
        # class is the negative class for each column, and the
        # probabilities in one row do not add up to one.
        neg_class_log_loss = -np.mean((1 - sol) * np.log(1 - pred), axis=0)
        # In the multi-label case the right thing is to AVERAGE, not sum,
        # so all per-column scores are returned for later normalization.
        loss = pos_class_log_loss + neg_class_log_loss
    else:
        # For the multiclass case the probabilities in one row add up to
        # one, so the contributions of the columns are summed.
        loss = np.sum(pos_class_log_loss)
    return loss
评论列表
文章目录