def log_loss(solution, prediction, task = 'binary.classification'):
''' Log loss for binary and multiclass. '''
[sample_num, label_num] = solution.shape
eps = 1e-15
pred = np.copy(prediction) # beware: changes in prediction occur through this
sol = np.copy(solution)
if (task == 'multiclass.classification') and (label_num>1):
# Make sure the lines add up to one for multi-class classification
norma = np.sum(prediction, axis=1)
for k in range(sample_num):
pred[k,:] /= sp.maximum (norma[k], eps)
# Make sure there is a single label active per line for multi-class classification
sol = binarize_predictions(solution, task='multiclass.classification')
# For the base prediction, this solution is ridiculous in the multi-label case
# Bounding of predictions to avoid log(0),1/0,...
pred = sp.minimum (1-eps, sp.maximum (eps, pred))
# Compute the log loss
pos_class_log_loss = - mvmean(sol*np.log(pred), axis=0)
if (task != 'multiclass.classification') or (label_num==1):
# The multi-label case is a bunch of binary problems.
# The second class is the negative class for each column.
neg_class_log_loss = - mvmean((1-sol)*np.log(1-pred), axis=0)
log_loss = pos_class_log_loss + neg_class_log_loss
# Each column is an independent problem, so we average.
# The probabilities in one line do not add up to one.
# log_loss = mvmean(log_loss)
# print('binary {}'.format(log_loss))
# In the multilabel case, the right thing i to AVERAGE not sum
# We return all the scores so we can normalize correctly later on
else:
# For the multiclass case the probabilities in one line add up one.
log_loss = pos_class_log_loss
# We sum the contributions of the columns.
log_loss = np.sum(log_loss)
#print('multiclass {}'.format(log_loss))
return log_loss
评论列表
文章目录