def _multinomial_loss_grad(w, X, Y, alpha, sample_weight):
"""Computes the multinomial loss, gradient and class probabilities.
Parameters
----------
w : ndarray, shape (n_classes * n_features,) or
(n_classes * (n_features + 1),)
Coefficient vector.
X : {array-like, sparse matrix}, shape (n_samples, n_features)
Training data.
Y : ndarray, shape (n_samples, n_classes)
Transformed labels according to the output of LabelBinarizer.
alpha : float
Regularization parameter. alpha is equal to 1 / C.
sample_weight : array-like, shape (n_samples,) optional
Array of weights that are assigned to individual samples.
Returns
-------
loss : float
Multinomial loss.
grad : ndarray, shape (n_classes * n_features,) or
(n_classes * (n_features + 1),)
Ravelled gradient of the multinomial loss.
p : ndarray, shape (n_samples, n_classes)
Estimated class probabilities
Reference
---------
Bishop, C. M. (2006). Pattern recognition and machine learning.
Springer. (Chapter 4.3.4)
"""
n_classes = Y.shape[1]
n_features = X.shape[1]
fit_intercept = (w.size == n_classes * (n_features + 1))
grad = np.zeros((n_classes, n_features + bool(fit_intercept)))
alpha = alpha.reshape(n_classes, -1)
loss, p, w = _multinomial_loss(w, X, Y, alpha, sample_weight)
sample_weight = sample_weight[:, np.newaxis]
diff = sample_weight * (p - Y)
grad[:, :n_features] = safe_sparse_dot(diff.T, X)
grad[:, :n_features] += (alpha + L2_REG) * w
if fit_intercept:
grad[:, -1] = diff.sum(axis=0)
return loss, grad.ravel(), p
评论列表
文章目录