def logistic_grad(w, X, Y, alpha):
    """
    Gradient of the logistic loss when Y is a multi-ary probability
    distribution over ``n_classes`` classes.

    The model normalizes per-class sigmoids: p_j = expit(z_j) / sum_k expit(z_k).
    Returns the flattened gradient with the same layout as ``w``
    (per-class weights, optionally followed by a per-class intercept).
    """
    n_classes = Y.shape[1]
    n_features = X.shape[1]
    # Intercept is present iff w has one extra column per class.
    fit_intercept = w.size == n_classes * (n_features + 1)

    coef = w.reshape(n_classes, -1)
    if fit_intercept:
        bias = coef[:, -1]
        coef = coef[:, :-1]
    else:
        bias = 0

    # Linear scores, one column per class: z[i, j] = x_i . w_j + b_j
    z = safe_sparse_dot(X, coef.T) + bias

    # Per-sample normalizer sum_k expit(z_k), kept as a column vector
    # so it broadcasts over classes.
    sig = expit(z)
    norm = sig.sum(axis=1).reshape((sig.shape[0], -1))

    # d/dz_j [ log(sum_k expit(z_k)) - sum_k Y_k log expit(z_k) ]
    #   = expit(z_j) * expit(-z_j) / norm - Y_j * expit(-z_j)
    #   = (expit(z_j) / norm - Y_j) * expit(-z_j)
    dz = (np.reciprocal(norm) * sig - Y) * expit(-z)

    grad = np.zeros((n_classes, n_features + int(fit_intercept)))
    # Chain rule through z = X w^T, plus the L2 penalty term.
    grad[:, :n_features] = safe_sparse_dot(dz.T, X)
    grad[:, :n_features] += alpha * coef
    if fit_intercept:
        # Intercept column: d z / d b = 1, so just sum over samples.
        grad[:, -1] = dz.sum(axis=0)
    return grad.ravel()