# From scikit-learn's test_logistic.py; the private helpers live in
# sklearn.linear_model.logistic (sklearn.linear_model._logistic from 0.22 on).
import numpy as np
import scipy.sparse as sp
from scipy import linalg
from numpy.testing import assert_array_almost_equal

from sklearn.linear_model.logistic import (
    _logistic_loss, _logistic_loss_and_grad, _logistic_grad_hess)


def test_logistic_grad_hess():
    rng = np.random.RandomState(0)
    n_samples, n_features = 50, 5
    X_ref = rng.randn(n_samples, n_features)
    y = np.sign(X_ref.dot(5 * rng.randn(n_features)))
    X_ref -= X_ref.mean()
    X_ref /= X_ref.std()
    # Sparsified copy so both the dense and sparse code paths are exercised
    X_sp = X_ref.copy()
    X_sp[X_sp < .1] = 0
    X_sp = sp.csr_matrix(X_sp)
    for X in (X_ref, X_sp):
        w = .1 * np.ones(n_features)

        # First check that _logistic_grad_hess is consistent
        # with _logistic_loss_and_grad
        loss, grad = _logistic_loss_and_grad(w, X, y, alpha=1.)
        grad_2, hess = _logistic_grad_hess(w, X, y, alpha=1.)
        assert_array_almost_equal(grad, grad_2)
        # Now check the Hessian along the second coordinate direction:
        # hess is a callable computing the Hessian-vector product H @ v,
        # so hess(e_1) extracts the second column of the Hessian
        vector = np.zeros_like(grad)
        vector[1] = 1
        hess_col = hess(vector)
        # Computation of the Hessian is particularly fragile to numerical
        # errors when using simple finite differences. Here we compute the
        # gradient along a path in the direction of the vector and then use
        # least-squares regression to estimate the slope
        e = 1e-3
        d_x = np.linspace(-e, e, 30)
        d_grad = np.array([
            _logistic_loss_and_grad(w + t * vector, X, y, alpha=1.)[1]
            for t in d_x
        ])

        d_grad -= d_grad.mean(axis=0)
        approx_hess_col = linalg.lstsq(d_x[:, np.newaxis], d_grad)[0].ravel()
        assert_array_almost_equal(approx_hess_col, hess_col, decimal=3)
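
        # Why the regression recovers the Hessian column: by definition,
        # H @ v is the derivative at t = 0 of t -> grad(w + t * v), and for
        # sample points symmetric about 0 the least-squares slope of d_grad
        # against d_x estimates exactly that derivative, with the averaging
        # over 30 points damping the round-off noise of a single difference.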

    # Second, check that the intercept implementation is consistent: when w
    # has n_features + 1 entries, the last one is treated as the intercept
    w = np.zeros(n_features + 1)
    loss_interp, grad_interp = _logistic_loss_and_grad(w, X, y, alpha=1.)
    loss_interp_2 = _logistic_loss(w, X, y, alpha=1.)
    grad_interp_2, hess = _logistic_grad_hess(w, X, y, alpha=1.)
    assert_array_almost_equal(loss_interp, loss_interp_2)
    assert_array_almost_equal(grad_interp, grad_interp_2)
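

# A minimal standalone sketch (not part of the scikit-learn test; the helper
# name below is made up for illustration) of the same slope-regression trick
# on a quadratic f(w) = 0.5 * w.dot(A).dot(w), whose gradient is A.dot(w) and
# whose Hessian is A, so the recovered slope can be checked exactly.
def _demo_hess_col_by_regression():
    rng = np.random.RandomState(0)
    M = rng.randn(4, 4)
    A = M.dot(M.T)                  # symmetric matrix standing in for the Hessian
    w = rng.randn(4)
    v = np.zeros(4)
    v[1] = 1                        # probe the second column of A

    e = 1e-3
    d_x = np.linspace(-e, e, 30)
    d_grad = np.array([A.dot(w + t * v) for t in d_x])  # exact gradients of f
    d_grad -= d_grad.mean(axis=0)
    slope = linalg.lstsq(d_x[:, np.newaxis], d_grad)[0].ravel()
    assert_array_almost_equal(slope, A.dot(v))          # slope equals H @ v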