def test_gradient():
    # Test gradient.
    #
    # This makes sure that the activation functions and their derivatives
    # are correct: the analytical gradient returned by
    # ``mlp._loss_grad_lbfgs`` should be close to a central finite-difference
    # estimate of the loss value returned by the same method.
    n_samples = 5
    n_features = 10
    epsilon = 1e-5  # finite-difference step size

    for n_labels in [2, 3]:
        # Seeded generator: the check runs on the same data every time, so
        # a failure can be reproduced instead of depending on global RNG state.
        rng = np.random.RandomState(42)
        X = rng.random_sample((n_samples, n_features))
        # Labels cycle through the values 1..n_labels.
        y = 1 + np.mod(np.arange(n_samples) + 1, n_labels)
        Y = LabelBinarizer().fit_transform(y)

        for activation in ACTIVATION_TYPES:
            mlp = MLPClassifier(activation=activation, hidden_layer_sizes=10,
                                algorithm='l-bfgs', alpha=1e-5,
                                learning_rate_init=0.2, max_iter=1,
                                random_state=1)
            mlp.fit(X, y)

            # Flatten all fitted weights, then all intercepts, into one
            # parameter vector.
            theta = np.hstack([param.ravel() for param in
                               mlp.coefs_ + mlp.intercepts_])

            layer_units = ([X.shape[1]] + [mlp.hidden_layer_sizes] +
                           [mlp.n_outputs_])

            # Work buffers handed to ``_loss_grad_lbfgs``; one entry per
            # layer transition, sized from ``layer_units``.
            activations = [X]
            deltas = []
            coef_grads = []
            intercept_grads = []
            for i in range(mlp.n_layers_ - 1):
                fan_in = layer_units[i]
                fan_out = layer_units[i + 1]
                activations.append(np.empty((X.shape[0], fan_out)))
                deltas.append(np.empty((X.shape[0], fan_out)))
                coef_grads.append(np.empty((fan_in, fan_out)))
                intercept_grads.append(np.empty(fan_out))

            # analytically compute the gradients
            def loss_grad_fun(t):
                return mlp._loss_grad_lbfgs(t, X, Y, activations, deltas,
                                            coef_grads, intercept_grads)

            # Only the gradient is needed here; the loss value is discarded.
            _, grad = loss_grad_fun(theta)

            # numerically compute the gradients
            n = np.size(theta, 0)
            numgrad = np.zeros(n)
            for i in range(n):
                # Perturb a single coordinate by +/- epsilon.
                dtheta = np.zeros(n)
                dtheta[i] = epsilon
                numgrad[i] = ((loss_grad_fun(theta + dtheta)[0] -
                               loss_grad_fun(theta - dtheta)[0]) /
                              (epsilon * 2.0))
            assert_almost_equal(numgrad, grad)
# NOTE: leftover web-page navigation text, not part of the test code:
#   "Comment list" / "Table of contents"