import numpy as np
from scipy.special import expit  # numerically stable sigmoid

# softmax and sigmoid_gradient are assumed to be defined earlier in the article.
def neural_network_cost_gradient(parameters, input, output):
"""
3-layer network cost and gradient function
:param parameters: pair of (W1, W2)
:param input: input vector
:param output: index to correct label
:return: cross entropy cost and gradient
"""
    W1, W2 = parameters
    input = input.reshape(-1, 1)  # treat the input as a column vector
    hidden_layer = expit(W1.dot(input))  # sigmoid activation of the hidden layer
    inside_softmax = W2.dot(hidden_layer)  # pre-softmax scores (logits)
    # TODO: allow softmax to normalize column vector
    prediction = softmax(inside_softmax.reshape(-1)).reshape(-1, 1)
    # cross entropy reduces to -log(probability assigned to the correct label)
    cost = -np.sum(np.log(prediction[output]))
    one_hot = np.zeros_like(prediction)
    one_hot[output] = 1
    # gradient of the cost w.r.t. the logits for softmax + cross entropy
    delta = prediction - one_hot
    gradient_W2 = delta.dot(hidden_layer.T)
    # back-propagate delta through W2 and the sigmoid, then take the outer
    # product with the input (parenthesized to make the order explicit)
    gradient_W1 = (sigmoid_gradient(hidden_layer) * W2.T.dot(delta)).dot(input.T)
    gradient = [gradient_W1, gradient_W2]
    return cost, gradient
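
# A minimal usage sketch. The layer sizes, random seed, and the helper
# definitions below are illustrative assumptions; the article's own softmax
# and sigmoid_gradient (defined elsewhere) may differ in detail.
def softmax(x):
    # numerically stable softmax over a 1-D array
    e = np.exp(x - np.max(x))
    return e / e.sum()

def sigmoid_gradient(s):
    # sigmoid derivative written in terms of the sigmoid output s
    return s * (1 - s)

np.random.seed(0)
input_size, hidden_size, output_size = 4, 5, 3
W1 = np.random.randn(hidden_size, input_size)
W2 = np.random.randn(output_size, hidden_size)
x = np.random.randn(input_size)
label = 1

cost, (gradient_W1, gradient_W2) = neural_network_cost_gradient((W1, W2), x, label)
print(cost)               # scalar cross-entropy cost
print(gradient_W1.shape)  # (5, 4), same shape as W1
print(gradient_W2.shape)  # (3, 5), same shape as W2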