def deepmind_rmsprop(loss_or_grads, params, learning_rate=0.00025,
                     rho=0.95, epsilon=0.01):
    """Build RMSProp-style update expressions normalised by a running variance.

    For every parameter two zero-initialised shared accumulators are created:
    one holds an exponential moving average of the gradient, the other an
    exponential moving average of the squared gradient.  The gradient is
    divided by ``sqrt(avg_sq - avg ** 2 + epsilon)``, scaled by
    ``learning_rate`` and subtracted from the parameter.

    Parameters
    ----------
    loss_or_grads
        Forwarded unchanged, together with ``params``, to
        ``get_or_compute_grads``; presumably a scalar loss expression or a
        list of gradient expressions -- confirm against that helper.
    params
        Collection of parameters to update.  Each one must provide
        ``get_value(borrow=True)`` and a ``broadcastable`` attribute.
    learning_rate
        Multiplier applied to the normalised gradient (default 0.00025).
    rho
        Decay factor shared by both moving averages (default 0.95).
    epsilon
        Constant added under the square root (default 0.01).

    Returns
    -------
    OrderedDict
        Maps every accumulator and every parameter to its update expression.
        Entries are inserted per parameter in the order: gradient average,
        squared-gradient average, parameter.
    """

    def _zero_accumulator(template, broadcast_pattern):
        # Shared state with the same shape and dtype as the parameter value.
        return theano.shared(np.zeros(template.shape, dtype=template.dtype),
                             broadcastable=broadcast_pattern)

    gradients = get_or_compute_grads(loss_or_grads, params)
    updates = OrderedDict()

    for param, grad in zip(params, gradients):
        current = param.get_value(borrow=True)

        # Moving average of the raw gradient.
        mean_grad = _zero_accumulator(current, param.broadcastable)
        mean_grad_next = rho * mean_grad + (1 - rho) * grad

        # Moving average of the squared gradient.
        mean_sq = _zero_accumulator(current, param.broadcastable)
        mean_sq_next = rho * mean_sq + (1 - rho) * grad ** 2

        updates[mean_grad] = mean_grad_next
        updates[mean_sq] = mean_sq_next

        # epsilon is added inside the square root, to the variance estimate,
        # rather than to the root itself.
        scale = T.sqrt(mean_sq_next - mean_grad_next ** 2 + epsilon)
        normalised = grad / scale
        updates[param] = param - learning_rate * normalised

    return updates
评论列表
文章目录