from collections import OrderedDict

import theano.tensor as T
from lasagne.updates import get_or_compute_grads


def hard_rmsprop(loss_or_grads, params, learning_rate=1.0e-2, epsilon=1e-6):
    """
    Not an actual RMSProp: just normalizes the gradient so that its norm
    equals the `learning_rate` parameter. Don't use unless you have to.

    :param loss_or_grads: loss to minimize, or a list of precomputed gradients
    :param params: parameters to optimize
    :param learning_rate: norm of the applied gradient step
    :param epsilon: small constant for numerical stability
    :return: OrderedDict mapping each parameter to its update expression
    """
    grads = get_or_compute_grads(loss_or_grads, params)
    # Global L2 norm over all gradients; epsilon guards against division by zero.
    gnorm = T.sqrt(sum(T.sum(g ** 2) for g in grads) + epsilon)
    grads = [g / gnorm for g in grads]
    updates = OrderedDict()
    for param, grad in zip(params, grads):
        updates[param] = param - learning_rate * grad
    return updates
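
# Minimal usage sketch (an illustration, not part of the original snippet):
# plugging hard_rmsprop into a standard Lasagne/Theano training loop. The toy
# network, the symbolic variables `X`, `y`, and the chosen sizes are all
# hypothetical placeholders assumed here for demonstration.
import theano
import lasagne

X = T.matrix('X')
y = T.ivector('y')
l_in = lasagne.layers.InputLayer((None, 2), input_var=X)
network = lasagne.layers.DenseLayer(l_in, num_units=3,
                                    nonlinearity=lasagne.nonlinearities.softmax)

prediction = lasagne.layers.get_output(network)
loss = lasagne.objectives.categorical_crossentropy(prediction, y).mean()
params = lasagne.layers.get_all_params(network, trainable=True)

# Each update moves the parameters by a gradient step of fixed norm 1e-2.
updates = hard_rmsprop(loss, params, learning_rate=1e-2)
train_fn = theano.function([X, y], loss, updates=updates)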