def Adagrad(grads, lr, epsilon=1e-5):
    """Build the Adagrad update rules for a set of parameters.

    For every parameter an accumulator holding the running sum of squared
    gradients is created, and the parameter step is the gradient scaled by
    ``lr / sqrt(epsilon + accumulated_sum)``.

    Args:
        grads: Mapping from each parameter to its gradient expression.
            Each parameter must expose ``get_value()`` and ``name``
            (presumably a Theano shared variable -- confirm against the
            caller).
        lr: Learning rate multiplying every step.
        epsilon: Small constant added inside the square root so the
            division never hits zero. It is cast to ``numpy.float32``;
            the default reproduces the previously hard-coded ``1e-5``.

    Returns:
        An ``OrderedDict`` that maps each accumulator to its new value and
        each parameter to its updated expression, in the iteration order of
        ``grads`` (accumulator first, then the parameter).
    """
    updates = OrderedDict()
    # Loop-invariant: cast the stabiliser once instead of once per parameter.
    eps = numpy.float32(epsilon)

    for param, grad in grads.items():
        # sum_square_grad := \sum g^2, initialised to zeros shaped like param.
        # NOTE(review): sharedX is defined outside this block; presumably it
        # wraps the array in a shared variable -- confirm.
        sum_square_grad = sharedX(param.get_value() * 0.)
        if param.name is not None:
            sum_square_grad.name = 'sum_square_grad_' + param.name

        # Accumulate the squared gradient.
        new_sum_squared_grad = sum_square_grad + T.sqr(grad)

        # Per-element step: gradient scaled by the inverse root of the
        # accumulated squared gradients.
        delta_x_t = (-lr / T.sqrt(eps + new_sum_squared_grad)) * grad

        # Register the accumulator update and the parameter update.
        updates[sum_square_grad] = new_sum_squared_grad
        updates[param] = param + delta_x_t

    return updates
评论列表
文章目录