# Adadelta update rule (Zeiler, 2012): builds a Theano updates dictionary
# suitable for passing to theano.function(updates=...).
from collections import OrderedDict
import numpy as np
import theano
import theano.tensor as T


def gradients_to_updates(self, params, grads):
    updates = OrderedDict()
    for pp, gg in zip(params, grads):
        value = pp.get_value(borrow=True)
        # Per-parameter accumulators for the running averages of squared
        # gradients and squared updates. Local variables are used rather
        # than overwriting self.accu / self.delta_accu on every iteration.
        accu = theano.shared(np.zeros(value.shape, dtype=theano.config.floatX),
                             'adadelta_accu_' + pp.name)
        delta_accu = theano.shared(np.zeros(value.shape, dtype=theano.config.floatX),
                                   'adadelta_delta_accu_' + pp.name)
        self.params.append(accu)
        self.params.append(delta_accu)
        accu.tags = ['optimizer_param']
        delta_accu.tags = ['optimizer_param']
        # E[g^2]_t = rho * E[g^2]_{t-1} + (1 - rho) * g_t^2
        accu_new = self.rho * accu + (1 - self.rho) * T.sqr(gg)
        updates[accu] = accu_new
        # Step: g * RMS[dx]_{t-1} / RMS[g]_t, with the epsilon inside the
        # square roots as in Zeiler's paper.
        ud = gg * T.sqrt(delta_accu + 1e-7) / T.sqrt(accu_new + 1e-7)
        updates[pp] = pp - self.lr * ud
        # E[dx^2]_t = rho * E[dx^2]_{t-1} + (1 - rho) * dx_t^2
        delta_accu_new = self.rho * delta_accu + (1 - self.rho) * T.sqr(ud)
        updates[delta_accu] = delta_accu_new
    return updates
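
For context, here is a minimal usage sketch showing how the returned updates dictionary plugs into a Theano training function. The Adadelta class name, its constructor, and the small softmax model are hypothetical illustrations; only gradients_to_updates comes from the code above.

x = T.matrix('x')
y = T.ivector('y')
W = theano.shared(np.zeros((784, 10), dtype=theano.config.floatX), name='W')
b = theano.shared(np.zeros(10, dtype=theano.config.floatX), name='b')

# Simple softmax classifier with negative log-likelihood loss.
p_y = T.nnet.softmax(T.dot(x, W) + b)
loss = -T.mean(T.log(p_y)[T.arange(y.shape[0]), y])

params = [W, b]
grads = T.grad(loss, params)

opt = Adadelta(lr=1.0, rho=0.95)   # hypothetical constructor
updates = opt.gradients_to_updates(params, grads)
train_fn = theano.function([x, y], loss, updates=updates)

Note that Adadelta derives its own per-parameter step size, so in Zeiler's formulation the learning rate is effectively 1.0; self.lr here acts only as an optional global scale on the update.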