def __call__(self, params, cost):
updates = []
grads = T.grad(cost, params)
grads = clip_norms(grads, self.clipnorm)
t = theano.shared(floatX(1.))
b1_t = self.b1*self.l**(t-1)
for p, g in zip(params, grads):
g = self.regularizer.gradient_regularize(p, g)
m = theano.shared(p.get_value() * 0.)
v = theano.shared(p.get_value() * 0.)
m_t = b1_t*m + (1 - b1_t)*g
v_t = self.b2*v + (1 - self.b2)*g**2
m_c = m_t / (1-self.b1**t)
v_c = v_t / (1-self.b2**t)
p_t = p - (self.lr * m_c) / (T.sqrt(v_c) + self.e)
p_t = self.regularizer.weight_regularize(p_t)
updates.append((m, m_t))
updates.append((v, v_t))
updates.append((p, p_t))
updates.append((t, t + 1.))
return updates
评论列表
文章目录