def Adam(cost, params, learning_rate=0.0002, b1=0.1, b2=0.001, e=1e-8):
    """Build Theano update rules for the Adam optimizer.

    NOTE(review): b1/b2 here are expressed as (1 - beta), i.e. b1=0.1
    corresponds to beta1=0.9 in the Adam paper's notation — the update
    math below is consistent with that parameterization.

    Parameters
    ----------
    cost : Theano scalar expression to minimize.
    params : list of Theano shared variables to update.
    learning_rate : base step size.
    b1, b2 : decay terms for the first/second moment estimates,
        given as (1 - beta1) and (1 - beta2) respectively.
    e : small constant added to the denominator for numerical stability.

    Returns
    -------
    OrderedDict mapping each shared variable (moments, params, timestep)
    to its symbolic updated value, suitable for `theano.function(updates=...)`.
    """
    updates = OrderedDict()
    gradients = T.grad(cost, params)

    # Shared scalar timestep; incremented once per call of the compiled
    # update function.
    step = theano.shared(np.asarray(0., dtype=theano.config.floatX))
    step_new = step + 1.

    # Bias-correction factors folded into a single effective step size.
    corr1 = 1. - (1. - b1) ** step_new
    corr2 = 1. - (1. - b2) ** step_new
    alpha = learning_rate * (T.sqrt(corr2) / corr1)

    for param, grad in zip(params, gradients):
        # Zero-initialized moment accumulators, same shape/dtype as the
        # parameter they track.
        m_prev = theano.shared(param.get_value() * 0.)
        v_prev = theano.shared(param.get_value() * 0.)

        # Exponential moving averages of the gradient and its square.
        m_new = (b1 * grad) + ((1. - b1) * m_prev)
        v_new = (b2 * T.sqr(grad)) + ((1. - b2) * v_prev)

        # Adam step direction: first moment scaled by RMS of the second.
        direction = m_new / (T.sqrt(v_new) + e)

        updates[m_prev] = m_new
        updates[v_prev] = v_new
        updates[param] = param - (alpha * direction)

    updates[step] = step_new
    return updates
# (removed web-scrape page artifacts that were not code: "评论列表" / "文章目录")