def adam(params, grads, lr=0.001, b1=0.9, b2=0.999, e=1e-8):
    """Build Adam update rules for a list of shared parameters.

    For every (parameter, gradient) pair two accumulators are created: ``v``
    (running mean of the gradient) and ``r`` (running mean of the squared
    gradient). A single shared step counter is used to bias-correct both.

    Args:
        params: Shared variables to optimize. Each must support
            ``get_value(True)`` so that its shape can be read.
        grads: Symbolic gradients, paired positionally with ``params``.
        lr: Step size.
        b1: Decay rate of the first-moment accumulator.
        b2: Decay rate of the second-moment accumulator.
        e: Small constant added to the denominator to avoid division by zero.

    Returns:
        An ``OrderedDict`` mapping each accumulator, each parameter and the
        step counter to the symbolic expression of its next value.
    """
    updates = OrderedDict()

    # Shared step counter; i_t is the 1-based index of the step being built.
    i = theano.shared(np.float32(0))
    i_t = i + 1.

    # The bias-correction terms depend only on the step counter, so build
    # them once instead of once per parameter.
    b1_correction = 1 - b1 ** i_t
    b2_correction = 1 - b2 ** i_t

    for p, g in zip(params, grads):
        # NOTE(review): build_shared_zeros is defined elsewhere; presumably it
        # returns a zero-filled shared variable of the given shape -- confirm.
        shape = p.get_value(True).shape
        v = build_shared_zeros(shape)
        r = build_shared_zeros(shape)

        v_t = (b1 * v) + (1. - b1) * g
        r_t = (b2 * r) + (1. - b2) * T.sqr(g)

        r_hat = lr / (T.sqrt(r_t / b2_correction) + e)
        # Bias-correct the *updated* first moment. Using the stale accumulator
        # ``v`` here would leave every step one gradient behind (and, with a
        # zero-initialised ``v``, make the very first step a no-op).
        v_hat = v_t / b1_correction
        p_t = p - r_hat * v_hat

        updates[v] = v_t
        updates[r] = r_t
        updates[p] = p_t

    updates[i] = i_t
    return updates
optimizers.py 文件源码
python
阅读 22
收藏 0
点赞 0
评论 0
评论列表
文章目录