def create_esgd_updates(updates, params, gparams, gsums, xsums, lr, eps, gamma, momentum):
    """Add ESGD-style update rules for ``params`` to ``updates`` in place.

    For every parameter, a running average ``D`` of squared
    Hessian-vector products (with a random normal vector) is maintained
    and used to rescale the (optionally momentum-smoothed) gradient
    before the step is applied.

    Args:
        updates: Mapping from shared variable to its new symbolic value.
            It is mutated in place; entries are added for each parameter,
            each ``xsums`` accumulator, each ``gsums`` buffer (only when
            momentum is active) and an internal step counter.
        params: Parameters to update.
        gparams: Gradient expressions, paired element-wise with ``params``.
        gsums: Momentum buffers, paired element-wise with ``params``.
            Only read and updated when momentum is active.
        xsums: Accumulators for the running average of squared
            Hessian-vector products, paired element-wise with ``params``.
        lr: Learning rate multiplying the rescaled gradient.
        eps: Constant added under the square root of the preconditioner.
        gamma: Decay rate of the running average stored in ``xsums``.
        momentum: Object exposing ``get_value()`` (presumably a Theano
            shared scalar -- confirm against the caller). Its value is
            also used symbolically as the momentum coefficient.

    Returns:
        None. The result is communicated through ``updates``.

    Raises:
        NotImplementedError: If ``is_subtensor_op`` reports that any
            parameter is a subtensor. Entries already written to
            ``updates`` for earlier parameters are not rolled back.
    """
    # Evaluated once, while the graph is being built: changing the momentum
    # value afterwards does not switch the momentum branch on or off.
    has_momentum = momentum.get_value() > 0.0

    # One random normal vector per parameter, with the parameter's shape.
    samples = [
        default_mrng.normal(size=p.shape, avg=0, std=1,
                            dtype=theano.config.floatX)
        for p in params
    ]
    # L-operator of the gradients w.r.t. the parameters, evaluated at the
    # random vectors; used below as the Hessian-vector product.
    HVs = T.Lop(gparams, params, samples)

    # Shared step counter; omg_t = 1 - gamma**t rescales the running average.
    # NOTE(review): this looks like a bias correction for accumulators that
    # start at zero -- confirm how xsums are initialised by the caller.
    i = theano.shared(np.float64(0.0).astype(theano.config.floatX))
    i_t = i + 1.0
    omg_t = 1.0 - gamma**i_t

    # zip() stops at the shortest sequence, so the inputs are expected to
    # have matching lengths.
    for p, g, m, D, Hv in zip(params, gparams, gsums, xsums, HVs):
        if is_subtensor_op(p):
            raise NotImplementedError("ESGD subtensor update not implemented!")

        # Exponential moving average of the squared Hessian-vector product.
        D_t = D * gamma + T.sqr(Hv) * (1.0 - gamma)

        if has_momentum:
            m_t = m * momentum + g
            updates[m] = m_t
        else:
            m_t = g

        # Precondition the step with the rescaled running average.
        g_t = m_t / T.sqrt(D_t / omg_t + eps)

        updates[D] = D_t
        updates[p] = p - lr * g_t

    # Advance the step counter once per applied update.
    updates[i] = i_t
optimization.py 文件源码
python
阅读 17
收藏 0
点赞 0
评论 0
评论列表
文章目录