def ADADELTA(obj, t=0, lr=1e-0, l2reg=1e-2, rho=0.95, eps=1e-8):
    """Apply one ADADELTA update step to ``obj.W`` using the gradient ``obj.G``.

    The function works through the project helpers ``regularize``,
    ``nescale``, ``newsadd``, ``nedivsr`` and ``newtadd`` (defined outside
    this block).  Their exact semantics are not visible here; the reference
    expressions kept at the end of the body describe the arithmetic this
    call sequence is meant to reproduce -- TODO confirm against the helpers.

    Args:
        obj: Object exposing ``W`` (parameters) and ``G`` (gradient).  The
            buffers ``obj.V`` and ``obj.D`` are created as zero arrays shaped
            like ``obj.W`` on the first call if they do not exist yet.
        t: Not referenced in the body (presumably kept so all optimizers
            share one signature -- verify against callers).
        lr: Not referenced in the body; the parameter step is applied with a
            fixed factor of ``-1``.
        l2reg: Forwarded to ``regularize``.
        rho: Decay factor of both running averages; cast to ``np.single``
            before being handed to the helpers.
        eps: Stabilising constant forwarded (as ``np.single``) to ``nedivsr``.

    Returns:
        None.  ``obj.V``, ``obj.D``, ``obj.G`` and ``obj.W`` are handed to
        helpers whose return values are discarded, so they are presumably
        updated in place.
    """
    obj = regularize(obj, l2reg)

    # Lazily create the two accumulator buffers on first use.
    if not hasattr(obj, 'V'):
        obj.V = np.zeros_like(obj.W)
    if not hasattr(obj, 'D'):
        obj.D = np.zeros_like(obj.W)

    # Running average over the gradient (reference: V = rho*V + (1-rho)*G*G).
    nescale(obj.V, np.single(rho))
    newsadd(obj.V, np.single(1.0 - rho), obj.G)

    # NOTE: obj.G is rescaled here and, assuming nescale works in place, no
    # longer holds the raw gradient afterwards -- it carries the update step
    # (reference: sqrt((D + eps) / (V + eps)) * G).  Callers must not reuse
    # obj.G as a gradient after this point.
    nescale(obj.G, nedivsr(obj.D, np.single(eps), obj.V))

    # Running average over the step now stored in obj.G
    # (reference: D = rho*D + (1-rho)*step*step).  Must come after the
    # rescaling above, since it consumes the step rather than the gradient.
    nescale(obj.D, np.single(rho))
    newsadd(obj.D, np.single(1.0 - rho), obj.G)

    # Apply the step to the parameters (reference: W -= step).
    newtadd(obj.W, -1, obj.G)

    # Reference formulation kept from the original author:
    #   obj.V = np.single(rho) * obj.V + np.single(1.0 - rho) * obj.G * obj.G
    #   D = np.sqrt((obj.D + eps) / (obj.V + eps)) * obj.G
    #   obj.D = np.single(rho) * obj.D + np.single(1.0 - rho) * D * D
    #   obj.W -= D
# Comment list
# Table of contents