def ADAM(obj, t=0, lr=1e-3, l2reg=1e-2, beta1=0.9, beta2=0.999, eps=1e-8):
    """Run one Adam-style step on ``obj.W`` driven by the gradient ``obj.G``.

    ``obj`` is first passed through ``regularize(obj, l2reg)`` and the
    returned object is the one that gets updated.  The moment buffers
    ``obj.M`` and ``obj.V`` are created as zero arrays shaped like ``obj.W``
    the first time they are found missing.

    The helper kernels (``nescale``, ``newtadd``, ``newsadd``, ``nesrdiv``)
    are defined outside this block; they presumably work in place and are
    meant to reproduce the plain NumPy reference below -- TODO confirm:

        M = beta1 * M + (1 - beta1) * G
        V = beta2 * V + (1 - beta2) * G * G
        W -= lr * M / (sqrt(V) + eps)

    Args:
        obj: Object exposing ``W`` (weights) and ``G`` (gradient).
        t: Step counter.  Currently unused: the ``schedule(t, lr, l2reg)``
            call it would feed is disabled, and no bias-correction term
            is applied.
        lr: Step size, used as given.
        l2reg: Regularisation strength forwarded to ``regularize``.
        beta1: Decay factor for the first-moment buffer ``M``.
        beta2: Decay factor for the second-moment buffer ``V``.
        eps: Small constant forwarded to ``nesrdiv``.

    Returns:
        None.
    """
    obj = regularize(obj, l2reg)

    # Learning-rate scheduling is switched off for now; the raw rate is used.
    # (Disabled alternative: schedule(t, lr, l2reg).)
    step_size = lr

    # Lazily allocate whichever moment buffers the object does not carry yet.
    for buffer_name in ("M", "V"):
        if not hasattr(obj, buffer_name):
            setattr(obj, buffer_name, np.zeros_like(obj.W))

    # First moment: decay the running value, then blend in the gradient.
    nescale(obj.M, np.single(beta1))
    newtadd(obj.M, np.single(1.0 - beta1), obj.G)

    # Second moment: same pattern, via the "sadd" kernel
    # (presumably accumulates the squared gradient -- TODO confirm).
    nescale(obj.V, np.single(beta2))
    newsadd(obj.V, np.single(1.0 - beta2), obj.G)

    # Weight step: add -step_size times whatever nesrdiv yields for (M, eps, V).
    weights = obj.W
    negative_step = np.single(-step_size)
    newtadd(weights, negative_step, nesrdiv(obj.M, np.single(eps), obj.V))
# (Non-code web-page residue, translated: "Comment list" /
# "Article table of contents".)