def nesterov(w, dw, config=None):
    '''
    Performs stochastic gradient descent with Nesterov momentum.

    Uses the standard "lookahead" formulation (cs231n):
        v_new  = momentum * v - learning_rate * dw
        next_w = w - momentum * v + (1 + momentum) * v_new

    config format:
    - learning_rate: Scalar learning rate.
    - momentum: Scalar between 0 and 1 giving the momentum value.
      Setting momentum = 0 reduces to sgd.
    - velocity: A numpy array of the same shape as w and dw used to store a
      moving average of the gradients.

    Returns:
        (next_w, config): the updated weights and the (mutated in place)
        config dict, whose 'velocity' entry now holds the new velocity.
    '''
    if config is None:
        config = {}
    config.setdefault('learning_rate', 1e-2)
    config.setdefault('momentum', 0.9)
    # setdefault stores and returns the existing velocity if present; the
    # original .get() allocated a throwaway zeros array on every call.
    v = config.setdefault('velocity', np.zeros_like(w, dtype=np.float64))

    lr = config['learning_rate']
    mu = config['momentum']

    prev_v = v
    v = mu * v - lr * dw
    # Nesterov correction: combine old and new velocities so the gradient is
    # effectively evaluated at the lookahead position w + mu * v.
    next_w = w - mu * prev_v + (1 + mu) * v

    config['velocity'] = v
    return next_w, config
# NOTE(review): the two lines below are leftover blog-page text from a web
# scrape ("comment list" / "table of contents"), not code — commented out so
# the file stays syntactically valid.
# 评论列表
# 文章目录