def nesterov(w, dw, config=None):
    '''
    Performs stochastic gradient descent with Nesterov momentum.

    Uses the standard "lookahead" formulation (cs231n):
        v_new  = momentum * v - learning_rate * dw
        next_w = w - momentum * v + (1 + momentum) * v_new

    config format:
    - learning_rate: Scalar learning rate.
    - momentum: Scalar between 0 and 1 giving the momentum value.
      Setting momentum = 0 reduces to sgd.
    - velocity: A numpy array of the same shape as w and dw used to store a
      moving average of the gradients.

    Returns:
        (next_w, config): the updated weights and the (mutated in place)
        config dict, whose 'velocity' entry now holds the new velocity.
    '''
    if config is None:
        config = {}
    config.setdefault('learning_rate', 1e-2)
    config.setdefault('momentum', 0.9)
    # setdefault stores and returns the existing velocity if present; the
    # original .get() allocated a throwaway zeros array on every call.
    v = config.setdefault('velocity', np.zeros_like(w, dtype=np.float64))

    lr = config['learning_rate']
    mu = config['momentum']

    prev_v = v
    v = mu * v - lr * dw
    # Nesterov correction: combine old and new velocities so the gradient is
    # effectively evaluated at the lookahead position w + mu * v.
    next_w = w - mu * prev_v + (1 + mu) * v

    config['velocity'] = v
    return next_w, config
# NOTE(review): the two lines below are leftover blog-page text from a web
# scrape ("comment list" / "table of contents"), not code — commented out so
# the file stays syntactically valid.
# 评论列表
# 文章目录