from collections import OrderedDict

import theano.tensor as T
from theano.ifelse import ifelse

def AdaMax2(w, objective, alpha=.01, beta1=.1, beta2=.001, n_accum=2):
    # AdaMax with gradient accumulation: gradients are summed over n_accum
    # minibatches and the parameters are updated once every n_accum steps.
    # Convention in this file: beta1/beta2 weight the *new* term, i.e.
    # mom1 = (1-beta1)*mom1 + beta1*g (the reverse of the Adam paper).
    print('AdaMax2', 'alpha:', alpha, 'beta1:', beta1, 'beta2:', beta2, 'n_accum:', n_accum)
    g = T.grad(objective.sum(), w, disconnected_inputs='warn')
    new = OrderedDict()
    it = G.sharedf(0.)  # iteration counter; G.sharedf is the project's helper for floatX shared variables
    new[it] = it + 1
    reset = T.eq(T.mod(new[it], n_accum), 0)             # first step of a new accumulation window
    update = T.eq(T.mod(new[it], n_accum), n_accum - 1)  # last step of the window: apply the update
    for i in range(len(w)):
        mom1 = G.sharedf(w[i].get_value() * 0.)   # first-moment (momentum) estimate
        _max = G.sharedf(w[i].get_value() * 0.)   # exponentially weighted infinity norm
        g_sum = G.sharedf(w[i].get_value() * 0.)  # accumulated gradient over the window
        #g[i] = T.switch(T.isnan(g[i]), T.zeros_like(g[i]), g[i])  # optionally zero out NaNs
        new[g_sum] = ifelse(reset, g[i], g_sum + g[i])
        new[mom1] = ifelse(update, (1 - beta1) * mom1 + beta1 * new[g_sum], mom1)
        new[_max] = ifelse(update, T.maximum((1 - beta2) * _max, abs(new[g_sum]) + 1e-8), _max)
        # Ascent step: the objective is treated as a quantity to maximize
        new[w[i]] = ifelse(update, w[i] + alpha * new[mom1] / new[_max], w[i])
    return new
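# --- Usage sketch (illustrative, not from the original source) ---
# Assumptions: `G.sharedf(x)` is equivalent to wrapping x in theano.shared
# with floatX dtype, and `objective` is a per-datapoint quantity to be
# maximized. The toy linear model and data below are hypothetical.
import numpy as np
import theano

class G(object):
    # Minimal stand-in for the project's utility module (assumption)
    @staticmethod
    def sharedf(x):
        return theano.shared(np.asarray(x, dtype=theano.config.floatX))

if __name__ == '__main__':
    floatX = theano.config.floatX
    x = T.matrix('x')
    W = theano.shared(np.zeros((5, 3), dtype=floatX), name='W')
    b = theano.shared(np.zeros(3, dtype=floatX), name='b')
    # Toy objective per datapoint: maximize the negative squared error
    objective = -T.sum(T.sqr(T.dot(x, W) + b - 1.), axis=1)
    updates = AdaMax2([W, b], objective, alpha=1e-3, n_accum=4)
    step = theano.function([x], objective.sum(), updates=updates)
    for _ in range(200):  # parameters change once every n_accum calls
        step(np.random.randn(16, 5).astype(floatX))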
# AdaMax that keeps a running average of the parameters
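# A hypothetical sketch of such a variant (the original function is not shown
# in this excerpt): reuse the AdaMax2 updates and additionally keep a
# Polyak-style exponential moving average of each parameter, which can be read
# out at test time. The name `AdaMax2Avg` and the `beta3` rate are assumptions,
# following this file's "weight of the new term" convention.
def AdaMax2Avg(w, objective, alpha=.01, beta1=.1, beta2=.001, beta3=.01, n_accum=2):
    new = AdaMax2(w, objective, alpha, beta1, beta2, n_accum)
    w_avg = []
    for wi in w:
        avg = G.sharedf(wi.get_value())                 # EMA copy, initialized at the current value
        new[avg] = (1 - beta3) * avg + beta3 * new[wi]  # track the updated parameter
        w_avg.append(avg)
    return new, w_avg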