def dot_2d(k, M, b=None, g=None):
# k: (nb_samples, memory_width)
# M: (nb_samples, memory_dim, memory_width)
# norms of keys and memories
# k_norm = T.sqrt(T.sum(T.sqr(k), 1)) + 1e-5 # (nb_samples,)
# M_norm = T.sqrt(T.sum(T.sqr(M), 2)) + 1e-5 # (nb_samples, memory_dim,)
k = k[:, None, :] # (nb_samples, 1, memory_width)
value = k * M
if b is not None:
b = b[:, None, :]
        value *= b  # value: (nb_samples, memory_dim, memory_width)
if g is not None:
g = g[None, None, :]
value *= g
sim = T.sum(value, axis=2)
return sim
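A hedged usage sketch (variable names below are illustrative; it assumes `theano`, `theano.tensor as T`, and `numpy as np`, as the snippet itself does): compile `dot_2d` and check that it returns one similarity per memory slot.

import numpy as np
import theano
import theano.tensor as T

k_var = T.matrix('k')      # (nb_samples, memory_width)
M_var = T.tensor3('M')     # (nb_samples, memory_dim, memory_width)
sim_fn = theano.function([k_var, M_var], dot_2d(k_var, M_var))

k_val = np.random.randn(4, 8).astype(theano.config.floatX)
M_val = np.random.randn(4, 6, 8).astype(theano.config.floatX)
print(sim_fn(k_val, M_val).shape)   # (4, 6): one dot product per memory slot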
def op_cosine_c(
s_xr_, s_xi_, s_yr_, s_yi_, axis_=-1, keepdims_=True, eps_=1e-7):
'''
cosine between two complex vectors, uses standard complex inner product
Args:
s_xr_: real part of x
s_xi_: imag part of x
s_yr_: real part of y
s_yi_: imag part of y
eps_: small number to prevent divide by zero
'''
s_nrm = s_xr_*s_yr_ + s_xi_*s_yi_
s_nx = T.sum(T.sqr(s_xr_) + T.sqr(s_xi_), axis=axis_, keepdims=keepdims_)
s_ny = T.sum(T.sqr(s_yr_) + T.sqr(s_yi_), axis=axis_, keepdims=keepdims_)
return T.sum(s_nrm, axis=axis_, keepdims=keepdims_) / T.sqrt(s_nx * s_ny + eps_)
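A quick numeric sanity check, written as an illustrative sketch (assumes `theano`, `T`, `np`): the result should match the real part of the usual complex cosine similarity computed with NumPy, up to the `eps_` term in the denominator.

import numpy as np
import theano
import theano.tensor as T

xr, xi, yr, yi = T.vector('xr'), T.vector('xi'), T.vector('yr'), T.vector('yi')
cos_fn = theano.function(
    [xr, xi, yr, yi],
    op_cosine_c(xr, xi, yr, yi, axis_=-1, keepdims_=False))

a = np.random.randn(16) + 1j * np.random.randn(16)
b = np.random.randn(16) + 1j * np.random.randn(16)
ref = np.real(np.vdot(b, a)) / (np.linalg.norm(a) * np.linalg.norm(b))
out = cos_fn(*[v.astype(theano.config.floatX)
               for v in (a.real, a.imag, b.real, b.imag)])
print(out, ref)   # the two values should agree closely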
def op_ortho_loss(s_x_, axes_=(-2, -1), ndim_=None):
    '''
    orthogonal matrix loss,
    used to regularize a parameter toward an orthogonal/unitary matrix
    Args:
        s_x_: (batch of) matrices
        axes_: tuple of two integers specifying which axes form the matrix,
            defaults to the last two axes
        ndim_: optionally give the matrix size explicitly, i.e. the matrices are (ndim_ x ndim_)
    '''
if ndim_ is None:
ax = axes_[0]
ndim = T.shape(s_x_)[ax]
else:
ndim = ndim_
    tpat = list(range(s_x_.ndim))  # transpose pattern over the tensor's axes (not the matrix size)
bpat = ['x'] * s_x_.ndim
tpat[axes_[0]], tpat[axes_[1]] = tpat[axes_[1]], tpat[axes_[0]]
bpat[axes_[0]] = 0
bpat[axes_[1]] = 1
s_y = T.dot(s_x_.transpose(*tpat), s_x_)
return T.sqr(s_y - T.eye(ndim).dimshuffle(*bpat))
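An illustrative usage sketch (assumes `theano`, `T`, `np`; the 5x5 size and the `.sum()` reduction are my choices, not the source's): for an orthogonal input such as the identity, the loss is essentially zero.

import numpy as np
import theano
import theano.tensor as T

s_x = T.matrix('x')
loss_fn = theano.function([s_x], op_ortho_loss(s_x, ndim_=5).sum())

eye = np.eye(5, dtype=theano.config.floatX)
rnd = np.random.randn(5, 5).astype(theano.config.floatX)
print(loss_fn(eye))   # ~0.0, since eye.T @ eye == eye
print(loss_fn(rnd))   # > 0 for a generic (non-orthogonal) matrix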
def create_esgd_updates(updates, params, gparams, gsums, xsums, lr, eps, gamma, momentum):
has_momentum = momentum.get_value() > 0.0
samples = [ default_mrng.normal(size=p.shape, avg=0, std=1,
dtype=theano.config.floatX) for p in params ]
HVs = T.Lop(gparams, params, samples)
i = theano.shared(np.float64(0.0).astype(theano.config.floatX))
i_t = i + 1.0
omg_t = 1.0 - gamma**i_t
for p, g, m, D, Hv in zip(params, gparams, gsums, xsums, HVs):
if is_subtensor_op(p):
raise Exception("ESGD subtensor update not implemented!")
else:
D_t = D * gamma + T.sqr(Hv) * (1.0-gamma)
if has_momentum:
m_t = m*momentum + g
updates[m] = m_t
else:
m_t = g
g_t = m_t / ( T.sqrt(D_t/omg_t + eps) )
#g_t = m_t / ( T.sqrt(D_t + eps) )
updates[D] = D_t
updates[p] = p - lr*g_t
updates[i] = i_t
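`create_esgd_updates` implements equilibrated SGD: `T.Lop(gparams, params, samples)` gives Hessian-vector products H·v for Gaussian v, and the running average of `(Hv)^2` estimates the squared diagonal preconditioner. A self-contained toy sketch of that L-operator trick (the quadratic loss and matrix A are mine, purely illustrative):

import numpy as np
import theano
import theano.tensor as T

floatX = theano.config.floatX
A = np.array([[3., 1.], [1., 2.]], dtype=floatX)
x = theano.shared(np.array([1., -1.], dtype=floatX), name='x')

loss = 0.5 * T.dot(x, T.dot(T.constant(A), x))   # Hessian of this loss is exactly A
g = T.grad(loss, x)
v = T.vector('v')
Hv = T.Lop(g, x, v)                              # same call pattern as HVs above
hv_fn = theano.function([v], Hv)
print(hv_fn(np.array([1., 0.], dtype=floatX)))   # ~[3., 1.], the first column of A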
def Adam(cost, params, learning_rate=0.0002, b1=0.1, b2=0.001, e=1e-8):
updates = OrderedDict()
grads = T.grad(cost, params)
i = theano.shared(np.asarray(0., dtype=theano.config.floatX))
i_t = i + 1.
fix1 = 1. - (1. - b1)**i_t
fix2 = 1. - (1. - b2)**i_t
lr_t = learning_rate * (T.sqrt(fix2) / fix1)
for p, g in zip(params, grads):
m = theano.shared(p.get_value() * 0.)
v = theano.shared(p.get_value() * 0.)
m_t = (b1 * g) + ((1. - b1) * m)
v_t = (b2 * T.sqr(g)) + ((1. - b2) * v)
g_t = m_t / (T.sqrt(v_t) + e)
p_t = p - (lr_t * g_t)
updates[m] = m_t
updates[v] = v_t
updates[p] = p_t
updates[i] = i_t
return updates
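A hedged usage sketch for the `Adam` helper above (it assumes the module-level imports the snippet relies on: `OrderedDict`, `numpy as np`, `theano`, `T`). Note that `b1=0.1` and `b2=0.001` play the role of `1 - beta1` and `1 - beta2` from the Adam paper, since the running averages are multiplied by `(1. - b1)` and `(1. - b2)`. The returned update dictionary plugs straight into `theano.function`:

import numpy as np
import theano
import theano.tensor as T

w = theano.shared(np.zeros(3, dtype=theano.config.floatX), name='w')
x = T.matrix('x')
y = T.vector('y')
p = T.nnet.sigmoid(T.dot(x, w))
cost = T.nnet.binary_crossentropy(p, y).mean()

train = theano.function([x, y], cost, updates=Adam(cost, [w]))

X = np.random.randn(256, 3).astype(theano.config.floatX)
Y = (X[:, 0] > 0).astype(theano.config.floatX)
for _ in range(500):
    c = train(X, Y)
print(c)   # should have drifted below the initial ~0.693 (= ln 2)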
def adam(cost, params, lr=0.001, b1=0.9, b2=0.999, e=1e-8):
updates = []
grads = T.grad(cost, params)
i = theano.shared(np.dtype(theano.config.floatX).type(1))
i_t = i + 1.
fix1 = 1. - (1. - b1)**i_t
fix2 = 1. - (1. - b2)**i_t
lr_t = lr * (T.sqrt(fix2) / fix1)
for p, g in zip(params, grads):
        g = T.clip(g, -grad_clip, grad_clip)  # NOTE: grad_clip is not a parameter here; it must be defined at module scope
m = theano.shared(p.get_value() * 0.)
v = theano.shared(p.get_value() * 0.)
m_t = (b1 * g) + ((1. - b1) * m)
v_t = (b2 * T.sqr(g)) + ((1. - b2) * v)
g_t = m_t / (T.sqrt(v_t) + e)
p_t = p - (lr_t * g_t)
updates.append((m, m_t))
updates.append((v, v_t))
updates.append((p, p_t))
updates.append((i, i_t))
return updates
def adam(cost, params, learning_rate=0.001, beta1=0.9, beta2=0.999, epsilon=1e-6, **kwargs):
"""Adam Gradient Descent
Scale learning rates by Adaptive moment estimation
References
----------
.. [1] https://arxiv.org/pdf/1412.6980v8.pdf
"""
gparams = T.grad(cost, params)
updates = OrderedDict()
t = shared_variable(to_float_X(0.))
t_t = 1. + t
l_r_t = learning_rate * T.sqrt(1. - beta2 ** t_t) / (1. - beta1 ** t_t)
for param, gparam in zip(params, gparams):
m = shared_variable(np.zeros(param.get_value(borrow=True).shape), broadcastable=param.broadcastable)
v = shared_variable(np.zeros(param.get_value(borrow=True).shape), broadcastable=param.broadcastable)
m_t = beta1 * m + (1. - beta1) * gparam
v_t = beta2 * v + (1. - beta2) * T.sqr(gparam)
updates[m] = m_t
updates[v] = v_t
updates[param] = param - l_r_t * m_t / (T.sqrt(v_t) + epsilon)
updates[t] = t_t
return updates
def __call__(self, c01b):
"""
.. todo::
WRITEME
"""
half = self.n // 2
sq = T.sqr(c01b)
ch, r, c, b = c01b.shape
extra_channels = T.alloc(0., ch + 2*half, r, c, b)
sq = T.set_subtensor(extra_channels[half:half+ch,:,:,:], sq)
scale = self.k
for i in xrange(self.n):
scale += self.alpha * sq[i:i+ch,:,:,:]
scale = scale ** self.beta
return c01b / scale
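This is the pylearn2-style cross-channel Local Response Normalization over a `c01b` (channels, rows, columns, batch) tensor. A standalone sketch of the same computation as a free function (the hyper-parameter values are illustrative defaults, not taken from the source):

import numpy as np
import theano
import theano.tensor as T

def lrn_c01b(c01b, n=5, k=2., alpha=1e-4, beta=0.75):
    # identical arithmetic to __call__ above, with the hyper-parameters as arguments
    half = n // 2
    sq = T.sqr(c01b)
    ch, r, c, b = c01b.shape
    extra = T.alloc(0., ch + 2 * half, r, c, b)
    sq = T.set_subtensor(extra[half:half + ch, :, :, :], sq)
    scale = k
    for i in range(n):
        scale += alpha * sq[i:i + ch, :, :, :]
    return c01b / scale ** beta

x = T.tensor4('c01b')
f = theano.function([x], lrn_c01b(x))
out = f(np.random.randn(16, 8, 8, 4).astype(theano.config.floatX))
print(out.shape)   # (16, 8, 8, 4): shape is preserved, channels locally normalized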
Source: rcnn_class.py, from the project Recurrent-Convolutional-Neural-Network by monisjaved.
def get_cost(self, X, Y, X_sizes):
"""
Calculates cost for each values in mini batch, also
regularizes all the input parameters and then returns
final cost function as theano variable
"""
cost_fn, _ = theano.scan(
fn=self.get_likelihood,
sequences=[X, Y, X_sizes]
)
cost_fn = cost_fn.mean()
cost_fn += self.reg_lambda * T.sqr(self.W_c_r).sum() / 2.
cost_fn += self.reg_lambda * T.sqr(self.W_c_l).sum() / 2.
cost_fn += self.reg_lambda * T.sqr(self.W_conv).sum() / 2.
cost_fn += self.reg_lambda * T.sqr(self.W_output).sum() / 2.
cost_fn += self.reg_lambda * T.sqr(self.b_output).sum() / 2.
# Regularizing word embedding
cost_fn += self.reg_lambda * T.sqr(self.vector_dict).sum() / 2
return cost_fn
def define_loss(self):
        # Inverse since those that have a smaller distance are the most probable.
        score = TT.sum(self.e1[self.rows, :] * self.r1[self.cols, :] * self.e1[self.tubes, :], 1) \
            + TT.sum(self.e2[self.rows, :] * self.r1[self.cols, :] * self.e2[self.tubes, :], 1) \
            + TT.sum(self.e1[self.rows, :] * self.r2[self.cols, :] * self.e2[self.tubes, :], 1) \
            - TT.sum(self.e2[self.rows, :] * self.r2[self.cols, :] * self.e1[self.tubes, :], 1)
        self.pred_func = TT.nnet.sigmoid(score)
        self.loss = TT.nnet.softplus(-self.ys * score).mean()
        self.regul_func = TT.sqr(self.e1[self.rows, :]).mean() \
            + TT.sqr(self.e2[self.rows, :]).mean() \
            + TT.sqr(self.e1[self.tubes, :]).mean() \
            + TT.sqr(self.e2[self.tubes, :]).mean() \
            + TT.sqr(self.r1[self.cols, :]).mean() \
            + TT.sqr(self.r2[self.cols, :]).mean()
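The four-term score is exactly the ComplEx-style trilinear form Re⟨r, e_s, conj(e_o)⟩, with `(e1, e2)` and `(r1, r2)` the real and imaginary parts of the entity and relation embeddings. A NumPy check of that identity (illustrative, not part of the source):

import numpy as np

rng = np.random.RandomState(0)
e1s, e2s = rng.randn(10), rng.randn(10)   # real/imag parts of the subject embedding
e1o, e2o = rng.randn(10), rng.randn(10)   # real/imag parts of the object embedding
r1, r2 = rng.randn(10), rng.randn(10)     # real/imag parts of the relation embedding

score_terms = (np.sum(e1s * r1 * e1o) + np.sum(e2s * r1 * e2o)
               + np.sum(e1s * r2 * e2o) - np.sum(e2s * r2 * e1o))
score_complex = np.real(np.sum((r1 + 1j * r2) * (e1s + 1j * e2s) * np.conj(e1o + 1j * e2o)))
print(np.allclose(score_terms, score_complex))   # True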
def fit(self, weights, o_error, tpo ):
gradients = T.grad(o_error ,weights)
updates = []
for c, v, w, g in zip(self.t_cache, self.t_velocity, weights,gradients):
            new_velocity = tpo["momentum_rate"] * v - tpo["learn_rate"] * g
            new_cache = tpo["decay_rate"] * c + (1 - tpo["decay_rate"]) * T.sqr(g)
            new_weights = (w + new_velocity) - (g * tpo["learn_rate"]) / T.sqrt(new_cache + 0.1 ** 8)
updates.append((w, new_weights))
updates.append((v, new_velocity))
updates.append((c, new_cache))
return updates
def fit(self, weights, o_error, tpo):
updates = []
gradients = theano.grad(o_error, weights)
for c, w, g in zip(self.t_cache, weights, gradients):
new_cache = tpo["decay_rate"] * c + ( 1- tpo["decay_rate"]) * T.sqr(g)
new_weights = w - (g * tpo["learn_rate"]) / T.sqrt(new_cache + 0.1**8)
updates.append((w, new_weights))
updates.append((c, new_cache))
return updates
def build_vae_loss(input_var, l_z_mu, l_z_ls, l_x_mu_list, l_x_ls_list, l_x_list, l_x,
deterministic, binary, L):
layer_outputs = nn.layers.get_output([l_z_mu, l_z_ls] + l_x_mu_list + l_x_ls_list
+ l_x_list + [l_x], deterministic=deterministic)
z_mu = layer_outputs[0]
z_ls = layer_outputs[1]
x_mu = [] if binary else layer_outputs[2:2+L]
x_ls = [] if binary else layer_outputs[2+L:2+2*L]
x_list = layer_outputs[2:2+L] if binary else layer_outputs[2+2*L:2+3*L]
x = layer_outputs[-1]
kl_div = 0.5 * T.sum(1 + 2*z_ls - T.sqr(z_mu) - T.exp(2 * z_ls))
if binary:
logpxz = sum(nn.objectives.binary_crossentropy(x, input_var).sum()
for x in x_list) * (-1./L)
prediction = x_list[0] if deterministic else x
else:
logpxz = sum(log_likelihood(input_var.flatten(2), mu, ls)
for mu, ls in zip(x_mu, x_ls))/L
prediction = x_mu[0] if deterministic else T.sum(x_mu, axis=0)/L
loss = -1 * (logpxz + kl_div)
return loss, prediction
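The `kl_div` term is the negated closed-form KL divergence between the diagonal Gaussian N(mu, exp(ls)^2) produced by the encoder and the standard normal prior, so `loss = -(logpxz + kl_div)` is the negative ELBO. A NumPy-only sanity check (illustrative) comparing the closed form against a Monte-Carlo estimate for one latent dimension:

import numpy as np

rng = np.random.RandomState(0)
mu, ls = 0.7, -0.3                    # mean and log-std of q(z)
sigma = np.exp(ls)

# closed form matching the expression above: KL(q || N(0,1)) = -0.5*(1 + 2*ls - mu^2 - sigma^2)
kl_closed = -0.5 * (1. + 2. * ls - mu ** 2 - np.exp(2. * ls))

# Monte-Carlo estimate: E_q[log q(z) - log p(z)] with z ~ q
z = mu + sigma * rng.randn(200000)
log_q = -0.5 * np.log(2 * np.pi) - ls - 0.5 * ((z - mu) / sigma) ** 2
log_p = -0.5 * np.log(2 * np.pi) - 0.5 * z ** 2
print(kl_closed, np.mean(log_q - log_p))   # the two estimates should agree closely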
def sym_logdensity(self, x):
""" x is a matrix of column datapoints (VxB) V = n_visible, B = batch size """
def density_given_previous_a_and_x(x, w, V_alpha, b_alpha, V_mu, b_mu, V_sigma, b_sigma, activations_factor, p_prev, a_prev, x_prev):
a = a_prev + T.dot(T.shape_padright(x_prev, 1), T.shape_padleft(w, 1))
h = self.nonlinearity(a * activations_factor) # BxH
Alpha = T.nnet.softmax(T.dot(h, V_alpha) + T.shape_padleft(b_alpha)) # BxC
Mu = T.dot(h, V_mu) + T.shape_padleft(b_mu) # BxC
Sigma = T.exp((T.dot(h, V_sigma) + T.shape_padleft(b_sigma))) # BxC
p = p_prev + log_sum_exp(-constantX(0.5) * T.sqr((Mu - T.shape_padright(x, 1)) / Sigma) - T.log(Sigma) - constantX(0.5 * np.log(2 * np.pi)) + T.log(Alpha))
return (p, a, x)
# First element is different (it is predicted from the bias only)
a0 = T.zeros_like(T.dot(x.T, self.W)) # BxH
p0 = T.zeros_like(x[0])
x0 = T.ones_like(x[0])
([ps, _as, _xs], updates) = theano.scan(density_given_previous_a_and_x,
sequences=[x, self.W, self.V_alpha, self.b_alpha, self.V_mu, self.b_mu, self.V_sigma, self.b_sigma, self.activation_rescaling],
outputs_info=[p0, a0, x0])
return (ps[-1], updates)
def adam(params, grads, lr=0.001, b1=0.9, b2=0.999, e=1e-8):
updates = OrderedDict()
i = theano.shared(np.float32(0))
i_t = i + 1.
for p, g in zip(params, grads):
v = build_shared_zeros(p.get_value(True).shape)
r = build_shared_zeros(p.get_value(True).shape)
v_t = (b1 * v) + (1. - b1) * g
r_t = (b2 * r) + (1. - b2) * T.sqr(g)
r_hat = lr / (T.sqrt(r_t / (1 - b2 ** i_t)) + e)
v_hat = v / (1 - b1 ** i_t)
p_t = p - r_hat * v_hat
updates[v] = v_t
updates[r] = r_t
updates[p] = p_t
updates[i] = i_t
return updates
def gradients_to_updates(self, params, grads):
updates = OrderedDict()
for pp, gg in zip(params, grads):
value = pp.get_value(borrow=True)
self.accu = theano.shared(np.zeros(value.shape, dtype=theano.config.floatX), 'adadelta_accu_'+pp.name)
self.delta_accu = theano.shared(np.zeros(value.shape, dtype=theano.config.floatX), 'adadelta_delta_accu_'+pp.name)
self.params.append(self.accu)
self.params.append(self.delta_accu)
self.accu.tags = ['optimizer_param']
self.delta_accu.tags = ['optimizer_param']
accu_new = self.rho * self.accu + (1 - self.rho) * T.sqr(gg)
updates[self.accu] = accu_new
ud = gg * (T.sqrt(self.delta_accu) + 1e-7) / (T.sqrt(accu_new) + 1e-7)
updates[pp] = pp - self.lr * ud
delta_accu_new = self.rho * self.delta_accu + (1 - self.rho) * T.sqr(ud)
updates[self.delta_accu] = delta_accu_new
return updates
def RMSProp(self, learning_rate=0.01, decay=0.9, epsilon=1.0 / 100.):
"""
RMSProp of Tieleman et al.
:param learning_rate: learning rate
:param decay: decay rate of gradient history
:param epsilon: gradient clip
:return: update
"""
for param_i, grad_i in zip(self.params, self.grads):
# Accumulate gradient
msg = theano.shared(numpy.zeros(param_i.get_value().shape, dtype=theano.config.floatX))
self.shared.append(msg)
new_mean_squared_grad = (decay * msg + (1 - decay) * T.sqr(grad_i))
# Compute update
rms_grad_t = T.sqrt(new_mean_squared_grad)
rms_grad_t = T.maximum(rms_grad_t, epsilon)
delta_x_t = -learning_rate * grad_i / rms_grad_t
# Apply update
self.updates.append((param_i, param_i + delta_x_t))
self.updates.append((msg, new_mean_squared_grad))
return self.updates
def gradient_descent(self, loss):
"""Momentum GD with gradient clipping."""
grad = T.grad(loss, self.params)
        self.momentum_velocity_ = [0.] * len(grad)  # plain Python floats, not shared variables: the velocity only accumulates across repeated calls to this graph builder
grad_norm = T.sqrt(sum(map(lambda x: T.sqr(x).sum(), grad)))
updates = OrderedDict()
not_finite = T.or_(T.isnan(grad_norm), T.isinf(grad_norm))
scaling_den = T.maximum(5.0, grad_norm)
for n, (param, grad) in enumerate(zip(self.params, grad)):
grad = T.switch(not_finite, 0.1 * param,
grad * (5.0 / scaling_den))
velocity = self.momentum_velocity_[n]
update_step = self.momentum * velocity - self.learning_rate * grad
self.momentum_velocity_[n] = update_step
updates[param] = param + update_step
return updates
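The `T.switch` / `scaling_den` pattern above clips by global gradient norm: the whole gradient is rescaled so its L2 norm never exceeds 5.0, and non-finite gradients are replaced by a small pull toward zero (`0.1 * param`). A standalone sketch of just the norm-clipping rule (the threshold 5.0 is kept from the source; everything else is illustrative):

import numpy as np
import theano
import theano.tensor as T

g = T.vector('g')
grad_norm = T.sqrt(T.sum(T.sqr(g)))
clipped = g * (5.0 / T.maximum(5.0, grad_norm))   # same scaling rule as above
clip_fn = theano.function([g], clipped)

small = np.array([1., 2.], dtype=theano.config.floatX)
big = np.array([30., 40.], dtype=theano.config.floatX)   # norm 50
print(clip_fn(small))                  # unchanged: norm already <= 5
print(np.linalg.norm(clip_fn(big)))    # ~5.0 after rescaling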
def Adam(self, params, cost, lr=0.0002, b1=0.1, b2=0.001, e=1e-8):
updates = []
grads = T.grad(cost, params)
i = theano.shared(as_floatX(0.))
i_t = i + 1.
fix1 = 1. - (1. - b1)**i_t
fix2 = 1. - (1. - b2)**i_t
lr_t = lr * (T.sqrt(fix2) / fix1)
for p, g in zip(params, grads):
m = theano.shared(p.get_value() * 0.)
v = theano.shared(p.get_value() * 0.)
m_t = (b1 * g) + ((1. - b1) * m)
v_t = (b2 * T.sqr(g)) + ((1. - b2) * v)
g_t = m_t / (T.sqrt(v_t) + e)
p_t = p - (lr_t * g_t)
updates.append((m, m_t))
updates.append((v, v_t))
updates.append((p, p_t))
updates.append((i, i_t))
return updates
def Adam(grads, lr=0.0002, b1=0.1, b2=0.001, e=1e-8):
updates = []
varlist = []
i = sharedX(0.)
i_t = i + 1.
fix1 = 1. - (1. - b1)**i_t
fix2 = 1. - (1. - b2)**i_t
lr_t = lr * (T.sqrt(fix2) / fix1)
for p, g in grads.items():
m = sharedX(p.get_value() * 0., name=p.name + '_adam_optimizer_m')
v = sharedX(p.get_value() * 0., name=p.name + '_adam_optimizer_v')
m_t = (b1 * g) + ((1. - b1) * m)
v_t = (b2 * T.sqr(g)) + ((1. - b2) * v)
g_t = m_t / (T.sqrt(v_t) + e)
p_t = p - (lr_t * g_t)
updates.append((m, m_t))
updates.append((v, v_t))
updates.append((p, p_t))
varlist.append(m)
varlist.append(v)
updates.append((i, i_t))
return updates, varlist
def Adagrad(grads, lr):
updates = OrderedDict()
for param in grads.keys():
# sum_square_grad := \sum g^2
sum_square_grad = sharedX(param.get_value() * 0.)
if param.name is not None:
sum_square_grad.name = 'sum_square_grad_' + param.name
# Accumulate gradient
new_sum_squared_grad = sum_square_grad + T.sqr(grads[param])
# Compute update
delta_x_t = (- lr / T.sqrt(numpy.float32(1e-5) + new_sum_squared_grad)) * grads[param]
# Apply update
updates[sum_square_grad] = new_sum_squared_grad
updates[param] = param + delta_x_t
return updates
def get_adam_updates(f, params, lr=10., b1=0.9, b2=0.999, e=1e-8, dec=5e-3, norm_grads=False):
"""Generate updates to optimize using the Adam optimizer with linear learning rate decay."""
t = theano.shared(0)
ms = [theano.shared(np.zeros(param.shape.eval(), dtype=floatX), borrow=True) for param in params]
vs = [theano.shared(np.zeros(param.shape.eval(), dtype=floatX), borrow=True) for param in params]
gs = T.grad(f, params)
if norm_grads:
gs = [g / (T.sum(T.abs_(g)) + 1e-8) for g in gs]
t_u = (t, t + 1)
m_us = [(m, b1 * m + (1. - b1) * g) for m, g in zip(ms, gs)]
v_us = [(v, b2 * v + (1. - b2) * T.sqr(g)) for v, g in zip(vs, gs)]
t_u_f = T.cast(t_u[1], floatX)
lr_hat = (lr / (1. + t_u_f * dec)) * T.sqrt(1. - T.pow(b2, t_u_f)) / (1. - T.pow(b1, t_u_f))
param_us = [(param, param - lr_hat * m_u[1] / (T.sqrt(v_u[1]) + e)) for m_u, v_u, param in zip(m_us, v_us, params)]
return m_us + v_us + param_us + [t_u]