Example source code for Python gradient()

basic.py (project: Theano-Deep-learning, author: GeekLiB)
def grad(self, inputs, g):

        # g[1:] are all integers, so their Jacobian in this op
        # is 0. We thus don't need to worry about what their values
        # are.

        # If g[0] is disconnected, then this op doesn't contribute
        # any gradient anywhere. But we know that at least one of
        # g[1:] is connected, or this grad method wouldn't have been
        # called, so we should report zeros.
        (csm,) = inputs
        if isinstance(g[0].type, DisconnectedType):
            return [csm.zeros_like()]

        data, indices, indptr, shape = csm_properties(csm)
        return [CSM(csm.format)(g[0], indices, indptr, shape)]

# don't make this a function or it breaks some optimizations below
basic.py (project: Theano-Deep-learning, author: GeekLiB)
def perform(self, node, inputs, outputs):
        (a_indices, a_indptr, b, g_ab) = inputs
        (out,) = outputs
        g_a_data = numpy.zeros(a_indices.shape, dtype=g_ab.dtype)
        for i in xrange(len(a_indptr) - 1):  # loop over rows
            ind0 = a_indptr[i]
            ind1 = a_indptr[i + 1]
            # loop over values in that row (columns)
            for j_idx in xrange(ind0, ind1):
                j = a_indices[j_idx]
                # grad is dot product of i-th row of gradient with j-th row of b
                # Depending on the type of g_ab and b (sparse or dense),
                # the following dot product can result in a scalar or
                # a (1, 1) sparse matrix.
                dot_val = numpy.dot(g_ab[i], b[j].T)
                if isinstance(dot_val, scipy.sparse.spmatrix):
                    dot_val = dot_val[0, 0]
                g_a_data[j_idx] = dot_val
        out[0] = g_a_data
nlinalg.py (project: Theano-Deep-learning, author: GeekLiB)
def grad(self, inputs, g_outputs):
        r"""The gradient function should return

            .. math:: V\frac{\partial X^{-1}}{\partial X},

        where :math:`V` corresponds to ``g_outputs`` and :math:`X` to
        ``inputs``. Using the `matrix cookbook
        <http://www2.imm.dtu.dk/pubdb/views/publication_details.php?id=3274>`_,
        one can deduce that the relation corresponds to

            .. math:: (X^{-1} \cdot V^{T} \cdot X^{-1})^T.

        """
        x, = inputs
        xi = self(x)
        gz, = g_outputs
        # TT.dot(gz.T,xi)
        return [-matrix_dot(xi, gz.T, xi).T]
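
The relation above can be sanity-checked numerically. The following is a small NumPy sketch (not part of the original source; the names X and V are illustrative) that compares the formula with a forward finite difference of f(X) = sum(V * X^{-1}):

import numpy as np

rng = np.random.RandomState(0)
X = rng.rand(4, 4) + 4 * np.eye(4)   # a well-conditioned test matrix
V = rng.rand(4, 4)                   # plays the role of ``g_outputs``
Xi = np.linalg.inv(X)

analytic = -(Xi.dot(V.T).dot(Xi)).T  # the expression returned above

eps = 1e-6
numeric = np.zeros_like(X)
for i in range(4):
    for j in range(4):
        Xp = X.copy()
        Xp[i, j] += eps
        numeric[i, j] = (np.sum(V * np.linalg.inv(Xp)) - np.sum(V * Xi)) / eps

assert np.allclose(analytic, numeric, atol=1e-4)
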
extra_ops.py (project: Theano-Deep-learning, author: GeekLiB)
def grad(self, inp, cost_grad):
        """
        Notes
        -----
        The gradient is currently implemented for matrices only.

        """
        a, val = inp
        grad = cost_grad[0]
        if (a.dtype.startswith('complex')):
            return [None, None]
        elif a.ndim > 2:
            raise NotImplementedError('%s: gradient is currently implemented'
                                      ' for matrices only' %
                                      self.__class__.__name__)
        wr_a = fill_diagonal(grad, 0)  # valid for any number of dimensions
        # diag is only valid for matrices
        wr_val = theano.tensor.nlinalg.diag(grad).sum()
        return [wr_a, wr_val]
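
A possible usage sketch (assuming Theano is installed; the variable names are made up): the gradient w.r.t. the matrix has zeros on the diagonal, and the gradient w.r.t. the fill value is the sum of the diagonal of the incoming gradient.

import numpy as np
import theano
import theano.tensor as tt
from theano.tensor.extra_ops import fill_diagonal

a = tt.matrix('a')
val = tt.scalar('val')
cost = fill_diagonal(a, val).sum()
g_a, g_val = theano.grad(cost, [a, val])

f = theano.function([a, val], [g_a, g_val])
ga, gv = f(np.ones((3, 3), dtype=theano.config.floatX),
           np.asarray(2.0, dtype=theano.config.floatX))
# ga is a matrix of ones with zeros on the diagonal; gv == 3.0
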
nnet.py (project: Theano-Deep-learning, author: GeekLiB)
def binary_crossentropy(output, target):
    """
    Compute the crossentropy of binary random variables.

    Output and target are each expectations of binary random
    variables; target may be exactly 0 or 1 but output must
    lie strictly between 0 and 1.

    Notes
    -----
    We could use the x log y op to support output=0 and output=1.
    The gradient would still be undefined though.

    We do not sum; crossentropy is computed componentwise.
    TODO : Rewrite as a scalar, and then broadcast to tensor.

    """
    return -(target * tensor.log(output) + (1.0 - target) * tensor.log(1.0 - output))
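
A minimal usage sketch (assuming Theano is installed; the tensors p and t are illustrative): the result is element-wise, and the gradient w.r.t. the output follows (p - t) / (p * (1 - p)) up to the scaling introduced by the reduction.

import numpy as np
import theano
import theano.tensor as tt

p = tt.vector('p')    # predictions, strictly inside (0, 1)
t = tt.vector('t')    # targets in {0, 1}
xent = tt.nnet.binary_crossentropy(p, t)
gp = theano.grad(xent.mean(), p)

f = theano.function([p, t], [xent, gp])
xe, g = f(np.asarray([0.9, 0.2], dtype=theano.config.floatX),
          np.asarray([1.0, 0.0], dtype=theano.config.floatX))
# xe ≈ [-log(0.9), -log(0.8)]
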
dnn.py (project: Theano-Deep-learning, author: GeekLiB)
def dnn_gradweight(img, topgrad,
                   kerns_shp,
                   border_mode='valid', subsample=(1, 1),
                   conv_mode='conv'):
    """
    GPU convolution gradient with respect to weight using cuDNN from NVIDIA.

    The memory layout to use is 'bc01', that is 'batch', 'channel',
    'first dim', 'second dim' in that order.

    FIXME parameters doc

    :warning: The cuDNN library only works with GPUs that have a compute
      capability of 3.0 or higher.  This means that older GPUs will not
      work with this Op.
    """

    img = gpu_contiguous(img)
    topgrad = gpu_contiguous(topgrad)
    kerns_shp = theano.tensor.as_tensor_variable(kerns_shp)
    desc = GpuDnnConvDesc(border_mode=border_mode, subsample=subsample,
                          conv_mode=conv_mode)(img.shape, kerns_shp)
    out = gpu_alloc_empty(*kerns_shp)
    return GpuDnnConvGradW()(img, topgrad, out, desc)
dnn.py (project: Theano-Deep-learning, author: GeekLiB)
def dnn_gradweight3d(img, topgrad,
                     kerns_shp,
                     border_mode='valid', subsample=(1, 1, 1),
                     conv_mode='conv'):
    """
    GPU convolution gradient with respect to weight using cuDNN from NVIDIA.

    The memory layout to use is 'bct01', that is 'batch', 'channel',
    'first dim', 'second dim', 'third dim' in that order.

    FIXME parameters doc

    :warning: The cuDNN library only works with GPUs that have a compute
      capability of 3.0 or higher.  This means that older GPUs will not
      work with this Op.
    """

    img = gpu_contiguous(img)
    topgrad = gpu_contiguous(topgrad)
    kerns_shp = theano.tensor.as_tensor_variable(kerns_shp)
    desc = GpuDnnConvDesc(border_mode=border_mode, subsample=subsample,
                          conv_mode=conv_mode)(img.shape, kerns_shp)
    out = gpu_alloc_empty(*kerns_shp)
    return GpuDnnConv3dGradW()(img, topgrad, out, desc)
dnn.py (project: Theano-Deep-learning, author: GeekLiB)
def dnn_gradinput(kerns, topgrad,
                  img_shp,
                  border_mode='valid', subsample=(1, 1),
                  conv_mode='conv'):
    """
    GPU convolution gradient with respect to input using cuDNN from NVIDIA.

    The memory layout to use is 'bc01', that is 'batch', 'channel',
    'first dim', 'second dim' in that order.

    FIXME parameters doc

    :warning: The cuDNN library only works with GPUs that have a compute
      capability of 3.0 or higher.  This means that older GPUs will not
      work with this Op.
    """

    kerns = gpu_contiguous(kerns)
    topgrad = gpu_contiguous(topgrad)
    img_shp = theano.tensor.as_tensor_variable(img_shp)
    desc = GpuDnnConvDesc(border_mode=border_mode, subsample=subsample,
                          conv_mode=conv_mode)(img_shp, kerns.shape)

    out = gpu_alloc_empty(*img_shp)
    return GpuDnnConvGradI()(kerns, topgrad, out, desc)
dnn.py (project: Theano-Deep-learning, author: GeekLiB)
def dnn_gradinput3d(kerns, topgrad,
                    img_shp,
                    border_mode='valid', subsample=(1, 1, 1),
                    conv_mode='conv'):
    """
    GPU convolution gradient with respect to input using cuDNN from NVIDIA.

    The memory layout to use is 'bct01', that is 'batch', 'channel',
    'first dim', 'second dim', 'third dim' in that order.

    FIXME parameters doc

    :warning: The cuDNN library only works with GPUs that have a compute
      capability of 3.0 or higher.  This means that older GPUs will not
      work with this Op.
    """

    kerns = gpu_contiguous(kerns)
    topgrad = gpu_contiguous(topgrad)
    img_shp = theano.tensor.as_tensor_variable(img_shp)
    desc = GpuDnnConvDesc(border_mode=border_mode, subsample=subsample,
                          conv_mode=conv_mode)(img_shp, kerns.shape)

    out = gpu_alloc_empty(*img_shp)
    return GpuDnnConv3dGradI()(kerns, topgrad, out, desc)
basic.py (project: Theano-Deep-learning, author: GeekLiB)
def grad(self, inputs, gout):
        (cond, ift, iff) = inputs
        (gz,) = gout
        first_part = switch(cond, gz, 0.)
        second_part = switch(cond, 0., gz)

        out = self(cond, ift, iff)
        if out.type.dtype in discrete_types:
            first_part = 0.
            second_part = 0.

        # cond does affect the elements of the output, so it is connected.
        # For the sake of making the gradient convenient, we assume that
        # condition + epsilon always triggers the same branch as condition.
        condition_grad = cond.zeros_like().astype(theano.config.floatX)

        return (condition_grad, first_part, second_part)
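
A small usage sketch of the behaviour described in the comments (assuming Theano; names are illustrative): each branch only receives gradient where it is selected, and the condition gets a zero gradient.

import numpy as np
import theano
import theano.tensor as tt

c = tt.vector('c')    # used directly as the condition (nonzero means true)
a = tt.vector('a')
b = tt.vector('b')
gc, ga, gb = theano.grad(tt.switch(c, a, b).sum(), [c, a, b])

f = theano.function([c, a, b], [gc, ga, gb])
floatX = theano.config.floatX
print(f(np.asarray([1., 0.], dtype=floatX),
        np.asarray([3., 3.], dtype=floatX),
        np.asarray([5., 5.], dtype=floatX)))
# gc = [0, 0], ga = [1, 0], gb = [0, 1]
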
basic.py (project: Theano-Deep-learning, author: GeekLiB)
def grad(self, inputs, gout):
        (x, y) = inputs
        (gz,) = gout
        if gz.type in complex_types:
            # max is currently defined for complex_types,
            # but the gradient for complex is not.
            raise NotImplementedError()

        output = self(x, y)

        if output.type in discrete_types:
            return [x.zeros_like().astype(theano.config.floatX),
                    y.zeros_like().astype(theano.config.floatX)]

        gx = eq(output, x) * gz
        gy = eq(output, y) * gz
        return (gx, gy)
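
A quick sketch (assuming Theano; the values are illustrative): the gradient goes to whichever input attains the maximum, and to both of them at a tie, since eq(output, x) and eq(output, y) are both 1 there.

import numpy as np
import theano
import theano.tensor as tt

x = tt.vector('x')
y = tt.vector('y')
gx, gy = theano.grad(tt.maximum(x, y).sum(), [x, y])

f = theano.function([x, y], [gx, gy])
floatX = theano.config.floatX
print(f(np.asarray([1., 5., 2.], dtype=floatX),
        np.asarray([2., 3., 2.], dtype=floatX)))
# gx = [0, 1, 1], gy = [1, 0, 1]  (the tie at index 2 feeds both inputs)
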
basic.py (project: Theano-Deep-learning, author: GeekLiB)
def grad(self, inputs, gout):

        (x, y) = inputs
        (gz,) = gout
        if x.type in complex_types:
            raise NotImplementedError()

        # If the output of this op is discrete, then it
        # is locally flat everywhere, so the gradient
        # through it is 0.
        # This is different from it not being connected
        # to the output; x/y is still a function of x
        # and y; it's just a step function.
        if all(a.dtype in discrete_types for a in (x, y)):
            return [x.zeros_like(), y.zeros_like()]

        first_part = gz / y

        if y.type in complex_types:
            raise NotImplementedError()

        second_part = -(gz * x) / (y * y)

        return first_part, second_part
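
The two parts correspond to d(x/y)/dx = 1/y and d(x/y)/dy = -x/y^2, each multiplied by the incoming gradient gz. A tiny NumPy finite-difference check (not part of the original source):

import numpy as np

x0, y0, eps = 3.0, -2.0, 1e-6
assert np.isclose(((x0 + eps) / y0 - x0 / y0) / eps, 1.0 / y0, atol=1e-5)
assert np.isclose((x0 / (y0 + eps) - x0 / y0) / eps, -x0 / y0 ** 2, atol=1e-5)
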
basic.py (project: Theano-Deep-learning, author: GeekLiB)
def grad(self, inputs, gout):
        (y, x) = inputs
        (gz,) = gout
        if gz.type in complex_types:
            raise NotImplementedError()
        else:
            if self(x, y).type in discrete_types:
                if x.type in discrete_types:
                    gx = x.zeros_like(dtype=theano.config.floatX)
                else:
                    gx = x.zeros_like()
                if y.type in discrete_types:
                    gy = y.zeros_like(dtype=theano.config.floatX)
                else:
                    gy = y.zeros_like()
                return [gx, gy]

            # If the output is float, the gradient should flow,
            # even if the inputs are ints
            return [gz * x / (sqr(x) + sqr(y)),
                    gz * neg(y) / (sqr(x) + sqr(y))]
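
Note the input order is (y, x), so the two expressions are d arctan2(y, x)/dy = x/(x^2 + y^2) and d arctan2(y, x)/dx = -y/(x^2 + y^2). A quick NumPy finite-difference check (illustrative, not from the source):

import numpy as np

y0, x0, eps = 0.7, -1.3, 1e-6
dy = (np.arctan2(y0 + eps, x0) - np.arctan2(y0, x0)) / eps
dx = (np.arctan2(y0, x0 + eps) - np.arctan2(y0, x0)) / eps
assert np.isclose(dy, x0 / (x0 ** 2 + y0 ** 2), atol=1e-5)
assert np.isclose(dx, -y0 / (x0 ** 2 + y0 ** 2), atol=1e-5)
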
gradient.py (project: Theano-Deep-learning, author: GeekLiB)
def grad_not_implemented(op, x_pos, x, comment=""):
    """
    Return an un-computable symbolic variable of type `x.type`.

    If any call to tensor.grad results in an expression containing this
    un-computable variable, an exception (NotImplementedError) will be
    raised indicating that the gradient on the
    `x_pos`'th input of `op` has not been implemented. Likewise if
    any call to theano.function involves this variable.

    Optionally adds a comment to the exception explaining why this
    gradient is not implemented.
    """

    return (NullType((
        "This variable is Null because the grad method for "
        "input %s (%s) of the %s op is not implemented. %s"
    ) % (x_pos, x, op, comment)))()
gradient.py (project: Theano-Deep-learning, author: GeekLiB)
def grad_undefined(op, x_pos, x, comment=""):
    """
    Return an un-computable symbolic variable of type `x.type`.

    If any call to tensor.grad results in an expression containing this
    un-computable variable, an exception (GradUndefinedError) will be
    raised indicating that the gradient on the
    `x_pos`'th input of `op` is mathematically undefined. Likewise if
    any call to theano.function involves this variable.

    Optionally adds a comment to the exception explaining why this
    gradient is not defined.
    """

    return (NullType(
        (
            "This variable is Null because the grad method for "
            "input %s (%s) of the %s op is mathematically undefined. %s"
        ) % (x_pos, x, op, comment)))()
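
Both helpers simply build a variable of NullType carrying the error message; nothing is raised until the variable is actually used by tensor.grad or theano.function. A small illustrative sketch (the op argument is set to None here purely for demonstration):

import theano.tensor as tt
from theano.gradient import grad_not_implemented, grad_undefined

x = tt.scalar('x')
nu = grad_undefined(None, 0, x, "demonstration only")
ni = grad_not_implemented(None, 0, x, "demonstration only")
print(type(nu.type).__name__, type(ni.type).__name__)   # NullType NullType
# Compiling or differentiating an expression that contains nu or ni
# is what triggers the error described in the docstrings above.
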
gradient.py (project: Theano-Deep-learning, author: GeekLiB)
def abs_rel_errors(self, g_pt):
        """Return the abs and rel error of gradient estimate `g_pt`

        `g_pt` must be a list of ndarrays of the same length as self.gf,
        otherwise a ValueError is raised.

        Corresponding ndarrays in `g_pt` and `self.gf` must have the same
        shape, or a ValueError is raised.

        """
        if len(g_pt) != len(self.gf):
            raise ValueError('argument has wrong number of elements',
                             len(g_pt))
        errs = []
        for i, (a, b) in enumerate(zip(g_pt, self.gf)):
            if a.shape != b.shape:
                raise ValueError('argument element %i has wrong shape %s' % (
                    i, str((a.shape, b.shape))))
            errs.append(numeric_grad.abs_rel_err(a, b))
        return errs
gradient.py (project: Theano-Deep-learning, author: GeekLiB)
def zero_grad(x):
    """
    Consider an expression constant when computing gradients.

    The expression itself is unaffected, but when its gradient is
    computed, or the gradient of another expression that this
    expression is a subexpression of, it will be backpropagated
    through with a value of zero. In other words, the gradient of
    the expression is truncated to 0.

    :param x: A Theano expression whose gradient should be truncated.

    :return: The expression is returned unmodified, but its gradient
        is now truncated to 0.
    """
    return zero_grad_(x)
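
A minimal sketch (assuming Theano): the forward value is unchanged, but the wrapped occurrence contributes nothing to the gradient.

import numpy as np
import theano
import theano.tensor as tt
from theano.gradient import zero_grad

x = tt.scalar('x')
y = x * zero_grad(x)          # forward value: x * x
gy = theano.grad(y, x)        # only the un-wrapped factor contributes

f = theano.function([x], [y, gy])
print(f(np.asarray(3.0, dtype=theano.config.floatX)))   # [9.0, 3.0]
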
gradient.py (project: Theano-Deep-learning, author: GeekLiB)
def disconnected_grad(x):
    """
    Consider an expression constant when computing gradients,
    while effectively not backpropagating through it.

    The expression itself is unaffected, but when its gradient is
    computed, or the gradient of another expression that this
    expression is a subexpression of, it will not be backpropagated
    through. This is effectively equivalent to truncating the gradient
    expression to 0, but is executed faster than zero_grad(), which still
    has to go through the underlying computational graph related to the
    expression.

    :param x: A Theano expression whose gradient should not be
              backpropagated through.

    :return: The expression is returned unmodified, but its gradient
        is now effectively truncated to 0.
    """
    return disconnected_grad_(x)
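
A sketch of the practical difference from zero_grad() (assuming Theano): the wrapped expression is reported as disconnected rather than connected with a zero gradient, so theano.grad complains unless told to ignore disconnected inputs.

import theano
import theano.tensor as tt
from theano.gradient import disconnected_grad

x = tt.scalar('x')
cost = disconnected_grad(x) ** 2

# theano.grad(cost, x)  # raises a DisconnectedInputError by default
g = theano.grad(cost, x, disconnected_inputs='ignore')  # a zero gradient
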
gfilt.py (project: crfrnn_layer, author: HapeMask)
def grad(self, inputs, ograds):
        ref, values, ref_dim, val_dim = inputs[:4]
        hash_struct = inputs[4:]
        ograd = ograds[0]

        ref_dim = get_scalar_constant_value(ref_dim)
        val_dim = get_scalar_constant_value(val_dim)

        def _conv(x):
            return GaussianFilter()(ref, x, ref_dim, val_dim, *hash_struct)

        # Since the kernels are separable and symmetric, the gradient w.r.t.
        # input is just the same filtering applied to the output grads.
        grad_i = _conv(ograd)

        def _gradr(r_i, vals, og, *args):
            return (og * (_conv(vals*r_i) - r_i*_conv(vals)) +
                    vals * (_conv(og*r_i) - r_i*_conv(og)))

        grad_r, _ = theano.scan(fn=_gradr, sequences=[ref],
                                non_sequences=[values, ograd] + hash_struct,
                                outputs_info=None)

        grad_r = grad_r.sum(axis=1, acc_dtype="float32")

        grads = [DisconnectedType()() for i in range(len(inputs))]
        grads[0] = grad_r
        grads[1] = grad_i
        return grads
gpugfilt.py (project: crfrnn_layer, author: HapeMask)
def grad(self, inputs, ograds):
        ref, values, ref_dim, val_dim = inputs[:4]
        hash_struct = inputs[4:]
        ograd = ograds[0]

        ref_dim = get_scalar_constant_value(ref_dim)
        val_dim = get_scalar_constant_value(val_dim)

        def _conv(x):
            return GpuGaussianFilter()(ref, x, ref_dim, val_dim, *hash_struct)

        # Since the kernels are separable and symmetric, the gradient w.r.t.
        # input is just the same filtering applied to the output grads.
        grad_i = _conv(ograd)

        def _gradr(r_i, vals, og, *args):
            return (og * (_conv(vals*r_i) - r_i*_conv(vals)) +
                    vals * (_conv(og*r_i) - r_i*_conv(og)))

        grad_r, _ = theano.scan(fn=_gradr, sequences=[ref],
                                non_sequences=[values, ograd] + hash_struct,
                                outputs_info=None)

        grad_r = grad_r.sum(axis=1, acc_dtype="float32")

        grads = [DisconnectedType()() for i in range(len(inputs))]
        grads[0] = grad_r
        grads[1] = grad_i
        return grads
dnn.py (project: Theano-Deep-learning, author: GeekLiB)
def grad(self, inp, grads):
        img, ws, stride, pad = inp
        grad, = grads

        grad = gpu_contiguous(grad)

        out = self(img, ws, stride, pad)

        g_out = GpuDnnPoolGrad(mode=self.mode)(img, out, grad, ws, stride, pad)

        return (g_out, theano.gradient.DisconnectedType()(),
                theano.gradient.DisconnectedType()(),
                theano.gradient.DisconnectedType()())
dnn.py (project: Theano-Deep-learning, author: GeekLiB)
def L_op(self, inputs, outputs, output_grads):
        desc, w, x, hx = inputs[:4]
        cx = inputs[4] if len(inputs) == 5 else None
        reserve, y, hy = outputs[:3]
        _, dy, dhy = output_grads[:3]
        dcy = output_grads[3] if len(output_grads) == 4 else None
        # Since the op returns two outputs which contain essentially
        # the same information, the user will most likely only use one
        # of them. This leads to the situation that the other is
        # considered "disconnected" by theano in the gradient.
        # However, we know that this isn't really the case, so we fix
        # it here.

        # If all the ys are disconnected, then you get a boring
        # gradient instead of an error.  But in that case you
        # shouldn't call this method anyway.
        if isinstance(dy.type, DisconnectedType):
            dy = as_gpuarray_variable(y.zeros_like(),
                                      context_name=y.type.context_name)
        if isinstance(dhy.type, DisconnectedType):
            dhy = None
        if dcy and isinstance(dcy.type, DisconnectedType):
            dcy = None
        dinputs = GpuDnnRNNGradInputs(rnn_mode=self.rnn_mode,
                                      grad_h=(dhy is not None),
                                      grad_c=(dcy is not None))(
            desc, x, y, dy, dhy, dcy, w, hx, cx, reserve, return_list=True)
        reserve2, dx, dhx = dinputs[:3]
        dw = GpuDnnRNNGradWeights()(
            desc, x, hx, y, reserve2, w)
        res = [DisconnectedType()(), dw, dx, dhx]
        if cx is not None:
            res.append(dinputs[3])  # dcx
        return res
fft.py (project: Theano-Deep-learning, author: GeekLiB)
def grad(self, inputs, output_grads):
        gout, = output_grads
        s = inputs[1]
        gf = curfft_op(gout, s)
        # Multiply the last dimension of the gradient by 2: those entries
        # represent both positive and negative frequencies, except the
        # first and (for even transforms) last elements, which are unique.
        idx = [slice(None)] * (gf.ndim - 2) \
            + [slice(1, (s[-1] // 2) + (s[-1] % 2))] + [slice(None)]
        gf = T.set_subtensor(gf[idx], gf[idx] * 2)
        return [gf, DisconnectedType()()]
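
The slice picks out the "interior" rfft bins, i.e. those that stand for a conjugate pair of frequencies; the DC bin and, for even sizes, the Nyquist bin appear only once and are therefore not doubled. A small NumPy illustration of the index arithmetic (not from the source):

import numpy as np

n = 8                                                  # plays the role of s[-1]
bins = np.arange(np.fft.rfft(np.zeros(n)).shape[0])    # rfft bins 0 .. n // 2
doubled = slice(1, (n // 2) + (n % 2))                 # same arithmetic as above
print(bins[doubled])   # [1 2 3]: DC (0) and Nyquist (4) are excluded
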
test_rop.py (project: Theano-Deep-learning, author: GeekLiB)
def test_Rop_dot_bug_18Oct2013_Jeremiah(self):
        # This test refers to a bug reported by Jeremiah Lowin on 18th Oct
        # 2013. The bug occurs when, through a dot operation, there is only
        # one differentiable path (i.e. there is no gradient w.r.t. one of
        # the inputs).
        x = tensor.arange(20.0).reshape([1, 20])
        v = theano.shared(numpy.ones([20]))
        d = tensor.dot(x, v).sum()
        tensor.Rop(tensor.grad(d, v), v, v)
basic.py (project: Theano-Deep-learning, author: GeekLiB)
def grad(self, inputs, g_outputs):
        x, ind1, ind2 = inputs
        gout, = g_outputs
        return [get_item_2lists_grad(x, ind1, ind2, gout),
                grad_undefined(self, 1, ind1, "No gradient for this input"),
                grad_undefined(self, 1, ind2, "No gradient for this input")]
basic.py (project: Theano-Deep-learning, author: GeekLiB)
def dot(x, y):
    """
    Operation for efficiently calculating the dot product when
    one or both operands are sparse. Supported formats are CSC and CSR.
    The output of the operation is dense.

    Parameters
    ----------
    x
        Sparse or dense matrix variable.
    y
        Sparse or dense matrix variable.

    Returns
    -------
    The dot product `x`.`y` in a dense format.

    Notes
    -----
    The grad implemented is regular, i.e. not structured.

    At least one of `x` or `y` must be a sparse matrix.

    When the operation has the form dot(csr_matrix, dense)
    the gradient of this operation can be performed inplace
    by UsmmCscDense. This leads to significant speed-ups.

    """

    if hasattr(x, 'getnnz'):
        x = as_sparse_variable(x)
    if hasattr(y, 'getnnz'):
        y = as_sparse_variable(y)

    x_is_sparse_variable = _is_sparse_variable(x)
    y_is_sparse_variable = _is_sparse_variable(y)

    if not x_is_sparse_variable and not y_is_sparse_variable:
        raise TypeError()

    return _dot(x, y)
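
A usage sketch (assuming Theano and SciPy are installed; the variable names are illustrative): a CSR input times a dense matrix gives a dense result, and the gradient w.r.t. the dense operand is available through theano.grad.

import numpy as np
import scipy.sparse as sp
import theano
import theano.sparse as sparse
import theano.tensor as tt

x = sparse.csr_matrix(name='x', dtype=theano.config.floatX)
y = tt.matrix('y')
z = sparse.dot(x, y)                 # dense output
gy = theano.grad(z.sum(), y)

f = theano.function([x, y], [z, gy])
xv = sp.csr_matrix(np.eye(3, dtype=theano.config.floatX))
yv = np.ones((3, 2), dtype=theano.config.floatX)
zv, gyv = f(xv, yv)
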
nlinalg.py (project: Theano-Deep-learning, author: GeekLiB)
def grad(self, inputs, g_outputs):
        r"""The gradient function should return

           .. math:: \sum_n\left(W_n\frac{\partial\,w_n}
                           {\partial a_{ij}} +
                     \sum_k V_{nk}\frac{\partial\,v_{nk}}
                           {\partial a_{ij}}\right),

        where [:math:`W`, :math:`V`] corresponds to ``g_outputs``,
        :math:`a` to ``inputs``, and  :math:`(w, v)=\mbox{eig}(a)`.

        Analytic formulae for eigensystem gradients are well-known in
        perturbation theory:

           .. math:: \frac{\partial\,w_n}
                          {\partial a_{ij}} = v_{in}\,v_{jn}


           .. math:: \frac{\partial\,v_{kn}}
                          {\partial a_{ij}} =
                \sum_{m\ne n}\frac{v_{km}v_{jn}}{w_n-w_m}

        """
        x, = inputs
        w, v = self(x)
        # Replace gradients wrt disconnected variables with
        # zeros. This is a work-around for issue #1063.
        gw, gv = _zero_disconnected([w, v], g_outputs)
        return [EighGrad(self.UPLO)(x, w, v, gw, gv)]
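
The eigenvalue part of the formula can be checked numerically: for a symmetric perturbation dA of a symmetric matrix, first-order perturbation theory gives dw_n ≈ v_n^T dA v_n. A small NumPy sketch (not part of the source):

import numpy as np

rng = np.random.RandomState(0)
A = rng.rand(4, 4); A = (A + A.T) / 2
dA = rng.rand(4, 4); dA = (dA + dA.T) / 2
w, v = np.linalg.eigh(A)

eps = 1e-7
w2, _ = np.linalg.eigh(A + eps * dA)
predicted = np.array([v[:, n].dot(dA).dot(v[:, n]) for n in range(4)])
assert np.allclose((w2 - w) / eps, predicted, atol=1e-4)
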
nlinalg.py (project: Theano-Deep-learning, author: GeekLiB)
def perform(self, node, inputs, outputs):
        """
        Implements the "reverse-mode" gradient for the eigensystem of
        a square matrix.

        """
        x, w, v, W, V = inputs
        N = x.shape[0]
        outer = numpy.outer

        def G(n):
            return sum(v[:, m] * V.T[n].dot(v[:, m]) / (w[n] - w[m])
                       for m in xrange(N) if m != n)

        g = sum(outer(v[:, n], v[:, n] * W[n] + G(n))
                for n in xrange(N))

        # Numpy's eigh(a, 'L') (eigh(a, 'U')) is a function of tril(a)
        # (triu(a)) only.  This means that the partial derivative of
        # eigh(a, 'L') (eigh(a, 'U')) with respect to a[i,j] is zero
        # for i < j (i > j).  At the same time, non-zero components of
        # the gradient must account for the fact that variation of the
        # opposite triangle contributes to variation of two elements
        # of a Hermitian (symmetric) matrix. The following line
        # implements the necessary logic.
        out = self.tri0(g) + self.tri1(g).T

        # The call to self.tri0 in perform upcasts from float32 to
        # float64 or from int* to int64 in numpy 1.6.1 but not in
        # 1.6.2. We do not want version-dependent dtypes in Theano.
        # We think it should be the same as the output.
        outputs[0][0] = numpy.asarray(out, dtype=node.outputs[0].dtype)
extra_ops.py (project: Theano-Deep-learning, author: GeekLiB)
def grad(self, inputs, output_gradients):
        num_ins = len(inputs)
        if num_ins == 3:
            x, v, sorter = inputs
        else:
            x, v = inputs

        x_grad = gradient._float_zeros_like(x)
        v_grad = gradient._float_zeros_like(v)
        if num_ins == 3:
            return [x_grad, v_grad, disconnected_type()]
        else:
            return [x_grad, v_grad]
extra_ops.py (project: Theano-Deep-learning, author: GeekLiB)
def grad(self, inp, cost_grad):
        """
        Notes
        -----
        The gradient is currently implemented for matrices only.
        """
        a, val, offset = inp
        grad = cost_grad[0]
        height, width = grad.shape

        if (a.dtype.startswith('complex')):
            return [None, None]

        # only valid for matrices
        wr_a = fill_diagonal_offset(grad, 0, offset)

        offset_abs = basic.abs_(offset)
        pos_offset_flag = basic.ge(offset, 0)
        neg_offset_flag = basic.lt(offset, 0)
        min_wh = basic.minimum(width, height)

        start = offset * pos_offset_flag + offset_abs * width * neg_offset_flag
        num_of_step = basic.minimum(min_wh, width * pos_offset_flag +
                                    height * neg_offset_flag - offset_abs)

        step = a.shape[1] + 1
        end = start + step * num_of_step

        # input of slice should be integer
        start = basic.cast(start, 'int32')
        step = basic.cast(step, 'int32')
        end = basic.cast(end, 'int32')

        wr_val = grad.flatten()[start:end:step].sum()

        wr_offset = theano.gradient.grad_undefined(
            self, 2, offset,
            "offset is not defined for non-integer offset so"
            " fill_diagonal_offset(a,val,offset+eps) is undefined")

        return [wr_a, wr_val, wr_offset]

