Example source code for Python clone()

test_elemwise.py (project: Theano-Deep-learning, author: GeekLiB)
def test_gt_grad():
    """A user test that failed.

    Something about it made Elemwise.grad return something that was
    too complicated for get_scalar_constant_value to recognize as being 0, so
    gradient.grad reported that it was not a valid gradient of an
    integer.

    """
    floatX = config.floatX
    T = theano.tensor

    input_ = T.vector(dtype=floatX)
    random_values = numpy.random.RandomState(1234).uniform(
                                                low=-1, high=1, size=(2, 2))
    W_values = numpy.asarray(random_values, dtype=floatX)
    W = theano.shared(value=W_values, name='weights')
    correct_score = T.dot(input_, W)
    wrong_input = T.vector(dtype=floatX)
    wrong_score = theano.clone(correct_score, {input_: wrong_input})
    # Hinge loss

    scores = T.ones_like(correct_score) - correct_score + wrong_score
    cost = (scores * (scores > 0)).sum()
    T.grad(cost, input_)
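
The test above exercises theano.clone indirectly; the core pattern it relies on is swapping one input variable of an already-built graph for another. A minimal self-contained sketch of that pattern (variable names here are illustrative, not from the test):

import numpy
import theano
import theano.tensor as T

x = T.vector('x')
score = (x ** 2).sum()
# build the same graph on a different input without reconstructing it
x_other = T.vector('x_other')
score_other = theano.clone(score, replace={x: x_other})
f = theano.function([x_other], score_other)
print(f(numpy.asarray([1, 2], dtype=theano.config.floatX)))  # prints 5.0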
builders.py (project: Theano-Deep-learning, author: GeekLiB)
def infer_shape(self, node, shapes):
        out_shp = theano.scan_module.scan_utils.infer_shape(self.new_outputs,
                                                            self.new_inputs,
                                                            shapes)

        # Clone the output shape so that shapes are computed from outer inputs.
        # Note:
        # Here we could do it more simply, like:
        #      ret = [theano.clone(shp, replace=repl) for shp in out_shp]
        # But doing it multiple times could duplicate common subgraphs between
        # each shape call. Theano's optimizer would clean this up later, but it
        # would mean extra work for the optimizer.
        repl = dict(zip(self.new_inputs, node.inputs))
        cloned = theano.clone(reduce(tuple.__add__, out_shp), replace=repl)
        ret = []
        used = 0
        for i in range(len(out_shp)):
            nb = len(out_shp[i])
            ret.append(cloned[used: used + nb])
            used += nb

        return ret
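
The comment above is the key point: one theano.clone call over the concatenated shape expressions keeps any subgraph the shapes have in common shared, whereas cloning each shape separately would duplicate it. A rough sketch of the single-call pattern (toy variables, not the Op's actual inputs):

import theano
import theano.tensor as T

a = T.matrix('a')
b = T.matrix('b')
shape_exprs = [a.shape[0], a.shape[1], (a ** 2).sum()]
# one clone call over all expressions; common subgraphs stay shared
cloned = theano.clone(shape_exprs, replace={a: b})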
scan_utils.py (project: Theano-Deep-learning, author: GeekLiB)
def reconstruct_graph(inputs, outputs, tag=None):
    """
    Different interface to clone, that allows you to pass inputs.
    Compared to clone, this method always replaces the inputs with
    new variables of the same type, and returns those (in the same
    order as the original inputs).

    """
    if tag is None:
        tag = ''
    nw_inputs = [safe_new(x, tag) for x in inputs]
    givens = OrderedDict()
    for nw_x, x in izip(nw_inputs, inputs):
        givens[x] = nw_x
    allinputs = theano.gof.graph.inputs(outputs)
    for inp in allinputs:
        if isinstance(inp, theano.Constant):
            givens[inp] = inp.clone()

    nw_outputs = clone(outputs, replace=givens)
    return (nw_inputs, nw_outputs)
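
reconstruct_graph relies on safe_new to build its fresh inputs; the same effect can be sketched with clone alone by instantiating new variables of the same type and mapping the old inputs onto them (a simplified illustration, not the scan_utils implementation):

import theano
import theano.tensor as T

x = T.vector('x')
y = T.vector('y')
out = T.tanh(x + y).sum()
# fresh variables of the same types, then clone the graph onto them
nw_x, nw_y = x.type('nw_x'), y.type('nw_y')
nw_out = theano.clone(out, replace={x: nw_x, y: nw_y})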
test_scan.py (project: Theano-Deep-learning, author: GeekLiB)
def test_cloning_no_replace_strict_copy_inputs(self):
        # This has nothing to do with scan, but it refers to the clone
        # function that scan uses internally and that pfunc uses now and
        # that users might want to use
        x = theano.tensor.vector('x')
        y = theano.tensor.vector('y')
        z = theano.shared(0.25)

        f1 = z * (x + y) ** 2 + 5
        f2 = theano.clone(f1,
                          replace=None,
                          strict=True,
                          share_inputs=True)
        f2_inp = theano.gof.graph.inputs([f2])

        assert z in f2_inp
        assert x in f2_inp
        assert y in f2_inp
test_scan.py (project: Theano-Deep-learning, author: GeekLiB)
def test_cloning_no_replace_strict_not_copy_inputs(self):
        # This has nothing to do with scan, but it refers to the clone
        # function that scan uses internally and that pfunc uses now and
        # that users might want to use
        x = theano.tensor.vector('x')
        y = theano.tensor.vector('y')
        z = theano.shared(0.25)

        f1 = z * (x + y) ** 2 + 5
        f2 = theano.clone(f1,
                          replace=None,
                          strict=True,
                          share_inputs=False)
        f2_inp = theano.gof.graph.inputs([f2])

        assert z not in f2_inp
        assert x not in f2_inp
        assert y not in f2_inp
test_scan.py (project: Theano-Deep-learning, author: GeekLiB)
def test_cloning_replace_not_strict_copy_inputs(self):
        # This has nothing to do with scan, but it refers to the clone
        # function that scan uses internally and that pfunc uses now and
        # that users might want to use
        x = theano.tensor.vector('x')
        y = theano.tensor.fvector('y')
        y2 = theano.tensor.dvector('y2')
        z = theano.shared(0.25)

        f1 = z * (x + y) ** 2 + 5
        f2 = theano.clone(f1,
                          replace=OrderedDict([(y, y2)]),
                          strict=False,
                          share_inputs=True)
        f2_inp = theano.gof.graph.inputs([f2])
        assert z in f2_inp
        assert x in f2_inp
        assert y2 in f2_inp
test_scan.py (project: Theano-Deep-learning, author: GeekLiB)
def test_cloning_replace_strict_not_copy_inputs(self):
        # This has nothing to do with scan, but it refers to the clone
        # function that scan uses internally and that pfunc uses now and
        # that users might want to use
        x = theano.tensor.vector('x')
        y = theano.tensor.vector('y')
        y2 = theano.tensor.vector('y2')
        z = theano.shared(0.25)

        f1 = z * (x + y) ** 2 + 5
        f2 = theano.clone(f1,
                          replace=[(y, y2)],
                          strict=True,
                          share_inputs=False)
        f2_inp = theano.gof.graph.inputs([f2])
        assert z not in f2_inp
        assert x not in f2_inp
        assert y2 not in f2_inp
test_scan.py (project: Theano-Deep-learning, author: GeekLiB)
def test_cloning_replace_not_strict_not_copy_inputs(self):
        # This has nothing to do with scan, but it refers to the clone
        # function that scan uses internally and that pfunc uses now and
        # that users might want to use
        x = theano.tensor.vector('x')
        y = theano.tensor.fvector('y')
        y2 = theano.tensor.dvector('y2')
        z = theano.shared(0.25)

        f1 = z * (x + y) ** 2 + 5
        f2 = theano.clone(f1,
                          replace=[(y, y2)],
                          strict=False,
                          share_inputs=False)
        f2_inp = theano.gof.graph.inputs([f2])
        assert z not in f2_inp
        assert x not in f2_inp
        assert y2 not in f2_inp

    # TEST RE-ordering of inputs
    # some rnn with multiple outputs and multiple inputs; other
    # dimension instead of scalars/vectors
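
The four test variants above only toggle the strict and share_inputs flags of theano.clone. A condensed sketch of what share_inputs controls, assuming the same kind of variables as in the tests:

import theano
import theano.tensor as T

x = T.vector('x')
z = theano.shared(0.25)
f1 = z * x ** 2

shared_clone = theano.clone(f1, replace=None, share_inputs=True)
copied_clone = theano.clone(f1, replace=None, share_inputs=False)

assert z in theano.gof.graph.inputs([shared_clone])      # inputs reused as-is
assert z not in theano.gof.graph.inputs([copied_clone])  # inputs copied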
basic.py (project: NMT, author: tuzhaopeng)
def clone(self, **new_inputs):
        new_obj = utils.copy(self)
        # Reorder inputs
        assert len(new_obj.inputs) == len(new_inputs.items())
        pairs = [(x, new_inputs[x.name]) for x in new_obj.inputs]
        new_obj.inputs = new_inputs.values()
        new_obj.out = theano.clone(new_obj.out, replace=pairs)
        if hasattr(new_obj, 'cost'):
            new_obj.cost = theano.clone(new_obj.cost, replace=pairs)
        if hasattr(new_obj, 'grads'):
            new_obj.grads = theano.clone(new_obj.grads, replace=pairs)
        if hasattr(new_obj, 'sample'):
            new_obj.sample = theano.clone(new_obj.sample, replace=pairs)
        return new_obj
basic.py (project: NMT, author: tuzhaopeng)
def clone(self, **new_inputs):
        new_obj = utils.copy(self)
        # Reorder inputs
        assert len(new_obj.inputs) == len(new_inputs.items())
        pairs = [(x, new_inputs[x.name]) for x in new_obj.inputs]
        new_obj.inputs = new_inputs.values()
        new_obj.out = theano.clone(new_obj.out, replace=pairs)
        if hasattr(new_obj, 'cost'):
            new_obj.cost = theano.clone(new_obj.cost, replace=pairs)
        if hasattr(new_obj, 'grads'):
            new_obj.grads = theano.clone(new_obj.grads, replace=pairs)
        if hasattr(new_obj, 'sample'):
            new_obj.sample = theano.clone(new_obj.sample, replace=pairs)
        return new_obj
gradient_optimizer.py (project: sesame-paste-noodle, author: aissehust)
def __call__(self, cost, params):
        grads = T.grad(cost=cost, wrt=params)
        updates = []
        for p, g in zip(params, grads):
            v = theano.shared(p.get_value() * 0.)
            new_v = self.mu * v + self.lr * theano.clone(g, replace = {p: p - self.mu * v})
            updates.append((v, new_v))
            updates.append((p, p - new_v))

        return updates
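
Here clone evaluates the gradient at the lookahead point p - mu * v, which is the Nesterov-momentum trick. A standalone sketch of the same update rule (mu, lr and the toy cost are illustrative, not taken from the class above):

import theano
import theano.tensor as T

p = theano.shared(1.0, name='p')
v = theano.shared(0.0, name='v')
mu, lr = 0.9, 0.01

cost = p ** 2
g = T.grad(cost, p)
# gradient of the same graph, but evaluated at the lookahead point
g_lookahead = theano.clone(g, replace={p: p - mu * v})
new_v = mu * v + lr * g_lookahead
updates = [(v, new_v), (p, p - new_v)]
train_step = theano.function([], cost, updates=updates)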
batch_norm.py (project: PAN, author: hworang77)
def get_output_for(self, input, deterministic=False, **kwargs):
        if deterministic or self.fixed:
            # use stored mean and std
            mean = self.mean
            std = self.std
        else:
            # use this batch's mean and std
            mean = input.mean(self.axes, keepdims=True)
            #std = input.std(self.axes, keepdims=True)
            std = (input.var(self.axes, keepdims=True)+self.epsilon).sqrt()
            # and update the stored mean and std:
            # we create (memory-aliased) clones of the stored mean and std
            running_mean = theano.clone(self.mean, share_inputs=False)
            running_std = theano.clone(self.std, share_inputs=False)
            # set a default update for them
            running_mean.default_update = ((1 - self.alpha) * running_mean +
                                           self.alpha * mean)
            running_std.default_update = ((1 - self.alpha) * running_std +
                                          self.alpha * std)
            # and include them in the graph so their default updates will be
            # applied (although the expressions will be optimized away later)
            mean += 0 * running_mean
            std += 0 * running_std
        #std += self.epsilon
        mean = T.addbroadcast(mean, *self.axes)
        std = T.addbroadcast(std, *self.axes)
        beta = T.addbroadcast(self.beta, *self.axes)
        gamma = T.addbroadcast(self.gamma, *self.axes)
#        normalized = (input - mean) * (gamma / std) + beta
        normalized = (input - mean) / std
        if self.rescale:
            normalized = normalized * gamma + beta
        return self.nonlinearity(normalized)
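
The running-average update above hinges on Theano's default_update mechanism: cloning the stored shared variable with share_inputs=False gives a memory-aliased copy whose default_update can be set without touching the layer attribute, and adding 0 * clone to the output pulls that update into the compiled function. A minimal sketch of just that mechanism (shapes and alpha are illustrative):

import numpy
import theano
import theano.tensor as T

x = T.vector('x')
running_mean = theano.shared(numpy.asarray(0.0, dtype=theano.config.floatX))
alpha = 0.1

# memory-aliased clone of the stored statistic
rm = theano.clone(running_mean, share_inputs=False)
rm.default_update = (1 - alpha) * rm + alpha * x.mean()
# include the clone in the graph so its default_update is collected
out = (x - x.mean()) + 0 * rm

f = theano.function([x], out)
f(numpy.ones(4, dtype=theano.config.floatX))  # also advances running_mean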
batch_norm.py (project: cnn-bnn, author: jpdz)
def get_output_for(self, input, deterministic=False, **kwargs):
        if deterministic:
            # use stored mean and std
            mean = self.mean
            std = self.std
        else:
            # use this batch's mean and std
            mean = input.mean(self.axes, keepdims=True)
            std = input.std(self.axes, keepdims=True)
            # and update the stored mean and std:
            # we create (memory-aliased) clones of the stored mean and std
            running_mean = theano.clone(self.mean, share_inputs=False)
            running_std = theano.clone(self.std, share_inputs=False)
            # set a default update for them
            running_mean.default_update = ((1 - self.alpha) * running_mean +
                                           self.alpha * mean)
            running_std.default_update = ((1 - self.alpha) * running_std +
                                          self.alpha * std)
            # and include them in the graph so their default updates will be
            # applied (although the expressions will be optimized away later)
            mean += 0 * running_mean
            std += 0 * running_std
        std += self.epsilon
        mean = T.addbroadcast(mean, *self.axes)
        std = T.addbroadcast(std, *self.axes)
        beta = T.addbroadcast(self.beta, *self.axes)
        gamma = T.addbroadcast(self.gamma, *self.axes)
        normalized = (input - mean) * (gamma / std) + beta
        return self.nonlinearity(normalized)
xnor_net.py (project: theano-xnor-net, author: gplhegde)
def convolve(self, input, deterministic=False, **kwargs):
        """ Binary convolution. Both inputs and weights are binary (+1 or -1)
        This overrides convolve operation from Conv2DLayer implementation
        """
        if(self.xnor):
            # compute the binary inputs H and the scaling matrix K
            input, K = binarize_conv_input(input, self.beta_filter)

            # Compute the binarized filters and the scaling matrix
            self.Wb, alpha = binarize_conv_filters(self.W)
            if not deterministic:
                old_alpha = theano.clone(self.xalpha, share_inputs=False)
                old_alpha.default_update = alpha
                alpha += 0*old_alpha
            else:
                alpha = self.xalpha 

            # TODO: Use XNOR ops for the convolution. As of now using Lasagne's convolution for
            # functionality verification.
            # approx weight tensor
            #W_full_precision = self.Wb * alpha.dimshuffle(0, 'x', 'x', 'x')
            Wr = self.W

            self.W = self.Wb

            feat_maps = super(Conv2DLayer, self).convolve(input, **kwargs)
            # restore the approx full precision weight for gradient computation
            #self.W = W_full_precision
            self.W = Wr

            # scale by K and alpha
            # FIXME: Actually we are scaling after adding bias here. Need to scale first and then add bias.
            # The super class method automatically adds bias. Somehow need to overcome this..
            # may subtract the bias, scale by alpha and beta and then add bias?
            feat_maps = feat_maps * K

            feat_maps = feat_maps * alpha.dimshuffle('x', 0, 'x', 'x')
        else:
            feat_maps = super(Conv2DLayer, self).convolve(input, **kwargs)

        return feat_maps
xnor_net.py (project: theano-xnor-net, author: gplhegde)
def get_output_for(self, input, deterministic=False, **kwargs):
        """ Binary dense layer dot product computation
        """
        if(self.xnor):
            # binarize the input
            bin_input, beta = binarize_fc_input(input)

            # compute weight scaling factor.
            self.Wb, alpha = binarize_fc_weights(self.W)
            if not deterministic:
                old_alpha = theano.clone(self.xalpha, share_inputs=False)
                old_alpha.default_update = alpha
                alpha += 0*old_alpha
            else:
                alpha = self.xalpha

            #W_full_precision = self.Wb * alpha.dimshuffle('x', 0)
            Wr = self.W
            self.W = self.Wb

            fc_out = super(DenseLayer, self).get_output_for(bin_input, **kwargs)
            # scale the output by alpha and beta
            # FIXME: Actually we are scaling after adding bias here. Need to scale first and then add bias.
            # The super class method automatically adds bias. Somehow need to overcome this..
            # may subtract the bias, scale by alpha and beta and then add bias?
            fc_out = fc_out * beta.dimshuffle(0, 'x')

            fc_out = fc_out * alpha.dimshuffle('x', 0)

            #self.W = W_full_precision
            self.W = Wr
        else:
            fc_out = super(DenseLayer, self).get_output_for(input, **kwargs)

        return fc_out

        # find the dot product
        # scale the output by alpha and beta
simple.py (project: pyrl, author: frsong)
def get_dOmega_dWrec(self, loss, x):
        # Pascanu's trick
        scan_node = x.owner.inputs[0].owner
        assert isinstance(scan_node.op, theano.scan_module.scan_op.Scan)
        npos   = scan_node.op.n_seqs + 1
        init_x = scan_node.inputs[npos]
        g_x    = theano.grad(loss, init_x)

        # To force immediate derivatives
        d_xt = T.tensor3('d_xt')
        xt   = T.tensor3('xt')

        # Vanishing-gradient regularization
        self.bound        = 1e-20
        self.lambda_Omega = 2

        # Wrec
        Wrec = self.params['Wrec']

        # Numerator
        alpha = self.alpha
        num   = (1 - alpha)*d_xt[1:] + T.dot(alpha*d_xt[1:], Wrec.T)*self.df_hidden(xt)
        num   = (num**2).sum(axis=2)

        # Denominator
        denom = (d_xt[1:]**2).sum(axis=2)

        # Omega
        bound  = self.bound
        Omega  = (T.switch(T.ge(denom, bound), num/denom, 1) - 1)**2
        nelems = T.mean(T.ge(denom, bound), axis=1)
        Omega  = Omega.mean(axis=1).sum()/nelems.sum()

        # Gradient w.r.t Wrec
        g_Wrec = theano.grad(Omega, Wrec)
        g_Wrec = theano.clone(g_Wrec, replace=[(d_xt, g_x), (xt, x)])

        return self.lambda_Omega * g_Wrec
scan_utils.py (project: Theano-Deep-learning, author: GeekLiB)
def forced_replace(out, x, y):
    """
    Check all internal values of the graph that compute the variable ``out``
    for occurrences of values identical with ``x``. If such occurrences are
    encountered then they are replaced with variable ``y``.

    Parameters
    ----------
    out : Theano Variable
    x : Theano Variable
    y : Theano Variable

    Examples
    --------
    out := sigmoid(wu)*(1-sigmoid(wu))
    x := sigmoid(wu)
    forced_replace(out, x, y) := y*(1-y)

    """
    if out is None:
        return None

    # ``visited`` is a set of nodes that are already known and don't need to be
    # checked again, speeding up the traversal of multiply-connected graphs.
    visited = set()
    def local_traverse(graph, x):
        if graph in visited:
            return []
        visited.add(graph)
        if equal_computations([graph], [x]):
            return [graph]
        elif not graph.owner:
            return []
        else:
            rval = []
            for inp in graph.owner.inputs:
                rval += local_traverse(inp, x)
            return rval
    to_replace = local_traverse(out, x)
    return clone(out, replace=OrderedDict((v, y) for v in to_replace))
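
The docstring's example can be reproduced with a plain theano.clone once the occurrences are known, since replace also accepts intermediate variables of the graph, not only inputs. A hedged sketch of the sigmoid example:

import theano
import theano.tensor as T

w = T.vector('w')
u = T.vector('u')
y = T.vector('y')

s = T.nnet.sigmoid(w * u)
out = s * (1 - s)
# replace the intermediate sigmoid(w*u) with y, giving y*(1-y)
out_on_y = theano.clone(out, replace={s: y})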
test_scan.py (project: Theano-Deep-learning, author: GeekLiB)
def test_inplace3(self):
        rng = numpy.random.RandomState(utt.fetch_seed())

        vx0 = asarrayX(rng.uniform())
        vx1 = asarrayX(rng.uniform())
        x0 = theano.shared(vx0)
        x1 = theano.shared(vx1)
        outputs, updates = theano.scan(lambda x, y: (x + asarrayX(1),
                                                     y + asarrayX(1)),
                                       [],
                                       [x0, x1],
                                       n_steps=3)
        x0 = asarrayX(numpy.zeros((3,)))
        x0[0] = vx0
        x0 = theano.tensor.constant(x0)
        to_replace = outputs[0].owner.inputs[0].owner.inputs[1]
        outputs = theano.clone(outputs,
                               replace=[(to_replace, x0)])
        mode = theano.compile.mode.get_mode(None).including('inplace')
        f9 = theano.function([],
                             outputs,
                             updates=updates,
                             mode=mode)
        scan_node = [x for x in f9.maker.fgraph.toposort()
                     if isinstance(x.op, theano.scan_module.scan_op.Scan)]
        assert 0 not in scan_node[0].op.destroy_map.keys()
        assert 1 in scan_node[0].op.destroy_map.keys()

    # Shared variable with updates
test_scan.py (project: Theano-Deep-learning, author: GeekLiB)
def test_clone(self):
        def test(x, y, mention_y):
            if mention_y:
                d = 0.1 + 0 * y
            else:
                d = 0.1
            out = theano.clone(y, replace={x: x + d})
            # theano.printing.debugprint(out)
            return theano.function([], out)()

        x = theano.shared(numpy.asarray(0., dtype=theano.config.floatX))
        utt.assert_allclose(test(x, tensor.sum((x+1)**2), mention_y=False),
                              1.21000003815)
        utt.assert_allclose(test(x, tensor.sum((x+1)**2), mention_y=True),
                              1.21000003815)
basic.py (project: NMT-Coverage, author: tuzhaopeng)
def clone(self, **new_inputs):
        new_obj = utils.copy(self)
        # Reorder inputs
        assert len(new_obj.inputs) == len(new_inputs.items())
        pairs = [(x, new_inputs[x.name]) for x in new_obj.inputs]
        new_obj.inputs = new_inputs.values()
        new_obj.out = theano.clone(new_obj.out, replace=pairs)
        if hasattr(new_obj, 'cost'):
            new_obj.cost = theano.clone(new_obj.cost, replace=pairs)
        if hasattr(new_obj, 'grads'):
            new_obj.grads = theano.clone(new_obj.grads, replace=pairs)
        if hasattr(new_obj, 'sample'):
            new_obj.sample = theano.clone(new_obj.sample, replace=pairs)
        return new_obj
basic.py (project: NMT-Coverage, author: tuzhaopeng)
def clone(self, **new_inputs):
        new_obj = utils.copy(self)
        # Reorder inputs
        assert len(new_obj.inputs) == len(new_inputs.items())
        pairs = [(x, new_inputs[x.name]) for x in new_obj.inputs]
        new_obj.inputs = new_inputs.values()
        new_obj.out = theano.clone(new_obj.out, replace=pairs)
        if hasattr(new_obj, 'cost'):
            new_obj.cost = theano.clone(new_obj.cost, replace=pairs)
        if hasattr(new_obj, 'grads'):
            new_obj.grads = theano.clone(new_obj.grads, replace=pairs)
        if hasattr(new_obj, 'sample'):
            new_obj.sample = theano.clone(new_obj.sample, replace=pairs)
        return new_obj
pseudograd.py (project: crayimage, author: yandexdataschool)
def pseudograd(loss, params, srng=None, temperature = 1.0e-1,
               learning_rate=1.0e-2, rho2=0.95):


  one = T.constant(1.0)
  zero = T.constant(0.0)

  deltas = [ make_normal(param, srng=srng) for param in params ]
  momentum = [ make_copy(param) for param in params ]

  new_params = [
    param + learning_rate * delta
    for param, delta, m in zip(params, deltas, momentum)
  ]

  new_loss = theano.clone(
    loss, replace=dict(zip(params, new_params))
  )

  accepting_p = T.exp((loss - new_loss) / temperature)
  u = srng.uniform(size=(), dtype=loss.dtype)

  cond = T.or_(T.or_(u > accepting_p, T.isnan(new_loss)), T.isinf(new_loss))
  step = T.switch(cond, zero, one)

  updates = OrderedDict()

  for m, delta in zip(momentum, deltas):
    updates[m] = m * rho2 + (one - rho2) * delta * step

  for param, m in zip(params, momentum):
    updates[param] = param + learning_rate * m

  return updates
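
The central clone here rebuilds the loss with every parameter shifted to its proposed value via dict(zip(params, new_params)). The same zip-into-dict pattern in isolation (toy model, illustrative names):

import numpy
import theano
import theano.tensor as T

W = theano.shared(numpy.zeros((3, 2), dtype=theano.config.floatX), name='W')
b = theano.shared(numpy.zeros(2, dtype=theano.config.floatX), name='b')
x = T.vector('x')

loss = ((T.dot(x, W) + b) ** 2).sum()
params = [W, b]
proposed = [p * 2 for p in params]  # e.g. doubled parameters as proposed values
# same loss graph, evaluated at the proposed parameter values
new_loss = theano.clone(loss, replace=dict(zip(params, proposed)))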
neural_network.py (project: RobotNSGA, author: LuisLaraP)
def add_layer(self, new_layer):
        '''Adds the given layer to the network'''
        self.layers.append(new_layer)
        self.output = theano.clone(new_layer.output, replace={new_layer.input: self.output})
        self.size += new_layer.size
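
add_layer stitches a pre-built layer onto the network by cloning the layer's output with the layer's own symbolic input replaced by the network's current output. The same composition pattern in standalone form (names are illustrative):

import theano
import theano.tensor as T

net_input = T.vector('net_input')
net_output = T.tanh(net_input)          # current network output

layer_input = T.vector('layer_input')   # the new layer's own placeholder
layer_output = T.nnet.sigmoid(layer_input)

# graft the new layer onto the existing network
net_output = theano.clone(layer_output, replace={layer_input: net_output})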
lasagne_layers.py (project: third_person_im, author: bstadie)
def get_output_for(self, input, deterministic=False, **kwargs):
        input_mean = input.mean(self.axes)
        input_std = TT.sqrt(input.var(self.axes) + self.epsilon)

        # Decide whether to use the stored averages or mini-batch statistics
        use_averages = kwargs.get('batch_norm_use_averages',
                                  deterministic)
        if use_averages:
            mean = self.mean
            std = self.std
        else:
            mean = input_mean
            std = input_std

        # Decide whether to update the stored averages
        update_averages = kwargs.get('batch_norm_update_averages',
                                     not deterministic)
        if update_averages:
            # Trick: To update the stored statistics, we create memory-aliased
            # clones of the stored statistics:
            running_mean = theano.clone(self.mean, share_inputs=False)
            running_std = theano.clone(self.std, share_inputs=False)
            # set a default update for them:
            running_mean.default_update = ((1 - self.alpha) * running_mean +
                                           self.alpha * input_mean)
            running_std.default_update = ((1 - self.alpha) *
                                              running_std +
                                              self.alpha * input_std)
            # and make sure they end up in the graph without participating in
            # the computation (this way their default_update will be collected
            # and applied, but the computation will be optimized away):
            mean += 0 * running_mean
            std += 0 * running_std

        # prepare dimshuffle pattern inserting broadcastable axes as needed
        param_axes = iter(list(range(input.ndim - len(self.axes))))
        pattern = ['x' if input_axis in self.axes
                   else next(param_axes)
                   for input_axis in range(input.ndim)]

        # apply dimshuffle pattern to all parameters
        beta = 0 if self.beta is None else self.beta.dimshuffle(pattern)
        gamma = 1 if self.gamma is None else self.gamma.dimshuffle(pattern)
        mean = mean.dimshuffle(pattern)
        std = std.dimshuffle(pattern)

        # normalize
        normalized = (input - mean) * (gamma * TT.inv(std)) + beta
        return normalized
cost_layers.py (project: NMT, author: tuzhaopeng)
def get_grads(self, state_below, target, mask = None, reg = None,
                  scale=None, sum_over_time=True, use_noise=True,
                 additional_inputs=None):
        """
        This function implements both the forward and the backward pass of
        this layer. The reason we do this in a single function is that, for
        the factorized softmax layer, it is hard to rely on grad and get an
        optimized graph. For uniformity I've implemented this method for
        this layer as well (though one doesn't need to use it).

        :param state_below: theano variable representing the input to the
            softmax layer
        :param target: theano variable representing the target for this
            layer
        :return: cost, dC_dstate_below, param_grads, new_properties
            dC_dstate_below is a computational graph representing the
            gradient of the cost wrt state_below
            param_grads is a list containing the gradients wrt the
            different parameters of the layer
            new_properties is a dictionary containing additional properties
            of the model; properties are theano expressions that are
            evaluated and reported by the model
        """
        cost = self.get_cost(state_below,
                             target,
                             mask = mask,
                             reg = reg,
                             scale=scale,
                             sum_over_time=sum_over_time,
                             use_noise=use_noise,
                             additional_inputs=additional_inputs)
        grads = TT.grad(cost, self.params)
        if self.additional_gradients:
            for new_grads, to_replace, properties in self.additional_gradients:
                gparams, params = new_grads
                prop_expr = [x[1] for x in properties]
                replace = [(x[0], TT.grad(cost, x[1])) for x in to_replace]
                rval = theano.clone(gparams + prop_expr,
                                    replace=replace)
                gparams = rval[:len(gparams)]
                prop_expr = rval[len(gparams):]
                self.properties += [(x[0], y) for x,y in zip(properties,
                                                             prop_expr)]
                for gp, p in zip(gparams, params):
                    grads[self.params.index(p)] += gp

        self.cost = cost
        self.grads = grads
        def Gvs_fn(*args):
            w = (1 - self.model_output) * self.model_output * state_below.shape[1]
            Gvs = TT.Lop(self.model_output, self.params,
                         TT.Rop(self.model_output, self.params, args)/w)
            return Gvs
        self.Gvs = Gvs_fn
        return cost, grads
cost_layers.py (project: NMT, author: tuzhaopeng)
def get_grads(self, state_below, target, mask = None, reg = None,
                  scale=None, sum_over_time=True, use_noise=True,
                 additional_inputs=None):
        """
        This function implements both the forward and the backward pass of
        this layer. The reason we do this in a single function is that, for
        the factorized softmax layer, it is hard to rely on grad and get an
        optimized graph. For uniformity I've implemented this method for
        this layer as well (though one doesn't need to use it).

        :param state_below: theano variable representing the input to the
            softmax layer
        :param target: theano variable representing the target for this
            layer
        :return: cost, dC_dstate_below, param_grads, new_properties
            dC_dstate_below is a computational graph representing the
            gradient of the cost wrt state_below
            param_grads is a list containing the gradients wrt the
            different parameters of the layer
            new_properties is a dictionary containing additional properties
            of the model; properties are theano expressions that are
            evaluated and reported by the model
        """
        cost = self.get_cost(state_below,
                             target,
                             mask = mask,
                             reg = reg,
                             scale=scale,
                             sum_over_time=sum_over_time,
                             use_noise=use_noise,
                             additional_inputs=additional_inputs)
        grads = TT.grad(cost, self.params)
        if self.additional_gradients:
            for new_grads, to_replace, properties in self.additional_gradients:
                gparams, params = new_grads
                prop_expr = [x[1] for x in properties]
                replace = [(x[0], TT.grad(cost, x[1])) for x in to_replace]
                rval = theano.clone(gparams + prop_expr,
                                    replace=replace)
                gparams = rval[:len(gparams)]
                prop_expr = rval[len(gparams):]
                self.properties += [(x[0], y) for x,y in zip(properties,
                                                             prop_expr)]
                for gp, p in zip(gparams, params):
                    grads[self.params.index(p)] += gp

        self.cost = cost
        self.grads = grads
        def Gvs_fn(*args):
            w = (1 - self.model_output) * self.model_output * state_below.shape[1]
            Gvs = TT.Lop(self.model_output, self.params,
                         TT.Rop(self.model_output, self.params, args)/w)
            return Gvs
        self.Gvs = Gvs_fn
        return cost, grads
layers.py (project: kaggle-right-whale, author: felixlaumon)
def get_output_for(self, input, deterministic=False, **kwargs):
        input_mean = input.mean(self.axes)
        input_var = input.var(self.axes)

        # Decide whether to use the stored averages or mini-batch statistics
        use_averages = kwargs.get('batch_norm_use_averages',
                                  deterministic)
        if use_averages:
            mean = self.mean
            var = self.var
        else:
            mean = input_mean
            var = input_var

        # Decide whether to update the stored averages
        update_averages = kwargs.get('batch_norm_update_averages',
                                     not deterministic)
        if update_averages:
            # Trick: To update the stored statistics, we create memory-aliased
            # clones of the stored statistics:
            running_mean = theano.clone(self.mean, share_inputs=False)
            running_var = theano.clone(self.var, share_inputs=False)
            # set a default update for them:
            running_mean.default_update = ((1 - self.alpha) * running_mean +
                                           self.alpha * input_mean)
            running_var.default_update = ((1 - self.alpha) * running_var +
                                          self.alpha * input_var)
            # and make sure they end up in the graph without participating in
            # the computation (this way their default_update will be collected
            # and applied, but the computation will be optimized away):
            mean += 0 * running_mean
            var += 0 * running_var

        # prepare dimshuffle pattern inserting broadcastable axes as needed
        param_axes = iter(range(self.beta.ndim))
        pattern = ['x' if input_axis in self.axes
                   else next(param_axes)
                   for input_axis in range(input.ndim)]

        # apply dimshuffle pattern to all parameters
        beta = self.beta.dimshuffle(pattern)
        gamma = self.gamma.dimshuffle(pattern)
        mean = mean.dimshuffle(pattern)
        std = T.sqrt(var + self.epsilon)
        std = std.dimshuffle(pattern)

        # normalize
        # normalized = (input - mean) * (gamma / std) + beta
        normalized = T.nnet.batch_normalization(input, gamma=gamma, beta=beta,
                                                mean=mean, std=std,
                                                mode=self.mode)
        return self.nonlinearity(normalized)
lasagne_layers.py (project: rllabplusplus, author: shaneshixiang)
def get_output_for(self, input, deterministic=False, **kwargs):
        input_mean = input.mean(self.axes)
        input_std = TT.sqrt(input.var(self.axes) + self.epsilon)

        # Decide whether to use the stored averages or mini-batch statistics
        use_averages = kwargs.get('batch_norm_use_averages',
                                  deterministic)
        if use_averages:
            mean = self.mean
            std = self.std
        else:
            mean = input_mean
            std = input_std

        # Decide whether to update the stored averages
        update_averages = kwargs.get('batch_norm_update_averages',
                                     not deterministic)
        if update_averages:
            # Trick: To update the stored statistics, we create memory-aliased
            # clones of the stored statistics:
            running_mean = theano.clone(self.mean, share_inputs=False)
            running_std = theano.clone(self.std, share_inputs=False)
            # set a default update for them:
            running_mean.default_update = ((1 - self.alpha) * running_mean +
                                           self.alpha * input_mean)
            running_std.default_update = ((1 - self.alpha) *
                                              running_std +
                                              self.alpha * input_std)
            # and make sure they end up in the graph without participating in
            # the computation (this way their default_update will be collected
            # and applied, but the computation will be optimized away):
            mean += 0 * running_mean
            std += 0 * running_std

        # prepare dimshuffle pattern inserting broadcastable axes as needed
        param_axes = iter(list(range(input.ndim - len(self.axes))))
        pattern = ['x' if input_axis in self.axes
                   else next(param_axes)
                   for input_axis in range(input.ndim)]

        # apply dimshuffle pattern to all parameters
        beta = 0 if self.beta is None else self.beta.dimshuffle(pattern)
        gamma = 1 if self.gamma is None else self.gamma.dimshuffle(pattern)
        mean = mean.dimshuffle(pattern)
        std = std.dimshuffle(pattern)

        # normalize
        normalized = (input - mean) * (gamma * TT.inv(std)) + beta
        return normalized
batch_norm_layer.py (project: experiments, author: tencia)
def get_output_for(self, input, deterministic=False, **kwargs):
        input_mean = input.mean(self.axes)
        input_inv_std = T.inv(T.sqrt(input.var(self.axes) + self.epsilon))

        # Decide whether to use the stored averages or mini-batch statistics
        use_averages = kwargs.get('batch_norm_use_averages',
                                  deterministic)
        if use_averages:
            mean = self.mean
            inv_std = self.inv_std
        else:
            mean = input_mean
            inv_std = input_inv_std

        # Decide whether to update the stored averages
        update_averages = kwargs.get('batch_norm_update_averages',
                                     not deterministic)
        if update_averages:
            # Trick: To update the stored statistics, we create memory-aliased
            # clones of the stored statistics:
            running_mean = theano.clone(self.mean, share_inputs=False)
            running_inv_std = theano.clone(self.inv_std, share_inputs=False)
            # set a default update for them:
            running_mean.default_update = ((1 - self.alpha) * running_mean +
                                           self.alpha * input_mean)
            running_inv_std.default_update = ((1 - self.alpha) *
                                              running_inv_std +
                                              self.alpha * input_inv_std)
            # and make sure they end up in the graph without participating in
            # the computation (this way their default_update will be collected
            # and applied, but the computation will be optimized away):
            mean += 0 * running_mean
            inv_std += 0 * running_inv_std

        # prepare dimshuffle pattern inserting broadcastable axes as needed
        param_axes = iter(range(input.ndim - len(self.axes)))
        pattern = ['x' if input_axis in self.axes
                   else next(param_axes)
                   for input_axis in range(input.ndim)]

        # apply dimshuffle pattern to all parameters
        beta = 0 if self.beta is None else self.beta.dimshuffle(pattern)
        gamma = 1 if self.gamma is None else self.gamma.dimshuffle(pattern)
        mean = mean.dimshuffle(pattern)
        inv_std = inv_std.dimshuffle(pattern)

        # normalize
        normalized = (input - mean) * (gamma * inv_std) + beta
        return normalized
custom_layers.py (project: EUNN-theano, author: iguanaus)
def call(self, x, mask=None):
        input_dim = self.input_dim
        input_type='real'
        out_every_t=False
        loss_function='MSE'
        output_type='real'
        flag_feed_forward=False
        flag_use_mask=False
        hidden_bias_mean=np.float32(0.0)
        hidden_bias_init='zero'
        Wimpl=self.unitary_impl
        if ('full' in Wimpl):
            Wimpl='full'
        elif (Wimpl=='ASB2016'):
            Wimpl='adhoc'
            #hidden_bias_init='rand'
        elif (Wimpl=='ASB2016_fast'):
            Wimpl='adhoc_fast'
        n_layers=1
        seed=1234
        x_spec=K.permute_dimensions(x,(1,0,2))
        inputs, parameters, costs = models.complex_RNN(
            input_dim, self.hidden_dim, self.output_dim,
            input_type=input_type, out_every_t=out_every_t,
            loss_function=loss_function, output_type=output_type,
            flag_feed_forward=flag_feed_forward, flag_return_lin_output=True,
            x_spec=x_spec, flag_use_mask=flag_use_mask,
            hidden_bias_mean=hidden_bias_mean, Wimpl=Wimpl,
            flag_return_hidden_states=True, n_layers=n_layers, seed=seed,
            hidden_bias_init=hidden_bias_init)

        lin_output=costs[2]
        #self.hidden_states=costs[3]

        if (self.unitary_impl=='full'):
            # just use lrng for learning rate on this parameter
            parameters[-1].name+='full_natGrad'
        elif (self.unitary_impl=='full_natGrad'):
            # use fixed lrng with natural gradient update
            parameters[-1].name+='_natGrad_unitaryAug'
        elif (self.unitary_impl=='full_natGradRMS'):
            # use fixed lrng with natural gradient update and RMSprop-style gradient adjustment
            parameters[-1].name+='_natGradRMS_unitaryAug'
        elif (self.unitary_impl=='full_enforceComplex'):
            # swap out 2Nx2N augmented unitary matrix for Nx2N, which ensures the 
            # complex number constraint is satisfied 
            parameters[-1].name+='full_natGrad'
            Waug=parameters[-1]
            WReIm=K.variable(value=Waug[:Waug.shape[1]/2,:].eval(),name=Waug.name)
            WaugFull=K.concatenate( (WReIm, K.concatenate((-WReIm[:,WReIm.shape[1]/2:],WReIm[:,:WReIm.shape[1]/2]),axis=1)),axis=0 )
            lin_output_new = theano.clone(lin_output,replace={parameters[-1]:WaugFull})
            lin_output = lin_output_new
            parameters[-1]=WReIm

        self.trainable_weights = parameters

        return lin_output

