def __init__(self):
# Define inputs
input_var = T.ftensor4('input_var') # input images (batchx3x64x64)
labels_classifier_var = T.ivector('labels_classifier_var') # labels for images
labels_domain_var = T.ivector('labels_domain_var') # labels for domain (1 for source, 0 for target)
learning_rate = T.fscalar('learning_rate')
# Define classifier networks
network_classifier = self.network_classifier(input_var)
network_discriminator = self.network_discriminator(network_classifier['classifier/pool1'])
# Define outputs
prediction_classifier = get_output(network_classifier['classifier/output']) # prob image classification
prediction_discriminator = get_output(network_discriminator['discriminator/output']) # prob image domain (should be 1 for source)
# Define losses (objectives)
loss_classifier_only = T.mean(categorical_crossentropy(prediction_classifier, labels_classifier_var) * labels_domain_var) # only source-domain samples (domain label 1) contribute
loss_discriminator = T.mean(categorical_crossentropy(prediction_discriminator, labels_domain_var))
loss_classifier = loss_classifier_only - loss_discriminator # adversarial objective: classify well while confusing the domain discriminator
# Define performance
perf_classifier_only = categorical_accuracy(prediction_classifier, labels_classifier_var).mean()
perf_discriminator = categorical_accuracy(prediction_discriminator, labels_domain_var).mean()
# Define params
params_classifier = lasagne.layers.get_all_params(network_classifier['classifier/output'], trainable=True)
params_discriminator = lasagne.layers.get_all_params(network_discriminator['discriminator/output'], trainable=True)
params_discriminator = [param for param in params_discriminator if 'discriminator' in param.name]
# Define updates
updates_classifier = lasagne.updates.adam(loss_classifier, params_classifier, learning_rate=learning_rate)
updates_classifier_only = lasagne.updates.adam(loss_classifier_only, params_classifier, learning_rate=learning_rate)
updates_discriminator = lasagne.updates.adam(loss_discriminator, params_discriminator, learning_rate=learning_rate)
# Define training functions
self.train_fn_classifier = theano.function(
[input_var, labels_classifier_var, labels_domain_var, learning_rate],
[loss_classifier, loss_classifier_only, prediction_classifier],
updates=updates_classifier)
self.train_fn_classifier_only = theano.function(
[input_var, labels_classifier_var, labels_domain_var, learning_rate],
[loss_classifier, loss_classifier_only, prediction_classifier],
updates=updates_classifier_only)
self.train_fn_discriminator = theano.function(
[input_var, labels_domain_var, learning_rate],
[loss_discriminator, prediction_discriminator],
updates=updates_discriminator)
# Define validation functions
self.valid_fn_classifier = theano.function(
[input_var, labels_classifier_var],
[perf_classifier_only, prediction_classifier])
self.valid_fn_discriminator = theano.function(
[input_var, labels_domain_var],
[perf_discriminator, prediction_discriminator])
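The learning rate enters every compiled training function above as a T.fscalar input, so it can be annealed from call to call without recompiling. A hypothetical training-loop sketch (the names dann, images, class_labels and domain_labels are illustrative, not from the source above; images is a float32 array of shape (batch, 3, 64, 64) and the label arrays are int32 vectors):

import numpy as np

base_lr = 1e-3
for epoch in range(10):
    lr = np.float32(base_lr / (1.0 + 0.1 * epoch))  # per-epoch decay fed into the fscalar slot
    # adversarial step: classify source images well while confusing the domain discriminator
    loss_cls, loss_cls_only, probs = dann.train_fn_classifier(
        images, class_labels, domain_labels, lr)
    # discriminator step: learn to separate source (label 1) from target (label 0)
    loss_dom, probs_dom = dann.train_fn_discriminator(images, domain_labels, lr)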
Python fscalar() usage examples (source code)
def __init__(self, layer_sizes, n_samples, alpha, learning_rate, v_prior, batch_size, X_train, y_train, N_train):
layer_sizes = copy.copy(layer_sizes)
layer_sizes[ 0 ] = layer_sizes[ 0 ] + 1
print layer_sizes
self.batch_size = batch_size
self.N_train = N_train
self.X_train = X_train
self.y_train = y_train
self.rate = learning_rate
# We create the network
self.network = network.Network(layer_sizes, n_samples, v_prior, N_train)
# index to a batch
index = T.lscalar()
self.indexes = T.vector('index', dtype = 'int32')
indexes_train = theano.shared(value = np.array(range(0, N_train), dtype = np.int32), borrow = True)
self.x = T.tensor3('x',dtype=theano.config.floatX)
self.y = T.matrix('y', dtype =theano.config.floatX)
self.lr = T.fscalar()
# The logarithm of the values for the likelihood factors
sampl = T.bscalar()
self.fwpass = theano.function(outputs=self.network.output(self.x,False,samples=sampl,use_indices=False), inputs=[self.x,sampl],allow_input_downcast=True)
ll_train = self.network.log_likelihood_values(self.x, self.y, self.indexes, 0.0, 1.0)
self.estimate_marginal_ll = (-1.0 * N_train / (self.x.shape[ 1 ] * alpha) * \
T.sum(LogSumExp(alpha * (T.sum(ll_train, 2) - self.network.log_f_hat() - self.network.log_f_hat_z()), 0)+ \
T.log(1.0 / n_samples)) - self.network.log_normalizer_q() - 1.0 * N_train / self.x.shape[ 1 ] * self.network.log_normalizer_q_z() + \
self.network.log_Z_prior())
# We create a theano function for updating q
upd = adam(self.estimate_marginal_ll, self.network.params,indexes_train[index*batch_size:(index+1)*batch_size],self.rate,rescale_local=np.float32(N_train/batch_size))
self.process_minibatch = theano.function([ index], self.estimate_marginal_ll, \
updates = upd, \
givens = { self.x: T.tile(self.X_train[ index * batch_size: (index + 1) * batch_size] , [ n_samples, 1, 1 ]),
self.y: self.y_train[ index * batch_size: (index + 1) * batch_size ],
self.indexes: indexes_train[ index * batch_size : (index + 1) * batch_size ] })
# We create theano functions for evaluating minibatch error and log-likelihood
self.error_minibatch_train = theano.function([ index ],
T.sum((T.mean(self.network.output(self.x,self.indexes), 0, keepdims = True)[ 0, :, : ] - self.y)**2) / layer_sizes[ -1 ],
givens = { self.x: T.tile(self.X_train[ index * batch_size: (index + 1) * batch_size ], [ n_samples, 1, 1 ]),
self.y: self.y_train[ index * batch_size: (index + 1) * batch_size ],
self.indexes: indexes_train[ index * batch_size : (index + 1) * batch_size ] })
self.ll_minibatch_train = theano.function([ index ], T.sum(LogSumExp(T.sum(ll_train, 2), 0) + T.log(1.0 / n_samples)), \
givens = { self.x: T.tile(self.X_train[ index * batch_size: (index + 1) * batch_size ], [ n_samples, 1, 1 ]),
self.y: self.y_train[ index * batch_size: (index + 1) * batch_size ],
self.indexes: indexes_train[ index * batch_size : (index + 1) * batch_size ] })
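A hedged usage sketch for the class above (the name bnn is illustrative): training iterates over minibatch indices rather than arrays, because X_train, y_train and the per-example indices already live in Theano shared variables and only the integer index crosses into the compiled function.

import numpy as np

n_batches = int(np.ceil(bnn.N_train / float(bnn.batch_size)))
for epoch in range(40):
    for i in range(n_batches):
        objective = bnn.process_minibatch(i)  # one Adam step on the marginal-likelihood estimate
    print('epoch %d  objective %.3f' % (epoch, objective))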
Source: model.py — project: Towards-a-Biologically-Plausible-Backprop (author: bscellier)
def __build_weakly_clamped_phase(self):
n_iterations = T.iscalar('n_iterations')
epsilon = T.fscalar('epsilon')
beta = T.fscalar('beta')
alphas = [T.fscalar("alpha_W"+str(r+1)) for r in range(len(self.weights))]
def step(*layers):
F_sum = T.sum(self.__total_energy(layers, beta))
layers_dot = T.grad(-F_sum, list(layers)) # temporal derivative of the state (weakly clamped trajectory)
layers_new = [layers[0]]+[T.clip(layer+epsilon*dot,0.,1.) for layer,dot in zip(layers,layers_dot)][1:]
return layers_new
( layers, updates ) = theano.scan(
step,
outputs_info=self.layers,
n_steps=n_iterations
)
layers_weakly_clamped = [layer[-1] for layer in layers]
E_mean_free = T.mean(self.__energy(self.layers))
E_mean_weakly_clamped = T.mean(self.__energy(layers_weakly_clamped))
biases_dot = T.grad( (E_mean_weakly_clamped-E_mean_free) / beta, self.biases, consider_constant=layers_weakly_clamped)
weights_dot = T.grad( (E_mean_weakly_clamped-E_mean_free) / beta, self.weights, consider_constant=layers_weakly_clamped)
biases_new = [b - alpha * dot for b,alpha,dot in zip(self.biases[1:],alphas,biases_dot[1:])]
weights_new = [W - alpha * dot for W,alpha,dot in zip(self.weights, alphas,weights_dot)]
Delta_log = [T.sqrt( ((W_new - W) ** 2).mean() ) / T.sqrt( (W ** 2).mean() ) for W,W_new in zip(self.weights,weights_new)]
for bias, bias_new in zip(self.biases[1:],biases_new):
updates[bias]=bias_new
for weight, weight_new in zip(self.weights,weights_new):
updates[weight]=weight_new
weakly_clamped_phase = theano.function(
inputs=[n_iterations, epsilon, beta]+alphas,
outputs=Delta_log,
updates=updates
)
return weakly_clamped_phase
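A hedged sketch of driving the compiled function (net is an illustrative instance with three weight matrices, and the assumption here is that the builder's result is stored as, say, net.weakly_clamped_phase): every hyperparameter is a runtime scalar input, with one fscalar alpha per weight matrix.

import numpy as np

relative_change = net.weakly_clamped_phase(
    4,                    # n_iterations of the weakly clamped relaxation
    np.float32(0.5),      # epsilon: step size of the state dynamics
    np.float32(1.0),      # beta: clamping strength
    np.float32(0.128), np.float32(0.032), np.float32(0.008))  # alpha_W1, alpha_W2, alpha_W3
print(relative_change)    # per-layer ||W_new - W|| / ||W||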
def test_local_merge_alloc():
# Add this opt to the default mode,
# otherwise, FAST_COMPILE fails.
default_mode = theano.compile.mode.get_default_mode()
opt_mode = default_mode.including("local_merge_alloc")
x = T.iscalar('x')
y = T.iscalar('y')
y2 = T.iscalar('y2')
z = T.iscalar('z')
w = T.iscalar('w')
m = T.fscalar('m')
# case 1
# Alloc(Alloc(m, x, 1, 1, 1), x, y, z, w) -> Alloc(m, x, y, z, w)
output = T.alloc(T.alloc(m, 1, y, 1, 1), x, y, z, w)
f = theano.function([m, x, y, z, w], output, mode=opt_mode)
topo = f.maker.fgraph.toposort()
assert len(topo) == 1
assert isinstance(topo[0].op, T.Alloc)
o = f(0., 1, 2, 3, 4)
assert o.shape == (1, 2, 3, 4)
# case 2
# Alloc(Alloc(m, y, 1, 1), x, y, z, w) -> Alloc(m, x, y, z, w)
output = T.alloc(T.alloc(m, y, 1, 1), x, y, z, w)
f = theano.function([m, x, y, z, w], output, mode=opt_mode)
topo = f.maker.fgraph.toposort()
assert len(topo) == 1
assert isinstance(topo[0].op, T.Alloc)
o = f(0., 1, 2, 3, 4)
assert o.shape == (1, 2, 3, 4)
# case 3
# Alloc(Alloc(m, y1, 1, 1), x, y2, z, w) ->
# Alloc(m, x, assert(y1, y1==y2), z, w)
output = T.alloc(T.alloc(m, y, 1, 1), x, y2, z, w)
f = theano.function([m, x, y, y2, z, w], output, mode=opt_mode)
topo = f.maker.fgraph.toposort()
assert len(topo) == 3
assert isinstance(topo[-2].op, T.opt.Assert)
assert isinstance(topo[-1].op, T.Alloc)
o = f(0., 1, 2, 2, 3, 4)
assert o.shape == (1, 2, 3, 4)
assert_raises((AssertionError, ValueError), f, 0., 1, 2, 5, 3, 4)
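For reference, a minimal stand-alone sketch (independent of the test above) of what T.fscalar provides in graphs like these: a 0-d float32 symbolic input that is cast at call time, here used as the fill value of an alloc.

import numpy
import theano
import theano.tensor as T

m = T.fscalar('m')
x = T.iscalar('x')
out = T.alloc(m, x, 2)                      # an (x, 2) array filled with the scalar m
f = theano.function([m, x], out, allow_input_downcast=True)
print(f(0.5, 3))                            # 3x2 float32 array of 0.5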
def test_local_useless_alloc():
useless_alloc = out2in(local_useless_alloc)
merge_alloc = out2in(local_merge_alloc)
x = T.iscalar('x')
y = T.iscalar('y')
y2 = T.iscalar('y2')
z = T.iscalar('z')
w = T.iscalar('w')
m = T.fscalar('m')
# case 1
# Alloc(Alloc(m, x, 1, 1, 1), x, y, z, w) -> Alloc(m, x, y, z, w)
output = T.alloc(T.alloc(m, 1, y, 1, 1), x, y, z, w)
g = FunctionGraph([m, x, y, z, w], [output])
useless_alloc.optimize(g)
merge_alloc.optimize(g)
useless_alloc.optimize(g)
topo = g.toposort()
assert len(topo) == 1
assert isinstance(topo[0].op, T.Alloc)
# case 2
# Alloc(Alloc(m, y, 1, 1), x, y, z, w) -> Alloc(m, x, y, z, w)
output = T.alloc(T.alloc(m, y, 1, 1), x, y, z, w)
g = FunctionGraph([m, x, y, z, w], [output])
useless_alloc.optimize(g)
merge_alloc.optimize(g)
useless_alloc.optimize(g)
topo = g.toposort()
assert len(topo) == 1
assert isinstance(topo[0].op, T.Alloc)
# case 3
# Alloc(Alloc(m, y1, 1, 1), x, y2, z, w) ->
# Alloc(m, x, assert(y1, y1==y2), z, w)
output = T.alloc(T.alloc(m, y, 1, 1), x, y2, z, w)
g = FunctionGraph([m, x, y, y2, z, w], [output])
useless_alloc.optimize(g)
merge_alloc.optimize(g)
useless_alloc.optimize(g)
topo = g.toposort()
assert len(topo) == 3
assert isinstance(topo[-2].op, T.opt.Assert)
assert isinstance(topo[-1].op, T.Alloc)
def test_one_sequence_one_output_weights_gpu2(self):
def f_rnn(u_t, x_tm1, W_in, W):
return u_t * W_in + x_tm1 * W
u = theano.tensor.fvector('u')
x0 = theano.tensor.fscalar('x0')
W_in = theano.tensor.fscalar('win')
W = theano.tensor.fscalar('w')
output, updates = theano.scan(f_rnn,
u,
x0,
[W_in, W],
n_steps=None,
truncate_gradient=-1,
go_backwards=False,
mode=self.mode_with_gpu)
f2 = theano.function([u, x0, W_in, W],
output,
updates=updates,
allow_input_downcast=True,
mode=self.mode_with_gpu)
# get random initial values
rng = numpy.random.RandomState(utt.fetch_seed())
v_u = rng.uniform(size=(4,), low=-5., high=5.)
v_x0 = rng.uniform()
W = rng.uniform()
W_in = rng.uniform()
# compute the output in numpy
v_out = numpy.zeros((4,))
v_out[0] = v_u[0] * W_in + v_x0 * W
for step in xrange(1, 4):
v_out[step] = v_u[step] * W_in + v_out[step - 1] * W
theano_values = f2(v_u, v_x0, W_in, W)
utt.assert_allclose(theano_values, v_out)
topo = f2.maker.fgraph.toposort()
assert sum([isinstance(node.op, self.gpu_backend.HostFromGpu)
for node in topo]) == 1
assert sum([isinstance(node.op, self.gpu_backend.GpuFromHost)
for node in topo]) == 4
scan_node = [node for node in topo
if isinstance(node.op, theano.scan_module.scan_op.Scan)]
assert len(scan_node) == 1
scan_node = scan_node[0]
scan_node_topo = scan_node.op.fn.maker.fgraph.toposort()
# check that there is no gpu transfer in the inner loop.
assert any([isinstance(node.op, self.gpu_backend.GpuElemwise)
for node in scan_node_topo])
assert not any([isinstance(node.op, self.gpu_backend.HostFromGpu)
for node in scan_node_topo])
assert not any([isinstance(node.op, self.gpu_backend.GpuFromHost)
for node in scan_node_topo])
# This third test checks that scan can deal with a mixture of dtypes as
# outputs when running on the GPU
def test_gpu3_mixture_dtype_outputs(self):
def f_rnn(u_t, x_tm1, W_in, W):
return (u_t * W_in + x_tm1 * W,
tensor.cast(u_t + x_tm1, 'int64'))
u = theano.tensor.fvector('u')
x0 = theano.tensor.fscalar('x0')
W_in = theano.tensor.fscalar('win')
W = theano.tensor.fscalar('w')
output, updates = theano.scan(f_rnn,
u,
[x0, None],
[W_in, W],
n_steps=None,
truncate_gradient=-1,
go_backwards=False,
mode=self.mode_with_gpu)
f2 = theano.function([u, x0, W_in, W],
output,
updates=updates,
allow_input_downcast=True,
mode=self.mode_with_gpu)
# get random initial values
rng = numpy.random.RandomState(utt.fetch_seed())
v_u = rng.uniform(size=(4,), low=-5., high=5.)
v_x0 = rng.uniform()
W = rng.uniform()
W_in = rng.uniform()
# compute the output in numpy
v_out1 = numpy.zeros((4,))
v_out2 = numpy.zeros((4,), dtype='int64')
v_out1[0] = v_u[0] * W_in + v_x0 * W
v_out2[0] = v_u[0] + v_x0
for step in xrange(1, 4):
v_out1[step] = v_u[step] * W_in + v_out1[step - 1] * W
v_out2[step] = numpy.int64(v_u[step] + v_out1[step - 1])
theano_out1, theano_out2 = f2(v_u, v_x0, W_in, W)
utt.assert_allclose(theano_out1, v_out1)
utt.assert_allclose(theano_out2, v_out2)
topo = f2.maker.fgraph.toposort()
scan_node = [node for node in topo
if isinstance(node.op, theano.scan_module.scan_op.Scan)]
assert len(scan_node) == 1
scan_node = scan_node[0]
assert self.is_scan_on_gpu(scan_node)
def test_dot22scalar():
def cmp(a_shp, b_shp):
a = tensor.fmatrix()
b = tensor.fmatrix()
scalar = tensor.fscalar()
av = my_rand(*a_shp)
bv = my_rand(*b_shp)
f = theano.function(
[a, b],
tensor.dot(a, b) * numpy.asarray(4, 'float32'),
mode=mode_with_gpu)
f2 = theano.function(
[a, b],
tensor.dot(a, b) * numpy.asarray(4, 'float32'))
t = f.maker.fgraph.toposort()
assert any([isinstance(n.op, tcn.blas.GpuDot22Scalar) for n in t])
# assert any([isinstance(n.op, tcn.basic_ops.GpuAllocEmpty)
# for n in t])
assert numpy.allclose(f(av, bv), f2(av, bv))
f = theano.function([a, b, scalar], tensor.dot(a, b) * scalar,
mode=mode_with_gpu)
f2 = theano.function([a, b, scalar], tensor.dot(a, b) * scalar)
t = f.maker.fgraph.toposort()
assert any([isinstance(n.op, tcn.blas.GpuDot22Scalar) for n in t])
# assert any([isinstance(n.op, tcn.basic_ops.GpuAllocEmpty)
# for n in t])
assert numpy.allclose(f(av, bv, 0.5), f2(av, bv, 0.5))
f = theano.function([a, b, scalar],
tensor.blas._dot22scalar(a, b, scalar),
mode=mode_with_gpu)
f2 = theano.function([a, b, scalar], tensor.dot(a, b) * scalar)
t = f.maker.fgraph.toposort()
assert len(t) == 4
assert isinstance(t[0].op, tcn.GpuFromHost)
assert isinstance(t[1].op, tcn.GpuFromHost)
assert isinstance(t[2].op, tcn.blas.GpuDot22Scalar)
assert isinstance(t[3].op, tcn.HostFromGpu)
assert numpy.allclose(f(av, bv, 0.5), f2(av, bv, 0.5))
cmp((3, 4), (4, 5))
cmp((0, 4), (4, 5))
cmp((3, 4), (4, 0))
cmp((3, 0), (0, 5))
cmp((0, 4), (4, 0))
cmp((0, 0), (0, 0))
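Passing the scale through an fscalar input (rather than the baked-in float32 constant used by the first pair of functions) keeps the scalar a runtime argument while, with BLAS optimizations enabled, typically still letting the optimizer fuse the multiply into the GEMM as Dot22Scalar (GpuDot22Scalar on the GPU path tested above). A hedged CPU-side sketch of the same pattern:

import numpy as np
import theano
import theano.tensor as T

a = T.fmatrix('a')
b = T.fmatrix('b')
s = T.fscalar('s')
f = theano.function([a, b, s], T.dot(a, b) * s, allow_input_downcast=True)
print(f(np.ones((2, 3), 'float32'), np.ones((3, 4), 'float32'), 0.5))  # 2x4 array of 1.5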
def run_nnet(use_gpu, n_batch=60, n_in=1024, n_hid=2048, n_out=10,
n_train=100):
if config.mode == 'DEBUG_MODE':
n_train = 1
if use_gpu:
w = tcn.shared_constructor(0.01 * (my_rand(n_in, n_hid) - 0.5), 'w')
b = tcn.shared_constructor(my_zeros(n_hid), 'b')
v = tcn.shared_constructor(my_zeros((n_hid, n_out)), 'c')
c = tcn.shared_constructor(my_zeros(n_out), 'c')
else:
w = shared(0.01 * (my_rand(n_in, n_hid) - 0.5), 'w')
b = shared(my_zeros(n_hid), 'b')
v = shared(my_zeros((n_hid, n_out)), 'c')
c = shared(my_zeros(n_out), 'c')
x = tensor.fmatrix('x')
y = tensor.fmatrix('y')
lr = tensor.fscalar('lr')
hid = tensor.tanh(tensor.dot(x, w) + b)
out = tensor.tanh(tensor.dot(hid, v) + c)
loss = tensor.sum(0.5 * (out - y) ** 2 * lr)
if 0:
print('loss type', loss.type)
params = [w, b, v, c]
gparams = tensor.grad(loss, params)
mode = get_mode(use_gpu)
# print 'building pfunc ...'
train = pfunc([x, y, lr], [loss], mode=mode,
updates=[(p, p - g) for p, g in izip(params, gparams)])
if 0:
for i, n in enumerate(train.maker.fgraph.toposort()):
print(i, n)
xval = my_rand(n_batch, n_in)
yval = my_rand(n_batch, n_out)
lr = theano._asarray(0.01, dtype='float32')
t0 = time.time()
rval = []
for i in xrange(n_train):
rval.append(train(xval, yval, lr))
dt = time.time() - t0
return numpy.asarray(rval), dt
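A hedged usage sketch: run_nnet returns the per-iteration losses and the elapsed wall time, so comparing backends amounts to calling it twice (this assumes the surrounding module's helpers, such as get_mode and my_rand, are available):

cpu_losses, cpu_dt = run_nnet(use_gpu=False, n_train=20)
gpu_losses, gpu_dt = run_nnet(use_gpu=True, n_train=20)
print('cpu %.3fs  gpu %.3fs  speedup %.1fx' % (cpu_dt, gpu_dt, cpu_dt / gpu_dt))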
def run_conv_nnet1(use_gpu):
if use_gpu:
shared_fn = tcn.shared_constructor
else:
shared_fn = shared
n_batch = 16
n_kern = 20
shape_img = (n_batch, 1, 32, 32)
shape_kern = (n_kern, 1, 5, 5)
n_train = 10
if config.mode == 'DEBUG_MODE':
n_train = 1
logical_hid_shape = tcn.blas.GpuConv.logical_output_shape_2d(
shape_img[2:], shape_kern[2:], 'valid')
n_hid = n_kern * logical_hid_shape[0] * logical_hid_shape[1]
n_out = 10
w = shared_fn(0.01 * (my_rand(*shape_kern) - 0.5), 'w')
b = shared_fn(my_zeros((n_kern,)), 'b')
v = shared_fn(my_zeros((n_hid, n_out)), 'c')
c = shared_fn(my_zeros(n_out), 'c')
x = tensor.Tensor(dtype='float32', broadcastable=(0, 1, 0, 0))('x')
y = tensor.fmatrix('y')
lr = tensor.fscalar('lr')
conv_op = conv.ConvOp(shape_img[2:], shape_kern[2:], n_kern, n_batch, 1, 1)
hid = tensor.tanh(conv_op(x, w) + b.dimshuffle((0, 'x', 'x')))
hid_flat = hid.reshape((n_batch, n_hid))
out = tensor.tanh(tensor.dot(hid_flat, v) + c)
loss = tensor.sum(0.5 * (out - y) ** 2 * lr)
# print 'loss type', loss.type
params = [w, b, v, c]
gparams = tensor.grad(loss, params)
mode = get_mode(use_gpu)
# print 'building pfunc ...'
train = pfunc(
[x, y, lr],
[loss],
mode=mode,
updates=[(p, p - g) for p, g in zip(params, gparams)])
# for i, n in enumerate(train.maker.fgraph.toposort()):
# print i, n
xval = my_rand(*shape_img)
yval = my_rand(n_batch, n_out)
lr = theano._asarray(0.01, dtype='float32')
for i in xrange(n_train):
rval = train(xval, yval, lr)
# print 'training done'
return rval
def __init__(self, ob_space, action_space, **usercfg):
"""
Initialize your agent's parameters
"""
nO = ob_space.shape[0]
nA = action_space.n
# Here are all the algorithm parameters. You can modify them by passing in keyword args
self.config = dict(episode_max_length=100, timesteps_per_batch=10000, n_iter=100,
gamma=1.0, stepsize=0.05, nhid=20)
self.config.update(usercfg)
# Symbolic variables for observation, action, and advantage
# These variables stack the results from many timesteps--the first dimension is the timestep
ob_no = T.fmatrix() # Observation
a_n = T.ivector() # Discrete action
adv_n = T.fvector() # Advantage
def shared(arr):
return theano.shared(arr.astype('float64'))
# Create weights of neural network with one hidden layer
W0 = shared(np.random.randn(nO,self.config['nhid'])/np.sqrt(nO))
b0 = shared(np.zeros(self.config['nhid']))
W1 = shared(1e-4*np.random.randn(self.config['nhid'],nA))
b1 = shared(np.zeros(nA))
params = [W0, b0, W1, b1]
# Action probabilities
prob_na = T.nnet.softmax(T.tanh(ob_no.dot(W0)+b0[None,:]).dot(W1) + b1[None,:])
N = ob_no.shape[0]
# Loss function that we'll differentiate to get the policy gradient
# Note that we've divided by the total number of timesteps
loss = T.log(prob_na[T.arange(N), a_n]).dot(adv_n) / N
stepsize = T.fscalar()
grads = T.grad(loss, params)
# Perform parameter updates.
# I find that sgd doesn't work well
# updates = sgd_updates(grads, params, stepsize)
updates = rmsprop_updates(grads, params, stepsize)
self.pg_update = theano.function([ob_no, a_n, adv_n, stepsize], [], updates=updates, allow_input_downcast=True)
self.compute_prob = theano.function([ob_no], prob_na, allow_input_downcast=True)
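A hedged sketch of how the two compiled functions are used in a REINFORCE-style loop (agent, obs, acts and advs are illustrative names; obs has shape (T, nO), acts and advs have shape (T,)). Since both functions set allow_input_downcast=True, float64 observations and a Python float step size feed the fmatrix and fscalar inputs directly:

probs = agent.compute_prob(obs)                              # (T, nA) action probabilities, used when sampling actions
agent.pg_update(obs, acts, advs, agent.config['stepsize'])   # one policy-gradient step on the batch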
def __init__(self, game_params, arch_params, solver_params, trained_model, sn_dir):
params=None
if trained_model:
params = common.load_params(trained_model)
self.lr_func = create_learning_rate_func(solver_params)
self.x_h_0 = tt.fvector('x_h_0')
self.v_h_0 = tt.fvector('v_h_0')
self.t_h_0 = tt.fvector('t_h_0')
self.x_t_0 = tt.fmatrix('x_t_0')
self.v_t_0 = tt.fmatrix('v_t_0')
self.a_t_0 = tt.fmatrix('a_t_0')
self.t_t_0 = tt.fvector('t_t_0')
self.time_steps = tt.fvector('t_0')
self.exist = tt.bvector('exist')
self.is_leader = tt.fvector('is_leader')
self.x_goal = tt.fvector('x_goal')
self.turn_vec_h = tt.fvector('turn_vec_h')
self.turn_vec_t = tt.fvector('turn_vec_t')
self.n_steps = tt.iscalar('n_steps')
self.lr = tt.fscalar('lr')
self.sn_dir = sn_dir
self.game_params = game_params
self.arch_params = arch_params
self.solver_params = solver_params
self.model = CONTROLLER(self.x_h_0,
self.v_h_0,
self.t_h_0,
self.x_t_0,
self.v_t_0,
self.a_t_0,
self.t_t_0,
self.time_steps,
self.exist,
self.is_leader,
self.x_goal,
self.turn_vec_h,
self.turn_vec_t,
self.n_steps,
self.lr,
self.game_params,
self.arch_params,
self.solver_params,
params)
def __init__(self, game_params, arch_params, solver_params, trained_model, sn_dir):
params=[None, None]
if trained_model[0]:
params[0] = common.load_params(trained_model[0])
if trained_model[1]:
params[1] = common.load_params(trained_model[1])
self.lr_func = []
self.lr_func.append(create_learning_rate_func(solver_params['controler_0']))
self.lr_func.append(create_learning_rate_func(solver_params['controler_1']))
self.x_host_0 = tt.fvector('x_host_0')
self.v_host_0 = tt.fvector('v_host_0')
self.x_target_0 = tt.fvector('x_target_0')
self.v_target_0 = tt.fvector('v_target_0')
self.x_mines_0 = tt.fmatrix('x_mines_0')
self.mines_map = tt.fmatrix('mines_map')
self.time_steps = tt.fvector('time_steps')
self.force = tt.fmatrix('force')
self.n_steps_0 = tt.iscalar('n_steps_0')
self.n_steps_1 = tt.iscalar('n_steps_1')
self.lr = tt.fscalar('lr')
self.goal_1 = tt.fvector('goal_1')
self.trnsprnt = tt.fscalar('trnsprnt')
self.rand_goals = tt.fmatrix('rand_goals')
self.game_params = game_params
self.arch_params = arch_params
self.solver_params = solver_params
self.sn_dir = sn_dir
self.model = CONTROLLER(self.x_host_0,
self.v_host_0,
self.x_target_0,
self.v_target_0,
self.x_mines_0,
self.mines_map,
self.time_steps,
self.force,
self.n_steps_0,
self.n_steps_1,
self.lr,
self.goal_1,
self.trnsprnt,
self.rand_goals,
self.game_params,
self.arch_params,
self.solver_params,
params)
def sa(inputs, loss, params, outputs = (), srng=None, seed=1122334455, iters=32,
initial_temperature = 1.0e-1, learning_rate=1.0e-2):
if srng is None:
# from theano.sandbox.cuda.rng_curand import CURAND_RandomStreams as RandomStreams
from theano.sandbox.rng_mrg import MRG_RandomStreams as RandomStreams
srng = RandomStreams(seed=seed)
inputs_cached = [ to_shared(i) for i in inputs ]
input_setter = OrderedDict()
for inpc, inp in zip(inputs_cached, inputs):
input_setter[inpc] = inp
memorize_inputs = theano.function(inputs, [], updates=input_setter, no_default_updates=True)
inputs_givens = [
(inp, inpc)
for inp, inpc in zip(inputs, inputs_cached)
]
deltas = [
make_copy(param)
for param in params
]
alpha = T.fscalar('learning rate')
delta_setter = OrderedDict([
(delta, make_uniform(delta, -alpha, alpha, srng))
for delta in deltas
])
generate_deltas = theano.function([alpha], [], updates=delta_setter, no_default_updates=False)
probe_givens = [
(param, param + delta)
for param, delta in zip(params, deltas)
]
probe = theano.function(
[], [loss] + list(outputs),
givens=probe_givens + inputs_givens,
no_default_updates=True
)
params_setter = OrderedDict(probe_givens)
set_params = theano.function(
[], [],
updates=params_setter,
no_default_updates=True
)
return simulated_annealing(
probe, memorize_inputs, set_params, generate_deltas,
iters, initial_temperature, learning_rate
)
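A hedged sketch of wiring sa() to a toy least-squares problem (it assumes the helpers sa() relies on — to_shared, make_copy, make_uniform and simulated_annealing — are defined alongside it; what the call returns is decided by simulated_annealing, which is not shown here):

import numpy as np
import theano
import theano.tensor as T

x = T.fvector('x')
y = T.fvector('y')
w = theano.shared(np.float32(0.0), name='w')   # the single parameter to anneal
loss = T.mean((w * x - y) ** 2)

result = sa([x, y], loss, [w], iters=128,
            initial_temperature=1e-1, learning_rate=1e-2)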