Python fscalar() usage examples
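
T.fscalar() builds a 0-dimensional symbolic variable of dtype float32. In the snippets collected below it almost always carries a hyperparameter, such as a learning rate, that is fed in at call time instead of being baked into the graph. A minimal sketch of that pattern (not taken from any of the projects below):

import numpy as np
import theano
import theano.tensor as T

lr = T.fscalar('lr')  # 0-d float32 symbolic scalar, e.g. a learning rate
x = T.fvector('x')    # float32 vector input

# the scalar participates in the graph like any other variable
f = theano.function([x, lr], x * lr)

print(f(np.ones(3, dtype='float32'), np.float32(0.1)))  # -> [0.1 0.1 0.1]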

adda_network.py (project: adda_mnist64, author: davidtellez)
def __init__(self):

        # Define inputs
        input_var = T.ftensor4('input_var')  # input images (batchx3x64x64)
        labels_classifier_var = T.ivector('labels_classifier_var')  # labels for images
        labels_domain_var = T.ivector('labels_domain_var')  # labels for domain (1 for source, 0 for target)
        learning_rate = T.fscalar('learning_rate')

        # Define classifier networks
        network_classifier = self.network_classifier(input_var)
        network_discriminator = self.network_discriminator(network_classifier['classifier/pool1'])

        # Define outputs
        prediction_classifier = get_output(network_classifier['classifier/output'])  # prob image classification
        prediction_discriminator = get_output(network_discriminator['discriminator/output'])  # prob image domain (should be 1 for source)

        # Define losses (objectives)
        loss_classifier_only = T.mean(categorical_crossentropy(prediction_classifier, labels_classifier_var) * labels_domain_var)
        loss_discriminator = T.mean(categorical_crossentropy(prediction_discriminator, labels_domain_var))
        loss_classifier = loss_classifier_only - loss_discriminator

        # Define performance
        perf_classifier_only = categorical_accuracy(prediction_classifier, labels_classifier_var).mean()
        perf_discriminator = categorical_accuracy(prediction_discriminator, labels_domain_var).mean()

        # Define params
        params_classifier = lasagne.layers.get_all_params(network_classifier['classifier/output'], trainable=True)
        params_discriminator = lasagne.layers.get_all_params(network_discriminator['discriminator/output'], trainable=True)
        params_discriminator = [param for param in params_discriminator if 'discriminator' in param.name]

        # Define updates
        updates_classifier = lasagne.updates.adam(loss_classifier, params_classifier, learning_rate=learning_rate)
        updates_classifier_only = lasagne.updates.adam(loss_classifier_only, params_classifier, learning_rate=learning_rate)
        updates_discriminator = lasagne.updates.adam(loss_discriminator, params_discriminator, learning_rate=learning_rate)

        # Define training functions
        self.train_fn_classifier = theano.function(
            [input_var, labels_classifier_var, labels_domain_var, learning_rate],
            [loss_classifier, loss_classifier_only, prediction_classifier],
            updates=updates_classifier)
        self.train_fn_classifier_only = theano.function(
            [input_var, labels_classifier_var, labels_domain_var, learning_rate],
            [loss_classifier, loss_classifier_only, prediction_classifier],
            updates=updates_classifier_only)
        self.train_fn_discriminator = theano.function(
            [input_var, labels_domain_var, learning_rate],
            [loss_discriminator, prediction_discriminator],
            updates=updates_discriminator)

        # Define validation functions
        self.valid_fn_classifier = theano.function(
            [input_var, labels_classifier_var],
            [perf_classifier_only, prediction_classifier])

        self.valid_fn_discriminator = theano.function(
            [input_var, labels_domain_var],
            [perf_discriminator, prediction_discriminator])
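
Because the learning rate is an fscalar input rather than a compiled-in constant, it can be annealed from Python between calls. A hypothetical training call, assuming numpy as np (model, images, labels, and domains are illustrative names, not from the project):

# images: float32 array (batch, 3, 64, 64); labels/domains: int32 vectors
loss, loss_cls, probs = model.train_fn_classifier(
    images, labels, domains, np.float32(1e-4))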
black_box_alpha.py (project: policy_search_bb-alpha, author: siemens)
def __init__(self, layer_sizes, n_samples, alpha, learning_rate, v_prior, batch_size, X_train, y_train, N_train):

        layer_sizes = copy.copy(layer_sizes)
        layer_sizes[ 0 ] = layer_sizes[ 0 ] + 1
        print(layer_sizes)
        self.batch_size = batch_size
        self.N_train = N_train
        self.X_train = X_train
        self.y_train = y_train

        self.rate = learning_rate

        # We create the network

        self.network = network.Network(layer_sizes, n_samples, v_prior, N_train)

        # index to a batch

        index = T.lscalar()  
        self.indexes = T.vector('index', dtype = 'int32')
        indexes_train = theano.shared(value = np.array(range(0, N_train), dtype = np.int32), borrow = True)

        self.x = T.tensor3('x',dtype=theano.config.floatX)
        self.y = T.matrix('y', dtype =theano.config.floatX)
        self.lr = T.fscalar()

        # The logarithm of the values for the likelihood factors
        sampl = T.bscalar()
        self.fwpass = theano.function(outputs=self.network.output(self.x,False,samples=sampl,use_indices=False), inputs=[self.x,sampl],allow_input_downcast=True)

        ll_train = self.network.log_likelihood_values(self.x, self.y, self.indexes, 0.0, 1.0)


        self.estimate_marginal_ll = (-1.0 * N_train / (self.x.shape[ 1 ] * alpha) * \
            T.sum(LogSumExp(alpha * (T.sum(ll_train, 2) - self.network.log_f_hat() - self.network.log_f_hat_z()), 0)+ \
                T.log(1.0 / n_samples)) - self.network.log_normalizer_q() - 1.0 * N_train / self.x.shape[ 1 ] * self.network.log_normalizer_q_z() + \
            self.network.log_Z_prior())

        # We create a theano function for updating q
        upd = adam(self.estimate_marginal_ll, self.network.params,indexes_train[index*batch_size:(index+1)*batch_size],self.rate,rescale_local=np.float32(N_train/batch_size))

        self.process_minibatch = theano.function([ index], self.estimate_marginal_ll, \
            updates = upd, \
            givens = { self.x: T.tile(self.X_train[ index * batch_size: (index + 1) * batch_size] , [ n_samples, 1, 1 ]),
            self.y: self.y_train[ index * batch_size: (index + 1) * batch_size ],
            self.indexes: indexes_train[ index * batch_size : (index + 1) * batch_size ] })

        # We create a theano function for making predictions


        self.error_minibatch_train = theano.function([ index ],
            T.sum((T.mean(self.network.output(self.x,self.indexes), 0, keepdims = True)[ 0, :, : ] - self.y)**2) / layer_sizes[ -1 ],
            givens = { self.x: T.tile(self.X_train[ index * batch_size: (index + 1) * batch_size ], [ n_samples, 1, 1 ]),
            self.y: self.y_train[ index * batch_size: (index + 1) * batch_size ],
            self.indexes: indexes_train[ index * batch_size : (index + 1) * batch_size ] })

        self.ll_minibatch_train = theano.function([ index ], T.sum(LogSumExp(T.sum(ll_train, 2), 0) + T.log(1.0 / n_samples)), \

            givens = { self.x: T.tile(self.X_train[ index * batch_size: (index + 1) * batch_size ], [ n_samples, 1, 1 ]),
            self.y: self.y_train[ index * batch_size: (index + 1) * batch_size ],
            self.indexes: indexes_train[ index * batch_size : (index + 1) * batch_size ] })
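
Note that the minibatch data never crosses the function boundary: givens splices slices of the shared training arrays in by index, so the compiled functions take only the integer minibatch index. A hedged usage sketch (bb is a hypothetical instance of this class):

energy = bb.process_minibatch(0)         # one stochastic update on minibatch 0
train_mse = bb.error_minibatch_train(0)  # squared error on the same slice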
model.py (project: Towards-a-Biologically-Plausible-Backprop, author: bscellier)
def __build_weakly_clamped_phase(self):

        n_iterations = T.iscalar('n_iterations')
        epsilon  = T.fscalar('epsilon')
        beta = T.fscalar('beta')
        alphas = [T.fscalar("alpha_W"+str(r+1)) for r in range(len(self.weights))]

        def step(*layers):
            F_sum = T.sum(self.__total_energy(layers, beta))
            layers_dot = T.grad(-F_sum, list(layers)) # temporal derivative of the state (weakly clamped trajectory)
            layers_new = [layers[0]]+[T.clip(layer+epsilon*dot,0.,1.) for layer,dot in zip(layers,layers_dot)][1:]
            return layers_new

        ( layers, updates ) = theano.scan(
            step,
            outputs_info=self.layers,
            n_steps=n_iterations
        )
        layers_weakly_clamped = [layer[-1] for layer in layers]

        E_mean_free           = T.mean(self.__energy(self.layers))
        E_mean_weakly_clamped = T.mean(self.__energy(layers_weakly_clamped))
        biases_dot            = T.grad( (E_mean_weakly_clamped-E_mean_free) / beta, self.biases,  consider_constant=layers_weakly_clamped)
        weights_dot           = T.grad( (E_mean_weakly_clamped-E_mean_free) / beta, self.weights, consider_constant=layers_weakly_clamped)

        biases_new  = [b - alpha * dot for b,alpha,dot in zip(self.biases[1:],alphas,biases_dot[1:])]
        weights_new = [W - alpha * dot for W,alpha,dot in zip(self.weights,   alphas,weights_dot)]

        Delta_log = [T.sqrt( ((W_new - W) ** 2).mean() ) / T.sqrt( (W ** 2).mean() ) for W,W_new in zip(self.weights,weights_new)]

        for bias, bias_new in zip(self.biases[1:],biases_new):
            updates[bias]=bias_new
        for weight, weight_new in zip(self.weights,weights_new):
            updates[weight]=weight_new

        weakly_clamped_phase = theano.function(
            inputs=[n_iterations, epsilon, beta]+alphas,
            outputs=Delta_log,
            updates=updates
        )

        return weakly_clamped_phase
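
All the step sizes of this phase (epsilon for the state dynamics, beta for the clamping strength, one alpha per weight matrix) enter as fscalar inputs, so a single compiled function covers every hyperparameter setting. A hedged call, assuming a network with two weight matrices (the values are illustrative only; plain Python floats are downcast to float32 by default):

delta_log = weakly_clamped_phase(20, 0.5, 1.0, 0.1, 0.05)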
test_opt.py (project: Theano-Deep-learning, author: GeekLiB)
def test_local_merge_alloc():
    # Add this opt to the default mode,
    # otherwise, FAST_COMPILE fails.
    default_mode = theano.compile.mode.get_default_mode()
    opt_mode = default_mode.including("local_merge_alloc")

    x = T.iscalar('x')
    y = T.iscalar('y')
    y2 = T.iscalar('y2')
    z = T.iscalar('z')
    w = T.iscalar('w')
    m = T.fscalar('m')
    # case 1
    # Alloc(Alloc(m, 1, y, 1, 1), x, y, z, w) -> Alloc(m, x, y, z, w)
    output = T.alloc(T.alloc(m, 1, y, 1, 1), x, y, z, w)
    f = theano.function([m, x, y, z, w], output, mode=opt_mode)
    topo = f.maker.fgraph.toposort()
    assert len(topo) == 1
    assert isinstance(topo[0].op, T.Alloc)
    o = f(0., 1, 2, 3, 4)
    assert o.shape == (1, 2, 3, 4)

    # case 2
    # Alloc(Alloc(m, y, 1, 1), x, y, z, w) -> Alloc(m, x, y, z, w)
    output = T.alloc(T.alloc(m, y, 1, 1), x, y, z, w)
    f = theano.function([m, x, y, z, w], output, mode=opt_mode)
    topo = f.maker.fgraph.toposort()
    assert len(topo) == 1
    assert isinstance(topo[0].op, T.Alloc)
    o = f(0., 1, 2, 3, 4)
    assert o.shape == (1, 2, 3, 4)

    # case 3
    # Alloc(Alloc(m, y1, 1, 1), x, y2, z, w) ->
    #   Alloc(m, x, assert(y1, y1==y2), z, w)
    output = T.alloc(T.alloc(m, y, 1, 1), x, y2, z, w)
    f = theano.function([m, x, y, y2, z, w], output, mode=opt_mode)
    topo = f.maker.fgraph.toposort()
    assert len(topo) == 3
    assert isinstance(topo[-2].op, T.opt.Assert)
    assert isinstance(topo[-1].op, T.Alloc)
    o = f(0., 1, 2, 2, 3, 4)
    assert o.shape == (1, 2, 3, 4)
    assert_raises((AssertionError, ValueError), f, 0., 1, 2, 5, 3, 4)
test_opt.py (project: Theano-Deep-learning, author: GeekLiB)
def test_local_useless_alloc():

    useless_alloc = out2in(local_useless_alloc)
    merge_alloc = out2in(local_merge_alloc)

    x = T.iscalar('x')
    y = T.iscalar('y')
    y2 = T.iscalar('y2')
    z = T.iscalar('z')
    w = T.iscalar('w')
    m = T.fscalar('m')

    # case 1
    # Alloc(Alloc(m, 1, y, 1, 1), x, y, z, w) -> Alloc(m, x, y, z, w)
    output = T.alloc(T.alloc(m, 1, y, 1, 1), x, y, z, w)
    g = FunctionGraph([m, x, y, z, w], [output])

    useless_alloc.optimize(g)
    merge_alloc.optimize(g)
    useless_alloc.optimize(g)

    topo = g.toposort()
    assert len(topo) == 1
    assert isinstance(topo[0].op, T.Alloc)

    # case 2
    # Alloc(Alloc(m, y, 1, 1), x, y, z, w) -> Alloc(m, x, y, z, w)
    output = T.alloc(T.alloc(m, y, 1, 1), x, y, z, w)
    g = FunctionGraph([m, x, y, z, w], [output])

    useless_alloc.optimize(g)
    merge_alloc.optimize(g)
    useless_alloc.optimize(g)

    topo = g.toposort()
    assert len(topo) == 1
    assert isinstance(topo[0].op, T.Alloc)

    # case 3
    # Alloc(Alloc(m, y1, 1, 1), x, y2, z, w) ->
    #   Alloc(m, x, assert(y1, y1==y2), z, w)
    output = T.alloc(T.alloc(m, y, 1, 1), x, y2, z, w)
    g = FunctionGraph([m, x, y, y2, z, w], [output])

    useless_alloc.optimize(g)
    merge_alloc.optimize(g)
    useless_alloc.optimize(g)

    topo = g.toposort()
    assert len(topo) == 3
    assert isinstance(topo[-2].op, T.opt.Assert)
    assert isinstance(topo[-1].op, T.Alloc)
test_scan.py (project: Theano-Deep-learning, author: GeekLiB)
def test_one_sequence_one_output_weights_gpu2(self):

        def f_rnn(u_t, x_tm1, W_in, W):
            return u_t * W_in + x_tm1 * W

        u = theano.tensor.fvector('u')
        x0 = theano.tensor.fscalar('x0')
        W_in = theano.tensor.fscalar('win')
        W = theano.tensor.fscalar('w')
        output, updates = theano.scan(f_rnn,
                                      u,
                                      x0,
                                      [W_in, W],
                                      n_steps=None,
                                      truncate_gradient=-1,
                                      go_backwards=False,
                                      mode=self.mode_with_gpu)

        f2 = theano.function([u, x0, W_in, W],
                             output,
                             updates=updates,
                             allow_input_downcast=True,
                             mode=self.mode_with_gpu)

        # get random initial values
        rng = numpy.random.RandomState(utt.fetch_seed())
        v_u = rng.uniform(size=(4,), low=-5., high=5.)
        v_x0 = rng.uniform()
        W = rng.uniform()
        W_in = rng.uniform()

        # compute the output in numpy
        v_out = numpy.zeros((4,))
        v_out[0] = v_u[0] * W_in + v_x0 * W
        for step in xrange(1, 4):
            v_out[step] = v_u[step] * W_in + v_out[step - 1] * W
        theano_values = f2(v_u, v_x0, W_in, W)
        utt.assert_allclose(theano_values, v_out)

        topo = f2.maker.fgraph.toposort()
        assert sum([isinstance(node.op, self.gpu_backend.HostFromGpu)
                    for node in topo]) == 1
        assert sum([isinstance(node.op, self.gpu_backend.GpuFromHost)
                    for node in topo]) == 4

        scan_node = [node for node in topo
                     if isinstance(node.op, theano.scan_module.scan_op.Scan)]
        assert len(scan_node) == 1
        scan_node = scan_node[0]
        scan_node_topo = scan_node.op.fn.maker.fgraph.toposort()

        # check that there is no gpu transfer in the inner loop.
        assert any([isinstance(node.op, self.gpu_backend.GpuElemwise)
                    for node in scan_node_topo])
        assert not any([isinstance(node.op, self.gpu_backend.HostFromGpu)
                        for node in scan_node_topo])
        assert not any([isinstance(node.op, self.gpu_backend.GpuFromHost)
                        for node in scan_node_topo])

    # This third test checks that scan can deal with a mixture of dtypes as
    # outputs when running on the GPU
test_scan.py (project: Theano-Deep-learning, author: GeekLiB)
def test_gpu3_mixture_dtype_outputs(self):

        def f_rnn(u_t, x_tm1, W_in, W):
            return (u_t * W_in + x_tm1 * W,
                    tensor.cast(u_t + x_tm1, 'int64'))

        u = theano.tensor.fvector('u')
        x0 = theano.tensor.fscalar('x0')
        W_in = theano.tensor.fscalar('win')
        W = theano.tensor.fscalar('w')
        output, updates = theano.scan(f_rnn,
                                      u,
                                      [x0, None],
                                      [W_in, W],
                                      n_steps=None,
                                      truncate_gradient=-1,
                                      go_backwards=False,
                                      mode=self.mode_with_gpu)

        f2 = theano.function([u, x0, W_in, W],
                             output,
                             updates=updates,
                             allow_input_downcast=True,
                             mode=self.mode_with_gpu)

        # get random initial values
        rng = numpy.random.RandomState(utt.fetch_seed())
        v_u = rng.uniform(size=(4,), low=-5., high=5.)
        v_x0 = rng.uniform()
        W = rng.uniform()
        W_in = rng.uniform()

        # compute the output in numpy
        v_out1 = numpy.zeros((4,))
        v_out2 = numpy.zeros((4,), dtype='int64')
        v_out1[0] = v_u[0] * W_in + v_x0 * W
        v_out2[0] = v_u[0] + v_x0
        for step in xrange(1, 4):
            v_out1[step] = v_u[step] * W_in + v_out1[step - 1] * W
            v_out2[step] = numpy.int64(v_u[step] + v_out1[step - 1])

        theano_out1, theano_out2 = f2(v_u, v_x0, W_in, W)
        utt.assert_allclose(theano_out1, v_out1)
        utt.assert_allclose(theano_out2, v_out2)

        topo = f2.maker.fgraph.toposort()
        scan_node = [node for node in topo
                     if isinstance(node.op, theano.scan_module.scan_op.Scan)]
        assert len(scan_node) == 1
        scan_node = scan_node[0]
        assert self.is_scan_on_gpu(scan_node)
test_blas.py (project: Theano-Deep-learning, author: GeekLiB)
def test_dot22scalar():
    def cmp(a_shp, b_shp):
        a = tensor.fmatrix()
        b = tensor.fmatrix()
        scalar = tensor.fscalar()
        av = my_rand(*a_shp)
        bv = my_rand(*b_shp)

        f = theano.function(
            [a, b],
            tensor.dot(a, b) * numpy.asarray(4, 'float32'),
            mode=mode_with_gpu)
        f2 = theano.function(
            [a, b],
            tensor.dot(a, b) * numpy.asarray(4, 'float32'))
        t = f.maker.fgraph.toposort()
        assert any([isinstance(n.op, tcn.blas.GpuDot22Scalar) for n in t])
#        assert any([isinstance(n.op, tcn.basic_ops.GpuAllocEmpty)
#                    for n in t])
        assert numpy.allclose(f(av, bv), f2(av, bv))

        f = theano.function([a, b, scalar], tensor.dot(a, b) * scalar,
                            mode=mode_with_gpu)
        f2 = theano.function([a, b, scalar], tensor.dot(a, b) * scalar)
        t = f.maker.fgraph.toposort()
        assert any([isinstance(n.op, tcn.blas.GpuDot22Scalar) for n in t])
#        assert any([isinstance(n.op, tcn.basic_ops.GpuAllocEmpty)
#                    for n in t])
        assert numpy.allclose(f(av, bv, 0.5), f2(av, bv, 0.5))

        f = theano.function([a, b, scalar],
                            tensor.blas._dot22scalar(a, b, scalar),
                            mode=mode_with_gpu)
        f2 = theano.function([a, b, scalar], tensor.dot(a, b) * scalar)
        t = f.maker.fgraph.toposort()
        assert len(t) == 4
        assert isinstance(t[0].op, tcn.GpuFromHost)
        assert isinstance(t[1].op, tcn.GpuFromHost)
        assert isinstance(t[2].op, tcn.blas.GpuDot22Scalar)
        assert isinstance(t[3].op, tcn.HostFromGpu)
        assert numpy.allclose(f(av, bv, 0.5), f2(av, bv, 0.5))
    cmp((3, 4), (4, 5))
    cmp((0, 4), (4, 5))
    cmp((3, 4), (4, 0))
    cmp((3, 0), (0, 5))
    cmp((0, 4), (4, 0))
    cmp((0, 0), (0, 0))
test_mlp.py (project: Theano-Deep-learning, author: GeekLiB)
def run_nnet(use_gpu, n_batch=60, n_in=1024, n_hid=2048, n_out=10,
             n_train=100):

    if config.mode == 'DEBUG_MODE':
        n_train = 1

    if use_gpu:
        w = tcn.shared_constructor(0.01 * (my_rand(n_in, n_hid) - 0.5), 'w')
        b = tcn.shared_constructor(my_zeros(n_hid), 'b')
        v = tcn.shared_constructor(my_zeros((n_hid, n_out)), 'c')
        c = tcn.shared_constructor(my_zeros(n_out), 'c')
    else:
        w = shared(0.01 * (my_rand(n_in, n_hid) - 0.5), 'w')
        b = shared(my_zeros(n_hid), 'b')
        v = shared(my_zeros((n_hid, n_out)), 'c')
        c = shared(my_zeros(n_out), 'c')

    x = tensor.fmatrix('x')
    y = tensor.fmatrix('y')
    lr = tensor.fscalar('lr')

    hid = tensor.tanh(tensor.dot(x, w) + b)
    out = tensor.tanh(tensor.dot(hid, v) + c)
    loss = tensor.sum(0.5 * (out - y) ** 2 * lr)
    if 0:
        print('loss type', loss.type)

    params = [w, b, v, c]
    gparams = tensor.grad(loss, params)

    mode = get_mode(use_gpu)

    # print 'building pfunc ...'
    train = pfunc([x, y, lr], [loss], mode=mode,
                  updates=[(p, p - g) for p, g in izip(params, gparams)])

    if 0:
        for i, n in enumerate(train.maker.fgraph.toposort()):
            print(i, n)

    xval = my_rand(n_batch, n_in)
    yval = my_rand(n_batch, n_out)
    lr = theano._asarray(0.01, dtype='float32')

    t0 = time.time()
    rval = []
    for i in xrange(n_train):
        rval.append(train(xval, yval, lr))
    dt = time.time() - t0

    return numpy.asarray(rval), dt
test_mlp.py (project: Theano-Deep-learning, author: GeekLiB)
def run_conv_nnet1(use_gpu):
    if use_gpu:
        shared_fn = tcn.shared_constructor
    else:
        shared_fn = shared
    n_batch = 16
    n_kern = 20
    shape_img = (n_batch, 1, 32, 32)
    shape_kern = (n_kern, 1, 5, 5)
    n_train = 10
    if config.mode == 'DEBUG_MODE':
        n_train = 1

    logical_hid_shape = tcn.blas.GpuConv.logical_output_shape_2d(
        shape_img[2:], shape_kern[2:], 'valid')
    n_hid = n_kern * logical_hid_shape[0] * logical_hid_shape[1]
    n_out = 10

    w = shared_fn(0.01 * (my_rand(*shape_kern) - 0.5), 'w')
    b = shared_fn(my_zeros((n_kern,)), 'b')
    v = shared_fn(my_zeros((n_hid, n_out)), 'c')
    c = shared_fn(my_zeros(n_out), 'c')

    x = tensor.Tensor(dtype='float32', broadcastable=(0, 1, 0, 0))('x')
    y = tensor.fmatrix('y')
    lr = tensor.fscalar('lr')

    conv_op = conv.ConvOp(shape_img[2:], shape_kern[2:], n_kern, n_batch, 1, 1)

    hid = tensor.tanh(conv_op(x, w) + b.dimshuffle((0, 'x', 'x')))
    hid_flat = hid.reshape((n_batch, n_hid))
    out = tensor.tanh(tensor.dot(hid_flat, v) + c)
    loss = tensor.sum(0.5 * (out - y) ** 2 * lr)
    # print 'loss type', loss.type

    params = [w, b, v, c]
    gparams = tensor.grad(loss, params)

    mode = get_mode(use_gpu)

    # print 'building pfunc ...'
    train = pfunc(
        [x, y, lr],
        [loss],
        mode=mode,
        updates=[(p, p - g) for p, g in zip(params, gparams)])

#    for i, n in enumerate(train.maker.fgraph.toposort()):
#        print i, n

    xval = my_rand(*shape_img)
    yval = my_rand(n_batch, n_out)
    lr = theano._asarray(0.01, dtype='float32')

    for i in xrange(n_train):
        rval = train(xval, yval, lr)
    # print 'training done'
    return rval
policy_gradiant.py (project: a3c, author: hercky)
def __init__(self, ob_space, action_space, **usercfg):
        """
        Initialize your agent's parameters
        """
        nO = ob_space.shape[0]
        nA = action_space.n
        # Here are all the algorithm parameters. You can modify them by passing in keyword args
        self.config = dict(episode_max_length=100, timesteps_per_batch=10000, n_iter=100, 
            gamma=1.0, stepsize=0.05, nhid=20)
        self.config.update(usercfg)

        # Symbolic variables for observation, action, and advantage
        # These variables stack the results from many timesteps--the first dimension is the timestep
        ob_no = T.fmatrix() # Observation
        a_n = T.ivector() # Discrete action 
        adv_n = T.fvector() # Advantage


        def shared(arr):
            return theano.shared(arr.astype('float64'))

        # Create weights of neural network with one hidden layer
        W0 = shared(np.random.randn(nO,self.config['nhid'])/np.sqrt(nO))
        b0 = shared(np.zeros(self.config['nhid']))
        W1 = shared(1e-4*np.random.randn(self.config['nhid'],nA))
        b1 = shared(np.zeros(nA))
        params = [W0, b0, W1, b1]

        # Action probabilities
        prob_na = T.nnet.softmax(T.tanh(ob_no.dot(W0)+b0[None,:]).dot(W1) + b1[None,:])
        N = ob_no.shape[0]

        # Loss function that we'll differentiate to get the policy gradient
        # Note that we've divided by the total number of timesteps
        loss = T.log(prob_na[T.arange(N), a_n]).dot(adv_n) / N
        stepsize = T.fscalar()
        grads = T.grad(loss, params)
        # Perform parameter updates.
        # I find that sgd doesn't work well
        # updates = sgd_updates(grads, params, stepsize)
        updates = rmsprop_updates(grads, params, stepsize)
        self.pg_update = theano.function([ob_no, a_n, adv_n, stepsize], [], updates=updates, allow_input_downcast=True)
        self.compute_prob = theano.function([ob_no], prob_na, allow_input_downcast=True)
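
Here stepsize is an fscalar input to pg_update, so the learning rate can be decayed per batch without recompiling; allow_input_downcast=True additionally lets float64 arrays through. A hypothetical update (agent and the array names are illustrative):

# observations: (T, nO) float; actions: (T,) int; advantages: (T,) float
agent.pg_update(observations, actions, advantages, 0.05)
action_probs = agent.compute_prob(observations)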
ctrlr_optimizer.py (project: Buffe, author: bentzinir)
def __init__(self, game_params, arch_params, solver_params, trained_model, sn_dir):

        params=None

        if trained_model:
            params = common.load_params(trained_model)

        self.lr_func = create_learning_rate_func(solver_params)

        self.x_h_0 = tt.fvector('x_h_0')
        self.v_h_0 = tt.fvector('v_h_0')
        self.t_h_0 = tt.fvector('t_h_0')
        self.x_t_0 = tt.fmatrix('x_t_0')
        self.v_t_0 = tt.fmatrix('v_t_0')
        self.a_t_0 = tt.fmatrix('a_t_0')
        self.t_t_0 = tt.fvector('t_t_0')
        self.time_steps = tt.fvector('t_0')
        self.exist = tt.bvector('exist')
        self.is_leader = tt.fvector('is_leader')
        self.x_goal = tt.fvector('x_goal')
        self.turn_vec_h = tt.fvector('turn_vec_h')
        self.turn_vec_t = tt.fvector('turn_vec_t')
        self.n_steps = tt.iscalar('n_steps')
        self.lr = tt.fscalar('lr')
        self.sn_dir = sn_dir
        self.game_params = game_params
        self.arch_params = arch_params
        self.solver_params = solver_params

        self.model = CONTROLLER(self.x_h_0,
                                self.v_h_0,
                                self.t_h_0,
                                self.x_t_0,
                                self.v_t_0,
                                self.a_t_0,
                                self.t_t_0,
                                self.time_steps,
                                self.exist,
                                self.is_leader,
                                self.x_goal,
                                self.turn_vec_h,
                                self.turn_vec_t,
                                self.n_steps,
                                self.lr,
                                self.game_params,
                                self.arch_params,
                                self.solver_params,
                                params)
ctrlr_optimizer.py (project: Buffe, author: bentzinir)
def __init__(self, game_params, arch_params, solver_params, trained_model, sn_dir):

        params=[None, None]

        if trained_model[0]:
            params[0] = common.load_params(trained_model[0])

        if trained_model[1]:
            params[1] = common.load_params(trained_model[1])

        self.lr_func = []
        self.lr_func.append(create_learning_rate_func(solver_params['controler_0']))
        self.lr_func.append(create_learning_rate_func(solver_params['controler_1']))

        self.x_host_0 = tt.fvector('x_host_0')
        self.v_host_0 = tt.fvector('v_host_0')
        self.x_target_0 = tt.fvector('x_target_0')
        self.v_target_0 = tt.fvector('v_target_0')
        self.x_mines_0 = tt.fmatrix('x_mines_0')
        self.mines_map = tt.fmatrix('mines_map')
        self.time_steps = tt.fvector('time_steps')
        self.force = tt.fmatrix('force')
        self.n_steps_0 = tt.iscalar('n_steps_0')
        self.n_steps_1 = tt.iscalar('n_steps_1')
        self.lr = tt.fscalar('lr')
        self.goal_1 = tt.fvector('goal_1')
        self.trnsprnt = tt.fscalar('trnsprnt')
        self.rand_goals = tt.fmatrix('rand_goals')
        self.game_params = game_params
        self.arch_params = arch_params
        self.solver_params = solver_params
        self.sn_dir = sn_dir

        self.model = CONTROLLER(self.x_host_0,
                                self.v_host_0,
                                self.x_target_0,
                                self.v_target_0,
                                self.x_mines_0,
                                self.mines_map,
                                self.time_steps,
                                self.force,
                                self.n_steps_0,
                                self.n_steps_1,
                                self.lr,
                                self.goal_1,
                                self.trnsprnt,
                                self.rand_goals,
                                self.game_params,
                                self.arch_params,
                                self.solver_params,
                                params)
annealing.py (project: crayimage, author: yandexdataschool)
def sa(inputs, loss, params, outputs = (), srng=None, seed=1122334455, iters=32,
       initial_temperature = 1.0e-1, learning_rate=1.0e-2):
  if srng is None:
    # from theano.sandbox.cuda.rng_curand import CURAND_RandomStreams as RandomStreams
    from theano.sandbox.rng_mrg import MRG_RandomStreams as RandomStreams
    srng = srng or RandomStreams(seed=seed)

  inputs_cached = [ to_shared(i) for i in inputs ]
  input_setter = OrderedDict()
  for inpc, inp in zip(inputs_cached, inputs):
    input_setter[inpc] = inp

  memorize_inputs = theano.function(inputs, [], updates=input_setter, no_default_updates=True)

  inputs_givens = [
    (inp, inpc)
    for inp, inpc in zip(inputs, inputs_cached)
  ]

  deltas = [
    make_copy(param)
    for param in params
  ]

  alpha = T.fscalar('learning rate')

  delta_setter = OrderedDict([
    (delta, make_uniform(delta, -alpha, alpha, srng))
    for delta in deltas
  ])

  generate_deltas = theano.function([alpha], [], updates=delta_setter, no_default_updates=False)

  probe_givens = [
    (param, param + delta)
    for param, delta in zip(params, deltas)
  ]

  probe = theano.function(
    [], [loss] + list(outputs),
    givens=probe_givens + inputs_givens,
    no_default_updates=True
  )

  params_setter = OrderedDict(probe_givens)

  set_params = theano.function(
    [], [],
    updates=params_setter,
    no_default_updates=True
  )

  return simulated_annealing(
    probe, memorize_inputs, set_params, generate_deltas,
    iters, initial_temperature, learning_rate
  )
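
The fscalar alpha ('learning rate') is the half-width of the uniform proposal: each call to the compiled generate_deltas refills every delta buffer with draws from [-alpha, alpha], after which probe evaluates the loss at params + deltas without committing them. Illustratively (these functions are local to sa and shown only to clarify the inner step):

generate_deltas(0.01)  # propose a perturbation within +/- 0.01 per entry
new_loss = probe()[0]  # loss at params + deltas; set_params() would accept it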

