def __init__(self):
# Define inputs
input_var = T.ftensor4('input_var') # input images (batchx3x64x64)
labels_classifier_var = T.ivector('labels_classifier_var') # labels for images
labels_domain_var = T.ivector('labels_domain_var') # labels for domain (1 for source, 0 for target)
learning_rate = T.fscalar('learning_rate')
# Define classifier networks
network_classifier = self.network_classifier(input_var)
network_discriminator = self.network_discriminator(network_classifier['classifier/pool1'])
# Define outputs
prediction_classifier = get_output(network_classifier['classifier/output']) # prob image classification
prediction_discriminator = get_output(network_discriminator['discriminator/output']) # prob image domain (should be 1 for source)
# Define losses (objectives)
loss_classifier_only = T.mean(categorical_crossentropy(prediction_classifier, labels_classifier_var) * labels_domain_var) # only source-domain samples (domain label 1) contribute
loss_discriminator = T.mean(categorical_crossentropy(prediction_discriminator, labels_domain_var))
loss_classifier = loss_classifier_only - loss_discriminator # adversarial objective: classify well while confusing the domain discriminator
# Define performance
perf_classifier_only = categorical_accuracy(prediction_classifier, labels_classifier_var).mean()
perf_discriminator = categorical_accuracy(prediction_discriminator, labels_domain_var).mean()
# Define params
params_classifier = lasagne.layers.get_all_params(network_classifier['classifier/output'], trainable=True)
params_discriminator = lasagne.layers.get_all_params(network_discriminator['discriminator/output'], trainable=True)
params_discriminator = [param for param in params_discriminator if 'discriminator' in param.name]
# Define updates
updates_classifier = lasagne.updates.adam(loss_classifier, params_classifier, learning_rate=learning_rate)
updates_classifier_only = lasagne.updates.adam(loss_classifier_only, params_classifier, learning_rate=learning_rate)
updates_discriminator = lasagne.updates.adam(loss_discriminator, params_discriminator, learning_rate=learning_rate)
# Define training functions
self.train_fn_classifier = theano.function(
[input_var, labels_classifier_var, labels_domain_var, learning_rate],
[loss_classifier, loss_classifier_only, prediction_classifier],
updates=updates_classifier)
self.train_fn_classifier_only = theano.function(
[input_var, labels_classifier_var, labels_domain_var, learning_rate],
[loss_classifier, loss_classifier_only, prediction_classifier],
updates=updates_classifier_only)
self.train_fn_discriminator = theano.function(
[input_var, labels_domain_var, learning_rate],
[loss_discriminator, prediction_discriminator],
updates=updates_discriminator)
# Define validation functions
self.valid_fn_classifier = theano.function(
[input_var, labels_classifier_var],
[perf_classifier_only, prediction_classifier])
self.valid_fn_discriminator = theano.function(
[input_var, labels_domain_var],
[perf_discriminator, prediction_discriminator])
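The learning rate enters every compiled training function above as a T.fscalar input, so it can be annealed from call to call without recompiling. A hypothetical training-loop sketch (the names dann, images, class_labels and domain_labels are illustrative, not from the source above; images is a float32 array of shape (batch, 3, 64, 64) and the label arrays are int32 vectors):

import numpy as np

base_lr = 1e-3
for epoch in range(10):
    lr = np.float32(base_lr / (1.0 + 0.1 * epoch))  # per-epoch decay fed into the fscalar slot
    # adversarial step: classify source images well while confusing the domain discriminator
    loss_cls, loss_cls_only, probs = dann.train_fn_classifier(
        images, class_labels, domain_labels, lr)
    # discriminator step: learn to separate source (label 1) from target (label 0)
    loss_dom, probs_dom = dann.train_fn_discriminator(images, domain_labels, lr)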
Python fscalar() usage examples (source code)
def __init__(self, layer_sizes, n_samples, alpha, learning_rate, v_prior, batch_size, X_train, y_train, N_train):
layer_sizes = copy.copy(layer_sizes)
layer_sizes[ 0 ] = layer_sizes[ 0 ] + 1
print layer_sizes
self.batch_size = batch_size
self.N_train = N_train
self.X_train = X_train
self.y_train = y_train
self.rate = learning_rate
# We create the network
self.network = network.Network(layer_sizes, n_samples, v_prior, N_train)
# index to a batch
index = T.lscalar()
self.indexes = T.vector('index', dtype = 'int32')
indexes_train = theano.shared(value = np.array(range(0, N_train), dtype = np.int32), borrow = True)
self.x = T.tensor3('x',dtype=theano.config.floatX)
self.y = T.matrix('y', dtype =theano.config.floatX)
self.lr = T.fscalar()
# The logarithm of the values for the likelihood factors
sampl = T.bscalar()
self.fwpass = theano.function(outputs=self.network.output(self.x,False,samples=sampl,use_indices=False), inputs=[self.x,sampl],allow_input_downcast=True)
ll_train = self.network.log_likelihood_values(self.x, self.y, self.indexes, 0.0, 1.0)
self.estimate_marginal_ll = (-1.0 * N_train / (self.x.shape[ 1 ] * alpha) * \
T.sum(LogSumExp(alpha * (T.sum(ll_train, 2) - self.network.log_f_hat() - self.network.log_f_hat_z()), 0)+ \
T.log(1.0 / n_samples)) - self.network.log_normalizer_q() - 1.0 * N_train / self.x.shape[ 1 ] * self.network.log_normalizer_q_z() + \
self.network.log_Z_prior())
# We create a theano function for updating q
upd = adam(self.estimate_marginal_ll, self.network.params,indexes_train[index*batch_size:(index+1)*batch_size],self.rate,rescale_local=np.float32(N_train/batch_size))
self.process_minibatch = theano.function([ index], self.estimate_marginal_ll, \
updates = upd, \
givens = { self.x: T.tile(self.X_train[ index * batch_size: (index + 1) * batch_size] , [ n_samples, 1, 1 ]),
self.y: self.y_train[ index * batch_size: (index + 1) * batch_size ],
self.indexes: indexes_train[ index * batch_size : (index + 1) * batch_size ] })
# We create theano functions for evaluating minibatch error and log-likelihood
self.error_minibatch_train = theano.function([ index ],
T.sum((T.mean(self.network.output(self.x,self.indexes), 0, keepdims = True)[ 0, :, : ] - self.y)**2) / layer_sizes[ -1 ],
givens = { self.x: T.tile(self.X_train[ index * batch_size: (index + 1) * batch_size ], [ n_samples, 1, 1 ]),
self.y: self.y_train[ index * batch_size: (index + 1) * batch_size ],
self.indexes: indexes_train[ index * batch_size : (index + 1) * batch_size ] })
self.ll_minibatch_train = theano.function([ index ], T.sum(LogSumExp(T.sum(ll_train, 2), 0) + T.log(1.0 / n_samples)), \
givens = { self.x: T.tile(self.X_train[ index * batch_size: (index + 1) * batch_size ], [ n_samples, 1, 1 ]),
self.y: self.y_train[ index * batch_size: (index + 1) * batch_size ],
self.indexes: indexes_train[ index * batch_size : (index + 1) * batch_size ] })
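A hedged usage sketch for the class above (the name bnn is illustrative): training iterates over minibatch indices rather than arrays, because X_train, y_train and the per-example indices already live in Theano shared variables and only the integer index crosses into the compiled function.

import numpy as np

n_batches = int(np.ceil(bnn.N_train / float(bnn.batch_size)))
for epoch in range(40):
    for i in range(n_batches):
        objective = bnn.process_minibatch(i)  # one Adam step on the marginal-likelihood estimate
    print('epoch %d  objective %.3f' % (epoch, objective))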
Source: model.py — project: Towards-a-Biologically-Plausible-Backprop (author: bscellier)
def __build_weakly_clamped_phase(self):
n_iterations = T.iscalar('n_iterations')
epsilon = T.fscalar('epsilon')
beta = T.fscalar('beta')
alphas = [T.fscalar("alpha_W"+str(r+1)) for r in range(len(self.weights))]
def step(*layers):
F_sum = T.sum(self.__total_energy(layers, beta))
layers_dot = T.grad(-F_sum, list(layers)) # temporal derivative of the state (weakly clamped trajectory)
layers_new = [layers[0]]+[T.clip(layer+epsilon*dot,0.,1.) for layer,dot in zip(layers,layers_dot)][1:]
return layers_new
( layers, updates ) = theano.scan(
step,
outputs_info=self.layers,
n_steps=n_iterations
)
layers_weakly_clamped = [layer[-1] for layer in layers]
E_mean_free = T.mean(self.__energy(self.layers))
E_mean_weakly_clamped = T.mean(self.__energy(layers_weakly_clamped))
biases_dot = T.grad( (E_mean_weakly_clamped-E_mean_free) / beta, self.biases, consider_constant=layers_weakly_clamped)
weights_dot = T.grad( (E_mean_weakly_clamped-E_mean_free) / beta, self.weights, consider_constant=layers_weakly_clamped)
biases_new = [b - alpha * dot for b,alpha,dot in zip(self.biases[1:],alphas,biases_dot[1:])]
weights_new = [W - alpha * dot for W,alpha,dot in zip(self.weights, alphas,weights_dot)]
Delta_log = [T.sqrt( ((W_new - W) ** 2).mean() ) / T.sqrt( (W ** 2).mean() ) for W,W_new in zip(self.weights,weights_new)]
for bias, bias_new in zip(self.biases[1:],biases_new):
updates[bias]=bias_new
for weight, weight_new in zip(self.weights,weights_new):
updates[weight]=weight_new
weakly_clamped_phase = theano.function(
inputs=[n_iterations, epsilon, beta]+alphas,
outputs=Delta_log,
updates=updates
)
return weakly_clamped_phase
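A hedged sketch of driving the compiled function (net is an illustrative instance with three weight matrices, and the assumption here is that the builder's result is stored as, say, net.weakly_clamped_phase): every hyperparameter is a runtime scalar input, with one fscalar alpha per weight matrix.

import numpy as np

relative_change = net.weakly_clamped_phase(
    4,                    # n_iterations of the weakly clamped relaxation
    np.float32(0.5),      # epsilon: step size of the state dynamics
    np.float32(1.0),      # beta: clamping strength
    np.float32(0.128), np.float32(0.032), np.float32(0.008))  # alpha_W1, alpha_W2, alpha_W3
print(relative_change)    # per-layer ||W_new - W|| / ||W||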
def test_local_merge_alloc():
# Add this opt to the default mode,
# otherwise, FAST_COMPILE fails.
default_mode = theano.compile.mode.get_default_mode()
opt_mode = default_mode.including("local_merge_alloc")
x = T.iscalar('x')
y = T.iscalar('y')
y2 = T.iscalar('y2')
z = T.iscalar('z')
w = T.iscalar('w')
m = T.fscalar('m')
# case 1
# Alloc(Alloc(m, x, 1, 1, 1), x, y, z, w) -> Alloc(m, x, y, z, w)
output = T.alloc(T.alloc(m, 1, y, 1, 1), x, y, z, w)
f = theano.function([m, x, y, z, w], output, mode=opt_mode)
topo = f.maker.fgraph.toposort()
assert len(topo) == 1
assert isinstance(topo[0].op, T.Alloc)
o = f(0., 1, 2, 3, 4)
assert o.shape == (1, 2, 3, 4)
# case 2
# Alloc(Alloc(m, y, 1, 1), x, y, z, w) -> Alloc(m, x, y, z, w)
output = T.alloc(T.alloc(m, y, 1, 1), x, y, z, w)
f = theano.function([m, x, y, z, w], output, mode=opt_mode)
topo = f.maker.fgraph.toposort()
assert len(topo) == 1
assert isinstance(topo[0].op, T.Alloc)
o = f(0., 1, 2, 3, 4)
assert o.shape == (1, 2, 3, 4)
# case 3
# Alloc(Alloc(m, y1, 1, 1), x, y2, z, w) ->
# Alloc(m, x, assert(y1, y1==y2), z, w)
output = T.alloc(T.alloc(m, y, 1, 1), x, y2, z, w)
f = theano.function([m, x, y, y2, z, w], output, mode=opt_mode)
topo = f.maker.fgraph.toposort()
assert len(topo) == 3
assert isinstance(topo[-2].op, T.opt.Assert)
assert isinstance(topo[-1].op, T.Alloc)
o = f(0., 1, 2, 2, 3, 4)
assert o.shape == (1, 2, 3, 4)
assert_raises((AssertionError, ValueError), f, 0., 1, 2, 5, 3, 4)
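For reference, a minimal stand-alone sketch (independent of the test above) of what T.fscalar provides in graphs like these: a 0-d float32 symbolic input that is cast at call time, here used as the fill value of an alloc.

import numpy
import theano
import theano.tensor as T

m = T.fscalar('m')
x = T.iscalar('x')
out = T.alloc(m, x, 2)                      # an (x, 2) array filled with the scalar m
f = theano.function([m, x], out, allow_input_downcast=True)
print(f(0.5, 3))                            # 3x2 float32 array of 0.5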
def test_local_useless_alloc():
useless_alloc = out2in(local_useless_alloc)
merge_alloc = out2in(local_merge_alloc)
x = T.iscalar('x')
y = T.iscalar('y')
y2 = T.iscalar('y2')
z = T.iscalar('z')
w = T.iscalar('w')
m = T.fscalar('m')
# case 1
# Alloc(Alloc(m, x, 1, 1, 1), x, y, z, w) -> Alloc(m, x, y, z, w)
output = T.alloc(T.alloc(m, 1, y, 1, 1), x, y, z, w)
g = FunctionGraph([m, x, y, z, w], [output])
useless_alloc.optimize(g)
merge_alloc.optimize(g)
useless_alloc.optimize(g)
topo = g.toposort()
assert len(topo) == 1
assert isinstance(topo[0].op, T.Alloc)
# case 2
# Alloc(Alloc(m, y, 1, 1), x, y, z, w) -> Alloc(m, x, y, z, w)
output = T.alloc(T.alloc(m, y, 1, 1), x, y, z, w)
g = FunctionGraph([m, x, y, z, w], [output])
useless_alloc.optimize(g)
merge_alloc.optimize(g)
useless_alloc.optimize(g)
topo = g.toposort()
assert len(topo) == 1
assert isinstance(topo[0].op, T.Alloc)
# case 3
# Alloc(Alloc(m, y1, 1, 1), x, y2, z, w) ->
# Alloc(m, x, assert(y1, y1==y2), z, w)
output = T.alloc(T.alloc(m, y, 1, 1), x, y2, z, w)
g = FunctionGraph([m, x, y, y2, z, w], [output])
useless_alloc.optimize(g)
merge_alloc.optimize(g)
useless_alloc.optimize(g)
topo = g.toposort()
assert len(topo) == 3
assert isinstance(topo[-2].op, T.opt.Assert)
assert isinstance(topo[-1].op, T.Alloc)
def test_one_sequence_one_output_weights_gpu2(self):
def f_rnn(u_t, x_tm1, W_in, W):
return u_t * W_in + x_tm1 * W
u = theano.tensor.fvector('u')
x0 = theano.tensor.fscalar('x0')
W_in = theano.tensor.fscalar('win')
W = theano.tensor.fscalar('w')
output, updates = theano.scan(f_rnn,
u,
x0,
[W_in, W],
n_steps=None,
truncate_gradient=-1,
go_backwards=False,
mode=self.mode_with_gpu)
f2 = theano.function([u, x0, W_in, W],
output,
updates=updates,
allow_input_downcast=True,
mode=self.mode_with_gpu)
# get random initial values
rng = numpy.random.RandomState(utt.fetch_seed())
v_u = rng.uniform(size=(4,), low=-5., high=5.)
v_x0 = rng.uniform()
W = rng.uniform()
W_in = rng.uniform()
# compute the output in numpy
v_out = numpy.zeros((4,))
v_out[0] = v_u[0] * W_in + v_x0 * W
for step in xrange(1, 4):
v_out[step] = v_u[step] * W_in + v_out[step - 1] * W
theano_values = f2(v_u, v_x0, W_in, W)
utt.assert_allclose(theano_values, v_out)
topo = f2.maker.fgraph.toposort()
assert sum([isinstance(node.op, self.gpu_backend.HostFromGpu)
for node in topo]) == 1
assert sum([isinstance(node.op, self.gpu_backend.GpuFromHost)
for node in topo]) == 4
scan_node = [node for node in topo
if isinstance(node.op, theano.scan_module.scan_op.Scan)]
assert len(scan_node) == 1
scan_node = scan_node[0]
scan_node_topo = scan_node.op.fn.maker.fgraph.toposort()
# check that there is no gpu transfer in the inner loop.
assert any([isinstance(node.op, self.gpu_backend.GpuElemwise)
for node in scan_node_topo])
assert not any([isinstance(node.op, self.gpu_backend.HostFromGpu)
for node in scan_node_topo])
assert not any([isinstance(node.op, self.gpu_backend.GpuFromHost)
for node in scan_node_topo])
# This third test checks that scan can deal with a mixture of dtypes as
# outputs when running on the GPU
def test_gpu3_mixture_dtype_outputs(self):
def f_rnn(u_t, x_tm1, W_in, W):
return (u_t * W_in + x_tm1 * W,
tensor.cast(u_t + x_tm1, 'int64'))
u = theano.tensor.fvector('u')
x0 = theano.tensor.fscalar('x0')
W_in = theano.tensor.fscalar('win')
W = theano.tensor.fscalar('w')
output, updates = theano.scan(f_rnn,
u,
[x0, None],
[W_in, W],
n_steps=None,
truncate_gradient=-1,
go_backwards=False,
mode=self.mode_with_gpu)
f2 = theano.function([u, x0, W_in, W],
output,
updates=updates,
allow_input_downcast=True,
mode=self.mode_with_gpu)
# get random initial values
rng = numpy.random.RandomState(utt.fetch_seed())
v_u = rng.uniform(size=(4,), low=-5., high=5.)
v_x0 = rng.uniform()
W = rng.uniform()
W_in = rng.uniform()
# compute the output in numpy
v_out1 = numpy.zeros((4,))
v_out2 = numpy.zeros((4,), dtype='int64')
v_out1[0] = v_u[0] * W_in + v_x0 * W
v_out2[0] = v_u[0] + v_x0
for step in xrange(1, 4):
v_out1[step] = v_u[step] * W_in + v_out1[step - 1] * W
v_out2[step] = numpy.int64(v_u[step] + v_out1[step - 1])
theano_out1, theano_out2 = f2(v_u, v_x0, W_in, W)
utt.assert_allclose(theano_out1, v_out1)
utt.assert_allclose(theano_out2, v_out2)
topo = f2.maker.fgraph.toposort()
scan_node = [node for node in topo
if isinstance(node.op, theano.scan_module.scan_op.Scan)]
assert len(scan_node) == 1
scan_node = scan_node[0]
assert self.is_scan_on_gpu(scan_node)
def test_dot22scalar():
def cmp(a_shp, b_shp):
a = tensor.fmatrix()
b = tensor.fmatrix()
scalar = tensor.fscalar()
av = my_rand(*a_shp)
bv = my_rand(*b_shp)
f = theano.function(
[a, b],
tensor.dot(a, b) * numpy.asarray(4, 'float32'),
mode=mode_with_gpu)
f2 = theano.function(
[a, b],
tensor.dot(a, b) * numpy.asarray(4, 'float32'))
t = f.maker.fgraph.toposort()
assert any([isinstance(n.op, tcn.blas.GpuDot22Scalar) for n in t])
# assert any([isinstance(n.op, tcn.basic_ops.GpuAllocEmpty)
# for n in t])
assert numpy.allclose(f(av, bv), f2(av, bv))
f = theano.function([a, b, scalar], tensor.dot(a, b) * scalar,
mode=mode_with_gpu)
f2 = theano.function([a, b, scalar], tensor.dot(a, b) * scalar)
t = f.maker.fgraph.toposort()
assert any([isinstance(n.op, tcn.blas.GpuDot22Scalar) for n in t])
# assert any([isinstance(n.op, tcn.basic_ops.GpuAllocEmpty)
# for n in t])
assert numpy.allclose(f(av, bv, 0.5), f2(av, bv, 0.5))
f = theano.function([a, b, scalar],
tensor.blas._dot22scalar(a, b, scalar),
mode=mode_with_gpu)
f2 = theano.function([a, b, scalar], tensor.dot(a, b) * scalar)
t = f.maker.fgraph.toposort()
assert len(t) == 4
assert isinstance(t[0].op, tcn.GpuFromHost)
assert isinstance(t[1].op, tcn.GpuFromHost)
assert isinstance(t[2].op, tcn.blas.GpuDot22Scalar)
assert isinstance(t[3].op, tcn.HostFromGpu)
assert numpy.allclose(f(av, bv, 0.5), f2(av, bv, 0.5))
cmp((3, 4), (4, 5))
cmp((0, 4), (4, 5))
cmp((3, 4), (4, 0))
cmp((3, 0), (0, 5))
cmp((0, 4), (4, 0))
cmp((0, 0), (0, 0))
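Passing the scale through an fscalar input (rather than the baked-in float32 constant used by the first pair of functions) keeps the scalar a runtime argument while, with BLAS optimizations enabled, typically still letting the optimizer fuse the multiply into the GEMM as Dot22Scalar (GpuDot22Scalar on the GPU path tested above). A hedged CPU-side sketch of the same pattern:

import numpy as np
import theano
import theano.tensor as T

a = T.fmatrix('a')
b = T.fmatrix('b')
s = T.fscalar('s')
f = theano.function([a, b, s], T.dot(a, b) * s, allow_input_downcast=True)
print(f(np.ones((2, 3), 'float32'), np.ones((3, 4), 'float32'), 0.5))  # 2x4 array of 1.5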
def run_nnet(use_gpu, n_batch=60, n_in=1024, n_hid=2048, n_out=10,
n_train=100):
if config.mode == 'DEBUG_MODE':
n_train = 1
if use_gpu:
w = tcn.shared_constructor(0.01 * (my_rand(n_in, n_hid) - 0.5), 'w')
b = tcn.shared_constructor(my_zeros(n_hid), 'b')
v = tcn.shared_constructor(my_zeros((n_hid, n_out)), 'c')
c = tcn.shared_constructor(my_zeros(n_out), 'c')
else:
w = shared(0.01 * (my_rand(n_in, n_hid) - 0.5), 'w')
b = shared(my_zeros(n_hid), 'b')
v = shared(my_zeros((n_hid, n_out)), 'c')
c = shared(my_zeros(n_out), 'c')
x = tensor.fmatrix('x')
y = tensor.fmatrix('y')
lr = tensor.fscalar('lr')
hid = tensor.tanh(tensor.dot(x, w) + b)
out = tensor.tanh(tensor.dot(hid, v) + c)
loss = tensor.sum(0.5 * (out - y) ** 2 * lr)
if 0:
print('loss type', loss.type)
params = [w, b, v, c]
gparams = tensor.grad(loss, params)
mode = get_mode(use_gpu)
# print 'building pfunc ...'
train = pfunc([x, y, lr], [loss], mode=mode,
updates=[(p, p - g) for p, g in izip(params, gparams)])
if 0:
for i, n in enumerate(train.maker.fgraph.toposort()):
print(i, n)
xval = my_rand(n_batch, n_in)
yval = my_rand(n_batch, n_out)
lr = theano._asarray(0.01, dtype='float32')
t0 = time.time()
rval = []
for i in xrange(n_train):
rval.append(train(xval, yval, lr))
dt = time.time() - t0
return numpy.asarray(rval), dt
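A hedged usage sketch: run_nnet returns the per-iteration losses and the elapsed wall time, so comparing backends amounts to calling it twice (this assumes the surrounding module's helpers, such as get_mode and my_rand, are available):

cpu_losses, cpu_dt = run_nnet(use_gpu=False, n_train=20)
gpu_losses, gpu_dt = run_nnet(use_gpu=True, n_train=20)
print('cpu %.3fs  gpu %.3fs  speedup %.1fx' % (cpu_dt, gpu_dt, cpu_dt / gpu_dt))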
def run_conv_nnet1(use_gpu):
if use_gpu:
shared_fn = tcn.shared_constructor
else:
shared_fn = shared
n_batch = 16
n_kern = 20
shape_img = (n_batch, 1, 32, 32)
shape_kern = (n_kern, 1, 5, 5)
n_train = 10
if config.mode == 'DEBUG_MODE':
n_train = 1
logical_hid_shape = tcn.blas.GpuConv.logical_output_shape_2d(
shape_img[2:], shape_kern[2:], 'valid')
n_hid = n_kern * logical_hid_shape[0] * logical_hid_shape[1]
n_out = 10
w = shared_fn(0.01 * (my_rand(*shape_kern) - 0.5), 'w')
b = shared_fn(my_zeros((n_kern,)), 'b')
v = shared_fn(my_zeros((n_hid, n_out)), 'c')
c = shared_fn(my_zeros(n_out), 'c')
x = tensor.Tensor(dtype='float32', broadcastable=(0, 1, 0, 0))('x')
y = tensor.fmatrix('y')
lr = tensor.fscalar('lr')
conv_op = conv.ConvOp(shape_img[2:], shape_kern[2:], n_kern, n_batch, 1, 1)
hid = tensor.tanh(conv_op(x, w) + b.dimshuffle((0, 'x', 'x')))
hid_flat = hid.reshape((n_batch, n_hid))
out = tensor.tanh(tensor.dot(hid_flat, v) + c)
loss = tensor.sum(0.5 * (out - y) ** 2 * lr)
# print 'loss type', loss.type
params = [w, b, v, c]
gparams = tensor.grad(loss, params)
mode = get_mode(use_gpu)
# print 'building pfunc ...'
train = pfunc(
[x, y, lr],
[loss],
mode=mode,
updates=[(p, p - g) for p, g in zip(params, gparams)])
# for i, n in enumerate(train.maker.fgraph.toposort()):
# print i, n
xval = my_rand(*shape_img)
yval = my_rand(n_batch, n_out)
lr = theano._asarray(0.01, dtype='float32')
for i in xrange(n_train):
rval = train(xval, yval, lr)
# print 'training done'
return rval
def __init__(self, ob_space, action_space, **usercfg):
"""
Initialize your agent's parameters
"""
nO = ob_space.shape[0]
nA = action_space.n
# Here are all the algorithm parameters. You can modify them by passing in keyword args
self.config = dict(episode_max_length=100, timesteps_per_batch=10000, n_iter=100,
gamma=1.0, stepsize=0.05, nhid=20)
self.config.update(usercfg)
# Symbolic variables for observation, action, and advantage
# These variables stack the results from many timesteps--the first dimension is the timestep
ob_no = T.fmatrix() # Observation
a_n = T.ivector() # Discrete action
adv_n = T.fvector() # Advantage
def shared(arr):
return theano.shared(arr.astype('float64'))
# Create weights of neural network with one hidden layer
W0 = shared(np.random.randn(nO,self.config['nhid'])/np.sqrt(nO))
b0 = shared(np.zeros(self.config['nhid']))
W1 = shared(1e-4*np.random.randn(self.config['nhid'],nA))
b1 = shared(np.zeros(nA))
params = [W0, b0, W1, b1]
# Action probabilities
prob_na = T.nnet.softmax(T.tanh(ob_no.dot(W0)+b0[None,:]).dot(W1) + b1[None,:])
N = ob_no.shape[0]
# Loss function that we'll differentiate to get the policy gradient
# Note that we've divided by the total number of timesteps
loss = T.log(prob_na[T.arange(N), a_n]).dot(adv_n) / N
stepsize = T.fscalar()
grads = T.grad(loss, params)
# Perform parameter updates.
# I find that sgd doesn't work well
# updates = sgd_updates(grads, params, stepsize)
updates = rmsprop_updates(grads, params, stepsize)
self.pg_update = theano.function([ob_no, a_n, adv_n, stepsize], [], updates=updates, allow_input_downcast=True)
self.compute_prob = theano.function([ob_no], prob_na, allow_input_downcast=True)
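A hedged sketch of how the two compiled functions are used in a REINFORCE-style loop (agent, obs, acts and advs are illustrative names; obs has shape (T, nO), acts and advs have shape (T,)). Since both functions set allow_input_downcast=True, float64 observations and a Python float step size feed the fmatrix and fscalar inputs directly:

probs = agent.compute_prob(obs)                              # (T, nA) action probabilities, used when sampling actions
agent.pg_update(obs, acts, advs, agent.config['stepsize'])   # one policy-gradient step on the batch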
def __init__(self, game_params, arch_params, solver_params, trained_model, sn_dir):
params=None
if trained_model:
params = common.load_params(trained_model)
self.lr_func = create_learning_rate_func(solver_params)
self.x_h_0 = tt.fvector('x_h_0')
self.v_h_0 = tt.fvector('v_h_0')
self.t_h_0 = tt.fvector('t_h_0')
self.x_t_0 = tt.fmatrix('x_t_0')
self.v_t_0 = tt.fmatrix('v_t_0')
self.a_t_0 = tt.fmatrix('a_t_0')
self.t_t_0 = tt.fvector('t_t_0')
self.time_steps = tt.fvector('t_0')
self.exist = tt.bvector('exist')
self.is_leader = tt.fvector('is_leader')
self.x_goal = tt.fvector('x_goal')
self.turn_vec_h = tt.fvector('turn_vec_h')
self.turn_vec_t = tt.fvector('turn_vec_t')
self.n_steps = tt.iscalar('n_steps')
self.lr = tt.fscalar('lr')
self.sn_dir = sn_dir
self.game_params = game_params
self.arch_params = arch_params
self.solver_params = solver_params
self.model = CONTROLLER(self.x_h_0,
self.v_h_0,
self.t_h_0,
self.x_t_0,
self.v_t_0,
self.a_t_0,
self.t_t_0,
self.time_steps,
self.exist,
self.is_leader,
self.x_goal,
self.turn_vec_h,
self.turn_vec_t,
self.n_steps,
self.lr,
self.game_params,
self.arch_params,
self.solver_params,
params)
def __init__(self, game_params, arch_params, solver_params, trained_model, sn_dir):
params=[None, None]
if trained_model[0]:
params[0] = common.load_params(trained_model[0])
if trained_model[1]:
params[1] = common.load_params(trained_model[1])
self.lr_func = []
self.lr_func.append(create_learning_rate_func(solver_params['controler_0']))
self.lr_func.append(create_learning_rate_func(solver_params['controler_1']))
self.x_host_0 = tt.fvector('x_host_0')
self.v_host_0 = tt.fvector('v_host_0')
self.x_target_0 = tt.fvector('x_target_0')
self.v_target_0 = tt.fvector('v_target_0')
self.x_mines_0 = tt.fmatrix('x_mines_0')
self.mines_map = tt.fmatrix('mines_map')
self.time_steps = tt.fvector('time_steps')
self.force = tt.fmatrix('force')
self.n_steps_0 = tt.iscalar('n_steps_0')
self.n_steps_1 = tt.iscalar('n_steps_1')
self.lr = tt.fscalar('lr')
self.goal_1 = tt.fvector('goal_1')
self.trnsprnt = tt.fscalar('trnsprnt')
self.rand_goals = tt.fmatrix('rand_goals')
self.game_params = game_params
self.arch_params = arch_params
self.solver_params = solver_params
self.sn_dir = sn_dir
self.model = CONTROLLER(self.x_host_0,
self.v_host_0,
self.x_target_0,
self.v_target_0,
self.x_mines_0,
self.mines_map,
self.time_steps,
self.force,
self.n_steps_0,
self.n_steps_1,
self.lr,
self.goal_1,
self.trnsprnt,
self.rand_goals,
self.game_params,
self.arch_params,
self.solver_params,
params)
def sa(inputs, loss, params, outputs = (), srng=None, seed=1122334455, iters=32,
initial_temperature = 1.0e-1, learning_rate=1.0e-2):
if srng is None:
# from theano.sandbox.cuda.rng_curand import CURAND_RandomStreams as RandomStreams
from theano.sandbox.rng_mrg import MRG_RandomStreams as RandomStreams
srng = RandomStreams(seed=seed)
inputs_cached = [ to_shared(i) for i in inputs ]
input_setter = OrderedDict()
for inpc, inp in zip(inputs_cached, inputs):
input_setter[inpc] = inp
memorize_inputs = theano.function(inputs, [], updates=input_setter, no_default_updates=True)
inputs_givens = [
(inp, inpc)
for inp, inpc in zip(inputs, inputs_cached)
]
deltas = [
make_copy(param)
for param in params
]
alpha = T.fscalar('learning rate')
delta_setter = OrderedDict([
(delta, make_uniform(delta, -alpha, alpha, srng))
for delta in deltas
])
generate_deltas = theano.function([alpha], [], updates=delta_setter, no_default_updates=False)
probe_givens = [
(param, param + delta)
for param, delta in zip(params, deltas)
]
probe = theano.function(
[], [loss] + list(outputs),
givens=probe_givens + inputs_givens,
no_default_updates=True
)
params_setter = OrderedDict(probe_givens)
set_params = theano.function(
[], [],
updates=params_setter,
no_default_updates=True
)
return simulated_annealing(
probe, memorize_inputs, set_params, generate_deltas,
iters, initial_temperature, learning_rate
)
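A hedged sketch of wiring sa() to a toy least-squares problem (it assumes the helpers sa() relies on — to_shared, make_copy, make_uniform and simulated_annealing — are defined alongside it; what the call returns is decided by simulated_annealing, which is not shown here):

import numpy as np
import theano
import theano.tensor as T

x = T.fvector('x')
y = T.fvector('y')
w = theano.shared(np.float32(0.0), name='w')   # the single parameter to anneal
loss = T.mean((w * x - y) ** 2)

result = sa([x, y], loss, [w], iters=128,
            initial_temperature=1e-1, learning_rate=1e-2)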