def create_learning_rate_func(solver_params):
    # symbolic inputs for the learning-rate schedule (tt = theano.tensor, t = theano)
    base = tt.fscalar('base')
    gamma = tt.fscalar('gamma')
    power = tt.fscalar('power')
    itrvl = tt.fscalar('itrvl')
    iter = tt.scalar('iter')

    if solver_params['lr_type'] == 'inv':
        lr_ = base * tt.pow(1 + gamma * iter, -power)
        lr = t.function(
            inputs=[iter, t.Param(base, default=solver_params['base']), t.Param(gamma, default=solver_params['gamma']), t.Param(power, default=solver_params['power'])],
            outputs=lr_)

    elif solver_params['lr_type'] == 'fixed':
        lr_ = base
        lr = t.function(
            inputs=[iter, t.Param(base, default=solver_params['base'])],
            outputs=lr_,
            on_unused_input='ignore')

    elif solver_params['lr_type'] == 'episodic':
        lr_ = base / (tt.floor(iter / itrvl) + 1)
        lr = t.function(
            inputs=[iter, t.Param(base, default=solver_params['base']), t.Param(itrvl, default=solver_params['interval'])],
            outputs=lr_,
            on_unused_input='ignore')

    return lr
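
A minimal usage sketch of create_learning_rate_func, assuming `tt` is theano.tensor and `t` is theano (as in the snippet above); the solver_params values below are illustrative.

# Hypothetical solver_params for the 'inv' schedule; the key names match those read above.
solver_params = {'lr_type': 'inv', 'base': 0.01, 'gamma': 1e-4, 'power': 0.75}
lr_func = create_learning_rate_func(solver_params)

for it in (0, 1000, 10000):
    # The 'inv' schedule decays as base * (1 + gamma * it) ** -power.
    print('iteration %d -> lr %.6f' % (it, lr_func(it)))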
def sample_old(self, x, sigma, n_steps):
    # Enable on-the-fly graph computations
    # theano.config.compute_test_value = "raise"
    # in_val = T.fmatrix('input_values')
    # in_val.tag.test_value = np.asarray(
    #     np.random.rand(1, 784), dtype=theano.config.floatX)
    # s_sigma = T.fscalar("sigma_value")
    # s_sigma = np.asarray(
    #     np.random.rand(1), dtype=theano.config.floatX)
    # mode = "FAST_RUN"

    samples = []
    sample = x
    samples.append(x)
    for i in xrange(n_steps):
        print "Sample %d ..." % i
        sampler = self.sample_one_step(sample, sigma)
        sample = sampler.eval()
        samples.append(sample)
    return samples
def __init__(self, data_manager, t_layer_sizes, p_layer_sizes, dropout=0):
    print('{:25}'.format("Initializing Model"), end='', flush=True)
    self.t_layer_sizes = t_layer_sizes
    self.p_layer_sizes = p_layer_sizes
    self.dropout = dropout

    self.data_manager = data_manager
    self.t_input_size = self.data_manager.f.feature_count
    self.output_size = self.data_manager.s.information_count

    self.time_model = StackedCells(self.t_input_size, celltype=LSTM, layers=t_layer_sizes)
    self.time_model.layers.append(PassthroughLayer())

    p_input_size = t_layer_sizes[-1] + self.output_size
    self.pitch_model = StackedCells(p_input_size, celltype=LSTM, layers=p_layer_sizes)
    self.pitch_model.layers.append(Layer(p_layer_sizes[-1], self.output_size, activation=T.nnet.sigmoid))

    self.conservativity = T.fscalar()
    self.srng = T.shared_randomstreams.RandomStreams(np.random.randint(0, 1024))
    self.epsilon = np.spacing(np.float32(1.0))

    print("Done")
def setUp(self):
    super(TestPdbBreakpoint, self).setUp()

    # Sample computation that involves tensors with different numbers
    # of dimensions
    self.input1 = T.fmatrix()
    self.input2 = T.fscalar()
    self.output = T.dot((self.input1 - self.input2),
                        (self.input1 - self.input2).transpose())

    # Declare the conditional breakpoint
    self.breakpointOp = PdbBreakpoint("Sum of output too high")
    self.condition = T.gt(self.output.sum(), 1000)
    (self.monitored_input1,
     self.monitored_input2,
     self.monitored_output) = self.breakpointOp(self.condition,
                                                self.input1,
                                                self.input2, self.output)
def setUp(self):
    self.iv = T.tensor(dtype='int32', broadcastable=(False,))
    self.fv = T.tensor(dtype='float32', broadcastable=(False,))
    self.fv1 = T.tensor(dtype='float32', broadcastable=(True,))
    self.dv = T.tensor(dtype='float64', broadcastable=(False,))
    self.dv1 = T.tensor(dtype='float64', broadcastable=(True,))
    self.cv = T.tensor(dtype='complex64', broadcastable=(False,))
    self.zv = T.tensor(dtype='complex128', broadcastable=(False,))

    self.fv_2 = T.tensor(dtype='float32', broadcastable=(False,))
    self.fv1_2 = T.tensor(dtype='float32', broadcastable=(True,))
    self.dv_2 = T.tensor(dtype='float64', broadcastable=(False,))
    self.dv1_2 = T.tensor(dtype='float64', broadcastable=(True,))
    self.cv_2 = T.tensor(dtype='complex64', broadcastable=(False,))
    self.zv_2 = T.tensor(dtype='complex128', broadcastable=(False,))

    self.fm = T.fmatrix()
    self.dm = T.dmatrix()
    self.cm = T.cmatrix()
    self.zm = T.zmatrix()

    self.fa = T.fscalar()
    self.da = T.dscalar()
    self.ca = T.cscalar()
    self.za = T.zscalar()
def test_param_allow_downcast_floatX(self):
    a = tensor.fscalar('a')
    b = tensor.fscalar('b')
    c = tensor.fscalar('c')

    f = pfunc([In(a, allow_downcast=True),
               In(b, allow_downcast=False),
               In(c, allow_downcast=None)],
              (a + b + c))

    # If the values can be accurately represented, everything is OK
    assert numpy.all(f(0, 0, 0) == 0)

    # If allow_downcast is True, idem
    assert numpy.allclose(f(0.1, 0, 0), 0.1)

    # If allow_downcast is False, nope
    self.assertRaises(TypeError, f, 0, 0.1, 0)

    # If allow_downcast is None, it should work iff floatX=float32
    if config.floatX == 'float32':
        assert numpy.allclose(f(0, 0, 0.1), 0.1)
    else:
        self.assertRaises(TypeError, f, 0, 0, 0.1)
def test_grad_dtype_change(self):
    x = tensor.fscalar('x')
    y = tensor.fscalar('y')
    c = tensor.iscalar('c')

    def inner_fn(cond, x, y):
        new_cond = tensor.cast(tensor.switch(cond, x, y), 'int32')
        new_x = tensor.switch(cond, tensor.nnet.sigmoid(y * x), x)
        new_y = tensor.switch(cond, y, tensor.nnet.sigmoid(x))
        return new_cond, new_x, new_y

    values, _ = theano.scan(
        inner_fn,
        outputs_info=[c, x, y],
        n_steps=10,
        truncate_gradient=-1,
        go_backwards=False)
    gX, gY = tensor.grad(values[1].sum(), [x, y])
    f = theano.function([c, x, y], [gX, gY],
                        allow_input_downcast=True)
    # Check for runtime errors
    f(numpy.int32(0), numpy.float32(1.), numpy.float32(.5))
def test_gpualloc_output_to_gpu():
    a_val = numpy.asarray(numpy.random.rand(4, 5), dtype='float32')
    a = tcn.shared_constructor(a_val)

    b = T.fscalar()
    f = theano.function([b], T.ones_like(a) + b, mode=mode_without_gpu)
    f_gpu = theano.function([b], B.gpu_from_host(T.ones_like(a)) + b,
                            mode=mode_with_gpu)

    f(2)
    f_gpu(2)

    assert sum([node.op == T.alloc for node in f.maker.fgraph.toposort()]) == 1
    assert sum([node.op == B.gpu_alloc
                for node in f_gpu.maker.fgraph.toposort()]) == 1

    assert numpy.allclose(numpy.ones(a.get_value(borrow=True).shape) + 9,
                          f_gpu(9))
    assert numpy.allclose(f(5), f_gpu(5))
def get_tile_coder(min_val, max_val, num_tiles, num_tilings,
                   num_features, learning_rate):
    # x.shape: (num_features), y.shape: ()
    x = T.fvector('x')
    y = T.fscalar('y')

    tile_coding_layer = TileCodingLayer(
        min_val=min_val, max_val=max_val,
        num_tiles=num_tiles, num_tilings=num_tilings,
        num_features=num_features)

    # quantized_x
    q_x = tile_coding_layer.quantize(x)
    y_hat = tile_coding_layer.approximate(q_x)
    # step size for the tile-coding update
    updates = tile_coding_layer.update_rule(y, y_hat, learning_rate)

    train = theano.function([x, y], y_hat, updates=updates, allow_input_downcast=True)
    eval_ = theano.function([x], y_hat, allow_input_downcast=True)

    return train, eval_
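
A hedged usage sketch for get_tile_coder; the constructor arguments and target value are illustrative, `np` is assumed to be numpy, and TileCodingLayer comes from the surrounding project.

# Two input features tiled over [0, 1] with 8 tiles per tiling and 4 tilings.
train, evaluate = get_tile_coder(min_val=0.0, max_val=1.0, num_tiles=8,
                                 num_tilings=4, num_features=2, learning_rate=0.1)

x = np.array([0.3, 0.7], dtype='float32')
y = 1.5
for _ in range(20):
    train(x, y)        # each call applies one update of the tile weights toward the target y
print(evaluate(x))     # approximation of y after the updates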
def optimize_function(model, solver_params, config=None):
    """
    Create an optimization function that receives gradients.
    Parameters:
        params - parameters
        config - training configuration
    Returns:
        an updating function that receives gradients
    """
    gradients_ = [dim_to_var(p.ndim) for p in model.params]

    lr_ = tt.fscalar('lr_')

    updates = optimizers.optimizer(lr=lr_,
                                   model=model,
                                   gradients=gradients_,
                                   solver_params=solver_params)

    return t.function(inputs=[lr_] + gradients_, outputs=[], updates=updates)
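
A hypothetical call pattern for the compiled update function, assuming `model.params` is a list of shared variables and that `dim_to_var`, `optimizers.optimizer` and `solver_params` behave as in the surrounding project; `np` is numpy and the zero gradients exist purely to show the calling convention.

update_fn = optimize_function(model, solver_params)

# One array per model parameter, in the same order as model.params.
grads = [np.zeros(p.get_value().shape, dtype=p.dtype) for p in model.params]
update_fn(np.float32(0.01), *grads)   # first positional argument is the learning rate lr_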
def __init__(self, game_params, arch_params, solver_params, trained_model, sn_dir):
    params = None
    if trained_model:
        params = common.load_params(trained_model)

    self.lr_func = create_learning_rate_func(solver_params)

    self.v_h_0 = tt.fvector('v_h_0')
    self.x_h_0 = tt.fvector('x_h_0')
    self.v_t_0 = tt.fmatrix('v_t_0')
    self.x_t_0 = tt.fmatrix('x_t_0')
    self.a_t_0 = tt.fmatrix('a_t_0')
    self.is_aggressive = tt.fmatrix('is_aggressive')
    self.lr_ = tt.fscalar('lr')
    self.n_steps_ = tt.iscalar('n_steps')

    self.sn_dir = sn_dir
    self.game_params = game_params
    self.arch_params = arch_params
    self.solver_params = solver_params

    self.model = CONTROLLER(self.v_h_0, self.x_h_0, self.v_t_0, self.x_t_0, self.a_t_0, self.is_aggressive, self.lr_, self.n_steps_, self.game_params, self.arch_params, self.solver_params, params)
def setUp(self):
    self.initial = np.array([0.1, 0.1], dtype='float32')
    x = theano.shared(self.initial)

    self.params = [x]

    left_bound = T.fscalar('left bound')
    right_bound = T.fscalar('right bound')

    self.inputs = [left_bound, right_bound]

    y = T.sum(x)
    loss = -T.log(y - left_bound) - T.log(right_bound - y) + 1.0e-3 * T.sum(x ** 2)
    self.loss = loss

    x0 = (0.01 + 0.011 + 2.0 + 2.1) / 4.0
    self.approx_solution = np.array([x0 / 2, x0 / 2], dtype='float32')

    self.get_inputs = lambda: [
        np.float32(np.random.uniform(0.01, 0.011)),
        np.float32(np.random.uniform(2.0, 2.1)),
    ]

    x_sub = T.fvector('x sub')
    self.get_loss = theano.function([x_sub] + self.inputs, self.loss, givens=[(self.params[0], x_sub)])
def genLossAndGradient(self):
    # establish loss
    kl_div = lasagne.layers.get_output(self.kl_loss_layer,
                                       deterministic=False)
    kl_loss = lasagne.objectives.aggregate(kl_div, mode='sum')

    # assume the reconstruction error follows a standard Gaussian distribution
    recons_loss = lasagne.objectives.squared_error(self.recons_var,
                                                   self.pose_input_var)
    recons_loss = recons_loss * 0.5
    recons_loss = lasagne.objectives.aggregate(recons_loss, mode='sum')

    # calculate gradient
    loss = kl_loss + recons_loss
    # loss = recons_loss
    lr_var = T.fscalar('lr')
    update_params = self.encoder_params + self.decoder_params
    update_vars = lasagne.updates.adam(loss, update_params,
                                       lr_var, self.b1)

    # compile the functions
    self.train_fn = theano.function(
        [self.pose_input_var, self.noise_input_var, lr_var],
        loss,
        updates=update_vars)

    self.recons_fn = theano.function(
        [self.pose_input_var, self.noise_input_var],
        self.recons_tvar
    )

    self.encode_fn = theano.function(
        [self.pose_input_var, self.noise_input_var],
        self.z_tvar
    )
    print('[PoseVAE] function compiled')
def __init__(self, classifier, criterion, learning_rate, trainset, clip_threshold=0):
    self.eta = learning_rate
    self.is_weighted = trainset.is_weighted
    self.trainset = trainset

    if clip_threshold > 0:
        gparams = [T.clip(T.grad(criterion.cost, param), -clip_threshold, clip_threshold) for param in classifier.params]
    else:
        gparams = [T.grad(criterion.cost, param) for param in classifier.params]

    lr = T.fscalar()
    updates = [
        (param, param - lr * gparam)
        for param, gparam in zip(classifier.params, gparams)
    ]

    x = classifier.input
    y = criterion.y

    if self.is_weighted:
        w = criterion.w
        self.step_func = theano.function(
            inputs=[x, y, w, lr],
            outputs=[criterion.cost] + gparams,
            updates=updates,
        )
    else:
        self.step_func = theano.function(
            inputs=[x, y, lr],
            outputs=[criterion.cost] + gparams,
            updates=updates,
        )
def train_linreg(X_train, y_train, eta, epochs):
    costs = []

    # initialize symbolic variables and the shared weight vector (bias is w[0])
    eta0 = T.fscalar('eta0')
    y = T.fvector(name='y')
    X = T.fmatrix(name='X')
    w = theano.shared(
        np.zeros(shape=(X_train.shape[1] + 1), dtype=theano.config.floatX),
        name='w',
    )

    # calculate the sum-of-squared-errors cost
    net_input = T.dot(X, w[1:]) + w[0]
    errors = y - net_input
    cost = T.sum(T.pow(errors, 2))

    # gradient-descent update on w
    gradient = T.grad(cost, wrt=w)
    update = [(w, w - (eta0 * gradient))]

    # compile the training function; the data are bound through givens
    train = theano.function(
        inputs=[eta0],
        outputs=cost,
        updates=update,
        givens={X: X_train, y: y_train},
    )

    for _ in range(epochs):
        costs.append(train(eta))

    return costs, w
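
A hedged usage sketch for train_linreg. It assumes `np` is numpy and that theano.config.floatX is float32, so the float32 placeholders above type-match the data bound through givens; the dataset and hyperparameters are illustrative.

# Tiny synthetic dataset with y = x, so the fitted weights should approach bias 0 and slope 1.
X_train = np.asarray([[1.0], [2.0], [3.0], [4.0]], dtype=theano.config.floatX)
y_train = np.asarray([1.0, 2.0, 3.0, 4.0], dtype=theano.config.floatX)

costs, w = train_linreg(X_train, y_train, eta=0.001, epochs=20)
print(costs[-1], w.get_value())   # the summed squared error should shrink across epochs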
Source: model.py, project Towards-a-Biologically-Plausible-Backprop (author: bscellier)
def __build_free_phase(self):
    n_iterations = T.iscalar('n_iterations')
    epsilon = T.fscalar('epsilon')

    def step(*layers):
        E_sum = T.sum(self.__energy(layers))
        layers_dot = T.grad(-E_sum, list(layers))  # temporal derivative of the state (free trajectory)
        layers_new = [layers[0]] + [T.clip(layer + epsilon * dot, 0., 1.) for layer, dot in zip(layers, layers_dot)][1:]
        return layers_new

    (layers, updates) = theano.scan(
        step,
        outputs_info=self.layers,
        n_steps=n_iterations
    )
    layers_end = [layer[-1] for layer in layers]

    for particles, layer, layer_end in zip(self.persistent_particles, self.layers[1:], layers_end[1:]):
        updates[particles] = T.set_subtensor(layer, layer_end)

    free_phase = theano.function(
        inputs=[n_iterations, epsilon],
        outputs=[],
        updates=updates
    )

    return free_phase
Source: model2.py, project Towards-a-Biologically-Plausible-Backprop (author: bscellier)
def __build_positive_phase(self):
    n_iterations = T.iscalar('n_iterations')
    alphas = [T.fscalar("alpha_W" + str(r + 1)) for r in range(len(self.weights))]

    def backprop(*layers):
        layers_new = [layers[-1]]
        for k in range(len(self.layers) - 2, 0, -1):
            layers_new += [pi(T.dot(layers[k-1], self.weights[k-1]) + T.dot(layers_new[-1], self.weights[k].T) + self.biases[k])]
        layers_new += [layers[0]]
        layers_new.reverse()
        return layers_new

    (layers, updates) = theano.scan(
        backprop,
        outputs_info=self.layers[:-1] + [self.y_data_one_hot],
        n_steps=n_iterations
    )
    layers_new = [layer[-1] for layer in layers]

    Delta_layers = [(layer_new - layer) for layer_new, layer in zip(layers_new[1:], self.layers[1:])]
    biases_new = [b + alpha * T.mean(Delta, axis=0) for b, alpha, Delta in zip(self.biases[1:], alphas, Delta_layers)]
    weights_new = [W + alpha * T.dot(layer.T, Delta) / T.cast(self.layers[0].shape[0], dtype=theano.config.floatX) for W, alpha, layer, Delta in zip(self.weights, alphas, self.layers[:-1], Delta_layers)]

    for bias, bias_new in zip(self.biases[1:], biases_new):
        updates[bias] = bias_new
    for weight, weight_new in zip(self.weights, weights_new):
        updates[weight] = weight_new

    positive_phase = theano.function(
        inputs=[n_iterations] + alphas,
        outputs=[],
        updates=updates
    )

    return positive_phase
def test_no_complex():
    width_var = tensor.cscalar()
    freq_var = tensor.fscalar()
    signal_var = tensor.fscalar()
    stft_out = tensor.exp(width_var * freq_var) * signal_var
    theano.function([width_var, freq_var, signal_var], stft_out,
                    mode=mode_with_gpu)
def test_copy_share_memory(self):
    x = T.fscalar('x')
    # SharedVariable for tests, one of them has update
    y = theano.shared(value=1)
    z = theano.shared(value=2)
    out = T.tanh((x + y + 2) / (x + z - 0.2)**2)

    # Test for different linkers
    for mode in ["FAST_RUN", "FAST_COMPILE"]:
        ori = theano.function([x], [out], mode=mode, updates={z: z + 1})
        cpy = ori.copy(share_memory=True)

        # Test if memories shared
        storage_map_ori = ori.fn.storage_map
        storage_map_cpy = cpy.fn.storage_map
        fgraph_cpy = cpy.maker.fgraph

        # Assert intermediate and Constant storages are shared,
        # and output storages are not shared
        i_o_variables = fgraph_cpy.inputs + fgraph_cpy.outputs
        ori_storages = storage_map_ori.values()
        l = [val for key, val in storage_map_cpy.items()
             if key not in i_o_variables or isinstance(key, theano.tensor.Constant)]
        for storage in l:
            self.assertTrue(any([storage is s for s in ori_storages]))

        # Assert storages of SharedVariable without updates are shared
        for (input, _1, _2), here, there in zip(ori.indices,
                                                ori.input_storage,
                                                cpy.input_storage):
            self.assertTrue(here.data is there.data)
def test_copy_delete_updates(self):
    w = T.iscalar('w')
    x = T.fscalar('x')
    # SharedVariable for tests, one of them has update
    y = theano.shared(value=1, name='y')
    z = theano.shared(value=2, name='z')
    out = x + y + z

    # Test for different linkers
    # for mode in ["FAST_RUN","FAST_COMPILE"]:
    # second_time = False
    for mode in ["FAST_RUN", "FAST_COMPILE"]:
        ori = theano.function([x], out, mode=mode, updates={z: z * 2})
        cpy = ori.copy(delete_updates=True)

        assert cpy(1)[0] == 4
        assert cpy(1)[0] == 4
        assert cpy(1)[0] == 4

    # Test if unused implicit and explicit inputs from delete_updates
    # are ignored as intended.
    for mode in ["FAST_RUN", "FAST_COMPILE"]:
        ori = theano.function([x], x, mode=mode, updates={z: z * 2})
        cpy = ori.copy(delete_updates=True)

        ori = theano.function([x, w], x, mode=mode, updates={z: z + w})
        cpy = ori.copy(delete_updates=True)
def test_allow_downcast_floatX(self):
    a = tensor.fscalar('a')
    b = tensor.fvector('b')

    f = pfunc([a, b], (a + b), allow_input_downcast=True)
    g = pfunc([a, b], (a + b), allow_input_downcast=False)
    h = pfunc([a, b], (a + b), allow_input_downcast=None)

    # If the values can be accurately represented, OK
    assert numpy.all(f(0, [0]) == 0)
    assert numpy.all(g(0, [0]) == 0)
    assert numpy.all(h(0, [0]) == 0)

    # For the vector: OK iff allow_input_downcast is True
    assert numpy.allclose(f(0, [0.1]), 0.1)
    self.assertRaises(TypeError, g, 0, [0.1])
    self.assertRaises(TypeError, h, 0, [0.1])

    # For the scalar: OK if allow_input_downcast is True,
    # or None and floatX==float32
    assert numpy.allclose(f(0.1, [0]), 0.1)
    self.assertRaises(TypeError, g, 0.1, [0])
    if config.floatX == 'float32':
        assert numpy.allclose(h(0.1, [0]), 0.1)
    else:
        self.assertRaises(TypeError, h, 0.1, [0])
def test_scalar(self):
    x = cuda.fscalar()
    y = numpy.array(7, dtype='float32')
    assert y.size == theano.function([x], x.size)(y)
def setUp(self):
    self.initial = -2.0
    self.approx_solution = 0.0

    x = theano.shared(np.array(self.initial, dtype='float32'))
    self.params = [x]
    self.inputs = []

    loss = -T.nnet.sigmoid(10.0 * x) * T.nnet.sigmoid(-10.0 * x)
    self.loss = loss

    x_sub = T.fscalar('x sub')
    self.get_loss = theano.function([x_sub] + self.inputs, self.loss, givens=[(self.params[0], x_sub)])
def _train_procedures(self):
    self.learning_rate = T.fscalar('learning rate')

    self.grads_generator = theano.grad(self.loss_generator, self.params_generator)
    self.grads_generator_clipped = updates.total_norm_constraint(
        self.grads_generator, max_norm=self.grad_clip_norm
    )

    upd_generator = updates.sgd(
        self.grads_generator_clipped, self.params_generator,
        learning_rate=self.learning_rate
    )

    self.train_generator = theano.function(
        [self.X_geant_raw, self.learning_rate],
        self.loss_pseudo,
        updates=upd_generator
    )

    self.grads_discriminator = theano.grad(self.loss_discriminator, self.params_discriminator)
    self.grads_discriminator_clipped = updates.total_norm_constraint(
        self.grads_discriminator, max_norm=self.grad_clip_norm
    )

    upd_discriminator = updates.sgd(
        self.grads_discriminator_clipped, self.params_discriminator,
        learning_rate=self.learning_rate
    )

    self.train_discriminator = theano.function(
        [self.X_geant_raw, self.X_real_raw, self.learning_rate],
        [self.loss_pseudo, self.loss_real],
        updates=upd_discriminator
    )

    self.anneal_discriminator = nn.updates.sa(
        [self.X_geant_raw, self.X_real_raw], self.loss_discriminator,
        params=self.params_discriminator,
        **self.annealing_args
    )
def adastep(
        inputs, loss, params, outputs=(),
        max_iter=8, rho=0.9, momentum=None,
        initial_learning_rate=1.0e-3, max_learning_rate=1.0, max_delta=1.0e-1, eps=1.0e-6):
    cache_inputs, cache_grads, get_loss, set_params = grad_base(
        inputs, loss, params, outputs, norm_gradients=False, momentum=momentum
    )

    one = T.constant(1.0, dtype='float32')

    v = theano.shared(
        np.float32(initial_learning_rate), name='v'
    )
    new_v = T.fscalar()

    upd_v = OrderedDict()
    upd_v[v] = v * rho + new_v * (one - rho)

    update_v = theano.function([new_v], v, updates=upd_v, no_default_updates=True)
    get_v = theano.function([], v, no_default_updates=True)

    return _adastep(
        cache_inputs, cache_grads, get_loss, set_params,
        get_v, update_v,
        max_iter=max_iter,
        max_learning_rate=max_learning_rate,
        max_delta=max_delta,
        eps=eps
    )
def optimize_gan_hkl(self, model, lam1=0.00001):
    """
    Optimizer for an hkl-packaged dataset.
    Returns the updates for the discriminator & generator and the computed costs for the model.
    """
    i = T.iscalar('i')
    lr = T.fscalar('lr')
    Xu = T.fmatrix('X')

    cost_disc = model.cost_dis(Xu, self.batch_sz) \
        + lam1 * model.dis_network.weight_decay_l2()
    gparams_dis = T.grad(cost_disc, model.dis_network.params)

    cost_gen = model.cost_gen(self.batch_sz)
    gparams_gen = T.grad(cost_gen, model.gen_network.params)

    updates_dis = self.ADAM(model.dis_network.params, gparams_dis, lr)
    updates_gen = self.ADAM(model.gen_network.params, gparams_gen, lr)

    discriminator_update = theano.function([Xu, theano.Param(lr, default=self.epsilon_dis)],
                                           outputs=cost_disc, updates=updates_dis)

    generator_update = theano.function([theano.Param(lr, default=self.epsilon_gen)],
                                       outputs=cost_gen, updates=updates_gen)

    get_valid_cost = theano.function([Xu], outputs=[cost_disc, cost_gen])
    get_test_cost = theano.function([Xu], outputs=[cost_disc, cost_gen])

    return discriminator_update, generator_update, get_valid_cost, get_test_cost
def optimize_gan(self, model, train_set, valid_set, test_set, lam1=0.00001):
    """
    Optimizer for a non-packaged dataset; returns the updates for the
    discriminator & generator, as well as the computed costs.
    """
    i = T.iscalar('i')
    lr = T.fscalar('lr')
    Xu = T.matrix('X')

    cost_disc = model.cost_dis(Xu, self.batch_sz) \
        + lam1 * model.dis_network.weight_decay_l2()
    gparams_dis = T.grad(cost_disc, model.dis_network.params)

    cost_gen = model.cost_gen(self.batch_sz)
    gparams_gen = T.grad(cost_gen, model.gen_network.params)

    updates_dis = self.ADAM(model.dis_network.params, gparams_dis, lr)
    updates_gen = self.ADAM(model.gen_network.params, gparams_gen, lr)

    discriminator_update = theano.function([i, theano.Param(lr, default=self.epsilon_dis)],
                                           outputs=cost_disc, updates=updates_dis,
                                           givens={Xu: train_set[0][i * self.batch_sz:(i + 1) * self.batch_sz]})

    generator_update = theano.function([theano.Param(lr, default=self.epsilon_gen)],
                                       outputs=cost_gen, updates=updates_gen)

    get_valid_cost = theano.function([i], outputs=[cost_disc, cost_gen],
                                     givens={Xu: valid_set[0][i * self.batch_sz:(i + 1) * self.batch_sz]})

    get_test_cost = theano.function([i], outputs=[cost_disc, cost_gen],
                                    givens={Xu: test_set[0][i * self.batch_sz:(i + 1) * self.batch_sz]})

    return discriminator_update, generator_update, get_valid_cost, get_test_cost
Source: 1-train-CBOW.py, project Deep-Learning-with-Theano (author: PacktPublishing)
def get_train_model(data, inputs, loss, params, batch_size=32):
    """
    Trainer: defines the training function of the model on the data set
    passed as the parameters of the function.
    Parameters:
        contexts: list of the contexts (the input of the trainer)
        targets: list of the targets
    Returns:
        Theano function representing the train model
    """
    data_contexts = data[0]
    data_targets = data[1]

    context = inputs[0]
    target = inputs[1]

    # Theano input: the learning rate. Its value can be a constant like 0.1,
    # or it can come from a function such as a decaying learning-rate schedule.
    learning_rate = T.fscalar('learning_rate')
    index = T.lscalar('index')  # the index of the minibatch

    g_params = T.grad(cost=loss, wrt=params)

    updates = [
        (param, param - learning_rate * gparam)
        for param, gparam in zip(params, g_params)
    ]

    train_fun = theano.function(
        [index, learning_rate],
        loss,
        updates=updates,
        givens={
            context: data_contexts[index * batch_size: (index + 1) * batch_size],
            target: data_targets[index * batch_size: (index + 1) * batch_size]
        }
    )

    return train_fun
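
A hedged usage sketch of get_train_model. It assumes `np` is numpy, that `context`, `target`, `loss` and `params` are the symbolic graph pieces built elsewhere in the project, and that the context/target data live in Theano shared variables (the symbolic slicing in the givens above requires indexable shared storage); the shapes and hyperparameters are illustrative.

# Hypothetical CBOW-style data: 1000 examples, 4 context word ids each, one target id.
contexts_shared = theano.shared(np.zeros((1000, 4), dtype='int32'))
targets_shared = theano.shared(np.zeros((1000,), dtype='int32'))

train_fn = get_train_model((contexts_shared, targets_shared),
                           (context, target), loss, params, batch_size=32)

n_batches = 1000 // 32
for epoch in range(5):
    # One pass over the data; np.float32 keeps the learning-rate input at the fscalar dtype.
    epoch_loss = np.mean([train_fn(i, np.float32(0.1)) for i in range(n_batches)])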
def __init__(self, classifier, criterion, learning_rate, trainset, clip_threshold=0):
    self.eta = learning_rate
    self.is_weighted = trainset.is_weighted

    if clip_threshold > 0:
        gparams = [T.clip(T.grad(criterion.cost, param), -clip_threshold, clip_threshold) for param in classifier.params]
    else:
        gparams = [T.grad(criterion.cost, param) for param in classifier.params]

    lr = T.fscalar()
    updates = [
        (param, param - lr * gparam)
        for param, gparam in zip(classifier.params, gparams)
    ]

    index = T.lscalar()  # index to a [mini]batch
    x = classifier.input
    y = criterion.y

    if self.is_weighted:
        w = criterion.w
        self.step_func = theano.function(
            inputs=[index, lr],
            outputs=[criterion.cost] + gparams,
            updates=updates,
            givens={
                x: trainset.get_x(index),
                y: trainset.get_y(index),
                w: trainset.get_w(index)
            }
        )
    else:
        self.step_func = theano.function(
            inputs=[index, lr],
            outputs=[criterion.cost] + gparams,
            updates=updates,
            givens={
                x: trainset.get_x(index),
                y: trainset.get_y(index)
            }
        )