def pretraining_functions(self, train_set_x, batch_size, k):
'''Generates a list of functions, for performing one step of
gradient descent at a given layer. The function will require
as input the minibatch index, and to train an RBM you just
need to iterate, calling the corresponding function on all
minibatch indexes.
:type train_set_x: theano.tensor.TensorType
:param train_set_x: Shared var. that contains all datapoints used
for training the RBM
:type batch_size: int
:param batch_size: size of a [mini]batch
:param k: number of Gibbs steps to do in CD-k / PCD-k
'''
# index to a [mini]batch
index = T.lscalar('index') # index to a minibatch
learning_rate = T.scalar('lr') # learning rate to use
# number of batches
n_batches = train_set_x.get_value(borrow=True).shape[0] / batch_size
# beginning of a batch, given `index`
batch_begin = index * batch_size
# ending of a batch given `index`
batch_end = batch_begin + batch_size
pretrain_fns = []
for rbm in self.rbm_layers:
# get the cost and the updates list
# using CD-k here (persistent=None) for training each RBM.
# TODO: change cost function to reconstruction error
cost, updates = rbm.get_cost_updates(learning_rate,
persistent=None, k=k)
# compile the theano function
fn = theano.function(
inputs=[index, theano.In(learning_rate, value=0.1)],
outputs=cost,
updates=updates,
givens={
self.x: train_set_x[batch_begin:batch_end]
}
)
# append `fn` to the list of functions
pretrain_fns.append(fn)
return pretrain_fns
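A minimal driver sketch (not part of the snippet above), mirroring the deeplearning.net DBN tutorial: iterate over layers, epochs and minibatch indices, passing the learning rate through the `lr` keyword exposed by theano.In(learning_rate, value=0.1). Names such as `dbn`, `pretraining_epochs` and `pretrain_lr` are illustrative assumptions.

import numpy

def pretrain_dbn_sketch(dbn, train_set_x, batch_size=10, k=1,
                        pretraining_epochs=10, pretrain_lr=0.01):
    # hypothetical layer-wise pre-training loop
    n_train_batches = train_set_x.get_value(borrow=True).shape[0] // batch_size
    pretraining_fns = dbn.pretraining_functions(train_set_x=train_set_x,
                                                batch_size=batch_size, k=k)
    for i in range(len(pretraining_fns)):
        for epoch in range(pretraining_epochs):
            # one call per minibatch; omitting `lr` would fall back to 0.1
            costs = [pretraining_fns[i](index=batch_index, lr=pretrain_lr)
                     for batch_index in range(n_train_batches)]
            print('Pre-training layer %i, epoch %d, cost %f'
                  % (i, epoch, numpy.mean(costs)))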
def pretraining_functions(self, pre_train_set_x):
''' Generates a list of functions, each of them implementing one
step in training the dA corresponding to the layer with the same index.
Each function takes the minibatch index as input; to train a dA, simply
iterate, calling the corresponding function on all minibatch indexes.
:type pre_train_set_x: theano.tensor.TensorType
:param pre_train_set_x: Shared variable that contains all datapoints
used for training the dAs
Note: the batch size is taken from self.pretrain_batch_size; the
corruption level and learning rate are optional Theano inputs of the
compiled functions (defaults 0.2 and 0.1).
'''
# index to a [mini]batch
index = T.lscalar('index') # index to a minibatch
corruption_level = T.scalar('corruption') # % of corruption to use
learning_rate = T.scalar('lr') # learning rate to use
# beginning of a batch, given `index`
batch_begin = index * self.pretrain_batch_size
# ending of a batch given `index`
batch_end = batch_begin + self.pretrain_batch_size
pretrain_fns = []
for dA in self.dA_layers:
# get the cost and the updates list
cost, updates = dA.get_cost_updates(corruption_level,
learning_rate)
# compile the theano function
fn = theano.function(
inputs=[
index,
theano.In(corruption_level, value=0.2),
theano.In(learning_rate, value=0.1)
],
outputs=cost,
updates=updates,
givens={
self.x: pre_train_set_x[batch_begin: batch_end]
}
)
# append `fn` to the list of functions
pretrain_fns.append(fn)
return pretrain_fns
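A usage sketch under assumed names (`sda`, `pretraining_epochs`, `corruption_levels`): because the corruption level and learning rate are wrapped in theano.In with default values, they can be omitted or overridden per layer through the `corruption` and `lr` keywords; the batch size comes from self.pretrain_batch_size.

import numpy

def pretrain_sda_sketch(sda, pre_train_set_x, pretraining_epochs=15,
                        pretrain_lr=0.001, corruption_levels=(0.1, 0.2, 0.3)):
    # hypothetical driver loop; assumes one corruption level per dA layer
    n_train_batches = (pre_train_set_x.get_value(borrow=True).shape[0]
                       // sda.pretrain_batch_size)
    pretraining_fns = sda.pretraining_functions(pre_train_set_x)
    for i, fn in enumerate(pretraining_fns):
        for epoch in range(pretraining_epochs):
            # omit `corruption`/`lr` to use the compiled defaults 0.2 and 0.1
            costs = [fn(index=batch_index,
                        corruption=corruption_levels[i],
                        lr=pretrain_lr)
                     for batch_index in range(n_train_batches)]
            print('Pre-training layer %i, epoch %d, cost %f'
                  % (i, epoch, numpy.mean(costs)))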
def pretraining_functions(self, train_set_x):
'''Generates a list of functions, for performing one step of
gradient descent at a given layer. The function will require
as input the minibatch index, and to train an RBM you just
need to iterate, calling the corresponding function on all
minibatch indexes.
:type train_set_x: theano.tensor.TensorType
:param train_set_x: Shared var. that contains all datapoints used
for training the RBM
Note: the batch size is taken from self.pretrain_batch_size, and the
number of Gibbs steps used in CD-k / PCD-k from self.k.
'''
# index to a [mini]batch
index = T.lscalar('index') # index to a minibatch
learning_rate = T.scalar('lr') # learning rate to use
# beginning of a batch, given `index`
batch_begin = index * self.pretrain_batch_size
# ending of a batch given `index`
batch_end = batch_begin + self.pretrain_batch_size
pretrain_fns = []
for rbm in self.rbm_layers:
# get the cost and the updates list
# using CD-k here (persistent=None) for training each RBM.
# TODO: change cost function to reconstruction error
cost, updates = rbm.get_cost_updates(learning_rate,
persistent=None, k=self.k)
# compile the theano function
fn = theano.function(
inputs=[index, theano.In(learning_rate, value=0.1)],
outputs=cost,
updates=updates,
givens={
self.x: train_set_x[batch_begin:batch_end]
}
)
# append `fn` to the list of functions
pretrain_fns.append(fn)
return pretrain_fns
def pretraining_functions(self, train_set_x, batch_size):
''' Generates a list of functions, each of them implementing one
step in training the dA corresponding to the layer with the same index.
The function will require as input the minibatch index, and to train
a dA you just need to iterate, calling the corresponding function on
all minibatch indexes.
:type train_set_x: theano.tensor.TensorType
:param train_set_x: Shared variable that contains all datapoints used
for training the dA
:type batch_size: int
:param batch_size: size of a [mini]batch
:type learning_rate: float
:param learning_rate: learning rate used during training for any of
the dA layers
'''
# index to a [mini]batch
index = T.lscalar('index') # index to a minibatch
corruption_level = T.scalar('corruption') # % of corruption to use
learning_rate = T.scalar('lr') # learning rate to use
# beginning of a batch, given `index`
batch_begin = index * batch_size
# ending of a batch given `index`
batch_end = batch_begin + batch_size
pretrain_fns = []
for dA in self.dA_layers:
# get the cost and the updates list
cost, updates = dA.get_cost_updates(corruption_level,
learning_rate)
# compile the theano function
fn = theano.function(
inputs=[
index,
theano.In(corruption_level, value=0.2),
theano.In(learning_rate, value=0.1)
],
outputs=cost,
updates=updates,
givens={
self.x: train_set_x[batch_begin: batch_end]
}
)
# append `fn` to the list of functions
pretrain_fns.append(fn)
return pretrain_fns
def __theano_build_train__(self):
params = self.params
params_names = self.param_names
hidden_dim = self.hidden_dim
batch_size = self.batch_size
# inputs[0], first sentence.
# inputs[1], second sentence.
# inputs[2], encoding target
inputs = T.itensor3("inputs")
masks = T.ftensor3("masks")
def rnn_cell(x, mx, ph, Wh):
h = T.tanh(ph.dot(Wh) + x)
h = mx[:, None] * h + (1-mx[:, None]) * ph
return [h] # size = sample * hidden : 3 * 4
# encoding first sentence
_state = params["E"][inputs[0].flatten(), :].reshape([inputs[0].shape[0], inputs[0].shape[1], hidden_dim])
_state = _state.dot(params["W"][0]) + params["B"][0]
[h1], updates = theano.scan(
fn=rnn_cell,
sequences=[_state, masks[0]],
truncate_gradient=self.truncate,
outputs_info=[dict(initial=T.zeros([batch_size, hidden_dim]))],
non_sequences=[params["W"][1]])
# decoding second sentence
_state = params["E"][inputs[1].flatten(), :].reshape([inputs[1].shape[0], inputs[1].shape[1], hidden_dim])
_state = _state.dot(params["W"][2]) + params["B"][1]
[h2], updates = theano.scan(
fn=rnn_cell,
sequences=[_state, masks[1]],
truncate_gradient=self.truncate,
outputs_info=[dict(initial=h1[-1])],
non_sequences=[params["W"][3]])
# Loss
_s = h2.dot(params["DecodeW"]) + params["DecodeB"]
_s = _s.reshape([_s.shape[0] * _s.shape[1], _s.shape[2]])
_s = T.nnet.softmax(_s)
_cost = T.nnet.categorical_crossentropy(_s, inputs[2].flatten())
_cost = T.sum(_cost * masks[2].flatten())
# SGD parameters
learning_rate = T.scalar("learning_rate")
decay = T.scalar("decay")
_grads, _updates = rms_prop(_cost, params_names, params, learning_rate, decay)
# Assign functions
self.bptt = theano.function([inputs, masks], _grads)
self.loss = theano.function([inputs, masks], _cost)
self.weights = theano.function([inputs, masks], _s)
self.sgd_step = theano.function(
[inputs, masks, learning_rate, decay], #theano.In(decay, value=0.9)],
updates=_updates)
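A hedged usage sketch: assuming the three slices of `inputs` hold the first sentence, the second sentence and the decoding target as int32 word-index matrices of shape (seq_len, batch_size), with matching float32 masks, the compiled functions can be exercised as below. The dummy shapes and the learning_rate/decay values are assumptions, not values from the original project.

import numpy as np

def rnn_step_sketch(model, seq_len=10):
    # dummy batch: word index 0 everywhere, every timestep unmasked
    inputs = np.zeros((3, seq_len, model.batch_size), dtype='int32')
    masks = np.ones((3, seq_len, model.batch_size), dtype='float32')
    loss_before = model.loss(inputs, masks)
    model.sgd_step(inputs, masks, 0.001, 0.9)  # learning_rate=0.001, decay=0.9
    return loss_before, model.loss(inputs, masks)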
def test_remove0(self):
configs = [
# structure type, numpy matching class
('csc', scipy.sparse.csc_matrix),
('csr', scipy.sparse.csr_matrix), ]
for format, matrix_class in configs:
for zero, unsor in [(True, True), (True, False),
(False, True), (False, False)]:
(x,), (mat,) = sparse_random_inputs(format, (6, 8),
out_dtype=config.floatX,
explicit_zero=zero,
unsorted_indices=unsor)
assert 0 in mat.data or not zero
assert not mat.has_sorted_indices or not unsor
# In(..., mutable=True) is needed here because, as a rule, Theano
# does not apply inplace optimizations to function inputs unless
# they are declared mutable
f = theano.function([theano.In(x, borrow=True, mutable=True)],
Remove0()(x))
# assert optimization local_inplace_remove0 is applied in
# modes with optimization
if theano.config.mode not in ['FAST_COMPILE']:
# list of apply nodes in the optimized graph.
nodes = f.maker.fgraph.toposort()
# Check there isn't any Remove0 instance not inplace.
assert not any([isinstance(node.op, Remove0) and
not node.op.inplace for node in nodes]), (
'Inplace optimization should have been applied')
# Check there is at least one Remove0 inplace.
assert any([isinstance(node.op, Remove0) and node.op.inplace
for node in nodes])
# check the result against the input matrix after its explicit
# zeros have been removed with eliminate_zeros()
target = mat
result = f(mat)
mat.eliminate_zeros()
msg = 'Matrices sizes differ. Have zeros been removed ?'
assert result.size == target.size, msg
if unsor:
assert not result.has_sorted_indices
assert not target.has_sorted_indices
else:
assert result.has_sorted_indices
assert target.has_sorted_indices
def just_gemm(i, o, ishapes=[(4, 3), (3, 5), (4, 5), (), ()],
max_graphlen=0, expected_nb_gemm=1):
try:
f = inplace_func(
[In(ii, mutable=True, allow_downcast=True) for ii in i],
o,
mode='FAST_RUN',
on_unused_input='ignore')
nb_gemm = 0
for node in f.maker.fgraph.apply_nodes:
if isinstance(node.op, T.Dot):
raise Failure('dot not changed to gemm_inplace in graph')
if node.op == _dot22:
raise Failure('_dot22 not changed to gemm_inplace in graph')
if node.op == gemm_inplace:
nb_gemm += 1
assert nb_gemm == expected_nb_gemm, (nb_gemm, expected_nb_gemm)
g = inplace_func(i, o, mode=compile.Mode(linker='py', optimizer=None),
allow_input_downcast=True, on_unused_input='ignore')
for node in g.maker.fgraph.apply_nodes:
if node.op == gemm_inplace:
raise Exception('gemm_inplace in original graph')
graphlen = len(f.maker.fgraph.toposort())
if max_graphlen and (graphlen <= max_graphlen):
# theano.printing.debugprint(f)
assert False, 'graphlen=%i>%i' % (graphlen, max_graphlen)
rng = numpy.random.RandomState(unittest_tools.fetch_seed(234))
r0 = f(*[numpy.asarray(rng.randn(*sh), config.floatX)
for sh in ishapes])
rng = numpy.random.RandomState(unittest_tools.fetch_seed(234))
r1 = g(*[numpy.asarray(rng.randn(*sh), config.floatX)
for sh in ishapes])
max_abs_err = numpy.max(numpy.abs(r0[0] - r1[0]))
eps = 1.0e-8
if config.floatX == 'float32':
eps = 1.0e-6
if max_abs_err > eps:
raise Failure('GEMM is computing the wrong output. max_rel_err =',
max_abs_err)
except Failure:
for node in f.maker.fgraph.toposort():
print('GRAPH', node)
raise
def test_gemm_opt_double_gemm():
"""This is the pattern that shows up in the autoencoder"""
X, Y, Z, a, b = T.matrix(), T.matrix(), T.matrix(), T.scalar(), T.scalar()
R, S, c = T.matrix(), T.matrix(), T.scalar()
just_gemm([X, Y, Z, a, b, R, S, c],
[Z * c + a * T.dot(X, Y) + b * T.dot(R, S).T],
ishapes=[(4, 3), (3, 5), (4, 5), (), (), (5, 9), (9, 4), ()],
expected_nb_gemm=2)
ishapes = [(4, 3), (3, 5), (4, 5), (), (), (5, 9), (9, 4), ()]
i = [X, Y, Z, a, b, R, S, c]
o = [(a * T.dot(X, Y)
+ gemm_inplace(Z, b, S.T, R.T, T.constant(1.0).astype(config.floatX)))]
try:
f = inplace_func([In(ii, mutable=True) for ii in i], o,
mode='FAST_RUN', on_unused_input='ignore')
for node in f.maker.fgraph.apply_nodes:
if isinstance(node.op, T.Dot):
raise Failure('dot in graph')
if node.op == _dot22:
raise Failure('_dot22 in graph')
g = inplace_func(i, o, mode=compile.Mode(linker='py', optimizer=None),
on_unused_input='ignore')
# for node in g.maker.fgraph.apply_nodes:
# if node.op == gemm_inplace: raise Failure('gemm_inplace in graph')
rng = numpy.random.RandomState(unittest_tools.fetch_seed(234))
r0 = f(*[numpy.asarray(rng.randn(*sh), config.floatX)
for sh in ishapes])
rng = numpy.random.RandomState(unittest_tools.fetch_seed(234))
r1 = g(*[numpy.asarray(rng.randn(*sh), config.floatX)
for sh in ishapes])
max_abs_err = numpy.max(numpy.abs(r0[0] - r1[0]))
eps = 1.0e-8
if config.floatX == 'float32':
eps = 1.0e-6
if max_abs_err > eps:
raise Failure(
'GEMM is computing the wrong output. max_rel_err =',
max_abs_err)
except Failure:
for node in f.maker.fgraph.toposort():
print('GRAPH', node)
raise
def test_missing_inputs(self):
def fn():
x, s = T.scalars('xs')
function([], [x])
checkfor(self, fn, MissingInputError)
def fn():
x, s = T.scalars('xs')
# Ignore unused input s, as it hides the other error
function([s], [x], on_unused_input='ignore')
checkfor(self, fn, MissingInputError)
def fn():
x, s = T.scalars('xs')
function([s], [x])
checkfor(self, fn, UnusedInputError)
def fn():
x, s = T.scalars('xs')
# Ignore unused input s, as it hides the other error
function([s], x, on_unused_input='ignore')
checkfor(self, fn, MissingInputError)
def fn():
x, s = T.scalars('xs')
function([s], x)
checkfor(self, fn, UnusedInputError)
def fn():
x, s = T.scalars('xs')
# Ignore unused input s, as it hides the other error
function([s], Out(x), on_unused_input='ignore')
checkfor(self, fn, MissingInputError)
def fn():
x, s = T.scalars('xs')
function([s], Out(x))
checkfor(self, fn, UnusedInputError)
def fn():
x, s = T.scalars('xs')
function([In(x, update=s + x)], x)
checkfor(self, fn, MissingInputError)
def fn():
x, s = T.scalars('xs')
function([In(x, update=((s * s) + x))], x)
checkfor(self, fn, MissingInputError)
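For contrast, a minimal accumulator sketch (an illustration, not part of the test file) showing why the last two cases fail: the update expression attached to `x` references `s`, so `s` must itself be declared as an input; giving `x` a stored value lets it act as state that is read before each update.

import theano.tensor as T
from theano import In, function

x, s = T.scalars('xs')
# `s` is an explicit input, so the update `s + x` has no missing inputs;
# `x` starts at 0.0 and each call returns its value before the update
acc = function([s, In(x, value=0.0, update=s + x)], x)
print(acc(1.0))  # 0.0
print(acc(2.0))  # 1.0
print(acc(3.0))  # 3.0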
def test_sparse_input_aliasing_affecting_inplace_operations(self):
##
# Note this test will never fail because I am not aware of any
# inplace op on sparse variables
try:
import scipy.sparse as sp
except ImportError:
# The variable enable_sparse will be used to disable the test file.
pass
from theano.sparse import enable_sparse
if not enable_sparse:
raise SkipTest('Optional package sparse disabled')
from theano import sparse
# Note: to trigger this bug with theano rev 4586:2bc6fc7f218b,
# you need to make the inputs mutable (so that inplace
# operations are used) and to break the elemwise composition
# with some non-elemwise op (here dot)
x = sparse.SparseType('csc', dtype='float64')()
y = sparse.SparseType('csc', dtype='float64')()
f = theano.function([theano.In(x, mutable=True),
theano.In(y, mutable=True)],
(x + y) + (x + y))
# Test 1. If the same variable is given twice
# Compute bogus values
m = sp.csc_matrix(numpy.asarray(
[[1, 0, 0, 0, 0],
[0, 1, 0, 0, 0],
[0, 0, 1, 0, 0],
[0, 0, 0, 1, 0],
[0, 0, 0, 0, 1]], dtype='float64'))
bogus_vals = f(m, m)
# Since we used inplace operation v and m may be corrupted
# so we need to recreate them
m = sp.csc_matrix(numpy.asarray(
[[1, 0, 0, 0, 0],
[0, 1, 0, 0, 0],
[0, 0, 1, 0, 0],
[0, 0, 0, 1, 0],
[0, 0, 0, 0, 1]], dtype='float64'))
m_copy = m.copy()
vals = f(m, m_copy)
assert numpy.allclose(vals.todense(), bogus_vals.todense())
def pretrainingFunctions(self, train_set_x, batch_size, k):
"""Generates a list of functions, for performing one step of
gradient descent at a given layer. The function will require
as input the minibatch index, and to train an RBM you just
need to iterate, calling the corresponding function on all
minibatch indexes.
:type train_set_x: theano.tensor.TensorType
:param train_set_x: Shared var. that contains all datapoints used
for training the RBM
:type batch_size: int
:param batch_size: size of a [mini]batch
:param k: number of Gibbs steps to do in CD-k / PCD-k
"""
# index to a [mini]batch
index = T.lscalar('index') # index to a minibatch
learning_rate = T.scalar('lr') # learning rate to use
# beginning of a batch, given `index`
batch_begin = index * batch_size
# ending of a batch given `index`
batch_end = batch_begin + batch_size
pretrain_fns = []
for rbm in self.rbm_layers:
# get the cost and the updates list
# using CD-k here (persistent=None) for training each RBM.
# TODO: change cost function to reconstruction error
cost, updates, gparams = rbm.getCostUpdates(learning_rate,
persistent=None, k=k)
# compile the theano function
fn = theano.function(
inputs=[index, theano.In(learning_rate, value=0.1)],
outputs=[cost]+gparams,
updates=updates,
givens={self.x: train_set_x[batch_begin:batch_end]}
)
# append `fn` to the list of functions
pretrain_fns.append(fn)
return pretrain_fns
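A sketch of how the extra gradient outputs might be consumed (names such as `dbn` and `pretrain_lr` are assumptions): each call returns the cost followed by one array per parameter, which can be used to monitor gradient magnitudes during pre-training.

def monitor_pretraining_sketch(dbn, train_set_x, batch_size=10, k=1,
                               pretrain_lr=0.01):
    n_train_batches = train_set_x.get_value(borrow=True).shape[0] // batch_size
    fns = dbn.pretrainingFunctions(train_set_x, batch_size, k)
    for layer, fn in enumerate(fns):
        for batch_index in range(n_train_batches):
            outputs = fn(index=batch_index, lr=pretrain_lr)
            cost, gparams = outputs[0], outputs[1:]
            # mean absolute gradient per parameter, as a rough health check
            grad_scale = [float(abs(g).mean()) for g in gparams]
            print('layer %d, batch %d: cost %f, mean |grad| %s'
                  % (layer, batch_index, cost, grad_scale))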
def _buildEvaluationFunctions(self, X,n_steps,plr):
""" Build functions for evaluation. X: input,evaluation_bound: bound for evaluation
evaldict: dictionary containing z/mu/logcov and other arrays that might need inspection
additional_inputs: used to support finopt where you need to have n_steps etc
"""
self._p('Evaluation: Setting opt_method: ADAM, 100 steps w/ 8e-3 lr')
evaldict0, evaldictopt, evaldictf = {}, {}, {}
elbo_init = self._ELBO(X, savedict = evaldict0)
elbo_init_batch = evaldict0['elbo_batch']
mu_f, logcov_f, _ = self._optimizeVariationalParams(X,evaldict0['mu_q'],evaldict0['logcov_q'],
n_steps, plr, savedict = evaldictopt)
elbo_final = self._ELBO(X, mu_q = mu_f, logcov_q = logcov_f, savedict = evaldictf)
elbo_final_batch = evaldictf['elbo_batch']
fxn_inputs = [X]
init_val = 100
if self.params['data_type']=='image':
init_val = 5
fxn_inputs.append(theano.In(n_steps, value = init_val, name = 'n_steps'))
fxn_inputs.append(theano.In(plr, value = 8e-3, name = 'plr'))
diff_elbo, _ = self._estimateELBOEntropy(elbo_init, elbo_final, evaldict0['logcov_q'], evaldictf['logcov_q'])
self.evaluate = theano.function(fxn_inputs, [elbo_init, elbo_final,evaldictopt['n_steps'], diff_elbo], name = 'Evaluate')
self.reconstruct= theano.function([evaldictf['z']], evaldictf['mean_p'], name='Reconstruct')
self.inference = theano.function(fxn_inputs, [evaldictf['z'], evaldictf['mu_q'], evaldictf['logcov_q'] ],
name = 'Posterior Inference')
self.inference0 = theano.function([X], [evaldict0['z'], evaldict0['mu_q'], evaldict0['logcov_q'] ,evaldict0['KL']],
name = 'Posterior Inference 0 ')
self.inferencef = theano.function(fxn_inputs, [evaldictf['z'],
evaldictf['mu_q'], evaldictf['logcov_q'] ,evaldictf['KL']],
name = 'Posterior Inference F ')
#Create a theano input to estimate the Jacobian with respect to the latent z
z0 = T.vector('z')
z0.tag.test_value = np.random.randn(self.params['dim_stochastic']).astype(config.floatX)
"""
Estimating Jacobian Vectors
"""
additional = {}
lsf = self._conditionalXgivenZ(z0,additional=additional) #This computes Jacobian wrt log-probabilities, For poisson models this is the logmean
if self.params['data_type']=='real':
lsf = lsf[0]
#Grad wrt energy
jacob_energy = theano.gradient.jacobian(additional['E'],wrt=z0)
jacob_logprobs = theano.gradient.jacobian(lsf,wrt=z0)
jacob_probs = theano.gradient.jacobian(T.exp(lsf),wrt=z0)
jacob_logprobs_mnist = theano.gradient.jacobian(T.log(lsf),wrt=z0) #For use w/ binarized mnist only
self.jacobian_logprobs = theano.function([z0],jacob_logprobs,name='Jacobian wrt Log-Probs')
self.jacobian_probs = theano.function([z0],jacob_probs,name='Jacobian')
self.jacobian_energy = theano.function([z0],jacob_energy,name='Jacobian wrt energy')
#Evaluating perplexity
if self.params['data_type']=='bow':
X_count = X.sum(1,keepdims=True)
self.evaluatePerp = theano.function(fxn_inputs, [(elbo_init_batch/X_count).sum(),
(elbo_final_batch/X_count).sum(), evaldictopt['n_steps'], diff_elbo])
self.debugModel = theano.function([X], [evaldict0['elbo_batch'].sum(), evaldict0['negCLL'].sum(),evaldict0['KLmat'].sum()])
################################ Building Model #####################
def pretraining_functions(self, train_set_x, batch_size, mu):
''' Generates a list of functions, each of them implementing one
step in training the dA corresponding to the layer with the same index.
The function will require as input the minibatch index, and to train
a dA you just need to iterate, calling the corresponding function on
all minibatch indexes.
:type train_set_x: theano.tensor.TensorType
:param train_set_x: Shared variable that contains all datapoints used
for training the dA
:type batch_size: int
:param batch_size: size of a [mini]batch
:type mu: float
:param mu: extrapolation parameter used for implementing Nesterov-type acceleration
'''
# index to a [mini]batch
index = T.lscalar('index') # index to a minibatch
corruption_level = T.scalar('corruption') # % of corruption to use
learning_rate = T.scalar('lr') # learning rate to use
# beginning of a batch, given `index`
batch_begin = index * batch_size
# ending of a batch given `index`
batch_end = batch_begin + batch_size
pretrain_fns = []
for dA in self.dA_layers:
# get the cost and the updates list
cost, updates = dA.get_cost_updates(corruption_level,
learning_rate, mu)
# compile the theano function
fn = theano.function(
inputs=[
index,
theano.In(corruption_level),
theano.In(learning_rate)
],
outputs=cost,
updates=updates,
givens={
self.x: train_set_x[batch_begin: batch_end]
},
on_unused_input='ignore'
)
# append `fn` to the list of functions
pretrain_fns.append(fn)
return pretrain_fns
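Note that, unlike the earlier variants, theano.In(corruption_level) and theano.In(learning_rate) carry no value= default here, so both inputs must be supplied on every call; a hypothetical call would look like:

# assumed names; `mu` is fixed when the functions are compiled
pretrain_fns = sda.pretraining_functions(train_set_x, batch_size=10, mu=0.95)
cost = pretrain_fns[0](index=0, corruption=0.2, lr=0.001)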