def _generate_train_model_function(self, scores):
    """
    Compile the training step and store it as `self.train_model`.

    Creates the shared variables `self.W` (float32 zeros built from
    `self._dim`) and `self.S` (wrapping `scores`), builds a pairwise
    log-sigmoid objective with an L2 penalty on `W`, and compiles a theano
    function that returns the cost and applies one gradient step to `W`.

    Parameters
    ----------
    scores: array-like
        Initial value of the shared variable `self.S`. It is indexed as
        `S[u, i, :]`, so it is presumably 3-dimensional -- TODO confirm.
    """
    user_idx = T.lvector('u')
    first_idx = T.lvector('i')
    second_idx = T.lvector('j')

    initial_weights = numpy.zeros((self._dim)).astype('float32')
    self.W = theano.shared(initial_weights, name='W')
    self.S = theano.shared(scores, name='S')

    def _weighted_score(item_idx):
        # Dot product of W with the score rows selected by (u, item).
        return T.dot(self.W, self.S[user_idx, item_idx, :].T)

    x_uij = _weighted_score(first_idx) - _weighted_score(second_idx)

    log_likelihood = T.log(T.nnet.sigmoid(x_uij)).sum()
    l2_penalty = self._lambda_w * 0.5 * (self.W ** 2).sum()
    # The objective is maximised, so the minimised cost is its negation.
    cost = -T.sum(log_likelihood - l2_penalty)

    step = self._learning_rate * T.grad(cost=cost, wrt=self.W)
    self.train_model = theano.function(
        inputs=[user_idx, first_idx, second_idx],
        outputs=cost,
        updates=[(self.W, self.W - step)])
# Example source code for Python's function()
def compile_function(inputs=None, outputs=None, updates=None, givens=None, log_name=None, **kwargs):
    """
    Compile a theano function, optionally inside a `Message` context.

    Parameters
    ----------
    inputs, outputs, updates, givens:
        Forwarded unchanged to `theano.function`.
    log_name: str or None
        When truthy, compilation runs inside
        `Message("Compiling function %s" % log_name)`.
    kwargs: dictionary
        Extra keyword arguments forwarded to `theano.function`.

    Returns
    -------
    ret:
        The object returned by `theano.function`, always called with
        `on_unused_input='ignore'` and `allow_input_downcast=True`.
    """
    import theano

    def _compile():
        return theano.function(
            inputs=inputs,
            outputs=outputs,
            updates=updates,
            givens=givens,
            on_unused_input='ignore',
            allow_input_downcast=True,
            **kwargs
        )

    if not log_name:
        return _compile()

    # Using `with` (rather than calling __enter__/__exit__ by hand) ensures
    # the Message context is closed even when compilation raises.
    with Message("Compiling function %s" % log_name):
        return _compile()
def hamiltonian(pos, vel, energy_fn):
    """
    Compute the Hamiltonian: potential energy plus kinetic energy.

    Parameters
    ----------
    pos: theano matrix
        Symbolic matrix whose rows are position vectors.
    vel: theano matrix
        Symbolic matrix whose rows are velocity vectors.
    energy_fn: python function
        Python function, operating on symbolic theano variables, used to
        compute the potential energy at a given position.

    Returns
    -------
    return: theano vector
        Vector whose i-th entry is the Hamiltonian at position pos[i] and
        velocity vel[i].
    """
    # Mass is assumed to be 1.
    potential = energy_fn(pos)
    kinetic = kinetic_energy(vel)
    return potential + kinetic
def draw(self, **kwargs):
    """
    Run `self.simulate()` once and return the current positions.

    Parameters
    ----------
    kwargs: dictionary
        Forwarded to the `get_value()` call on the shared variable
        `self.positions`. `borrow` defaults to False when not given; to
        avoid copying the shared variable value, pass `borrow=True`.

    Returns
    -------
    rval: numpy matrix
        Value of `self.positions` after the simulation step; presumably
        of the same dimensions as the initial position -- TODO confirm.
    """
    self.simulate()
    # Honour caller options as documented, keeping the previous
    # copy-by-default behaviour when `borrow` is not specified.
    kwargs.setdefault('borrow', False)
    return self.positions.get_value(**kwargs)
def hamiltonian(pos, vel, energy_fn):
    """
    Return the sum of potential and kinetic energy for the given state.

    Parameters
    ----------
    pos: theano matrix
        Symbolic matrix whose rows are position vectors.
    vel: theano matrix
        Symbolic matrix whose rows are velocity vectors.
    energy_fn: python function
        Python function, operating on symbolic theano variables, used to
        compute the potential energy at a given position.

    Returns
    -------
    return: theano vector
        Vector whose i-th entry is the Hamiltonian at position pos[i] and
        velocity vel[i].
    """
    # Mass is assumed to be 1.
    potential_term = energy_fn(pos)
    kinetic_term = kinetic_energy(vel)
    total_energy = potential_term + kinetic_term
    return total_energy
def build_model(model_):
    """
    Build the symbolic graph and compile the prediction/recording/training
    functions into module-level globals.

    Sets `g_ozer` (optimizer selected by `OZER`), `fn_predict` and
    `fn_record`. `g_mdl` is only read here; presumably it is populated
    elsewhere (possibly by `model_`) -- TODO confirm.

    Parameters
    ----------
    model_: python function
        Called as `model_(x, dropout_prob)` with symbolic variables; its
        return value is used as the model output.
    """
    global fn_predict, fn_record
    global g_ozer, g_mdl

    optimizer_classes = {'simple': VanillaSGD, 'adam': AdamSGD}
    g_ozer = optimizer_classes[OZER]()
    g_ozer.lr = LEARN_RATE

    images = T.tensor4('x')
    labels = T.ivector('y')
    dropout_prob = T.scalar()
    output = model_(images, dropout_prob)

    n_labels = len(g_dataset.label_map)
    labels_onehot = T.extra_ops.to_one_hot(labels, n_labels)
    # The small constant keeps the logarithm finite when an output is zero.
    loss = T.mean(-labels_onehot * T.log(output + 1e-3))
    is_correct = T.eq(
        T.argmax(output, axis=1), T.argmax(labels_onehot, axis=1))
    accuracy = T.mean(T.switch(is_correct, 1, 0))

    # Substitute a zero dropout probability for the non-training functions.
    no_dropout = [(dropout_prob, T.constant(0., dtype=th.config.floatX))]

    fn_predict = th.function(
        [images, labels],
        {'pred': output, 'accr': accuracy, 'loss': loss},
        givens=no_dropout,
        profile=PROFILE)

    rec_fetches = {'x': images, 'y': labels, 'pred': output}
    rec_fetches.update(g_mdl.params_di)
    fn_record = th.function(
        [images, labels],
        rec_fetches,
        givens=no_dropout,
        profile=PROFILE)

    train_dropout = [
        (dropout_prob, T.constant(TRAIN_PDPO, dtype=th.config.floatX))]
    g_ozer.compile(
        [images, labels],
        loss,
        g_mdl.params_di.values(),
        fetches_={'pred': output, 'loss': loss, 'accr': accuracy},
        givens_=train_dropout,
        profile_=PROFILE)
def _get_p_from_g(self, cg_id, g, params):
    """
    Utility function to pick the parameter given gradient.

    The parameter name is extracted from `g.name`, which must contain a
    fragment of the form `(dcost_<cg_id>/d<param_name>)`.

    Parameters
    ----------
    cg_id: str
        Identifier expected after `dcost_` in the gradient name; it is
        matched literally.
    g:
        Object with a `name` attribute holding the gradient's name.
    params: mapping
        Mapping from parameter name to parameter.

    Returns
    -------
    The entry of `params` stored under the extracted name.

    Raises
    ------
    AttributeError
        If `g.name` does not contain the expected fragment.
    KeyError
        If the extracted name is not a key of `params`.
    """
    # Raw strings avoid invalid-escape warnings; re.escape keeps regex
    # metacharacters inside cg_id from being interpreted as a pattern.
    pattern = r'\(dcost_' + re.escape(cg_id) + r'/d(.+?)\)'
    p_name = re.search(pattern, g.name).group(1)
    return params[p_name]
def f_log_probs(self, probs, x, x_mask, y, y_mask,
                src_selector, trg_selector, cg=None):
    """
    Compile a theano function returning the masked negative log
    probability of the targets `y`, summed over axis 0.

    `probs` is flattened and indexed at `row * self.vocab_size + target`,
    so it is presumably laid out as (flattened targets, vocabulary) --
    TODO confirm. `cg` is accepted but not used by this function.

    Returns
    -------
    A theano function taking
    `[x, x_mask, y, y_mask, src_selector, trg_selector]`.
    """
    targets = y.flatten()
    row_offsets = tensor.arange(targets.shape[0]) * self.vocab_size
    target_probs = probs.flatten()[row_offsets + targets]

    neg_log_prob = -tensor.log(target_probs)
    neg_log_prob = neg_log_prob.reshape([y.shape[0], y.shape[1]])
    # Zero out masked positions before summing over the first axis.
    masked_cost = (neg_log_prob * y_mask).sum(0)

    return theano.function(
        inputs=[x, x_mask, y, y_mask, src_selector, trg_selector],
        outputs=masked_cost,
        on_unused_input='warn')
def load_data(data_feeder):
    """
    Call `data_feeder` with the module-level data settings.

    `data_feeder` should be `train_feeder`, `valid_feeder`, or
    `test_feeder`; this helper hides the shared argument list so the
    different datasets can be used through one interface.
    """
    feeder_args = (BATCH_SIZE, SEQ_LEN, OVERLAP, Q_LEVELS, Q_ZERO, Q_TYPE)
    return data_feeder(*feeder_args)
### Creating computation graph ###
def load_data(data_feeder):
    """
    Call `data_feeder` with the module-level data settings.

    `data_feeder` should be `train_feeder`, `valid_feeder`, or
    `test_feeder`; this helper hides the shared argument list so the
    different datasets can be used through one interface.
    """
    feeder_args = (BATCH_SIZE, SEQ_LEN, OVERLAP, Q_LEVELS, Q_ZERO, Q_TYPE)
    return data_feeder(*feeder_args)
### Creating computation graph ###
def load_data(data_feeder):
    """
    Call `data_feeder` with the module-level data settings.

    `data_feeder` should be `train_feeder`, `valid_feeder`, or
    `test_feeder`; this helper hides the shared argument list so the
    different datasets can be used through one interface.
    """
    feeder_args = (BATCH_SIZE, SEQ_LEN, OVERLAP, Q_LEVELS, Q_ZERO, Q_TYPE)
    return data_feeder(*feeder_args)
### Creating computation graph ###
def load_model():
    """
    Load both saved models and their tables, and compile the encoders.

    Returns
    -------
    model: dict
        Holds the two option objects ('uoptions', 'boptions'), the two
        tables ('utable', 'btable') and the two compiled encoder
        functions ('f_w2v', 'f_w2v2').
    """
    # Model options are pickled next to each model as "<path>.pkl".
    print('Loading model parameters...')
    with open('%s.pkl'%path_to_umodel, 'rb') as uni_file:
        uoptions = pkl.load(uni_file)
    with open('%s.pkl'%path_to_bmodel, 'rb') as bi_file:
        boptions = pkl.load(bi_file)

    # Initialise parameters, overwrite them with the saved values, then
    # convert them with init_tparams.
    utparams = init_tparams(
        load_params(path_to_umodel, init_params(uoptions)))
    btparams = init_tparams(
        load_params(path_to_bmodel, init_params_bi(boptions)))

    # Extractor functions
    print('Compiling encoders...')
    uni_embedding, uni_mask, uni_ctx = build_encoder(utparams, uoptions)
    f_w2v = theano.function([uni_embedding, uni_mask], uni_ctx, name='f_w2v')
    bi_embedding, bi_mask, bi_ctx = build_encoder_bi(btparams, boptions)
    f_w2v2 = theano.function([bi_embedding, bi_mask], bi_ctx, name='f_w2v2')

    # Tables
    print('Loading tables...')
    utable, btable = load_tables()

    # Store everything we need in a dictionary
    print('Packing up...')
    return {
        'uoptions': uoptions,
        'boptions': boptions,
        'utable': utable,
        'btable': btable,
        'f_w2v': f_w2v,
        'f_w2v2': f_w2v2,
    }
def get_eval_function(self):
    """ We should feed in non-dimshuffled inputs x0, mask0 and y0.
    Used for tracking Dev loss at training time.

    Returns a theano function mapping [x0, mask0, y0] to [pred0, loss],
    compiled with `self.is_train` replaced by an int8 zero.
    """
    loss = CrossEntropyLoss().connect(self.scores, self.mask, self.y)
    # numpy.cast was removed in NumPy 2.0; numpy.asarray(value, dtype=...)
    # is the documented replacement and produces the same int8 value.
    eval_mode = {self.is_train: numpy.asarray(0, dtype='int8')}
    return theano.function([self.x0, self.mask0, self.y0], [self.pred0, loss],
                           name='f_eval',
                           allow_input_downcast=True,
                           on_unused_input='warn',
                           givens=eval_mode)
def get_distribution_function(self):
    """ Return predictions and scores of shape [batch_size, time_steps, label space size].
    Used at test time.

    The scores are reshaped to [x.shape[0], x.shape[1], label_space_size]
    and their first two axes swapped. Returns a theano function mapping
    [x0, mask0] to [pred0, scores0], compiled with `self.is_train`
    replaced by an int8 zero.
    """
    scores0 = self.scores.reshape([self.x.shape[0], self.x.shape[1],
                                   self.label_space_size]).dimshuffle(1, 0, 2)
    # numpy.cast was removed in NumPy 2.0; numpy.asarray(value, dtype=...)
    # is the documented replacement and produces the same int8 value.
    eval_mode = {self.is_train: numpy.asarray(0, dtype='int8')}
    return theano.function([self.x0, self.mask0], [self.pred0, scores0],
                           name='f_pred',
                           allow_input_downcast=True,
                           on_unused_input='warn',
                           givens=eval_mode)
def get_loss_function(self):
    """ We should feed in non-dimshuffled inputs x0, mask0 and y0.

    Returns a theano function mapping [x0, mask0, y0] to the loss. Each
    call also applies the adadelta updates computed from the clipped
    gradients; it is compiled with `self.is_train` replaced by an int8 one.
    """
    loss = CrossEntropyLoss().connect(self.scores, self.mask, self.y)
    grads = gradient_clipping(tensor.grad(loss, self.params),
                              self.max_grad_norm)
    updates = adadelta(self.params, grads)
    # numpy.cast was removed in NumPy 2.0; numpy.asarray(value, dtype=...)
    # is the documented replacement and produces the same int8 value.
    train_mode = {self.is_train: numpy.asarray(1, dtype='int8')}
    return theano.function([self.x0, self.mask0, self.y0], loss,
                           name='f_loss',
                           updates=updates,
                           on_unused_input='warn',
                           givens=train_mode)
def load_model():
    """
    Load the model with saved tables

    Returns
    -------
    model: dict
        Holds the two option objects ('uoptions', 'boptions'), the two
        tables ('utable', 'btable') and the two compiled encoder
        functions ('f_w2v', 'f_w2v2').
    """
    # Progress messages use the call form print(...) with one argument,
    # which prints the same text on Python 2 and Python 3.

    # Load model options
    print('Loading model parameters...')
    with open('%s.pkl'%path_to_umodel, 'rb') as f:
        uoptions = pkl.load(f)
    with open('%s.pkl'%path_to_bmodel, 'rb') as f:
        boptions = pkl.load(f)

    # Load parameters
    uparams = init_params(uoptions)
    uparams = load_params(path_to_umodel, uparams)
    utparams = init_tparams(uparams)
    bparams = init_params_bi(boptions)
    bparams = load_params(path_to_bmodel, bparams)
    btparams = init_tparams(bparams)

    # Extractor functions
    print('Compiling encoders...')
    embedding, x_mask, ctxw2v = build_encoder(utparams, uoptions)
    f_w2v = theano.function([embedding, x_mask], ctxw2v, name='f_w2v')
    embedding, x_mask, ctxw2v = build_encoder_bi(btparams, boptions)
    f_w2v2 = theano.function([embedding, x_mask], ctxw2v, name='f_w2v2')

    # Tables
    print('Loading tables...')
    utable, btable = load_tables()

    # Store everything we need in a dictionary
    print('Packing up...')
    model = {}
    model['uoptions'] = uoptions
    model['boptions'] = boptions
    model['utable'] = utable
    model['btable'] = btable
    model['f_w2v'] = f_w2v
    model['f_w2v2'] = f_w2v2

    return model
def setupDebugFunctions(self):
    """
    Compile `self.compute_train_descr`, which maps a batch index to
    `self.poseNet.output` evaluated on that batch of `self.train_data_x`.
    """
    batch_size = self.cfgParams.batch_size

    print("compiling compute_train_descr() ... ")
    batch_start = self.index * batch_size
    batch_stop = (self.index + 1) * batch_size
    self.compute_train_descr = theano.function(
        inputs=[self.index],
        outputs=self.poseNet.output,
        givens={self.x: self.train_data_x[batch_start:batch_stop]})
    print("done.")
def setupTrain(self):
    """
    Compile the training functions.

    Sets `self.updates` (from `Optimizer.RMSProp`), `self.train_model`
    (batch index and learning rate -> cost, applying the updates) and
    `self.test_model_on_train` (batch index -> errors on training data).
    """
    # train_model is a function that updates the model parameters
    optimizer = Optimizer(self.grads, self.params)
    self.updates = optimizer.RMSProp(self.learning_rate, 0.9, 1.0/100.)

    batch_size = self.cfgParams.batch_size

    def batch_of(shared_var):
        # Symbolic slice selecting the batch numbered self.index.
        return shared_var[self.index * batch_size:(self.index + 1) * batch_size]

    def training_givens():
        # Map every network input, then the target, to its training batch.
        givens = {self.x[0]: batch_of(self.train_data_x)}
        for k in range(1, self.poseNet.cfgParams.numInputs):
            givens[self.x[k]] = batch_of(getattr(self, 'train_data_x'+str(k)))
        givens[self.y] = batch_of(self.train_data_y)
        return givens

    givens_train = training_givens()

    print("compiling train_model() ... ")
    self.train_model = theano.function(
        inputs=[self.index, self.learning_rate],
        outputs=self.cost,
        updates=self.updates,
        givens=givens_train)
    print("done.")

    print("compiling test_model_on_train() ... ")
    givens_test_on_train = training_givens()
    self.test_model_on_train = theano.function(
        inputs=[self.index],
        outputs=self.errors,
        givens=givens_test_on_train)
    print("done.")
def setupValidate(self):
    """
    Compile the validation functions.

    Sets `self.validation_cost` (batch index -> cost),
    `self.validation_error` (batch index -> errors) and
    `self.compute_val_descr` (batch index -> `self.poseNet.output`), all
    evaluated on batches of the validation data.
    """
    batch_size = self.cfgParams.batch_size

    def batch_of(shared_var):
        # Symbolic slice selecting the batch numbered self.index.
        return shared_var[self.index * batch_size:(self.index + 1) * batch_size]

    def input_givens():
        # Map every network input to its validation batch.
        givens = {self.x[0]: batch_of(self.val_data_x)}
        for k in range(1, self.poseNet.cfgParams.numInputs):
            givens[self.x[k]] = batch_of(getattr(self, 'val_data_x'+str(k)))
        return givens

    def labelled_givens():
        # Inputs plus the target batch.
        givens = input_givens()
        givens[self.y] = batch_of(self.val_data_y)
        return givens

    givens_val = labelled_givens()
    givens_val_cost = labelled_givens()

    print("compiling validation_cost() ... ")
    self.validation_cost = theano.function(
        inputs=[self.index],
        outputs=self.cost,
        givens=givens_val_cost)
    print("done.")

    print("compiling validation_error() ... ")
    self.validation_error = theano.function(
        inputs=[self.index],
        outputs=self.errors,
        givens=givens_val)
    print("done.")

    # debug and so
    print("compiling compute_val_descr() ... ")
    givens_val_descr = input_givens()
    self.compute_val_descr = theano.function(
        inputs=[self.index],
        outputs=self.poseNet.output,
        givens=givens_val_descr)
    print("done.")
def compile_sample(self):
    """
    Build the sampler functions.

    Calls `self.decoder.build_sampler()`, then compiles
    `self.action_sampler` (an integer `n` -> normal samples of shape
    `(n, self.config['action_dim'])`) and `self.transform` (an action
    matrix -> `self.context_trans(action)`).
    """
    # context vectors (as)
    self.decoder.build_sampler()

    n_samples = T.iscalar()
    logger.info("compiling the computational graph :: action sampler")
    sampled_actions = self.rng.normal((n_samples, self.config['action_dim']))
    self.action_sampler = theano.function([n_samples], sampled_actions)

    action = T.matrix()
    logger.info("compiling the compuational graph ::transform function::")
    transformed = self.context_trans(action)
    self.transform = theano.function([action], transformed)

    logger.info("display functions compile done.")