def gate_layer(tparams, X_word, X_char, options, prefix, pretrain_mode, activ='lambda x: x', **kwargs):
"""
compute the forward pass for a gate layer
Parameters
----------
tparams : OrderedDict of theano shared variables, {parameter name: value}
X_word : theano 3d tensor, word input, dimensions: (num of time steps, batch size, dim of vector)
X_char : theano 3d tensor, char input, dimensions: (num of time steps, batch size, dim of vector)
options : dictionary, {hyperparameter: value}
prefix : string, layer name
pretrain_mode : theano shared scalar, 0. = word only, 1. = char only, 2. = word & char
activ : string, activation function: 'linear', 'tanh', or 'rectifier'
Returns
-------
X : theano 3d tensor, final vector, dimensions: (num of time steps, batch size, dim of vector)
"""
# compute gating values, Eq.(3)
G = tensor.nnet.sigmoid(tensor.dot(X_word, tparams[p_name(prefix, 'v')]) + tparams[p_name(prefix, 'b')][0])
X = ifelse(tensor.le(pretrain_mode, numpy.float32(1.)),
ifelse(tensor.eq(pretrain_mode, numpy.float32(0.)), X_word, X_char),
G[:, :, None] * X_char + (1. - G)[:, :, None] * X_word)
return eval(activ)(X)
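# --- hedged usage sketch (not from the original source) ---
# Shows how the pretrain_mode switch in gate_layer above behaves: mode 0 returns the
# word input, mode 1 the char input, and mode 2 a gated mixture. The learned gate is
# replaced here by a plain scalar stand-in `G`; all other names are hypothetical.
import numpy
import theano
import theano.tensor as tensor
from theano.ifelse import ifelse

X_word = tensor.tensor3('X_word')
X_char = tensor.tensor3('X_char')
G = tensor.scalar('G')  # stand-in for the learned gate values
pretrain_mode = theano.shared(numpy.float32(0.), name='pretrain_mode')
X = ifelse(tensor.le(pretrain_mode, numpy.float32(1.)),
           ifelse(tensor.eq(pretrain_mode, numpy.float32(0.)), X_word, X_char),
           G * X_char + (1. - G) * X_word)
f = theano.function([X_word, X_char, G], X, allow_input_downcast=True)
w = numpy.ones((2, 3, 4), dtype=theano.config.floatX)
c = numpy.zeros((2, 3, 4), dtype=theano.config.floatX)
print(f(w, c, 0.5).mean())                  # 1.0: word-only mode
pretrain_mode.set_value(numpy.float32(2.))
print(f(w, c, 0.5).mean())                  # 0.5: gated mixture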
def concat_layer(tparams, X_word, X_char, options, prefix, pretrain_mode, activ='lambda x: x', **kwargs):
"""
compute the forward pass for a concat layer
Parameters
----------
tparams : OrderedDict of theano shared variables, {parameter name: value}
X_word : theano 3d tensor, word input, dimensions: (num of time steps, batch size, dim of vector)
X_char : theano 3d tensor, char input, dimensions: (num of time steps, batch size, dim of vector)
options : dictionary, {hyperparameter: value}
prefix : string, layer name
pretrain_mode : theano shared scalar, 0. = word only, 1. = char only, 2. = word & char
activ : string, activation function: 'linear', 'tanh', or 'rectifier'
Returns
-------
X : theano 3d tensor, final vector, dimensions: (num of time steps, batch size, dim of vector)
"""
X = ifelse(tensor.le(pretrain_mode, numpy.float32(1.)),
ifelse(tensor.eq(pretrain_mode, numpy.float32(0.)), X_word, X_char),
tensor.dot(tensor.concatenate([X_word, X_char], axis=2), tparams[p_name(prefix, 'W')]) + tparams[p_name(prefix, 'b')])
return eval(activ)(X)
def model(inputs, _is_training, params, batch_size, hidden_size, drop_i, drop_s, init_scale, init_H_bias, _theano_rng):
noise_i_for_H = get_dropout_noise((batch_size, hidden_size), drop_i, _theano_rng)
i_for_H = ifelse(_is_training, inputs * noise_i_for_H, inputs)
i_for_H = linear.model(i_for_H, params, hidden_size, hidden_size, init_scale, bias_init=init_H_bias)
# Dropout noise for recurrent hidden state.
noise_s = get_dropout_noise((batch_size, hidden_size), drop_s, _theano_rng)
def step(i_for_H_t, y_tm1, noise_s):
s_lm1_for_H = ifelse(_is_training, y_tm1 * noise_s, y_tm1)
return T.tanh(i_for_H_t + linear.model(s_lm1_for_H, params, hidden_size, hidden_size, init_scale))
y_0 = shared_zeros((batch_size, hidden_size), name='h0')
y, _ = theano.scan(step, sequences=i_for_H, outputs_info=[y_0], non_sequences = [noise_s])
y_last = y[-1]
sticky_state_updates = [(y_0, y_last)]
return y, y_0, sticky_state_updates
def skip_connect(self, input, layer_index):
if ([] == self.noisy_z):
raise ValueError('Error: noisy_z is an empty list, noisy_fprop must be run before skip_connect')
MU = self.compute_mu(input, self.As[layer_index])
V = self.compute_v(input, self.As[layer_index])
reconstruction = (self.noisy_z[-1] - MU) * V + MU
# # Non trainable Batchnormalisation
# mean = reconstruction.mean(0)
# std = reconstruction.std(0) + 1e-10
#
# # Only batchnormalise for a batchsize > 1
# mean = ifelse(T.gt(input.shape[0], 1), mean, T.zeros(mean.shape, dtype=mean.dtype))
# std = ifelse(T.gt(input.shape[0], 1), std, T.ones(std.shape, dtype=std.dtype))
# reconstruction = (reconstruction - mean) / std
self.tmp = reconstruction
# To calculate the reconstruction error later
self.reconstructions.append(reconstruction)
self.noisy_z = self.noisy_z[0:-1]
return reconstruction
def forward(self, x, seq):
"""
:param x: (length, dim)
:param seq: (length - 1, 3)
:return:
"""
# (length, dim) -> (2 * length - 1, dim)
vector = T.concatenate([x, T.zeros_like(x)[:-1, :]], axis=0)
# vector = theano.printing.Print()(vector)
# scan length-1 times
hs, _ = theano.scan(fn=self.encode,
sequences=seq,
outputs_info=[vector, shared_scalar(0)],
name="compose_phrase")
comp_vec_init = hs[0][-1][-1]
comp_rec_init = T.sum(hs[1])
if self.normalize:
hidden = x[0] / x[0].norm(2)
else:
hidden = x[0]
comp_vec = ifelse(x.shape[0] > 1, comp_vec_init, hidden)
comp_rec = ifelse(x.shape[0] > 1, comp_rec_init, shared_zero_scalar())
return comp_vec, comp_rec
def forward(self, x):
"""
:param x: (length, dim)
:return: (hidden_dim, )
"""
if self.padding_size > 0:
# (padding_size + length + padding_size, dim)
x = temporal_padding_2d(x, (self.padding_size, self.padding_size))
safe_x = temporal_padding_2d(x, (0, self.kernel_size - x.shape[0]))
# If the kernel size is greater than the sentence length, pad at the end of the sentence
x = ifelse(T.gt(self.kernel_size - x.shape[0], 0),
safe_x,
x)
conv_result = self.forward_conv(x)
pooling_result = get_pooling(conv_result, self.pooling)
dropout_out = dropout_from_layer(pooling_result, self.dropout)
return self.act.activate(dropout_out + self.b)
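# --- hedged sketch (not from the original source) ---
# Self-contained version of the "pad only if the input is shorter than the kernel"
# pattern in forward() above, with plain zero padding instead of the project's
# temporal_padding_2d helper; ifelse picks the padded tensor only when needed.
import numpy
import theano
import theano.tensor as T
from theano.ifelse import ifelse

kernel_size = 5
x_in = T.matrix('x_in')  # (length, dim)
pad_len = T.maximum(kernel_size - x_in.shape[0], 0)
padding = T.zeros((pad_len, x_in.shape[1]), dtype=x_in.dtype)
safe_x = T.concatenate([x_in, padding], axis=0)
padded = ifelse(T.gt(kernel_size - x_in.shape[0], 0), safe_x, x_in)
shape_fn = theano.function([x_in], padded.shape)
print(shape_fn(numpy.ones((3, 2), dtype=theano.config.floatX)))  # [5 2]: padded up to kernel_size
print(shape_fn(numpy.ones((8, 2), dtype=theano.config.floatX)))  # [8 2]: left untouched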
def test_lazy_if(self):
# Tests that lazy if works .. even if the two results have different
# shapes but the same type (i.e. both vectors, or matrices or
# whatnot of same dtype)
x = tensor.vector('x', dtype=self.dtype)
y = tensor.vector('y', dtype=self.dtype)
c = tensor.iscalar('c')
f = theano.function([c, x, y], ifelse(c, x, y), mode=self.mode)
self.assertFunctionContains1(f, self.get_ifelse(1))
rng = numpy.random.RandomState(utt.fetch_seed())
xlen = rng.randint(200)
ylen = rng.randint(200)
vx = numpy.asarray(rng.uniform(size=(xlen,)), self.dtype)
vy = numpy.asarray(rng.uniform(size=(ylen,)), self.dtype)
assert numpy.allclose(vx, f(1, vx, vy))
assert numpy.allclose(vy, f(0, vx, vy))
def test_pushout3(self):
raise SkipTest("Optimization temporarily disabled")
x1 = tensor.scalar('x1')
y1 = tensor.scalar('x2')
y2 = tensor.scalar('y2')
c = tensor.iscalar('c')
two = numpy.asarray(2, dtype=theano.config.floatX)
x, y = ifelse(c, (x1, y1), (two, y2), name='f1')
o3 = numpy.asarray(0.3, dtype=theano.config.floatX)
o2 = numpy.asarray(0.2, dtype=theano.config.floatX)
z = ifelse(c, o3, o2, name='f2')
out = x * z * y
f = theano.function([x1, y1, y2, c], out,
allow_input_downcast=True)
assert isinstance(f.maker.fgraph.toposort()[-1].op, IfElse)
rng = numpy.random.RandomState(utt.fetch_seed())
vx1 = rng.uniform()
vy1 = rng.uniform()
vy2 = rng.uniform()
assert numpy.allclose(f(vx1, vy1, vy2, 1), vx1 * vy1 * 0.3)
assert numpy.allclose(f(vx1, vy1, vy2, 0), 2 * vy2 * 0.2)
def test_c_thunks():
a = tensor.scalars('a')
b, c = tensor.vectors('bc')
cases = [False]
if theano.config.cxx:
cases.append(True)
for c_thunks in cases:
f = function([a, b, c], ifelse(a, a * b, b * c),
mode=Mode(
optimizer=None,
linker=vm.VM_Linker(c_thunks=c_thunks,
use_cloop=False)))
f(1, [2], [3, 2])
from nose.tools import assert_raises
assert_raises(ValueError, f, 0, [2], [3, 4])
assert any([hasattr(t, 'cthunk') for t in f.fn.thunks]) == c_thunks
def remove_adjdup(x):
"""
Remove adjacent duplicate items of a vector
x: vector
return a vector with adjacent duplicate items removed, for example [1,2,2,2,3,3,4] -> [1,2,3,4]
"""
def update(x, nondup, idx):
nondup = tensor.switch(tensor.eq(nondup[idx], x), nondup, tensor.set_subtensor(nondup[idx + 1], x)) # tensor.switch is much faster than ifelse
idx = tensor.switch(tensor.eq(nondup[idx], x), idx, idx + 1)
return nondup, idx
nondup = x
idx = tensor.as_tensor_variable(0)
idx = tensor.cast(idx, 'int32')
result, updates = theano.scan(fn = update, sequences=x, outputs_info=[nondup, idx], name='remove_adjdup')
nondup = result[0][-1]
idx = result[1][-1]
return nondup[0:idx+1]
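# --- hedged usage sketch (not from the original source) ---
# Compiles remove_adjdup above (assuming it is defined in the current module) and
# checks the example from its docstring.
import numpy
import theano
import theano.tensor as tensor

seq_var = tensor.ivector('seq_var')
dedup = theano.function([seq_var], remove_adjdup(seq_var))
print(dedup(numpy.asarray([1, 2, 2, 2, 3, 3, 4], dtype='int32')))  # [1 2 3 4]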
def _editdist(s, t):
"""
Levenshtein's edit distance function
:param s: vector, source string
:param t: vector, target string
:return: edit distance, scalar
"""
def update(x, previous_row):
current_row = previous_row + 1
current_row = tensor.set_subtensor(current_row[1:], tensor.minimum(current_row[1:], tensor.add(previous_row[:-1], tensor.neq(target,x))))
current_row = tensor.set_subtensor(current_row[1:], tensor.minimum(current_row[1:], current_row[0:-1] + 1))
return current_row
source, target = ifelse(tensor.lt(s.shape[0], t.shape[0]), (t, s), (s, t))
previous_row = tensor.arange(target.size + 1, dtype=theano.config.floatX)
result, updates = theano.scan(fn=update, sequences=source, outputs_info=previous_row, name='editdist')
return result[-1,-1]
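# --- hedged usage sketch (not from the original source) ---
# Compiles _editdist above (assuming it is defined in the current module) and checks
# the classic kitten -> sitting case, encoded as integer vectors; the distance is 3.
import numpy
import theano
import theano.tensor as tensor

s_var = tensor.ivector('s_var')
t_var = tensor.ivector('t_var')
edit_distance = theano.function([s_var, t_var], _editdist(s_var, t_var))
kitten = numpy.asarray([1, 2, 3, 3, 4, 5], dtype='int32')      # k i t t e n
sitting = numpy.asarray([6, 2, 3, 3, 2, 5, 7], dtype='int32')  # s i t t i n g
print(edit_distance(kitten, sitting))  # 3.0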
def encode(self, x, shape=None):
if shape is None:
xp = create_shared_variable(np.zeros((0, )*x.ndim), name='xp')
delta = ifelse(xp.size>0, x-xp, x)
else:
xp = create_shared_variable(np.zeros(shape), name='xp{}'.format(shape))
delta = x - xp
add_update(xp, x)
y = self.kp*x + self.kd*delta
if self.quantization is None:
return y
elif self.quantization=='herd':
return herd(y, shape=shape)
else:
raise Exception('No quantizer: {}'.format(self.quantization))
def geoseries_sum(r, t_end, t_start):
"""
Sum of r**t from t=t_start to t=t_end, inclusive
:param r:
:param t_end:
:param t_start:
:return:
"""
# return ifelse(tt.eq(r, 1), (t_end-t_start+1).astype(theano.config.floatX), (r**(t_end+1)-r**t_start)/(r-1))
return ifelse(tt.eq(r, 1), (t_end-t_start+1).astype(theano.config.floatX), (r**(t_end+1)-r**t_start)/(r-1))
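# --- hedged check (not from the original source) ---
# Numerical check of geoseries_sum above, assuming `tt` is theano.tensor in this
# module. Because ifelse only evaluates the branch it takes, the r == 1 case never
# touches the (r - 1) division.
import theano
import theano.tensor as tt

r_var = tt.scalar('r_var')
t_end_var = tt.scalar('t_end_var')
t_start_var = tt.scalar('t_start_var')
geo = theano.function([r_var, t_end_var, t_start_var],
                      geoseries_sum(r_var, t_end_var, t_start_var),
                      allow_input_downcast=True)
print(geo(0.5, 3, 0))  # 1 + 0.5 + 0.25 + 0.125 = 1.875
print(geo(1.0, 3, 0))  # four terms of 1 -> 4.0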
def connect(self, inputs, is_train):
""" Trick to speed up model compiling at decoding time.
(Avoids building a complicated CG.)
"""
if not self.fix_mask:
self.generate_mask(inputs.shape, is_train)
if self.fast_predict:
return inputs * (1 - self.dropout_prob)
return ifelse(is_train,
inputs * self.dropout_mask,
inputs * (1 - self.dropout_prob))
def scale(X, max_norm):
curr_norm = T.sum(T.abs_(X))
return ifelse(T.lt(curr_norm, max_norm), X, max_norm * (X / curr_norm))
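# --- hedged usage sketch (not from the original source) ---
# scale() above clips the L1 norm of X to max_norm; ifelse leaves X untouched when
# its norm is already below the threshold.
import theano
import theano.tensor as T

X_var = T.vector('X_var')
max_norm_var = T.scalar('max_norm_var')
clip = theano.function([X_var, max_norm_var], scale(X_var, max_norm_var),
                       allow_input_downcast=True)
print(clip([1.0, -2.0, 1.0], 5.0))  # L1 norm 4 < 5 -> unchanged
print(clip([10.0, -10.0], 5.0))     # L1 norm 20 -> rescaled to [2.5, -2.5]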
def fprop(self, x):
if_longer = x[:self.required]
padding = ReplicateLayer(TT.max([1, self.required - x.shape[0]]))(x[-1]).out
if_shorter = TT.concatenate([x, padding])
diff = x.shape[0] - self.required
self.out = ifelse(diff < 0, if_shorter, if_longer)
return self.out
def apply_dropout(self, x, noise):
return ifelse(self._is_training, noise * x, x)
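# --- hedged sketch (not from the original source) ---
# Minimal stand-alone version of the apply_dropout switch above: the noise mask is
# multiplied in only when the integer `is_training` flag is non-zero.
import numpy
import theano
import theano.tensor as T
from theano.ifelse import ifelse

act = T.matrix('act')
noise_mask = T.matrix('noise_mask')
is_training = T.iscalar('is_training')
apply_noise = theano.function([act, noise_mask, is_training],
                              ifelse(is_training, noise_mask * act, act),
                              allow_input_downcast=True)
ones = numpy.ones((2, 2), dtype=theano.config.floatX)
mask = numpy.asarray([[1, 0], [0, 1]], dtype=theano.config.floatX)
print(apply_noise(ones, mask, 1))  # mask applied during training
print(apply_noise(ones, mask, 0))  # identity at test time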
def fit(self, X, y=None):
self.n_features = y.shape[0]
self.weights['input'] = theano.shared(value=np.zeros((
self.n_features, X.shape[1], self.spatial[0], self.spatial[1]),
dtype=theano.config.floatX), name='w', borrow=True)
input = T.tensor4(name='input')
target = T.tensor4(name='target')
decay = T.scalar(name='decay')
xy = T.nnet.conv2d(input.transpose(1,0,2,3), target.transpose(1,0,2,3),
border_mode=self.pad, subsample=self.stride)
xx = T.sum(T.power(input, 2), axis=(0,2,3))
k = ifelse(self.hidden_matrices['input'] is None, )
lam = theano.shared(value=self._C, name='constrain', borrow=True)
prediction = T.nnet.conv2d(input, self.weights['input'],
border_mode=self.pad,
subsample=self.stride)
weights, _ = theano.scan(
fn=lambda a, k, c: a/(k+c), outputs_info=None,
sequences=[self.hidden_matrices['A'].transpose(1,0,2,3),
self.hidden_matrices['K']], non_sequences=lam)
new_weights = weights.transpose(1,0,2,3)
updates = [(self.hidden_matrices['K'],
self.hidden_matrices['K'].dot(decay)+xx),
(self.hidden_matrices['A'],
self.hidden_matrices['A'].dot(decay) + xy),
(self.weights['input'], new_weights)]
self.conv_fct['train'] = theano.function([input, target, decay],
prediction,
updates=updates)
self.conv_fct['predict'] = theano.function([input], prediction)
return self.conv_fct['train'](X, y, 1)
def inner_fn_sample(stm1):
prior_stmu = T.tanh( T.dot(Wl_stmu_stm1, stm1) + bl_stmu )
prior_stsig = T.nnet.softplus( T.dot(Wl_stsig_stm1, stm1) + bl_stsig ) + sig_min_states
# Set explicit prior on score during last time step
#prior_stmu = ifelse(T.lt(t,n_run_steps - 5),prior_stmu, T.set_subtensor(prior_stmu[0,:],0.1))
#prior_stsig = ifelse(T.lt(t,n_run_steps - 5),prior_stsig, T.set_subtensor(prior_stsig[0,:],0.001))
st = prior_stmu + theano_rng.normal((n_s,n_samples))*prior_stsig
ost = T.nnet.relu( T.dot(Wl_ost_st,st) + bl_ost )
ost2 = T.nnet.relu( T.dot(Wl_ost2_ost,ost) + bl_ost2 )
ost3 = T.nnet.relu( T.dot(Wl_ost3_ost2,ost2) + bl_ost3 )
otmu = T.dot(Wl_otmu_st, ost3) + bl_otmu
otsig = T.nnet.softplus(T.dot(Wl_otsig_st, ost3) + bl_otsig) + sig_min_obs
ohtmu = T.dot(Wl_ohtmu_st, ost3) + bl_ohtmu
ohtsig = T.nnet.softplus( T.dot(Wl_ohtsig_st, ost3) + bl_ohtsig ) + sig_min_obs
oatmu = T.dot(Wl_oatmu_st, ost3) + bl_oatmu
oatsig = T.nnet.softplus( T.dot(Wl_oatsig_st, ost3) + bl_oatsig ) + sig_min_obs
ot = otmu + theano_rng.normal((n_o,n_samples))*otsig
oht = ohtmu + theano_rng.normal((n_oh,n_samples))*ohtsig
oat = oatmu + theano_rng.normal((n_oa,n_samples))*oatsig
return st, ohtmu, ohtsig, ot, oht, oat, prior_stmu, prior_stsig
# Define initial state and action
def get_train_function(self):
# specify the computational graph
num_param_vecs = T.scalar('num_param_vecs')
# weight = theano.shared(np.random.randn(len(self.feature_map), self.num_param_vecs), name='weight')
weight = theano.shared(np.zeros((len(self.feature_map), self.num_param_vecs)), name='weight')
feat_mat = sparse.csr_matrix(name='feat_mat')
pred = T.nnet.sigmoid( sparse.dot(feat_mat, weight) ) # one-vs-rest
o_pred = ifelse(T.gt(self.num_param_vecs, 1), pred / pred.sum(axis=1).reshape((pred.shape[0], 1)), T.concatenate( [pred, 1-pred], axis=1 ) )
f_target = T.matrix('f_target')
f_mask_mat = sparse.csr_matrix(name='f_mask_mat')
f_sum_pred = sparse.dot( f_mask_mat, o_pred )
f_pred = f_sum_pred / f_sum_pred.sum(axis=1).reshape((f_sum_pred.shape[0], 1))
i_target = T.matrix('i_target')
i_mask_mat = sparse.csr_matrix(name='l_mask_mat')
i_pred = sparse.dot( i_mask_mat, pred )
# objective = self.param.feature_lambda * T.nnet.categorical_crossentropy(f_pred, f_target).sum() + T.nnet.binary_crossentropy(i_pred, i_target).sum() + self.param.l2_lambda * (weight ** 2).sum() / 2
objective = 0.0 * T.nnet.categorical_crossentropy(f_pred, f_target).sum() + T.nnet.binary_crossentropy(i_pred, i_target).sum() + self.param.l2_lambda * (weight ** 2).sum() / 2
grad_weight = T.grad(objective, weight)
# print 'Compiling function ...'
# compile the function
train = theano.function(inputs = [feat_mat, f_mask_mat, f_target, i_mask_mat, i_target], outputs = [objective, weight], updates=[(weight, weight - 0.1*grad_weight)] )
return train
def Recurrence(processed_frames, h0, reset):
"""
processed_frames.shape: (batch size, n frames, DIM)
h0.shape: (batch size, N_GRUS, DIM)
reset.shape: ()
output.shape: (batch size, n frames, DIM)
"""
# print "warning no recurrence"
# return T.zeros_like(processed_frames), h0
learned_h0 = lib.param(
'Recurrence.h0',
numpy.zeros((N_GRUS, DIM), dtype=theano.config.floatX)
)
learned_h0 = T.alloc(learned_h0, h0.shape[0], N_GRUS, DIM)
learned_h0 = T.patternbroadcast(learned_h0, [False] * learned_h0.ndim)
h0 = theano.ifelse.ifelse(reset, learned_h0, h0)
gru0 = lib.ops.LowMemGRU('Recurrence.GRU0', DIM, DIM, processed_frames, h0=h0[:, 0])
grus = [gru0]
for i in xrange(1, N_GRUS):
gru = lib.ops.LowMemGRU('Recurrence.GRU'+str(i), DIM, DIM, grus[-1], h0=h0[:, i])
grus.append(gru)
last_hidden = T.stack([gru[:,-1] for gru in grus], axis=1)
return (grus[-1], last_hidden)
def _forward(self):
eps = self.eps
param_size = (1, 1, self.n_output, 1, 1)
self.gamma = self.declare(param_size)
self.beta = self.declare(param_size)
mean = self.inpt.mean(axis=[0, 1, 3, 4], keepdims=False)
std = self.inpt.std(axis=[0, 1, 3, 4], keepdims=False)
self._setup_running_metrics(self.n_output)
self.running_mean.default_update = ifelse(
self.training,
(1.0 - self.alpha) * self.running_mean + self.alpha * mean,
self.running_mean
)
self.running_std.default_update = ifelse(
self.training,
(1.0 - self.alpha) * self.running_std + self.alpha * std,
self.running_std
)
# This will be optimized away, but ensures the running mean and the running std get updated.
# Reference: https://gist.github.com/f0k/f1a6bd3c8585c400c190#file-batch_norm-py-L86
mean += 0 * self.running_mean
std += 0 * self.running_std
use_mean = ifelse(self.training, mean, self.running_mean)
use_std = ifelse(self.training, std, self.running_std)
use_mean = use_mean.dimshuffle('x', 'x', 0, 'x', 'x')
use_std = use_std.dimshuffle('x', 'x', 0, 'x', 'x')
norm_inpt = (self.inpt - use_mean) / (use_std + eps)
self.output = self.gamma * norm_inpt + self.beta
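# --- hedged sketch (not from the original source) ---
# Minimal version of the default_update trick used in _forward above: the running
# mean shared variable is updated only while the `training` flag is 1, because ifelse
# guards the expression attached via default_update, and the `+ 0 * running_mean`
# term keeps the shared variable in the graph so the update is actually scheduled.
import numpy
import theano
import theano.tensor as T
from theano.ifelse import ifelse

training = theano.shared(numpy.int8(1), name='training')  # 1 = train, 0 = inference
inp = T.vector('inp')
running_mean = theano.shared(numpy.zeros(1, dtype=theano.config.floatX), name='running_mean')
batch_mean = inp.mean(keepdims=True)
running_mean.default_update = ifelse(training,
                                     0.9 * running_mean + 0.1 * batch_mean,
                                     running_mean)
stats = theano.function([inp], batch_mean + 0 * running_mean)
stats(numpy.ones(4, dtype=theano.config.floatX))
print(running_mean.get_value())  # [0.1]: moved towards the batch mean
training.set_value(numpy.int8(0))
stats(numpy.ones(4, dtype=theano.config.floatX))
print(running_mean.get_value())  # [0.1]: unchanged in inference mode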
def process_pre_post_w(padding_arr, zeros_arr):
argmax = T.argmax(padding_arr)
zeros_arr = ifelse(T.eq(padding_arr[argmax], 0), zeros_arr,
T.set_subtensor(zeros_arr[argmax-2:argmax+3], 1.5 / (T.sum(padding_arr[argmax-2:argmax+3]))))
return_arr = (zeros_arr * padding_arr)[2: -2]
return return_arr
def __call__(self, input):
mean = input.mean(self.axes, keepdims=True)
std = input.std(self.axes, keepdims=True) + self.epsilon
# Don't batch-normalise a single data point
mean = ifelse(T.gt(input.shape[0], 1), mean, T.zeros(mean.shape, dtype=mean.dtype))
std = ifelse(T.gt(input.shape[0], 1), std, T.ones(std.shape, dtype=std.dtype))
return (input - mean) * T.addbroadcast((self.gamma / std) + self.beta, *self.axes)
def forward_batch(self, x, mask):
"""
:param x: (batch, length, dim)
:param mask: (batch, length, )
:return: (batch, length, hidden_dim)
"""
# conv_after_length = length - kernel + 2 * padding_size + 1
new_x = x
if self.padding_size > 0:
# (padding_size + length + padding_size, dim)
new_x = temporal_padding_3d(x, (self.padding_size, self.padding_size))
# (batch, conv_after_length)
mask = temporal_padding_mask(mask, kernel_size=self.kernel_size, padding_size=self.padding_size)
elif self.padding_size == 0:
# (batch, conv_after_length)
mask = temporal_padding_mask(mask, kernel_size=self.kernel_size, padding_size=0)
else:
raise RuntimeError("Dilation Rate >= 0")
# safe_x = temporal_padding_3d(x, (0, self.kernel_size - x.shape[1]))
# safe_mask = T.ones((x.shape[0], ), dtype=theano.config.floatX).dimshuffle([0, 'x'])
# !!! convert safe_mask from col to matrix
# safe_mask = T.unbroadcast(safe_mask, 1)
# x, mask = ifelse(T.gt(self.kernel_size - x.shape[1], 0),
# (safe_x, safe_mask),
# (new_x, mask))
# (batch, conv_after_length, hidden_dim)
conv_result = self.forward_conv_batch(new_x)
# new_x = Print(new_x)
# mask = Print()(mask)
pooling_result = get_pooling_batch(conv_result, mask, self.pooling)
dropout_out = dropout_from_layer(pooling_result, self.dropout)
return self.act.activate(dropout_out + self.b)
def test_not_lazy_if_inplace(self):
# Tests that if the outputs are scalars and the graph is big,
# we disable the inplace opt to speed up optimization
x = tensor.vector('x', dtype=self.dtype)
y = tensor.vector('y', dtype=self.dtype)
c = tensor.iscalar('c')
mode = theano.compile.get_mode(self.mode).excluding(
# Disable many opt to keep the graph big enough to disable
# the opt.
'fusion', 'local_add_canonizer',
'inplace', 'constant_folding', 'constant_folding')
y2 = reduce(lambda x, y: x + y, [y] + list(range(200)))
f = theano.function([c, x, y], ifelse(c, x, y2), mode=mode)
# For not inplace ifelse
ifnode = [n for n in f.maker.fgraph.toposort()
if isinstance(n.op, IfElse)]
assert len(ifnode) == 1
assert not ifnode[0].op.as_view
rng = numpy.random.RandomState(utt.fetch_seed())
xlen = rng.randint(200)
ylen = rng.randint(200)
vx = numpy.asarray(rng.uniform(size=(xlen,)), self.dtype)
vy = numpy.asarray(rng.uniform(size=(ylen,)), self.dtype)
assert numpy.allclose(vx, f(1, vx, vy))
assert numpy.allclose(vy + sum(range(200)), f(0, vx, vy))
def test_mixed_dtype(self):
x1 = tensor.vector('x1', dtype='int32')
x2 = tensor.vector('x2', dtype=self.dtype)
y1 = tensor.vector('y1', dtype='int32')
y2 = tensor.vector('y2', dtype=self.dtype)
c = tensor.iscalar('c')
f = theano.function([c, x1, x2, y1, y2],
ifelse(c, (x1, x2), (y1, y2)), mode=self.mode)
self.assertFunctionContains1(f, self.get_ifelse(2))
rng = numpy.random.RandomState(utt.fetch_seed())
xlen = rng.randint(200)
ylen = rng.randint(200)
vx1 = numpy.asarray(rng.uniform(size=(xlen,)) * 3, 'int32')
vx2 = numpy.asarray(rng.uniform(size=(xlen,)), self.dtype)
vy1 = numpy.asarray(rng.uniform(size=(ylen,)) * 3, 'int32')
vy2 = numpy.asarray(rng.uniform(size=(ylen,)), self.dtype)
o1, o2 = f(1, vx1, vx2, vy1, vy2)
assert numpy.allclose(vx1, o1)
assert numpy.allclose(vx2, o2)
o1, o2 = f(0, vx1, vx2, vy1, vy2)
assert numpy.allclose(vy1, o1)
assert numpy.allclose(vy2, o2)