def compute(self, state, w_idx, feat, scene):
# word embedding
word_vec = self.embedding.compute(w_idx)
# split states
e_tm1, c_tm1, h_tm1 = split_state(state, scheme=[(1, self.config['na']), (2, self.config['nh'])])
# attention
e_t, alpha = self.attention.compute(feat, T.concatenate([e_tm1, h_tm1, word_vec], axis=1))
# lstm step
e_w_s = T.concatenate([e_t, word_vec, scene], axis=-1)
c_t, h_t = self.lstm.compute(e_w_s, c_tm1, h_tm1)
# merge state
new_state = T.concatenate([e_t, c_t, h_t], axis=-1)
# add w_{t-1} as feature
e_h_w_s = T.concatenate([e_t, h_t, word_vec, scene], axis=-1)
# predict probability
p = self.pred_mlp.compute(e_h_w_s)
return new_state, p, alpha
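The split_state helper called above is not included in this excerpt. Below is a minimal sketch of what its scheme argument (a list of (count, width) pairs) presumably does; the slicing convention is an assumption, not the original implementation:

def split_state(state, scheme):
    # hypothetical reconstruction: slice the concatenated state column-wise,
    # e.g. scheme=[(1, na), (2, nh)] -> [e_tm1 (na), c_tm1 (nh), h_tm1 (nh)]
    pieces, offset = [], 0
    for count, width in scheme:
        for _ in range(count):
            pieces.append(state[:, offset:offset + width])
            offset += width
    return pieces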
Example source code for Python concatenate()
def build_encoder_bi(tparams, options):
"""
build bidirectional encoder, given pre-computed word embeddings
"""
# word embedding (source)
embedding = tensor.tensor3('embedding', dtype='float32')
embeddingr = embedding[::-1]
x_mask = tensor.matrix('x_mask', dtype='float32')
xr_mask = x_mask[::-1]
# encoder
proj = get_layer(options['encoder'])[1](tparams, embedding, options,
prefix='encoder',
mask=x_mask)
projr = get_layer(options['encoder'])[1](tparams, embeddingr, options,
prefix='encoder_r',
mask=xr_mask)
ctx = tensor.concatenate([proj[0][-1], projr[0][-1]], axis=1)
return embedding, x_mask, ctx
# some utilities
def param_init_gru(options, params, prefix='gru', nin=None, dim=None):
"""
parameter init for GRU
"""
    if nin is None:
        nin = options['dim_proj']
    if dim is None:
        dim = options['dim_proj']
    # W and U stack the reset- and update-gate weights along the last axis,
    # hence the 2 * dim bias below
    W = numpy.concatenate([norm_weight(nin, dim),
                           norm_weight(nin, dim)], axis=1)
    params[_p(prefix, 'W')] = W
    params[_p(prefix, 'b')] = numpy.zeros((2 * dim,)).astype('float32')
    U = numpy.concatenate([ortho_weight(dim),
                           ortho_weight(dim)], axis=1)
    params[_p(prefix, 'U')] = U
    # Wx, Ux and bx parameterize the candidate hidden state
    Wx = norm_weight(nin, dim)
    params[_p(prefix, 'Wx')] = Wx
    Ux = ortho_weight(dim)
    params[_p(prefix, 'Ux')] = Ux
    params[_p(prefix, 'bx')] = numpy.zeros((dim,)).astype('float32')
return params
def get_output_for(self, input, deterministic=False, **kwargs):
    def _phase_shift(input, r):
        bsize, c, a, b = input.shape[0], 1, self.output_shape[2] // r, self.output_shape[3] // r
        X = T.reshape(input, (bsize, r, r, a, b))
        X = T.transpose(X, (0, 3, 4, 1, 2))  # bsize, a, b, r2, r1
        X = T.split(x=X, splits_size=[1] * a, n_splits=a, axis=1)  # a, [bsize, b, r, r]
        X = [T.reshape(x, (bsize, b, r, r)) for x in X]
        X = T.concatenate(X, axis=2)  # bsize, b, a*r, r
        X = T.split(x=X, splits_size=[1] * b, n_splits=b, axis=1)  # b, [bsize, a*r, r]
        X = [T.reshape(x, (bsize, a * r, r)) for x in X]
        X = T.concatenate(X, axis=2)  # bsize, a*r, b*r
        return X.dimshuffle(0, 'x', 1, 2)
    Xc = T.split(x=input, splits_size=[input.shape[1] // self.c] * self.c, n_splits=self.c, axis=1)
    return T.concatenate([_phase_shift(xc, self.r) for xc in Xc], axis=1)
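For intuition, the split/concatenate dance inside _phase_shift amounts to a depth-to-space rearrangement. A plain NumPy sketch of the same layout for a single channel group (the toy sizes are made up):

import numpy as np

n, r, a, b = 1, 2, 3, 3                                     # toy sizes
x = np.arange(n * r * r * a * b).reshape(n, r * r, a, b)    # (N, r*r, a, b) block
y = (x.reshape(n, r, r, a, b)
      .transpose(0, 3, 1, 4, 2)                             # N, a, r, b, r
      .reshape(n, a * r, b * r))                            # (N, a*r, b*r): upscaled by r
print(y.shape)                                              # (1, 6, 6)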
# Multiscale Dilated Convolution Block
# This function (not a layer in and of itself, though you could make it one) returns a set of concatenated conv2d and dilated conv2d layers.
# Each layer uses the same basic filter W, operating at a different dilation factor (or taken as the mean of W for the 1x1 conv).
# The channel-wise output of each layer is weighted by a set of coefficients, which are initialized to 1 / the total number of dilation scales,
# meaning that we're starting by taking an elementwise mean. These should be learnable parameters.
# NOTES: - I'm considering changing the variable names to be more descriptive and less like ridiculous academic code. It's on the to-do list.
#        - I keep the bias and nonlinearity out of the default definition for this layer, as I expect it to be batch-normed and nonlinearized in the model config.
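The block these notes describe is not included in this excerpt. Here is a minimal pure-Theano sketch of the idea they outline (the same filter applied at several dilation factors, each output scaled by a coefficient initialised to 1 / the number of scales); the name mdc_block, the 'same' padding choice, and the omission of the 1x1 mean-of-W branch are assumptions, not the author's code:

import numpy as np
import theano
import theano.tensor as T

def mdc_block(X, W, dilations=(1, 2, 4)):
    # apply the same filter bank W at several dilation factors and sum the
    # results, each scaled by a learnable coefficient that starts at
    # 1 / len(dilations), i.e. an elementwise mean at initialisation
    k = W.get_value().shape[-1]                  # square filter width, assumed odd
    out, scales = 0, []
    for d in dilations:
        pad = d * (k // 2)                       # keep spatial size equal across scales
        conv = T.nnet.conv2d(X, W, border_mode=(pad, pad), filter_dilation=(d, d))
        s = theano.shared(np.float32(1.0 / len(dilations)), name='mdc_scale_%d' % d)
        scales.append(s)
        out = out + s * conv
    return out, scales                           # the scales belong in the model's trainable params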
def dist_info_sym(self, obs_var, latent_var=None):  # this is meant to be for one path!
# now this is not doing anything! And for computing the dist_info_vars of npo_snn_rewardMI it doesn't work
if latent_var is None:
latent_var1 = theano.shared(np.expand_dims(self.latent_fix, axis=0)) # new fix to avoid putting the latent as an input: just take the one fixed!
latent_var = TT.tile(latent_var1, [obs_var.shape[0], 1])
# generate the generalized input (append latents to obs.)
if self.bilinear_integration:
extended_obs_var = TT.concatenate([obs_var, latent_var,
TT.flatten(obs_var[:, :, np.newaxis] * latent_var[:, np.newaxis, :],
outdim=2)]
, axis=1)
else:
extended_obs_var = TT.concatenate([obs_var, latent_var], axis=1)
mean_var, log_std_var = L.get_output([self._l_mean, self._l_log_std], extended_obs_var)
if self.min_std is not None:
log_std_var = TT.maximum(log_std_var, np.log(self.min_std))
return dict(mean=mean_var, log_std=log_std_var)
def ctc_update_log_p(skip_idxs, zeros, active, log_p_curr, log_p_prev):
active_skip_idxs = skip_idxs[(skip_idxs < active).nonzero()]
active_next = T.cast(T.minimum(
T.maximum(
active + 1,
T.max(T.concatenate([active_skip_idxs, [-1]])) + 2 + 1
), log_p_curr.shape[0]), 'int32')
common_factor = T.max(log_p_prev[:active])
p_prev = T.exp(log_p_prev[:active] - common_factor)
_p_prev = zeros[:active_next]
# copy over
_p_prev = T.set_subtensor(_p_prev[:active], p_prev)
# previous transitions
_p_prev = T.inc_subtensor(_p_prev[1:], _p_prev[:-1])
# skip transitions
_p_prev = T.inc_subtensor(_p_prev[active_skip_idxs + 2], p_prev[active_skip_idxs])
updated_log_p_prev = T.log(_p_prev) + common_factor
log_p_next = T.set_subtensor(
zeros[:active_next],
log_p_curr[:active_next] + updated_log_p_prev
)
return active_next, log_p_next
def update_opt(self, f, target, inputs, reg_coeff):
self.target = target
self.reg_coeff = reg_coeff
params = target.get_params(trainable=True)
constraint_grads = theano.grad(
f, wrt=params, disconnected_inputs='warn')
xs = tuple([ext.new_tensor_like("%s x" % p.name, p) for p in params])
def Hx_plain():
Hx_plain_splits = TT.grad(
TT.sum([TT.sum(g * x)
for g, x in zip(constraint_grads, xs)]),
wrt=params,
disconnected_inputs='warn'
)
return TT.concatenate([TT.flatten(s) for s in Hx_plain_splits])
self.opt_fun = ext.lazydict(
f_Hx_plain=lambda: ext.compile_function(
inputs=inputs + xs,
outputs=Hx_plain(),
log_name="f_Hx_plain",
),
)
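The nested grad inside Hx_plain is the standard Hessian-vector product trick: differentiate the dot product of the gradient with a fixed vector. A toy stand-alone check (all names here are made up):

import numpy as np
import theano
import theano.tensor as TT

w = theano.shared(np.array([1.0, 2.0]), name='w')
v = TT.dvector('v')
f = TT.sum(w ** 4)
g = theano.grad(f, w)                        # df/dw = 4 w^3
Hv = theano.grad(TT.sum(g * v), w)           # d/dw (g . v) = H v, without forming H
print(theano.function([v], Hv)(np.array([1.0, 1.0])))   # [12. 48.], since H = diag(12 w^2)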
def get_action(self, observation):
if self.state_include_action:
if self.prev_action is None:
prev_action = np.zeros((self.action_space.flat_dim,))
else:
prev_action = self.action_space.flatten(self.prev_action)
all_input = np.concatenate([
self.observation_space.flatten(observation),
prev_action
])
else:
all_input = self.observation_space.flatten(observation)
# should not be used
prev_action = np.nan
probs, hidden_vec = [x[0] for x in self.f_step_prob([all_input], [self.prev_hidden])]
action = special.weighted_sample(probs, range(self.action_space.n))
self.prev_action = action
self.prev_hidden = hidden_vec
agent_info = dict(prob=probs)
if self.state_include_action:
agent_info["prev_action"] = prev_action
return action, agent_info
def get_action(self, observation):
if self._state_include_action:
if self._prev_action is None:
prev_action = np.zeros((self.action_space.flat_dim,))
else:
prev_action = self.action_space.flatten(self._prev_action)
all_input = np.concatenate([
self.observation_space.flatten(observation),
prev_action
])
else:
all_input = self.observation_space.flatten(observation)
# should not be used
prev_action = np.nan
mean, log_std, hidden_vec = [x[0] for x in self._f_step_mean_std([all_input], [self._prev_hidden])]
rnd = np.random.normal(size=mean.shape)
action = rnd * np.exp(log_std) + mean
self._prev_action = action
self._prev_hidden = hidden_vec
agent_info = dict(mean=mean, log_std=log_std)
if self._state_include_action:
agent_info["prev_action"] = prev_action
return action, agent_info
def __init__(self, incoming, unchanged_W, unchanged_W_shape,
oov_in_train_W, oov_in_train_W_shape,
p=0.5, rescale=True, dropout_mask=None,
**kwargs):
super(CustomEmbedding, self).__init__(incoming, **kwargs)
self.output_size = unchanged_W_shape[1]
self.unchanged_W = self.add_param(unchanged_W, unchanged_W_shape,
name="unchanged_W",
trainable=False,
regularizable=False)
self.oov_in_train_W = self.add_param(oov_in_train_W,
oov_in_train_W_shape, name='oov_in_train_W')
self.W = T.concatenate([self.unchanged_W, self.oov_in_train_W])
self.p = p
self.rescale = rescale
if dropout_mask is None:
dropout_mask = RandomStreams(_rng.randint(1, 2147462579)).binomial(self.W.shape,
p=1 - self.p,
dtype=self.W.dtype)
self.dropout_mask = dropout_mask
def forward(self, inputtensor):
#print('resnet.forward.shape: {}'.format(inputtensor[0].ndim))
o1 = self.conv1.forward(inputtensor)
o2 = self.bn1.forward(o1)
o3 = self.relu1.forward(o2)
o4 = self.conv2.forward(o3)
o5 = self.bn2.forward(o4)
if self.increaseDim:
subx = T.signal.pool.pool_2d(inputtensor[0], (2,2), ignore_border=True)
#print('resnet.forward.subx.ndim: {}'.format(subx.ndim))
retx = T.zeros_like(subx)
#print('resnet.forward.retx.ndim: {}'.format(retx.ndim))
sumx = T.concatenate([subx, retx], axis=1)
#print('resnet.forward.sumx.ndim: {}'.format(sumx.ndim))
out = self.relu2.forward([o5[0]+sumx,])
#print('resnet.forward.out.ndim: {}'.format(out[0].ndim))
else:
out = self.relu2.forward([o5[0]+inputtensor[0],])
return out
From 2-train_dcgan.py (project: Deep-Learning-with-Theano, author: PacktPublishing):
def gen_samples(n, nbatch=128):
samples = []
labels = []
n_gen = 0
    for i in range(n // nbatch):
ymb = floatX(OneHot(np_rng.randint(0, 10, nbatch), ny))
zmb = floatX(np_rng.uniform(-1., 1., size=(nbatch, nz)))
xmb = _gen(zmb, ymb)
samples.append(xmb)
labels.append(np.argmax(ymb, axis=1))
n_gen += len(xmb)
n_left = n-n_gen
ymb = floatX(OneHot(np_rng.randint(0, 10, n_left), ny))
zmb = floatX(np_rng.uniform(-1., 1., size=(n_left, nz)))
xmb = _gen(zmb, ymb)
samples.append(xmb)
labels.append(np.argmax(ymb, axis=1))
return np.concatenate(samples, axis=0), np.concatenate(labels, axis=0)
def __init__(self, prev_layers, axis=1):
"""
list of prev layers to concatenate
axis to concatenate
For tensor5, channel dimension is axis=2 (due to theano conv3d
convention). For image, axis=1
"""
assert (len(prev_layers) > 1)
super().__init__(prev_layers[0])
self._axis = axis
self._prev_layers = prev_layers
self._output_shape = self._input_shape.copy()
for prev_layer in prev_layers[1:]:
self._output_shape[axis] += prev_layer._output_shape[axis]
print('Concat the prev layer to [%s]' % ','.join(str(x) for x in self._output_shape))
def project3Dto2D(self, Li, idxs):
"""
Project 3D point to 2D
:param Li: joints in normalized 3D
:param idxs: frames specified by subset
:return: 2D points, in normalized 2D coordinates
"""
if not isinstance(idxs, numpy.ndarray):
idxs = numpy.asarray([idxs])
# 3D -> 2D projection also shift by M to cropped window
Li_glob3D = (numpy.reshape(Li, (len(idxs), self.numJoints, 3))*self.Di_scale[idxs][:, None, None]+self.Di_off3D[idxs][:, None, :]).reshape((len(idxs)*self.numJoints, 3))
Li_glob3D_hom = numpy.concatenate([Li_glob3D, numpy.ones((len(idxs)*self.numJoints, 1), dtype='float32')], axis=1)
Li_glob2D_hom = numpy.dot(Li_glob3D_hom, self.cam_proj.T)
Li_glob2D = (Li_glob2D_hom[:, 0:3] / Li_glob2D_hom[:, 3][:, None]).reshape((len(idxs), self.numJoints, 3))
Li_img2D_hom = numpy.einsum('ijk,ikl->ijl', Li_glob2D, self.Di_trans2D[idxs])
Li_img2D = (Li_img2D_hom[:, :, 0:2] / Li_img2D_hom[:, :, 2][:, :, None]).reshape((len(idxs), self.numJoints*2))
Li_img2Dcrop = (Li_img2D - (self.Di.shape[3]/2.)) / (self.Di.shape[3]/2.)
return Li_img2Dcrop
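The heavy lifting above is a homogeneous projection followed by a 2D crop transform; the class uses a full 4x4 self.cam_proj. As a sanity check, the core perspective divide in plain NumPy with a made-up 3x4 pinhole matrix (fx = fy = 500, principal point at (160, 120)):

import numpy as np

cam_proj = np.array([[500., 0., 160., 0.],
                     [0., 500., 120., 0.],
                     [0., 0., 1., 0.]])
pt3d = np.array([[100., 50., 800.]])                      # one 3D point
pt_hom = np.concatenate([pt3d, np.ones((1, 1))], axis=1)  # -> (x, y, z, 1)
proj = pt_hom.dot(cam_proj.T)                             # -> (u*w, v*w, w)
uv = proj[:, :2] / proj[:, 2:3]                           # perspective divide
print(uv)                                                 # [[222.5 151.25]]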
def _add_blanks(y, blank_symbol, y_mask=None):
"""Add blanks to a matrix and updates mask
Input shape: output_seq_len x num_batch
Output shape: 2*output_seq_len+1 x num_batch
"""
# for y
y_extended = y.T.dimshuffle(0, 1, 'x')
blanks = tensor.zeros_like(y_extended) + blank_symbol
concat = tensor.concatenate([y_extended, blanks], axis=2)
res = concat.reshape((concat.shape[0],
concat.shape[1] * concat.shape[2])).T
begining_blanks = tensor.zeros((1, res.shape[1])) + blank_symbol
blanked_y = tensor.concatenate([begining_blanks, res], axis=0)
# for y_mask
if y_mask is not None:
y_mask_extended = y_mask.T.dimshuffle(0, 1, 'x')
concat = tensor.concatenate([y_mask_extended,
y_mask_extended], axis=2)
res = concat.reshape((concat.shape[0],
concat.shape[1] * concat.shape[2])).T
begining_blanks = tensor.ones((1, res.shape[1]), dtype=floatX)
blanked_y_mask = tensor.concatenate([begining_blanks, res], axis=0)
else:
blanked_y_mask = None
return blanked_y.astype('int32'), blanked_y_mask
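For reference, the effect of this interleaving on a single sequence, written in plain NumPy (the label values and blank_symbol = 0 are made up):

import numpy as np

y = np.array([3, 1, 2])                                   # output_seq_len = 3
blanks = np.zeros_like(y)                                 # blank_symbol = 0
interleaved = np.stack([y, blanks], axis=1).reshape(-1)   # [3 0 1 0 2 0]
blanked = np.concatenate([[0], interleaved])              # [0 3 0 1 0 2 0]
print(blanked)                                            # length 2*3 + 1 = 7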
def get_output_for(self, input, **kwargs):
input_shape = input.shape
if self.dilation[0] > 1:
# pad such that the time axis length is divisible by the dilation factor
pad_w = (self.dilation[0] - input_shape[2] % self.dilation[0]) % self.dilation[0]
input = T.concatenate((input, T.zeros((input_shape[0], input_shape[1], pad_w, input_shape[3]), input.dtype)), axis=2)
# rearrange data to fold the time axis into the minibatch dimension
input = input.reshape((input_shape[0], input_shape[1], -1, self.dilation[0], input_shape[3]))
input = input.transpose(0, 3, 1, 2, 4)
input = input.reshape((-1,) + tuple(input.shape[2:]))
output = super(TimeDilatedMaxPool2DLayer, self).get_output_for(input, **kwargs)
if self.dilation[0] > 1:
# restore the time axis from the minibatch dimension
output = output.reshape((input_shape[0], self.dilation[0]) + tuple(output.shape[1:]))
output = output.transpose(0, 2, 3, 1, 4)
output = output.reshape((input_shape[0], output.shape[1], -1, output.shape[4]))
# remove the padding
output = output[:, :, :output.shape[2] - pad_w]
return output
def _ctc_normal(self, predict,labels):
n = labels.shape[0]
labels2 = T.concatenate((labels, [self.tpo["CTC_blank"], self.tpo["CTC_blank"]]))
sec_diag = T.neq(labels2[:-2], labels2[2:]) * \
T.eq(labels2[1:-1], self.tpo["CTC_blank"])
recurrence_relation = \
T.eye(n) + \
T.eye(n, k=1) + \
T.eye(n, k=2) * sec_diag.dimshuffle((0, 'x'))
pred_y = predict[:, labels]
probabilities, _ = theano.scan(
lambda curr, accum: curr * T.dot(accum, recurrence_relation),
sequences=[pred_y],
outputs_info=[T.eye(n)[0]]
)
labels_probab = T.sum(probabilities[-1, -2:])
return -T.log(labels_probab)
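To see what recurrence_relation encodes, here it is evaluated in plain NumPy for a small, already-blanked label sequence (the blank symbol and label values are made up):

import numpy as np

blank = 0
labels = np.array([blank, 3, blank, 4, blank])
n = len(labels)
labels2 = np.concatenate([labels, [blank, blank]])
sec_diag = (labels2[:-2] != labels2[2:]) * (labels2[1:-1] == blank)
rec = np.eye(n) + np.eye(n, k=1) + np.eye(n, k=2) * sec_diag[:, None]
print(rec)   # row 1 gains an extra 1 in column 3: the only allowed label skip (3 -> 4)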
def visualize_weights(self, layer, imsize, layout):
"""
Displays the weights of a specified layer as images.
:param layer: the layer whose weights to display
:param imsize: the image size
:param layout: number of rows and columns for each page
:return: none
"""
if layer < self.net.n_layers:
self.net.visualize_weights(layer, imsize, layout)
elif layer == self.net.n_layers:
helper.disp_imdata(np.concatenate([W.get_value() for W in [self.Wa] + self.Wms + self.WUs], axis=1).T, imsize, layout)
plt.show(block=False)
else:
raise ValueError('Layer {} doesn\'t exist.'.format(layer))
def visualize_activations(self, x):
"""
Visualizes the activations in the mdn caused by a given data minibatch.
:param x: a minibatch of data
:return: none
"""
self.net.visualize_activations(x)
forwprop = theano.function(
inputs=[self.input],
outputs=[self.a, tt.concatenate(self.ms, axis=1) + tt.concatenate([tt.reshape(U, [U.shape[0], -1]) for U in self.Us], axis=1)]
)
activations = forwprop(x.astype(dtype))
for a, title in izip(activations, ['mixing coefficients', 'means', 'scale matrices']):
fig = plt.figure()
ax = fig.add_subplot(1, 1, 1)
ax.imshow(a, cmap='gray', interpolation='none')
ax.set_title(title)
ax.set_xlabel('layer units')
ax.set_ylabel('data points')
plt.show(block=False)
def randomize_parameters(params, sigmas, sig_min_perturbations):
r_params = []
r_epsilons = []
for i in range(len(params)):
epsilon_half = theano_rng.normal((n_perturbations/2,params[i].shape[1],params[i].shape[2]), dtype = theano.config.floatX)
r_epsilon = T.concatenate( [epsilon_half, -1.0*epsilon_half], axis = 0 )
r_param = params[i] + r_epsilon*(T.nnet.softplus( sigmas[i] ) + sig_min_perturbations)
r_params.append(r_param)
r_epsilons.append(r_epsilon)
return r_params, r_epsilons
####################################################################
#
# Create randomly perturbed version of parameters
#
####################################################################
def renet_layer_lr_noscan(X, rnn1, rnn2, w, h, wp, hp):
list_of_images = []
for i in xrange(h/hp):
# x = X[:,i*hp:(i*hp + hp),:].dimshuffle((2, 0, 1)).flatten().reshape((w/wp, X.shape[0]*wp*hp))
h_tm1 = rnn1.H0
hr_tm1 = rnn2.H0
h1 = []
h2 = []
for j in xrange(w/wp):
x = X[:,i*hp:(i*hp + hp),j*wp:(j*wp + wp)].flatten()
h_t = rnn1.recurrence(x, h_tm1)
h1.append(h_t)
h_tm1 = h_t
jr = w/wp - j - 1
xr = X[:,i*hp:(i*hp + hp),jr*wp:(jr*wp + wp)].flatten()
            hr_t = rnn2.recurrence(xr, hr_tm1)
h2.append(hr_t)
hr_tm1 = hr_t
img = T.concatenate([h1, h2])
list_of_images.append(img)
return T.stacklists(list_of_images).dimshuffle((1, 0, 2))
def renet_layer_lr_allscan(X, rnn1, rnn2, w, h, wp, hp):
# list_of_images = []
C = X.shape[0]
X = X.dimshuffle((1, 0, 2)).reshape((h/hp, hp*C*w)) # split the rows for the first scan
def rnn_pass(x):
x = x.reshape((hp, C, w)).dimshuffle((2, 1, 0)).reshape((w/wp, C*wp*hp))
h1 = rnn1.output(x)
h2 = rnn2.output(x, go_backwards=True)
img = T.concatenate([h1.T, h2.T])
# list_of_images.append(img)
return img
results, _ = theano.scan(
fn=rnn_pass,
sequences=X,
outputs_info=None,
n_steps=h/hp,
)
return results.dimshuffle((1, 0, 2))
# return T.stacklists(list_of_images).dimshuffle((1, 0, 2))