def connect(self, inputs, mask, is_train):
    """ is_train: A boolean tensor.
    """
    max_length = inputs.shape[0]
    batch_size = inputs.shape[1]
    outputs_info = [tensor.alloc(numpy_floatX(0.), batch_size, self.hidden_dim),
                    tensor.alloc(numpy_floatX(0.), batch_size, self.hidden_dim)]
    # Dropout mask sharing for variational dropout.
    self.is_train = is_train
    if self.recurrent_dropout_layer is not None:
        self.recurrent_dropout_layer.generate_mask([batch_size, self.hidden_dim], is_train)
    inputs = tensor.dot(inputs, self.W) + self.b
    rval, _ = theano.scan(self._step,  # Scan function
                          sequences=[inputs, mask],  # Input sequence
                          outputs_info=outputs_info,
                          name=_p(self.prefix, '_layers'),
                          n_steps=max_length)  # scan steps
    return rval[0]
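Note: `numpy_floatX` and `_p` are used throughout this listing but never defined in it. The definitions below are the usual ones from the Theano LSTM tutorial; they are an assumption about the original code, included only so the snippets read self-contained.

import numpy
import theano

def numpy_floatX(data):
    # Cast to the configured float width (float32 when running on GPU).
    return numpy.asarray(data, dtype=theano.config.floatX)

def _p(prefix, name):
    # Build a prefixed parameter/scan name such as 'lstm__layers'.
    return '%s_%s' % (prefix, name)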
def connect(self, inputs, mask, is_train):
    max_length = inputs.shape[0]
    batch_size = inputs.shape[1]
    outputs_info = [tensor.alloc(numpy_floatX(0.), batch_size, self.hidden_dim),
                    tensor.alloc(numpy_floatX(0.), batch_size, self.hidden_dim)]
    # Dropout layers
    self.is_train = is_train
    if self.recurrent_dropout_layer is not None:
        self.recurrent_dropout_layer.generate_mask([batch_size, self.hidden_dim], is_train)
    proj_inputs = tensor.dot(inputs, self.W) + self.b
    rval, _ = theano.scan(self._step,  # Scan function
                          sequences=[inputs, proj_inputs, mask],  # Input sequence
                          outputs_info=outputs_info,
                          name=_p(self.prefix, '_layers'),
                          n_steps=max_length)  # scan steps
    return rval[0]
def sample_scan(self, x, sigma, n_steps, samples):
    # Enable on-the-fly graph computations
    # theano.config.compute_test_value = "raise"
    in_val = T.fmatrix("input_values")
    # in_val.tag.test_value = np.asarray(
    #     np.random.rand(1, 784), dtype=theano.config.floatX)
    s_sigma = T.fscalar("sigma_values")
    # s_sigma = np.asarray(
    #     np.random.rand(1), dtype=theano.config.floatX)
    mode = "FAST_RUN"
    values, updates = theano.scan(fn=self.sample_one_step,
                                  outputs_info=in_val,
                                  non_sequences=s_sigma,
                                  n_steps=n_steps,
                                  mode=mode)
    ae_sampler = theano.function(inputs=[in_val, s_sigma],
                                 outputs=values[-1],
                                 updates=updates)
    samples = ae_sampler(x, sigma)
    return samples
def gru_layer(tparams, emb, options):
    hiddenDimSize = options['hiddenDimSize']
    timesteps = emb.shape[0]
    if emb.ndim == 3:
        n_samples = emb.shape[1]
    else:
        n_samples = 1

    def stepFn(wx, h, U_gru):
        uh = T.dot(h, U_gru)
        r = T.nnet.sigmoid(_slice(wx, 0, hiddenDimSize) + _slice(uh, 0, hiddenDimSize))
        z = T.nnet.sigmoid(_slice(wx, 1, hiddenDimSize) + _slice(uh, 1, hiddenDimSize))
        h_tilde = T.tanh(_slice(wx, 2, hiddenDimSize) + r * _slice(uh, 2, hiddenDimSize))
        h_new = z * h + ((1. - z) * h_tilde)
        return h_new

    Wx = T.dot(emb, tparams['W_gru']) + tparams['b_gru']
    results, updates = theano.scan(fn=stepFn, sequences=[Wx],
                                   outputs_info=T.alloc(numpy_floatX(0.0), n_samples, hiddenDimSize),
                                   non_sequences=[tparams['U_gru']],
                                   name='gru_layer', n_steps=timesteps)
    return results
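The `_slice` helper used above is not part of this listing. A common definition (an assumption, mirroring the Theano LSTM tutorial) slices one gate's worth of units out of a concatenated pre-activation:

def _slice(x, n, dim):
    # Return the n-th block of width `dim` along the last axis.
    if x.ndim == 3:
        return x[:, :, n * dim:(n + 1) * dim]
    return x[:, n * dim:(n + 1) * dim]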
def get_output_for(self, inputs, **kwargs):
    vals, ref = inputs

    def filt(V, R):
        if self.norm_type is not None:
            o = tt.ones((1, V.shape[1], V.shape[2]), np.float32)
            norm = gaussian_filter(R, o, self.kern_std, self.ref_dim)
            norm = tt.sqrt(norm) if self.norm_type == "sym" else norm
            norm += 1e-8
        V = V / norm if self.norm_type in ["pre", "sym"] else V
        F = gaussian_filter(R, V, self.kern_std)
        return F / norm if self.norm_type in ["post", "sym"] else F

    filtered = theano.scan(fn=filt, sequences=[vals, ref],
                           outputs_info=None)[0]
    return filtered
def set_net_params(self):
    '''Returns MLP parameters for scan.'''
    super(GRU, self).set_net_params()
    if self.input_net_aux is None:
        self.input_net_aux = MLP(
            self.dim_in, 2 * self.dim_h, 2 * self.dim_hs[0], 1,
            rng=self.rng, trng=self.trng,
            h_act='T.nnet.sigmoid', out_act='T.tanh',
            name='input_net_aux')
    else:
        assert self.input_net_aux.dim_in == self.dim_in
        assert self.input_net_aux.dim_out == 2 * self.dim_hs[0]
    self.input_net_aux.name = self.name + '_input_net_aux'
    self.nets.append(self.input_net_aux)
    for i in xrange(self.n_layers - 1):
        n = MLP(self.dim_hs[i], 2 * self.dim_hs[i+1],
                rng=self.rng, trng=self.trng,
                distribution='centered_binomial',
                name='rnn_net_aux%d' % i)
        self.inter_nets.append(n)  # insert(2 * i + 1, n)
def step_call(self, x, h0, c0, condition_on, *params):
    n_steps = x.shape[0]
    n_samples = x.shape[1]
    seqs = self.call_seqs(x, condition_on, *params)
    outputs_info = [h0, c0]
    non_seqs = self.get_recurrent_args(*params)
    (h, c), updates = theano.scan(
        self._step,
        sequences=seqs,
        outputs_info=outputs_info,
        non_sequences=non_seqs,
        name=self.name + '_recurrent_steps',
        n_steps=n_steps,
        strict=True)
    o_params = self.get_output_args(*params)
    out_net_out = self.output_net.step_call(h, *o_params)
    preact = out_net_out['z']
    p = out_net_out['p']
    # y = self.output_net.sample(p=p)
    return OrderedDict(h=h, p=p, z=preact), updates
def call_seqs(self, x, condition_on, level, *params):
    '''Prepares the input for __call__.

    Args:
        x (T.tensor): input.
        condition_on (T.tensor or None): tensor to condition recurrence on.
        level (int): recurrent level.
        *params: list of theano.shared.

    Returns:
        list: list of scan inputs.
    '''
    if level == 0:
        i_params = self.get_input_args(*params)
        a = self.input_net.step_preact(x, *i_params)
    else:
        i_params = self.get_inter_args(level - 1, *params)
        a = self.inter_nets[level - 1].step_preact(x, *i_params)
    if condition_on is not None:
        a += condition_on
    return [a]
def shuffle_columns(x, srng):
    '''Shuffles a tensor along the second index.

    Args:
        x (T.tensor).
        srng (shared random stream).
    '''
    def step_shuffle(m, perm):
        return m[perm]

    perm_mat = srng.permutation(n=x.shape[0], size=(x.shape[1],))
    y, _ = scan(
        step_shuffle, [x.transpose(1, 0, 2), perm_mat], [None], [], x.shape[1],
        name='shuffle', strict=False)
    return y.transpose(1, 0, 2)
def ctc_path_probs(predict, Y, alpha=1e-4):
    smoothed_predict = (1 - alpha) * predict[:, Y] + alpha * np.float32(1.) / Y.shape[0]
    L = T.log(smoothed_predict)
    zeros = T.zeros_like(L[0])
    log_first = zeros

    f_skip_idxs = ctc_create_skip_idxs(Y)
    b_skip_idxs = ctc_create_skip_idxs(Y[::-1])  # there should be a shortcut to calculating this

    def step(log_f_curr, log_b_curr, f_active, log_f_prev, b_active, log_b_prev):
        f_active_next, log_f_next = ctc_update_log_p(f_skip_idxs, zeros, f_active, log_f_curr, log_f_prev)
        b_active_next, log_b_next = ctc_update_log_p(b_skip_idxs, zeros, b_active, log_b_curr, log_b_prev)
        return f_active_next, log_f_next, b_active_next, log_b_next

    [f_active, log_f_probs, b_active, log_b_probs], _ = theano.scan(
        step, sequences=[L, L[::-1, ::-1]], outputs_info=[np.int32(1), log_first, np.int32(1), log_first])

    idxs = T.arange(L.shape[1]).dimshuffle('x', 0)
    mask = (idxs < f_active.dimshuffle(0, 'x')) & (idxs < b_active.dimshuffle(0, 'x'))[::-1, ::-1]
    log_probs = log_f_probs + log_b_probs[::-1, ::-1] - L
    return log_probs, mask
def gru_layer(tparams, emb, layerIndex, hiddenDimSize, mask=None):
    timesteps = emb.shape[0]
    if emb.ndim == 3:
        n_samples = emb.shape[1]
    else:
        n_samples = 1

    W_rx = T.dot(emb, tparams['W_r_' + layerIndex])
    W_zx = T.dot(emb, tparams['W_z_' + layerIndex])
    Wx = T.dot(emb, tparams['W_' + layerIndex])

    def stepFn(stepMask, wrx, wzx, wx, h):
        r = T.nnet.sigmoid(wrx + T.dot(h, tparams['U_r_' + layerIndex]) + tparams['b_r_' + layerIndex])
        z = T.nnet.sigmoid(wzx + T.dot(h, tparams['U_z_' + layerIndex]) + tparams['b_z_' + layerIndex])
        h_tilde = T.tanh(wx + T.dot(r * h, tparams['U_' + layerIndex]) + tparams['b_' + layerIndex])
        h_new = z * h + ((1. - z) * h_tilde)
        h_new = stepMask[:, None] * h_new + (1. - stepMask)[:, None] * h
        return h_new  # , output, time

    results, updates = theano.scan(fn=stepFn, sequences=[mask, W_rx, W_zx, Wx],
                                   outputs_info=T.alloc(numpy_floatX(0.0), n_samples, hiddenDimSize),
                                   name='gru_layer' + layerIndex, n_steps=timesteps)
    return results
def gru_layer(tparams, emb, layerIndex, hiddenDimSize, mask=None):
    timesteps = emb.shape[0]
    if emb.ndim == 3:
        n_samples = emb.shape[1]
    else:
        n_samples = 1

    W_rx = T.dot(emb, tparams['W_r_' + layerIndex])
    W_zx = T.dot(emb, tparams['W_z_' + layerIndex])
    Wx = T.dot(emb, tparams['W_' + layerIndex])

    def stepFn(stepMask, wrx, wzx, wx, h):
        r = T.nnet.sigmoid(wrx + T.dot(h, tparams['U_r_' + layerIndex]) + tparams['b_r_' + layerIndex])
        z = T.nnet.sigmoid(wzx + T.dot(h, tparams['U_z_' + layerIndex]) + tparams['b_z_' + layerIndex])
        h_tilde = T.tanh(wx + T.dot(r * h, tparams['U_' + layerIndex]) + tparams['b_' + layerIndex])
        h_new = z * h + ((1. - z) * h_tilde)
        h_new = stepMask[:, None] * h_new + (1. - stepMask)[:, None] * h
        return h_new

    results, updates = theano.scan(fn=stepFn, sequences=[mask, W_rx, W_zx, Wx],
                                   outputs_info=T.alloc(numpy_floatX(0.0), n_samples, hiddenDimSize),
                                   name='gru_layer' + layerIndex, n_steps=timesteps)
    return results
def model(inputs, _is_training, params, batch_size, hidden_size, drop_i, drop_s, init_scale, init_H_bias, _theano_rng):
    noise_i_for_H = get_dropout_noise((batch_size, hidden_size), drop_i, _theano_rng)
    i_for_H = ifelse(_is_training, inputs * noise_i_for_H, inputs)
    i_for_H = linear.model(i_for_H, params, hidden_size, hidden_size, init_scale, bias_init=init_H_bias)

    # Dropout noise for recurrent hidden state.
    noise_s = get_dropout_noise((batch_size, hidden_size), drop_s, _theano_rng)

    def step(i_for_H_t, y_tm1, noise_s):
        s_lm1_for_H = ifelse(_is_training, y_tm1 * noise_s, y_tm1)
        return T.tanh(i_for_H_t + linear.model(s_lm1_for_H, params, hidden_size, hidden_size, init_scale))

    y_0 = shared_zeros((batch_size, hidden_size), name='h0')
    y, _ = theano.scan(step, sequences=i_for_H, outputs_info=[y_0], non_sequences=[noise_s])

    y_last = y[-1]
    sticky_state_updates = [(y_0, y_last)]

    return y, y_0, sticky_state_updates
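`get_dropout_noise` and `shared_zeros` are project-local helpers that do not appear in this listing; plausible definitions (assumptions, shown only for completeness) are:

import numpy as np
import theano
import theano.tensor as T

def shared_zeros(shape, name=None):
    # Persistent zero-initialised state, reused across calls via updates.
    return theano.shared(np.zeros(shape, dtype=theano.config.floatX), name=name)

def get_dropout_noise(shape, dropout_p, trng):
    # Inverted-dropout mask: scaled at train time so no rescaling is needed at test time.
    keep_p = 1. - dropout_p
    return trng.binomial(size=shape, p=keep_p, dtype=theano.config.floatX) / keep_p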
def generative_sampling(self, seed, emb_data, sample_length):
    fruit = theano.shared(value=seed)

    def step(h_tm, y_tm):
        h_t = self.activation(T.dot(emb_data[y_tm], self.W) +
                              T.dot(h_tm, self.U) + self.bh)
        y_t = T.nnet.softmax(T.dot(h_t, self.V) + self.by)
        y = T.argmax(y_t, axis=1)
        return h_t, y[0]

    [_, samples], _ = theano.scan(fn=step,
                                  outputs_info=[self.h0, fruit],
                                  n_steps=sample_length)

    get_samples = theano.function(inputs=[],
                                  outputs=samples)
    return get_samples()
def _labeling_batch_to_class_batch(y, y_labeling, num_classes,
                                   y_hat_mask=None):
    # FIXME: y_hat_mask is currently not used
    batch_size = y.shape[1]
    N = y_labeling.shape[0]
    n_labels = y.shape[0]
    # sum over all repeated labels
    # from (T, B, L) to (T, C, B)
    out = T.zeros((num_classes, batch_size, N))
    y_labeling = y_labeling.dimshuffle((2, 1, 0))  # L, B, T
    y_ = y

    def scan_step(index, prev_res, y_labeling, y_):
        res_t = T.inc_subtensor(prev_res[y_[index, T.arange(batch_size)],
                                         T.arange(batch_size)],
                                y_labeling[index, T.arange(batch_size)])
        return res_t

    result, updates = theano.scan(scan_step,
                                  sequences=[T.arange(n_labels)],
                                  non_sequences=[y_labeling, y_],
                                  outputs_info=[out])
    # result will be (C, B, T) so we make it (T, B, C)
    return result[-1].dimshuffle(2, 1, 0)
Source file: rcnn_class.py (project: Recurrent-Convolutional-Neural-Network, author: monisjaved)
def get_cost(self, X, Y, X_sizes):
    """
    Calculates the cost for each example in the mini-batch, regularizes
    all the input parameters, and returns the final cost as a theano
    variable.
    """
    cost_fn, _ = theano.scan(
        fn=self.get_likelihood,
        sequences=[X, Y, X_sizes]
    )
    cost_fn = cost_fn.mean()
    cost_fn += self.reg_lambda * T.sqr(self.W_c_r).sum() / 2.
    cost_fn += self.reg_lambda * T.sqr(self.W_c_l).sum() / 2.
    cost_fn += self.reg_lambda * T.sqr(self.W_conv).sum() / 2.
    cost_fn += self.reg_lambda * T.sqr(self.W_output).sum() / 2.
    cost_fn += self.reg_lambda * T.sqr(self.b_output).sum() / 2.
    # Regularizing word embedding
    cost_fn += self.reg_lambda * T.sqr(self.vector_dict).sum() / 2
    return cost_fn
def get_output(self, train=False):
    input = self.get_input(train)
    proj_input = self.activation(T.tensordot(input, self.att_proj, axes=(3, 0)))
    if self.context == 'word':
        att_scores = T.tensordot(proj_input, self.att_scorer, axes=(3, 0))
    elif self.context == 'clause':
        def step(a_t, h_tm1, W_in, W, sc):
            h_t = T.tanh(T.tensordot(a_t, W_in, axes=(2, 0)) + T.tensordot(h_tm1, W, axes=(2, 0)))
            s_t = T.tensordot(h_t, sc, axes=(2, 0))
            return h_t, s_t
        [_, scores], _ = theano.scan(step, sequences=[proj_input.dimshuffle(2, 0, 1, 3)],
                                     outputs_info=[T.zeros((proj_input.shape[0], self.td1, self.rec_hid_dim)), None],
                                     non_sequences=[self.rec_in_weights, self.rec_hid_weights, self.att_scorer])
        att_scores = scores.dimshuffle(1, 2, 0)
    elif self.context == 'para':
        att_scores = T.tensordot(proj_input, self.att_scorer, axes=(3, 2)).sum(axis=(1, 2))

    # Nested scans. For shame!
    def get_sample_att(sample_input, sample_att):
        sample_att_inp, _ = theano.scan(fn=lambda s_att_i, s_input_i: T.dot(s_att_i, s_input_i),
                                        sequences=[T.nnet.softmax(sample_att), sample_input])
        return sample_att_inp

    att_input, _ = theano.scan(fn=get_sample_att, sequences=[input, att_scores])
    return att_input
def _ctc_normal(self, predict, labels):
    n = labels.shape[0]

    labels2 = T.concatenate((labels, [self.tpo["CTC_blank"], self.tpo["CTC_blank"]]))
    sec_diag = T.neq(labels2[:-2], labels2[2:]) * \
               T.eq(labels2[1:-1], self.tpo["CTC_blank"])

    recurrence_relation = \
        T.eye(n) + \
        T.eye(n, k=1) + \
        T.eye(n, k=2) * sec_diag.dimshuffle((0, 'x'))

    pred_y = predict[:, labels]

    probabilities, _ = theano.scan(
        lambda curr, accum: curr * T.dot(accum, recurrence_relation),
        sequences=[pred_y],
        outputs_info=[T.eye(n)[0]]
    )

    labels_probab = T.sum(probabilities[-1, -2:])
    return -T.log(labels_probab)
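For intuition, the `recurrence_relation` above is the usual CTC transition matrix: from each state you may stay, advance by one, or skip over a blank when the labels on either side differ. A tiny NumPy illustration (not part of the original class) for the padded label sequence [-, a, -, b, -]:

import numpy as np

n = 5
# sec_diag[i] is 1 where the skip i -> i+2 over a blank between distinct labels is legal.
sec_diag = np.array([0, 1, 0, 1, 0], dtype=float)
recurrence_relation = (np.eye(n) + np.eye(n, k=1)
                       + np.eye(n, k=2) * sec_diag[:, None])
# Row i now lists the states reachable from state i at the next time step.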
def sequence_iteration(self, in_seq, mask, use_dropout, dropout_value=1):
    in_seq_d = T.switch(use_dropout,
                        (in_seq *
                         self.trng.binomial(in_seq.shape,
                                            p=dropout_value, n=1,
                                            dtype=in_seq.dtype)),
                        in_seq)

    rz_in_seq = T.add(T.dot(in_seq_d, self.weights[0]), self.weights[1])

    out_seq, updates = theano.scan(
        fn=self.t_forward_step,
        sequences=[mask, rz_in_seq],  # in_seq_d],
        outputs_info=[self.t_ol_t00],
        non_sequences=[i for i in self.weights][2:] + [self.t_n_out],
        go_backwards=self.go_backwards,
        truncate_gradient=-1,
        # n_steps=50,
        strict=True,
        allow_gc=False,
    )
    return out_seq
def _semi_lagrangian_displacement(self, v_sampled, grid_points, dt):
    """
    Semi-Lagrangian scheme.
    Given a downsampled velocity field v (which will be linearly interpolated),
    we find "where the information came from", i.e. numerically invert its
    flow during a time-step dt on the 'grid_points'.
    To do so, we simply solve the fixed point equation
        a(y)/2 = (dt/2) * v( y - a(y)/2 )
    by a "Picard-like" iterative scheme,
    where y is a grid point, and -a(y) the corresponding "backward" vector.
    """
    def f(r):
        return .5 * dt * self._linear_interp_downsampledfield(v_sampled, grid_points - r)

    # Theano on GPU requires float32, i.e. explicit downcast from numpy float64 type:
    r_0 = np.zeros((np.prod(self.image_shape), self.image_dimension), dtype=config.floatX)
    result, updates = theano.scan(fn=f,                 # Iterated routine
                                  outputs_info=[r_0],   # Starting estimate for r
                                  n_steps=5)            # Number of iterations, sufficient in practice
    r_inf = result[-1]  # We only keep the end result
    return 2. * r_inf   # displacement "alpha"
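A one-dimensional toy version of the Picard iteration above (purely illustrative, with a stand-in velocity field) shows the fixed point being reached in a handful of steps:

import numpy as np

dt = 0.1
v = lambda z: np.sin(z)   # stand-in for the interpolated velocity field
y = 1.0                   # a single "grid point"
r = 0.0                   # starting estimate, as r_0 above
for _ in range(5):        # 5 iterations, matching n_steps=5 in the scan
    r = 0.5 * dt * v(y - r)
alpha = 2. * r            # backward displacement at y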
def _HamiltonianShootingCarrying(self, q, p, i0):
    """
    Given initial control points/momentums q0 and p0 given as n-by-d matrices,
    and a "template" image i0, outputs the trajectories q_t, p_t, I_t = I0 \circ phi_{t->0}.
    """
    # Here, we use the "scan" theano routine, which can be understood as a "for" loop
    identity = T.as_tensor_variable(0. * self.dense_grid())  # We encode the identity as a null displacement field.
    result, updates = theano.scan(fn=lambda x, y, z: self._hamiltonian_step_carrying2(x, y, z),
                                  outputs_info=[q, p, identity],
                                  n_steps=int(np.round(1 / self.dt)))

    phi_inv_1 = result[2][-1]  # We do not store the intermediate results
    I1 = self._image_circ_diffeo(i0, self.dense_grid() + phi_inv_1)  # instead of interpolating the images It at all timesteps, we only do it in the end.

    return [result[0][-1], result[1][-1], I1]  # and only return the final state + momentum + image
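The "for loop" analogy in the comment can be made concrete with a minimal standalone example (illustrative, unrelated to the LDDMM code above):

import theano
import theano.tensor as T

x0 = T.vector('x0')

def step(x):
    # Any one-step update; scan applies it repeatedly to its own output.
    return x * 2.

out, _ = theano.scan(fn=step, outputs_info=x0, n_steps=10)
# out[-1] is the result of applying `step` ten times, exactly like:
#   x = x0
#   for _ in range(10):
#       x = step(x)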
def sym_logdensity(self, x):
    """ x is a matrix of column datapoints (VxB) V = n_visible, B = batch size """
    def density_given_previous_a_and_x(x, w, V_alpha, b_alpha, V_mu, b_mu, V_sigma, b_sigma, activations_factor, p_prev, a_prev, x_prev):
        a = a_prev + T.dot(T.shape_padright(x_prev, 1), T.shape_padleft(w, 1))
        h = self.nonlinearity(a * activations_factor)  # BxH

        Alpha = T.nnet.softmax(T.dot(h, V_alpha) + T.shape_padleft(b_alpha))  # BxC
        Mu = T.dot(h, V_mu) + T.shape_padleft(b_mu)  # BxC
        Sigma = T.exp((T.dot(h, V_sigma) + T.shape_padleft(b_sigma)))  # BxC

        p = p_prev + log_sum_exp(-constantX(0.5) * T.sqr((Mu - T.shape_padright(x, 1)) / Sigma) - T.log(Sigma) - constantX(0.5 * np.log(2 * np.pi)) + T.log(Alpha))
        return (p, a, x)

    # First element is different (it is predicted from the bias only)
    a0 = T.zeros_like(T.dot(x.T, self.W))  # BxH
    p0 = T.zeros_like(x[0])
    x0 = T.ones_like(x[0])
    ([ps, _as, _xs], updates) = theano.scan(density_given_previous_a_and_x,
                                            sequences=[x, self.W, self.V_alpha, self.b_alpha, self.V_mu, self.b_mu, self.V_sigma, self.b_sigma, self.activation_rescaling],
                                            outputs_info=[p0, a0, x0])
    return (ps[-1], updates)
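`constantX` and `log_sum_exp` come from the surrounding NADE codebase and are not reproduced in this listing; typical definitions (assumptions, shown only so the snippet can be read on its own) are:

import numpy as np
import theano
import theano.tensor as T

def constantX(value):
    # Wrap a Python scalar as a floatX constant usable in the graph.
    return theano.shared(np.asarray(value, dtype=theano.config.floatX))

def log_sum_exp(x, axis=1):
    # Stable log(sum(exp(x))) over the mixture-component axis; returns a length-B vector.
    x_max = T.max(x, axis=axis)
    return x_max + T.log(T.sum(T.exp(x - T.shape_padright(x_max, 1)), axis=axis))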
def sym_logdensity(self, x):
    """ x is a matrix of column datapoints (VxB) V = n_visible, B = batch size """
    def density_given_previous_a_and_x(x, w, v, b, activations_factor, p_prev, a_prev, x_prev):
        a = a_prev + T.dot(T.shape_padright(x_prev, 1), T.shape_padleft(w, 1))
        h = self.nonlinearity(a * activations_factor)  # BxH
        t = T.dot(h, v) + b
        p_xi_is_one = T.nnet.sigmoid(t) * constantX(0.9999) + constantX(0.0001 * 0.5)  # Make logistic regression more robust by having the sigmoid saturate at 0.00005 and 0.99995
        p = p_prev + x * T.log(p_xi_is_one) + (1 - x) * T.log(1 - p_xi_is_one)
        return (p, a, x)

    # First element is different (it is predicted from the bias only)
    a0 = T.zeros_like(T.dot(x.T, self.W))  # BxH
    p0 = T.zeros_like(x[0])
    x0 = T.ones_like(x[0])
    ([ps, _, _], updates) = theano.scan(density_given_previous_a_and_x,
                                        sequences=[x, self.W, self.V, self.b, self.activation_rescaling],
                                        outputs_info=[p0, a0, x0])
    return (ps[-1], updates)
def get_y_prob(self, h, y):
    """
    :param h: 1D: n_words, 2D: Batch, 3D: n_y
    :param y: 1D: n_words, 2D: Batch
    :return: gradient of cross entropy: 1D: Batch
    """
    batch_index = T.arange(h.shape[1])
    z_score0 = self.BOS + h[0]              # 1D: batch, 2D: n_y
    y_score0 = z_score0[batch_index, y[0]]  # 1D: batch

    [_, y_scores, z_scores], _ = theano.scan(fn=self._forward_step,
                                             sequences=[h[1:], y[1:]],
                                             outputs_info=[y[0], y_score0, z_score0],
                                             non_sequences=[self.W_t, batch_index])

    y_score = y_scores[-1]
    z_score = logsumexp(z_scores[-1], axis=1).flatten()
    return y_score - z_score
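`logsumexp` is a project helper not shown here; a standard Theano definition (an assumption about the original code) is:

import theano.tensor as T

def logsumexp(x, axis=None):
    # Numerically stable log(sum(exp(x))) along `axis`, keeping the reduced dimension.
    x_max = T.max(x, axis=axis, keepdims=True)
    return T.log(T.sum(T.exp(x - x_max), axis=axis, keepdims=True)) + x_max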
def get_layer(self, x_in):
    assert x_in.ndim == 2
    n_steps = x_in.shape[0]

    def __slice(x_, n, dim):
        return x_[n * dim: (n + 1) * dim]

    def __step(x_, h_, c_):
        preact = T.dot(h_, self._params['U']) + x_ + self._params['b']
        i = T.nnet.sigmoid(__slice(preact, 0, self._ydim))
        f = T.nnet.sigmoid(__slice(preact, 1, self._ydim))
        o = T.nnet.sigmoid(__slice(preact, 2, self._ydim))
        c = T.tanh(__slice(preact, 3, self._ydim))
        c = f * c_ + i * c
        h = o * T.tanh(c)
        return h, c

    x_in = T.dot(x_in, self._params['W']) + self._params['b']
    rval, updates = theano.scan(__step, sequences=x_in, go_backwards=self.go_backwards,
                                outputs_info=[T.alloc(np_floatX(0.), self._ydim),
                                              T.alloc(np_floatX(0.), self._ydim)],
                                name='lstm_layers', n_steps=n_steps)
    return reverse(rval[0]) if self.go_backwards else rval[0]
def get_layer(self, x_in, C_in, ty_i):  # op,
    n_steps = C_in.shape[0]

    def __logsumexp(x, axis=None):
        xmax = x.max(axis=axis, keepdims=True)
        xmax_ = x.max(axis=axis)
        return xmax_ + T.log(T.exp(x - xmax).sum(axis=axis))

    def __step(_C, _x):
        # scores = T.dot(T.dot(_x, self._params['U']) + self._params['b'], self._params['v0'])
        scores = T.dot(T.nnet.sigmoid(T.dot(_x, self._params['U1']) + T.dot(_C, self._params['U2']) + self._params['b']),
                       self._params['v0'])
        return scores.flatten()

    y_out, _ = theano.scan(
        __step, sequences=C_in, non_sequences=x_in, name='classification_layer', n_steps=n_steps)

    norm_y = y_out.flatten() - __logsumexp(y_out)
    f_lc_debug = theano.function(
        [x_in, C_in, ty_i], [y_out, norm_y, norm_y[ty_i]])
    return norm_y[ty_i], T.argmax(norm_y), f_lc_debug
Source file: lstm_seqlabel_circuit_order_one_crf_decode_and_partition.py (project: neural_wfst, author: se4u)
def tagged_sequence_unnormalized_score_in_order_one_crf(input_tv, y, l):
    '''
    Simply sum the log-scores along the path suggested by `y` in the `input_tv`
    tensor.

    Params
    ------
    input_tv : A 3D tensor of (token, prev_pos, cur_pos) log scores.
        The input_tv also contains the scores of starting from BOS, stored at
        the last index of the prev_pos axis.
    y : The true sequence that was actually followed.
    l : The score of (EOS | tag)
    '''
    def _score_step(o, y, p_, y_):
        return ((p_ + o[y_, y]), y)

    [rval, _], _ = theano.scan(_score_step,
                               sequences=[input_tv[1:, :-1], y[1:]],
                               # sequences=[input_tv, y],
                               outputs_info=[input_tv[0, -1, y[0]], y[0]],
                               # outputs_info=[0.0, numpy.int32(-1)],
                               name='OrderOnePathMax_scan_score_step',
                               strict=True)
    return rval[-1] + l[y[-1]]
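A plain-NumPy rendering of the same path score (illustrative only, under the layout assumption described in the docstring):

import numpy as np

def path_score(scores, y, l):
    # scores: (n_tokens, n_tags + 1, n_tags); the extra prev_pos row holds the BOS scores.
    total = scores[0, -1, y[0]]
    for t in range(1, len(y)):
        total += scores[t, y[t - 1], y[t]]
    return total + l[y[-1]]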
Source file: lstm_seqlabel_circuit_order_one_crf_decode_and_partition.py (project: neural_wfst, author: se4u)
def retrieve_path_from_backpointers(bp, starting_point):
    '''
    Theano scan loop to follow backpointers, starting from a given spot.

    Params
    ------
    bp : The trail of backpointers. Think of this as a list of
        lists where we start from the back `bp = list[N][starting_point]` and
        then go to list[N-1][bp] and so on.
    starting_point : The index to start the backward walk from.
    '''
    vp_prefix = th_reverse(
        theano.scan(
            lambda p, y: p[y],
            sequences=bp,
            outputs_info=starting_point,
            go_backwards=True,
            name='OrderOnePathMax_scan__bkpntr',
            strict=True)[0])
    return theano.tensor.concatenate([vp_prefix, starting_point.dimshuffle('x')])
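The same backpointer walk in plain Python (illustrative only), which may make the reversed scan easier to read:

def follow_backpointers(bp, starting_point):
    path = [starting_point]
    for row in reversed(bp):      # walk from the last position backwards
        path.append(row[path[-1]])
    path.reverse()                # restore forward order; the starting point ends up last
    return path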
def inner_fn_sample_actions_given(oat_given, stm1):
    st0_condition = theano.shared(name='st0_condition', value=numpy.random.randn(n_s, n_samples).astype(dtype=theano.config.floatX), borrow=True)
    ot0_condition = theano.shared(name='ot0_condition', value=numpy.random.randn(n_o, n_samples).astype(dtype=theano.config.floatX), borrow=True)
    oht0_condition = theano.shared(name='oht0_condition', value=numpy.random.randn(n_oh, n_samples).astype(dtype=theano.config.floatX), borrow=True)
    oat0_condition = theano.shared(name='oat0_condition', value=numpy.random.randn(n_oa, n_samples).astype(dtype=theano.config.floatX), borrow=True)

    # Iterate MCMC sampler to approximate constrained probabilities
    # p(o,oh|oa) of observations, given a sequence of proprioceptive
    # inputs oa
    # c.f. https://arxiv.org/abs/1401.4082, Appendix F.
    ((st, ot, oht, oat), _) = theano.scan(fn=inner_fn_condition,
                                          outputs_info=[st0_condition, ot0_condition, oht0_condition, oat0_condition],
                                          non_sequences=[oat_given, stm1],
                                          n_steps=n_iterations_ag)

    st = st[-1]
    ot = ot[-1]
    oht = oht[-1]
    oat = oat[-1]

    return st, ot, oht, oat

# Define initial state and action