Python swapaxes() example source code
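All of the snippets below revolve around chainer.functions.swapaxes, so here is a minimal, self-contained sketch of what the function does (assuming only NumPy and Chainer are installed); it behaves like numpy.swapaxes but returns a differentiable Variable.

# Minimal sketch: swapaxes exchanges two axes of an array inside the autograd graph.
import numpy as np
import chainer.functions as F

x = np.arange(24, dtype=np.float32).reshape(2, 3, 4)   # e.g. (batch, channel, time)
y = F.swapaxes(x, 1, 2)                                 # -> (batch, time, channel)
assert y.shape == (2, 4, 3)
assert np.array_equal(y.data, x.swapaxes(1, 2))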
def __call__(self, x, split_into_variables=True):
    batchsize = x.shape[0]
    seq_length = x.shape[3]
    out_data = super(AcousticModel, self).__call__(x)
    assert out_data.shape[3] == seq_length

    # For CTC, split the output along the time axis into a list of per-timestep Variables
    if split_into_variables:
        out_data = F.swapaxes(out_data, 1, 3)
        out_data = F.reshape(out_data, (batchsize, -1))
        out_data = F.split_axis(out_data, seq_length, axis=1)
    else:
        out_data = F.swapaxes(out_data, 1, 3)
        out_data = F.squeeze(out_data, axis=2)

    return out_data
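To make the split_into_variables branch above concrete, here is a hypothetical shape walk-through with made-up sizes (B=2 utterances, C=5 classes, T=7 frames); the (B, C, 1, T) layout is an assumption about the acoustic model's output, not something stated in the snippet.

# Hypothetical shapes: B=2, C=5, T=7.
import numpy as np
import chainer.functions as F

out_data = np.zeros((2, 5, 1, 7), dtype=np.float32)   # (B, C, 1, T)
out_data = F.swapaxes(out_data, 1, 3)                 # (B, T, 1, C)
out_data = F.reshape(out_data, (2, -1))               # (B, T*C)
frames = F.split_axis(out_data, 7, axis=1)            # T Variables, one per time step
assert len(frames) == 7 and frames[0].shape == (2, 5)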
def encode(self, X, skip_mask=None):
    batchsize = X.shape[0]
    seq_length = X.shape[1]
    embedding = self.encoder_embed(X)
    embedding = F.swapaxes(embedding, 1, 2)

    out_data = self._forward_encoder_layer(0, embedding, skip_mask=skip_mask)
    in_data = [out_data]

    for layer_index in range(1, self.num_layers):
        out_data = self._forward_encoder_layer(layer_index, F.concat(in_data) if self.densely_connected else in_data[-1], skip_mask=skip_mask)
        in_data.append(out_data)

    out_data = F.concat(in_data) if self.densely_connected else in_data[-1]  # dense conv

    if self.using_dropout:
        out_data = F.dropout(out_data, ratio=self.dropout)

    last_hidden_states = []
    for layer_index in range(0, self.num_layers):
        encoder = self.get_encoder(layer_index)
        last_hidden_states.append(encoder.get_last_hidden_state())

    return last_hidden_states
def __call__(self, X, return_last=False):
    batchsize = X.shape[0]
    seq_length = X.shape[1]
    embedding = self.embed(X)
    embedding = F.swapaxes(embedding, 1, 2)

    out_data = self._forward_layer(0, embedding)
    in_data = [out_data]

    for layer_index in range(1, self.num_layers):
        out_data = self._forward_layer(layer_index, F.concat(in_data) if self.densely_connected else in_data[-1])  # dense conv
        in_data.append(out_data)

    out_data = F.concat(in_data) if self.densely_connected else out_data  # dense conv

    if return_last:
        out_data = out_data[:, :, -1, None]

    if self.using_dropout:
        out_data = F.dropout(out_data, ratio=self.dropout)

    out_data = self.fc(out_data)
    out_data = F.reshape(F.swapaxes(out_data, 1, 2), (-1, self.vocab_size))
    return out_data
def __call__(self, X, return_last=False):
    batchsize = X.shape[0]
    seq_length = X.shape[1]
    embedding = self.embed(X)
    embedding = F.swapaxes(embedding, 1, 2)

    residual_input = embedding if self.ndim_h == self.ndim_embedding else 0

    out_data = self._forward_layer(0, embedding)
    for layer_index in xrange(1, self.num_blocks * self.num_layers_per_block):
        out_data = self._forward_layer(layer_index, out_data)
        if (layer_index + 1) % self.num_layers_per_block == 0:
            if self.using_dropout:
                out_data = F.dropout(out_data, ratio=self.dropout)
            out_data += residual_input
            residual_input = out_data

    if return_last:
        out_data = out_data[:, :, -1, None]

    out_data = self.dense(out_data)
    out_data = F.reshape(F.swapaxes(out_data, 1, 2), (-1, self.vocab_size))
    return out_data
def propdown(self, hid):
    """ This function propagates the hidden units' activation downwards to the visible units
    :param hid: Variable Matrix(batch_size, out_channels, image_height_out, image_width_out) - given h_sample
    :return: Variable Matrix(batch_size, in_channels, image_height, image_width) - probability for each visible unit to be v_j = 1
    """
    batch_size = hid.data.shape[0]
    if self.real == 0:
        W_flipped = F.swapaxes(CF.flip(self.conv.W, axes=(2, 3)), axis1=0, axis2=1)
        pre_sigmoid_activation = F.convolution_2d(hid, W_flipped, self.conv.a, pad=self.ksize-1)
        # F.matmul(hid, self.l.W) + F.broadcast_to(self.l.a, (batch_size, self.n_visible))
        v_mean = F.sigmoid(pre_sigmoid_activation)
        # print('W info ', self.conv.W.data.shape, 'W_flipped info ', W_flipped.data.shape)
        # print('W info ', self.conv.W.data[3, 0, 2, 3], 'W_flipped info ', W_flipped.data[0, 3, 8, 7])
        # print('W info ', self.conv.W.data[3, 0, 8, 7], 'W_flipped info ', W_flipped.data[0, 3, 2, 3])
        # print('W info ', self.conv.W.data[19, 0, 4, 0], 'W_flipped info ', W_flipped.data[0, 19, 6, 10])
        # print('pre_sigmoid_activation', F.sum(pre_sigmoid_activation).data)
        # print('v_mean', v_mean.data.shape)
        # print('v_mean sum', F.sum(v_mean).data)
        # print('hid', hid.data.shape)
    else:
        # TODO: check
        W_flipped = F.swapaxes(CF.flip(self.conv.W, axes=(2, 3)), axis1=0, axis2=1)
        v_mean = F.convolution_2d(hid, W_flipped, self.conv.a, pad=self.ksize-1)
    return v_mean
def reconstruct(self, v):
    """
    :param v: Variable Matrix(batch_size, in_channels, image_height, image_width)
    :return: reconstructed_v, Variable Matrix(batch_size, in_channels, image_height, image_width)
    """
    batch_size = v.data.shape[0]
    xp = cuda.get_array_module(v.data)
    if self.real == 0:
        h = F.sigmoid(self.conv(v))
    else:
        std_ch = xp.reshape(self.std, (1, self.in_channels, 1, 1))
        h = F.sigmoid(self.conv(v / std_ch))
    # F.sigmoid(F.matmul(v, self.l.W, transb=True) + F.broadcast_to(self.l.b, (batch_size, self.n_hidden)))
    W_flipped = F.swapaxes(CF.flip(self.conv.W, axes=(2, 3)), axis1=0, axis2=1)
    reconstructed_v = F.sigmoid(F.convolution_2d(h, W_flipped, self.conv.a, pad=self.ksize-1))
    # = F.sigmoid(F.matmul(h, self.l.W) + F.broadcast_to(self.l.a, (batch_size, self.n_visible)))
    return reconstructed_v
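Both propdown and reconstruct build their decoding filter by flipping the kernel spatially and swapping the in/out channel axes. CF.flip is this project's own helper, so the sketch below uses plain NumPy to show the same transformation on a weight of assumed shape (out_channels, in_channels, kh, kw).

# Sketch: construct the "transposed" filter used above with plain NumPy.
import numpy as np

W = np.random.randn(8, 3, 5, 5).astype(np.float32)   # (out_ch, in_ch, kh, kw)
W_flipped = np.flip(W, axis=(2, 3)).swapaxes(0, 1)    # (in_ch, out_ch, kh, kw)
assert W_flipped.shape == (3, 8, 5, 5)
# Position (kh-1-i, kw-1-j) of the flipped kernel equals (i, j) of the original.
assert W_flipped[0, 7, 0, 0] == W[7, 0, 4, 4]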
def encode(self, x_input, x_query, answer):
    m = self.encode_input(x_input)
    u = self.encode_query(x_query)
    # print "m.data.shape", m.data.shape
    # print "u.data.shape", u.data.shape
    mu = functions.matmul(m, u, transb=True)
    # print "mu.data.shape", mu.data.shape
    # print "mu.data", mu.data
    p = functions.softmax(mu)
    c = self.encode_output(x_input)
    # print "p.data.shape:", p.data.shape
    # print "c.data.shape:", c.data.shape
    # print "functions.swapaxes(c, 2, 1):", functions.swapaxes(c, 2, 1).data.shape
    o = functions.matmul(functions.swapaxes(c, 1, 0), p)  # (2, 50, 1)
    o = functions.swapaxes(o, 1, 0)  # (2, 50)
    # print "u.data.shape:", u.data.shape
    # print "o.data.shape:", o.data.shape
    # print "u.data:", u.data
    # print "o.data:", o.data
    # print (u + o).data.shape
    predict = self.W(u + o)
    # print predict.data.shape
    loss = functions.softmax_cross_entropy(predict, answer)
    return loss
def pre(self, x):
    dims = len(x.shape) - 1
    if self.kernel_size == 1:
        ret = self.W(x)
    elif self.kernel_size == 2:
        if dims == 2:
            xprev = Variable(
                self.xp.zeros((self.batch_size, 1, self.in_size),
                              dtype=np.float32), volatile='AUTO')
            xtminus1 = F.concat((xprev, x[:, :-1, :]), axis=1)
        else:
            xtminus1 = self.x
        ret = self.W(x) + self.V(xtminus1)
    else:
        ret = F.swapaxes(self.conv(
            F.swapaxes(x, 1, 2))[:, :, :x.shape[2]], 1, 2)

    if not self.attention:
        return ret

    if dims == 1:
        enc = self.encoding[:, -1, :]
    else:
        enc = self.encoding[:, -1:, :]
    return sum(F.broadcast(self.U(enc), ret))
def __call__(self, x):
    return functions.swapaxes(x, self.axis1, self.axis2)
def __call__(self, x, split_into_variables=True, discard_context=False):
    batchsize = x.shape[0]
    seq_length = x.shape[3]

    # conv
    out_data = self.conv_blocks(x)
    out_data = functions.reshape(out_data, (batchsize, -1, seq_length))

    # rnn
    for index, blocks in enumerate(self.rnn_blocks.blocks):
        sru = blocks[0]
        dropout = blocks[1] if len(blocks) == 2 else None
        hidden, cell, context = sru(out_data, self.contexts[index])
        if discard_context is False:
            self.contexts[index] = context
        if dropout is not None:
            out_data = dropout(out_data)

    # fc
    out_data = self.dense_blocks(out_data)
    assert out_data.shape[2] == seq_length

    # For CTC, split the output along the time axis into a list of per-timestep Variables
    if split_into_variables:
        out_data = F.swapaxes(out_data, 1, 2)
        out_data = F.reshape(out_data, (batchsize, -1))
        out_data = F.split_axis(out_data, seq_length, axis=1)
    else:
        out_data = F.swapaxes(out_data, 1, 2)
        out_data = F.squeeze(out_data, axis=2)

    return out_data
def decode(self, X, encoder_last_hidden_states, return_last=False):
    assert len(encoder_last_hidden_states) == self.num_layers
    batchsize = X.shape[0]
    seq_length = X.shape[1]
    embedding = self.decoder_embed(X)
    embedding = F.swapaxes(embedding, 1, 2)

    out_data = self._forward_decoder_layer(0, embedding, encoder_last_hidden_states[0])
    in_data = [out_data]

    for layer_index in range(1, self.num_layers):
        out_data = self._forward_decoder_layer(layer_index, F.concat(in_data) if self.densely_connected else in_data[-1], encoder_last_hidden_states[layer_index])
        in_data.append(out_data)

    out_data = F.concat(in_data) if self.densely_connected else in_data[-1]  # dense conv

    if return_last:
        out_data = out_data[:, :, -1, None]

    if self.using_dropout:
        out_data = F.dropout(out_data, ratio=self.dropout)

    out_data = self.fc(out_data)
    out_data = F.reshape(F.swapaxes(out_data, 1, 2), (-1, self.vocab_size_dec))
    return out_data
def decode_one_step(self, X, encoder_last_hidden_states):
    assert len(encoder_last_hidden_states) == self.num_layers
    batchsize = X.shape[0]
    seq_length = X.shape[1]
    ksize = self.decoder_kernel_size

    if seq_length < ksize:
        self.reset_state()
        return self.decode(X, encoder_last_hidden_states, return_last=True)

    xt = X[:, -ksize:]
    embedding = self.decoder_embed(xt)
    embedding = F.swapaxes(embedding, 1, 2)

    out_data = self._forward_decoder_layer_one_step(0, embedding, encoder_last_hidden_states[0])
    in_data = [out_data]

    for layer_index in range(1, self.num_layers):
        out_data = self._forward_decoder_layer_one_step(layer_index, F.concat(in_data) if self.densely_connected else in_data[-1], encoder_last_hidden_states[layer_index])
        in_data.append(out_data)

    out_data = F.concat(in_data) if self.densely_connected else in_data[-1]  # dense conv
    out_data = out_data[:, :, -1, None]

    if self.using_dropout:
        out_data = F.dropout(out_data, ratio=self.dropout)

    out_data = self.fc(out_data)
    out_data = F.reshape(F.swapaxes(out_data, 1, 2), (-1, self.vocab_size_dec))
    return out_data
def encode(self, X, skip_mask=None):
    batchsize = X.shape[0]
    seq_length = X.shape[1]
    embedding = self.encoder_embed(X)
    embedding = F.swapaxes(embedding, 1, 2)

    out_data = self._forward_encoder_layer(0, embedding, skip_mask=skip_mask)
    in_data = [out_data]

    for layer_index in range(1, self.num_layers):
        out_data = self._forward_encoder_layer(layer_index, F.concat(in_data) if self.densely_connected else in_data[-1], skip_mask=skip_mask)
        in_data.append(out_data)

    out_data = F.concat(in_data) if self.densely_connected else in_data[-1]  # dense conv

    if self.using_dropout:
        out_data = F.dropout(out_data, ratio=self.dropout)

    last_hidden_states = []
    last_layer_outputs = None
    for layer_index in range(0, self.num_layers):
        encoder = self.get_encoder(layer_index)
        last_hidden_states.append(encoder.get_last_hidden_state())
        last_layer_outputs = encoder.get_all_hidden_states()

    return last_hidden_states, last_layer_outputs
def decode(self, X, encoder_last_hidden_states, encoder_last_layer_outputs, encoder_skip_mask=None, return_last=False):
    assert len(encoder_last_hidden_states) == self.num_layers
    batchsize = X.shape[0]
    seq_length = X.shape[1]
    embedding = self.decoder_embed(X)
    embedding = F.swapaxes(embedding, 1, 2)

    out_data = self._forward_decoder_layer(0, embedding, encoder_last_hidden_states[0], encoder_last_layer_outputs, encoder_skip_mask)
    in_data = [out_data]

    for layer_index in range(1, self.num_layers):
        out_data = self._forward_decoder_layer(layer_index, F.concat(in_data) if self.densely_connected else in_data[-1], encoder_last_hidden_states[layer_index], encoder_last_layer_outputs, encoder_skip_mask)
        in_data.append(out_data)

    out_data = F.concat(in_data) if self.densely_connected else in_data[-1]  # dense conv

    if return_last:
        out_data = out_data[:, :, -1, None]

    if self.using_dropout:
        out_data = F.dropout(out_data, ratio=self.dropout)

    out_data = self.fc(out_data)
    out_data = F.reshape(F.swapaxes(out_data, 1, 2), (-1, self.vocab_size_dec))
    return out_data
def forward_one_step(self, X):
    batchsize = X.shape[0]
    seq_length = X.shape[1]
    ksize = self.kernel_size

    if seq_length < ksize:
        self.reset_state()
        return self.__call__(X, return_last=True)

    xt = X[:, -ksize:]
    embedding = self.embed(xt)
    embedding = F.swapaxes(embedding, 1, 2)

    out_data = self._forward_layer_one_step(0, embedding)[:, :, -ksize:]
    in_data = [out_data]

    for layer_index in range(1, self.num_layers):
        out_data = self._forward_layer_one_step(layer_index, F.concat(in_data) if self.densely_connected else in_data[-1])[:, :, -ksize:]  # dense conv
        in_data.append(out_data)

    out_data = F.concat(in_data) if self.densely_connected else out_data  # dense conv

    if self.using_dropout:
        out_data = F.dropout(out_data, ratio=self.dropout)

    out_data = out_data[..., -1, None]
    out_data = self.fc(out_data)
    out_data = F.reshape(F.swapaxes(out_data, 1, 2), (-1, self.vocab_size))
    return out_data
def check_forward(self, x_data):
    axis1, axis2 = self.axis1, self.axis2
    x = chainer.Variable(x_data)
    y = functions.swapaxes(x, axis1, axis2)
    self.assertEqual(y.data.dtype, self.dtype)
    self.assertTrue((self.x.swapaxes(axis1, axis2) ==
                     cuda.to_cpu(y.data)).all())
def check_backward(self, x_data):
    x = chainer.Variable(x_data)
    y = functions.swapaxes(x, self.axis1, self.axis2)
    y.grad = y.data
    y.backward()
    gradient_check.assert_allclose(x.data, x.grad, atol=0, rtol=0)
def __call__(self, S, h):
    batch_size, src_len, hidden_size = S.data.shape
    h = F.broadcast_to(F.expand_dims(h, axis=2), (batch_size, hidden_size, src_len))
    h = F.swapaxes(h, 1, 2)
    S = F.reshape(F.concat((S, h), axis=2), (batch_size * src_len, 2 * hidden_size))
    a = F.softmax(F.reshape(self.second_layer(F.tanh(self.first_layer(S))), (batch_size, src_len)))
    return a
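The expand_dims / broadcast_to / swapaxes sequence above just tiles the decoder state h across every source position before concatenating it with S. A shape-only sketch with invented sizes:

import numpy as np
import chainer.functions as F

batch_size, src_len, hidden_size = 2, 6, 4
S = np.zeros((batch_size, src_len, hidden_size), dtype=np.float32)
h = np.ones((batch_size, hidden_size), dtype=np.float32)

h_tiled = F.expand_dims(h, axis=2)                                     # (B, H, 1)
h_tiled = F.broadcast_to(h_tiled, (batch_size, hidden_size, src_len))  # (B, H, L)
h_tiled = F.swapaxes(h_tiled, 1, 2)                                    # (B, L, H)
pairs = F.concat((S, h_tiled), axis=2)                                 # (B, L, 2H)
assert pairs.shape == (batch_size, src_len, 2 * hidden_size)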
def read(self, h):
    # M_key = F.swapaxes(F.stack(self.key_buff, axis=0), axis1=0, axis2=1)  # (B, M, m)
    M_key = F.stack(self.key_buff, axis=1)  # (B, M, m)
    self.p = F.softmax(F.reshape(F.batch_matmul(M_key, h, transa=False, transb=False), (h.shape[0], M_key.shape[1])))  # (B, M)
    # p = F.reshape(p, (h.shape[0], 1, M_key.shape[1]))  # (B, 1, M)
    # print("p", p.shape)
    # M_val = F.swapaxes(F.stack(self.val_buff, axis=0), axis1=0, axis2=1)  # (B, M, m)
    M_val = F.stack(self.val_buff, axis=1)  # (B, M, m)
    # print("M_val", M_val.shape)
    o = F.batch_matmul(self.p, M_val, transa=True, transb=False)  # (B, 1, m)
    o = F.reshape(o, (o.shape[0], o.shape[2]))  # (B, m)
    # print("o", o.shape)
    return o, self.p
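The read above is a soft key-value lookup: dot products between h and every stored key are softmax-normalized into p, which then weights the stored values. An equivalent NumPy sketch with made-up sizes (B=2 batch, M=4 memory slots, m=3 dimensions):

import numpy as np

B, M, m = 2, 4, 3
M_key = np.random.randn(B, M, m).astype(np.float32)
M_val = np.random.randn(B, M, m).astype(np.float32)
h = np.random.randn(B, m).astype(np.float32)

scores = np.einsum('bmd,bd->bm', M_key, h)                # (B, M) dot products
e = np.exp(scores - scores.max(axis=1, keepdims=True))    # stable softmax over slots
p = e / e.sum(axis=1, keepdims=True)
o = np.einsum('bm,bmd->bd', p, M_val)                     # (B, m) weighted read
assert o.shape == (B, m)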
def forward_one_step(self, X):
    batchsize = X.shape[0]
    seq_length = X.shape[1]
    ksize = self.kernel_size

    if seq_length < ksize:
        self.reset_state()
        return self.__call__(X, return_last=True)

    xt = X[:, -ksize:]
    embedding = self.embed(xt)
    embedding = F.swapaxes(embedding, 1, 2)

    residual_input = embedding if self.ndim_h == self.ndim_embedding else 0

    out_data = self._forward_layer_one_step(0, embedding)[:, :, -ksize:]
    for layer_index in xrange(1, self.num_blocks * self.num_layers_per_block):
        out_data = self._forward_layer_one_step(layer_index, out_data)[:, :, -ksize:]
        if (layer_index + 1) % self.num_layers_per_block == 0:
            if self.using_dropout:
                out_data = F.dropout(out_data, ratio=self.dropout)
            out_data += residual_input
            residual_input = out_data

    out_data = out_data[..., -1, None]
    out_data = self.dense(out_data)
    out_data = F.reshape(F.swapaxes(out_data, 1, 2), (-1, self.vocab_size))
    return out_data
def __array(array, dtype, is_volatile, transposition):
    volatile = "ON" if is_volatile else "OFF"
    if not transposition:
        return Variable(XP.__lib.array(array, dtype=dtype), volatile=volatile)
    else:
        return F.swapaxes(Variable(XP.__lib.array(array, dtype=dtype), volatile=volatile), 0, 1)
def __call__(self, X, ht_enc, H_enc, skip_mask=None):
    pad = self._kernel_size - 1
    WX = self.W(X)
    if pad > 0:
        WX = WX[:, :, :-pad]
    Vh = self.V(ht_enc)
    Vh, WX = functions.broadcast(functions.expand_dims(Vh, axis=2), WX)

    # f-pooling
    Z, F, O = functions.split_axis(WX + Vh, 3, axis=1)
    Z = functions.tanh(Z)
    F = self.zoneout(F)
    O = functions.sigmoid(O)
    T = Z.shape[2]

    # compute ungated hidden states
    self.contexts = []
    for t in xrange(T):
        z = Z[..., t]
        f = F[..., t]
        if t == 0:
            ct = (1 - f) * z
            self.contexts.append(ct)
        else:
            ct = f * self.contexts[-1] + (1 - f) * z
            self.contexts.append(ct)

    if skip_mask is not None:
        assert skip_mask.shape[1] == H_enc.shape[2]
        softmax_bias = (skip_mask == 0) * -1e6

    # compute attention weights (eq.8)
    H_enc = functions.swapaxes(H_enc, 1, 2)
    for t in xrange(T):
        ct = self.contexts[t]
        bias = 0 if skip_mask is None else softmax_bias[..., None]  # to skip PAD
        mask = 1 if skip_mask is None else skip_mask[..., None]  # to skip PAD
        alpha = functions.batch_matmul(H_enc, ct) + bias
        alpha = functions.softmax(alpha) * mask
        alpha = functions.broadcast_to(alpha, H_enc.shape)  # copy
        kt = functions.sum(alpha * H_enc, axis=1)
        ot = O[..., t]
        self.ht = ot * self.o(functions.concat((kt, ct), axis=1))

        if t == 0:
            self.H = functions.expand_dims(self.ht, 2)
        else:
            self.H = functions.concat((self.H, functions.expand_dims(self.ht, 2)), axis=2)

    return self.H
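The first loop above is QRNN-style f-pooling, c_t = f_t * c_{t-1} + (1 - f_t) * z_t. A tiny NumPy sketch of that recurrence with invented shapes (batch=2, hidden=3, T=4); starting from a zero state reproduces the t == 0 branch:

# Sketch of f-pooling over T=4 steps for a (batch=2, hidden=3) state.
import numpy as np

rng = np.random.RandomState(0)
Z = np.tanh(rng.randn(2, 3, 4))                     # candidate states, (B, H, T)
Fgate = 1.0 / (1.0 + np.exp(-rng.randn(2, 3, 4)))   # forget gates in (0, 1)

c = np.zeros((2, 3))
contexts = []
for t in range(4):
    c = Fgate[..., t] * c + (1.0 - Fgate[..., t]) * Z[..., t]
    contexts.append(c)
# contexts[t] plays the role of self.contexts[t] in the loop above.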
def forward_one_step(self, X, ht_enc, H_enc, skip_mask):
    pad = self._kernel_size - 1
    WX = self.W(X)[:, :, -pad-1, None]
    Vh = self.V(ht_enc)
    Vh, WX = functions.broadcast(functions.expand_dims(Vh, axis=2), WX)

    # f-pooling
    Z, F, O = functions.split_axis(WX + Vh, 3, axis=1)
    Z = functions.tanh(Z)
    F = self.zoneout(F)
    O = functions.sigmoid(O)
    T = Z.shape[2]

    # compute ungated hidden states
    for t in xrange(T):
        z = Z[..., t]
        f = F[..., t]
        if self.contexts is None:
            ct = (1 - f) * z
            self.contexts = [ct]
        else:
            ct = f * self.contexts[-1] + (1 - f) * z
            self.contexts.append(ct)

    if skip_mask is not None:
        assert skip_mask.shape[1] == H_enc.shape[2]
        softmax_bias = (skip_mask == 0) * -1e6

    # compute attention weights (eq.8)
    H_enc = functions.swapaxes(H_enc, 1, 2)
    for t in xrange(T):
        ct = self.contexts[t - T]
        bias = 0 if skip_mask is None else softmax_bias[..., None]  # to skip PAD
        mask = 1 if skip_mask is None else skip_mask[..., None]  # to skip PAD
        alpha = functions.batch_matmul(H_enc, ct) + bias
        alpha = functions.softmax(alpha) * mask
        alpha = functions.broadcast_to(alpha, H_enc.shape)  # copy
        kt = functions.sum(alpha * H_enc, axis=1)
        ot = O[..., t]
        self.ht = ot * self.o(functions.concat((kt, ct), axis=1))

        if self.H is None:
            self.H = functions.expand_dims(self.ht, 2)
        else:
            self.H = functions.concat((self.H, functions.expand_dims(self.ht, 2)), axis=2)

    return self.H
def encodeSentenceFWD(self, train_mode, sentence, args, dropout_rate):
    if args.gpu_enc != args.gpu_dec:  # encoder and decoder may sit on different GPUs
        chainer.cuda.get_device(args.gpu_enc).use()
    encLen = len(sentence)  # sentence length
    cMBSize = len(sentence[0])  # minibatch size

    # look up the embeddings for the whole input sentence at once
    encEmbList = self.getEncoderInputEmbeddings(sentence, args)

    flag_train = (train_mode > 0)
    lstmVars = [0] * self.n_layers * 2
    if self.flag_merge_encfwbw == 0:  # run the forward and backward LSTM stacks independently
        hyf, cyf, fwHout = self.model.encLSTM_f(
            None, None, encEmbList, flag_train, args)  # forward
        hyb, cyb, bkHout = self.model.encLSTM_b(
            None, None, encEmbList[::-1], flag_train, args)  # backward
        for z in six.moves.range(self.n_layers):
            lstmVars[2 * z] = cyf[z] + cyb[z]
            lstmVars[2 * z + 1] = hyf[z] + hyb[z]
    elif self.flag_merge_encfwbw == 1:  # merge forward and backward outputs layer by layer
        sp = (cMBSize, self.hDim)
        for z in six.moves.range(self.n_layers):
            if z == 0:  # the first layer reads the embeddings
                biH = encEmbList
            else:  # upper layers read the merged outputs of the layer below
                # bkHout is reversed back to the original time order before adding
                biH = fwHout + bkHout[::-1]
            # forward LSTM of layer z
            hyf, cyf, fwHout = self.model.encLSTM_f(
                z, biH, flag_train, dropout_rate, args)
            # backward LSTM of layer z
            hyb, cyb, bkHout = self.model.encLSTM_b(
                z, biH[::-1], flag_train, dropout_rate, args)
            # sum the final forward/backward states of this layer
            # and reshape them for the decoder
            lstmVars[2 * z] = chaFunc.reshape(cyf + cyb, sp)
            lstmVars[2 * z + 1] = chaFunc.reshape(hyf + hyb, sp)
    else:
        assert 0, "ERROR"

    # build the stacked bidirectional hidden states
    if self.flag_enc_boseos == 0:  # default
        # fwHout[:, ] keeps the full sequence unchanged
        biHiddenStack = fwHout[:, ] + bkHout[::-1]
    elif self.flag_enc_boseos == 1:
        bkHout2 = bkHout[::-1]  # back to the original time order
        biHiddenStack = fwHout[1:encLen - 1, ] + bkHout2[1:encLen - 1, ]
        # exclude the BOS and EOS positions  TODO: check handling of the excluded positions
        encLen -= 2
    else:
        assert 0, "ERROR"

    # (enc length, minibatch size, hidden dim)
    # => (minibatch size, enc length, hidden dim)
    biHiddenStackSW01 = chaFunc.swapaxes(biHiddenStack, 0, 1)
    # stack the per-layer LSTM states so they can initialize the decoder LSTM
    lstmVars = chaFunc.stack(lstmVars)
    # bundle the encoder results into an encInfoObject and return it
    retO = self.encInfoObject(biHiddenStackSW01, lstmVars, encLen, cMBSize)
    return retO