def decode_one_step(self, X, encoder_last_hidden_states):
    """Decode only the newest position of ``X`` and return its output scores.

    When ``X`` has fewer columns than ``self.decoder_kernel_size`` the
    decoder state is reset and the call is delegated to ``self.decode`` with
    ``return_last=True``. Otherwise only the trailing
    ``decoder_kernel_size`` columns of ``X`` are embedded and pushed through
    every decoder layer via ``_forward_decoder_layer_one_step``.

    Args:
        X: Indexable as ``X[:, -k:]`` and exposing ``shape``; presumably a
            ``(batch, seq_length)`` array of target token ids -- TODO confirm
            against the caller.
        encoder_last_hidden_states: Sequence with exactly
            ``self.num_layers`` entries; entry ``i`` is handed to decoder
            layer ``i``.

    Returns:
        On the normal path, the output of ``self.fc`` reshaped to
        ``(-1, self.vocab_size_dec)``. On the short-sequence path, whatever
        ``self.decode(..., return_last=True)`` returns.

    Raises:
        AssertionError: If ``encoder_last_hidden_states`` does not contain
            one entry per decoder layer.
    """
    assert len(encoder_last_hidden_states) == self.num_layers, (
        "encoder_last_hidden_states must contain one entry per decoder layer"
    )
    seq_length = X.shape[1]
    ksize = self.decoder_kernel_size

    if seq_length < ksize:
        # Not enough positions to fill one kernel window: drop any state kept
        # from earlier steps and fall back to the full decode.
        self.reset_state()
        return self.decode(X, encoder_last_hidden_states, return_last=True)

    def select_layer_input(layer_outputs):
        # With dense connections every later stage consumes the concatenation
        # of all earlier layer outputs; otherwise only the most recent one.
        if self.densely_connected:
            return F.concat(layer_outputs)
        return layer_outputs[-1]

    # Only the trailing kernel window is needed for a single step.
    window = X[:, -ksize:]
    embedding = F.swapaxes(self.decoder_embed(window), 1, 2)

    layer_outputs = [
        self._forward_decoder_layer_one_step(
            0, embedding, encoder_last_hidden_states[0]
        )
    ]
    for layer_index in range(1, self.num_layers):
        layer_outputs.append(
            self._forward_decoder_layer_one_step(
                layer_index,
                select_layer_input(layer_outputs),
                encoder_last_hidden_states[layer_index],
            )
        )

    out_data = select_layer_input(layer_outputs)
    # Keep only the final index of axis 2, preserving that axis with length 1.
    out_data = out_data[:, :, -1, None]

    if self.using_dropout:
        out_data = F.dropout(out_data, ratio=self.dropout)

    out_data = self.fc(out_data)
    # Swap axes 1 and 2, then flatten to rows of vocab_size_dec scores.
    return F.reshape(F.swapaxes(out_data, 1, 2), (-1, self.vocab_size_dec))
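# (Web-page residue from the scraped article, not part of the code:
#  "Comment list" / "Article table of contents".)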