def _attention(self, prev_decoder_state, prev_embedding):
    with tf.variable_scope('attention') as scope:
        # e = scores of shape [batch_size, output_seq_length, input_seq_length], e_{ij} = score(s_{i-1}, h_j)
        # e_i = scores for this step, shape [batch_size, input_seq_length], e_{ij} = score(prev_decoder_state, h_j)
        e_i = self._score(prev_decoder_state, prev_embedding)
        # alpha_i = softmax(e_i), shape [batch_size, input_seq_length]
        alpha_i = tf.nn.softmax(e_i)
        # Broadcast alpha_i over the feature dimension:
        # [batch_size, input_seq_length] -> [batch_size, input_seq_length, encoder_output_size]
        resized_alpha_i = tf.tile(tf.expand_dims(alpha_i, axis=2),
                                  [1, 1, self.encoder_output_size])
        # c_i = sum_j alpha_{ij} * h_j, shape [batch_size, encoder_output_size]
        if self.mode == 'decode':
            # At decode time the encoder states are fed in via a placeholder instead of recomputed.
            c_i = tf.reduce_sum(tf.multiply(resized_alpha_i, self.pre_computed_encoder_states_placeholder), axis=1)
        else:
            c_i = tf.reduce_sum(tf.multiply(resized_alpha_i, self.encoder_outputs), axis=1)
        return c_i, e_i
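
For reference, the context vector computed above is just a weighted sum of the encoder states, c_i = sum_j alpha_{ij} * h_j. Below is a minimal NumPy sketch of that step; the shapes (batch_size, input_seq_length, encoder_output_size) are illustrative assumptions, not values taken from the original class.

import numpy as np

# Illustrative shapes only (assumed values, not from the original model).
batch_size, input_seq_length, encoder_output_size = 2, 3, 4

encoder_outputs = np.random.rand(batch_size, input_seq_length, encoder_output_size)  # h_j
scores = np.random.rand(batch_size, input_seq_length)                                # e_i

# Softmax over input positions, as tf.nn.softmax does above.
alpha = np.exp(scores) / np.exp(scores).sum(axis=1, keepdims=True)

# Broadcast the weights over the feature dimension and sum over input positions:
# c[b, d] = sum_j alpha[b, j] * h[b, j, d]
c = (alpha[:, :, None] * encoder_outputs).sum(axis=1)
print(c.shape)  # (2, 4)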