# Requires TensorFlow 1.x; `conv2d`, `_linear`, and `softmax` are helpers
# assumed to be defined elsewhere in the same module.
import tensorflow as tf


def _attention(query, attn_states, is_training, reuse, attn_size,
               attn_vec_size, attn_length, trainable=True, name='attention'):
    with tf.variable_scope(name, reuse=reuse):
        # Learned vector that reduces the tanh features to a scalar score.
        v = tf.get_variable(
            name="V", shape=[attn_vec_size], trainable=trainable)
        # Treat the attention window as a [batch, attn_length, 1, attn_size] "image".
        attn_states_reshaped = tf.reshape(
            attn_states, shape=[-1, attn_length, 1, attn_size])
        # A 1x1 convolution projects each past state to attn_vec_size (the "keys").
        attn_conv = conv2d(attn_states_reshaped, attn_vec_size, is_training,
                           reuse, filter_size=(1, 1), stride=(1, 1),
                           trainable=trainable, use_bias=False)
        # Project the query into the same space and broadcast it over the window.
        y = _linear(query, attn_vec_size, reuse)
        y = tf.reshape(y, [-1, 1, 1, attn_vec_size])
        # Additive (Bahdanau-style) scores: s_i = v . tanh(key_i + query_proj).
        s = tf.reduce_sum(v * tf.tanh(attn_conv + y), [2, 3])
        # Normalize the scores into attention weights over the window.
        a = softmax(s)
        # Context vector: attention-weighted sum of the past states.
        d = tf.reduce_sum(tf.reshape(
            a, [-1, attn_length, 1, 1]) * attn_states_reshaped, [1, 2])
        new_attns = tf.reshape(d, [-1, attn_size])
        # Drop the oldest state so the window can admit the next one.
        new_attn_states = tf.slice(attn_states, [0, 1, 0], [-1, -1, -1])
        return new_attns, new_attn_states
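
In equation form, the function computes s_i = v^T tanh(W_k h_i + W_q q) for each state h_i in the window, a = softmax(s), and the context d = sum_i a_i h_i. The sketch below reproduces the same computation with only raw TF 1.x ops, so it runs without the module's helpers; the names attention_sketch, W_k, and W_q are hypothetical stand-ins for the conv2d and _linear helpers used above.

import tensorflow as tf  # TF 1.x


def attention_sketch(query, attn_states, attn_size, attn_vec_size, attn_length):
    # Stand-in for conv2d(..., use_bias=False): a 1x1 conv is a per-state
    # linear map from attn_size to attn_vec_size.
    w_k = tf.get_variable("W_k", shape=[1, 1, attn_size, attn_vec_size])
    # Stand-in for _linear(query, attn_vec_size, ...).
    w_q = tf.get_variable(
        "W_q", shape=[query.get_shape()[-1].value, attn_vec_size])
    v = tf.get_variable("V", shape=[attn_vec_size])

    states = tf.reshape(attn_states, [-1, attn_length, 1, attn_size])
    keys = tf.nn.conv2d(states, w_k, strides=[1, 1, 1, 1], padding="SAME")
    q = tf.reshape(tf.matmul(query, w_q), [-1, 1, 1, attn_vec_size])
    scores = tf.reduce_sum(v * tf.tanh(keys + q), [2, 3])  # [batch, attn_length]
    weights = tf.nn.softmax(scores)
    context = tf.reduce_sum(
        tf.reshape(weights, [-1, attn_length, 1, 1]) * states, [1, 2])
    return tf.reshape(context, [-1, attn_size])             # [batch, attn_size]

In use, attn_states would hold the last attn_length hidden states with shape [batch, attn_length, attn_size], and the call would sit inside a variable_scope so the weights are created once and reused across time steps, as the reuse argument of the original function suggests.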