import tensorflow as tf
from tensorflow.contrib import layers


def task_specific_attention(inputs, output_size,
                            initializer=layers.xavier_initializer(),
                            activation_fn=tf.tanh, scope=None):
"""
Performs task-specific attention reduction, using learned
attention context vector (constant within task of interest).
Args:
inputs: Tensor of shape [batch_size, units, input_size]
`input_size` must be static (known)
`units` axis will be attended over (reduced from output)
`batch_size` will be preserved
output_size: Size of output's inner (feature) dimension
Returns:
outputs: Tensor of shape [batch_size, output_dim].
"""
    assert len(inputs.get_shape()) == 3 and inputs.get_shape()[-1].value is not None

    with tf.variable_scope(scope or 'attention') as scope:
        # Learned, task-specific context vector u_w, shared across all positions.
        attention_context_vector = tf.get_variable(name='attention_context_vector',
                                                   shape=[output_size],
                                                   initializer=initializer,
                                                   dtype=tf.float32)
        # Project inputs to [batch_size, units, output_size] with a nonlinearity.
        input_projection = layers.fully_connected(inputs, output_size,
                                                  activation_fn=activation_fn,
                                                  scope=scope)

        # Score each position by its similarity to the context vector, then
        # normalize over the `units` axis so the weights sum to 1 per example.
        # (The original applied softmax over the feature axis, which does not
        # normalize across positions; `axis`/`keepdims` require TF >= 1.6,
        # older versions use `dim`/`keep_dims`.)
        vector_attn = tf.reduce_sum(tf.multiply(input_projection,
                                                attention_context_vector),
                                    axis=2, keepdims=True)
        attention_weights = tf.nn.softmax(vector_attn, axis=1)
        weighted_projection = tf.multiply(input_projection, attention_weights)

        # Weighted sum over `units` yields one vector per batch element.
        outputs = tf.reduce_sum(weighted_projection, axis=1)
        return outputs
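
For reference, a minimal usage sketch (assuming TF 1.x graph mode; the placeholder name and the sizes below are illustrative, not taken from the original code):

# Attend over 10 timesteps of 64-dim encoder outputs and pool them into a
# single 100-dim vector per example. Names and shapes are hypothetical.
encoder_outputs = tf.placeholder(tf.float32, shape=[None, 10, 64],
                                 name='encoder_outputs')
sentence_vector = task_specific_attention(encoder_outputs, output_size=100,
                                          scope='word_level_attention')
# sentence_vector has static shape [None, 100]: the `units` axis (10 steps)
# is reduced away by the attention-weighted sum.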