import tensorflow as tf
from tensorflow.contrib import layers


def task_specific_attention(inputs, output_size,
                            initializer=layers.xavier_initializer(),
                            activation_fn=tf.tanh, scope=None):
    """
    Performs task-specific attention reduction, using a learned
    attention context vector (constant within the task of interest).

    Args:
        inputs: Tensor of shape [batch_size, units, input_size]
            `input_size` must be static (known)
            `units` axis will be attended over (reduced from the output)
            `batch_size` will be preserved
        output_size: Size of the output's inner (feature) dimension

    Returns:
        outputs: Tensor of shape [batch_size, output_size].
    """
    assert len(inputs.get_shape()) == 3 and inputs.get_shape()[-1].value is not None

    with tf.variable_scope(scope or 'attention') as scope:
        # Learned context vector against which every projected unit is scored.
        attention_context_vector = tf.get_variable(name='attention_context_vector',
                                                   shape=[output_size],
                                                   initializer=initializer,
                                                   dtype=tf.float32)
        # Project each unit into the attention space: [batch_size, units, output_size].
        input_projection = layers.fully_connected(inputs, output_size,
                                                  activation_fn=activation_fn,
                                                  scope=scope)

        # Similarity of each projected unit with the context vector: [batch_size, units, 1].
        vector_attn = tf.reduce_sum(tf.multiply(input_projection, attention_context_vector),
                                    axis=2, keep_dims=True)
        # Normalize over the `units` axis to obtain attention weights.
        attention_weights = tf.nn.softmax(vector_attn, dim=1)
        # Weighted sum over units yields the reduced representation: [batch_size, output_size].
        weighted_projection = tf.multiply(input_projection, attention_weights)
        outputs = tf.reduce_sum(weighted_projection, axis=1)

        return outputs
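
As a quick usage sketch (assuming TensorFlow 1.x with tf.contrib.layers available; the placeholder name, shapes, and scope below are illustrative, not part of the original file), the function can be wired onto a batch of sentence vectors like this:

# Toy batch: variable batch size, 10 units (e.g. sentences), 128 static features each.
sentence_vectors = tf.placeholder(tf.float32, shape=[None, 10, 128])

# Attend over the 10 units and reduce them to one 100-dimensional vector per example.
doc_vector = task_specific_attention(sentence_vectors, output_size=100,
                                     scope='sentence_attention')

print(doc_vector.get_shape())  # (?, 100)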
Source file: model_components.py