def __init__(self, states, inner_size, trainable=True, scope=None):
"""Initiailzes a new instance of the BahdanauAttention class.
The attention mechanism implemented in this class is the one
described by Bahdanau et al. here: https://arxiv.org/abs/1409.0473.
The attention states and the query are projected to the attention
inner size, summed together, passed through a tanh and finally
combined via a dot product with a learned attention vector. All these
operations are carried out at a common reference size, the attention
inner size, which must be set at initialization time (via the
`inner_size` argument).
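Schematically, writing W_s and W_q for the two projections and v for
the attention vector (illustrative names only, not attributes of this
class), each score is computed as:

    score(s_t, q) = v^T tanh(W_s s_t + W_q q)
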
Arguments:
states: 3-D Tensor of shape [batch, timesteps, state] representing the
states on which the attention scores will be computed; the third dimension
of the tensor must be statically determined.
inner_size: int representing the inner attention size.
trainable: if True, variables will be trainable.
scope: None, str or tf.VariableScope representing the variable scope
of the layer which will be used to create all the needed variables.
Raises:
ValueError: if the last dimension of the `states` argument is not
statically determined.
"""
super(BahdanauAttention, self).__init__(trainable=trainable, scope=scope)
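# Cache the constructor arguments. The projected memory and the
# attention vector are set to None here; presumably they are built
# later, when the layer's variables are actually created.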
self._states = states
self._size = inner_size
self._memory = None
self._vector = None
self._var_op_names = set()
# check that the last dimension of the `states`
# variable is fully defined.
state_size = states.get_shape()[-1].value
if state_size is None:
raise ValueError('Last dimension of `states` must be statically defined, '
                 'found shape %s' % str(states.get_shape()))
self._state_size = state_size
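# A minimal instantiation sketch (TF1-style, illustrative only; the
# placeholder shape and inner size are arbitrary):
#   states = tf.placeholder(tf.float32, shape=[None, None, 128])
#   attention = BahdanauAttention(states, inner_size=64)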