def __call__(self, inputs, state, scope=None):
    """Compute a single SRU time step.

    Args:
        inputs: Input tensor for this step. It is projected once into the
            candidate ``x_hat`` and once into the pair of gates.
        state: Cell state carried over from the previous step.
        scope: Optional variable scope. When falsy, the class name is
            used as the scope name instead.

    Returns:
        A ``(output, new_state)`` pair. ``new_state`` is the value after
        ``self._activation`` has been applied, and that activated value is
        also what feeds the highway sum producing ``output``.
    """
    cell_scope = scope or type(self).__name__  # "SRUCell"
    with tf.variable_scope(cell_scope):
        with tf.variable_scope("x_hat"):
            # NOTE(review): the third argument of `linear` is presumably
            # its bias switch (off here, on for the gates) -- confirm
            # against the helper's definition.
            x_hat = linear([inputs], self._num_units, False)

        with tf.variable_scope("gates"):
            # One projection of width 2 * num_units, squashed and then cut
            # in half along axis 1 to obtain both gates.
            gate_logits = linear([inputs], 2 * self._num_units, True)
            gates = tf.sigmoid(gate_logits)
            forget_gate, reset_gate = tf.split(gates, 2, axis=1)

        with tf.variable_scope("candidates"):
            kept = forget_gate * state
            written = (1 - forget_gate) * x_hat
            new_state = self._activation(kept + written)
            # The variational dropout suggested in the paper is
            # intentionally left out of this implementation.

        # Highway connection. This deliberately departs from equation (7)
        # of the SRU paper (https://arxiv.org/abs/1709.02755): the skip
        # term is the projected x_hat rather than the raw cell inputs.
        gated_state = reset_gate * new_state
        skip = (1 - reset_gate) * x_hat
        output = gated_state + skip
    return output, new_state
评论列表
文章目录