def __call__(self, inputs, state, scope=None):
with tf.variable_scope(scope or type(self).__name__, initializer=self._initializer):
        # Split the hidden state into blocks (U, V, W are shared across blocks).
        # Parameters of the full candidate term (Equation 3), currently unused:
        # U = tf.get_variable('U', [self._num_units_per_block, self._num_units_per_block],
        #                     initializer=tf.constant_initializer(np.identity(self._num_units_per_block)),
        #                     trainable=False)
        # W = tf.get_variable('W', [self._num_units_per_block, self._num_units_per_block],
        #                     initializer=tf.constant_initializer(np.zeros((self._num_units_per_block, self._num_units_per_block))),
        #                     trainable=False)
        # V = tf.get_variable('V', [self._num_units_per_block, self._num_units_per_block],
        #                     initializer=tf.constant_initializer(np.zeros((self._num_units_per_block, self._num_units_per_block))),
        #                     trainable=False)
        # b = tf.get_variable('biasU', [self._num_units_per_block])
        # TODO: layer norm?
state = tf.split(state, self._num_blocks, 1)
next_states = []
        for j, state_j in enumerate(state):  # hidden state of memory block j
key_j = self._keys[j]
gate_j = self.get_gate(state_j, key_j, inputs)
            candidate_j = inputs  # Simplified candidate: h~_j = s_t (the full Equation 3 uses U, V, W).
            # Equation 4: h_j <- h_j + g_j * h~_j
            # Perform a gated update of the hidden state (memory).
            state_j_next = state_j + tf.expand_dims(gate_j, -1) * candidate_j
            # Equation 5: forget previous memories by normalization.
            # state_j_next = tf.nn.l2_normalize(state_j_next, -1)
            # (No extra epsilon is needed: tf.nn.l2_normalize already guards the division with epsilon=1e-12.)
next_states.append(state_j_next)
state_next = tf.concat(next_states, 1)
return state_next, state_next
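
# --- Sketch (not from the source): get_gate and the full candidate ---
# The get_gate call above is not shown in this snippet. Below is a minimal
# sketch of it and of the full candidate term, following the Entity Network
# equations (Henaff et al., 2017), assuming state_j and inputs are
# [batch, num_units_per_block] tensors and key_j is a [num_units_per_block]
# vector. get_candidate and self._activation are assumptions (the paper uses
# a parametric ReLU for phi), not the author's code; both would be methods
# of the same cell class.
import tensorflow as tf

def get_gate(self, state_j, key_j, inputs):
    # Equation 2: g_j = sigmoid(s_t . h_j + s_t . w_j) -- content plus address match.
    content = tf.reduce_sum(inputs * state_j, axis=1)                   # s_t . h_j
    address = tf.reduce_sum(inputs * tf.expand_dims(key_j, 0), axis=1)  # s_t . w_j
    return tf.sigmoid(content + address)

def get_candidate(self, state_j, key_j, inputs, U, V, W, b):
    # Equation 3: h~_j = phi(U h_j + V w_j + W s_t + b). key_j is expanded to
    # [1, units] so it broadcasts over the batch dimension.
    key_j = tf.expand_dims(key_j, 0)
    return self._activation(
        tf.matmul(state_j, U) + tf.matmul(key_j, V) + tf.matmul(inputs, W) + b)

# With candidate_j = inputs, __call__ above skips Equation 3 entirely and
# writes the current input s_t directly into each block, scaled by its gate.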
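
# --- Hypothetical usage (names are assumptions, not from the source) ---
# Driving the cell with tf.nn.dynamic_rnn in a TensorFlow 1.x graph;
# EntityNetworkCell and its constructor arguments are illustrative only.
cell = EntityNetworkCell(num_blocks=20, num_units_per_block=100)
story = tf.placeholder(tf.float32, [None, None, 100])  # [batch, time, embed]
outputs, final_state = tf.nn.dynamic_rnn(cell, story, dtype=tf.float32)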