def _mean_pool(self, rnn_outputs, batch_size, max_char_len, max_word_len, char_lens):
    """
    Perform length-aware mean-pooling over the character axis after the
    character-RNN layer, ignoring zero padding.

    :param rnn_outputs: hidden states of all time steps after the
        character-RNN layer, shape ``[batch_size * max_word_len,
        max_char_len, 2 * char_rnn_size]`` (batch and word dimensions were
        merged before running the character RNN).
    :param batch_size: number of sequences in the batch.
    :param max_char_len: padded character length; kept in the signature for
        backward compatibility with existing callers (no longer needed since
        the sum-based formulation below never averages over padding).
    :param max_word_len: padded word length of each sequence.
    :param char_lens: true (unpadded) character length of each word, shape
        ``[batch_size * max_word_len]``; 0 for fully-padded word slots.
    :return: mean of the hidden states over the real (unpadded) time steps,
        shape ``[batch_size, max_word_len, 2 * char_rnn_size]``, with
        padded word slots zeroed out.
    """
    # Summing and dividing by the true length is equivalent to the former
    # mean * max_char_len / char_lens rescaling (padding rows are all-zero,
    # so they contribute nothing to the sum), but avoids the extra multiply
    # and the transpose/transpose broadcasting trick.
    summed = tf.reduce_sum(rnn_outputs, axis=1)
    # expand char_lens to [N, 1] so the division broadcasts across features
    lengths = tf.expand_dims(tf.cast(char_lens, tf.float32), 1)
    pooled = tf.divide(summed, lengths)
    # batch and word-len dimensions were merged before running character-RNN,
    # so restore the original [batch, word, feature] layout
    pooled = tf.reshape(pooled, [batch_size, max_word_len, self._char_rnn_size * 2])
    # fully-padded words have char_len == 0, producing 0/0 = NaN; zero those out
    pooled = tf.where(tf.is_nan(pooled), tf.zeros_like(pooled), pooled)
    return pooled