def rnn(self, model_input, lstm_size, num_frames,sub_scope="", **unused_params):
"""Creates a model which uses a stack of LSTMs to represent the video.
Args:
model_input: A 'batch_size' x 'max_frames' x 'num_features' matrix of
input features.
vocab_size: The number of classes in the dataset.
num_frames: A vector of length 'batch' which indicates the number of
frames for each video (before padding).
Returns:
A dictionary with a tensor containing the probability predictions of the
model in the 'predictions' key. The dimensions of the tensor are
'batch_size' x 'num_classes'.
"""
## Batch normalize the input
stacked_lstm = tf.contrib.rnn.MultiRNNCell(
[
tf.contrib.rnn.BasicLSTMCell(
lstm_size, forget_bias=1.0, state_is_tuple=True)
for _ in range(1)
],
state_is_tuple=True)
with tf.variable_scope("RNN-"+sub_scope):
outputs, state = tf.nn.dynamic_rnn(stacked_lstm, model_input,
sequence_length=num_frames,
swap_memory=True,
dtype=tf.float32)
state_out = tf.concat(map(lambda x: x.c, state), axis=1)
return state_out
评论列表
文章目录