def apply(self, is_train, inputs, mask=None):
inputs = tf.transpose(inputs, [1, 0, 2]) # to time first
with tf.variable_scope("forward"):
cell = LSTMBlockFusedCell(self.n_units, use_peephole=self.use_peepholes)
fw = cell(inputs, dtype=tf.float32, sequence_length=mask)[0]
with tf.variable_scope("backward"):
cell = LSTMBlockFusedCell(self.n_units, use_peephole=self.use_peepholes)
inputs = tf.reverse_sequence(inputs, mask, seq_axis=0, batch_axis=1)
bw = cell(inputs, dtype=tf.float32, sequence_length=mask)[0]
bw = tf.reverse_sequence(bw, mask, seq_axis=0, batch_axis=1)
out = tf.concat([fw, bw], axis=2)
out = tf.transpose(out, [1, 0, 2]) # back to batch first
return out
Python tf.reverse_sequence() examples (source code collected from open-source projects)
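For reference, a minimal self-contained sketch of what tf.reverse_sequence computes (TF 1.x style, matching the snippets below; the toy tensors are made up for illustration). Only the first seq_lengths[i] steps of batch entry i are reversed; any padding beyond that stays in place.

import tensorflow as tf

x = tf.constant([[1, 2, 3, 0],
                 [4, 5, 0, 0]])   # two right-padded rows, 0 = PAD
lengths = tf.constant([3, 2])
rev = tf.reverse_sequence(x, lengths, seq_axis=1, batch_axis=0)

with tf.Session() as sess:
    print(sess.run(rev))  # [[3 2 1 0]
                          #  [5 4 0 0]]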
def _create_position_embedding(self, lengths, maxlen):
# Slice to size of current sequence
pe_slice = self.pos_embed[2:maxlen+2, :]
# Replicate encodings for each element in the batch
batch_size = tf.shape(lengths)[0]
pe_batch = tf.tile([pe_slice], [batch_size, 1, 1])
# Mask out positions that are padded
positions_mask = tf.sequence_mask(
lengths=lengths, maxlen=maxlen, dtype=tf.float32)
positions_embed = pe_batch * tf.expand_dims(positions_mask, 2)
    # e.g. [[1,2,3,4,PAD,PAD,PAD],[2,3,PAD,PAD,PAD,PAD,PAD]] with lengths [4,2]
    # -> [[4,3,2,1,PAD,PAD,PAD],[3,2,PAD,PAD,PAD,PAD,PAD]]
    positions_embed = tf.reverse_sequence(positions_embed, lengths, batch_dim=0, seq_dim=1)
    # -> [[PAD,PAD,PAD,1,2,3,4],[PAD,PAD,PAD,PAD,PAD,2,3]]
    # (reversing the whole time axis restores token order and moves padding to the front)
    positions_embed = tf.reverse(positions_embed, [1])
return positions_embed
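The reverse-then-reverse idiom in the comments above is a general trick for turning right padding into left padding; a standalone sketch with made-up ids:

import tensorflow as tf

ids = tf.constant([[1, 2, 3, 4, 0, 0, 0],
                   [2, 3, 0, 0, 0, 0, 0]])   # 0 = PAD
lengths = tf.constant([4, 2])
# Reverse only the valid prefix of each row.
flipped = tf.reverse_sequence(ids, lengths, batch_dim=0, seq_dim=1)
# Reverse the whole time axis: token order is restored, padding moves to the front.
left_padded = tf.reverse(flipped, [1])

with tf.Session() as sess:
    print(sess.run(left_padded))  # [[0 0 0 1 2 3 4]
                                  #  [0 0 0 0 0 2 3]]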
def bw_dynamic_rnn(cell, inputs, sequence_length=None, initial_state=None,
dtype=None, parallel_iterations=None, swap_memory=False,
time_major=False, scope=None):
assert not time_major # TODO : to be implemented later!
flat_inputs = flatten(inputs, 2) # [-1, J, d]
flat_len = None if sequence_length is None else tf.cast(flatten(sequence_length, 0), 'int64')
    flat_inputs = tf.reverse(flat_inputs, [1]) if sequence_length is None \
        else tf.reverse_sequence(flat_inputs, flat_len, 1)
flat_outputs, final_state = _dynamic_rnn(cell, flat_inputs, sequence_length=flat_len,
initial_state=initial_state, dtype=dtype,
parallel_iterations=parallel_iterations, swap_memory=swap_memory,
time_major=time_major, scope=scope)
    flat_outputs = tf.reverse(flat_outputs, [1]) if sequence_length is None \
        else tf.reverse_sequence(flat_outputs, flat_len, 1)
outputs = reconstruct(flat_outputs, inputs, 2)
return outputs, final_state
def bidirectional_rnn(forward_cell, backward_cell, inputs, seq_lens_mask, concatenate=True):
seq_lens = tf.cast(tf.reduce_sum(seq_lens_mask, 1), tf.int32)
# Reverse inputs (batch x time x embedding_dim); takes care of variable seq_len
reverse_inputs = tf.reverse_sequence(inputs, seq_lens, seq_dim=1, batch_dim=0)
# Run forwards and backwards RNN
forward_outputs, forward_last_state = \
rnn(forward_cell, inputs, seq_lens_mask)
backward_outputs_reversed, backward_last_state = \
rnn(backward_cell, reverse_inputs, seq_lens_mask)
backward_outputs = tf.reverse_sequence(backward_outputs_reversed, seq_lens, seq_dim=1, batch_dim=0)
if concatenate:
# last_state dimensions: batch x hidden_size
        last_state = tf.concat([forward_last_state, backward_last_state], 1)
# outputs dimensions: batch x time x hidden_size
        outputs = tf.concat([forward_outputs, backward_outputs], 2)
# Dimensions: outputs (batch x time x hidden_size*2); last_state (batch x hidden_size*2)
return (outputs, last_state)
# Dimensions: outputs (batch x time x hidden_size); last_state (batch x hidden_size)
return (forward_outputs, forward_last_state, backward_outputs, backward_last_state)
def flip_randomly(inputs, horizontally, vertically, is_training, name=None):
"""Flip images randomly. Make separate flipping decision for each image.
Args:
inputs (4-D tensor): Input images (batch size, height, width, channels).
horizontally (bool): If True, flip horizontally with 50% probability. Otherwise, don't.
vertically (bool): If True, flip vertically with 50% probability. Otherwise, don't.
is_training (bool): If False, no flip is performed.
        name: A name for the operation.
"""
with tf.name_scope(name, "flip_randomly") as scope:
batch_size, height, width, _ = tf.unstack(tf.shape(inputs))
vertical_choices = (tf.random_uniform([batch_size], 0, 2, tf.int32) *
tf.to_int32(vertically) *
tf.to_int32(is_training))
horizontal_choices = (tf.random_uniform([batch_size], 0, 2, tf.int32) *
tf.to_int32(horizontally) *
tf.to_int32(is_training))
vertically_flipped = tf.reverse_sequence(inputs, vertical_choices * height, 1)
both_flipped = tf.reverse_sequence(vertically_flipped, horizontal_choices * width, 2)
return tf.identity(both_flipped, name=scope)
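The trick above relies on a per-example seq_length of either 0 (leave the row untouched) or the full axis size (reverse it completely), which turns tf.reverse_sequence into a batched conditional flip. A minimal sketch with made-up image shapes:

import tensorflow as tf

images = tf.random_normal([2, 4, 6, 3])   # (batch, height, width, channels)
width = tf.shape(images)[2]
# 1 = flip this image horizontally, 0 = keep it as-is.
choices = tf.constant([1, 0])
flipped = tf.reverse_sequence(images, choices * width, seq_axis=2, batch_axis=0)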
def map(self, is_train, x, mask=None):
x = tf.transpose(x, [1, 0, 2])
if self.bidirectional:
with tf.variable_scope("forward"):
fw = self._apply_transposed(is_train, x)[0]
with tf.variable_scope("backward"):
bw = self._apply_transposed(is_train, tf.reverse_sequence(x, mask, 0, 1))[0]
bw = tf.reverse_sequence(bw, mask, 0, 1)
out = tf.concat([fw, bw], axis=2)
else:
out = self._apply_transposed(is_train, x)[0]
out = tf.transpose(out, [1, 0, 2])
if mask is not None:
out *= tf.expand_dims(tf.cast(tf.sequence_mask(mask, tf.shape(out)[1]), tf.float32), 2)
return out
def reverse_seq(input_seq, lengths):
"""Reverse a list of Tensors up to specified lengths.
Args:
input_seq: Sequence of seq_len tensors of dimension (batch_size, depth)
lengths: A tensor of dimension batch_size, containing lengths for each
sequence in the batch. If "None" is specified, simply reverses
the list.
Returns:
time-reversed sequence
"""
for input_ in input_seq:
input_.set_shape(input_.get_shape().with_rank(2))
# Join into (time, batch_size, depth)
    s_joined = tf.stack(input_seq)  # tf.pack was renamed to tf.stack in TF 1.0
# Reverse along dimension 0
s_reversed = tf.reverse_sequence(s_joined, lengths, 0, 1)
# Split again into list
    result = tf.unstack(s_reversed)  # tf.unpack was renamed to tf.unstack in TF 1.0
return result
def birnn(cell, inputs, sequence_length, initial_state_fw=None, initial_state_bw=None, ff_keep_prob=1., recur_keep_prob=1., dtype=tf.float32, scope=None):
""""""
# Forward direction
with tf.variable_scope(scope or 'BiRNN_FW') as fw_scope:
output_fw, output_state_fw = rnn(cell, inputs, sequence_length, initial_state_fw, ff_keep_prob, recur_keep_prob, dtype, scope=fw_scope)
# Backward direction
rev_inputs = tf.reverse_sequence(inputs, sequence_length, 1, 0)
with tf.variable_scope(scope or 'BiRNN_BW') as bw_scope:
output_bw, output_state_bw = rnn(cell, rev_inputs, sequence_length, initial_state_bw, ff_keep_prob, recur_keep_prob, dtype, scope=bw_scope)
output_bw = tf.reverse_sequence(output_bw, sequence_length, 1, 0)
# Concat each of the forward/backward outputs
outputs = tf.concat([output_fw, output_bw], 2)
return outputs, tf.tuple([output_state_fw, output_state_bw])
#===============================================================
def step(self, time_, inputs, state, name=None):
cell_output, cell_state = self.cell(inputs, state)
cell_output_new, logits, attention_scores, attention_context = \
self.compute_output(cell_output)
if self.reverse_scores_lengths is not None:
attention_scores = tf.reverse_sequence(
input=attention_scores,
seq_lengths=self.reverse_scores_lengths,
seq_dim=1,
batch_dim=0)
sample_ids = self.helper.sample(
time=time_, outputs=logits, state=cell_state)
outputs = AttentionDecoderOutput(
logits=logits,
predicted_ids=sample_ids,
cell_output=cell_output_new,
attention_scores=attention_scores,
attention_context=attention_context)
finished, next_inputs, next_state = self.helper.next_inputs(
time=time_, outputs=outputs, state=cell_state, sample_ids=sample_ids)
return (outputs, next_state, next_inputs, finished)
def fused_rnn_backward(fused_rnn, inputs, sequence_length, initial_state=None, dtype=None, scope=None, time_major=True):
if not time_major:
inputs = tf.transpose(inputs, [1, 0, 2])
# assumes that time dim is 0 and batch is 1
rev_inputs = tf.reverse_sequence(inputs, sequence_length, 0, 1)
rev_outputs, last_state = fused_rnn(rev_inputs, sequence_length=sequence_length, initial_state=initial_state,
dtype=dtype, scope=scope)
outputs = tf.reverse_sequence(rev_outputs, sequence_length, 0, 1)
if not time_major:
outputs = tf.transpose(outputs, [1, 0, 2])
return outputs, last_state
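A usage sketch for fused_rnn_backward; LSTMBlockFusedCell is assumed to come from tf.contrib.rnn (TF 1.x), and the shapes are illustrative:

import tensorflow as tf

inputs = tf.placeholder(tf.float32, [20, 8, 64])   # time-major: (max_time, batch, depth)
seq_len = tf.placeholder(tf.int32, [8])
cell = tf.contrib.rnn.LSTMBlockFusedCell(num_units=128)
# Runs the fused cell right-to-left over each sequence; the returned outputs
# are re-reversed so they line up with the original time order.
outputs, last_state = fused_rnn_backward(cell, inputs, seq_len, dtype=tf.float32)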
def encode(self, features, labels):
features["source_ids"] = tf.reverse_sequence(features["source_ids"], features["source_len"], batch_dim=0, seq_dim=1) # [[1,2,3,4,PAD,PAD,PAD],[2,3,PAD,PAD,PAD,PAD,PAD]] [4,2]
features["source_ids"] = tf.reverse(features["source_ids"],[1]) # --> [[4,3,2,1,PAD,PAD,PAD],[3,2,PAD,PAD,PAD,PAD,PAD]] --> [[PAD,PAD,PAD,1,2,3,4],[PAD,PAD,PAD,PAD,PAD,2,3]]
source_embedded = tf.nn.embedding_lookup(self.source_embedding_fairseq(),
features["source_ids"])
encoder_fn = self.encoder_class(self.params["encoder.params"], self.mode, self.source_pos_embedding_fairseq())
return encoder_fn(source_embedded, features["source_len"])
def _encoder(cell_fw, cell_bw, inputs, sequence_length, dtype=None,
scope=None):
with tf.variable_scope(scope or "encoder",
values=[inputs, sequence_length]):
inputs_fw = inputs
inputs_bw = tf.reverse_sequence(inputs, sequence_length,
batch_axis=0, seq_axis=1)
with tf.variable_scope("forward"):
output_fw, state_fw = _gru_encoder(cell_fw, inputs_fw,
sequence_length, None,
dtype=dtype)
with tf.variable_scope("backward"):
output_bw, state_bw = _gru_encoder(cell_bw, inputs_bw,
sequence_length, None,
dtype=dtype)
output_bw = tf.reverse_sequence(output_bw, sequence_length,
batch_axis=0, seq_axis=1)
results = {
"annotation": tf.concat([output_fw, output_bw], axis=2),
"outputs": {
"forward": output_fw,
"backward": output_bw
},
"final_states": {
"forward": state_fw,
"backward": state_bw
}
}
return results
def _reverse_seq(input_seq, lengths):
"""Reverse a list of Tensors up to specified lengths.
Args:
input_seq: Sequence of seq_len tensors of dimension (batch_size, depth)
lengths: A tensor of dimension batch_size, containing lengths for each
sequence in the batch. If "None" is specified, simply reverses
the list.
Returns:
time-reversed sequence
"""
if lengths is None:
return list(reversed(input_seq))
input_shape = tensor_shape.matrix(None, None)
for input_ in input_seq:
input_shape.merge_with(input_.get_shape())
input_.set_shape(input_shape)
# Join into (time, batch_size, depth)
s_joined = tf.stack(input_seq)
if lengths is not None:
lengths = tf.to_int64(lengths)
# Reverse along dimension 0
s_reversed = tf.reverse_sequence(s_joined, lengths, 0, 1)
# Split again into list
result = tf.unstack(s_reversed)
for r in result:
r.set_shape(input_shape)
return result
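_reverse_seq is the list-of-tensors analogue of a single reverse_sequence call on a stacked tensor; a quick equivalence sketch (toy shapes, assuming the helper above is in scope):

import tensorflow as tf

steps = [tf.random_normal([4, 16]) for _ in range(5)]   # 5 steps of (batch, depth)
lengths = tf.constant([5, 3, 1, 4])

as_list = tf.stack(_reverse_seq(steps, lengths))
as_op = tf.reverse_sequence(tf.stack(steps), tf.to_int64(lengths), 0, 1)
# as_list and as_op are element-wise equal.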
def encode_sentences(self, text_emb, text_len, text_len_mask):
num_sentences = tf.shape(text_emb)[0]
max_sentence_length = tf.shape(text_emb)[1]
# Transpose before and after for efficiency.
inputs = tf.transpose(text_emb, [1, 0, 2]) # [max_sentence_length, num_sentences, emb]
with tf.variable_scope("fw_cell"):
cell_fw = util.CustomLSTMCell(self.config["lstm_size"], num_sentences, self.dropout)
preprocessed_inputs_fw = cell_fw.preprocess_input(inputs)
with tf.variable_scope("bw_cell"):
cell_bw = util.CustomLSTMCell(self.config["lstm_size"], num_sentences, self.dropout)
preprocessed_inputs_bw = cell_bw.preprocess_input(inputs)
preprocessed_inputs_bw = tf.reverse_sequence(preprocessed_inputs_bw,
seq_lengths=text_len,
seq_dim=0,
batch_dim=1)
state_fw = tf.contrib.rnn.LSTMStateTuple(tf.tile(cell_fw.initial_state.c, [num_sentences, 1]), tf.tile(cell_fw.initial_state.h, [num_sentences, 1]))
state_bw = tf.contrib.rnn.LSTMStateTuple(tf.tile(cell_bw.initial_state.c, [num_sentences, 1]), tf.tile(cell_bw.initial_state.h, [num_sentences, 1]))
with tf.variable_scope("lstm"):
with tf.variable_scope("fw_lstm"):
fw_outputs, fw_states = tf.nn.dynamic_rnn(cell=cell_fw,
inputs=preprocessed_inputs_fw,
sequence_length=text_len,
initial_state=state_fw,
time_major=True)
with tf.variable_scope("bw_lstm"):
bw_outputs, bw_states = tf.nn.dynamic_rnn(cell=cell_bw,
inputs=preprocessed_inputs_bw,
sequence_length=text_len,
initial_state=state_bw,
time_major=True)
bw_outputs = tf.reverse_sequence(bw_outputs,
seq_lengths=text_len,
seq_dim=0,
batch_dim=1)
text_outputs = tf.concat([fw_outputs, bw_outputs], 2)
text_outputs = tf.transpose(text_outputs, [1, 0, 2]) # [num_sentences, max_sentence_length, emb]
return self.flatten_emb_by_sentence(text_outputs, text_len_mask)
def rnn_layer(rnn_input: tf.Tensor, lengths: tf.Tensor,
rnn_spec: RNNSpec) -> Tuple[tf.Tensor, tf.Tensor]:
"""Construct a RNN layer given its inputs and specs.
Arguments:
rnn_inputs: The input sequence to the RNN.
lengths: Lengths of input sequences.
rnn_spec: A valid RNNSpec tuple specifying the network architecture.
"""
if rnn_spec.direction == "bidirectional":
fw_cell = _make_rnn_cell(rnn_spec)
bw_cell = _make_rnn_cell(rnn_spec)
outputs_tup, states_tup = tf.nn.bidirectional_dynamic_rnn(
fw_cell, bw_cell, rnn_input, sequence_length=lengths,
dtype=tf.float32)
outputs = tf.concat(outputs_tup, 2)
if rnn_spec.cell_type == "LSTM":
states_tup = (state.h for state in states_tup)
final_state = tf.concat(list(states_tup), 1)
else:
if rnn_spec.direction == "backward":
rnn_input = tf.reverse_sequence(rnn_input, lengths, seq_axis=1)
cell = _make_rnn_cell(rnn_spec)
outputs, final_state = tf.nn.dynamic_rnn(
cell, rnn_input, sequence_length=lengths, dtype=tf.float32)
if rnn_spec.direction == "backward":
outputs = tf.reverse_sequence(outputs, lengths, seq_axis=1)
if rnn_spec.cell_type == "LSTM":
final_state = final_state.h
return outputs, final_state
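A hedged usage sketch for rnn_layer; RNNSpec and _make_rnn_cell are project-specific helpers, so the field names below are an assumption about the real spec:

import tensorflow as tf
from collections import namedtuple

# Assumed shape of the project's RNNSpec; the actual definition may differ.
RNNSpec = namedtuple("RNNSpec", ["size", "direction", "cell_type"])

inputs = tf.placeholder(tf.float32, [None, None, 300])  # (batch, time, emb)
lengths = tf.placeholder(tf.int32, [None])
# Backward direction: inputs are reversed per-sequence, run through the cell,
# and the outputs reversed back so step t still aligns with input step t.
outputs, final_state = rnn_layer(inputs, lengths, RNNSpec(128, "backward", "GRU"))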
def test_ReverseSequence(self):
x = self.random(3, 4, 10)
t = tf.reverse_sequence(x, [5, 0, 0, 8], seq_dim=2, batch_dim=1)
self.check(t)
def encode_sentences(self, text_emb, text_len, text_len_mask):
"""
Passes the input tensor through bi_LSTM.
Args:
text_emb: [num_sentences, max_sentence_length, emb], text code in tensor
text_len: tf.int32, [Amount of sentences]
text_len_mask: boolean mask for text_emb
Returns: [num_sentences, max_sentence_length, emb], output of bi-LSTM after boolean mask application
"""
num_sentences = tf.shape(text_emb)[0]
max_sentence_length = tf.shape(text_emb)[1]
# Transpose before and after for efficiency.
inputs = tf.transpose(text_emb, [1, 0, 2]) # [max_sentence_length, num_sentences, emb]
with tf.variable_scope("fw_cell"):
cell_fw = utils.CustomLSTMCell(self.opt["lstm_size"], num_sentences, self.dropout)
preprocessed_inputs_fw = cell_fw.preprocess_input(inputs)
with tf.variable_scope("bw_cell"):
cell_bw = utils.CustomLSTMCell(self.opt["lstm_size"], num_sentences, self.dropout)
preprocessed_inputs_bw = cell_bw.preprocess_input(inputs)
preprocessed_inputs_bw = tf.reverse_sequence(preprocessed_inputs_bw,
seq_lengths=text_len,
seq_dim=0,
batch_dim=1)
state_fw = tf.contrib.rnn.LSTMStateTuple(tf.tile(cell_fw.initial_state.c, [num_sentences, 1]),
tf.tile(cell_fw.initial_state.h, [num_sentences, 1]))
state_bw = tf.contrib.rnn.LSTMStateTuple(tf.tile(cell_bw.initial_state.c, [num_sentences, 1]),
tf.tile(cell_bw.initial_state.h, [num_sentences, 1]))
with tf.variable_scope("lstm"):
with tf.variable_scope("fw_lstm"):
fw_outputs, fw_states = tf.nn.dynamic_rnn(cell=cell_fw,
inputs=preprocessed_inputs_fw,
sequence_length=text_len,
initial_state=state_fw,
time_major=True)
with tf.variable_scope("bw_lstm"):
bw_outputs, bw_states = tf.nn.dynamic_rnn(cell=cell_bw,
inputs=preprocessed_inputs_bw,
sequence_length=text_len,
initial_state=state_bw,
time_major=True)
bw_outputs = tf.reverse_sequence(bw_outputs,
seq_lengths=text_len,
seq_dim=0,
batch_dim=1)
text_outputs = tf.concat([fw_outputs, bw_outputs], 2)
text_outputs = tf.transpose(text_outputs, [1, 0, 2]) # [num_sentences, max_sentence_length, emb]
return self.flatten_emb_by_sentence(text_outputs, text_len_mask)
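Note the time-major layout in both encode_sentences variants: the reversal runs over axis 0 (time) with the batch on axis 1, i.e. the transpose of the usual batch-major call. A tiny sketch of the same call pattern with made-up shapes:

import tensorflow as tf

acts = tf.random_normal([50, 32, 200])                      # (max_len, num_sentences, emb)
sent_len = tf.random_uniform([32], 1, 51, dtype=tf.int32)   # length of each sentence
rev = tf.reverse_sequence(acts, seq_lengths=sent_len, seq_dim=0, batch_dim=1)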
def create_model(self, model_input, vocab_size, num_frames, is_training=True, **unused_params):
"""Creates a model which uses a stack of LSTMs to represent the video.
Args:
      model_input: A 'batch_size' x 'max_frames' x 'num_features' tensor of
input features.
vocab_size: The number of classes in the dataset.
num_frames: A vector of length 'batch' which indicates the number of
frames for each video (before padding).
Returns:
A dictionary with a tensor containing the probability predictions of the
model in the 'predictions' key. The dimensions of the tensor are
'batch_size' x 'num_classes'.
"""
lstm_size = FLAGS.lstm_cells
number_of_layers = FLAGS.lstm_layers
random_frames = FLAGS.lstm_random_sequence
iterations = FLAGS.iterations
backward = FLAGS.lstm_backward
if random_frames:
num_frames_2 = tf.cast(tf.expand_dims(num_frames, 1), tf.float32)
model_input = utils.SampleRandomFrames(model_input, num_frames_2,
iterations)
if backward:
model_input = tf.reverse_sequence(model_input, num_frames, seq_axis=1)
stacked_lstm = tf.contrib.rnn.MultiRNNCell(
[
tf.contrib.rnn.BasicLSTMCell(
lstm_size, forget_bias=1.0, state_is_tuple=False)
for _ in range(number_of_layers)
], state_is_tuple=False)
loss = 0.0
with tf.variable_scope("RNN"):
outputs, state = tf.nn.dynamic_rnn(stacked_lstm, model_input,
sequence_length=num_frames,
dtype=tf.float32)
aggregated_model = getattr(video_level_models,
FLAGS.video_level_classifier_model)
return aggregated_model().create_model(
model_input=state,
vocab_size=vocab_size,
is_training=is_training,
**unused_params)
def create_model(self, model_input, vocab_size, num_frames, is_training=True, **unused_params):
"""Creates a model which uses a stack of GRUs to represent the video.
Args:
      model_input: A 'batch_size' x 'max_frames' x 'num_features' tensor of
input features.
vocab_size: The number of classes in the dataset.
num_frames: A vector of length 'batch' which indicates the number of
frames for each video (before padding).
Returns:
A dictionary with a tensor containing the probability predictions of the
model in the 'predictions' key. The dimensions of the tensor are
'batch_size' x 'num_classes'.
"""
gru_size = FLAGS.gru_cells
number_of_layers = FLAGS.gru_layers
backward = FLAGS.gru_backward
random_frames = FLAGS.gru_random_sequence
iterations = FLAGS.iterations
if random_frames:
num_frames_2 = tf.cast(tf.expand_dims(num_frames, 1), tf.float32)
model_input = utils.SampleRandomFrames(model_input, num_frames_2,
iterations)
if backward:
model_input = tf.reverse_sequence(model_input, num_frames, seq_axis=1)
stacked_GRU = tf.contrib.rnn.MultiRNNCell(
[
tf.contrib.rnn.GRUCell(gru_size)
for _ in range(number_of_layers)
], state_is_tuple=False)
loss = 0.0
with tf.variable_scope("RNN"):
outputs, state = tf.nn.dynamic_rnn(stacked_GRU, model_input,
sequence_length=num_frames,
dtype=tf.float32)
aggregated_model = getattr(video_level_models,
FLAGS.video_level_classifier_model)
return aggregated_model().create_model(
model_input=state,
vocab_size=vocab_size,
is_training=is_training,
**unused_params)
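In both video models the backward flag only reverses the valid frames of each clip; padded frames stay at the end, so dynamic_rnn (with sequence_length=num_frames) still stops at the right step and its final state summarizes the video read back-to-front. A minimal sketch of that preprocessing step with illustrative shapes:

import tensorflow as tf

frames = tf.placeholder(tf.float32, [None, 300, 1024])  # (batch, max_frames, features)
num_frames = tf.placeholder(tf.int32, [None])
backward_input = tf.reverse_sequence(frames, num_frames, seq_axis=1, batch_axis=0)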
def inference(X, weights, bias, reuse = None, trainMode = True):
word_vectors = tf.nn.embedding_lookup(WORDS, X)
# [batch_size, 80, 50]
length = GetLength(X)
length_64 = tf.cast(length, tf.int64)
reuse = None if trainMode else True
#if trainMode:
# word_vectors = tf.nn.dropout(word_vectors, 0.5)
with tf.variable_scope("rnn_fwbw", reuse = reuse) as scope:
forward_output, _ = tf.nn.dynamic_rnn(
tf.contrib.rnn.LSTMCell(FLAGS.num_hidden, reuse = reuse),
word_vectors,
dtype = tf.float32,
sequence_length = length,
scope = "RNN_forward")
backward_output_, _ = tf.nn.dynamic_rnn(
tf.contrib.rnn.LSTMCell(FLAGS.num_hidden, reuse = reuse),
inputs = tf.reverse_sequence(word_vectors,
length_64,
seq_dim = 1),
dtype = tf.float32,
sequence_length = length,
scope = "RNN_backword")
backward_output = tf.reverse_sequence(backward_output_,
length_64,
seq_dim = 1)
output = tf.concat([forward_output, backward_output], 2)
# [batch_size, 80, 200]
output = tf.reshape(output, [-1, FLAGS.num_hidden * 2])
if trainMode:
output = tf.nn.dropout(output, 0.5)
matricized_unary_scores = tf.matmul(output, weights) + bias
# [batch_size, 80, 4]
unary_scores = tf.reshape(
matricized_unary_scores,
[-1, FLAGS.max_sentence_len, FLAGS.num_tags])
return unary_scores, length
def inference(self, X, reuse=None, trainMode=True):
word_vectors = tf.nn.embedding_lookup(self.words, X)
length = self.length(X)
length_64 = tf.cast(length, tf.int64)
reuse = None if trainMode else True
if FLAGS.embedding_size_2 > 0:
word_vectors2 = tf.nn.embedding_lookup(self.words2, X)
        word_vectors = tf.concat([word_vectors, word_vectors2], 2)
#if trainMode:
# word_vectors = tf.nn.dropout(word_vectors, 0.5)
with tf.variable_scope("rnn_fwbw", reuse=reuse) as scope:
forward_output, _ = tf.nn.dynamic_rnn(
tf.contrib.rnn.LSTMCell(self.numHidden,
reuse=reuse),
word_vectors,
dtype=tf.float32,
sequence_length=length,
scope="RNN_forward")
backward_output_, _ = tf.nn.dynamic_rnn(
tf.contrib.rnn.LSTMCell(self.numHidden,
reuse=reuse),
inputs=tf.reverse_sequence(word_vectors,
length_64,
seq_dim=1),
dtype=tf.float32,
sequence_length=length,
scope="RNN_backword")
backward_output = tf.reverse_sequence(backward_output_,
length_64,
seq_dim=1)
output = tf.concat([forward_output, backward_output], 2)
output = tf.reshape(output, [-1, self.numHidden * 2])
if trainMode:
output = tf.nn.dropout(output, 0.5)
matricized_unary_scores = tf.matmul(output, self.W) + self.b
# matricized_unary_scores = tf.nn.log_softmax(matricized_unary_scores)
unary_scores = tf.reshape(
matricized_unary_scores,
[-1, FLAGS.max_sentence_len, self.distinctTagNum])
return unary_scores, length