def unpack_into_tensorarray(value, axis, size=None):
    """
    Unpacks a given tensor along a given axis into a TensorArray.

    Parameters:
    ----------
    value: Tensor
        the tensor to be unpacked
    axis: int
        the axis to unpack the tensor along; negative values count from the
        last dimension
    size: int
        the size of the array to be used if shape inference resulted in None

    Returns: TensorArray
        the unpacked TensorArray

    Raises: ValueError
        if the static size along `axis` is unknown and `size` is None
    """
    shape = value.get_shape().as_list()
    rank = len(shape)
    dtype = value.dtype

    # Normalise a negative axis so it can be used as a permutation entry.
    if axis < 0:
        axis += rank

    array_size = shape[axis] if shape[axis] is not None else size
    if array_size is None:
        raise ValueError("Can't create TensorArray with size None")

    array = tf.TensorArray(dtype=dtype, size=array_size)

    # Swap dimension 0 with `axis` so the unpack axis becomes the leading one.
    # Building the permutation by swapping (instead of concatenating ranges)
    # yields the identity for axis == 0 rather than the invalid [0, 0, 1, ...]
    # and does not rely on `range` returning a list (Python 2 only).
    dim_permutation = list(range(rank))
    dim_permutation[0], dim_permutation[axis] = axis, 0
    unpack_axis_major_value = tf.transpose(value, dim_permutation)

    # TensorArray.unpack was renamed to unstack in TensorFlow 1.0; prefer the
    # new name and fall back to the old one on older releases.
    unstack = getattr(array, 'unstack', None) or array.unpack
    full_array = unstack(unpack_axis_major_value)

    return full_array
# Example source code using the Python class TensorArray()
def add_model_variable(self):
    """Create the model variables and the model TensorArray.

    Variables are created with ``tf.get_variable`` under three variable
    scopes and stored on ``self``:

    * ``embedding``: ``wVector`` of shape [len(lexicon), wordSize]
    * ``weights``: ``tensorV`` [2*wordSize, 2*wordSize, wordSize],
      ``linearW`` [2*wordSize, wordSize], ``softW`` [wordSize, labelNum]
    * ``bias``: ``linearB`` [1, wordSize], ``softB`` [1, labelNum]

    ``self.modelArray`` is an initially empty, dynamically sized float32
    TensorArray whose entries stay readable after a read and whose element
    shapes are not checked for consistency.
    """
    word_dim = self.wordSize
    pair_dim = 2 * word_dim
    label_count = self.labelNum

    with tf.variable_scope('embedding'):
        vocab_size = len(self.lexicon)
        self.embedding = tf.get_variable(name='wVector',
                                         shape=[vocab_size, word_dim])

    with tf.variable_scope('weights'):
        self.tensorV = tf.get_variable(name='tensorV',
                                       shape=[pair_dim, pair_dim, word_dim])
        self.linearW = tf.get_variable(name='linearW',
                                       shape=[pair_dim, word_dim])
        self.softW = tf.get_variable(name='softW',
                                     shape=[word_dim, label_count])

    with tf.variable_scope('bias'):
        self.linearB = tf.get_variable(name='linearB', shape=[1, word_dim])
        self.softB = tf.get_variable(name='softB', shape=[1, label_count])

    array_options = dict(size=0,
                         dynamic_size=True,
                         clear_after_read=False,
                         infer_shape=False)
    self.modelArray = tf.TensorArray(tf.float32, **array_options)
# word vector indices
def _custom_rnn_loop_fn(self, cell_size, training_wheels):
    """Build the per-step ``loop_fn`` closure of the decoder loop.

    The returned callable takes ``(time, cell_output, cell_state, loop_state)``
    and returns ``(elements_finished, next_input, next_cell_state,
    emit_output, next_loop_state)``.
    NOTE(review): this is the loop_fn contract of tf.nn.raw_rnn; presumably
    that is the caller - confirm.

    Parameters:
        cell_size: forwarded to ``self._extract_argmax_and_embed``.
        training_wheels: if truthy, the previous symbol is read from the
            reference / pointer placeholders; otherwise it is derived from
            the previous cell output.
    """
    def loop_fn(time, cell_output, cell_state, loop_state):
        def new_history():
            # One slot more than the second dimension of the references.
            return tf.TensorArray(tf.float32,
                                  size=tf.shape(self.references_placeholder)[1] + 1)

        if cell_output is None:  # time == 0
            # First call: create the histories and start from the encoder
            # state with a batch of '<GO>' symbols.
            contexts_ta = new_history()
            logits_ta = new_history()
            pointer_ta = new_history()
            next_cell_state = self.final_encoder_state
            go_id = self.summary_vocabulary.word_to_id('<GO>')
            go_batch = tf.tile([go_id], [self.batch_size])
            last_output_embedding = tf.nn.embedding_lookup(self.embeddings, go_batch)
        else:
            contexts_ta, logits_ta, pointer_ta = loop_state
            next_cell_state = cell_state
            if not training_wheels:
                last_output_embedding = self._extract_argmax_and_embed(
                    cell_output, cell_size, tf.shape(self.documents_placeholder)[0])
            else:
                previous = time - 1
                voc_indices = self.references_placeholder[:, previous]
                pointer_indices = self.pointer_reference_placeholder[:, previous]
                pointer_switch = tf.cast(self.pointer_switch_placeholder[:, previous], tf.bool)
                # Pair every batch row with its pointer position to look up
                # the pointed-at entry of the documents placeholder.
                rows = tf.range(self.batch_size)
                pointer_indexer = tf.stack([rows, pointer_indices], axis=1)
                attention_vocabulary_indices = tf.gather_nd(self.documents_placeholder,
                                                            pointer_indexer)
                # Where the switch is on use the pointed-at index, else the
                # reference index.
                mixed_indices = tf.where(pointer_switch,
                                         attention_vocabulary_indices,
                                         voc_indices)
                last_output_embedding = tf.nn.embedding_lookup(self.embeddings, mixed_indices)

        context_vector, attention_logits = self._attention(next_cell_state,
                                                           last_output_embedding)
        pointer_probabilities = self._pointer_probabilities(context_vector,
                                                            next_cell_state,
                                                            last_output_embedding)

        # Record this step in the histories carried through the loop state.
        next_loop_state = (
            contexts_ta.write(time, context_vector),
            logits_ta.write(time, attention_logits),
            pointer_ta.write(time, pointer_probabilities),
        )

        next_input = tf.concat([last_output_embedding, context_vector, self.query_last],
                               axis=1)
        elements_finished = (time >= self.reference_lengths_placeholder)
        return elements_finished, next_input, next_cell_state, cell_output, next_loop_state

    return loop_fn
def _gru_encoder(cell, inputs, sequence_length, initial_state, dtype=None):
    """Run ``cell`` over ``inputs`` step by step with a ``tf.while_loop``.

    ``inputs`` is indexed as [batch, time, ...] (it is transposed with
    [1, 0, 2] before being unstacked along time). At every step the cell
    output and new state are passed through ``_copy_through`` together with
    ``sequence_length``, a zero output and the previous state.

    Parameters:
        cell: callable ``cell(input, state) -> (output, new_state)`` exposing
            ``output_size`` and ``zero_state``.
        inputs: input tensor.
        sequence_length: forwarded to ``_copy_through``.
        initial_state: starting state; ``cell.zero_state`` is used if None.
        dtype: element dtype; defaults to ``inputs.dtype``.

    Returns:
        (all_output, final_state), with all_output transposed back to
        [batch, time, output_size].
    """
    # Assume that the underlying cell is GRUCell-like
    output_size = cell.output_size
    if not dtype:
        dtype = inputs.dtype

    batch_size = tf.shape(inputs)[0]
    num_steps = tf.shape(inputs)[1]
    zeros_out = tf.zeros([batch_size, output_size], dtype)

    if initial_state is None:
        initial_state = cell.zero_state(batch_size, dtype)

    input_ta = tf.TensorArray(dtype, num_steps, tensor_array_name="input_array")
    output_ta = tf.TensorArray(dtype, num_steps, tensor_array_name="output_array")
    time_major_inputs = tf.transpose(inputs, [1, 0, 2])
    input_ta = input_ta.unstack(time_major_inputs)

    def keep_going(t, *_):
        return t < num_steps

    def loop_func(t, out_ta, state):
        step_input = input_ta.read(t)
        cell_output, new_state = cell(step_input, state)
        cell_output = _copy_through(t, sequence_length, zeros_out, cell_output)
        new_state = _copy_through(t, sequence_length, state, new_state)
        return t + 1, out_ta.write(t, cell_output), new_state

    start = tf.constant(0, dtype=tf.int32, name="time")
    outputs = tf.while_loop(keep_going, loop_func,
                            (start, output_ta, initial_state),
                            parallel_iterations=32,
                            swap_memory=True)
    output_final_ta, final_state = outputs[1], outputs[2]

    all_output = output_final_ta.stack()
    # Only the feature dimension is known statically.
    all_output.set_shape([None, None, output_size])
    return tf.transpose(all_output, [1, 0, 2]), final_state
def non_max_suppression(inputs, scores, batch_size, max_output_size,
                        score_threshold=0.7, iou_threshold=0.7, nonempty=False, name='nms'):
    """ Perform NMS on batch of images.

    Parameters
    ----------
    inputs: tf.Tuple
        each component is a set of bboxes for the corresponding image
    scores: tf.Tuple
        scores of inputs
    batch_size:
        size of batch of inputs
    max_output_size:
        maximal number of bboxes per image
    score_threshold: float
        bboxes with score less than score_threshold will be dropped
    iou_threshold: float
        bboxes with iou greater than iou_threshold will be merged
    nonempty: bool
        if True at least one bbox per image will be returned
    name: str
        scope name

    Returns
    -------
    tf.Tuple
        indices of selected bboxes for each image
    """
    with tf.variable_scope(name):
        start = tf.constant(0)
        rois_ta = tf.TensorArray(dtype=tf.int32, size=batch_size, infer_shape=False)

        def _not_done(ix, filtered_rois):
            return tf.less(ix, batch_size)

        def _loop_body(ix, filtered_rois):
            indices, score, roi = _filter_tensor(scores[ix], score_threshold, inputs[ix]) # pylint: disable=unbalanced-tuple-unpacking
            # Boxes come as (first two columns, sizes); convert to two corners.
            roi_corners = tf.concat([roi[:, :2], roi[:, :2] + roi[:, 2:]], axis=-1)
            roi_after_nms = tf.image.non_max_suppression(roi_corners, score,
                                                         max_output_size, iou_threshold)

            def _write_selected():
                selected = tf.cast(tf.gather(indices, roi_after_nms), dtype=tf.int32)
                return filtered_rois.write(ix, selected)

            def _write_fallback():
                # Nothing passed the score filter: store index 0.
                return filtered_rois.write(ix, tf.constant([[0]]))

            if not nonempty:
                updated = _write_selected()
            else:
                has_candidates = tf.not_equal(tf.shape(indices)[0], 0)
                updated = tf.cond(has_candidates, _write_selected, _write_fallback)
            return [ix + 1, updated]

        _, res = tf.while_loop(_not_done, _loop_body, [start, rois_ta])
        return _array_to_tuple(res, batch_size, [-1, 1])
def _resize_except_axis(inputs, size, axis, **kwargs):
    """ Resize 3D input tensor to size except just one axis.

    The 5-D ``inputs`` is transposed so that the axis to keep lands in
    position -2. Every slice along that position is resized with
    ``tf.image.resize_bilinear``, then the slices are stacked and transposed
    back to the original layout.

    Parameters:
        inputs: 5-D tensor.
        size: list or tf.Tensor with one target size per spatial axis.
        axis: spatial axis (0, 1 or anything else for the last) left unresized.
        **kwargs: forwarded to ``tf.image.resize_bilinear``.
    """
    if axis == 0:
        spatial_perm, reverse_spatial_perm = [2, 3, 1], [3, 1, 2]
    elif axis == 1:
        spatial_perm, reverse_spatial_perm = [1, 3, 2], [1, 3, 2]
    else:
        spatial_perm, reverse_spatial_perm = [1, 2, 3], [1, 2, 3]

    perm = np.arange(5)
    perm[1:4] = spatial_perm
    reverse_perm = np.arange(5)
    reverse_perm[1:4] = reverse_spatial_perm

    x = tf.transpose(inputs, perm)

    # Reorder the requested sizes the same way as the spatial axes.
    size_is_tensor = isinstance(size, tf.Tensor)
    components = tf.unstack(size) if size_is_tensor else size
    permuted = [components[i - 1] for i in spatial_perm]
    size = tf.stack(permuted) if size_is_tensor else permuted

    # Only the static shape from the helper is used; the 2-D resize target is
    # the permuted size without its last entry.
    _, static_shape = _calc_size_after_resize(x, size, [0, 1])
    real_size = size[:-1]

    slices = tf.TensorArray(tf.float32, size=tf.shape(x)[-2])

    def _has_more(i, array):
        return i < tf.shape(x)[-2]

    def _loop(idx, array):
        plane = x[:, :, :, idx, :]
        plane = tf.image.resize_bilinear(plane, size=real_size, name='resize_2d', **kwargs)
        return [idx + 1, array.write(idx, plane)]

    _, slices = tf.while_loop(_has_more, _loop, [0, slices])

    # Stacking puts the loop axis first; move it back to position -2.
    result = tf.transpose(slices.stack(), [1, 2, 3, 0, 4])
    result.set_shape(static_shape)
    return tf.transpose(result, reverse_perm)
def do_inference_steps(self, initial_state, premise, hypothesis):
    """Iterate ``self.inference_step`` in a ``tf.while_loop``.

    The loop keeps running while at least one batch element still has
    ``prob_compare < 1 - eps`` and ``counter < max_computation``.

    Side effects: sets ``self.one_minus_eps``, ``self.N``, ``self.ACTPROB``,
    ``self.ACTPREMISEATTN``, ``self.ACTHYPOTHESISATTN`` and
    ``self.incremental_states`` (the last four are packed TensorArrays
    collected during the run).

    Returns:
        (state, remainders, iterations): the final values of the loop
        variables that started as the state accumulator, ``prob`` and
        ``counter`` respectively.
    """
    batch_shape = [self.batch_size]

    def per_example(value, dtype, name=None):
        if name is None:
            return tf.constant(value, dtype, batch_shape)
        return tf.constant(value, dtype, batch_shape, name=name)

    self.one_minus_eps = per_example(1.0 - self.config.eps, tf.float32)
    self.N = per_example(self.config.max_computation, tf.float32)
    prob = per_example(0.0, tf.float32, "prob")
    prob_compare = per_example(0.0, tf.float32, "prob_compare")
    counter = per_example(0.0, tf.float32, "counter")
    i = tf.constant(0, tf.int32, name="index")
    acc_states = tf.zeros_like(initial_state, tf.float32, name="state_accumulator")
    batch_mask = per_example(True, tf.bool)

    # Tensor arrays to collect information about the run:
    array_probs = tf.TensorArray(tf.float32, 0, dynamic_size=True)
    premise_attention = tf.TensorArray(tf.float32, 0, dynamic_size=True)
    hypothesis_attention = tf.TensorArray(tf.float32, 0, dynamic_size=True)
    incremental_states = tf.TensorArray(tf.float32, 0, dynamic_size=True)

    # While loop stops when this predicate is FALSE, i.e. when
    # (probability < 1-eps AND counter < N) is false for the whole batch.
    def keep_running(i, incremental_states, array_probs, premise_attention,
                     hypothesis_attention, batch_mask, prob_compare, prob,
                     counter, state, premise, hypothesis, acc_state):
        under_threshold = tf.less(prob_compare, self.one_minus_eps)
        under_budget = tf.less(counter, self.N)
        # only stop if all of the batch have passed either threshold
        return tf.reduce_any(tf.logical_and(under_threshold, under_budget))

    # Do while loop iterations until predicate above is false.
    loop_vars = [i, incremental_states, array_probs, premise_attention,
                 hypothesis_attention, batch_mask, prob_compare, prob,
                 counter, initial_state, premise, hypothesis, acc_states]
    (_, incremental_states, array_probs, premise_attention,
     hypothesis_attention, _, _, remainders, iterations,
     _, _, _, state) = tf.while_loop(keep_running, self.inference_step, loop_vars)

    self.ACTPROB = array_probs.pack()
    self.ACTPREMISEATTN = premise_attention.pack()
    self.ACTHYPOTHESISATTN = hypothesis_attention.pack()
    self.incremental_states = incremental_states.pack()

    return state, remainders, iterations
def do_inference_steps(self, initial_state, premise, hypothesis):
    """Iterate ``self.inference_step`` in a ``tf.while_loop``.

    The loop keeps running while at least one batch element still has
    ``prob_compare < 1 - eps`` and ``counter < max_computation``.

    Side effects: sets ``self.one_minus_eps``, ``self.N``, ``self.ACTPROB``,
    ``self.ACTPREMISEATTN`` and ``self.ACTHYPOTHESISATTN`` (the last three
    are packed TensorArrays collected during the run).

    Returns:
        (state, remainders, iterations): the final values of the loop
        variables that started as the state accumulator, ``prob`` and
        ``counter`` respectively.
    """
    batch_shape = [self.batch_size]

    def per_example(value, dtype, name=None):
        if name is None:
            return tf.constant(value, dtype, batch_shape)
        return tf.constant(value, dtype, batch_shape, name=name)

    self.one_minus_eps = per_example(1.0 - self.config.eps, tf.float32)
    self.N = per_example(self.config.max_computation, tf.float32)
    prob = per_example(0.0, tf.float32, "prob")
    prob_compare = per_example(0.0, tf.float32, "prob_compare")
    counter = per_example(0.0, tf.float32, "counter")
    i = tf.constant(0, tf.int32, name="index")
    acc_states = tf.zeros_like(initial_state, tf.float32, name="state_accumulator")
    batch_mask = per_example(True, tf.bool)

    # Tensor arrays to collect information about the run:
    array_probs = tf.TensorArray(tf.float32, 0, dynamic_size=True)
    premise_attention = tf.TensorArray(tf.float32, 0, dynamic_size=True)
    hypothesis_attention = tf.TensorArray(tf.float32, 0, dynamic_size=True)

    # While loop stops when this predicate is FALSE, i.e. when
    # (probability < 1-eps AND counter < N) is false for the whole batch.
    def keep_running(i, array_probs, premise_attention, hypothesis_attention,
                     batch_mask, prob_compare, prob, counter, state,
                     premise, hypothesis, acc_state):
        under_threshold = tf.less(prob_compare, self.one_minus_eps)
        under_budget = tf.less(counter, self.N)
        # only stop if all of the batch have passed either threshold
        return tf.reduce_any(tf.logical_and(under_threshold, under_budget))

    # Do while loop iterations until predicate above is false.
    loop_vars = [i, array_probs, premise_attention, hypothesis_attention,
                 batch_mask, prob_compare, prob,
                 counter, initial_state, premise, hypothesis, acc_states]
    (_, array_probs, premise_attention, hypothesis_attention,
     _, _, remainders, iterations,
     _, _, _, state) = tf.while_loop(keep_running, self.inference_step, loop_vars)

    self.ACTPROB = array_probs.pack()
    self.ACTPREMISEATTN = premise_attention.pack()
    self.ACTHYPOTHESISATTN = hypothesis_attention.pack()

    return state, remainders, iterations
def do_act_steps(self, premise, hypothesis):
    """Iterate ``self.inference_step`` from an all-zero state.

    The initial state is a zero tensor of shape
    [batch_size, 2 * rep_size], where ``rep_size`` is the static last
    dimension of ``premise``. The loop keeps running while at least one batch
    element still has ``prob_compare < 1 - eps`` and
    ``counter < max_computation``.

    Side effects: sets ``self.rep_size``, ``self.one_minus_eps``, ``self.N``,
    ``self.ACTPROB``, ``self.ACTPREMISEATTN`` and ``self.ACTHYPOTHESISATTN``.

    Returns:
        (state, remainders, iterations): the final values of the loop
        variables that started as the state accumulator, ``prob`` and
        ``counter`` respectively.
    """
    self.rep_size = premise.get_shape()[-1].value
    batch_shape = [self.batch_size]

    def per_example(value, dtype, name=None):
        if name is None:
            return tf.constant(value, dtype, batch_shape)
        return tf.constant(value, dtype, batch_shape, name=name)

    self.one_minus_eps = per_example(1.0 - self.config.eps, tf.float32)
    self.N = per_example(self.config.max_computation, tf.float32)
    prob = per_example(0.0, tf.float32, "prob")
    prob_compare = per_example(0.0, tf.float32, "prob_compare")
    counter = per_example(0.0, tf.float32, "counter")
    initial_state = tf.zeros([self.batch_size, 2 * self.rep_size], tf.float32, name="state")
    i = tf.constant(0, tf.int32, name="index")
    acc_states = tf.zeros_like(initial_state, tf.float32, name="state_accumulator")
    batch_mask = per_example(True, tf.bool)

    # Tensor arrays to collect information about the run:
    array_probs = tf.TensorArray(tf.float32, 0, dynamic_size=True)
    premise_attention = tf.TensorArray(tf.float32, 0, dynamic_size=True)
    hypothesis_attention = tf.TensorArray(tf.float32, 0, dynamic_size=True)

    # While loop stops when this predicate is FALSE, i.e. when
    # (probability < 1-eps AND counter < N) is false for the whole batch.
    def keep_running(i, array_probs, premise_attention, hypothesis_attention,
                     batch_mask, prob_compare, prob, counter, state,
                     premise, hypothesis, acc_state):
        under_threshold = tf.less(prob_compare, self.one_minus_eps)
        under_budget = tf.less(counter, self.N)
        # only stop if all of the batch have passed either threshold
        return tf.reduce_any(tf.logical_and(under_threshold, under_budget))

    # Do while loop iterations until predicate above is false.
    loop_vars = [i, array_probs, premise_attention, hypothesis_attention,
                 batch_mask, prob_compare, prob,
                 counter, initial_state, premise, hypothesis, acc_states]
    (_, array_probs, premise_attention, hypothesis_attention,
     _, _, remainders, iterations,
     _, _, _, state) = tf.while_loop(keep_running, self.inference_step, loop_vars)

    self.ACTPROB = array_probs.pack()
    self.ACTPREMISEATTN = premise_attention.pack()
    self.ACTHYPOTHESISATTN = hypothesis_attention.pack()

    return state, remainders, iterations
def __call__(self, X):
    """
    Performs the LSTM's forget, input and output operations
    according to: http://arxiv.org/pdf/1402.1128v1.pdf without peepholes

    Parameters:
    ----------
    X: list[Tensor]
        The input list to process by the LSTM

    Returns: list[Tensor]
        one output tensor per element of X
    """
    num_steps = len(X)
    outputs = tf.TensorArray(tf.float32, num_steps)
    inputs = tf.TensorArray(tf.float32, num_steps)
    start = tf.constant(0, dtype=tf.int32)

    # Copy the Python list into a TensorArray so the loop can index it.
    for position, step_input in enumerate(X):
        inputs = inputs.write(position, step_input)

    def has_more(time, *_):
        return time < num_steps

    def step_op(time, prev_state, prev_output, inputs_list, outputs_list):
        time_step = inputs_list.read(time)
        from_input = tf.matmul(time_step, self.input_weights)
        from_recurrence = tf.matmul(prev_output, self.output_weights)
        # All four gates are computed at once and split on the last axis.
        gates = tf.reshape(from_input + from_recurrence + self.bias,
                           [-1, self.num_hidden, 4])

        input_gate = tf.sigmoid(gates[:, :, 0])
        forget_gate = tf.sigmoid(gates[:, :, 1])
        candidate_state = tf.tanh(gates[:, :, 2])
        output_gate = tf.sigmoid(gates[:, :, 3])

        state = forget_gate * prev_state + input_gate * candidate_state
        output = output_gate * tf.tanh(state)
        return time + 1, state, output, inputs_list, outputs_list.write(time, output)

    _, state, output, _, final_outputs = tf.while_loop(
        cond=has_more,
        body=step_op,
        loop_vars=(start, self.prev_state, self.prev_output, inputs, outputs),
        parallel_iterations=32,
        swap_memory=True
    )

    # NOTE(review): the results of these assign calls are discarded; in graph
    # mode they only take effect if something else runs them - confirm.
    self.prev_state.assign(state)
    self.prev_output.assign(output)

    return [final_outputs.read(step) for step in range(num_steps)]
def __call__(self, X):
    """
    Performs the LSTM's forget, input and output operations
    according to: http://arxiv.org/pdf/1402.1128v1.pdf without peepholes

    Parameters:
    ----------
    X: list[Tensor]
        The input list to process by the LSTM

    Returns: list[Tensor]
        one output tensor per element of X
    """
    num_steps = len(X)
    outputs = tf.TensorArray(tf.float32, num_steps)
    inputs = tf.TensorArray(tf.float32, num_steps)
    start = tf.constant(0, dtype=tf.int32)

    # Copy the Python list into a TensorArray so the loop can index it.
    for position, step_input in enumerate(X):
        inputs = inputs.write(position, step_input)

    def has_more(time, *_):
        return time < num_steps

    def step_op(time, prev_state, prev_output, inputs_list, outputs_list):
        time_step = inputs_list.read(time)
        from_input = tf.matmul(time_step, self.input_weights)
        from_recurrence = tf.matmul(prev_output, self.output_weights)
        # All four gates are computed at once and split on the last axis.
        gates = tf.reshape(from_input + from_recurrence + self.bias,
                           [-1, self.num_hidden, 4])

        input_gate = tf.sigmoid(gates[:, :, 0])
        forget_gate = tf.sigmoid(gates[:, :, 1])
        candidate_state = tf.tanh(gates[:, :, 2])
        output_gate = tf.sigmoid(gates[:, :, 3])

        state = forget_gate * prev_state + input_gate * candidate_state
        output = output_gate * tf.tanh(state)
        return time + 1, state, output, inputs_list, outputs_list.write(time, output)

    _, state, output, _, final_outputs = tf.while_loop(
        cond=has_more,
        body=step_op,
        loop_vars=(start, self.prev_state, self.prev_output, inputs, outputs),
        parallel_iterations=32,
        swap_memory=True
    )

    # NOTE(review): the results of these assign calls are discarded; in graph
    # mode they only take effect if something else runs them - confirm.
    self.prev_state.assign(state)
    self.prev_output.assign(output)

    return [final_outputs.read(step) for step in range(num_steps)]
def get_initial_loop_state(self) -> LoopState:
    """Assemble the ``LoopState`` the decoding loop starts from.

    Histories are empty, dynamically sized TensorArrays, except
    ``decoder_outputs``, which already holds ``self.initial_state`` at
    index 0. Feedables start at step 0 with nothing finished, the go
    symbols as input, zero logits, ``self.initial_state`` as both previous
    RNN state and output, and one zero context vector per attention.
    """
    def _empty_history(dtype, name):
        return tf.TensorArray(dtype=dtype, dynamic_size=True, size=0, name=name)

    rnn_output_ta = _empty_history(tf.float32, "decoder_outputs")
    rnn_output_ta = rnn_output_ta.write(0, self.initial_state)
    logit_ta = _empty_history(tf.float32, "logits")
    outputs_ta = _empty_history(tf.int32, "outputs")

    # NOTE(review): unlike attn_loop_states below, this does not skip None
    # entries of self.attentions - confirm whether None can occur.
    contexts = [tf.zeros([self.batch_size, attention.context_vector_size])
                for attention in self.attentions]

    mask_ta = _empty_history(tf.bool, "mask")

    attn_loop_states = [attention.initial_loop_state()
                        for attention in self.attentions
                        if attention is not None]

    not_finished = tf.zeros([self.batch_size], dtype=tf.bool)
    zero_logits = tf.zeros([self.batch_size, len(self.vocabulary)])

    # pylint: disable=not-callable
    feedables = RNNFeedables(
        # general:
        step=0,
        finished=not_finished,
        input_symbol=self.go_symbols,
        prev_logits=zero_logits,
        # rnn-specific:
        prev_rnn_state=self.initial_state,
        prev_rnn_output=self.initial_state,
        prev_contexts=contexts)

    histories = RNNHistories(
        attention_histories=attn_loop_states,
        # general:
        logits=logit_ta,
        decoder_outputs=rnn_output_ta,
        outputs=outputs_ta,
        mask=mask_ta)
    # pylint: enable=not-callable

    constants = DecoderConstants(train_inputs=self.train_inputs)

    return LoopState(histories=histories,
                     constants=constants,
                     feedables=feedables)
def get_initial_loop_state(self) -> LoopState:
    """Assemble the ``LoopState`` the decoding loop starts from.

    Histories are empty, dynamically sized TensorArrays, except
    ``decoder_outputs``, which already holds ``self.initial_state`` at
    index 0. Feedables start at step 0 with nothing finished, the go
    symbols as input, zero logits, ``self.initial_state`` as both previous
    RNN state and output, and one zero context vector per attention.
    """
    def _empty_history(dtype, name):
        return tf.TensorArray(dtype=dtype, dynamic_size=True, size=0, name=name)

    rnn_output_ta = _empty_history(tf.float32, "decoder_outputs")
    rnn_output_ta = rnn_output_ta.write(0, self.initial_state)
    logit_ta = _empty_history(tf.float32, "logits")
    outputs_ta = _empty_history(tf.int32, "outputs")

    # NOTE(review): unlike attn_loop_states below, this does not skip None
    # entries of self.attentions - confirm whether None can occur.
    contexts = [tf.zeros([self.batch_size, attention.context_vector_size])
                for attention in self.attentions]

    mask_ta = _empty_history(tf.bool, "mask")

    attn_loop_states = [attention.initial_loop_state()
                        for attention in self.attentions
                        if attention is not None]

    not_finished = tf.zeros([self.batch_size], dtype=tf.bool)
    zero_logits = tf.zeros([self.batch_size, len(self.vocabulary)])

    # pylint: disable=not-callable
    feedables = RNNFeedables(
        # general:
        step=0,
        finished=not_finished,
        input_symbol=self.go_symbols,
        prev_logits=zero_logits,
        # rnn-specific:
        prev_rnn_state=self.initial_state,
        prev_rnn_output=self.initial_state,
        prev_contexts=contexts)

    histories = RNNHistories(
        attention_histories=attn_loop_states,
        # general:
        logits=logit_ta,
        decoder_outputs=rnn_output_ta,
        outputs=outputs_ta,
        mask=mask_ta)
    # pylint: enable=not-callable

    constants = DecoderConstants(train_inputs=self.train_inputs)

    return LoopState(histories=histories,
                     constants=constants,
                     feedables=feedables)
def _gru_encoder(cell, inputs, sequence_length, initial_state, dtype=None):
    """Run ``cell`` over ``inputs`` step by step with a ``tf.while_loop``.

    ``inputs`` is indexed as [batch, time, ...] (it is transposed with
    [1, 0, 2] before being unstacked along time). At every step the cell
    output and new state are passed through ``_copy_through`` together with
    ``sequence_length``, a zero output and the previous state.

    Parameters:
        cell: callable ``cell(input, state) -> (output, new_state)`` exposing
            ``output_size`` and ``zero_state``.
        inputs: input tensor.
        sequence_length: forwarded to ``_copy_through``.
        initial_state: starting state; ``cell.zero_state`` is used if None.
        dtype: element dtype; defaults to ``inputs.dtype``.

    Returns:
        (all_output, final_state), with all_output transposed back to
        [batch, time, output_size].
    """
    # Assume that the underlying cell is GRUCell-like
    output_size = cell.output_size
    if not dtype:
        dtype = inputs.dtype

    batch_size = tf.shape(inputs)[0]
    num_steps = tf.shape(inputs)[1]
    zeros_out = tf.zeros([batch_size, output_size], dtype)

    if initial_state is None:
        initial_state = cell.zero_state(batch_size, dtype)

    input_ta = tf.TensorArray(dtype, num_steps, tensor_array_name="input_array")
    output_ta = tf.TensorArray(dtype, num_steps, tensor_array_name="output_array")
    time_major_inputs = tf.transpose(inputs, [1, 0, 2])
    input_ta = input_ta.unstack(time_major_inputs)

    def keep_going(t, *_):
        return t < num_steps

    def loop_func(t, out_ta, state):
        step_input = input_ta.read(t)
        cell_output, new_state = cell(step_input, state)
        cell_output = _copy_through(t, sequence_length, zeros_out, cell_output)
        new_state = _copy_through(t, sequence_length, state, new_state)
        return t + 1, out_ta.write(t, cell_output), new_state

    start = tf.constant(0, dtype=tf.int32, name="time")
    outputs = tf.while_loop(keep_going, loop_func,
                            (start, output_ta, initial_state),
                            parallel_iterations=32,
                            swap_memory=True)
    output_final_ta, final_state = outputs[1], outputs[2]

    all_output = output_final_ta.stack()
    # Only the feature dimension is known statically.
    all_output.set_shape([None, None, output_size])
    return tf.transpose(all_output, [1, 0, 2]), final_state
def _compute_states(self):
    """Compute the hidden states for every time step with a ``tf.while_loop``.

    ``self.inputs`` and ``self.initial_states`` are transposed with
    [1, 0, 2] and unstacked into TensorArrays. The state array holds
    ``self.num_initial_states`` initial states followed by one state per
    step. Each step mixes earlier states (selected by
    ``self.pre_act_mixture_delays``) with softmax coefficients, gates the
    mixture, and writes the activated result as the new state.

    Returns:
        (outputs, states): the same tensor under two names, holding the
        computed states (initial states dropped) transposed back with
        [1, 0, 2].
    """
    _inputs = tf.transpose(self.inputs, [1, 0, 2])
    x_ta = tf.TensorArray(tf.float32, size=self.length).unstack(_inputs)
    h_ta_size = self.num_initial_states + self.length
    initial_states = tf.transpose(self.initial_states, [1, 0, 2])
    # infer_shapes=True is buggy and says that shape (?, num_hidden_units) is incompatible with
    # shape (?, num_hidden_units). I've verified that they both have shape
    # (batch_size, num_hidden_units). To avoid this, we'll set infer_shape=False and
    # skip the consistency check entirely.
    h_ta = tf.TensorArray(tf.float32, size=h_ta_size, clear_after_read=False, infer_shape=False)
    h_ta = h_ta.unstack(initial_states)

    def cond(t, h_ta):
        # Continue until every one of the self.length steps is processed.
        return tf.less(t, self.length)

    def body(t, h_ta):
        # Most recent state: the one written just before slot
        # num_initial_states + t.
        h = h_ta.read(self.num_initial_states + t - 1)
        x = x_ta.read(t)
        num_units, input_size = self.num_hidden_units, self.input_size
        # NOTE(review): the source indentation was lost; the extent of this
        # variable scope (it changes variable names) is assumed to end after
        # h_pre_act is reduced - confirm against the original file.
        with tf.variable_scope('pre_act'):
            # Shape [batch_size, pre_act_mixture_delays.size, num_units]
            h_history = tf.transpose(h_ta.gather(self.num_initial_states + t - self.pre_act_mixture_delays), [1, 0, 2])
            # Shape [batch_size, pre_act_mixture_delays.size, 1]
            coefs = tf.expand_dims(self._linear(h, x, self.pre_act_mixture_delays.size, scope='coefs'), 2)
            coefs = tf.nn.softmax(coefs, dim=1)
            # Shape [batch_size, num_units]
            h_pre_act = tf.reduce_sum(coefs * h_history, axis=[1])
        # Sigmoid gate applied element-wise to the mixed history.
        r = tf.nn.sigmoid(self._linear(h, x, num_units, scope='r'))
        h_pre_act = r * h_pre_act
        h_tilde = self.activation(self._linear(h_pre_act, x, num_units, scope='mist'))
        h_ta_new = h_ta.write(self.num_initial_states + t, h_tilde)
        return t + 1, h_ta_new

    t = tf.constant(0)
    _, h_ta = tf.while_loop(cond, body, [t, h_ta])
    all_states = h_ta.stack()
    # Drop the initial states and go back to the input's axis order.
    states = tf.transpose(all_states[self.num_initial_states:], [1, 0, 2], name='states')
    outputs = tf.identity(states, name='outputs')
    return outputs, states
def decode(self, enc_outputs, enc_final_state):
    """Run ``self.decoder.step`` in a while loop and collect its logits.

    The loop starts from ``enc_final_state`` and an input built from a batch
    of ``ANSWER_START`` ids. It stops after ``ANSWER_MAX`` steps or as soon
    as the end check in ``condition`` holds for every batch element.

    Parameters:
        enc_outputs: encoder outputs; only the second dimension is read, as
            the batch size (time-major per the comment below).
        enc_final_state: initial decoder state.

    Returns:
        (final_outputs, final_state): the stacked per-step logits and the
        last decoder state.
    """
    with tf.variable_scope(self.decoder.scope):
        def condition(time, all_outputs: tf.TensorArray, inputs, states):
            def check_outputs_ends():
                def has_end_word(t):
                    # NOTE(review): labels are compared against ANSWER_MAX,
                    # the same constant used as the step limit below -
                    # confirm this is the intended end-of-answer id.
                    return tf.reduce_any(tf.equal(t, ANSWER_MAX))
                output_label = tf.arg_max(all_outputs.stack(), 2)
                output_label = tf.Print(output_label, [output_label], "Output Labels: ")
                # The outputs are time-major, which means time is the first
                # dimension. Here we need to check whether all the generated
                # answers end with "</s>", so we transpose to batch-major,
                # because `map_fn` only maps the function over the first
                # dimension.
                batch_major_outputs = tf.transpose(output_label, (1, 0))
                all_outputs_ends = tf.reduce_all(tf.map_fn(has_end_word, batch_major_outputs, dtype=tf.bool))
                return all_outputs_ends
            # If the TensorArray has size 0, stack() will trigger an error,
            # so a conditional is used to check whether the size is 0 first.
            all_ends = tf.cond(tf.equal(all_outputs.size(), 0),
                               lambda: tf.constant(False, tf.bool),
                               check_outputs_ends)
            # Keep looping while some answer is unfinished and the step
            # limit has not been reached.
            condition_result = tf.logical_and(tf.logical_not(all_ends), tf.less(time, ANSWER_MAX))
            return condition_result

        def body(time, all_outputs, inputs, state):
            dec_outputs, dec_state, output_logits, next_input = self.decoder.step(inputs, state)
            all_outputs = all_outputs.write(time, output_logits)
            return time + 1, all_outputs, next_input, dec_state

        output_ta = tensor_array_ops.TensorArray(dtype=tf.float32,
                                                 size=0,
                                                 dynamic_size=True,
                                                 element_shape=(None, config.DEC_VOCAB),
                                                 clear_after_read=False)
        # with time-major data input, the batch size is the second dimension
        batch_size = tf.shape(enc_outputs)[1]
        # One ANSWER_START id per batch element.
        zero_input = tf.ones(tf.expand_dims(batch_size, axis=0), dtype=tf.int32) * ANSWER_START
        res = control_flow_ops.while_loop(
            condition,
            body,
            loop_vars=[0, output_ta, self.decoder.zero_input(zero_input), enc_final_state],
        )
        final_outputs = res[1].stack()
        final_outputs = tf.Print(final_outputs, [final_outputs], "Final Output: ")
        final_state = res[3]
        return final_outputs, final_state
def match(qstates, pstates, d, dropout=None):
    """Run an attention-augmented LSTM over the passage states.

    At each passage position the cell input is the passage state
    concatenated with ``attention(qstates, passage_state, previous_c, d)``.
    The leading dimension of ``pstates`` gives the number of steps and the
    second dimension of ``qstates`` the batch size.

    Parameters:
        qstates: question states, rank 3.
        pstates: passage states, indexed by position on the first axis.
        d: size parameter; the cell has ``2 * d`` units.
        dropout: forwarded to ``rcell``.

    Returns:
        (outputs, states): the stacked cell outputs, and the stacked states
        (initial state dropped) passed through
        ``project_lstm_states(..., 4*d, d)``.
    """
    # infer batch_size, passage length and question length
    _, batch_size, _ = tf.unstack(tf.shape(qstates))
    plen = tf.shape(pstates)[0]

    # define rnn cell
    # TODO : replace with LSTM
    cell = rcell('lstm', num_units=2*d, dropout=dropout)

    states = tf.TensorArray(dtype=tf.float32, size=plen+1, name='states',
                            clear_after_read=False)
    outputs = tf.TensorArray(dtype=tf.float32, size=plen, name='outputs',
                             clear_after_read=False)

    # slot 0 of `states` holds the zero state; step i writes slot i+1
    states = states.write(0, cell.zero_state(batch_size, tf.float32))

    def not_finished(i, states, outputs):
        return tf.less(i, plen)

    def mlstm_step(i, states, outputs):
        # rebuild the state tuple from the stored tensor
        state_parts = tf.unstack(states.read(i))
        previous = tf.contrib.rnn.LSTMStateTuple(state_parts[0], state_parts[1])

        # attention weighted representation, combined with input(i)
        ci = attention(qstates, pstates[i], previous.c, d)
        step_input = tf.concat([pstates[i], ci], axis=-1)

        output, state = cell(step_input, previous)

        # save output, state
        states = states.write(i+1, state)
        outputs = outputs.write(i, output)
        return (i+1, states, outputs)

    # execute loop
    _, fstates, foutputs = tf.while_loop(not_finished, mlstm_step,
                                         [0, states, outputs])

    return foutputs.stack(), project_lstm_states(fstates.stack()[1:], 4*d, d)
def naive_decoder(cell, enc_states, targets, start_token, end_token,
                  feed_previous=True, training=True, scope='naive_decoder.0'):
    """Decode step by step with ``cell`` inside a ``tf.while_loop``.

    The loop starts from the last encoder state. Each step feeds either the
    previous output (``feed_previous``) or the time-major target at that
    step. When ``training`` it runs for as many steps as ``enc_states`` has
    entries; otherwise it runs while no argmax of the current output equals
    ``end_token``.

    Parameters:
        cell: callable ``cell(input, state) -> (output, state)``.
        enc_states: encoder states; the last one seeds the decoder.
        targets: targets, transposed with [1, 0, 2] before indexing.
        start_token: stored as the output at index 0.
        end_token: id that stops the loop when not training.
        feed_previous: choose previous output over targets as input.
        training: selects the stop condition.
        scope: variable scope name.

    Returns:
        the stacked outputs without the start token at index 0.
    """
    timesteps = tf.shape(enc_states)[0]
    # targets time major
    targets_tm = tf.transpose(targets, [1, 0, 2])

    states = tf.TensorArray(dtype=tf.float32, size=timesteps+1, name='states',
                            clear_after_read=False)
    outputs = tf.TensorArray(dtype=tf.float32, size=timesteps+1, name='outputs',
                             clear_after_read=False)

    def step(i, states, outputs):
        # run one step: read the previous state from the TensorArray
        state_prev = states.read(i)
        if is_lstm(cell):
            # previous state <tensor> -> <LSTMStateTuple>
            c, h = tf.unstack(state_prev)
            state_prev = rnn.LSTMStateTuple(c, h)

        input_ = outputs.read(i) if feed_previous else targets_tm[i]
        output, state = cell(input_, state_prev)

        # slot i+1 receives the state/output produced at step i
        states = states.write(i+1, state)
        outputs = outputs.write(i+1, output)
        return tf.add(i, 1), states, outputs

    def within_length(i, states, outputs):
        return tf.less(i, timesteps)

    def no_end_token_yet(i, states, outputs):
        predicted = tf.argmax(outputs.read(i), axis=-1)
        return tf.reduce_all(tf.not_equal(predicted, end_token))

    with tf.variable_scope(scope):
        # initial state and initial input
        states = states.write(0, enc_states[-1])
        outputs = outputs.write(0, start_token)
        start = tf.constant(0)

        # Stop loop condition
        keep_going = within_length if training else no_end_token_yet

        # execution
        _, fstates, foutputs = tf.while_loop(keep_going, step,
                                             [start, states, outputs])

    return foutputs.stack()[1:] # add states; but why?
def uni_net_dynamic(cell, inputs, proj_dim=None, init_state=None, scope='uni_net_d0'):
    """Unroll ``cell`` over ``inputs`` with a ``tf.while_loop``.

    Parameters:
        cell: callable ``cell(input, state) -> (output, state)`` exposing
            ``zero_state`` (and ``state_size.c`` when it is an LSTM).
        inputs: rank-3 tensor, transposed with [1, 0, 2] to time-major.
        proj_dim: target size for the LSTM state projection; defaults to
            half of ``2 * cell.state_size.c``.
        init_state: starting state; ``cell.zero_state`` is used if None.
        scope: variable scope name.

    Returns:
        (outputs, states): the stacked outputs and the stacked states without
        the initial one; for LSTM cells the states are passed through
        ``project_lstm_states``.
    """
    # transpose to time major
    inputs_tm = tf.transpose(inputs, [1, 0, 2], name='inputs_tm')
    # infer timesteps and batch_size
    timesteps, batch_size, _ = tf.unstack(tf.shape(inputs_tm))

    # TODO : fix and add this
    # init_state = init_state if init_state else cell.zero_state(batch_size,tf.float32)
    if init_state is None:
        init_state = cell.zero_state(batch_size, tf.float32)

    states = tf.TensorArray(dtype=tf.float32, size=timesteps+1, name='states',
                            clear_after_read=False)
    outputs = tf.TensorArray(dtype=tf.float32, size=timesteps, name='outputs',
                             clear_after_read=False)

    def within_length(i, states, outputs):
        return tf.less(i, timesteps)

    def step(i, states, outputs):
        # run one step: read the previous state from the TensorArray
        state_prev = states.read(i)
        if is_lstm(cell):
            # previous state <tensor> -> <LSTMStateTuple>
            c, h = tf.unstack(state_prev)
            state_prev = rnn.LSTMStateTuple(c, h)

        output, state = cell(inputs_tm[i], state_prev)

        # states are shifted by one because slot 0 holds the initial state
        states = states.write(i+1, state)
        outputs = outputs.write(i, output)
        return tf.add(i, 1), states, outputs

    with tf.variable_scope(scope):
        # initial state
        states = states.write(0, init_state)
        start = tf.constant(0)
        # execution
        _, fstates, foutputs = tf.while_loop(within_length, step,
                                             [start, states, outputs])

    if not is_lstm(cell):
        return foutputs.stack(), fstates.stack()[1:]

    # if LSTM, project states
    d1 = 2*cell.state_size.c
    d2 = proj_dim or d1//2
    return foutputs.stack(), project_lstm_states(fstates.stack()[1:], d1, d2)
def build_graph(self):
    """
    builds the computational graph that performs a step-by-step evaluation
    of the input data batches

    Sets ``self.unpacked_input_data``, ``self.packed_output`` and
    ``self.packed_memory_view`` (the latter is for debugging only).
    """
    self.unpacked_input_data = utility.unpack_into_tensorarray(
        self.input_data, 1, self.sequence_length)

    # One float32 history of sequence_length entries per recorded quantity.
    (outputs, read_weightings, write_weightings, write_vectors, key_vectors,
     beta_vectors, shift_vectors, gamma_vectors, gates_vectors,
     memory_vectors) = [tf.TensorArray(tf.float32, self.sequence_length)
                        for _ in range(10)]

    if self.controller.has_recurrent_nn:
        controller_state = self.controller.get_state()
    else:
        # placeholder state for a controller without recurrence
        controller_state = (tf.zeros(1), tf.zeros(1))
    if not isinstance(controller_state, LSTMStateTuple):
        controller_state = LSTMStateTuple(controller_state[0], controller_state[1])

    memory_state = self.memory.init_memory()

    def has_more_steps(time, *_):
        return time < self.sequence_length

    with tf.variable_scope("Sequence_Loop"):
        start = tf.constant(0, dtype=tf.int32)
        final_results = tf.while_loop(
            cond=has_more_steps,
            body=self._loop_body,
            loop_vars=(
                start, memory_state, outputs,
                read_weightings, write_weightings, controller_state, write_vectors,
                key_vectors, beta_vectors, shift_vectors, gamma_vectors,
                gates_vectors, memory_vectors
            ),
            parallel_iterations=32,
            swap_memory=True
        )

    # Position 5 of the loop variables is the controller state.
    if self.controller.has_recurrent_nn:
        dependencies = [self.controller.update_state(final_results[5])]
    else:
        dependencies = []

    with tf.control_dependencies(dependencies):
        self.packed_output = utility.pack_into_tensor(final_results[2], axis=1)

        # packed_memory_view and its content is just for debugging purposes.
        view_positions = (
            ('read_weightings', 3),
            ('write_weightings', 4),
            ('write_vectors', 6),
            ('key_vectors', 7),
            ('beta_vectors', 8),
            ('shift_vectors', 9),
            ('gamma_vectors', 10),
            ('gates_vectors', 11),
            ('memory_vectors', 12),
        )
        memory_view = {}
        for key, position in view_positions:
            memory_view[key] = utility.pack_into_tensor(final_results[position], axis=1)
        self.packed_memory_view = memory_view