def SampleRandomFrames(model_input, num_frames, num_samples):
"""Samples a random set of frames of size num_samples.
Args:
model_input: A tensor of size batch_size x max_frames x feature_size
num_frames: A tensor of size batch_size x 1
num_samples: A scalar
Returns:
`model_input`: A tensor of size batch_size x num_samples x feature_size
"""
batch_size = tf.shape(model_input)[0]
frame_index = tf.cast(
tf.multiply(
tf.random_uniform([batch_size, num_samples]),
tf.tile(tf.cast(num_frames, tf.float32), [1, num_samples])), tf.int32)
batch_index = tf.tile(
tf.expand_dims(tf.range(batch_size), 1), [1, num_samples])
index = tf.stack([batch_index, frame_index], 2)
return tf.gather_nd(model_input, index)
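# A minimal usage sketch (hypothetical shapes; assumes `import tensorflow as tf`
# and TF 1.x graph execution, as the snippets in this collection do): sample 5
# frames from each of 2 videos that are padded to 10 frames.
model_input = tf.random_uniform([2, 10, 128])          # batch_size x max_frames x feature_size
num_frames = tf.constant([[7], [10]], dtype=tf.int32)  # true length of each video
sampled = SampleRandomFrames(model_input, num_frames, 5)
with tf.Session() as sess:
    print(sess.run(sampled).shape)  # (2, 5, 128)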
# Python examples of tf.stack()
def feed_network(self,data,keep_prob,chunk_size,n_chunks,dynamic):
# This code is copied from tflearn
sequence_lengths = None
if dynamic:
sequence_lengths = net.calc_seqlenth(data if isinstance(data, tf.Tensor) else tf.stack(data))
batch_size = tf.shape(data)[0]
weight_dropout = tf.nn.dropout(self._layer_weights, keep_prob)
rnn_dropout = rnn.core_rnn_cell.DropoutWrapper(self._gru_cell,output_keep_prob=keep_prob)
# Calculation Begin
input_shape = data.get_shape().as_list()
ndim = len(input_shape)
axis = [1, 0] + list(range(2,ndim))
    data = tf.transpose(data, axis)
sequence = tf.unstack(data)
outputs, states = rnn.static_rnn(rnn_dropout, sequence, dtype=tf.float32, sequence_length = sequence_lengths)
if dynamic:
outputs = tf.transpose(tf.stack(outputs), [1, 0, 2])
output = net.advanced_indexing_op(outputs, sequence_lengths)
else:
output = outputs[-1]
output = tf.add(tf.matmul(output,weight_dropout), self._layer_biases)
return output
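# Note on the pattern above: static_rnn expects a Python list of per-timestep
# tensors, so the batch-major input is first transposed to time-major
# ([n_chunks, batch_size, chunk_size]) and then unstacked along the time axis.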
def value_transition(self, curr_state, next_symbols, batch_size):
first_value_token = self.num_functions + self.num_begin_tokens + self.num_control_tokens
num_value_tokens = self.output_size - first_value_token
with tf.name_scope('grammar_transition'):
adjusted_next_symbols = tf.where(next_symbols >= self.num_control_tokens, next_symbols + (first_value_token - self.num_control_tokens), next_symbols)
assert1 = tf.Assert(tf.reduce_all(tf.logical_and(next_symbols < num_value_tokens, next_symbols >= 0)), [curr_state, next_symbols])
with tf.control_dependencies([assert1]):
transitions = tf.gather(tf.constant(self.transition_matrix), curr_state)
assert transitions.get_shape()[1:] == (self.output_size,)
indices = tf.stack((tf.range(0, batch_size), adjusted_next_symbols), axis=1)
next_state = tf.gather_nd(transitions, indices)
assert2 = tf.Assert(tf.reduce_all(next_state >= 0), [curr_state, adjusted_next_symbols, next_state])
with tf.control_dependencies([assert2]):
return tf.identity(next_state)
def create_model(self, model_input, vocab_size, num_mixtures=None,
l2_penalty=1e-8, sub_scope="ddcc", original_input=None,
dropout=False, keep_prob=None, noise_level=None,
num_frames=None, **unused_params):
num_supports = FLAGS.num_supports
num_models = FLAGS.divergence_model_count
support_predictions = []
    for i in range(num_models):
sub_prediction = self.sub_model(model_input,vocab_size, num_mixtures,
l2_penalty, sub_scope+"%d"%i,
dropout, keep_prob, noise_level)
support_predictions.append(sub_prediction)
support_predictions = tf.stack(support_predictions, axis=1)
main_predictions = tf.reduce_mean(support_predictions, axis=1)
return {"predictions": main_predictions, "support_predictions": support_predictions}
def resize_axis(tensor, axis, new_size, fill_value=0):
tensor = tf.convert_to_tensor(tensor)
shape = tf.unstack(tf.shape(tensor))
pad_shape = shape[:]
pad_shape[axis] = tf.maximum(0, new_size - shape[axis])
shape[axis] = tf.minimum(shape[axis], new_size)
shape = tf.stack(shape)
resized = tf.concat([
tf.slice(tensor, tf.zeros_like(shape), shape),
tf.fill(tf.stack(pad_shape), tf.cast(fill_value, tensor.dtype))
], axis)
# Update shape.
new_shape = tensor.get_shape().as_list() # A copy is being made.
new_shape[axis] = new_size
resized.set_shape(new_shape)
return resized
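# A small sketch of resize_axis (hypothetical values): pad a 2x3 tensor with
# zeros up to width 5, or truncate it down to width 2.
x = tf.constant([[1, 2, 3], [4, 5, 6]])
padded = resize_axis(x, axis=1, new_size=5)     # [[1, 2, 3, 0, 0], [4, 5, 6, 0, 0]]
truncated = resize_axis(x, axis=1, new_size=2)  # [[1, 2], [4, 5]]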
def trainable_initial_state(self, batch_size):
"""
Create a trainable initial state for the BasicLSTMCell
:param batch_size: number of samples per batch
:return: LSTMStateTuple
"""
def _create_initial_state(batch_size, state_size, trainable=True, initializer=tf.random_normal_initializer()):
with tf.device('/cpu:0'):
s = tf.get_variable('initial_state', shape=[1, state_size], dtype=tf.float32, trainable=trainable,
initializer=initializer)
state = tf.tile(s, tf.stack([batch_size] + [1]))
return state
with tf.variable_scope('initial_c'):
initial_c = _create_initial_state(batch_size, self._num_units)
with tf.variable_scope('initial_h'):
initial_h = _create_initial_state(batch_size, self._num_units)
return tf.contrib.rnn.LSTMStateTuple(initial_c, initial_h)
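# Note: only a single [1, state_size] row is stored as a trainable variable and
# tiled up to the runtime batch size, so the learned initial state stays
# independent of batch_size at graph-construction time.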
def __call__(self, inputs, steps):
def fn(zv, x):
"""
Transition for training, without Metropolis-Hastings.
`z` is the input state.
`v` is created as a dummy variable to allow output of v_, for training p(v).
:param x: variable only for specifying the number of steps
:return: next state `z_`, and the corresponding auxiliary variable `v_`.
"""
z, v = zv
v = tf.random_normal(shape=tf.stack([tf.shape(z)[0], self.network.v_dim]))
z_, v_ = self.network.forward([z, v])
return z_, v_
elems = tf.zeros([steps])
return tf.scan(fn, elems, inputs, back_prop=True)
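# A minimal driving sketch (`sampler`, `network`, and `batch_size` are
# hypothetical names, not from the original project): scan threads (z, v)
# through `steps` transitions and returns the whole trajectory.
z0 = tf.zeros([batch_size, network.z_dim])
v0 = tf.zeros([batch_size, network.v_dim])
z_traj, v_traj = sampler(inputs=(z0, v0), steps=10)  # each: [steps, batch_size, dim]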
def bilateral_slice(grid, guide, name=None):
"""Slices into a bilateral grid using the guide map.
Args:
grid: (Tensor) [batch_size, grid_h, grid_w, depth, n_outputs]
grid to slice from.
guide: (Tensor) [batch_size, h, w ] guide map to slice along.
name: (string) name for the operation.
Returns:
sliced: (Tensor) [batch_size, h, w, n_outputs] sliced output.
"""
with tf.name_scope(name):
gridshape = grid.get_shape().as_list()
if len(gridshape) == 6:
_, _, _, _, n_out, n_in = gridshape
grid = tf.concat(tf.unstack(grid, None, axis=5), 4)
sliced = hdrnet_ops.bilateral_slice(grid, guide)
if len(gridshape) == 6:
sliced = tf.stack(tf.split(sliced, n_in, axis=3), axis=4)
return sliced
def discriminate(self, image, Y):
print("Initializing the discriminator")
print("Y shape", Y.get_shape())
yb = tf.reshape(Y, tf.stack([self.batch_size, 1, 1, self.dim_y]))
print("image shape", image.get_shape())
print("yb shape", yb.get_shape())
X = tf.concat([image, yb * tf.ones([self.batch_size, 24, 24, self.dim_y])],3)
print("X shape", X.get_shape())
h1 = lrelu( tf.nn.conv2d( X, self.discrim_W1, strides=[1,2,2,1], padding='SAME' ))
print("h1 shape", h1.get_shape())
h1 = tf.concat([h1, yb * tf.ones([self.batch_size, 12, 12, self.dim_y])],3)
print("h1 shape", h1.get_shape())
h2 = lrelu(batchnormalize( tf.nn.conv2d( h1, self.discrim_W2, strides=[1,2,2,1], padding='SAME')) )
print("h2 shape", h2.get_shape())
h2 = tf.reshape(h2, [self.batch_size, -1])
h2 = tf.concat([h2, Y], 1)
discri=tf.matmul(h2, self.discrim_W3 )
print("discri shape", discri.get_shape())
h3 = lrelu(batchnormalize(discri))
return h3
def get_image_summary(img, idx=0):
"""
Make an image summary for 4d tensor image with index idx
"""
V = tf.slice(img, (0, 0, 0, idx), (1, -1, -1, 1))
V -= tf.reduce_min(V)
V /= tf.reduce_max(V)
V *= 255
img_w = tf.shape(img)[1]
img_h = tf.shape(img)[2]
V = tf.reshape(V, tf.stack((img_w, img_h, 1)))
V = tf.transpose(V, (2, 0, 1))
V = tf.reshape(V, tf.stack((-1, img_w, img_h, 1)))
return V
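# Typical usage sketch (tensor and tag names are illustrative): log channel 0
# of a conv activation to TensorBoard.
tf.summary.image("conv1_activations", get_image_summary(conv1, idx=0))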
def deconv_2d_drop_bn_relu(inp, inp_chan, out_chan, kernel, stride=1, prob=1.0, name="", is_train=True):
weights = tf.Variable(tf.truncated_normal(
shape=[kernel, kernel, out_chan, inp_chan],
mean=0.0,
stddev=0.3),
name=name+"_weights")
bias = tf.Variable(tf.constant(
shape=[out_chan],
value=0.0),
name=name+"_bias")
inp_shape = tf.shape(inp)
deconv = tf.nn.conv2d_transpose(
value=inp,
filter=weights,
output_shape=tf.stack([inp_shape[0], inp_shape[1]*stride, inp_shape[2]*stride, out_chan]),
strides=[1, stride, stride, 1],
padding='VALID',
name=name+"_deconv")
drop = tf.nn.dropout(deconv, prob, name=name+"_drop")
out = tf.nn.relu(tf.contrib.layers.batch_norm(drop + bias, is_training=is_train))
return out, weights, bias
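# Caveat worth noting: with padding='VALID', conv2d_transpose produces spatial
# size (in - 1) * stride + kernel, so the output_shape computed above only
# matches when kernel == stride.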
# Source: tf_utils.py, project convolutional-pose-machines-tensorflow, author timctho
def rotate_points(orig_points, angle, w, h):
"""Return rotated points
Args:
      orig_points: 'Tensor' with shape [N,2], each entry is a point (x,y)
      angle: rotation angle in radians
      w: image width in pixels
      h: image height in pixels
Returns:
'Tensor' with shape [N,2], with rotated points
"""
# rotation
rotate_mat = tf.stack([[tf.cos(angle) / w, tf.sin(angle) / h],
[-tf.sin(angle) / w, tf.cos(angle) / h]])
# shift coord
orig_points = tf.subtract(orig_points, 0.5)
orig_points = tf.stack([orig_points[:, 0] * w,
orig_points[:, 1] * h], axis=1)
rotated_points = tf.matmul(orig_points, rotate_mat) + 0.5
return rotated_points
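# A small sketch (hypothetical values): rotate two normalized keypoints of a
# 640x480 image by pi/2 radians around the image center.
import math
points = tf.constant([[0.25, 0.25], [0.75, 0.5]])
rotated = rotate_points(points, angle=math.pi / 2, w=640, h=480)  # shape [2, 2]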
def conv_cond_concat(x, y):
"""Concatenate conditioning vector on feature map axis."""
#print('input x:',x.get_shape().as_list())
#print('input y:',y.get_shape().as_list())
xshape=x.get_shape()
#tile by [1,64,64,1]
tile_shape=tf.stack([1,xshape[1],xshape[2],1])
tile_y=tf.tile(y,tile_shape)
#print('tile y:',tile_y.get_shape().as_list())
return tf.concat([x,tile_y],axis=3)
#x_shapes = x.get_shape()
#y_shapes = y.get_shape()
#return tf.concat([
#x, y*tf.ones([x_shapes[0], x_shapes[1], x_shapes[2], y_shapes[3]])], 3)
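# Sketch of the usual conditional-GAN pattern (hypothetical shapes and names):
# broadcast a [batch, 1, 1, dim_y] label tensor across a [batch, 64, 64, c]
# feature map.
y = tf.reshape(labels, [-1, 1, 1, dim_y])  # labels: [batch, dim_y]
h = conv_cond_concat(feature_map, y)       # -> [batch, 64, 64, c + dim_y]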
def deconv2d(input_, output_shape,
k_h=5, k_w=5, d_h=2, d_w=2, stddev=0.02,
name="deconv2d", with_w=False):
with tf.variable_scope(name):
# filter : [height, width, output_channels, in_channels]
w = tf.get_variable('w', [k_h, k_w, output_shape[-1], input_.get_shape()[-1]],
initializer=tf.random_normal_initializer(stddev=stddev))
tf_output_shape=tf.stack(output_shape)
deconv = tf.nn.conv2d_transpose(input_, w, output_shape=tf_output_shape,
strides=[1, d_h, d_w, 1])
biases = tf.get_variable('biases', [output_shape[-1]], initializer=tf.constant_initializer(0.0))
#deconv = tf.reshape(tf.nn.bias_add(deconv, biases), deconv.get_shape())
deconv = tf.reshape(tf.nn.bias_add(deconv, biases), tf_output_shape)
if with_w:
return deconv, w, biases
else:
return deconv
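# Usage sketch (hypothetical shapes): upsample a [batch_size, 4, 4, 128] input
# to [batch_size, 8, 8, 64]; output_shape must be fully known here because it
# is stacked into a constant tensor above.
h1 = deconv2d(h0, output_shape=[batch_size, 8, 8, 64], name="g_h1")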
def linear(input_, output_size, scope=None, stddev=0.02, bias_start=0.0, with_w=False):
shape = input_.get_shape().as_list()
#mat_shape=tf.stack([tf.shape(input_)[1],output_size])
mat_shape=[shape[1],output_size]
with tf.variable_scope(scope or "Linear"):
#matrix = tf.get_variable("Matrix", [shape[1], output_size], tf.float32,
matrix = tf.get_variable("Matrix", mat_shape, tf.float32,
tf.random_normal_initializer(stddev=stddev))
bias = tf.get_variable("bias", [output_size],
initializer=tf.constant_initializer(bias_start))
if with_w:
return tf.matmul(input_, matrix) + bias, matrix, bias
else:
return tf.matmul(input_, matrix) + bias
# minibatch method that improves on OpenAI's
# because it doesn't fix the batch size
# TODO: recheck when not sleepy
# Source: a8_dynamic_memory_network.py, project text_classification, author brightmart
def answer_module(self):
""" Answer Module:generate an answer from the final memory vector.
Input:
hidden state from episodic memory module:[batch_size,hidden_size]
question:[batch_size, embedding_size]
"""
steps=self.sequence_length if self.decode_with_sequences else 1 #decoder for a list of tokens with sequence. e.g."x1 x2 x3 x4..."
a=self.m_T #init hidden state
y_pred=tf.zeros((self.batch_size,self.hidden_size)) #TODO usually we will init this as a special token '<GO>', you can change this line by pass embedding of '<GO>' from outside.
logits_list=[]
logits_return=None
    cell = rnn.GRUCell(self.hidden_size)  # create the cell once so its weights are shared across decode steps
    for i in range(steps):
        y_previous_q = tf.concat([y_pred, self.query_embedding], axis=1)  # [batch_size, hidden_size + embedding_size]
        _, a = cell(y_previous_q, a)
logits=tf.layers.dense(a,units=self.num_classes) #[batch_size,vocab_size]
logits_list.append(logits)
if self.decode_with_sequences:#need to get sequences.
logits_return = tf.stack(logits_list, axis=1) # [batch_size,sequence_length,num_classes]
else:#only need to get an answer, not sequences
        logits_return = logits_list[0]  # [batch_size, num_classes]
return logits_return
def _rnn_attention_decoder(self, decoder_cell, training_wheels):
loop_fn = self._custom_rnn_loop_fn(decoder_cell.output_size, training_wheels=training_wheels)
decoder_outputs, _, (context_vectors_array, attention_logits_array, pointer_probability_array) = \
tf.nn.raw_rnn(decoder_cell,
loop_fn,
swap_memory=True)
decoder_outputs = decoder_outputs.stack()
decoder_outputs = tf.transpose(decoder_outputs, [1, 0, 2])
attention_logits = attention_logits_array.gather(tf.range(0, attention_logits_array.size() - 1))
attention_logits = tf.transpose(attention_logits, [1, 0, 2])
context_vectors = context_vectors_array.gather(tf.range(0, context_vectors_array.size() - 1))
context_vectors = tf.transpose(context_vectors, [1, 0, 2])
pointer_probabilities = pointer_probability_array.gather(tf.range(0, pointer_probability_array.size() - 1))
pointer_probabilities = tf.transpose(pointer_probabilities, [1, 0])
return decoder_outputs, context_vectors, attention_logits, pointer_probabilities
def kSparse(self, x, topk):
    print('run regular k-sparse')
dim = int(x.get_shape()[1])
if topk > dim:
warnings.warn('Warning: topk should not be larger than dim: %s, found: %s, using %s' % (dim, topk, dim))
topk = dim
k = dim - topk
values, indices = tf.nn.top_k(-x, k) # indices will be [[0, 1], [2, 1]], values will be [[6., 2.], [5., 4.]]
# We need to create full indices like [[0, 0], [0, 1], [1, 2], [1, 1]]
my_range = tf.expand_dims(tf.range(0, tf.shape(indices)[0]), 1) # will be [[0], [1]]
my_range_repeated = tf.tile(my_range, [1, k]) # will be [[0, 0], [1, 1]]
full_indices = tf.stack([my_range_repeated, indices], axis=2) # change shapes to [N, k, 1] and [N, k, 1], to concatenate into [N, k, 2]
full_indices = tf.reshape(full_indices, [-1, 2])
to_reset = tf.sparse_to_dense(full_indices, tf.shape(x), tf.reshape(values, [-1]), default_value=0., validate_indices=False)
res = tf.add(x, to_reset)
return res
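# A small sketch (hypothetical values; `ae` stands for an instance of the
# enclosing class): keep the top-2 activations per row, zero out the rest.
x = tf.constant([[1., 4., 2., 6.], [6., 2., 5., 4.]])
sparse = ae.kSparse(x, topk=2)  # -> [[0., 4., 0., 6.], [6., 0., 5., 0.]]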
def loss(self, img_batch, label_batch):
"""Create the network, run inference on the input batch and compute loss.
    Args:
      img_batch: batch of pre-processed images.
      label_batch: batch of ground-truth labels.
    Returns:
      Pixel-wise softmax loss.
"""
raw_output = self._create_network(tf.cast(img_batch, tf.float32), keep_prob=tf.constant(0.5))
prediction = tf.reshape(raw_output, [-1, n_classes])
# Need to resize labels and convert using one-hot encoding.
label_batch = self.prepare_label(label_batch, tf.stack(raw_output.get_shape()[1:3]))
gt = tf.reshape(label_batch, [-1, n_classes])
# Pixel-wise softmax loss.
loss = tf.nn.softmax_cross_entropy_with_logits(logits=prediction, labels=gt)
reduced_loss = tf.reduce_mean(loss)
return reduced_loss
def feed_network(self,data,keep_prob,chunk_size,n_chunks, dynamic):
# This code is copied from tflearn
sequence_lengths = None
if dynamic:
sequence_lengths = net.calc_seqlenth(data if isinstance(data, tf.Tensor) else tf.stack(data))
batch_size = tf.shape(data)[0]
weight_dropout = tf.nn.dropout(self._layer_weights, keep_prob)
rnn_dropout = rnn.core_rnn_cell.DropoutWrapper(self._lstm_cell,output_keep_prob=keep_prob)
# Calculation Begin
input_shape = data.get_shape().as_list()
ndim = len(input_shape)
axis = [1, 0] + list(range(2,ndim))
    data = tf.transpose(data, axis)
sequence = tf.unstack(data)
outputs, states = rnn.static_rnn(rnn_dropout, sequence, dtype=tf.float32, sequence_length = sequence_lengths)
if dynamic:
outputs = tf.transpose(tf.stack(outputs), [1, 0, 2])
output = net.advanced_indexing_op(outputs, sequence_lengths)
else:
output = outputs[-1]
output = tf.add(tf.matmul(output,weight_dropout), self._layer_biases)
return output
def combine_gradients(tower_grads):
"""Calculate the combined gradient for each shared variable across all towers.
Note that this function provides a synchronization point across all towers.
Args:
tower_grads: List of lists of (gradient, variable) tuples. The outer list
is over individual gradients. The inner list is over the gradient
calculation for each tower.
Returns:
List of pairs of (gradient, variable) where the gradient has been summed
across all towers.
"""
filtered_grads = [[x for x in grad_list if x[0] is not None] for grad_list in tower_grads]
final_grads = []
    for i in range(len(filtered_grads[0])):
        grads = [filtered_grads[t][i] for t in range(len(filtered_grads))]
grad = tf.stack([x[0] for x in grads], 0)
grad = tf.reduce_sum(grad, 0)
final_grads.append((grad, filtered_grads[0][i][1],))
return final_grads
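# Usage sketch (hypothetical two-tower setup): sum the per-variable gradients
# computed on separate devices, then apply them once.
tower_grads = [opt.compute_gradients(loss_tower0), opt.compute_gradients(loss_tower1)]
train_op = opt.apply_gradients(combine_gradients(tower_grads))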
def generate_mask(img_mask_list, h, w, l):
img_masks, loss_masks = [], []
for i in range(l):
# generate image mask
img_mask = img_mask_list[i]
img_mask = tf.cast(tf.image.decode_png(img_mask), tf.float32)
img_mask = tf.reshape(img_mask, (h, w))
img_masks.append(img_mask)
# generate loss mask
s_total = h * w
s_mask = tf.reduce_sum(img_mask)
def f1(): return img_mask*((s_total-s_mask)/s_mask-1)+1
def f2(): return tf.zeros_like(img_mask)
def f3(): return tf.ones_like(img_mask)
loss_mask = tf.case([(tf.equal(s_mask, 0), f2), \
(tf.less(s_mask, s_total/2), f1)],
default=f3)
loss_masks.append(loss_mask)
return tf.stack(img_masks), tf.stack(loss_masks)
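# Note on the weighting above: when the mask covers less than half the image,
# each foreground pixel is weighted (s_total - s_mask) / s_mask so foreground
# and background contribute equally to the loss; an empty mask yields all
# zeros, and a mostly-full mask falls back to uniform ones.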
def calculate_allocation_weighting(self, usage_vector):
"""
:param: usage vector: tensor of shape [batch_size, memory_size]
:return: allocation tensor of shape [batch_size, memory_size]
"""
usage_vector = Memory.epsilon + (1 - Memory.epsilon) * usage_vector
    # We sort "-usage_vector" because top_k returns the highest values and we need the lowest
highest_usage, inverse_indices = tf.nn.top_k(-usage_vector, k=self.memory_size)
lowest_usage = -highest_usage
allocation_scrambled = (1 - lowest_usage) * tf.cumprod(lowest_usage, axis=1, exclusive=True)
    # allocation is not yet in the correct order: allocation[i] holds the value for sorted position i
    # invert the inverse permutation for each batch to restore the original slot order
indices = tf.stack([tf.invert_permutation(batch_indices) for batch_indices in tf.unstack(inverse_indices)])
allocation = tf.stack([tf.gather(mem, ind)
for mem, ind in
zip(tf.unstack(allocation_scrambled), tf.unstack(indices))])
return allocation
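# The formula above is the DNC allocation weighting: with usage sorted in
# ascending order, a_sorted[j] = (1 - u_sorted[j]) * prod_{i<j} u_sorted[i],
# which is then unsorted back into the original memory-slot order.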
def read_and_decode(filename_queue, batch_size):
reader = tf.TFRecordReader()
_, serialized_example = reader.read(filename_queue)
feature = features()
feature = tf.parse_single_example(
serialized_example,
features = feature,
)
hr_image = tf.decode_raw(feature['hr_image'], tf.uint8)
height = tf.cast(feature['height'], tf.int32)
width = tf.cast(feature['width'], tf.int32)
    image_shape = tf.stack([128, 128, 3])
hr_image = tf.reshape(hr_image, image_shape)
hr_image = tf.image.random_flip_left_right(hr_image)
hr_image = tf.image.random_contrast(hr_image, 0.5, 1.3)
hr_images = tf.train.shuffle_batch([hr_image], batch_size = batch_size, capacity = 30,
num_threads = 2,
min_after_dequeue = 10)
return hr_images
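# Note: tf.train.shuffle_batch is queue-based, so the consuming session must
# start queue runners (e.g. tf.train.start_queue_runners(sess=sess)) before
# evaluating hr_images.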
def _get_top_k(scores1, scores2, k, max_span_size, support2question):
max_support_length = tf.shape(scores1)[1]
doc_idx, pointer1, topk_scores1 = segment_top_k(scores1, support2question, k)
# [num_questions * beam_size]
doc_idx_flat = tf.reshape(doc_idx, [-1])
pointer_flat1 = tf.reshape(pointer1, [-1])
# [num_questions * beam_size, support_length]
scores_gathered2 = tf.gather(scores2, doc_idx_flat)
if max_span_size < 0:
pointer_flat1, max_span_size = pointer_flat1 + max_span_size + 1, -max_span_size
left_mask = misc.mask_for_lengths(tf.cast(pointer_flat1, tf.int32),
max_support_length, mask_right=False)
right_mask = misc.mask_for_lengths(tf.cast(pointer_flat1 + max_span_size, tf.int32),
max_support_length)
scores_gathered2 = scores_gathered2 + left_mask + right_mask
pointer2 = tf.argmax(scores_gathered2, axis=1, output_type=tf.int32)
topk_score2 = tf.gather_nd(scores2, tf.stack([doc_idx_flat, pointer2], 1))
return doc_idx, pointer1, tf.reshape(pointer2, [-1, k]), topk_scores1 + tf.reshape(topk_score2, [-1, k])
def distance_biases(time_steps, window_size=10, reuse=False):
"""
Return a 2-d tensor with the values of the distance biases to be applied
on the intra-attention matrix of size sentence_size
Args:
      time_steps: scalar tensor (the sequence length)
window_size: window size
reuse: reuse variables
Returns:
2-d tensor (time_steps, time_steps)
"""
with tf.variable_scope('distance-bias', reuse=reuse):
# this is d_{i-j}
distance_bias = tf.get_variable('dist_bias', [window_size], initializer=tf.zeros_initializer())
r = tf.range(0, time_steps)
r_matrix = tf.tile(tf.reshape(r, [1, -1]), tf.stack([time_steps, 1]))
raw_idxs = r_matrix - tf.reshape(r, [-1, 1])
clipped_idxs = tf.clip_by_value(raw_idxs, 0, window_size - 1)
values = tf.nn.embedding_lookup(distance_bias, clipped_idxs)
return values
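# Usage sketch (hypothetical tensors): add the clipped distance bias to raw
# intra-attention logits for a batch of sentences.
time_steps = tf.shape(sentence_repr)[1]
logits = raw_logits + distance_biases(time_steps, window_size=10)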