def SampleRandomFrames(model_input, num_frames, num_samples):
    """Pick `num_samples` frames per example at random positions.

    Every position is drawn independently: a uniform random value is scaled
    by the example's frame count and truncated to an integer, so the same
    frame can be selected more than once.

    Args:
      model_input: A tensor of size batch_size x max_frames x feature_size
      num_frames: A tensor of size batch_size x 1
      num_samples: A scalar

    Returns:
      A tensor of size batch_size x num_samples x feature_size
    """
    batch_size = tf.shape(model_input)[0]

    # One uniform draw for every (example, sample) slot.
    unit_draws = tf.random_uniform([batch_size, num_samples])
    # Per-example frame count, repeated across the sample axis.
    frame_counts = tf.tile(tf.cast(num_frames, tf.float32), [1, num_samples])
    # Scale the draw by the frame count and truncate to get a frame position.
    frame_index = tf.cast(tf.multiply(unit_draws, frame_counts), tf.int32)

    # Row number of each example, repeated so it pairs with every drawn frame.
    example_ids = tf.expand_dims(tf.range(batch_size), 1)
    batch_index = tf.tile(example_ids, [1, num_samples])

    # (example, frame) coordinate pairs drive the gather.
    coords = tf.stack([batch_index, frame_index], 2)
    return tf.gather_nd(model_input, coords)
# Example source code using tile() in Python
def calculate_loss(self, predictions, support_predictions, labels, **unused_params):
    """Combine a label cross-entropy term with a subtracted mean-agreement term.

    Each model's predictions are scored twice with `CrossEntropyLoss`: once
    against the ground-truth labels and once against the gradient-stopped
    mean prediction over all models. The result is
    ce * (1 - p) - divergence * p with p = FLAGS.support_loss_percent.

    Args:
      predictions: Not used by this method.
      support_predictions: batch_size x num_models x num_classes tensor.
      labels: Tensor cast to float32 and repeated once per model; presumably
        batch_size x num_classes (TODO confirm against caller).
      **unused_params: Forwarded unchanged to `CrossEntropyLoss.calculate_loss`.

    Returns:
      The weighted difference of the two `CrossEntropyLoss` results.
    """
    model_count = tf.shape(support_predictions)[1]
    vocab_size = tf.shape(support_predictions)[2]
    per_model = [1, model_count, 1]

    mean_predictions = tf.reduce_mean(support_predictions, axis=1, keep_dims=True)

    # Repeat the labels and the mean prediction once per model.
    float_labels = tf.cast(labels, dtype=tf.float32)
    support_labels = tf.tile(tf.expand_dims(float_labels, axis=1), multiples=per_model)
    # No gradient flows into the mean; it only acts as a target.
    support_means = tf.stop_gradient(tf.tile(mean_predictions, multiples=per_model))

    # Flatten the model and class axes into one so the 2-D loss can be reused.
    flat_shape = [-1, model_count * vocab_size]
    support_predictions = tf.reshape(support_predictions, shape=flat_shape)
    support_labels = tf.reshape(support_labels, shape=flat_shape)
    support_means = tf.reshape(support_means, shape=flat_shape)

    ce_loss_fn = CrossEntropyLoss()
    # Cross entropy between predictions and ground truth.
    cross_entropy_loss = ce_loss_fn.calculate_loss(
        support_predictions, support_labels, **unused_params)
    # Cross entropy between predictions and mean predictions.
    divergence = ce_loss_fn.calculate_loss(
        support_predictions, support_means, **unused_params)

    weight = FLAGS.support_loss_percent
    return cross_entropy_loss * (1.0 - weight) - divergence * weight
def SampleRandomFrames(model_input, num_frames, num_samples):
    """Pick `num_samples` frames per example at random positions.

    Every position is drawn independently: a uniform random value is scaled
    by the example's frame count and truncated to an integer, so the same
    frame can be selected more than once.

    Args:
      model_input: A tensor of size batch_size x max_frames x feature_size
      num_frames: A tensor of size batch_size x 1
      num_samples: A scalar

    Returns:
      A tensor of size batch_size x num_samples x feature_size
    """
    batch_size = tf.shape(model_input)[0]

    # One uniform draw for every (example, sample) slot.
    unit_draws = tf.random_uniform([batch_size, num_samples])
    # Per-example frame count, repeated across the sample axis.
    frame_counts = tf.tile(tf.cast(num_frames, tf.float32), [1, num_samples])
    # Scale the draw by the frame count and truncate to get a frame position.
    frame_index = tf.cast(tf.multiply(unit_draws, frame_counts), tf.int32)

    # Row number of each example, repeated so it pairs with every drawn frame.
    example_ids = tf.expand_dims(tf.range(batch_size), 1)
    batch_index = tf.tile(example_ids, [1, num_samples])

    # (example, frame) coordinate pairs drive the gather.
    coords = tf.stack([batch_index, frame_index], 2)
    return tf.gather_nd(model_input, coords)
def SampleRandomFrames(model_input, num_frames, num_samples):
    """Pick `num_samples` frames per example at random positions.

    Every position is drawn independently: a uniform random value is scaled
    by the example's frame count and truncated to an integer, so the same
    frame can be selected more than once.

    Args:
      model_input: A tensor of size batch_size x max_frames x feature_size
      num_frames: A tensor of size batch_size x 1
      num_samples: A scalar

    Returns:
      A tensor of size batch_size x num_samples x feature_size
    """
    batch_size = tf.shape(model_input)[0]

    # One uniform draw for every (example, sample) slot.
    unit_draws = tf.random_uniform([batch_size, num_samples])
    # Per-example frame count, repeated across the sample axis.
    frame_counts = tf.tile(tf.cast(num_frames, tf.float32), [1, num_samples])
    # Scale the draw by the frame count and truncate to get a frame position.
    frame_index = tf.cast(tf.multiply(unit_draws, frame_counts), tf.int32)

    # Row number of each example, repeated so it pairs with every drawn frame.
    example_ids = tf.expand_dims(tf.range(batch_size), 1)
    batch_index = tf.tile(example_ids, [1, num_samples])

    # (example, frame) coordinate pairs drive the gather.
    coords = tf.stack([batch_index, frame_index], 2)
    return tf.gather_nd(model_input, coords)
def trainable_initial_state(self, batch_size):
    """
    Build a learned starting state for the BasicLSTMCell.

    One [1, num_units] variable is created for the cell state and another
    for the hidden state; each is repeated `batch_size` times along axis 0.

    :param batch_size: number of samples per batch
    :return: LSTMStateTuple
    """
    def _tiled_state_variable(rows, width, trainable=True,
                              initializer=tf.random_normal_initializer()):
        # Built under the '/cpu:0' device scope.
        with tf.device('/cpu:0'):
            seed_row = tf.get_variable('initial_state', shape=[1, width],
                                       dtype=tf.float32, trainable=trainable,
                                       initializer=initializer)
            multiples = tf.stack([rows] + [1])
            return tf.tile(seed_row, multiples)

    # The variable scopes keep the two 'initial_state' variables apart.
    states = []
    for scope_name in ('initial_c', 'initial_h'):
        with tf.variable_scope(scope_name):
            states.append(_tiled_state_variable(batch_size, self._num_units))
    return tf.contrib.rnn.LSTMStateTuple(*states)
def _create_decoder(self, encoder_output, features, _labels):
    """Instantiate the attention layer and the decoder that uses it.

    The attention class is looked up by the name in
    `params["attention.class"]`, first with `locate` and then as an
    attribute of `decoders.attention`.

    Args:
      encoder_output: Object providing `attention_values`,
        `attention_values_length` and `outputs`.
      features: Mapping read for "source_len" when the source is reversed.
      _labels: Not used.

    Returns:
      The object built by `self.decoder_class`.
    """
    attention_name = self.params["attention.class"]
    attention_class = (locate(attention_name) or
                       getattr(decoders.attention, attention_name))
    attention_layer = attention_class(
        params=self.params["attention.params"], mode=self.mode)

    # A reversed input sequence means the attention scores must be
    # reversed too, which needs the source lengths.
    if not self.params["source.reverse"]:
        reverse_scores_lengths = None
    else:
        reverse_scores_lengths = features["source_len"]
        if self.use_beam_search:
            # Repeat the lengths once per beam.
            beam_width = self.params["inference.beam_search.beam_width"]
            reverse_scores_lengths = tf.tile(
                input=reverse_scores_lengths, multiples=[beam_width])

    return self.decoder_class(
        params=self.params["decoder.params"],
        mode=self.mode,
        vocab_size=self.target_vocab_info.total_size,
        attention_values=encoder_output.attention_values,
        attention_values_length=encoder_output.attention_values_length,
        attention_keys=encoder_output.outputs,
        attention_fn=attention_layer,
        reverse_scores_lengths=reverse_scores_lengths)
def _validate(self, machine, n=10):
    """Generate a grid of samples and render it as a jet-coloured thumbnail.

    `n` latent codes are drawn and each is repeated `n` times, giving
    n * n rows of z. The labels are the integers 0..9 as a column, repeated
    `n` times, giving 10 * n rows. The two row counts agree only when
    n == 10.

    :param machine: object whose `generate(z, y)` produces the images
    :param n: number of distinct latent codes and label repetitions
    :return: result of `make_png_jet_thumbnail`
    """
    grid_cells = n * n
    latent_dim = self.arch['z_dim']

    # same row same z
    row_codes = tf.random_normal(shape=[n, latent_dim])
    repeated_codes = tf.reshape(tf.tile(row_codes, [1, n]), [grid_cells, -1])
    z = tf.Variable(repeated_codes, trainable=False, dtype=tf.float32)

    # same column same y
    label_column = tf.reshape(tf.range(0, 10, 1, dtype=tf.int64), [-1, 1])
    y = tf.tile(label_column, [n, 1])

    generated = machine.generate(z, y)
    return make_png_jet_thumbnail(generated, n)
def _validate(self, machine, n=10):
    """Generate a grid of samples and render it as a thumbnail.

    `n` latent codes are drawn and each is repeated `n` times, giving
    n * n rows of z. The labels are the integers 0..9 as a flat vector,
    repeated `n` times, giving 10 * n entries. The two counts agree only
    when n == 10.

    :param machine: object whose `generate(z, y)` produces the images
    :param n: number of distinct latent codes and label repetitions
    :return: result of `make_png_thumbnail`
    """
    grid_cells = n * n
    latent_dim = self.arch['z_dim']

    # same row same z
    row_codes = tf.random_normal(shape=[n, latent_dim])
    repeated_codes = tf.reshape(tf.tile(row_codes, [1, n]), [grid_cells, -1])
    z = tf.Variable(repeated_codes, trainable=False, dtype=tf.float32)

    # same column same y
    flat_labels = tf.reshape(tf.range(0, 10, 1, dtype=tf.int64), [-1,])
    y = tf.tile(flat_labels, [n,])

    generated = machine.generate(z, y)
    return make_png_thumbnail(generated, n)
def get_image(filepath, image_target, image_size):
    """Load an image, centre-crop a square, resize it and rescale the values.

    Args:
      filepath: Path handed to `imread`.
      image_target: Side length of the centred square crop. If it exceeds
        either image dimension it is reduced to the shorter dimension.
      image_size: Side length the crop is resized to.

    Returns:
      The resized crop as an array computed as value / 127.5 - 1, which is
      [-1, 1] if `imresize` yields values in 0..255 (TODO confirm).
    """
    # np.float was only an alias for the builtin float (float64) and no
    # longer exists in current NumPy releases; np.float64 is the same dtype.
    img = imread(filepath).astype(np.float64)
    h_origin, w_origin = img.shape[:2]

    # Never ask for a crop larger than the image itself.
    if image_target > h_origin or image_target > w_origin:
        image_target = min(h_origin, w_origin)

    # Offsets that centre the crop window.
    h_drop = int((h_origin - image_target) / 2)
    w_drop = int((w_origin - image_target) / 2)

    # A 2-D (single channel) image is repeated into three channels so the
    # three-axis crop below works for it as well.
    if img.ndim == 2:
        img = np.tile(img.reshape(h_origin, w_origin, 1), (1, 1, 3))

    img_crop = img[h_drop:h_drop + image_target, w_drop:w_drop + image_target, :]
    img_resize = imresize(img_crop, [image_size, image_size])
    return np.array(img_resize) / 127.5 - 1.
def conv_cond_concat(x, y):
    """Concatenate conditioning vector on feature map axis.

    `y` is repeated along axes 1 and 2 by the static sizes of `x` on those
    axes and the result is appended to `x` along axis 3. This presumably
    expects `y` to have size 1 on axes 1 and 2 (TODO confirm against callers).
    """
    static_shape = x.get_shape()
    # Repeat only over axes 1 and 2, e.g. multiples of [1, 64, 64, 1].
    multiples = tf.stack([1, static_shape[1], static_shape[2], 1])
    y_tiled = tf.tile(y, multiples)
    return tf.concat([x, y_tiled], axis=3)
def rnn_story(self):
    """
    Feed the story through the recurrent cell one time step at a time.
    input is: self.story_embedding, [batch_size,story_length,embed_size]
    :return: the state produced by the final step, [batch_size,block_size,hidden_size]
    """
    # 1. Cut the story along the time axis, then drop the singleton time axis
    #    of each piece: story_length tensors of [batch_size,embed_size].
    step_slices = tf.split(self.story_embedding, self.story_length, axis=1)
    step_inputs = [tf.squeeze(piece, axis=1) for piece in step_slices]

    # 2. Memory values (h_all) and keys (w_all), both [block_size,hidden_size].
    memory_shape = [self.block_size, self.dimension]
    h_all = tf.get_variable("hidden_states", shape=memory_shape, initializer=self.initializer)
    w_all = tf.get_variable("keys", shape=memory_shape, initializer=self.initializer)

    # 3. Repeat keys and values per example: [batch_size,block_size,hidden_size].
    per_example = [self.batch_size, 1, 1]
    w_all_expand = tf.tile(tf.expand_dims(w_all, axis=0), per_example)
    h_all_expand = tf.tile(tf.expand_dims(h_all, axis=0), per_example)

    # 4. Run the cell over the steps, threading the state through.
    state = h_all_expand
    for step, step_input in enumerate(step_inputs):
        state = self.cell(step_input, state, w_all_expand, step)
    return state
def SoftArgmin(outputLeft, outputRight, D=192):
    """Softmax-weighted expected index along the first volume axis.

    For each input, the singleton axes 0 and 4 are removed, a softmax is
    taken over the remaining first axis, and the indices 0..D-1 are averaged
    under those softmax weights.

    Args:
      outputLeft: 5-D tensor with size 1 on axes 0 and 4; after squeezing,
        its first axis must have length `D`.
      outputRight: Same layout as `outputLeft`.
      D: Number of index values along the first squeezed axis.

    Returns:
      A pair (left, right) of tensors with a leading axis of size 1 followed
      by the two remaining spatial axes.
    """
    # Index values shaped (D, 1, 1). Broadcasting in tf.multiply spreads them
    # over the spatial axes, so any resolution works, not only 256 x 512.
    d_grid = tf.reshape(tf.cast(tf.range(D), tf.float32), (-1, 1, 1))

    def _expected_index(volume):
        # Softmax over axis 0, then the weighted sum of indices.
        squeezed = tf.squeeze(volume, axis=[0, 4])
        weights = tf.nn.softmax(squeezed, dim=0)
        return tf.reduce_sum(tf.multiply(weights, d_grid), axis=0, keep_dims=True)

    left_softargmin = _expected_index(outputLeft)
    right_softargmin = _expected_index(outputRight)
    return left_softargmin, right_softargmin
def _attention(self, prev_decoder_state, prev_embedding):
    """Compute the attention context for one decoder step.

    Args:
      prev_decoder_state: Passed to `self._score`.
      prev_embedding: Passed to `self._score`.

    Returns:
      (c_i, e_i): the context, i.e. the encoder states summed over axis 1
      under the softmax weights, and the raw scores from `self._score`.
    """
    with tf.variable_scope('attention'):
        # e_i = score of shape [batch_size, input_seq_length],
        # e_ij = score(prev_decoder_state, h_j)
        e_i = self._score(prev_decoder_state, prev_embedding)
        # alpha_i = softmax(e_i) of shape [batch_size, input_seq_length]
        alpha_i = tf.nn.softmax(e_i)

        # Shape [batch_size, input_seq_length, encoder_output_size] with
        # resized_alpha_i[b, j, :] == alpha_i[b, j]. The new axis must be
        # added before tiling: tiling along axis 1 and then reshaping would
        # pair each encoder position with weights of other positions.
        resized_alpha_i = tf.tile(tf.expand_dims(alpha_i, axis=2),
                                  [1, 1, self.encoder_output_size])

        if self.mode == 'decode':
            encoder_states = self.pre_computed_encoder_states_placeholder
        else:
            encoder_states = self.encoder_outputs
        c_i = tf.reduce_sum(tf.multiply(resized_alpha_i, encoder_states), axis=1)
    return c_i, e_i
def ar_layer(z0,hps,n_hidden=10):
    ''' old iaf layer

    Each input row is copied `hps.z_size` times and copy i keeps only its
    first i entries (the rest are zeroed). Two fully connected layers then
    map every masked copy to two numbers, returned as `mu` and `log_sigma`
    with one value per (example, position).
    '''
    z_dim = hps.z_size

    # Repeat input: z_dim identical copies of every row of z0.
    z_rep = tf.reshape(tf.tile(z0, [1, z_dim]), [-1, z_dim])

    # Make mask: row i is True for the first i positions only.
    prefix_lengths = tf.range(z_dim)
    mask = tf.sequence_mask(prefix_lengths, z_dim)[None, :, :]
    batch = tf.shape(z0)[0]
    mask = tf.reshape(tf.tile(mask, [batch, 1, 1]), [-1, z_dim])

    # Predict mu and sigma from the masked copies.
    z_mask = z_rep * tf.to_float(mask)
    hidden = slim.fully_connected(z_mask, n_hidden, activation_fn=tf.nn.relu)
    pars = slim.fully_connected(hidden, 2, activation_fn=None)
    pars = tf.reshape(pars, [-1, z_dim, 2])

    mu, log_sigma = tf.unstack(pars, axis=2)
    return mu, log_sigma
def kSparse(self, x, topk):
    """Keep the `topk` largest entries in each row of `x` and zero the rest.

    Args:
      x: 2-D tensor whose second dimension is statically known.
      topk: Number of entries to keep per row; values above the row width
        are reduced to the row width with a warning.

    Returns:
      A tensor shaped like `x` with the (dim - topk) smallest entries of
      each row set to zero.
    """
    # Call form of print so the module also parses under Python 3.
    print('run regular k-sparse')
    dim = int(x.get_shape()[1])
    if topk > dim:
        warnings.warn('Warning: topk should not be larger than dim: %s, found: %s, using %s' % (dim, topk, dim))
        topk = dim

    # Number of entries to zero out per row.
    k = dim - topk
    # top_k of -x selects the k smallest entries of x; `values` holds their
    # negations, which cancel the originals when added back.
    values, indices = tf.nn.top_k(-x, k)

    # Pair every column index with its row number to form full 2-D
    # coordinates, e.g. [[0, 0], [0, 1], [1, 2], [1, 1]].
    my_range = tf.expand_dims(tf.range(0, tf.shape(indices)[0]), 1)
    my_range_repeated = tf.tile(my_range, [1, k])
    full_indices = tf.stack([my_range_repeated, indices], axis=2)  # [N, k, 2]
    full_indices = tf.reshape(full_indices, [-1, 2])

    # Dense tensor that is -x at the selected positions and 0 elsewhere.
    to_reset = tf.sparse_to_dense(full_indices, tf.shape(x), tf.reshape(values, [-1]),
                                  default_value=0., validate_indices=False)
    res = tf.add(x, to_reset)
    return res
def SampleRandomFrames(model_input, num_frames, num_samples):
    """Pick `num_samples` frames per example at random positions.

    Every position is drawn independently: a uniform random value is scaled
    by the example's frame count and truncated to an integer, so the same
    frame can be selected more than once.

    Args:
      model_input: A tensor of size batch_size x max_frames x feature_size
      num_frames: A tensor of size batch_size x 1
      num_samples: A scalar

    Returns:
      A tensor of size batch_size x num_samples x feature_size
    """
    batch_size = tf.shape(model_input)[0]

    # One uniform draw for every (example, sample) slot.
    unit_draws = tf.random_uniform([batch_size, num_samples])
    # Per-example frame count, repeated across the sample axis.
    frame_counts = tf.tile(tf.cast(num_frames, tf.float32), [1, num_samples])
    # Scale the draw by the frame count and truncate to get a frame position.
    frame_index = tf.cast(tf.multiply(unit_draws, frame_counts), tf.int32)

    # Row number of each example, repeated so it pairs with every drawn frame.
    example_ids = tf.expand_dims(tf.range(batch_size), 1)
    batch_index = tf.tile(example_ids, [1, num_samples])

    # (example, frame) coordinate pairs drive the gather.
    coords = tf.stack([batch_index, frame_index], 2)
    return tf.gather_nd(model_input, coords)
def update_link_matrix(self, link_matrix_old, precedence_weighting_old, write_weighting):
    """
    Compute the new link matrix for all index pairs at once:
    L[b, i, j] = (1 - w[b, i] - w[b, j]) * L_old[b, i, j] + w[b, i] * p[b, j],
    with the diagonal forced to zero afterwards.

    :param link_matrix_old: from previous time step, shape [batch_size, memory_size, memory_size]
    :param precedence_weighting_old: from previous time step, shape [batch_size, memory_size]
    :param write_weighting: from current time step, shape [batch_size, memory_size]
    :return: updated link matrix
    """
    size = self.memory_size
    w_column = tf.expand_dims(write_weighting, axis=2)

    # w_by_row[b, i, j] = write_weighting[b, i]
    w_by_row = tf.tile(w_column, [1, 1, size])
    # w_by_col[b, i, j] = write_weighting[b, j]
    w_by_col = tf.tile(tf.transpose(w_column, [0, 2, 1]), [1, size, 1])

    retained = (1 - w_by_row - w_by_col) * link_matrix_old
    # Outer product w[b, m] * p[b, n]; two subscript letters are used because
    # einsum does not accept a repeated subscript.
    added = tf.einsum("bn,bm->bmn", precedence_weighting_old, write_weighting)
    lm = retained + added

    # Zero the diagonal so no location links to itself.
    off_diagonal = 1 - tf.eye(size, batch_shape=[self.batch_size])
    lm = lm * off_diagonal
    return tf.identity(lm, name="Link_matrix")
def __init__(self, directory, num_act, mean_path, num_threads=1, capacity=1e5, batch_size=32,
             scale=(1.0/255.0), s_t_shape=None, x_t_1_shape=None, colorspace='gray'):
    """Set up the shuffled input batches and mean-subtract and scale them.

    Args:
      directory: Passed to `_read_and_decode`.
      num_act: Passed to `_read_and_decode`.
      mean_path: Path of the image mean, loaded with `np.load`.
      num_threads: Number of threads for `tf.train.shuffle_batch`.
      capacity: Queue capacity; a quarter of it is used as
        `min_after_dequeue`.
      batch_size: Batch size for `tf.train.shuffle_batch`.
      scale: Factor applied after the mean is subtracted.
      s_t_shape: Shape of the state input; defaults to [84, 84, 4].
      x_t_1_shape: Shape of the next-frame input; defaults to [84, 84, 1].
      colorspace: Not used by this method.
    """
    # Fresh lists per call: list literals as defaults would be shared by
    # every instance that stores them.
    if s_t_shape is None:
        s_t_shape = [84, 84, 4]
    if x_t_1_shape is None:
        x_t_1_shape = [84, 84, 1]

    self.scale = scale
    self.s_t_shape = s_t_shape
    self.x_t_1_shape = x_t_1_shape

    # Load image mean
    mean = np.load(mean_path)

    # Prepare data flow
    s_t, a_t, x_t_1 = _read_and_decode(directory,
                                       s_t_shape=s_t_shape,
                                       num_act=num_act,
                                       x_t_1_shape=x_t_1_shape)
    self.mean = mean
    self.s_t_batch, self.a_t_batch, self.x_t_1_batch = tf.train.shuffle_batch(
        [s_t, a_t, x_t_1],
        batch_size=batch_size, capacity=capacity,
        min_after_dequeue=int(capacity*0.25),
        num_threads=num_threads)

    # Subtract image mean (according to J Oh design)
    self.mean_const = tf.constant(mean, dtype=tf.float32)
    print(self.mean_const.get_shape())
    # The mean is repeated 4 times along its last axis before subtraction.
    # NOTE(review): the 4 presumably matches s_t_shape[2]; confirm before
    # using other shapes.
    self.s_t_batch = (self.s_t_batch - tf.tile(self.mean_const, [1, 1, 4])) * scale
    self.x_t_1_batch = (self.x_t_1_batch - self.mean_const) * scale
def distance_biases(time_steps, window_size=10, reuse=False):
    """
    Build the matrix of learned distance biases for intra-attention.

    Entry (i, j) is the bias stored at index clip(j - i, 0, window_size - 1),
    so every pair with j <= i shares index 0 and every pair at least
    window_size - 1 apart (with j > i) shares the last index.

    Args:
        time_steps: tensor scalar
        window_size: number of distinct bias values
        reuse: reuse variables

    Returns:
        2-d tensor (time_steps, time_steps)
    """
    with tf.variable_scope('distance-bias', reuse=reuse):
        # this is d_{i-j}
        distance_bias = tf.get_variable('dist_bias', [window_size],
                                        initializer=tf.zeros_initializer())

        positions = tf.range(0, time_steps)
        as_row = tf.reshape(positions, [1, -1])
        # Each row of the grid holds 0..time_steps-1.
        column_grid = tf.tile(as_row, tf.stack([time_steps, 1]))
        as_column = tf.reshape(positions, [-1, 1])

        # offsets[i, j] = j - i
        offsets = column_grid - as_column
        bias_index = tf.clip_by_value(offsets, 0, window_size - 1)
        return tf.nn.embedding_lookup(distance_bias, bias_index)
def decoding_layer_infer(encoder_state, dec_cell, dec_embeddings, start_of_sequence_id,
                         end_of_sequence_id, max_target_sequence_length,
                         vocab_size, output_layer, batch_size, keep_prob):
    """Build the greedy inference decoder and run it.

    Args:
      encoder_state: Initial state handed to the decoder.
      dec_cell: Decoder RNN cell.
      dec_embeddings: Embeddings used by the greedy helper.
      start_of_sequence_id: Token id every sequence starts with.
      end_of_sequence_id: Token id passed to the helper as the end token.
      max_target_sequence_length: Upper bound on decoding iterations.
      vocab_size: Not used by this function.
      output_layer: Layer applied to the decoder outputs.
      batch_size: Number of start tokens to create.
      keep_prob: Not used by this function.

    Returns:
      The first element of the `dynamic_decode` result.
    """
    # One start token per sequence in the batch.
    single_start = tf.constant([start_of_sequence_id], dtype=tf.int32)
    start_tokens = tf.tile(single_start, [batch_size], name='start_tokens')

    seq2seq = tf.contrib.seq2seq
    helper = seq2seq.GreedyEmbeddingHelper(dec_embeddings, start_tokens, end_of_sequence_id)
    decoder = seq2seq.BasicDecoder(dec_cell, helper, encoder_state, output_layer)

    decode_result = seq2seq.dynamic_decode(
        decoder,
        impute_finished=True,
        maximum_iterations=max_target_sequence_length)
    infer_decoder_output = decode_result[0]
    return infer_decoder_output
def __call__(self, u_t, a, b, scope=None):
    """
    Mix the gated inputs with weights exp(L . (log(b) * sL)), masked by L.

    NOTE(review): `tf.concat(1, [...])` and `tf.batch_matmul` follow the
    pre-1.0 TensorFlow API; confirm the targeted TensorFlow version.

    :param u_t: [N, M, d]
    :param a: [N, M, 1]
    :param b: [N, M, 1]
    :return: [N, M, d]
    """
    batch, _mem, _hid = self.batch_size, self.mem_size, self.hidden_size
    base_L, base_sL = self.L, self.sL
    scope_name = scope or self.__class__.__name__
    with tf.name_scope(scope_name):
        # Share the same L / sL matrices across the batch.
        per_example = [batch, 1, 1]
        L = tf.tile(tf.expand_dims(base_L, 0), per_example)
        sL = tf.tile(tf.expand_dims(base_sL, 0), per_example)

        # log(b), with the first memory slot replaced by zeros.
        log_b = tf.log(b + 1e-9)
        tail = tf.slice(log_b, [0, 1, 0], [-1, -1, -1])
        log_b = tf.concat(1, [tf.zeros([batch, 1, 1]), tail])

        weights = L * tf.exp(tf.batch_matmul(L, log_b * sL))  # [N, M, M]
        gated = a * u_t  # [N, M, d]
        return tf.batch_matmul(weights, gated)  # [N, M, d]
def __call__(self, u_t, a, b, scope=None):
    """
    Per-feature variant: mix the gated inputs with weights
    exp(L . (log(b) * sL)), masked by L, separately for each of the d
    features.

    NOTE(review): `tf.concat(1, [...])` and `tf.batch_matmul` follow the
    pre-1.0 TensorFlow API; confirm the targeted TensorFlow version.

    :param u_t: [N, M, d]
    :param a: [N, M, d]
    :param b: [N, M, d]
    :return: [N, M, d]
    """
    batch, _mem, hid = self.batch_size, self.mem_size, self.hidden_size
    base_L, base_sL = self.L, self.sL
    scope_name = scope or self.__class__.__name__
    with tf.name_scope(scope_name):
        # Share the same L / sL matrices across batch and feature axes.
        per_feature = [batch, hid, 1, 1]
        L = tf.tile(tf.expand_dims(tf.expand_dims(base_L, 0), 0), per_feature)
        sL = tf.tile(tf.expand_dims(tf.expand_dims(base_sL, 0), 0), per_feature)

        # log(b), with the first memory slot replaced by zeros.
        log_b = tf.log(b + 1e-9)  # [N, M, d]
        tail = tf.slice(log_b, [0, 1, 0], [-1, -1, -1])
        log_b = tf.concat(1, [tf.zeros([batch, 1, hid]), tail])  # [N, M, d]
        # Move the feature axis forward and add a trailing singleton axis.
        log_b = tf.expand_dims(tf.transpose(log_b, [0, 2, 1]), -1)  # [N, d, M, 1]

        weights = L * tf.exp(tf.batch_matmul(L, log_b * sL))  # [N, d, M, M]

        gated = a * u_t  # [N, M, d]
        gated = tf.expand_dims(tf.transpose(gated, [0, 2, 1]), -1)  # [N, d, M, 1]

        mixed = tf.batch_matmul(weights, gated)  # [N, d, M, 1]
        # Back to the input layout.
        return tf.transpose(tf.squeeze(mixed, [3]), [0, 2, 1])  # [N, M, d]
def __call__(self, u_t, a, b, scope=None):
    """
    Mix the gated inputs with weights exp(L . (log(b) * sL)), masked by L.

    NOTE(review): `tf.concat(1, [...])` and `tf.batch_matmul` follow the
    pre-1.0 TensorFlow API; confirm the targeted TensorFlow version.

    :param u_t: [N, M, d]
    :param a: [N, M, 1]
    :param b: [N, M, 1]
    :return: [N, M, d]
    """
    batch, _mem, _hid = self.batch_size, self.mem_size, self.hidden_size
    base_L, base_sL = self.L, self.sL
    scope_name = scope or self.__class__.__name__
    with tf.name_scope(scope_name):
        # Share the same L / sL matrices across the batch.
        per_example = [batch, 1, 1]
        L = tf.tile(tf.expand_dims(base_L, 0), per_example)
        sL = tf.tile(tf.expand_dims(base_sL, 0), per_example)

        # log(b), with the first memory slot replaced by zeros.
        log_b = tf.log(b + 1e-9)
        tail = tf.slice(log_b, [0, 1, 0], [-1, -1, -1])
        log_b = tf.concat(1, [tf.zeros([batch, 1, 1]), tail])

        weights = L * tf.exp(tf.batch_matmul(L, log_b * sL))  # [N, M, M]
        gated = a * u_t  # [N, M, d]
        return tf.batch_matmul(weights, gated)  # [N, M, d]
def __call__(self, u_t, a, b, scope=None):
    """
    Per-feature variant: mix the gated inputs with weights
    exp(L . (log(b) * sL)), masked by L, separately for each of the d
    features.

    NOTE(review): `tf.concat(1, [...])` and `tf.batch_matmul` follow the
    pre-1.0 TensorFlow API; confirm the targeted TensorFlow version.

    :param u_t: [N, M, d]
    :param a: [N, M, d]
    :param b: [N, M, d]
    :return: [N, M, d]
    """
    batch, _mem, hid = self.batch_size, self.mem_size, self.hidden_size
    base_L, base_sL = self.L, self.sL
    scope_name = scope or self.__class__.__name__
    with tf.name_scope(scope_name):
        # Share the same L / sL matrices across batch and feature axes.
        per_feature = [batch, hid, 1, 1]
        L = tf.tile(tf.expand_dims(tf.expand_dims(base_L, 0), 0), per_feature)
        sL = tf.tile(tf.expand_dims(tf.expand_dims(base_sL, 0), 0), per_feature)

        # log(b), with the first memory slot replaced by zeros.
        log_b = tf.log(b + 1e-9)  # [N, M, d]
        tail = tf.slice(log_b, [0, 1, 0], [-1, -1, -1])
        log_b = tf.concat(1, [tf.zeros([batch, 1, hid]), tail])  # [N, M, d]
        # Move the feature axis forward and add a trailing singleton axis.
        log_b = tf.expand_dims(tf.transpose(log_b, [0, 2, 1]), -1)  # [N, d, M, 1]

        weights = L * tf.exp(tf.batch_matmul(L, log_b * sL))  # [N, d, M, M]

        gated = a * u_t  # [N, M, d]
        gated = tf.expand_dims(tf.transpose(gated, [0, 2, 1]), -1)  # [N, d, M, 1]

        mixed = tf.batch_matmul(weights, gated)  # [N, d, M, 1]
        # Back to the input layout.
        return tf.transpose(tf.squeeze(mixed, [3]), [0, 2, 1])  # [N, M, d]
def get_output_for(self, input, **kwargs):
    """Run `self.step` over the time axis and return every hidden state.

    The input is flattened to (batch, steps, features), scanned in
    time-major order and the result is transposed back to batch-major.

    Optional keyword arguments:
      recurrent_state: mapping that may supply the initial hidden state for
        this layer (keyed by the layer object); otherwise `self.h0` is
        repeated for every example.
      recurrent_state_output: mapping that receives the resulting hidden
        states under this layer's key.
    """
    dynamic_shape = tf.shape(input)
    n_batches, n_steps = dynamic_shape[0], dynamic_shape[1]
    # Flatten extra dimensions into a single feature axis.
    input = tf.reshape(input, tf.pack([n_batches, n_steps, -1]))

    if 'recurrent_state' not in kwargs or self not in kwargs['recurrent_state']:
        initial_row = tf.reshape(self.h0, (1, self.num_units))
        h0s = tf.tile(initial_row, (n_batches, 1))
    else:
        h0s = kwargs['recurrent_state'][self]

    # tf.scan iterates over axis 0, so put time first.
    time_major = tf.transpose(input, (1, 0, 2))
    hs = tf.scan(self.step, elems=time_major, initializer=h0s)
    shuffled_hs = tf.transpose(hs, (1, 0, 2))

    if 'recurrent_state_output' in kwargs:
        kwargs['recurrent_state_output'][self] = shuffled_hs
    return shuffled_hs
def get_output_for(self, input, **kwargs):
    """Run `self.step` over the time axis and return every hidden state.

    The scanned state is the hidden state and the cell state concatenated
    along axis 1; only the hidden part (first `num_units` columns) is
    returned, in batch-major order.
    """
    dynamic_shape = tf.shape(input)
    n_batches, n_steps = dynamic_shape[0], dynamic_shape[1]
    # Flatten extra dimensions into a single feature axis.
    input = tf.reshape(input, tf.pack([n_batches, n_steps, -1]))

    # Initial cell state repeated per example; the initial hidden state is
    # the nonlinearity applied to it.
    initial_row = tf.reshape(self.c0, (1, self.num_units))
    c0s = tf.tile(initial_row, (n_batches, 1))
    h0s = self.nonlinearity(c0s)

    # tf.scan iterates over axis 0, so put time first.
    time_major = tf.transpose(input, (1, 0, 2))
    joint_initial = tf.concat(1, [h0s, c0s])
    hcs = tf.scan(self.step, elems=time_major, initializer=joint_initial)
    shuffled_hcs = tf.transpose(hcs, (1, 0, 2))

    # Split the joint state back into hidden and cell parts.
    units = self.num_units
    shuffled_hs = shuffled_hcs[:, :, :units]
    shuffled_cs = shuffled_hcs[:, :, units:]
    return shuffled_hs
def create(self, args):
    """Create the image loader and expose its batch plus a grayscale copy.

    `self.xa` and `self.x` are the loader's `x`; `self.xb` is `x` converted
    to grayscale and repeated three times along the last axis. `channels`,
    `width` and `height` are not defined here and come from the enclosing
    scope.
    """
    loader = hg.inputs.image_loader.ImageLoader(args.batch_size)
    self.inputsa = loader
    loader.create(
        args.directory,
        channels=channels,
        format=args.format,
        crop=args.crop,
        width=width,
        height=height,
        resize=True)

    xa = loader.x
    # Grayscale has one channel; tile it so it has three again.
    gray = tf.image.rgb_to_grayscale(xa)
    xb = tf.tile(gray, [1, 1, 1, 3])

    self.xa = xa
    self.x = xa  # TODO remove
    self.xb = xb
def SampleRandomFrames(model_input, num_frames, num_samples):
    """Pick `num_samples` frames per example at random positions.

    Every position is drawn independently: a uniform random value is scaled
    by the example's frame count and truncated to an integer, so the same
    frame can be selected more than once.

    Args:
      model_input: A tensor of size batch_size x max_frames x feature_size
      num_frames: A tensor of size batch_size x 1
      num_samples: A scalar

    Returns:
      A tensor of size batch_size x num_samples x feature_size
    """
    batch_size = tf.shape(model_input)[0]

    # One uniform draw for every (example, sample) slot.
    unit_draws = tf.random_uniform([batch_size, num_samples])
    # Per-example frame count, repeated across the sample axis.
    frame_counts = tf.tile(tf.cast(num_frames, tf.float32), [1, num_samples])
    # Scale the draw by the frame count and truncate to get a frame position.
    frame_index = tf.cast(tf.multiply(unit_draws, frame_counts), tf.int32)

    # Row number of each example, repeated so it pairs with every drawn frame.
    example_ids = tf.expand_dims(tf.range(batch_size), 1)
    batch_index = tf.tile(example_ids, [1, num_samples])

    # (example, frame) coordinate pairs drive the gather.
    coords = tf.stack([batch_index, frame_index], 2)
    return tf.gather_nd(model_input, coords)
def SampleRandomFrames(model_input, num_frames, num_samples):
    """Pick `num_samples` frames per example at random positions.

    Every position is drawn independently: a uniform random value is scaled
    by the example's frame count and truncated to an integer, so the same
    frame can be selected more than once.

    Args:
      model_input: A tensor of size batch_size x max_frames x feature_size
      num_frames: A tensor of size batch_size x 1
      num_samples: A scalar

    Returns:
      A tensor of size batch_size x num_samples x feature_size
    """
    batch_size = tf.shape(model_input)[0]

    # One uniform draw for every (example, sample) slot.
    unit_draws = tf.random_uniform([batch_size, num_samples])
    # Per-example frame count, repeated across the sample axis.
    frame_counts = tf.tile(tf.cast(num_frames, tf.float32), [1, num_samples])
    # Scale the draw by the frame count and truncate to get a frame position.
    frame_index = tf.cast(tf.multiply(unit_draws, frame_counts), tf.int32)

    # Row number of each example, repeated so it pairs with every drawn frame.
    example_ids = tf.expand_dims(tf.range(batch_size), 1)
    batch_index = tf.tile(example_ids, [1, num_samples])

    # (example, frame) coordinate pairs drive the gather.
    coords = tf.stack([batch_index, frame_index], 2)
    return tf.gather_nd(model_input, coords)
def inner_tile(tensor, shape, freq):
    """
    Gather values from `tensor` at tiled index grids, then resample the
    result to `shape`.

    :param tensor: source tensor indexed by (y, x) pairs
    :param shape: target shape; the first two entries are divided by `freq`
        and the third is kept as is
    :param freq: an int (expanded with `freq_for_shape`) or a pair of
        per-axis values
    :return: result of `resample(..., shape, spline_order=1)`

    NOTE(review): both tile calls repeat by `freq[0]` on both axes; confirm
    this is intended when freq[0] != freq[1].
    """
    if isinstance(freq, int):
        freq = freq_for_shape(freq, shape)

    freq_y, freq_x = freq[0], freq[1]
    small_shape = [int(shape[0] / freq_y), int(shape[1] / freq_x), shape[2]]

    multiples = [freq_y, freq_y]
    y_index = tf.tile(column_index(small_shape) * freq_y, multiples)
    x_index = tf.tile(row_index(small_shape) * freq_x, multiples)

    coords = tf.stack([y_index, x_index], 2)
    gathered = tf.gather_nd(tensor, coords)
    return resample(gathered, shape, spline_order=1)