def switch(condition, then_tensor, else_tensor):
"""
Keras' implementation of switch for tensorflow uses tf.switch which accepts only scalar conditions.
It should use tf.select instead.
"""
if K.backend() == 'tensorflow':
import tensorflow as tf
condition_shape = condition.get_shape()
input_shape = then_tensor.get_shape()
if condition_shape[-1] != input_shape[-1] and condition_shape[-1] == 1:
# The last dim here is an embedding dim, which Keras does not mask, but TF requires the
# condition and the then/else tensors to have the same shape, so broadcast the condition.
condition = K.dot(tf.cast(condition, tf.float32), tf.ones((1, input_shape[-1])))
return tf.select(tf.cast(condition, dtype=tf.bool), then_tensor, else_tensor)
else:
import theano.tensor as T
return T.switch(condition, then_tensor, else_tensor)
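For reference, a minimal usage sketch of the helper above (hypothetical shapes; the helper's module is assumed to import keras.backend as K, and note that tf.select is the pre-1.0 TensorFlow op, later renamed tf.where):

import tensorflow as tf

mask = tf.constant([[1.0], [0.0]])  # [batch=2, 1]; last dim plays the embedding role
then_t = tf.ones((2, 4))            # [batch, embed_dim]
else_t = tf.zeros((2, 4))
out = switch(mask, then_t, else_t)  # row 0 -> [1,1,1,1], row 1 -> [0,0,0,0]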
Python tf.ones() usage examples
Source: seq2seq_helpers.py (project: almond-nnparser, author: Stanford-Mobisocial-IoT-Lab)
def decode(self, cell_dec, enc_final_state, output_size, output_embed_matrix, training, grammar_helper=None):
if self.config.use_dot_product_output:
output_layer = DotProductLayer(output_embed_matrix)
else:
output_layer = tf.layers.Dense(output_size, use_bias=False)
go_vector = tf.ones((self.batch_size,), dtype=tf.int32) * self.config.grammar.start
if training:
output_ids_with_go = tf.concat([tf.expand_dims(go_vector, axis=1), self.output_placeholder], axis=1)
outputs = tf.nn.embedding_lookup([output_embed_matrix], output_ids_with_go)
helper = TrainingHelper(outputs, self.output_length_placeholder+1)
else:
helper = GreedyEmbeddingHelper(output_embed_matrix, go_vector, self.config.grammar.end)
if self.config.use_grammar_constraints:
decoder = GrammarBasicDecoder(self.config.grammar, cell_dec, helper, enc_final_state,
output_layer=output_layer,
training_output=self.output_placeholder if training else None,
grammar_helper=grammar_helper)
else:
decoder = BasicDecoder(cell_dec, helper, enc_final_state, output_layer=output_layer)
final_outputs, _, _ = tf.contrib.seq2seq.dynamic_decode(decoder, impute_finished=True, maximum_iterations=self.max_length)
return final_outputs
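The go_vector construction above is the standard tf.ones trick for building a batch of identical start tokens; a hedged sketch with assumed vocabulary IDs:

GO_ID, EOS_ID, batch_size = 1, 2, 16                           # hypothetical IDs and batch size
start_tokens = tf.ones((batch_size,), dtype=tf.int32) * GO_ID  # [16] filled with GO_ID
helper = GreedyEmbeddingHelper(output_embed_matrix, start_tokens, EOS_ID)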
def calculate_loss_distill_relabel(self, predictions, labels_distill, labels, **unused_params):
with tf.name_scope("loss_distill_relabel"):
print("loss_distill_relabel")
epsilon = 10e-6  # note: 10e-6 == 1e-5
float_labels = tf.cast(labels, tf.float32)
sum_labels = tf.cast(tf.reduce_sum(float_labels),dtype=tf.int32)
pos_distill, _ = tf.nn.top_k(tf.reshape(labels_distill,[-1]), k=sum_labels)
labels_true = tf.ones(tf.shape(labels))
labels_false = tf.zeros(tf.shape(labels))
labels_add = tf.where(tf.greater_equal(labels_distill, pos_distill[-1]), labels_true, labels_false)
print(labels_add.get_shape().as_list())
float_labels = float_labels+labels_add*(1.0-float_labels)
cross_entropy_loss = float_labels * tf.log(predictions + epsilon) + (
1 - float_labels) * tf.log(1 - predictions + epsilon)
cross_entropy_loss = tf.negative(cross_entropy_loss)
return tf.reduce_mean(tf.reduce_sum(cross_entropy_loss, 1))
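The top_k/where combination above promotes everything scoring at least the k-th best distilled score to a positive label; a toy illustration with hypothetical values:

labels_distill = tf.constant([[0.9, 0.2], [0.1, 0.8]])
topk, _ = tf.nn.top_k(tf.reshape(labels_distill, [-1]), k=2)  # pretend sum(labels) == 2
labels_add = tf.where(tf.greater_equal(labels_distill, topk[-1]),
tf.ones((2, 2)), tf.zeros((2, 2)))  # threshold is 0.8 -> [[1,0],[0,1]]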
def prepare_reader(self, filename_queue, batch_size=1024):
reader = tf.TFRecordReader()
_, serialized_examples = reader.read_up_to(filename_queue, batch_size)
# set the mapping from the fields to data types in the proto
num_features = len(self.feature_names)
assert num_features > 0, "self.feature_names is empty!"
assert len(self.feature_names) == len(self.feature_sizes), \
"length of feature_names (={}) != length of feature_sizes (={})".format( \
len(self.feature_names), len(self.feature_sizes))
feature_map = {"video_id": tf.FixedLenFeature([], tf.string),
"labels": tf.VarLenFeature(tf.int64)}
for feature_index in range(num_features):
feature_map[self.feature_names[feature_index]] = tf.FixedLenFeature(
[self.feature_sizes[feature_index]], tf.float32)
features = tf.parse_example(serialized_examples, features=feature_map)
labels = tf.sparse_to_indicator(features["labels"], self.num_classes)
labels.set_shape([None, self.num_classes])
concatenated_features = tf.concat([
features[feature_name] for feature_name in self.feature_names], 1)
return features["video_id"], concatenated_features, labels, tf.ones([tf.shape(serialized_examples)[0]])
def test_encode(self):
inputs = tf.random_normal(
[self.batch_size, self.sequence_length, self.input_depth])
example_length = tf.ones(
self.batch_size, dtype=tf.int32) * self.sequence_length
encode_fn = rnn_encoder.UnidirectionalRNNEncoder(self.params, self.mode)
encoder_output = encode_fn(inputs, example_length)
with self.test_session() as sess:
sess.run(tf.global_variables_initializer())
encoder_output_ = sess.run(encoder_output)
np.testing.assert_array_equal(encoder_output_.outputs.shape,
[self.batch_size, self.sequence_length, 32])
self.assertIsInstance(encoder_output_.final_state,
tf.contrib.rnn.LSTMStateTuple)
np.testing.assert_array_equal(encoder_output_.final_state.h.shape,
[self.batch_size, 32])
np.testing.assert_array_equal(encoder_output_.final_state.c.shape,
[self.batch_size, 32])
def _test_encode_with_params(self, params):
"""Tests the StackBidirectionalRNNEncoder with a specific cell"""
inputs = tf.random_normal(
[self.batch_size, self.sequence_length, self.input_depth])
example_length = tf.ones(
self.batch_size, dtype=tf.int32) * self.sequence_length
encode_fn = rnn_encoder.StackBidirectionalRNNEncoder(params, self.mode)
encoder_output = encode_fn(inputs, example_length)
with self.test_session() as sess:
sess.run(tf.global_variables_initializer())
encoder_output_ = sess.run(encoder_output)
output_size = encode_fn.params["rnn_cell"]["cell_params"]["num_units"]
np.testing.assert_array_equal(
encoder_output_.outputs.shape,
[self.batch_size, self.sequence_length, output_size * 2])
return encoder_output_
def test_with_fixed_inputs(self):
inputs = tf.random_normal(
[self.batch_size, self.sequence_length, self.input_depth])
seq_length = tf.ones(self.batch_size, dtype=tf.int32) * self.sequence_length
helper = decode_helper.TrainingHelper(
inputs=inputs, sequence_length=seq_length)
decoder_fn = self.create_decoder(
helper=helper, mode=tf.contrib.learn.ModeKeys.TRAIN)
initial_state = decoder_fn.cell.zero_state(
self.batch_size, dtype=tf.float32)
decoder_output, _ = decoder_fn(initial_state, helper)
#pylint: disable=E1101
with self.test_session() as sess:
sess.run(tf.global_variables_initializer())
decoder_output_ = sess.run(decoder_output)
np.testing.assert_array_equal(
decoder_output_.logits.shape,
[self.sequence_length, self.batch_size, self.vocab_size])
np.testing.assert_array_equal(decoder_output_.predicted_ids.shape,
[self.sequence_length, self.batch_size])
return decoder_output_
def _add_mh_correction(self, initial_position, initial_velocity, final_position, final_velocity):
""" Applies MH accept/reject correction. """
initial_energy = self._hamiltonian(initial_position, initial_velocity)
final_energy = self._hamiltonian(final_position, final_velocity)
accepted = self._metropolis_hastings_accept(initial_energy, final_energy)
accepted = tf.to_float(accepted)
# add acceptance to fetched values
self._accepted = accepted
if self.seek_step_sizes or self.fade_in_velocities:
burned_in = tf.to_float(self._burn_in_ratio == 1)
accepted = accepted * burned_in + tf.ones(shape=tf.shape(accepted)) * (1 - burned_in)
# apply MH decision
final_position = self._transpose_mul(final_position, accepted) + \
self._transpose_mul(initial_position, tf.ones(shape=tf.shape(accepted)) - accepted)
final_velocity = self._transpose_mul(final_velocity, accepted) + \
self._transpose_mul(-initial_velocity, tf.ones(shape=tf.shape(accepted)) - accepted)
return final_position, final_velocity
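The position/velocity updates above implement a per-chain select with arithmetic; a toy sketch of the same blend (hypothetical values, with the expand_dims standing in for _transpose_mul):

accepted = tf.constant([1.0, 0.0])                # chain 0 accepts, chain 1 rejects
proposed = tf.constant([[2.0, 2.0], [3.0, 3.0]])
initial = tf.constant([[0.0, 0.0], [1.0, 1.0]])
keep = tf.ones(shape=tf.shape(accepted)) - accepted
blended = proposed * tf.expand_dims(accepted, 1) + initial * tf.expand_dims(keep, 1)  # [[2,2],[1,1]]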
def get_optimizer(self, learning_rate = 0.001):
with tf.name_scope('loss'):
input_shape = tf.shape(self.inputs)
ones = tf.ones([input_shape[0], input_shape[1]])
loss = tf.contrib.seq2seq.sequence_loss(self.logits, self.targets,
ones)
#-----------------------------------------------------------------------
# Build the optimizer
#-----------------------------------------------------------------------
with tf.name_scope('optimizer'):
optimizer = tf.train.AdamOptimizer(learning_rate)
gradients = optimizer.compute_gradients(loss)
capped_gradients = [(tf.clip_by_value(grad, -1., 1.), var) \
for grad, var in gradients if grad is not None]
optimizer_op = optimizer.apply_gradients(capped_gradients)
return optimizer_op, loss
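The tf.ones weight matrix above gives every time step equal weight in the loss; a hedged variant that masks padding instead (target_lengths is an assumed [batch] tensor of true sequence lengths):

weights = tf.sequence_mask(target_lengths, maxlen=tf.shape(self.targets)[1], dtype=tf.float32)
loss = tf.contrib.seq2seq.sequence_loss(self.logits, self.targets, weights)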
def discriminate(self, image, Y):
print("Initializing the discriminator")
print("Y shape", Y.get_shape())
yb = tf.reshape(Y, tf.stack([self.batch_size, 1, 1, self.dim_y]))
print("image shape", image.get_shape())
print("yb shape", yb.get_shape())
X = tf.concat([image, yb * tf.ones([self.batch_size, 24, 24, self.dim_y])],3)
print("X shape", X.get_shape())
h1 = lrelu( tf.nn.conv2d( X, self.discrim_W1, strides=[1,2,2,1], padding='SAME' ))
print("h1 shape", h1.get_shape())
h1 = tf.concat([h1, yb * tf.ones([self.batch_size, 12, 12, self.dim_y])],3)
print("h1 shape", h1.get_shape())
h2 = lrelu(batchnormalize( tf.nn.conv2d( h1, self.discrim_W2, strides=[1,2,2,1], padding='SAME')) )
print("h2 shape", h2.get_shape())
h2 = tf.reshape(h2, [self.batch_size, -1])
h2 = tf.concat([h2, Y], 1)
discri=tf.matmul(h2, self.discrim_W3 )
print("discri shape", discri.get_shape())
h3 = lrelu(batchnormalize(discri))
return h3
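The yb * tf.ones(...) products above are a broadcast trick that tiles the [batch, 1, 1, dim_y] condition block across the spatial dimensions before concatenation; a toy check with hypothetical sizes:

yb = tf.reshape(tf.one_hot([2, 5], 10), [2, 1, 1, 10])  # [batch=2, 1, 1, dim_y=10]
tiled = yb * tf.ones([2, 24, 24, 10])                   # broadcasts to [2, 24, 24, 10]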
def samples_generator(self, batch_size):
Z = tf.placeholder(tf.float32, [batch_size, self.dim_z])
Y = tf.placeholder(tf.float32, [batch_size, self.dim_y])
yb = tf.reshape(Y, [batch_size, 1, 1, self.dim_y])
Z_ = tf.concat([Z,Y], 1)
h1 = tf.nn.relu(batchnormalize(tf.matmul(Z_, self.gen_W1)))
h1 = tf.concat([h1, Y], 1)
h2 = tf.nn.relu(batchnormalize(tf.matmul(h1, self.gen_W2)))
h2 = tf.reshape(h2, [batch_size,6,6,self.dim_W2])
h2 = tf.concat([h2, yb*tf.ones([batch_size, 6,6, self.dim_y])], 3)
output_shape_l3 = [batch_size,12,12,self.dim_W3]
h3 = tf.nn.conv2d_transpose(h2, self.gen_W3, output_shape=output_shape_l3, strides=[1,2,2,1])
h3 = tf.nn.relu( batchnormalize(h3) )
h3 = tf.concat([h3, yb*tf.ones([batch_size, 12,12,self.dim_y])], 3)
output_shape_l4 = [batch_size,24,24,self.dim_channel]
h4 = tf.nn.conv2d_transpose(h3, self.gen_W4, output_shape=output_shape_l4, strides=[1,2,2,1])
x = tf.nn.sigmoid(h4)
return Z, Y, x
def conv_cond_concat(x, y):
"""Concatenate conditioning vector on feature map axis."""
#print('input x:',x.get_shape().as_list())
#print('input y:',y.get_shape().as_list())
xshape=x.get_shape()
#tile by [1,64,64,1]
tile_shape=tf.stack([1,xshape[1],xshape[2],1])
tile_y=tf.tile(y,tile_shape)
#print('tile y:',tile_y.get_shape().as_list())
return tf.concat([x,tile_y],axis=3)
#x_shapes = x.get_shape()
#y_shapes = y.get_shape()
#return tf.concat([
#x, y*tf.ones([x_shapes[0], x_shapes[1], x_shapes[2], y_shapes[3]])], 3)
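A usage sketch for the helper above (hypothetical shapes): condition a 64x64 feature map on a 10-way label.

x = tf.zeros([8, 64, 64, 32])
y = tf.reshape(tf.one_hot([3] * 8, 10), [8, 1, 1, 10])
out = conv_cond_concat(x, y)  # -> [8, 64, 64, 42]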
Source: a1_seq2seq_attention_model.py (project: text_classification, author: brightmart)
def gru_forward(self, embedded_words,gru_cell, reverse=False):
"""
:param embedded_words:[None,sequence_length, self.embed_size]
:return:forward hidden state: a list.length is sentence_length, each element is [batch_size,hidden_size]
"""
# split embedded_words
embedded_words_splitted = tf.split(embedded_words, self.sequence_length,axis=1) # it is a list,length is sentence_length, each element is [batch_size,1,embed_size]
embedded_words_squeeze = [tf.squeeze(x, axis=1) for x in embedded_words_splitted] # it is a list,length is sentence_length, each element is [batch_size,embed_size]
h_t = tf.ones((self.batch_size,self.hidden_size))
h_t_list = []
if reverse:
embedded_words_squeeze.reverse()
for time_step, Xt in enumerate(embedded_words_squeeze): # Xt: [batch_size,embed_size]
h_t = gru_cell(Xt, h_t)  # h_t: [batch_size,hidden_size] <------ Xt: [batch_size,embed_size]; h_t: [batch_size,hidden_size]
h_t_list.append(h_t)
if reverse:
h_t_list.reverse()
return h_t_list # a list,length is sentence_length, each element is [batch_size,hidden_size]
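The split/squeeze pattern above converts a [batch, time, embed] tensor into a Python list of per-step [batch, embed] tensors; a toy sketch with hypothetical sizes:

emb = tf.random_normal([4, 7, 16])              # [batch, seq_len, embed]
steps = tf.split(emb, 7, axis=1)                # 7 tensors of shape [4, 1, 16]
steps = [tf.squeeze(s, axis=1) for s in steps]  # 7 tensors of shape [4, 16]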
Source: p1_HierarchicalAttention_model.py (project: text_classification, author: brightmart)
def gru_backward_word_level(self, embedded_words):
"""
:param embedded_words:[batch_size*num_sentences,sentence_length,embed_size]
:return: backward hidden state:a list.length is sentence_length, each element is [batch_size*num_sentences,hidden_size]
"""
# split embedded_words
embedded_words_splitted = tf.split(embedded_words, self.sequence_length,
axis=1) # it is a list,length is sentence_length, each element is [batch_size*num_sentences,1,embed_size]
embedded_words_squeeze = [tf.squeeze(x, axis=1) for x in
embedded_words_splitted] # it is a list,length is sentence_length, each element is [batch_size*num_sentences,embed_size]
embedded_words_squeeze.reverse() # it is a list,length is sentence_length, each element is [batch_size*num_sentences,embed_size]
# demension_1=int(tf.get_shape(embedded_words_squeeze[0])[0]) #h_t = tf.ones([self.batch_size*self.num_sentences, self.hidden_size])
h_t = tf.ones((self.batch_size * self.num_sentences, self.hidden_size))
h_t_backward_list = []
for time_step, Xt in enumerate(embedded_words_squeeze):
h_t = self.gru_single_step_word_level(Xt, h_t)
h_t_backward_list.append(h_t)
h_t_backward_list.reverse() #ADD 2017.06.14
return h_t_backward_list
# forward gru for second level: sentence level
Source: p1_HierarchicalAttention_model.py (project: text_classification, author: brightmart)
def gru_forward_sentence_level(self, sentence_representation):
"""
:param sentence_representation: [batch_size,num_sentences,hidden_size*2]
:return:forward hidden state: a list,length is num_sentences, each element is [batch_size,hidden_size]
"""
# split embedded_words
sentence_representation_splitted = tf.split(sentence_representation, self.num_sentences,
axis=1) # it is a list.length is num_sentences,each element is [batch_size,1,hidden_size*2]
sentence_representation_squeeze = [tf.squeeze(x, axis=1) for x in
sentence_representation_splitted] # it is a list.length is num_sentences,each element is [batch_size, hidden_size*2]
# demension_1 = int(tf.get_shape(sentence_representation_squeeze[0])[0]) #scalar: batch_size
h_t = tf.ones((self.batch_size, self.hidden_size * 2)) # TODO
h_t_forward_list = []
for time_step, Xt in enumerate(sentence_representation_squeeze): # Xt:[batch_size, hidden_size*2]
h_t = self.gru_single_step_sentence_level(Xt,
h_t) # h_t:[batch_size,hidden_size]<---------Xt:[batch_size, hidden_size*2]; h_t:[batch_size, hidden_size*2]
h_t_forward_list.append(h_t)
return h_t_forward_list # a list,length is num_sentences, each element is [batch_size,hidden_size]
# backward gru for second level: sentence level
Source: p1_HierarchicalAttention_model.py (project: text_classification, author: brightmart)
def gru_backward_sentence_level(self, sentence_representation):
"""
:param sentence_representation: [batch_size,num_sentences,hidden_size*2]
:return: backward hidden state: a list, length is num_sentences, each element is [batch_size,hidden_size]
"""
# split embedded_words
sentence_representation_splitted = tf.split(sentence_representation, self.num_sentences,
axis=1) # it is a list.length is num_sentences,each element is [batch_size,1,hidden_size*2]
sentence_representation_squeeze = [tf.squeeze(x, axis=1) for x in
sentence_representation_splitted] # it is a list.length is num_sentences,each element is [batch_size, hidden_size*2]
sentence_representation_squeeze.reverse()
# demension_1 = int(tf.get_shape(sentence_representation_squeeze[0])[0]) # scalar: batch_size
h_t = tf.ones((self.batch_size, self.hidden_size * 2))
h_t_forward_list = []
for time_step, Xt in enumerate(sentence_representation_squeeze): # Xt:[batch_size, hidden_size*2]
h_t = self.gru_single_step_sentence_level(Xt,h_t) # h_t:[batch_size,hidden_size]<---------Xt:[batch_size, hidden_size*2]; h_t:[batch_size, hidden_size*2]
h_t_forward_list.append(h_t)
h_t_forward_list.reverse() #ADD 2017.06.14
return h_t_forward_list # a list,length is num_sentences, each element is [batch_size,hidden_size]
Source: p1_HierarchicalAttention_model_transformer.py (project: text_classification, author: brightmart)
def gru_forward_word_level(self, embedded_words):
"""
:param embedded_words:[batch_size*num_sentences,sentence_length,embed_size]
:return:forward hidden state: a list.length is sentence_length, each element is [batch_size*num_sentences,hidden_size]
"""
# split embedded_words
embedded_words_splitted = tf.split(embedded_words, self.sequence_length,
axis=1) # it is a list,length is sentence_length, each element is [batch_size*num_sentences,1,embed_size]
embedded_words_squeeze = [tf.squeeze(x, axis=1) for x in
embedded_words_splitted] # it is a list,length is sentence_length, each element is [batch_size*num_sentences,embed_size]
# demension_1=embedded_words_squeeze[0].get_shape().dims[0]
h_t = tf.ones((self.batch_size * self.num_sentences,
self.hidden_size))  # TODO: initial hidden state, shape [batch_size*num_sentences, hidden_size]
h_t_forward_list = []
for time_step, Xt in enumerate(embedded_words_squeeze): # Xt: [batch_size*num_sentences,embed_size]
h_t = self.gru_single_step_word_level(Xt, h_t)  # h_t: [batch_size*num_sentences,hidden_size] <------ Xt: [batch_size*num_sentences,embed_size]
h_t_forward_list.append(h_t)
return h_t_forward_list # a list,length is sentence_length, each element is [batch_size*num_sentences,hidden_size]
# backward gru for first level: word level
Source: p1_HierarchicalAttention_model_transformer.py (project: text_classification, author: brightmart)
def gru_backward_word_level(self, embedded_words):
"""
:param embedded_words:[batch_size*num_sentences,sentence_length,embed_size]
:return: backward hidden state:a list.length is sentence_length, each element is [batch_size*num_sentences,hidden_size]
"""
# split embedded_words
embedded_words_splitted = tf.split(embedded_words, self.sequence_length,
axis=1) # it is a list,length is sentence_length, each element is [batch_size*num_sentences,1,embed_size]
embedded_words_squeeze = [tf.squeeze(x, axis=1) for x in
embedded_words_splitted] # it is a list,length is sentence_length, each element is [batch_size*num_sentences,embed_size]
embedded_words_squeeze.reverse() # it is a list,length is sentence_length, each element is [batch_size*num_sentences,embed_size]
# demension_1=int(tf.get_shape(embedded_words_squeeze[0])[0]) #h_t = tf.ones([self.batch_size*self.num_sentences, self.hidden_size])
h_t = tf.ones((self.batch_size * self.num_sentences, self.hidden_size))
h_t_backward_list = []
for time_step, Xt in enumerate(embedded_words_squeeze):
h_t = self.gru_single_step_word_level(Xt, h_t)
h_t_backward_list.append(h_t)
h_t_backward_list.reverse() #ADD 2017.06.14
return h_t_backward_list
# forward gru for second level: sentence level
Source: p1_HierarchicalAttention_model_transformer.py (project: text_classification, author: brightmart)
def gru_backward_sentence_level(self, sentence_representation):
"""
:param sentence_representation: [batch_size,num_sentences,hidden_size*2]
:return: backward hidden state: a list, length is num_sentences, each element is [batch_size,hidden_size]
"""
# split embedded_words
sentence_representation_splitted = tf.split(sentence_representation, self.num_sentences,
axis=1) # it is a list.length is num_sentences,each element is [batch_size,1,hidden_size*2]
sentence_representation_squeeze = [tf.squeeze(x, axis=1) for x in
sentence_representation_splitted] # it is a list.length is num_sentences,each element is [batch_size, hidden_size*2]
sentence_representation_squeeze.reverse()
# demension_1 = int(tf.get_shape(sentence_representation_squeeze[0])[0]) # scalar: batch_size
h_t = tf.ones((self.batch_size, self.hidden_size * 2))
h_t_forward_list = []
for time_step, Xt in enumerate(sentence_representation_squeeze): # Xt:[batch_size, hidden_size*2]
h_t = self.gru_single_step_sentence_level(Xt,h_t) # h_t:[batch_size,hidden_size]<---------Xt:[batch_size, hidden_size*2]; h_t:[batch_size, hidden_size*2]
h_t_forward_list.append(h_t)
h_t_forward_list.reverse() #ADD 2017.06.14
return h_t_forward_list # a list,length is num_sentences, each element is [batch_size,hidden_size]
def _generate_labels(self, overlaps):
labels = tf.Variable(tf.ones(shape=(tf.shape(overlaps)[0],), dtype=tf.float32) * -1, trainable=False,
validate_shape=False)
gt_max_overlaps = tf.arg_max(overlaps, dimension=0)
anchor_max_overlaps = tf.arg_max(overlaps, dimension=1)
mask = tf.one_hot(anchor_max_overlaps, tf.shape(overlaps)[1], on_value=True, off_value=False)
max_overlaps = tf.boolean_mask(overlaps, mask)
if self._debug:
max_overlaps = tf.Print(max_overlaps, [max_overlaps])
labels = tf.scatter_update(labels, gt_max_overlaps, tf.ones((tf.shape(gt_max_overlaps)[0],)))
# TODO: extract config object
over_threshold_mask = tf.reshape(tf.where(max_overlaps > 0.5), (-1,))
if self._debug:
over_threshold_mask = tf.Print(over_threshold_mask, [over_threshold_mask], message='over threshold index : ')
labels = tf.scatter_update(labels, over_threshold_mask, tf.ones((tf.shape(over_threshold_mask)[0],)))
# TODO: support clobber positive in the origin implement
below_threshold_mask = tf.reshape(tf.where(max_overlaps < 0.3), (-1,))
if self._debug:
below_threshold_mask = tf.Print(below_threshold_mask, [below_threshold_mask], message='below threshold index : ')
labels = tf.scatter_update(labels, below_threshold_mask, tf.zeros((tf.shape(below_threshold_mask)[0],)))
return labels
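A toy illustration of the one_hot/boolean_mask trick used above to pull out each anchor's best overlap (hypothetical overlaps matrix):

overlaps = tf.constant([[0.1, 0.7], [0.6, 0.2], [0.3, 0.9]])  # [anchors, gt_boxes]
best_gt = tf.arg_max(overlaps, dimension=1)                   # per-anchor best gt index
mask = tf.one_hot(best_gt, 2, on_value=True, off_value=False)
max_overlaps = tf.boolean_mask(overlaps, mask)                # [0.7, 0.6, 0.9]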
def _build(self):
beta_init = tf.zeros(
shape=self._size,
dtype=D_TYPE
)
gamma_init = tf.ones(
shape=self._size,
dtype=D_TYPE
)
self._beta = tf.Variable(
name='beta',
initial_value=beta_init,
dtype=D_TYPE
)
self._gamma = tf.Variable(
name='gamma',
initial_value=gamma_init,
dtype=D_TYPE
)
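A sketch of how beta/gamma parameters like these are typically applied (an assumed layer-norm usage; x and the epsilon are hypothetical):

mean, var = tf.nn.moments(x, axes=[-1], keep_dims=True)
normalized = (x - mean) / tf.sqrt(var + 1e-6)
out = self._gamma * normalized + self._beta  # identity at init: gamma=1, beta=0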
def __init__(self, attention_units, memory, sequence_length=None, time_major=True, mode=0):
self.attention_units = attention_units
self.enc_units = memory.get_shape()[-1].value
if time_major:
memory = tf.transpose(memory, perm=(1,0,2))
self.enc_length = tf.shape(memory)[1]
self.batch_size = tf.shape(memory)[0]
self.mode = mode
self.mask = array_ops.sequence_mask(sequence_length, self.enc_length) if sequence_length is not None else None
self.tiny = -math.inf * tf.ones(shape=(self.batch_size, self.enc_length))
self.memory = tf.reshape(memory, (tf.shape(memory)[0], self.enc_length, 1, self.enc_units))
### pre-compute Uahj to minimize the computational cost
with tf.variable_scope('attention'):
Ua = tf.get_variable(name='Ua', shape=(1, 1, self.enc_units, self.attention_units))
self.hidden_feats = tf.nn.conv2d(self.memory, Ua, [1,1,1,1], "SAME")
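A sketch of how self.mask and self.tiny would combine to suppress padded encoder steps (scores is a hypothetical [batch, enc_length] tensor of attention logits):

masked = tf.where(self.mask, scores, self.tiny)  # padded positions become -inf
weights = tf.nn.softmax(masked)                  # ... and receive ~0 attention weight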
def _meshgrid(self, height, width):
with tf.variable_scope('_meshgrid'):
# This should be equivalent to:
# x_t, y_t = np.meshgrid(np.linspace(-1, 1, width),
# np.linspace(-1, 1, height))
# ones = np.ones(np.prod(x_t.shape))
# grid = np.vstack([x_t.flatten(), y_t.flatten(), ones])
x_t = tf.matmul(tf.ones(shape=tf.pack([height, 1])),
tf.transpose(tf.expand_dims(tf.linspace(-1.0, 1.0, width), 1), [1, 0]))
y_t = tf.matmul(tf.expand_dims(tf.linspace(-1.0, 1.0, height), 1),
tf.ones(shape=tf.pack([1, width])))
x_t_flat = tf.reshape(x_t, (1, -1))
y_t_flat = tf.reshape(y_t, (1, -1))
ones = tf.ones_like(x_t_flat)
grid = tf.concat(0, [x_t_flat, y_t_flat, ones])
return grid
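For intuition, the same grid in NumPy, following the function's own commented recipe (tf.pack and tf.concat(0, ...) are the pre-1.0 spellings of tf.stack and tf.concat([...], 0)):

import numpy as np
x_t, y_t = np.meshgrid(np.linspace(-1, 1, 5), np.linspace(-1, 1, 4))  # width=5, height=4
grid = np.vstack([x_t.flatten(), y_t.flatten(), np.ones(20)])         # shape (3, 20)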
def compute_cost(self):
losses = tf.nn.seq2seq.sequence_loss_by_example(
[tf.reshape(self.pred, [-1], name='reshape_pred')],
[tf.reshape(self.ys, [-1], name='reshape_target')],
[tf.ones([self.batch_size * self.n_steps], dtype=tf.float32)],
average_across_timesteps=True,
softmax_loss_function=self.ms_error,
name='losses'
)
with tf.name_scope('average_cost'):
self.cost = tf.div(
tf.reduce_sum(losses, name='losses_sum'),
self.batch_size,
name='average_cost')
tf.summary.scalar('cost', self.cost)
def compute_cost(self):
losses = tf.nn.seq2seq.sequence_loss_by_example(
[tf.reshape(self.pred, [-1], name='reshape_pred')],
[tf.reshape(self.ys, [-1], name='reshape_target')],
[tf.ones([self.batch_size * self.n_steps], dtype=tf.float32)],
average_across_timesteps=True,
softmax_loss_function=self.ms_error,
name='losses'
)
with tf.name_scope('average_cost'):
self.cost = tf.div(
tf.reduce_sum(losses, name='losses_sum'),
self.batch_size,
name='average_cost')
tf.scalar_summary('cost', self.cost)
def prepare_serialized_examples(self, serialized_examples):
# set the mapping from the fields to data types in the proto
num_features = len(self.feature_names)
assert num_features > 0, "self.feature_names is empty!"
assert len(self.feature_names) == len(self.feature_sizes), \
"length of feature_names (={}) != length of feature_sizes (={})".format( \
len(self.feature_names), len(self.feature_sizes))
feature_map = {"video_id": tf.FixedLenFeature([], tf.string),
"labels": tf.VarLenFeature(tf.int64)}
for feature_index in range(num_features):
feature_map[self.feature_names[feature_index]] = tf.FixedLenFeature(
[self.feature_sizes[feature_index]], tf.float32)
features = tf.parse_example(serialized_examples, features=feature_map)
labels = tf.sparse_to_indicator(features["labels"], self.num_classes)
labels.set_shape([None, self.num_classes])
concatenated_features = tf.concat([
features[feature_name] for feature_name in self.feature_names], 1)
return features["video_id"], concatenated_features, labels, tf.ones([tf.shape(serialized_examples)[0]])
def categories_loss(self, categories, layer):
gan = self.gan
loss = 0
batch_size = gan.batch_size()
def split(layer):
start = 0
ret = []
for category in categories:
count = int(category.get_shape()[1])
ret.append(tf.slice(layer, [0, start], [batch_size, count]))
start += count
return ret
for category,layer_s in zip(categories, split(layer)):
size = int(category.get_shape()[1])
category_prior = tf.ones([batch_size, size])*np.float32(1./size)
logli_prior = tf.reduce_sum(tf.log(category_prior + TINY) * category, axis=1)
layer_softmax = tf.nn.softmax(layer_s)
logli = tf.reduce_sum(tf.log(layer_softmax+TINY)*category, axis=1)
disc_ent = tf.reduce_mean(-logli_prior)
disc_cross_ent = tf.reduce_mean(-logli)
loss += disc_ent - disc_cross_ent
return loss
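A toy check of the uniform prior term above (hypothetical 3-way category; TINY as in the surrounding code):

import numpy as np
TINY = 1e-8
category = tf.one_hot([0, 2], 3)  # two samples of a 3-way category
category_prior = tf.ones([2, 3]) * np.float32(1. / 3)
logli_prior = tf.reduce_sum(tf.log(category_prior + TINY) * category, axis=1)
# every entry is log(1/3), so the prior entropy term disc_ent is log(3) ~= 1.0986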
def getStatsEigen(self, stats=None):
if len(self.stats_eigen) == 0:
stats_eigen = {}
if stats is None:
stats = self.stats
tmpEigenCache = {}
with tf.device('/cpu:0'):
for var in stats:
for key in ['fprop_concat_stats', 'bprop_concat_stats']:
for stats_var in stats[var][key]:
if stats_var not in tmpEigenCache:
stats_dim = stats_var.get_shape()[1].value
e = tf.Variable(tf.ones(
[stats_dim]), name='KFAC_FAC/' + stats_var.name.split(':')[0] + '/e', trainable=False)
Q = tf.Variable(tf.diag(tf.ones(
[stats_dim])), name='KFAC_FAC/' + stats_var.name.split(':')[0] + '/Q', trainable=False)
stats_eigen[stats_var] = {'e': e, 'Q': Q}
tmpEigenCache[
stats_var] = stats_eigen[stats_var]
else:
stats_eigen[stats_var] = tmpEigenCache[
stats_var]
self.stats_eigen = stats_eigen
return self.stats_eigen
def sample(self, n, max_length=None, z=None, **kwargs):
"""Sample with an optional conditional embedding `z`."""
if z is not None and z.shape[0].value != n:
raise ValueError(
'`z` must have a first dimension that equals `n` when given. '
'Got: %d vs %d' % (z.shape[0].value, n))
if self.hparams.conditional and z is None:
tf.logging.warning(
'Sampling from conditional model without `z`. Using random `z`.')
normal_shape = [n, self.hparams.z_size]
normal_dist = tf.contrib.distributions.Normal(
loc=tf.zeros(normal_shape), scale=tf.ones(normal_shape))
z = normal_dist.sample()
return self.decoder.sample(n, max_length, z, **kwargs)
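A minimal sketch of the random-z fallback above (hypothetical sizes):

n, z_size = 4, 8
normal_dist = tf.contrib.distributions.Normal(
loc=tf.zeros([n, z_size]), scale=tf.ones([n, z_size]))
z = normal_dist.sample()  # z ~ N(0, I), shape [n, z_size]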