def _add_cross_entropy(labels, logits, pref):
"""Compute average cross entropy and add to loss collection.
Args:
labels: Single dimension labels from distorted_inputs() or inputs().
logits: Output map from inference().
pref: Either 'c' or 's', for contours or segments, respectively.
"""
with tf.variable_scope('{}_cross_entropy'.format(pref)) as scope:
class_prop = C_CLASS_PROP if pref == 'c' else S_CLASS_PROP
        weight_per_label = (tf.scalar_mul(class_prop, tf.cast(tf.equal(labels, 0), tf.float32))
                            + tf.scalar_mul(1.0 - class_prop, tf.cast(tf.equal(labels, 1), tf.float32)))
        cross_entropy = tf.losses.sparse_softmax_cross_entropy(
            labels=tf.squeeze(labels, axis=[3]), logits=logits)  # squeeze_dims is the deprecated alias of axis
cross_entropy_weighted = tf.multiply(weight_per_label, cross_entropy)
cross_entropy_mean = tf.reduce_mean(cross_entropy_weighted, name=scope.name)
tf.add_to_collection('losses', cross_entropy_mean)
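# A minimal numpy sketch of the weighting idea above, assuming C_CLASS_PROP /
# S_CLASS_PROP hold the fraction of positive (label 1) pixels, so the rarer
# class gets the larger per-pixel weight (the values below are made up):
import numpy as np

class_prop = 0.1                       # hypothetical: 10% of pixels are positive
labels = np.array([0, 0, 0, 1])        # toy per-pixel labels
weights = class_prop * (labels == 0) + (1.0 - class_prop) * (labels == 1)
print(weights)                         # [0.1 0.1 0.1 0.9]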
# Example source code for Python squeeze()
def _crop_pool_layer(self, bottom, rois, name):
with tf.variable_scope(name) as scope:
batch_ids = tf.squeeze(tf.slice(rois, [0, 0], [-1, 1], name="batch_id"), [1])
# Get the normalized coordinates of bboxes
bottom_shape = tf.shape(bottom)
height = (tf.to_float(bottom_shape[1]) - 1.) * np.float32(self._feat_stride[0])
width = (tf.to_float(bottom_shape[2]) - 1.) * np.float32(self._feat_stride[0])
x1 = tf.slice(rois, [0, 1], [-1, 1], name="x1") / width
y1 = tf.slice(rois, [0, 2], [-1, 1], name="y1") / height
x2 = tf.slice(rois, [0, 3], [-1, 1], name="x2") / width
y2 = tf.slice(rois, [0, 4], [-1, 1], name="y2") / height
        # Gradients won't flow back into the ROIs anyway; stop_gradient just saves compute
bboxes = tf.stop_gradient(tf.concat([y1, x1, y2, x2], 1))
if cfg.RESNET.MAX_POOL:
pre_pool_size = cfg.POOLING_SIZE * 2
crops = tf.image.crop_and_resize(bottom, bboxes, tf.to_int32(batch_ids), [pre_pool_size, pre_pool_size],
name="crops")
crops = slim.max_pool2d(crops, [2, 2], padding='SAME')
else:
crops = tf.image.crop_and_resize(bottom, bboxes, tf.to_int32(batch_ids), [cfg.POOLING_SIZE, cfg.POOLING_SIZE],
name="crops")
return crops
# Do the first few layers manually, because 'SAME' padding can behave inconsistently
# for images of different sizes: sometimes 0, sometimes 1
def _crop_pool_layer(self, bottom, rois, name):
with tf.variable_scope(name) as scope:
batch_ids = tf.squeeze(tf.slice(rois, [0, 0], [-1, 1], name="batch_id"), [1])
# Get the normalized coordinates of bounding boxes
bottom_shape = tf.shape(bottom)
height = (tf.to_float(bottom_shape[1]) - 1.) * np.float32(self._feat_stride[0])
width = (tf.to_float(bottom_shape[2]) - 1.) * np.float32(self._feat_stride[0])
x1 = tf.slice(rois, [0, 1], [-1, 1], name="x1") / width
y1 = tf.slice(rois, [0, 2], [-1, 1], name="y1") / height
x2 = tf.slice(rois, [0, 3], [-1, 1], name="x2") / width
y2 = tf.slice(rois, [0, 4], [-1, 1], name="y2") / height
        # Gradients won't flow back into the ROIs anyway; stop_gradient just saves compute
bboxes = tf.stop_gradient(tf.concat([y1, x1, y2, x2], axis=1))
pre_pool_size = cfg.POOLING_SIZE * 2
crops = tf.image.crop_and_resize(bottom, bboxes, tf.to_int32(batch_ids), [pre_pool_size, pre_pool_size], name="crops")
return slim.max_pool2d(crops, [2, 2], padding='SAME')
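# tf.image.crop_and_resize takes boxes as [y1, x1, y2, x2] normalized to [0, 1]
# over the input tensor, which is why the ROI pixel coordinates above are divided
# by (feature size - 1) * feat_stride. A numpy sketch of that normalization
# (the stride, feature-map size, and ROI values here are illustrative only):
import numpy as np

feat_stride = 16.0                            # hypothetical backbone stride
feat_h, feat_w = 38, 50                       # feature-map height/width
rois = np.array([[0, 32., 48., 160., 240.]])  # [batch_id, x1, y1, x2, y2] in image pixels
height = (feat_h - 1.0) * feat_stride
width = (feat_w - 1.0) * feat_stride
boxes = np.stack([rois[:, 2] / height,        # y1
                  rois[:, 1] / width,         # x1
                  rois[:, 4] / height,        # y2
                  rois[:, 3] / width], axis=1)
print(boxes)                                  # normalized boxes for crop_and_resize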
def _get_loss(self,labels):
with tf.name_scope("Loss"):
"""
with tf.name_scope("logloss"):
logit = tf.squeeze(tf.nn.sigmoid(self.logit))
self.loss = tf.reduce_mean(self._logloss(labels, logit))
"""
with tf.name_scope("L2_loss"):
if self.flags.lambdax:
lambdax = self.flags.lambdax
else:
lambdax = 0
self.l2loss = lambdax*tf.add_n(tf.get_collection(tf.GraphKeys.REGULARIZATION_LOSSES))
with tf.name_scope("dice_coef"):
#yp_label = tf.cast(logit>self.flags.threshold, tf.float32)
logit = tf.squeeze(self.logit)
self.acc = tf.reduce_mean(self._dice_coef(labels,logit))
self.metric = "dice_coef"
self.loss = -self.acc
with tf.name_scope("summary"):
if self.flags.visualize:
                tf.summary.scalar(name='dice_coef', tensor=self.acc, collections=[tf.GraphKeys.SCALARS])  # summary names may not contain spaces
def _build(self):
V = self.V
M = self.flags.embedding_size # 64
H = self.flags.num_units
C = self.flags.classes
netname = "CBOW"
with tf.variable_scope(netname):
self.inputs = tf.placeholder(dtype=tf.int32,shape=[None, None]) #[B,S]
layer_name = "{}/embedding".format(netname)
x = self._get_embedding(layer_name, self.inputs, V, M, reuse=False) # [B, S, M]
netname = "RNN"
cell_name = self.flags.cell
with tf.variable_scope(netname):
args = {"num_units":H,"num_proj":C}
cell_f = self._get_rnn_cell(cell_name=cell_name, args=args)
cell_b = self._get_rnn_cell(cell_name=cell_name, args=args)
(out_f, out_b), _ = tf.nn.bidirectional_dynamic_rnn(cell_f,cell_b,x,dtype=tf.float32)
#logit = (out_f[:,-1,:] + out_b[:,-1,:])*0.5 # [B,1,C]
logit = tf.reduce_mean(out_f+out_b,axis=1)
logit = tf.squeeze(logit) # [B,C]
self.logit = logit
def rnn_story(self):
"""
run rnn for story to get last hidden state
input is: story: [batch_size,story_length,embed_size]
:return: last hidden state. [batch_size,embed_size]
"""
# 1.split input to get lists.
    input_split = tf.split(self.story_embedding, self.story_length, axis=1)  # a list of length story_length; each element is [batch_size,1,embed_size]
    input_list = [tf.squeeze(x, axis=1) for x in input_split]  # each element now [batch_size,embed_size]
# 2.init keys(w_all) and values(h_all) of memory
h_all=tf.get_variable("hidden_states",shape=[self.block_size,self.dimension],initializer=self.initializer)# [block_size,hidden_size]
w_all=tf.get_variable("keys", shape=[self.block_size,self.dimension],initializer=self.initializer)# [block_size,hidden_size]
# 3.expand keys and values to prepare operation of rnn
w_all_expand=tf.tile(tf.expand_dims(w_all,axis=0),[self.batch_size,1,1]) #[batch_size,block_size,hidden_size]
h_all_expand=tf.tile(tf.expand_dims(h_all,axis=0),[self.batch_size,1,1]) #[batch_size,block_size,hidden_size]
# 4. run rnn using input with cell.
for i,input in enumerate(input_list):
h_all_expand=self.cell(input,h_all_expand,w_all_expand,i) #w_all:[batch_size,block_size,hidden_size]; h_all:[batch_size,block_size,hidden_size]
return h_all_expand #[batch_size,block_size,hidden_size]
# Source: a2_poistion_wise_feed_forward.py (project: text_classification, author: brightmart)
def position_wise_feed_forward_fn(self):
"""
x: [batch,sequence_length,d_model]
:return: [batch,sequence_length,d_model]
"""
output=None
#1.conv1
input=tf.expand_dims(self.x,axis=3) #[batch,sequence_length,d_model,1]
# conv2d.input: [None,sentence_length,embed_size,1]. filter=[filter_size,self.embed_size,1,self.num_filters]
# output with padding:[None,sentence_length,1,1]
filter1 = tf.get_variable("filter1"+str(self.layer_index) , shape=[1, self.d_model, 1, 1],initializer=self.initializer)
ouput_conv1=tf.nn.conv2d(input,filter1,strides=[1,1,1,1],padding="VALID",name="conv1") #[batch,sequence_length,1,1]
print("output_conv1:",ouput_conv1)
#2.conv2
filter2 = tf.get_variable("filter2"+str(self.layer_index), [1, 1, 1, self.d_model], initializer=self.initializer)
output_conv2=tf.nn.conv2d(ouput_conv1,filter2,strides=[1,1,1,1],padding="VALID",name="conv2") #[batch,sequence_length,1,d_model]
output=tf.squeeze(output_conv2) #[batch,sequence_length,d_model]
return output #[batch,sequence_length,d_model]
#test function of position_wise_feed_forward_fn
#timing at length=8000: old version 35.6s; new version 0.03s
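# A 1 x d_model filter with VALID padding slides over each sequence position
# independently, so conv1 above is just a per-position linear map. A numpy
# sketch of that equivalence (toy shapes, not the project's real dimensions):
import numpy as np

batch, seq_len, d_model = 2, 5, 4
x = np.random.randn(batch, seq_len, d_model)
w = np.random.randn(d_model)              # the [1, d_model, 1, 1] filter, flattened
conv_like = np.einsum('bsd,d->bs', x, w)  # what the VALID conv computes: [batch, seq_len]
manual = np.stack([[x[b, s] @ w for s in range(seq_len)] for b in range(batch)])
print(np.allclose(conv_like, manual))     # True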
# Source: a1_seq2seq_attention_model.py (project: text_classification, author: brightmart)
def gru_forward(self, embedded_words,gru_cell, reverse=False):
"""
    :param embedded_words: [None, sequence_length, self.embed_size]
    :return: forward hidden states: a list of length sentence_length; each element is [batch_size, hidden_size]
    """
    # split embedded_words into a list of length sentence_length; each element is [batch_size,1,embed_size]
    embedded_words_splitted = tf.split(embedded_words, self.sequence_length, axis=1)
    embedded_words_squeeze = [tf.squeeze(x, axis=1) for x in embedded_words_splitted]  # each element now [batch_size,embed_size]
h_t = tf.ones((self.batch_size,self.hidden_size))
h_t_list = []
if reverse:
embedded_words_squeeze.reverse()
for time_step, Xt in enumerate(embedded_words_squeeze): # Xt: [batch_size,embed_size]
h_t = gru_cell(Xt,h_t) #h_t:[batch_size,embed_size]<------Xt:[batch_size,embed_size];h_t:[batch_size,embed_size]
h_t_list.append(h_t)
if reverse:
h_t_list.reverse()
return h_t_list # a list,length is sentence_length, each element is [batch_size,hidden_size]
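# The split-then-squeeze idiom above turns a [batch, time, embed] tensor into a
# Python list of [batch, embed] slices; it is equivalent to tf.unstack(x, axis=1),
# or in plain numpy terms (toy values):
import numpy as np

x = np.arange(24).reshape(2, 3, 4)                 # [batch=2, time=3, embed=4]
steps = [x[:, t, :] for t in range(x.shape[1])]    # list of 3 arrays, each [2, 4]
print(len(steps), steps[0].shape)                  # 3 (2, 4)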
# Source: p1_HierarchicalAttention_model.py (project: text_classification, author: brightmart)
def gru_forward_word_level(self, embedded_words):
"""
    :param embedded_words: [batch_size*num_sentences, sentence_length, embed_size]
    :return: forward hidden states: a list of length sentence_length; each element is [batch_size*num_sentences, hidden_size]
    """
    # split embedded_words into a list of length sentence_length; each element is [batch_size*num_sentences,1,embed_size]
    embedded_words_splitted = tf.split(embedded_words, self.sequence_length, axis=1)
    embedded_words_squeeze = [tf.squeeze(x, axis=1) for x in embedded_words_splitted]  # each element now [batch_size*num_sentences,embed_size]
    # initial hidden state: [batch_size*num_sentences, hidden_size]
    h_t = tf.ones((self.batch_size * self.num_sentences, self.hidden_size))
h_t_forward_list = []
for time_step, Xt in enumerate(embedded_words_squeeze): # Xt: [batch_size*num_sentences,embed_size]
h_t = self.gru_single_step_word_level(Xt,h_t) # [batch_size*num_sentences,embed_size]<------Xt:[batch_size*num_sentences,embed_size];h_t:[batch_size*num_sentences,embed_size]
h_t_forward_list.append(h_t)
return h_t_forward_list # a list,length is sentence_length, each element is [batch_size*num_sentences,hidden_size]
# backward gru for first level: word level
# Source: p1_HierarchicalAttention_model.py (project: text_classification, author: brightmart)
def gru_backward_word_level(self, embedded_words):
"""
    :param embedded_words: [batch_size*num_sentences, sentence_length, embed_size]
    :return: backward hidden states: a list of length sentence_length; each element is [batch_size*num_sentences, hidden_size]
    """
    # split embedded_words into a list of length sentence_length; each element is [batch_size*num_sentences,1,embed_size]
    embedded_words_splitted = tf.split(embedded_words, self.sequence_length, axis=1)
    embedded_words_squeeze = [tf.squeeze(x, axis=1) for x in embedded_words_splitted]  # each element now [batch_size*num_sentences,embed_size]
    embedded_words_squeeze.reverse()  # process the sequence back to front
    h_t = tf.ones((self.batch_size * self.num_sentences, self.hidden_size))  # initial hidden state
h_t_backward_list = []
for time_step, Xt in enumerate(embedded_words_squeeze):
h_t = self.gru_single_step_word_level(Xt, h_t)
h_t_backward_list.append(h_t)
h_t_backward_list.reverse() #ADD 2017.06.14
return h_t_backward_list
# forward gru for second level: sentence level
# Source: p1_HierarchicalAttention_model.py (project: text_classification, author: brightmart)
def gru_backward_sentence_level(self, sentence_representation):
"""
    :param sentence_representation: [batch_size, num_sentences, hidden_size*2]
    :return: backward hidden states: a list of length num_sentences; each element is [batch_size, hidden_size]
    """
    # split sentence_representation into a list of length num_sentences; each element is [batch_size,1,hidden_size*2]
    sentence_representation_splitted = tf.split(sentence_representation, self.num_sentences, axis=1)
    sentence_representation_squeeze = [tf.squeeze(x, axis=1) for x in sentence_representation_splitted]  # each element now [batch_size,hidden_size*2]
    sentence_representation_squeeze.reverse()  # process sentences back to front
    h_t = tf.ones((self.batch_size, self.hidden_size * 2))  # initial hidden state
    h_t_backward_list = []
    for time_step, Xt in enumerate(sentence_representation_squeeze):  # Xt: [batch_size, hidden_size*2]
        h_t = self.gru_single_step_sentence_level(Xt, h_t)
        h_t_backward_list.append(h_t)
    h_t_backward_list.reverse()  # restore chronological order
    return h_t_backward_list  # a list of length num_sentences; each element is [batch_size, hidden_size]
# forward gru for second level: sentence level
# Source: p1_HierarchicalAttention_model_transformer.py (project: text_classification, author: brightmart)
def gru_forward_sentence_level(self, sentence_representation):
"""
:param sentence_representation: [batch_size,num_sentences,hidden_size*2]
    :return: forward hidden states: a list of length num_sentences; each element is [batch_size, hidden_size]
    """
    # split sentence_representation into a list of length num_sentences; each element is [batch_size,1,hidden_size*2]
    sentence_representation_splitted = tf.split(sentence_representation, self.num_sentences, axis=1)
    sentence_representation_squeeze = [tf.squeeze(x, axis=1) for x in sentence_representation_splitted]  # each element now [batch_size,hidden_size*2]
    h_t = tf.ones((self.batch_size, self.hidden_size * 2))  # initial hidden state
h_t_forward_list = []
for time_step, Xt in enumerate(sentence_representation_squeeze): # Xt:[batch_size, hidden_size*2]
h_t = self.gru_single_step_sentence_level(Xt,
h_t) # h_t:[batch_size,hidden_size]<---------Xt:[batch_size, hidden_size*2]; h_t:[batch_size, hidden_size*2]
h_t_forward_list.append(h_t)
return h_t_forward_list # a list,length is num_sentences, each element is [batch_size,hidden_size]
def SoftArgmin(outputLeft, outputRight, D=192):
left_result_D = outputLeft
right_result_D = outputRight
left_result_D_squeeze = tf.squeeze(left_result_D, axis=[0, 4])
right_result_D_squeeze = tf.squeeze(right_result_D, axis=[0, 4]) # 192 256 512
left_result_softmax = tf.nn.softmax(left_result_D_squeeze, dim=0)
right_result_softmax = tf.nn.softmax(right_result_D_squeeze, dim=0) # 192 256 512
d_grid = tf.cast(tf.range(D), tf.float32)
    d_grid = tf.reshape(d_grid, (-1, 1, 1))
    d_grid = tf.tile(d_grid, [1, 256, 512])  # hard-coded feature-map height/width
left_softargmin = tf.reduce_sum(tf.multiply(left_result_softmax, d_grid), axis=0, keep_dims=True)
right_softargmin = tf.reduce_sum(tf.multiply(right_result_softmax, d_grid), axis=0, keep_dims=True)
return left_softargmin, right_softargmin
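# Soft argmin replaces a hard argmin over the disparity axis with a
# probability-weighted average: d_hat = sum_d d * softmax(s)_d. Note the code
# above applies softmax to the raw volume, so the network output is presumably
# already a negated cost. A one-pixel numpy sketch (toy scores):
import numpy as np

scores = np.array([0.1, 2.0, 0.3, -1.0])        # per-disparity scores, one pixel
p = np.exp(scores) / np.exp(scores).sum()       # softmax over the disparity axis
soft_argmin = np.sum(np.arange(len(scores)) * p)
print(soft_argmin)                              # ~1.1, near the best-scoring index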
def prepare_label(self, input_batch, new_size):
"""Resize masks and perform one-hot encoding.
Args:
      input_batch: input tensor of shape [batch_size, H, W, 1].
      new_size: a tensor with new height and width.
    Returns:
      Outputs a tensor of shape [batch_size, h, w, 21]
with last dimension comprised of 0's and 1's only.
"""
with tf.name_scope('label_encode'):
input_batch = tf.image.resize_nearest_neighbor(input_batch, new_size) # As labels are integer numbers, need to use NN interp.
input_batch = tf.squeeze(input_batch, axis=[3]) # Reducing the channel dimension.
input_batch = tf.one_hot(input_batch, depth=21)
return input_batch
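# Nearest-neighbor interpolation is required because the labels are class ids:
# bilinear resizing would blend ids into meaningless fractional values. The
# one-hot step over depth 21 (presumably 20 PASCAL VOC classes plus background)
# looks like this in numpy terms:
import numpy as np

labels = np.array([[0, 15], [20, 7]])           # toy [h, w] map of class ids
one_hot = np.eye(21, dtype=np.float32)[labels]  # [h, w, 21] of 0s and 1s
print(one_hot.shape, one_hot[0, 1].argmax())    # (2, 2, 21) 15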
def _build(self):
assert(self.d is not None)
assert(self.lr is not None)
assert(self.l2_penalty is not None)
assert(self.loss_function is not None)
# Get input placeholders and sentence features
self._create_placeholders()
sentence_feats, save_kwargs = self._embed_sentences()
# Define linear model
s1, s2 = self.seed, (self.seed + 1 if self.seed is not None else None)
w = tf.Variable(tf.random_normal((self.d, 1), stddev=SD, seed=s1))
b = tf.Variable(tf.random_normal((1, 1), stddev=SD, seed=s2))
h = tf.squeeze(tf.matmul(sentence_feats, w) + b)
# Define training procedure
self.loss = self._get_loss(h, self.y)
self.loss += self.l2_penalty * tf.nn.l2_loss(w)
self.prediction = tf.sigmoid(h)
self.train_fn = tf.train.AdamOptimizer(self.lr).minimize(self.loss)
    save_kwargs.update({'w': w, 'b': b})  # dict.update() returns None, so mutate first, then assign
    self.save_dict = save_kwargs
def call(self,inputs):
"""
    inputs is a list containing the support-set embeddings, the target embedding as the second-to-last element, and the one-hot support-set labels as the last element
"""
similarities = []
targetembedding = inputs[-2]
numsupportset = len(inputs)-2
for ii in range(numsupportset):
supportembedding = inputs[ii]
dd = tf.negative(tf.sqrt(tf.reduce_sum(tf.square(supportembedding-targetembedding),1,keep_dims=True)))
similarities.append(dd)
similarities = tf.concat(axis=1,values=similarities)
softmax_similarities = tf.nn.softmax(similarities)
preds = tf.squeeze(tf.matmul(tf.expand_dims(softmax_similarities,1),inputs[-1]))
preds.set_shape((inputs[0].shape[0],self.nway))
return preds
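# The readout above is an attention-weighted vote: softmax over negative
# distances to each support embedding, then a matmul against the support-set
# labels. A numpy sketch with a hypothetical one-hot label matrix:
import numpy as np

sims = np.array([[-0.2, -1.5, -0.1]])             # negative distances, 3 support points
att = np.exp(sims) / np.exp(sims).sum(axis=1, keepdims=True)
support_labels = np.eye(3)[None]                  # [1, 3, 3] one-hot labels (hypothetical)
preds = (att[:, None, :] @ support_labels).squeeze(1)
print(preds.argmax())                             # 2, the nearest support point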
def _read_input(self, filename_queue):
class DataRecord(object):
pass
reader = tf.WholeFileReader()
key, value = reader.read(filename_queue)
record = DataRecord()
    decoded_image = tf.image.decode_jpeg(value, channels=3)  # assumption: color images are read and generated
# decoded_image_4d = tf.expand_dims(decoded_image, 0)
# resized_image = tf.image.resize_bilinear(decoded_image_4d, [self.target_image_size, self.target_image_size])
# record.input_image = tf.squeeze(resized_image, squeeze_dims=[0])
cropped_image = tf.cast(
tf.image.crop_to_bounding_box(decoded_image, 55, 35, self.crop_image_size, self.crop_image_size),
tf.float32)
decoded_image_4d = tf.expand_dims(cropped_image, 0)
resized_image = tf.image.resize_bilinear(decoded_image_4d, [self.resized_image_size, self.resized_image_size])
    record.input_image = tf.squeeze(resized_image, axis=[0])  # squeeze_dims is the deprecated alias of axis
return record
def run_bottleneck_on_image(sess, image_data, image_data_tensor,
bottleneck_tensor):
"""Runs inference on an image to extract the 'bottleneck' summary layer.
Args:
sess: Current active TensorFlow Session.
image_data: String of raw JPEG data.
image_data_tensor: Input data layer in the graph.
bottleneck_tensor: Layer before the final softmax.
Returns:
Numpy array of bottleneck values.
"""
bottleneck_values = sess.run(
bottleneck_tensor,
{image_data_tensor: image_data})
bottleneck_values = np.squeeze(bottleneck_values)
return bottleneck_values
def DCGRU(self, mem, kernel_width, prefix):
"""Convolutional diagonal GRU."""
def conv_lin(input, suffix, bias_start):
return self.conv_linear(input, kernel_width, self.num_units, self.num_units, bias_start,prefix + "/" + suffix)
# perform shift
mem_shifted = tf.squeeze(tf.nn.depthwise_conv2d(tf.expand_dims(mem,1), self.shift_filter,[1,1,1,1],'SAME'),[1])
# calculate the new value
reset = self.hard_sigmoid(conv_lin(mem, "r", 0.5))
candidate = self.hard_tanh(conv_lin(reset * mem, "c", 0.0))
gate = self.hard_sigmoid(conv_lin(mem, "g", 0.7))
candidate =self.dropout(candidate)
candidate = gate*mem_shifted + (1 - gate)*candidate
return candidate
def __call__(self, u_t, a, b, scope=None):
"""
:param u_t: [N, M, d]
:param a: [N, M. d]
:param b: [N, M. d]
:param mask: [N, M]
:return:
"""
N, M, d = self.batch_size, self.mem_size, self.hidden_size
L, sL = self.L, self.sL
with tf.name_scope(scope or self.__class__.__name__):
L = tf.tile(tf.expand_dims(tf.expand_dims(L, 0), 0), [N, d, 1, 1])
sL = tf.tile(tf.expand_dims(tf.expand_dims(sL, 0), 0), [N, d, 1, 1])
            logb = tf.log(b + 1e-9)  # [N, M, d]
            # tf.concat(values, axis=...) replaces the pre-1.0 tf.concat(axis, values) order
            logb = tf.concat([tf.zeros([N, 1, d]), tf.slice(logb, [0, 1, 0], [-1, -1, -1])], axis=1)  # [N, M, d]
            logb = tf.expand_dims(tf.transpose(logb, [0, 2, 1]), -1)  # [N, d, M, 1]
            # tf.batch_matmul was folded into tf.matmul in TF 1.0
            left = L * tf.exp(tf.matmul(L, logb * sL))  # [N, d, M, M]
            right = a * u_t  # [N, M, d]
            right = tf.expand_dims(tf.transpose(right, [0, 2, 1]), -1)  # [N, d, M, 1]
            u = tf.matmul(left, right)  # [N, d, M, 1]
            u = tf.transpose(tf.squeeze(u, [3]), [0, 2, 1])  # [N, M, d]
return u
def reinforce_baseline(decoder_states, reward):
"""
Center the reward by computing a baseline reward over decoder states.
:param decoder_states: internal states of the decoder, tensor of shape (batch_size, time_steps, state_size)
:param reward: reward for each time step, tensor of shape (batch_size, time_steps)
:return: reward - computed baseline, tensor of shape (batch_size, time_steps)
"""
# batch_size = tf.shape(decoder_states)[0]
# time_steps = tf.shape(decoder_states)[1]
# state_size = decoder_states.get_shape()[2]
# states = tf.reshape(decoder_states, shape=tf.stack([batch_size * time_steps, state_size]))
baseline = dense(tf.stop_gradient(decoder_states), units=1, activation=None, name='reward_baseline',
kernel_initializer=tf.constant_initializer(0.01))
baseline = tf.squeeze(baseline, axis=2)
# baseline = tf.reshape(baseline, shape=tf.stack([batch_size, time_steps]))
return reward - baseline
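# Subtracting a state-dependent baseline leaves the policy-gradient expectation
# unchanged but can sharply cut its variance; stop_gradient above keeps the
# baseline regression from leaking into the decoder. A toy illustration, using
# a per-timestep mean as a stand-in for the learned linear baseline:
import numpy as np

rng = np.random.default_rng(0)
t_mean = np.linspace(0.0, 5.0, 10)             # reward drifts over time steps
reward = t_mean + rng.normal(size=(64, 10))    # [batch, time_steps]
baseline = reward.mean(axis=0, keepdims=True)  # stand-in for the learned baseline
centered = reward - baseline
print(reward.var(), centered.var())            # ~3.5 vs ~1.0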
def data_augmentation(img, gt_bboxes, gt_cats, seg, config):
params = config['train_augmentation']
img = apply_with_random_selector(
img,
lambda x, ordering: photometric_distortions(x, ordering, params),
num_cases=4)
if seg is not None:
img = tf.concat([img, tf.cast(seg, tf.float32)], axis=-1)
img, gt_bboxes, gt_cats = scale_distortions(img, gt_bboxes, gt_cats,
params)
img, gt_bboxes = mirror_distortions(img, gt_bboxes, params)
# XXX reference implementation also randomizes interpolation method
img_size = config['image_size']
img_out = tf.image.resize_images(img[..., :3], [img_size, img_size])
gt_bboxes, gt_cats = filter_small_gt(gt_bboxes, gt_cats, 2/config['image_size'])
if seg is not None:
seg_shape = config['fm_sizes'][0]
seg = tf.expand_dims(tf.expand_dims(img[..., 3], 0), -1)
seg = tf.squeeze(tf.image.resize_nearest_neighbor(seg, [seg_shape, seg_shape]))
seg = tf.cast(tf.round(seg), tf.int64)
return img_out, gt_bboxes, gt_cats, seg
def save_np_image(image, output_file, save_format='jpeg'):
"""Saves an image to disk.
Args:
image: 3-D numpy array of shape [image_size, image_size, 3] and dtype
float32, with values in [0, 1].
output_file: str, output file.
    save_format: format for saving image (e.g. jpeg).
"""
image = np.uint8(image * 255.0)
buf = io.BytesIO()
scipy.misc.imsave(buf, np.squeeze(image, 0), format=save_format)
buf.seek(0)
    f = tf.gfile.GFile(output_file, 'wb')  # binary mode: writing encoded image bytes
    f.write(buf.getvalue())
    f.close()
def _sample(self, n_samples):
mean, cov_tril = self.mean, self.cov_tril
if not self.is_reparameterized:
mean = tf.stop_gradient(mean)
cov_tril = tf.stop_gradient(cov_tril)
def tile(t):
new_shape = tf.concat([[n_samples], tf.ones_like(tf.shape(t))], 0)
return tf.tile(tf.expand_dims(t, 0), new_shape)
batch_mean = tile(mean)
batch_cov = tile(cov_tril)
# n_dim -> n_dim x 1 for matmul
batch_mean = tf.expand_dims(batch_mean, -1)
noise = tf.random_normal(tf.shape(batch_mean), dtype=self.dtype)
samples = tf.matmul(batch_cov, noise) + batch_mean
samples = tf.squeeze(samples, -1)
# Update static shape
static_n_samples = n_samples if isinstance(n_samples, int) else None
samples.set_shape(tf.TensorShape([static_n_samples])
.concatenate(self.get_batch_shape())
.concatenate(self.get_value_shape()))
return samples
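# The sampler above is the standard reparameterization trick: draw eps ~ N(0, I)
# and form mean + cov_tril @ eps, whose covariance is L @ L.T. A quick numpy
# check with a toy 2-D Cholesky factor:
import numpy as np

rng = np.random.default_rng(0)
L = np.array([[1.0, 0.0],
              [0.5, 2.0]])                     # toy lower-triangular factor
eps = rng.standard_normal((200000, 2, 1))
samples = (L @ eps)[..., 0]                    # mean = 0 here
print(np.cov(samples.T))                       # approaches L @ L.T = [[1, .5], [.5, 4.25]]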
def _log_prob(self, given):
mean, cov_tril = (self.path_param(self.mean),
self.path_param(self.cov_tril))
log_det = 2 * tf.reduce_sum(
tf.log(tf.matrix_diag_part(cov_tril)), axis=-1)
N = tf.cast(self._n_dim, self.dtype)
logZ = - N / 2 * tf.log(2 * tf.constant(np.pi, dtype=self.dtype)) - \
log_det / 2
# logZ.shape == batch_shape
if self._check_numerics:
logZ = tf.check_numerics(logZ, "log[det(Cov)]")
# (given-mean)' Sigma^{-1} (given-mean) =
# (g-m)' L^{-T} L^{-1} (g-m) = |x|^2, where Lx = g-m =: y.
y = tf.expand_dims(given - mean, -1)
L, _ = maybe_explicit_broadcast(
cov_tril, y, 'MultivariateNormalCholesky.cov_tril',
'expand_dims(given, -1)')
x = tf.matrix_triangular_solve(L, y, lower=True)
x = tf.squeeze(x, -1)
stoc_dist = -0.5 * tf.reduce_sum(tf.square(x), axis=-1)
return logZ + stoc_dist
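# The triangular solve computes the Mahalanobis term without ever forming the
# inverse covariance: with L x = g - m, |x|^2 = (g-m)' Sigma^{-1} (g-m), and
# log det Sigma = 2 * sum(log diag(L)). A numpy/scipy sanity check:
import numpy as np
from scipy.linalg import solve_triangular
from scipy.stats import multivariate_normal

mean = np.zeros(2)
L = np.array([[1.0, 0.0],
              [0.5, 2.0]])
g = np.array([0.3, -1.0])
x = solve_triangular(L, g - mean, lower=True)
log_p = (-0.5 * len(g) * np.log(2 * np.pi)
         - np.log(np.diag(L)).sum()
         - 0.5 * x @ x)
print(np.isclose(log_p, multivariate_normal(mean, L @ L.T).logpdf(g)))  # True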