def yolo_loss(labels, predictions, mask):
    masked_labels = tf.boolean_mask(labels, mask)
    masked_predictions = tf.boolean_mask(predictions, mask)
    # ious = tensor_iou(masked_predictions[..., 1:5], masked_labels[..., 1:5])
    # ious = tf.expand_dims(ious, axis=-1)
    xy_loss = tf.reduce_sum((masked_labels[..., :2] - masked_predictions[..., 1:3]) ** 2)
    wh_loss = tf.reduce_sum((tf.sqrt(masked_predictions[..., 3:5]) - tf.sqrt(masked_labels[..., 2:4])) ** 2)
    # conf_loss = tf.reduce_sum((masked_predictions[..., 0] - ious) ** 2)
    conf_loss = tf.reduce_sum((1 - masked_predictions[..., 0]) ** 2)
    no_obj_loss = tf.reduce_sum(tf.boolean_mask(predictions, ~mask)[..., 0] ** 2)
    class_loss = tf.reduce_sum((masked_predictions[..., 5:] - masked_labels[..., 4:]) ** 2)
    loss = 5 * (xy_loss + wh_loss) + conf_loss + no_obj_loss + class_loss
    return loss
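# Minimal usage sketch for yolo_loss. The tensor layouts are assumptions inferred from the
# slicing above: labels = [x, y, w, h, one-hot classes], predictions = [confidence, x, y, w, h,
# class scores], and mask is a boolean flag per grid cell.
import numpy as np
import tensorflow as tf

num_cells, num_classes = 49, 20
labels = tf.constant(np.random.rand(num_cells, 4 + num_classes).astype(np.float32))
predictions = tf.constant(np.random.rand(num_cells, 5 + num_classes).astype(np.float32))
mask = tf.constant(np.random.rand(num_cells) > 0.5)
loss = yolo_loss(labels, predictions, mask)  # scalar tensor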
# Example source code for Python's expand_dims()
def random_access_problem(which=1):
    import raputil as ru
    if which == 1:
        opts = ru.Problem.scenario1()
    else:
        opts = ru.Problem.scenario2()

    p = ru.Problem(**opts)
    x1 = p.genX(1)
    y1 = p.fwd(x1)
    A = p.S
    M, N = A.shape
    nbatches = int(math.ceil(1000 / x1.shape[1]))
    prob = NumpyGenerator(p=p, nbatches=nbatches, A=A, opts=opts, iid=(which == 1))
    if which == 2:
        prob.maskX_ = tf.expand_dims(tf.constant((np.arange(N) % (N // 2) < opts['Nu']).astype(np.float32)), 1)
    _, prob.noise_var = p.add_noise(y1)
    unused = p.genYX(nbatches)  # for legacy reasons -- want to compare against a previous run
    (prob.yval, prob.xval) = p.genYX(nbatches)
    (prob.yinit, prob.xinit) = p.genYX(nbatches)
    import multiprocessing as mp
    prob.nsubprocs = mp.cpu_count()
    return prob
def pwlin_grid(r_, rvar_, theta_, dtheta=.75):
    """Piecewise linear with noise-adaptive grid spacing.
    Returns xhat, dxdr
    where
        q = r/dtheta/sqrt(rvar)
        xhat = r * interp(q, theta)
    All but the last dimension of theta must broadcast to r_,
    e.g. r.shape = (500, 1000) is compatible with theta.shape = (500, 1, 7).
    """
    ntheta = int(theta_.get_shape()[-1])
    scale_ = dtheta / tf.sqrt(rvar_)
    ars_ = tf.clip_by_value(tf.expand_dims(tf.abs(r_) * scale_, -1), 0.0, ntheta - 1.0)
    centers_ = tf.constant(np.arange(ntheta), dtype=tf.float32)
    outer_distance_ = tf.maximum(0., 1.0 - tf.abs(ars_ - centers_))  # new dimension for distance to closest bin centers (or center)
    gain_ = tf.reduce_sum(theta_ * outer_distance_, axis=-1)  # apply the gain (learnable)
    xhat_ = gain_ * r_
    dxdr_ = tf.gradients(xhat_, r_)[0]
    return (xhat_, dxdr_)
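# Usage sketch for pwlin_grid with assumed values: a 7-point learnable gain grid whose
# shape broadcasts against r_ as described in the docstring.
import numpy as np
import tensorflow as tf

r_ = tf.constant(np.random.randn(500, 1000).astype(np.float32))  # noisy estimate
rvar_ = tf.constant(0.1, dtype=tf.float32)                       # its (scalar) noise variance
theta_ = tf.Variable(np.ones((1, 1, 7), dtype=np.float32))       # learnable per-bin gains
xhat_, dxdr_ = pwlin_grid(r_, rvar_, theta_)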
def interp1d_(xin_, xp, yp_):
    """
    Interpolate a uniformly sampled piecewise linear function, mapping elements
    from xin_ to the result. Input values will be clipped to the range of xp.
    xin_ : input tensor (real)
    xp   : x grid (constant -- must be a 1d numpy array, uniformly spaced)
    yp_  : tensor of the result values at the gridpoints xp
    """
    import tensorflow as tf
    x_ = tf.clip_by_value(xin_, xp.min(), xp.max())
    dx = xp[1] - xp[0]
    assert len(xp.shape) == 1, 'only 1d interpolation'
    assert xp.shape[0] == int(yp_.get_shape()[0])
    assert abs(np.diff(xp) / dx - 1.0).max() < 1e-6, 'must be uniformly sampled'
    x1_ = tf.expand_dims(x_, -1)
    dt = yp_.dtype
    wt_ = tf.maximum(tf.constant(0., dtype=dt), 1 - abs(x1_ - tf.constant(xp, dtype=dt)) / dx)
    y_ = tf.reduce_sum(wt_ * yp_, axis=-1)
    return y_
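# Usage sketch for interp1d_ (assumed values): a uniform 5-point grid on [-1, 1];
# inputs outside the grid are clipped, per the docstring.
import numpy as np
import tensorflow as tf

xp = np.linspace(-1.0, 1.0, 5).astype(np.float32)
yp_ = tf.constant([0.0, 0.5, 1.0, 0.5, 0.0], dtype=tf.float32)
x = tf.constant([-2.0, -0.25, 0.9], dtype=tf.float32)  # -2.0 is clipped to -1.0
y = interp1d_(x, xp, yp_)                               # piecewise-linear values at x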
def calculate_cosine_similarity_matrix(v1, v2):
    """
    Calculate the cosine similarity matrix between two sentences.

    Parameters
    ----------
    v1: Tensor
        Tensor of shape (batch_size, num_sentence_words,
        context_rnn_hidden_size), representing the output of running
        a sentence through a BiLSTM.
    v2: Tensor
        Tensor of shape (batch_size, num_sentence_words,
        context_rnn_hidden_size), representing the output of running
        another sentence through a BiLSTM.
    """
    # Shape: (batch_size, 1, num_sentence_words, rnn_hidden_size)
    expanded_v1 = tf.expand_dims(v1, 1)
    # Shape: (batch_size, num_sentence_words, 1, rnn_hidden_size)
    expanded_v2 = tf.expand_dims(v2, 2)
    # Shape: (batch_size, num_sentence_words, num_sentence_words)
    cosine_relevancy_matrix = cosine_distance(expanded_v1, expanded_v2)
    return cosine_relevancy_matrix
def mask_similarity_matrix(similarity_matrix, mask_a, mask_b):
    """
    Given the masks of the two sentences, apply them to the similarity matrix.

    Parameters
    ----------
    similarity_matrix: Tensor
        Tensor of shape (batch_size, num_sentence_words, num_sentence_words).
    mask_a: Tensor
        Tensor of shape (batch_size, num_sentence_words). This mask should
        correspond to the first vector (v1) used to calculate the similarity
        matrix.
    mask_b: Tensor
        Tensor of shape (batch_size, num_sentence_words). This mask should
        correspond to the second vector (v2) used to calculate the similarity
        matrix.
    """
    similarity_matrix = tf.multiply(similarity_matrix,
                                    tf.expand_dims(tf.cast(mask_a, "float"), 1))
    similarity_matrix = tf.multiply(similarity_matrix,
                                    tf.expand_dims(tf.cast(mask_b, "float"), 2))
    return similarity_matrix
def multi_perspective_expand_for_1D(in_tensor, weights):
    """
    Given a 1D input tensor and weights of the appropriate shape,
    weight the input tensor by the weights by multiplying them
    together.

    Parameters
    ----------
    in_tensor:
        Tensor of shape (x,) to be weighted.
    weights:
        Tensor of shape (y, x) to multiply the input tensor by. In this
        case, y is the number of perspectives.

    Returns
    -------
    weighted_input:
        Tensor of shape (y, x), representing the weighted input
        across multiple perspectives.
    """
    # Shape: (1, rnn_hidden_dim)
    in_tensor_expanded = tf.expand_dims(in_tensor, axis=0)
    # Shape: (multiperspective_dims, rnn_hidden_dim)
    return tf.multiply(in_tensor_expanded, weights)
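# Tiny usage sketch (4 perspectives chosen purely for illustration).
import tensorflow as tf

hidden = tf.constant([1.0, 2.0, 3.0])   # shape (rnn_hidden_dim,)
perspective_weights = tf.ones([4, 3])   # shape (num_perspectives, rnn_hidden_dim)
weighted = multi_perspective_expand_for_1D(hidden, perspective_weights)  # shape (4, 3)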
def dueling_model(img_in, num_actions, scope, reuse=False):
    """As described in https://arxiv.org/abs/1511.06581"""
    with tf.variable_scope(scope, reuse=reuse):
        out = img_in
        with tf.variable_scope("convnet"):
            # original architecture
            out = layers.convolution2d(out, num_outputs=32, kernel_size=8, stride=4, activation_fn=tf.nn.relu)
            out = layers.convolution2d(out, num_outputs=64, kernel_size=4, stride=2, activation_fn=tf.nn.relu)
            out = layers.convolution2d(out, num_outputs=64, kernel_size=3, stride=1, activation_fn=tf.nn.relu)
        out = layers.flatten(out)
        with tf.variable_scope("state_value"):
            state_hidden = layers.fully_connected(out, num_outputs=512, activation_fn=tf.nn.relu)
            state_score = layers.fully_connected(state_hidden, num_outputs=1, activation_fn=None)
        with tf.variable_scope("action_value"):
            actions_hidden = layers.fully_connected(out, num_outputs=512, activation_fn=tf.nn.relu)
            action_scores = layers.fully_connected(actions_hidden, num_outputs=num_actions, activation_fn=None)
            action_scores_mean = tf.reduce_mean(action_scores, 1)
            action_scores = action_scores - tf.expand_dims(action_scores_mean, 1)
        return state_score + action_scores
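# The final subtraction implements the dueling decomposition
# Q(s, a) = V(s) + (A(s, a) - mean_a A(s, a)).
# Usage sketch, assuming TF 1.x with `layers` bound to tf.contrib.layers as in the snippet;
# the input shape (stacked Atari frames) is illustrative.
import tensorflow as tf
from tensorflow.contrib import layers

img_in = tf.placeholder(tf.float32, [None, 84, 84, 4])
q_values = dueling_model(img_in, num_actions=6, scope="q_func")  # shape (batch, num_actions)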
def preprocess_for_eval(image, height, width,
                        central_fraction=0.875, scope=None):
    """Prepare one image for evaluation.

    If height and width are specified, it would output an image of that size
    by applying resize_bilinear.
    If central_fraction is specified, it would crop the central fraction of the
    input image.

    Args:
      image: 3-D Tensor of image. If dtype is tf.float32 then the range should be
        [0, 1], otherwise it would be converted to tf.float32 assuming that the
        range is [0, MAX], where MAX is the largest positive representable number
        for the int(8/16/32) data type (see `tf.image.convert_image_dtype` for details).
      height: integer
      width: integer
      central_fraction: Optional Float, fraction of the image to crop.
      scope: Optional scope for name_scope.
    Returns:
      3-D float Tensor of prepared image.
    """
    with tf.name_scope(scope, 'eval_image', [image, height, width]):
        if image.dtype != tf.float32:
            image = tf.image.convert_image_dtype(image, dtype=tf.float32)
        # Crop the central region of the image with an area containing 87.5% of
        # the original image.
        if central_fraction:
            image = tf.image.central_crop(image, central_fraction=central_fraction)
        if height and width:
            # Resize the image to the specified height and width.
            image = tf.expand_dims(image, 0)
            image = tf.image.resize_bilinear(image, [height, width],
                                             align_corners=False)
            image = tf.squeeze(image, [0])
        image = tf.subtract(image, 0.5)
        image = tf.multiply(image, 2.0)
        return image
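# Usage sketch for preprocess_for_eval (TF 1.x image API assumed; the raw size is illustrative).
import tensorflow as tf

raw = tf.zeros([300, 300, 3], dtype=tf.uint8)                  # any 3-D uint8/float image tensor
eval_image = preprocess_for_eval(raw, height=224, width=224)   # 3-D float32 tensor scaled to [-1, 1]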
def load_batch(dataset, batch_size, height=image_size, width=image_size, is_training=True):
    '''
    Loads a batch for training.

    INPUTS:
    - dataset(Dataset): a Dataset class object created by the get_split function
    - batch_size(int): determines how big a batch to train on
    - height(int): the height to resize the image to during preprocessing
    - width(int): the width to resize the image to during preprocessing
    - is_training(bool): determines whether to perform training or evaluation preprocessing

    OUTPUTS:
    - images(Tensor): a Tensor of shape (batch_size, height, width, channels) containing one batch of preprocessed images
    - raw_images(Tensor): the raw images, simply resized to (height, width) and batched
    - labels(Tensor): the batch's labels with shape (batch_size,) (requires one_hot_encoding).
    '''
    # First create the data_provider object
    data_provider = slim.dataset_data_provider.DatasetDataProvider(
        dataset,
        common_queue_capacity=24 + 3 * batch_size,
        common_queue_min=24)

    # Obtain the raw image using the get method
    raw_image, label = data_provider.get(['image', 'label'])

    # Perform the correct preprocessing for this image depending on whether it is for training or evaluation
    image = inception_preprocessing.preprocess_image(raw_image, height, width, is_training)

    # As for the raw images, we just do a simple reshape to batch them up
    raw_image = tf.expand_dims(raw_image, 0)
    raw_image = tf.image.resize_nearest_neighbor(raw_image, [height, width])
    raw_image = tf.squeeze(raw_image)

    # Batch up the images by enqueueing the tensors internally in a FIFO queue and dequeueing many elements with tf.train.batch.
    images, raw_images, labels = tf.train.batch(
        [image, raw_image, label],
        batch_size=batch_size,
        num_threads=4,
        capacity=4 * batch_size,
        allow_smaller_final_batch=True)

    return images, raw_images, labels
def generate_on_topic(self, sess, topic_id, start_word_id, temperature=1.0, max_length=30, stop_word_id=None):
    if topic_id != -1:
        topic_emb = sess.run(tf.expand_dims(tf.nn.embedding_lookup(self.topic_output_embedding, topic_id), 0))
    else:
        topic_emb = None
    return self.generate(sess, topic_emb, start_word_id, temperature, max_length, stop_word_id)

# generate a sequence of words, given a document
# Source: threepart_aligner.py, project almond-nnparser (Stanford-Mobisocial-IoT-Lab)
def finalize_predictions(self, preds):
    # add a dimension of 1 between the batch size and the sequence length to emulate a beam width of 1
    return tf.expand_dims(preds.sequence, axis=1)
# Source: beam_aligner.py, project almond-nnparser (Stanford-Mobisocial-IoT-Lab)
def _tile_batch(self, t):
    if t.shape.ndims is None or t.shape.ndims < 1:
        raise ValueError("t must have statically known rank")
    tiling = [1] * (t.shape.ndims + 1)
    tiling[1] = self._beam_width
    tiled = tf.tile(tf.expand_dims(t, 1), tiling)
    return tiled
# Source: beam_aligner.py, project almond-nnparser (Stanford-Mobisocial-IoT-Lab)
def _make_beam_mask(self, num_available_beams):
    mask = tf.sequence_mask(num_available_beams, self._beam_width)
    return tf.tile(tf.expand_dims(mask, axis=2), multiples=[1, 1, self._output_size])
# Source: beam_aligner.py, project almond-nnparser (Stanford-Mobisocial-IoT-Lab)
def _tensor_gather_helper(gather_indices, gather_from, batch_size,
                          range_size, gather_shape):
    """Helper for gathering the right indices from the tensor.

    This works by reshaping gather_from to gather_shape (e.g. [-1]) and then
    gathering from that according to the gather_indices, which are offset by
    the right amounts in order to preserve the batch order.

    Args:
      gather_indices: The tensor indices that we use to gather.
      gather_from: The tensor that we are gathering from.
      batch_size: The input batch size.
      range_size: The number of values in each range. Likely equal to beam_width.
      gather_shape: What we should reshape gather_from to in order to preserve the
        correct values. An example is when gather_from is the attention from an
        AttentionWrapperState with shape [batch_size, beam_width, attention_size].
        There, we want to preserve the attention_size elements, so gather_shape is
        [batch_size * beam_width, -1]. Then, upon reshape, we still have the
        attention_size as desired.

    Returns:
      output: Gathered tensor of shape tf.shape(gather_from)[:1+len(gather_shape)]
    """
    range_ = tf.expand_dims(tf.range(batch_size) * range_size, 1)
    gather_indices = tf.reshape(gather_indices + range_, [-1])
    output = tf.gather(tf.reshape(gather_from, gather_shape), gather_indices)
    final_shape = tf.shape(gather_from)[:1 + len(gather_shape)]
    final_static_shape = (tf.TensorShape([None])
                          .concatenate(gather_from.shape[1:1 + len(gather_shape)]))
    output = tf.reshape(output, final_shape)
    output.set_shape(final_static_shape)
    return output
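# Worked example with hypothetical numbers (batch_size=2, beam_width=3, attention_size=4):
# the per-batch offsets are [[0], [3]], so gather_indices [[1, 0, 2], [2, 1, 0]] become the
# flattened row indices [1, 0, 2, 5, 4, 3], and the output keeps batch order with shape (2, 3, 4).
import tensorflow as tf

gather_indices = tf.constant([[1, 0, 2], [2, 1, 0]])       # parent beam chosen for each new beam
gather_from = tf.reshape(tf.range(2 * 3 * 4), [2, 3, 4])   # (batch, beam_width, attention_size)
out = _tensor_gather_helper(gather_indices, gather_from,
                            batch_size=2, range_size=3,
                            gather_shape=[2 * 3, -1])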
def decov_loss(xs):
    """DeCov loss as described in https://arxiv.org/pdf/1511.06068.pdf
    'Reducing Overfitting In Deep Networks by Decorrelating Representations'
    """
    x = tf.reshape(xs, [int(xs.get_shape()[0]), -1])
    m = tf.reduce_mean(x, 0, True)
    z = tf.expand_dims(x - m, 2)
    corr = tf.reduce_mean(tf.matmul(z, tf.transpose(z, perm=[0, 2, 1])), 0)
    corr_frob_sqr = tf.reduce_sum(tf.square(corr))
    corr_diag_sqr = tf.reduce_sum(tf.square(tf.diag_part(corr)))
    loss = 0.5 * (corr_frob_sqr - corr_diag_sqr)
    return loss
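# Usage sketch: DeCov penalizes the off-diagonal entries of the activation covariance, so it is
# applied to a hidden-layer activation tensor (static batch size assumed) and added to the main
# loss with a small weight.
import numpy as np
import tensorflow as tf

hidden = tf.constant(np.random.randn(32, 128).astype(np.float32))  # (batch, features)
reg = decov_loss(hidden)  # scalar regularizer, e.g. total_loss = task_loss + 0.1 * reg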
def average_gradients(tower_gradients):
    r'''
    A routine for computing each variable's average of the gradients obtained from the GPUs.
    Note also that this code acts as a synchronization point, as it requires all
    GPUs to be finished with their mini-batch before it can run to completion.
    '''
    # List of average gradients to return to the caller
    average_grads = []

    # Loop over gradient/variable pairs from all towers
    for grad_and_vars in zip(*tower_gradients):
        # Introduce grads to store the gradients for the current variable
        grads = []

        # Loop over the gradients for the current variable
        for g, _ in grad_and_vars:
            # Add a 0th dimension to the gradient to represent the tower.
            expanded_g = tf.expand_dims(g, 0)
            # Append on a 'tower' dimension which we will average over below.
            grads.append(expanded_g)

        # Average over the 'tower' dimension
        grad = tf.concat(grads, 0)
        grad = tf.reduce_mean(grad, 0)

        # Create a gradient/variable tuple for the current variable with its average gradient
        grad_and_var = (grad, grad_and_vars[0][1])

        # Add the current tuple to average_grads
        average_grads.append(grad_and_var)

    # Return result to caller
    return average_grads
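# Minimal sketch (TF 1.x assumed): averaging the gradients of one variable across two "towers".
import tensorflow as tf

w = tf.Variable(1.0)
opt = tf.train.GradientDescentOptimizer(0.1)
tower_gradients = [opt.compute_gradients(w * 2.0, var_list=[w]),
                   opt.compute_gradients(w * 4.0, var_list=[w])]
avg_grads = average_gradients(tower_gradients)   # [(3.0, w)] -- the mean of 2.0 and 4.0
train_op = opt.apply_gradients(avg_grads)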
# Logging
# =======
def ctc_label_dense_to_sparse(labels, label_lengths, batch_size):
    # The second dimension of labels must be equal to the longest label length in the batch
    correct_shape_assert = tf.assert_equal(tf.shape(labels)[1], tf.reduce_max(label_lengths))
    with tf.control_dependencies([correct_shape_assert]):
        labels = tf.identity(labels)

    label_shape = tf.shape(labels)
    num_batches_tns = tf.stack([label_shape[0]])
    max_num_labels_tns = tf.stack([label_shape[1]])

    def range_less_than(previous_state, current_input):
        return tf.expand_dims(tf.range(label_shape[1]), 0) < current_input

    init = tf.cast(tf.fill(max_num_labels_tns, 0), tf.bool)
    init = tf.expand_dims(init, 0)
    dense_mask = tf.scan(range_less_than, label_lengths, initializer=init, parallel_iterations=1)
    dense_mask = dense_mask[:, 0, :]

    label_array = tf.reshape(tf.tile(tf.range(0, label_shape[1]), num_batches_tns),
                             label_shape)
    label_ind = tf.boolean_mask(label_array, dense_mask)

    batch_array = tf.transpose(tf.reshape(tf.tile(tf.range(0, label_shape[0]), max_num_labels_tns),
                                          tf.reverse(label_shape, [0])))
    batch_ind = tf.boolean_mask(batch_array, dense_mask)

    indices = tf.transpose(tf.reshape(tf.concat([batch_ind, label_ind], 0), [2, -1]))
    shape = [batch_size, tf.reduce_max(label_lengths)]
    vals_sparse = gather_nd(labels, indices, shape)

    return tf.SparseTensor(tf.to_int64(indices), vals_sparse, tf.to_int64(label_shape))

# Validate and normalize transcriptions. Returns a cleaned version of the label
# or None if it's invalid.