def mean(x, reduce_instance_dims=True, name=None):
    """Computes the mean of the values of a `Tensor` over the whole dataset.

    The result is the module-level `sum` of `x` divided by the module-level
    `size` of `x`, both evaluated with the same `reduce_instance_dims`.

    Args:
        x: A `Tensor`.
        reduce_instance_dims: By default collapses the batch and instance
            dimensions to arrive at a single scalar output. If False, only
            collapses the batch dimension and outputs a vector of the same
            shape as the input.
        name: (Optional) A name for this operation.

    Returns:
        A `Tensor` containing the mean. If `x` is floating point, the mean
        will have the same type as `x`. If `x` is integral, the output is cast
        to float32 for int8 and int16 and float64 for int32 and int64 (similar
        to the behavior of tf.truediv).
    """
    with tf.name_scope(name, 'mean'):
        # `sum` is the analyzer defined in this module, not the Python
        # builtin; `size` is likewise resolved from outside this function.
        total = sum(x, reduce_instance_dims)
        count = size(x, reduce_instance_dims)
        return tf.divide(total, count)
# Example source code for Python truediv()
def safe_div(numerator, denominator, name='safe_div'):
    """Divides two values, returning 0 wherever the denominator is <= 0.

    The fallback zeros are built with the quotient's shape and dtype and the
    comparison is broadcast to that same shape, so the selection also works
    for non-scalar operands (a bare Python `0` as the `tf.where` fallback only
    matches scalar quotients).

    Args:
        numerator: A real `Tensor`.
        denominator: A real `Tensor`, with dtype matching `numerator`.
        name: Name for the returned op.

    Returns:
        A `Tensor` equal to `numerator` / `denominator` where
        `denominator` > 0 and 0 elsewhere.
    """
    quotient = tf.truediv(numerator, denominator)
    zeros = tf.zeros_like(quotient)
    # Comparing against `zeros` broadcasts the condition to the quotient's
    # shape; the cast keeps the comparison valid for integral denominators,
    # whose true-division result is floating point.
    is_positive = tf.greater(tf.cast(denominator, quotient.dtype), zeros)
    return tf.where(is_positive, quotient, zeros, name=name)
def scale_bboxes(bbox, img_shape):
    """Divide box coordinates by the image height and width.

    Boxes use the layout [ymin, xmin, ymax, xmax]. The y coordinates are
    divided by `img_shape[0]` and the x coordinates by `img_shape[1]`. The
    result lands in [0, 1) only if the inputs are pixel coordinates inside
    the image; nothing here enforces that.

    Args:
        bbox: Box tensor with a statically known rank. When the rank is
            greater than 1 (e.g. `[num_bbox, 4]`) the coordinates are taken
            along axis 1, otherwise along axis 0.
        img_shape: Indexable whose element 0 is used as the image height and
            element 1 as the image width.

    Returns:
        The scaled boxes, stacked back along the axis they were unstacked
        from.
    """
    height = tf.cast(img_shape[0], dtype=tf.float32)
    width = tf.cast(img_shape[1], dtype=tf.float32)

    rank = len(bbox.get_shape().as_list())
    coord_axis = 1 if rank > 1 else 0

    y_min, x_min, y_max, x_max = tf.unstack(bbox, axis=coord_axis)
    scaled = [
        tf.truediv(y_min, height),
        tf.truediv(x_min, width),
        tf.truediv(y_max, height),
        tf.truediv(x_max, width),
    ]
    return tf.stack(scaled, axis=coord_axis)
def scale_bboxes(bbox, img_shape):
    """Normalise [ymin, xmin, ymax, xmax] boxes by the image size.

    Each y coordinate is divided by `img_shape[0]` and each x coordinate by
    `img_shape[1]`, both cast to float32 first.

    Args:
        bbox: Box tensor whose static rank is known; rank > 1 (such as
            `[num_bbox, 4]`) is split along axis 1, anything else along
            axis 0.
        img_shape: Indexable providing the image height at index 0 and the
            image width at index 1.

    Returns:
        Tensor of scaled boxes with the same layout as `bbox`.
    """
    img_h = tf.cast(img_shape[0], dtype=tf.float32)
    img_w = tf.cast(img_shape[1], dtype=tf.float32)

    static_shape = bbox.get_shape().as_list()
    axis = 1 if len(static_shape) > 1 else 0

    coords = tf.unstack(bbox, axis=axis)
    # Exactly four coordinates are expected; unpacking fails otherwise, just
    # like destructuring the unstacked list directly.
    y_min, x_min, y_max, x_max = coords

    scaled = []
    for coord, extent in ((y_min, img_h), (x_min, img_w),
                          (y_max, img_h), (x_max, img_w)):
        scaled.append(tf.truediv(coord, extent))
    return tf.stack(scaled, axis=axis)
def make_test_node(self, hypers_name):
    """Build the test-accuracy node and its summary for one hyper-parameter set.

    For every output variable the arg-max prediction is compared with the
    matching data placeholder. An example counts as correct only when all of
    its output variables match.

    The per-variable differences are accumulated as absolute values: summing
    the signed differences would let a +k error on one variable cancel a -k
    error on another and report the example as correct.

    Args:
        hypers_name: Key into `self.tf_nodes` selecting the "outputs" and
            "placeholders" dictionaries to use.

    Side effects:
        Stores the summary op in `self.summary_nodes["test"]` and the accuracy
        tensor in `self.tf_nodes[hypers_name]["accuracy"]`.
    """
    outputs = self.tf_nodes[hypers_name]["outputs"]

    abs_deltas = []
    for var_name, output_node in outputs.iteritems():
        data_node = self.tf_nodes[hypers_name]["placeholders"][var_name]
        output_rank = output_node.get_shape().ndims
        if output_rank == 1:
            # A rank-1 output is replicated along a new leading axis whose
            # length is the first dimension of the data placeholder.
            output_node = tf.tile(tf.expand_dims(output_node, 0),
                                  [tf.shape(data_node)[0], 1])
        predicted = tf.to_int32(tf.argmax(output_node, dimension=1))
        abs_deltas.append(tf.abs(predicted - data_node))

    zero_if_correct = tf.reduce_sum(tf.pack(abs_deltas), reduction_indices=0)
    zero_elements = tf.equal(zero_if_correct, tf.zeros_like(zero_if_correct))
    n_correct = tf.reduce_sum(tf.to_int32(zero_elements))
    n_total = tf.shape(zero_if_correct)[0]
    accuracy = tf.truediv(n_correct, n_total)

    self.summary_nodes["test"] = tf.scalar_summary('test_accuracy', accuracy)
    self.tf_nodes[hypers_name]["accuracy"] = accuracy
def _prepare_image(self, image):
    """Decode a PNG and fit it into a `[self.height, self.width]` canvas.

    The image is resized so that it fits within `self.height` by
    `self.width` using sizes derived from its aspect ratio, then padded at
    the bottom/right to exactly `[self.height, self.width]`. An image that
    already fits is only converted to float.

    Note that `self.width` is overwritten with `self.max_width` as a side
    effect.

    Args:
        image: PNG-encoded image data accepted by `tf.image.decode_png`.

    Returns:
        The padded image tensor.
    """
    img = tf.image.decode_png(image, channels=self.channels)
    dims = tf.shape(img)
    self.width = self.max_width

    # Width the image would have if scaled to the full target height.
    aspect = tf.truediv(dims[1], dims[0])
    max_width = tf.to_int32(tf.ceil(aspect * self.height_float))
    # Height used when the width is the limiting dimension.
    width_ratio = tf.truediv(self.width, max_width)
    max_height = tf.to_int32(tf.ceil(width_ratio * self.height_float))

    def _as_float():
        return tf.to_float(img)

    def _resize_to_height():
        return tf.image.resize_images(
            img, [self.height, max_width],
            method=tf.image.ResizeMethod.BICUBIC)

    def _resize_to_width():
        return tf.image.resize_images(
            img, [max_height, self.width],
            method=tf.image.ResizeMethod.BICUBIC)

    def _width_fits():
        # Only resize when the image is taller than the target height.
        return tf.cond(tf.less_equal(dims[0], self.height),
                       _as_float,
                       _resize_to_height)

    resized = tf.cond(tf.greater_equal(self.width, max_width),
                      _width_fits,
                      _resize_to_width)

    padded = tf.image.pad_to_bounding_box(
        resized, 0, 0, self.height, self.width)
    return padded
def log_likelihood(mu, var, x, muq, varq, a, mask_flat, config):
    """Compute the masked, per-frame log-likelihood terms.

    Args:
        mu: Mean (or Bernoulli parameter) of the output distribution.
        var: Variance of the output distribution; only used for 'gaussian'.
        x: Observations scored under the output distribution.
        muq: Mean passed to `log_gaussian` for `a`.
        varq: Variance passed to `log_gaussian` for `a`.
        a: Values scored under the Gaussian given by `muq`/`varq`.
        mask_flat: Per-row weights multiplied into the row log-likelihoods;
            its sum is used as the normaliser.
        config: Object providing `out_distr` ('bernoulli' or 'gaussian'),
            `ll_keep_prob` and `use_vae`.

    Returns:
        A tuple `(LL, log_px_given_a, log_qa_given_x)` where
        `LL = log_px_given_a - log_qa_given_x`.

    Raises:
        ValueError: If `config.out_distr` is not a supported distribution.
    """
    if config.out_distr == 'bernoulli':
        log_lik = log_bernoulli(x, mu, eps=1e-6)  # (bs*L, d1*d2)
    elif config.out_distr == 'gaussian':
        log_lik = log_gaussian(x, mu, var)
    else:
        # Fail with a clear message instead of an UnboundLocalError below.
        raise ValueError(
            "Unsupported config.out_distr: %r (expected 'bernoulli' or "
            "'gaussian')" % (config.out_distr,))

    log_lik = tf.reduce_sum(log_lik, 1)  # (bs*L, )
    log_lik = tf.multiply(mask_flat, log_lik)

    # TODO: dropout scales the output as input/keep_prob. Issue?
    # NOTE(review): tf.layers.dropout takes a drop `rate` as its second
    # positional argument and defaults to `training=False`; confirm that
    # passing a keep probability here has the intended effect.
    if config.ll_keep_prob < 1.0:
        log_lik = tf.layers.dropout(log_lik, config.ll_keep_prob)

    # We compute the log-likelihood *per frame*
    num_el = tf.reduce_sum(mask_flat)
    log_px_given_a = tf.truediv(tf.reduce_sum(log_lik), num_el)  # ()

    if config.use_vae:
        log_qa_given_x = tf.reduce_sum(log_gaussian(a, muq, varq), 1)  # (bs*L, )
        log_qa_given_x = tf.multiply(mask_flat, log_qa_given_x)
        log_qa_given_x = tf.truediv(tf.reduce_sum(log_qa_given_x), num_el)  # ()
    else:
        log_qa_given_x = tf.constant(0.0, dtype=tf.float32, shape=())

    LL = log_px_given_a - log_qa_given_x
    return LL, log_px_given_a, log_qa_given_x
def iou(boxlist1, boxlist2, scope=None):
    """Computes pairwise intersection-over-union between box collections.

    Pairs whose intersection is exactly 0 get an IOU of 0 instead of the
    division result.

    Args:
        boxlist1: BoxList holding N boxes
        boxlist2: BoxList holding M boxes
        scope: name scope.

    Returns:
        a tensor with shape [N, M] representing pairwise iou scores.
    """
    with tf.name_scope(scope, 'IOU'):
        overlap = intersection(boxlist1, boxlist2)
        first_areas = area(boxlist1)
        second_areas = area(boxlist2)
        # Broadcast the two area vectors into an [N, M] grid.
        area_sums = (tf.expand_dims(first_areas, 1) +
                     tf.expand_dims(second_areas, 0))
        union = area_sums - overlap

        no_overlap = tf.equal(overlap, 0.0)
        zeros = tf.zeros_like(overlap)
        ratio = tf.truediv(overlap, union)
        return tf.where(no_overlap, zeros, ratio)
def matched_iou(boxlist1, boxlist2, scope=None):
    """Compute intersection-over-union between corresponding boxes in boxlists.

    Pairs whose intersection is exactly 0 get an IOU of 0 instead of the
    division result.

    Args:
        boxlist1: BoxList holding N boxes
        boxlist2: BoxList holding N boxes
        scope: name scope.

    Returns:
        a tensor with shape [N] representing pairwise iou scores.
    """
    with tf.name_scope(scope, 'MatchedIOU'):
        overlap = matched_intersection(boxlist1, boxlist2)
        first_areas = area(boxlist1)
        second_areas = area(boxlist2)
        union = first_areas + second_areas - overlap

        no_overlap = tf.equal(overlap, 0.0)
        zeros = tf.zeros_like(overlap)
        ratio = tf.truediv(overlap, union)
        return tf.where(no_overlap, zeros, ratio)
def ioa(boxlist1, boxlist2, scope=None):
    """Computes pairwise intersection-over-area between box collections.

    Intersection-over-area (IOA) between two boxes box1 and box2 is defined
    as their intersection area over box2's area. Note that ioa is not
    symmetric, that is, ioa(box1, box2) != ioa(box2, box1).

    The division is unguarded: no special handling is applied when a box in
    `boxlist2` has zero area.

    Args:
        boxlist1: BoxList holding N boxes
        boxlist2: BoxList holding M boxes
        scope: name scope.

    Returns:
        a tensor with shape [N, M] representing pairwise ioa scores.
    """
    with tf.name_scope(scope, 'IOA'):
        overlap = intersection(boxlist1, boxlist2)
        # Shape [1, M] so the areas broadcast across the N rows.
        second_areas = tf.expand_dims(area(boxlist2), 0)
        return tf.truediv(overlap, second_areas)
# File source: utils_combine.py
# Project: adversarial-deep-structural-networks
# Author: wentaozhu
# Project source
# File source
# Views: 29
# Favorites: 0
# Likes: 0
# Comments: 0
def dice_tf(label, pred):
    """Return the Dice score 2*TP / (FP + FN + 2*TP).

    TP, FP and FN are the sums of `pred * label`, `pred * (1 - label)` and
    `(1 - pred) * label` respectively. The denominator is not guarded against
    zero.

    Args:
        label: Reference values.
        pred: Predicted values, multiplied element-wise with `label`.

    Returns:
        The Dice score as computed by `tf.truediv`.
    """
    true_pos = tf.reduce_sum(tf.mul(pred, label))
    false_pos = tf.reduce_sum(tf.mul(pred, 1 - label))
    false_neg = tf.reduce_sum(tf.mul(1 - pred, label))

    numerator = 2 * true_pos
    denominator = false_pos + false_neg + 2 * true_pos
    return tf.truediv(numerator, denominator)
def dice_tf(label, pred):
    """Compute the Dice score between `label` and `pred`.

    With TP = sum(pred * label), FP = sum(pred * (1 - label)) and
    FN = sum((1 - pred) * label), the result is 2*TP / (FP + FN + 2*TP).
    No protection against a zero denominator is applied.

    Args:
        label: Reference values.
        pred: Predicted values, multiplied element-wise with `label`.

    Returns:
        The Dice score as computed by `tf.truediv`.
    """
    hits = tf.mul(pred, label)
    TP = tf.reduce_sum(hits)
    false_alarms = tf.mul(pred, 1 - label)
    FP = tf.reduce_sum(false_alarms)
    misses = tf.mul(1 - pred, label)
    FN = tf.reduce_sum(misses)
    return tf.truediv(2 * TP, FP + FN + 2 * TP)
def _get_testing(rnn_logits, sequence_length, label, label_length):
    """Create ops for testing (all scalars).

    Args:
        rnn_logits: Logits passed to the CTC loss and the beam-search decoder.
        sequence_length: Sequence lengths matching `rnn_logits`.
        label: Reference labels used for the loss and the edit distance.
        label_length: Lengths of the reference labels; the size of its first
            dimension is used as the number of sequences.

    Returns:
        loss: CTC loss function value from `model.ctc_loss_layer`.
        label_error: Summed edit distance of the top beam-search path divided
            by the summed label lengths.
        sequence_error: Number of sequences with a non-zero edit distance
            divided by the number of sequences.
    """
    with tf.name_scope("train"):
        loss = model.ctc_loss_layer(rnn_logits, label, sequence_length)

    with tf.name_scope("test"):
        decoded, _ = tf.nn.ctc_beam_search_decoder(
            rnn_logits,
            sequence_length,
            beam_width=128,
            top_paths=1,
            merge_repeated=True)
        # Cast the single decoded path to int32 for edit_distance.
        hypothesis = tf.cast(decoded[0], tf.int32)
        per_sequence_errors = tf.edit_distance(
            hypothesis, label, normalize=False)
        wrong_sequences = tf.count_nonzero(per_sequence_errors, axis=0)

        total_label_error = tf.reduce_sum(per_sequence_errors)
        total_labels = tf.reduce_sum(label_length)
        label_error = tf.truediv(
            total_label_error,
            tf.cast(total_labels, tf.float32),
            name='label_error')

        num_sequences = tf.shape(label_length)[0]
        sequence_error = tf.truediv(
            tf.cast(wrong_sequences, tf.int32),
            num_sequences,
            name='sequence_error')

        tf.summary.scalar('loss', loss)
        tf.summary.scalar('label_error', label_error)
        tf.summary.scalar('sequence_error', sequence_error)

    return loss, label_error, sequence_error
def __truediv__(self, other):
    """Implement `self / other` by delegating to `tf.truediv`."""
    quotient = tf.truediv(self, other)
    return quotient
def __rtruediv__(self, other):
    """Implement `other / self` (reflected operands) via `tf.truediv`."""
    quotient = tf.truediv(other, self)
    return quotient
def var(x, reduce_instance_dims=True, name=None):
    """Computes the variance of the values of a `Tensor` over the whole dataset.

    Uses the biased variance (0 delta degrees of freedom), as given by
    (x - mean(x))**2 / length(x).

    Args:
        x: A `Tensor`.
        reduce_instance_dims: By default collapses the batch and instance
            dimensions to arrive at a single scalar output. If False, only
            collapses the batch dimension and outputs a vector of the same
            shape as the input.
        name: (Optional) A name for this operation.

    Returns:
        A `Tensor` containing the variance. If `x` is floating point, the
        variance will have the same type as `x`. If `x` is integral, the
        output is cast to float32 for int8 and int16 and float64 for int32
        and int64 (similar to the behavior of tf.truediv).
    """
    with tf.name_scope(name, 'var'):
        # `mean` is the analyzer defined in this module, not a builtin.
        center = mean(x, reduce_instance_dims)
        # `center` is float32 or float64 depending on the type of x, so x is
        # cast to that dtype before subtracting.
        deviations = tf.cast(x, center.dtype) - center
        squared = tf.square(deviations)
        return mean(squared, reduce_instance_dims)
def scale_to_z_score(x, name=None):
    """Returns a standardized column with mean 0 and variance 1.

    Scaling to z-score subtracts out the mean and divides by standard
    deviation. Note that the standard deviation computed here is based on the
    biased variance (0 delta degrees of freedom), as computed by
    analyzers.var.

    Args:
        x: A numeric `Tensor`.
        name: (Optional) A name for this operation.

    Returns:
        A `Tensor` containing the input column scaled to mean 0 and variance 1
        (standard deviation 1), given by: (x - mean(x)) / std_dev(x).
        If `x` is floating point, the mean will have the same type as `x`. If
        `x` is integral, the output is cast to float32 for int8 and int16 and
        float64 for int32 and int64 (similar to the behavior of tf.truediv).

        Note that TFLearn generally permits only tf.int64 and tf.float32, so
        casting this scaler's output may be necessary. In particular, scaling
        an int64 tensor yields a float64 tensor, which would need a cast to
        float32 to be used in TFLearn.
    """
    with tf.name_scope(name, 'scale_to_z_score'):
        # The mean is float32 or float64 depending on the type of x; x is
        # cast to match before centering.
        x_mean = analyzers.mean(x)
        centered = tf.cast(x, x_mean.dtype) - x_mean
        std_dev = tf.sqrt(analyzers.var(x))
        return centered / std_dev
def __init__(self, config, reuse=False):
    """Build the placeholders, normalised features, inference, loss and train step.

    Args:
        config: Configuration object; `height`, `width` and `channels` are
            read here to shape the input placeholder.
        reuse: Forwarded to `self.forward_pass`.
    """
    self._config = config

    input_shape = [None, config.height, config.width, config.channels]
    self._x = tf.placeholder(tf.float32, input_shape, name="x")

    embedding = self.forward_pass(reuse)
    # Divide each embedding by its L2 norm taken along axis 1.
    embedding_norm = tf.sqrt(
        tf.reduce_sum(tf.square(embedding), 1, keep_dims=True))
    self._feats = tf.truediv(embedding, embedding_norm)

    # Number of relevant points for each query
    self._num_pos = tf.placeholder(tf.int32, [None], name="num_pos")
    self._num_neg = tf.placeholder(tf.int32, [None], name="num_neg")

    self._batch_size = tf.shape(self._x)[0]

    # The inds belonging to the positive and negative sets for each query
    self._pos_inds = tf.placeholder(tf.int32, [None, None], name="pos_inds")
    self._neg_inds = tf.placeholder(tf.int32, [None, None], name="neg_inds")

    self._n_queries_to_parse = tf.placeholder(
        tf.int32, [], name="n_queries_to_parse")

    # The solution of loss-augmented inference for each query,
    # shaped (num queries, num_pos, num_neg)
    self._Y_aug = tf.placeholder(
        tf.float32, [None, None, None], name="Y_aug")

    inference_outputs = self.perform_inference_mAP()
    (self._phi_pos,
     self._phi_neg,
     self._mAP_score_std,
     self._mAP_score_aug,
     self._mAP_score_GT,
     self._skipped_queries) = inference_outputs

    self._loss = self.compute_loss()
    self._train_step = self.get_train_step()
def join_branches(self, feats_A, feats_B):
    """Combine the features of two branches into pair features.

    Both inputs are first divided by their L2 norm along axis 1, then joined
    according to `self.config.join_branches`:

    * "concat": concatenation along axis 1.
    * "abs_diff": element-wise absolute difference.

    Args:
        feats_A: Features of the first branch.
        feats_B: Features of the second branch.

    Returns:
        The joined pair features.

    Raises:
        ValueError: If `self.config.join_branches` is not one of the
            supported modes (previously this surfaced as an
            UnboundLocalError on return).
    """
    mode = self.config.join_branches
    # Validate before building any ops so a bad config fails fast.
    if mode not in ("concat", "abs_diff"):
        raise ValueError(
            "Unsupported config.join_branches: %r (expected 'concat' or "
            "'abs_diff')" % (mode,))

    def _l2_normalize(feats):
        # Divide each row by its Euclidean norm.
        norm = tf.sqrt(tf.reduce_sum(tf.square(feats), 1, keep_dims=True))
        return tf.truediv(feats, norm)

    feats_A = _l2_normalize(feats_A)
    feats_B = _l2_normalize(feats_B)

    if mode == "concat":
        # NOTE(review): `tf.concat(axis, values)` is the pre-1.0 TensorFlow
        # argument order; kept as-is to match the rest of this code.
        return tf.concat(1, [feats_A, feats_B])
    return tf.abs(feats_A - feats_B)
def __init__(self, model, mask, prob, coords, offset_xy_min, offset_xy_max, areas):
    """Build the ground-truth tensors and the loss objectives.

    The ground-truth inputs are wrapped in identity ops under the 'true'
    scope. The IOU between the model's boxes and the ground-truth boxes
    selects the best-matching box, and four objectives are stored on `self`
    under the keys 'iou_best', 'iou_normal', 'coords' and 'prob'.

    Args:
        model: Object providing `offset_xy_min`, `offset_xy_max`, `areas`,
            `iou`, `coords` and `prob` tensors.
        mask: Ground-truth mask.
        prob: Ground-truth probabilities.
        coords: Ground-truth coordinates.
        offset_xy_min: Ground-truth minimum box corners.
        offset_xy_max: Ground-truth maximum box corners.
        areas: Ground-truth box areas.
    """
    self.model = model

    with tf.name_scope('true'):
        self.mask = tf.identity(mask, name='mask')
        self.prob = tf.identity(prob, name='prob')
        self.coords = tf.identity(coords, name='coords')
        self.offset_xy_min = tf.identity(offset_xy_min, name='offset_xy_min')
        self.offset_xy_max = tf.identity(offset_xy_max, name='offset_xy_max')
        self.areas = tf.identity(areas, name='areas')

    with tf.name_scope('iou') as name:
        inter_min = tf.maximum(
            model.offset_xy_min, self.offset_xy_min, name='_offset_xy_min')
        inter_max = tf.minimum(
            model.offset_xy_max, self.offset_xy_max, name='_offset_xy_max')
        # Negative extents mean no overlap, so clamp them to zero.
        inter_wh = tf.maximum(inter_max - inter_min, 0.0, name='_wh')
        inter_area = tf.reduce_prod(inter_wh, -1, name='_areas')
        # The lower bound keeps the division below away from zero.
        union_area = tf.maximum(
            self.areas + model.areas - inter_area, 1e-10, name='areas')
        iou = tf.truediv(inter_area, union_area, name=name)

    with tf.name_scope('mask'):
        best_box_iou = tf.reduce_max(iou, 2, True, name='best_box_iou')
        best_box = tf.to_float(tf.equal(iou, best_box_iou), name='best_box')
        mask_best = tf.identity(self.mask * best_box, name='mask_best')
        mask_normal = tf.identity(1 - mask_best, name='mask_normal')

    with tf.name_scope('dist'):
        iou_dist = tf.square(model.iou - mask_best, name='iou_dist')
        coords_dist = tf.square(model.coords - self.coords, name='coords_dist')
        prob_dist = tf.square(model.prob - self.prob, name='prob_dist')

    with tf.name_scope('objectives'):
        # Product of the static dimensions of iou_dist; every objective is
        # divided by it.
        cnt = np.multiply.reduce(iou_dist.get_shape().as_list())

        def _store(key, weighted):
            self[key] = tf.identity(tf.reduce_sum(weighted) / cnt, name=key)

        _store('iou_best', mask_best * iou_dist)
        _store('iou_normal', mask_normal * iou_dist)
        _store('coords', tf.expand_dims(mask_best, -1) * coords_dist)
        _store('prob', tf.expand_dims(self.mask, -1) * prob_dist)
def __init__(self, model, mask, prob, coords, offset_xy_min, offset_xy_max, areas):
    """Build the ground-truth tensors and the loss objectives.

    The ground-truth inputs are wrapped in identity ops under the 'true'
    scope. The IOU between the model's boxes and the ground-truth boxes
    selects the best-matching box, and four objectives are stored on `self`
    under the keys 'iou_best', 'iou_normal', 'coords' and 'prob'. Both the
    'coords' and the 'prob' objectives are weighted by the best-box mask.

    Args:
        model: Object providing `offset_xy_min`, `offset_xy_max`, `areas`,
            `iou`, `coords` and `prob` tensors.
        mask: Ground-truth mask.
        prob: Ground-truth probabilities.
        coords: Ground-truth coordinates.
        offset_xy_min: Ground-truth minimum box corners.
        offset_xy_max: Ground-truth maximum box corners.
        areas: Ground-truth box areas.
    """
    self.model = model

    with tf.name_scope('true'):
        self.mask = tf.identity(mask, name='mask')
        self.prob = tf.identity(prob, name='prob')
        self.coords = tf.identity(coords, name='coords')
        self.offset_xy_min = tf.identity(offset_xy_min, name='offset_xy_min')
        self.offset_xy_max = tf.identity(offset_xy_max, name='offset_xy_max')
        self.areas = tf.identity(areas, name='areas')

    with tf.name_scope('iou') as name:
        inter_min = tf.maximum(
            model.offset_xy_min, self.offset_xy_min, name='_offset_xy_min')
        inter_max = tf.minimum(
            model.offset_xy_max, self.offset_xy_max, name='_offset_xy_max')
        # Negative extents mean no overlap, so clamp them to zero.
        inter_wh = tf.maximum(inter_max - inter_min, 0.0, name='_wh')
        inter_area = tf.reduce_prod(inter_wh, -1, name='_areas')
        # The lower bound keeps the division below away from zero.
        union_area = tf.maximum(
            self.areas + model.areas - inter_area, 1e-10, name='areas')
        iou = tf.truediv(inter_area, union_area, name=name)

    with tf.name_scope('mask'):
        best_box_iou = tf.reduce_max(iou, 2, True, name='best_box_iou')
        best_box = tf.to_float(tf.equal(iou, best_box_iou), name='best_box')
        mask_best = tf.identity(self.mask * best_box, name='mask_best')
        mask_normal = tf.identity(1 - mask_best, name='mask_normal')

    with tf.name_scope('dist'):
        iou_dist = tf.square(model.iou - mask_best, name='iou_dist')
        coords_dist = tf.square(model.coords - self.coords, name='coords_dist')
        prob_dist = tf.square(model.prob - self.prob, name='prob_dist')

    with tf.name_scope('objectives'):
        # Product of the static dimensions of iou_dist; every objective is
        # divided by it.
        cnt = np.multiply.reduce(iou_dist.get_shape().as_list())

        iou_best_sum = tf.reduce_sum(mask_best * iou_dist)
        self['iou_best'] = tf.identity(iou_best_sum / cnt, name='iou_best')

        iou_normal_sum = tf.reduce_sum(mask_normal * iou_dist)
        self['iou_normal'] = tf.identity(
            iou_normal_sum / cnt, name='iou_normal')

        # Trailing axis added so the mask broadcasts over the last dimension
        # of the coordinate and probability distances.
        best_expanded = tf.expand_dims(mask_best, -1)

        coords_sum = tf.reduce_sum(best_expanded * coords_dist)
        self['coords'] = tf.identity(coords_sum / cnt, name='coords')

        prob_sum = tf.reduce_sum(best_expanded * prob_dist)
        self['prob'] = tf.identity(prob_sum / cnt, name='prob')
def do_not_pretrain(self):
    """Create the layer's weight and bias variables directly.

    Inside the "SDAE_Variable" variable scope, the weight is initialised from
    a truncated normal with mean 0 and standard deviation `1 / self.lambda_w`
    and the bias is initialised to zeros. Variable names are suffixed with
    `self.itr`.

    Returns:
        A tuple `(pre_W, pre_b)` of the weight variable of shape
        `[self.n_visible, self.n_hidden]` and the bias variable of shape
        `self.n_hidden`.
    """
    suffix = str(self.itr)
    with tf.variable_scope("SDAE_Variable"):
        weight_stddev = tf.truediv(1.0, self.lambda_w)
        weight_init = tf.truncated_normal(
            shape=[self.n_visible, self.n_hidden],
            mean=0,
            stddev=weight_stddev)
        pre_W = tf.get_variable(
            name="pre_W" + suffix, initializer=weight_init, dtype=tf.float32)

        bias_init = tf.zeros(shape=self.n_hidden)
        pre_b = tf.get_variable(
            name="pre_b" + suffix, initializer=bias_init, dtype=tf.float32)
        # An alternative that initialised both variables with
        # tf.random_normal_initializer() was disabled here.
    return pre_W, pre_b
def test_TrueDiv(self):
    """Run `self.check` on a `tf.truediv` node.

    The operands are whatever `self.random((3, 5), (3, 5))` yields, unpacked
    as the positional arguments of `tf.truediv`.
    """
    operands = self.random((3, 5), (3, 5))
    t = tf.truediv(*operands)
    self.check(t)
def __softmax_crossentropy(self, scores, targets):
    """Compute the mean softmax cross-entropy over axis 3.

    The softmax is taken along axis 3 of `scores`. The cross-entropy
    `-sum(targets * log(softmax))` is reduced over that axis and averaged
    over all remaining positions.

    The per-position maximum is subtracted from the scores before
    exponentiating. Softmax is invariant to such a shift, and it prevents
    `exp` from overflowing to inf (and the ratio from becoming NaN) for large
    scores.

    Args:
        scores: Unnormalised scores; the softmax axis is axis 3.
        targets: Target distribution, multiplied element-wise with the log
            probabilities.

    Returns:
        Scalar tensor holding the mean cross-entropy.
    """
    # The shift is treated as a constant so it contributes no gradient.
    scores_max = tf.expand_dims(tf.reduce_max(scores, axis=3), axis=-1)
    scores_exp = tf.exp(scores - tf.stop_gradient(scores_max))

    scores_sum = tf.maximum(tf.reduce_sum(scores_exp, axis=3), 1e-10)
    scores_sum = tf.expand_dims(scores_sum, axis=-1)
    scores_normalized = tf.truediv(
        scores_exp, scores_sum, name="scores_normalized")
    # Lower bound keeps log() finite for vanishing probabilities.
    scores_normalized = tf.maximum(scores_normalized, 1e-10)

    cross_entropy = tf.reduce_mean(
        -tf.reduce_sum(targets * tf.log(scores_normalized),
                       reduction_indices=[3]))
    return cross_entropy
def _build_loss(self):
    """Create the loss placeholders, the total loss and its summary tensors.

    The total loss is
    `policy_loss + 0.5 * value_loss + entropy_beta * xentropy_loss`, where
    `entropy_beta` comes from `linear_decrise_op`. It is not divided by the
    batch size (that normalisation is disabled); only the individual terms
    exposed for summaries are.

    Side effects:
        Sets `self.rewards`, `self.actions`, `self.adv` and `self.total_loss`,
        and extends `self.for_summary_scalar` and `self.for_summary_hist`.
    """
    self.rewards = tf.placeholder(tf.float32, [None])
    self.actions = tf.placeholder(tf.uint8, [None])
    self.adv = tf.placeholder(tf.float32, [None], name="adv")

    a_one_hot = tf.one_hot(self.actions, self.action_dim)

    # The small constant keeps log() finite for zero probabilities.
    log_prob = tf.log(self.pf + 1e-6)
    log_pi_a_given_s = tf.reduce_sum(log_prob * a_one_hot, 1)

    policy_loss = -tf.reduce_sum(log_pi_a_given_s * self.adv)
    value_loss = tf.nn.l2_loss(self.vf - self.rewards)

    entropy_beta = linear_decrise_op(
        self.eb, self.global_step, 'entropy_beta')
    xentropy_loss = tf.reduce_sum(self.pf * log_prob)

    self.total_loss = (policy_loss
                       + 0.5 * value_loss
                       + entropy_beta * xentropy_loss)

    batch_size = tf.cast(tf.shape(self.rewards)[0], tf.float32)

    scalar_summaries = [
        tf.reduce_mean(self.adv, name='adv'),
        tf.reduce_mean(self.vf, name='value_mean'),
        tf.reduce_mean(log_pi_a_given_s, name='log_p_mean'),
        tf.reduce_mean(self.rewards, name="true_value_mean"),
        tf.identity(policy_loss / batch_size, name="policy_loss"),
        tf.identity(value_loss / batch_size, name="value_loss"),
        tf.identity((entropy_beta * xentropy_loss) / batch_size,
                    name='entropy_loss'),
        entropy_beta,
        tf.identity(self.total_loss, name='total_loss'),
    ]
    self.for_summary_scalar += scalar_summaries

    predicted_action = tf.argmax(self.pf, axis=1, name='action_predicted')
    self.for_summary_hist += [predicted_action]
def _add_loss_graph(self):
    """Define the loss operation.

    Builds three losses (classification, confidence score and bounding-box
    regression), adds each to the 'losses' collection and sums the whole
    collection into `self.loss`.

    Side effects:
        Sets `self.class_loss`, `self.conf_loss`, `self.bbox_loss` and
        `self.loss`, and records a 'mean iou' scalar summary.
    """
    mc = self.mc

    with tf.variable_scope('class_regression'):
        # cross-entropy: q * -log(p) + (1-q) * -log(1-p)
        # a small value is added inside log to prevent blowing up
        positive_term = self.labels * (
            -tf.log(self.pred_class_probs + mc.EPSILON))
        negative_term = (1 - self.labels) * (
            -tf.log(1 - self.pred_class_probs + mc.EPSILON))
        weighted_entropy = (
            (positive_term + negative_term)
            * self.input_mask * mc.LOSS_COEF_CLASS)
        self.class_loss = tf.truediv(
            tf.reduce_sum(weighted_entropy),
            self.num_objects,
            name='class_loss')
        tf.add_to_collection('losses', self.class_loss)

    with tf.variable_scope('confidence_score_regression'):
        input_mask = tf.reshape(
            self.input_mask, [mc.BATCH_SIZE, mc.ANCHORS])
        squared_error = tf.square((self.ious - self.pred_conf))
        # Masked anchors are weighted per object, the remaining anchors per
        # non-object anchor.
        positive_weight = (
            input_mask * mc.LOSS_COEF_CONF_POS / self.num_objects)
        negative_weight = (
            (1 - input_mask) * mc.LOSS_COEF_CONF_NEG
            / (mc.ANCHORS - self.num_objects))
        per_image = tf.reduce_sum(
            squared_error * (positive_weight + negative_weight),
            reduction_indices=[1])
        self.conf_loss = tf.reduce_mean(per_image, name='confidence_loss')
        tf.add_to_collection('losses', self.conf_loss)
        tf.summary.scalar(
            'mean iou', tf.reduce_sum(self.ious) / self.num_objects)

    with tf.variable_scope('bounding_box_regression'):
        masked_delta = self.input_mask * (
            self.pred_box_delta - self.box_delta_input)
        weighted_square = mc.LOSS_COEF_BBOX * tf.square(masked_delta)
        self.bbox_loss = tf.truediv(
            tf.reduce_sum(weighted_square),
            self.num_objects,
            name='bbox_loss')
        tf.add_to_collection('losses', self.bbox_loss)

    # Sum everything in the 'losses' collection (the losses above plus
    # anything else registered there, e.g. weight decay) into the total loss.
    self.loss = tf.add_n(tf.get_collection('losses'), name='total_loss')
def perform_query_inference(self, q_feats, q_pos_feats, q_neg_feats,
                            q_num_pos, q_num_neg, q_Y_aug):
    """
    Inference for a specific query.

    :param q_feats: the features for the query
    :param q_pos_feats: the features of the query's positive points
    :param q_neg_feats: the features of the query's negative points
    :param q_num_pos: the number of positive points for the query
    :param q_num_neg: the number of negative points for the query
    :param q_Y_aug: the solution of loss-augmented inference for this query
    :return: phi_pos: the similarity between the query and each positive point
    :return: phi_neg: the similarity between the query and each negative point
    :return: AP_score_std: the score of the standard inference solution for AP of this query
    :return: AP_score_aug: the score of the loss-augmented inference solution for AP of this query
    :return: AP_score_GT: the score of the ground truth solution for AP of this query

    Each score is the sum of Y * (phi_pos - phi_neg) over all
    positive/negative pairs, divided by the number of pairs, and is returned
    with shape [1, 1].
    """
    S_pos = tf.matmul(q_feats, q_pos_feats, transpose_b=True)  # (1, num_pos)
    S_neg = tf.matmul(q_feats, q_neg_feats, transpose_b=True)  # (1, num_neg)

    # Similarities sorted in descending order; the indices are not needed.
    phi_pos, _ = tf.nn.top_k(S_pos, k=q_num_pos)
    phi_neg, _ = tf.nn.top_k(S_neg, k=q_num_neg)
    phi_pos = tf.transpose(phi_pos)
    phi_neg = tf.transpose(phi_neg)

    # Grids of shape (num_pos, num_neg) pairing every positive with every
    # negative.
    pos_grid = tf.tile(phi_pos, [1, q_num_neg])
    neg_grid = tf.tile(tf.transpose(phi_neg), [q_num_pos, 1])

    def _pair_score(Y):
        # Mean over all pairs of Y * (phi_pos - phi_neg).
        F = Y * (pos_grid - neg_grid)  # (num_pos, num_neg)
        return tf.truediv(
            tf.reduce_sum(F), tf.to_float(q_num_pos * q_num_neg))

    # Score of standard inference: Y is +1 where the positive outranks the
    # negative and -1 otherwise.
    pos_wins = tf.greater(pos_grid, neg_grid)
    two_or_zero = 2. * tf.to_float(pos_wins)
    Y_std = two_or_zero - tf.ones_like(two_or_zero)
    AP_score_std = _pair_score(Y_std)

    # Score of loss-augmented inferred ranking
    AP_score_aug = _pair_score(q_Y_aug)

    # Score of the groundtruth: every pair is labelled +1.
    q_Y_GT = tf.ones_like(Y_std)
    AP_score_GT = _pair_score(q_Y_GT)

    AP_score_std = tf.reshape(AP_score_std, [1, 1])
    AP_score_aug = tf.reshape(AP_score_aug, [1, 1])
    AP_score_GT = tf.reshape(AP_score_GT, [1, 1])

    return phi_pos, phi_neg, AP_score_std, AP_score_aug, AP_score_GT
def photoAugmentation(source, target, mean):
    """
    Apply the same random photometric augmentation to a source/target pair.

    For every batch element this changes contrast and brightness, scales the
    three colour channels, clips to [0, 1], applies a gamma change and adds
    additive gaussian noise. The same random draws are used for the source
    and the target image of a pair. Finally `mean / 255.0` is subtracted.

    Args:
        source: Batch of source images; the first three static dimensions are
            read as batch size, height and width, and three channels are
            assumed.
        target: Batch of target images, indexed the same way as `source`.
        mean: Value divided by 255.0 and subtracted from every augmented
            image.

    Returns:
        A tuple `(sources, targets)` of the augmented images packed along
        axis 0.
    """
    static_shape = source.get_shape()
    num_batch = static_shape[0].value
    height = static_shape[1].value
    width = static_shape[2].value

    augmented_sources = []
    augmented_targets = []

    for batch_idx in xrange(num_batch):
        img0 = source[batch_idx, :, :, :]
        img1 = target[batch_idx, :, :, :]

        # Contrast and brightness change
        contrast = tf.random_uniform([], minval=-0.3, maxval=0.3)
        contrast = contrast + 1.0
        bright_sigma = 0.2
        brightness = tf.random_normal(
            [height, width, 3], mean=0.0, stddev=bright_sigma,
            dtype=tf.float32)
        img0_contrast = tf.add(tf.scalar_mul(contrast, img0), brightness)
        img1_contrast = tf.add(tf.scalar_mul(contrast, img1), brightness)

        # Color change, may be bad for unsupervised learning.
        # One gain per channel, in channel order 0, 1, 2.
        gains = [tf.random_uniform([], minval=0.9, maxval=1.1)
                 for _ in range(3)]
        img0_channels = [tf.scalar_mul(gain, img0_contrast[:, :, channel])
                         for channel, gain in enumerate(gains)]
        img0_color = tf.pack(img0_channels, axis=2)
        img1_channels = [tf.scalar_mul(gain, img1_contrast[:, :, channel])
                         for channel, gain in enumerate(gains)]
        img1_color = tf.pack(img1_channels, axis=2)

        img0_color = tf.clip_by_value(img0_color, 0.0, 1.0)
        img1_color = tf.clip_by_value(img1_color, 0.0, 1.0)

        # Gamma
        gamma = tf.random_uniform([], minval=0.7, maxval=1.5)
        gamma_inv = tf.inv(gamma)
        img0_gamma = tf.pow(img0_color, gamma_inv)
        img1_gamma = tf.pow(img1_color, gamma_inv)

        # Additive gaussian noise
        sigma = tf.random_uniform([], minval=0.0, maxval=0.04)
        noise = tf.random_normal(
            [height, width, 3], mean=0.0, stddev=sigma, dtype=tf.float32)
        img0_noise = tf.add(img0_gamma, noise)
        img1_noise = tf.add(img1_gamma, noise)

        # Subtract mean
        augmented_sources.append(
            tf.sub(img0_noise, tf.truediv(mean, 255.0)))
        augmented_targets.append(
            tf.sub(img1_noise, tf.truediv(mean, 255.0)))

    packed_sources = tf.pack(augmented_sources, axis=0)
    packed_targets = tf.pack(augmented_targets, axis=0)
    return packed_sources, packed_targets
def set_anchors(img_shape, fea_shape):
    """Set anchors.

    Every feature-map cell gets `config.NUM_ANCHORS` anchors. The anchor
    centres are evenly spaced fractions `i / (W + 1)` and `j / (H + 1)`, and
    the anchor sizes come from `config.ANCHOR_SHAPE`.

    Args:
        img_shape: 1-D list with shape `[2]`. Not used by the body.
        fea_shape: 1-D list with shape `[2]`, read as `[fea_h, fea_w]`.

    Returns:
        anchors: 4-D tensor with shape `[fea_h, fea_w, num_anchors, 4]`,
            holding `[center_x, center_y]` followed by the two anchor-shape
            values.
    """
    H = fea_shape[0]
    W = fea_shape[1]
    B = config.NUM_ANCHORS

    # Repeat the anchor shapes once per feature-map cell.
    anchor_shape = tf.constant(config.ANCHOR_SHAPE, dtype=tf.float32)
    repeated_shapes = tf.concat([anchor_shape] * (W * H), 0)
    anchor_shapes = tf.reshape(repeated_shapes, [H, W, B, 2])

    # x centres: one value per column, repeated for every row and anchor.
    x_steps = tf.range(1, W + 1, 1, dtype=tf.float32)
    center_x = tf.truediv(x_steps, float(W + 1))
    center_x = tf.concat([center_x] * (H * B), 0)
    center_x = tf.reshape(center_x, [B, H, W])
    center_x = tf.transpose(center_x, (1, 2, 0))
    center_x = tf.reshape(center_x, [H, W, B, 1])

    # y centres: one value per row, repeated for every column and anchor.
    y_steps = tf.range(1, H + 1, 1, dtype=tf.float32)
    center_y = tf.truediv(y_steps, float(H + 1))
    center_y = tf.concat([center_y] * (W * B), 0)
    center_y = tf.reshape(center_y, [B, W, H])
    center_y = tf.transpose(center_y, (2, 1, 0))
    center_y = tf.reshape(center_y, [H, W, B, 1])

    anchors = tf.concat([center_x, center_y, anchor_shapes], 3)
    return anchors
def losses(input_mask, labels, ious, box_delta_input, pred_class_probs, pred_conf, pred_box_delta):
    """Build the classification, confidence and bounding-box losses.

    Each loss is registered with `tf.losses.add_loss`; the returned total is
    the sum of everything in the 'losses' collection.

    Args:
        input_mask: Mask whose sum is used as the number of objects; it is
            also reshaped to `[batch_size, config.ANCHORS]` for the
            confidence loss.
        labels: Class targets.
        ious: Target values for the predicted confidence.
        box_delta_input: Target box deltas.
        pred_class_probs: Predicted class probabilities.
        pred_conf: Predicted confidence scores.
        pred_box_delta: Predicted box deltas.

    Returns:
        The total loss tensor.
    """
    batch_size = tf.shape(input_mask)[0]
    num_objects = tf.reduce_sum(input_mask, name='num_objects')

    with tf.variable_scope('class_regression'):
        # cross-entropy: q * -log(p) + (1-q) * -log(1-p)
        # a small value is added inside log to prevent blowing up
        positive_term = labels * (
            -tf.log(pred_class_probs + config.EPSILON))
        negative_term = (1 - labels) * (
            -tf.log(1 - pred_class_probs + config.EPSILON))
        weighted_entropy = (
            (positive_term + negative_term)
            * input_mask * config.LOSS_COEF_CLASS)
        class_loss = tf.truediv(
            tf.reduce_sum(weighted_entropy),
            num_objects,
            name='class_loss')
        tf.losses.add_loss(class_loss)

    with tf.variable_scope('confidence_score_regression'):
        input_mask_ = tf.reshape(input_mask, [batch_size, config.ANCHORS])
        squared_error = tf.square((ious - pred_conf))
        # Masked anchors are weighted per object, the remaining anchors per
        # non-object anchor.
        positive_weight = (
            input_mask_ * config.LOSS_COEF_CONF_POS / num_objects)
        negative_weight = (
            (1 - input_mask_) * config.LOSS_COEF_CONF_NEG
            / (config.ANCHORS - num_objects))
        per_image = tf.reduce_sum(
            squared_error * (positive_weight + negative_weight),
            reduction_indices=[1])
        conf_loss = tf.reduce_mean(per_image, name='confidence_loss')
        tf.losses.add_loss(conf_loss)

    with tf.variable_scope('bounding_box_regression'):
        masked_delta = input_mask * (pred_box_delta - box_delta_input)
        weighted_square = config.LOSS_COEF_BBOX * tf.square(masked_delta)
        bbox_loss = tf.truediv(
            tf.reduce_sum(weighted_square),
            num_objects,
            name='bbox_loss')
        tf.losses.add_loss(bbox_loss)

    # Sum everything in the 'losses' collection (the losses above plus
    # anything else registered there, e.g. weight decay) into the total loss.
    loss = tf.add_n(tf.get_collection('losses'), name='total_loss')
    return loss