def build_export_output(net_out, H, W, max_number_length, C, threshold):
    """Decode raw detector output into detections sorted left to right.

    ``net_out`` is reshaped to ``[H, W, B, -1]`` with ``B = max_number_length``.
    Along the last axis the first 4 values are box terms, the 5th is an
    objectness logit and the rest are class logits.  Boxes are decoded
    against the module-level ``anchors`` sequence (read as interleaved
    width/height pairs), scored, filtered with per-class non-max suppression
    and finally ordered by their left edge.

    Args:
        net_out: Raw network output, reshapeable to ``[H, W, B, -1]``.
        H: Number of grid rows.
        W: Number of grid columns.
        max_number_length: Number of boxes predicted per grid cell (``B``).
        C: Number of class channels non-max suppression is run for.
        threshold: Score cut-off.  Class scores not above it are zeroed
            before suppression, and only boxes whose best surviving class
            score exceeds it are returned.

    Returns:
        A tuple ``(lefts, boxes, classes, probs)`` ordered by ascending left
        edge: the left edges (clipped at 0), the boxes as
        ``[left, top, width, height]`` in grid-normalised units, the arg-max
        class index of each box and the corresponding class score.
    """
    B, sprs_output_box_count = max_number_length, 100
    net_out = tf.reshape(net_out, [H, W, B, -1])
    boxes, boxes_scores, classes_probs = net_out[:, :, :, :4], net_out[:, :, :, 4], net_out[:, :, :, 5:]
    # Constant [H, W, B] grids holding each cell's row / column index.
    row = np.concatenate([np.ones([1, W, B], dtype=np.float32) * i for i in range(H)], axis=0)
    col = np.concatenate([np.ones([H, 1, B], dtype=np.float32) * i for i in range(W)], axis=1)
    # Constant [H, W, B] grids holding the anchor width / height of each box slot.
    anchors_w = np.concatenate([np.ones([H, W, 1], dtype=np.float32) * anchors[2 * i + 0] for i in range(B)], axis=2)
    anchors_h = np.concatenate([np.ones([H, W, 1], dtype=np.float32) * anchors[2 * i + 1] for i in range(B)], axis=2)
    with ops.name_scope(None, 'calc_boxes_coordinates'):
        # (center_x, center_y, width, height), each divided by the grid size.
        boxes = tf.concat([
            tf.expand_dims((tf.sigmoid(boxes[:, :, :, 0]) + col) / W, 3),
            tf.expand_dims((tf.sigmoid(boxes[:, :, :, 1]) + row) / H, 3),
            tf.expand_dims(tf.exp(boxes[:, :, :, 2]) * anchors_w / W, 3),
            tf.expand_dims(tf.exp(boxes[:, :, :, 3]) * anchors_h / H, 3),
        ], axis=3)
        boxes = tf.cast(boxes, tf.float32)
    with ops.name_scope(None, 'calc_boxes_scores'):
        boxes_scores = tf.sigmoid(boxes_scores)
        # Per-class score = class probability * objectness.
        boxes_scores = tf.nn.softmax(classes_probs) * tf.expand_dims(boxes_scores, 3)
        # Zero out every class score that does not exceed the threshold.
        boxes_scores = boxes_scores * tf.cast(boxes_scores > threshold, tf.float32)
        boxes_scores = tf.cast(boxes_scores, tf.float32)
    with ops.name_scope(None, 'non_max_suppression'):
        boxes = tf.reshape(boxes, [H * W * B, 4])
        # tf.image.non_max_suppression interprets each row as the corners
        # [y1, x1, y2, x2]; `boxes` holds (cx, cy, w, h), so convert before
        # computing overlaps.  The conversion lives in its own nested scope
        # so the names of the pre-existing ops in this scope are unchanged.
        with ops.name_scope(None, 'corner_boxes'):
            half_w = boxes[:, 2] / 2
            half_h = boxes[:, 3] / 2
            nms_boxes = tf.concat([
                tf.expand_dims(boxes[:, 1] - half_h, 1),
                tf.expand_dims(boxes[:, 0] - half_w, 1),
                tf.expand_dims(boxes[:, 1] + half_h, 1),
                tf.expand_dims(boxes[:, 0] + half_w, 1),
            ], axis=1)
        sprs_boxes_scores = []
        for i in range(C):
            box_scores = tf.reshape(boxes_scores[:, :, :, i], [H * W * B])
            sprs_boxes_indices = tf.image.non_max_suppression(
                nms_boxes, box_scores, sprs_output_box_count, iou_threshold=0.4)
            # Keep the score only for boxes that survived suppression:
            # scatter a 1 at every kept index and multiply.
            box_scores = box_scores * tf.scatter_nd(
                tf.reshape(sprs_boxes_indices, [-1, 1]),
                tf.ones(tf.shape(sprs_boxes_indices), dtype=tf.float32), [H * W * B])
            sprs_boxes_scores.append(tf.reshape(box_scores, [H * W * B, 1]))
    with ops.name_scope(None, 'select_boxes'):
        sprs_boxes_scores = tf.concat(sprs_boxes_scores, axis=1)
        classes = tf.argmax(sprs_boxes_scores, axis=1)
        classes_probs = tf.reduce_max(sprs_boxes_scores, axis=1)
        selected_box_mask = classes_probs > threshold
        selected_classes = tf.boolean_mask(classes, selected_box_mask)
        selected_boxes = tf.boolean_mask(boxes, selected_box_mask)
        selected_classes_probs = tf.boolean_mask(classes_probs, selected_box_mask)
        # Convert centers to top-left corners; left edges are clipped at 0.
        lefts = selected_boxes[:, 0] - selected_boxes[:, 2] / 2
        lefts = tf.where(lefts < 0, tf.zeros(tf.shape(lefts)), lefts)
        selected_boxes = tf.concat([
            tf.expand_dims(lefts, 1),
            tf.expand_dims(selected_boxes[:, 1] - selected_boxes[:, 3] / 2, 1),
            tf.expand_dims(selected_boxes[:, 2], 1),
            tf.expand_dims(selected_boxes[:, 3], 1),
        ], axis=1)
        selected_lefts = selected_boxes[:, 0]
    with ops.name_scope(None, 'sort_boxes'):
        # top_k sorts descending, so negate to obtain ascending left edges.
        sorted_lefts, sorted_lefts_indices = tf.nn.top_k(selected_lefts * -1, tf.shape(selected_lefts)[0])
        sorted_classes = tf.gather(selected_classes, sorted_lefts_indices)
        sorted_boxes = tf.gather(selected_boxes, sorted_lefts_indices)
        sorted_classes_probs = tf.gather(selected_classes_probs, sorted_lefts_indices)
    return sorted_lefts * -1, sorted_boxes, sorted_classes, sorted_classes_probs
# Example source code for Python's scatter_nd()
def encode_annos(image, labels, bboxes, anchors, num_classes):
    """Scatter ground-truth annotations onto the flattened anchor set.

    Every output has a static first dimension equal to the total number of
    anchors (``fea_h * fea_w * anchors_per_cell``).

    Args:
        image: Not used by this function.
        labels: Class ids, one per bounding box (``[num_bounding_boxes]``).
        bboxes: Bounding boxes, ``[num_bounding_boxes, 4]``; passed to
            ``batch_iou_fast`` and ``yxyx_to_xywh_`` as-is (presumably in
            yxyx layout -- confirm against those helpers).
        anchors: Tensor whose static shape starts with
            ``[fea_h, fea_w, anchors_per_cell]`` and that reshapes to
            ``[num_anchors, 4]``.
        num_classes: Depth of the one-hot label encoding.

    Returns:
        input_mask: ``[num_anchors, 1]``; for each anchor, how many boxes
            were assigned to it.
        labels_input: ``[num_anchors, num_classes]`` one-hot labels scattered
            to the assigned anchors.
        box_delta_input: ``[num_anchors, 4]`` output of ``batch_delta``
            scattered to the assigned anchors.
        box_input: ``[num_anchors, 4]`` the boxes scattered to the assigned
            anchors.
    """
    static_shape = anchors.get_shape().as_list()
    num_anchors = static_shape[2] * static_shape[0] * static_shape[1]
    flat_anchors = tf.reshape(anchors, [num_anchors, 4])

    # Match each box to an anchor; the IoU values themselves are not needed.
    _, matched = batch_iou_fast(xywh_to_yxyx(flat_anchors), bboxes)
    matched = tf.reshape(matched, shape=[-1, 1])

    # Gathering with [N, 1] indices yields [N, 1, 4]; drop the middle axis.
    matched_anchors = tf.squeeze(tf.gather(flat_anchors, matched), axis=1)
    deltas = batch_delta(yxyx_to_xywh_(bboxes), matched_anchors)

    # Boxes placed at the rows of their matched anchors.
    box_input = tf.scatter_nd(matched, bboxes, shape=[num_anchors, 4])

    # One-hot labels placed at the rows of their matched anchors.
    labels_input = tf.scatter_nd(
        matched,
        tf.one_hot(labels, num_classes),
        shape=[num_anchors, num_classes],
    )

    # Anchor mask: sum the one-hot anchor assignment over all boxes.
    assignment = tf.squeeze(tf.one_hot(matched, num_anchors), axis=1)
    print("indices shape:", matched.get_shape().as_list())
    print("one hot anchors shape:", assignment.get_shape().as_list())
    input_mask = tf.reshape(tf.reduce_sum(assignment, axis=0), shape=[-1, 1])

    # Regression targets placed at the rows of their matched anchors.
    box_delta_input = tf.scatter_nd(matched, deltas, shape=[num_anchors, 4])

    return input_mask, labels_input, box_delta_input, box_input
# TODO(shizehao): align anchor center to the grid
def create_loss(final_outputs, answers, answer_lens):
    '''
    Build the length-normalised cross-entropy loss of the decoder.

    The decoder output and the target answers may differ in length along
    the sequence axis.  Whichever is shorter is padded at its end so that
    the logits and the labels (the answers without their first step) line
    up before the cross entropy is computed.

    :param final_outputs: the output of decoder (rank 3, sequence axis
        first); used as logits
    :param answers: the target answers (rank 2); transposed so that the
        sequence axis comes first
    :param answer_lens:
        `Tensor` that representing length of answers
    :return: scalar loss: the masked sum of the per-step losses divided by
        ``sum(answer_lens - 1)``
    '''
    with tf.variable_scope('loss'):
        targets = tf.transpose(answers, (1, 0))
        print("target_tensor[0]: ", targets[0])
        print("final_outputs: ", final_outputs.get_shape())
        print("decoder_inputs_tensor: ", targets.get_shape())
        longest_answer = tf.reduce_max(answer_lens)
        decoded_steps = tf.shape(final_outputs)[0]

        def _tail_padding(amount, rank):
            # A [rank, 2] paddings matrix that is zero except for entry
            # (0, 1), i.e. `amount` elements appended after axis 0.
            return tf.scatter_nd([[0, 1]], tf.expand_dims(amount, axis=0), [rank, 2])

        def _pad_outputs_then_loss():
            # The final outputs tensor has rank 3.
            stretched = tf.pad(
                final_outputs,
                _tail_padding(longest_answer - decoded_steps - 1, 3))
            return tf.nn.sparse_softmax_cross_entropy_with_logits(
                logits=stretched, labels=targets[1:])

        def _pad_answers_then_loss():
            # The answers tensor has rank 2.
            stretched = tf.pad(
                targets,
                _tail_padding(decoded_steps - longest_answer + 1, 2))
            return tf.nn.sparse_softmax_cross_entropy_with_logits(
                logits=final_outputs, labels=stretched[1:])

        step_losses = tf.cond(
            decoded_steps < longest_answer,
            _pad_outputs_then_loss,
            _pad_answers_then_loss)

        # NOTE(review): the mask spans `answer_lens` steps while the
        # normaliser below counts `answer_lens - 1` -- confirm which is
        # intended.
        valid = tf.sequence_mask(tf.to_int32(answer_lens), tf.shape(step_losses)[0])
        step_losses = step_losses * tf.transpose(tf.to_float(valid), [1, 0])
        # self.loss = tf.reduce_mean(losses)
        normaliser = tf.to_float(tf.reduce_sum(answer_lens - 1))
        loss = tf.reduce_sum(step_losses) / normaliser
    return loss
def make_subseparable_kernel(kernel_size, input_channels, filters, separability,
                             kernel_initializer, kernel_regularizer):
    """Make a kernel to do subseparable convolution with `tf.nn.conv2d`.

    Args:
        kernel_size: (height, width) tuple (any sequence is accepted).
        input_channels: Number of input channels.
        filters: Number of output channels.
        separability: Integer denoting separability.  ``1`` builds an
            ordinary dense kernel, ``0`` or ``-1`` builds a depthwise
            separable kernel; magnitudes of 2 or more are not implemented.
        kernel_initializer: Initializer to use for the kernel.
        kernel_regularizer: Regularizer to use for the kernel.

    Returns:
        A 4D tensor of shape ``kernel_size + (input_channels, filters)``.

    Raises:
        AssertionError: If ``abs(separability) >= 2`` and it does not divide
            both ``filters`` and ``input_channels``.
        NotImplementedError: If ``abs(separability) >= 2``.
        ValueError: If ``separability`` matches none of the cases above.
    """
    # Tuple concatenation is used below to build shapes; normalise so that
    # a list-valued kernel_size works as well.
    kernel_size = tuple(kernel_size)

    if separability == 1:
        # Non-separable convolution
        return tf.get_variable(
            "kernel",
            kernel_size + (input_channels, filters),
            initializer=kernel_initializer,
            regularizer=kernel_regularizer)
    elif separability == 0 or separability == -1:
        # Separable convolution
        # TODO(rshin): Check initialization is as expected, as these are not 4D.
        depthwise_kernel = tf.get_variable(
            "depthwise_kernel",
            kernel_size + (input_channels,),
            initializer=kernel_initializer,
            regularizer=kernel_regularizer)
        pointwise_kernel = tf.get_variable(
            "pointwise_kernel", (input_channels, filters),
            initializer=kernel_initializer,
            regularizer=kernel_regularizer)
        # Place channel i's depthwise filter at position (i, i) of an
        # otherwise-zero (in, in, height, width) tensor, then move the
        # spatial axes to the front: (height, width, in, in).
        expanded_depthwise_kernel = tf.transpose(
            tf.scatter_nd(
                indices=tf.tile(
                    tf.expand_dims(tf.range(0, input_channels), axis=1), [1, 2]),
                updates=tf.transpose(depthwise_kernel, (2, 0, 1)),
                shape=(input_channels, input_channels) + kernel_size), (2, 3, 0, 1))
        # Fold the pointwise kernel in with one matmul over the last axis.
        return tf.reshape(
            tf.matmul(
                tf.reshape(expanded_depthwise_kernel, (-1, input_channels)),
                pointwise_kernel), kernel_size + (input_channels, filters))
    elif separability >= 2:
        assert filters % separability == 0, (filters, separability)
        assert input_channels % separability == 0, (input_channels, separability)
        raise NotImplementedError
    elif separability <= -2:
        separability *= -1
        assert filters % separability == 0, (filters, separability)
        assert input_channels % separability == 0, (input_channels, separability)
        raise NotImplementedError
    # Reached only for values such as non-integers strictly between -2 and 2;
    # fail loudly instead of implicitly returning None.
    raise ValueError("Unsupported separability: %r" % (separability,))
def scatter_add_tensor(ref, indices, updates, name=None):
    """
    Return ``ref`` with sparse ``updates`` added at ``indices``.

    ``updates`` is expanded with ``tensorflow.scatter_nd`` into a tensor of
    the same shape as ``ref`` and the two are summed.  ``ref`` is converted
    to a tensor and is not modified; the sum is returned as a new tensor.
    Index and shape rules (including how duplicate indices are combined)
    are those of ``tensorflow.scatter_nd``.

    :param ref: Value convertible to a Tensor; the dense operand of the sum.
    :param indices: Value convertible to an integer Tensor of
        ``scatter_nd``-style indices into ``ref``.  Its dtype is also used
        for the shape tensors built here.
    :param updates: Value convertible to a Tensor holding the values to add;
        it must be addable to ``ref``.
    :param name: A name for the operation (optional).
    :return: Tensor equal to ``ref`` plus the scattered ``updates``.
    """
    with tensorflow.name_scope(name, 'scatter_add_tensor', [ref, indices, updates]) as scope:
        ref = tensorflow.convert_to_tensor(ref, name='ref')
        indices = tensorflow.convert_to_tensor(indices, name='indices')
        updates = tensorflow.convert_to_tensor(updates, name='updates')

        target_shape = tensorflow.shape(ref, out_type=indices.dtype, name='ref_shape')
        dense_updates = tensorflow.scatter_nd(
            indices, updates, target_shape, name='scattered_updates')

        # Runtime guard: the scattered tensor must have exactly ref's shape.
        shapes_agree = tensorflow.assert_equal(
            target_shape,
            tensorflow.shape(dense_updates, out_type=indices.dtype))
        with tensorflow.control_dependencies([shapes_agree]):
            # Passing the scope string as the name gives the final op the
            # scope's own name.
            return tensorflow.add(ref, dense_updates, name=scope)