def euclidean_distance(self):
    x = tf.argmax(tf.reduce_max(self.smoothed_sigm_network, 1), 1)
    y = tf.argmax(tf.reduce_max(self.smoothed_sigm_network, 2), 1)
    x = tf.cast(x, tf.float32)
    y = tf.cast(y, tf.float32)
    dy = tf.squeeze(self.desired_points[:, 0, :])
    dx = tf.squeeze(self.desired_points[:, 1, :])
    sx = tf.squared_difference(x, dx)
    sy = tf.squared_difference(y, dy)
    l2_dist = tf.sqrt(sx + sy)
    return l2_dist
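# A minimal NumPy sketch (hypothetical heatmap, not the data above) of the coordinate
# trick used here: reducing over the height axis and taking argmax gives the peak's
# column (x), while reducing over the width axis gives its row (y).
import numpy as np

heatmap = np.zeros((1, 5, 7), dtype=np.float32)   # [batch, height, width]
heatmap[0, 3, 2] = 1.0                            # peak at row 3, column 2
x = np.argmax(heatmap.max(axis=1), axis=1)        # collapse rows -> column index, [2]
y = np.argmax(heatmap.max(axis=2), axis=1)        # collapse columns -> row index, [3]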
Python reduce_max() example source code
def global_max_pool(incoming, name="GlobalMaxPool"):
    """ Global Max Pooling.
    Input:
        4-D Tensor [batch, height, width, in_channels].
    Output:
        2-D Tensor [batch, pooled dim].
    Arguments:
        incoming: `Tensor`. Incoming 4-D Tensor.
        name: A name for this layer (optional). Default: 'GlobalMaxPool'.
    """
    input_shape = utils.get_incoming_shape(incoming)
    assert len(input_shape) == 4, "Incoming Tensor shape must be 4-D"

    with tf.name_scope(name):
        inference = tf.reduce_max(incoming, [1, 2])

    # Track output tensor.
    tf.add_to_collection(tf.GraphKeys.LAYER_TENSOR + '/' + name, inference)

    return inference
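# For reference, a hedged NumPy equivalent of the spatial reduction only (the
# utils/collection bookkeeping above is TFLearn-specific and omitted):
# tf.reduce_max(incoming, [1, 2]) keeps one maximum per channel.
import numpy as np

x = np.random.rand(8, 14, 14, 64).astype(np.float32)  # [batch, height, width, channels]
pooled = x.max(axis=(1, 2))                            # [batch, channels]
assert pooled.shape == (8, 64)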
def FramePooling(frames, method, **unused_params):
    """Pools over the frames of a video.
    Args:
        frames: A tensor with shape [batch_size, num_frames, feature_size].
        method: "average", "max", "attention", or "none".
    Returns:
        A tensor with shape [batch_size, feature_size] for average, max, or
        attention pooling. A tensor with shape [batch_size*num_frames, feature_size]
        for none pooling.
    Raises:
        ValueError: if method is other than "average", "max", "attention", or
            "none".
    """
    if method == "average":
        return tf.reduce_mean(frames, 1)
    elif method == "max":
        return tf.reduce_max(frames, 1)
    elif method == "none":
        feature_size = frames.get_shape().as_list()[2]
        return tf.reshape(frames, [-1, feature_size])
    else:
        raise ValueError("Unrecognized pooling method: %s" % method)
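# A small NumPy sketch of the three reductions above (synthetic shapes, not
# YouTube-8M data): "average" and "max" collapse the frame axis; "none" flattens it.
import numpy as np

frames = np.random.rand(4, 300, 1024).astype(np.float32)  # [batch, num_frames, feature_size]
avg_pooled = frames.mean(axis=1)                           # [4, 1024]
max_pooled = frames.max(axis=1)                            # [4, 1024]
flattened = frames.reshape(-1, frames.shape[2])            # [1200, 1024] for "none"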
def _build_loss(self):
    config = self.config
    JX = tf.shape(self.x)[2]
    M = tf.shape(self.x)[1]
    JQ = tf.shape(self.q)[1]
    loss_mask = tf.reduce_max(tf.cast(self.q_mask, 'float'), 1)
    # Legacy TF (<1.0) API: softmax_cross_entropy_with_logits took (logits, labels)
    # positionally, and tf.scalar_summary predates tf.summary.scalar.
    losses = tf.nn.softmax_cross_entropy_with_logits(
        self.logits, tf.cast(tf.reshape(self.y, [-1, M * JX]), 'float'))
    ce_loss = tf.reduce_mean(loss_mask * losses)
    tf.add_to_collection('losses', ce_loss)
    ce_loss2 = tf.reduce_mean(tf.nn.softmax_cross_entropy_with_logits(
        self.logits2, tf.cast(tf.reshape(self.y2, [-1, M * JX]), 'float')))
    tf.add_to_collection("losses", ce_loss2)

    self.loss = tf.add_n(tf.get_collection('losses', scope=self.scope), name='loss')
    tf.scalar_summary(self.loss.op.name, self.loss)
    tf.add_to_collection('ema/scalar', self.loss)
def build_graph(self, q_network, config):
    self.ph_reward = tf.placeholder(tf.float32, [None])
    self.ph_action = tf.placeholder(tf.int32, [None])
    self.ph_terminal = tf.placeholder(tf.int32, [None])
    self.ph_q_next_target = tf.placeholder(tf.float32, [None, config.output.action_size])
    self.ph_q_next = tf.placeholder(tf.float32, [None, config.output.action_size])

    action_one_hot = tf.one_hot(self.ph_action, config.output.action_size)
    q_action = tf.reduce_sum(tf.multiply(q_network.node, action_one_hot), axis=1)

    if config.double_dqn:
        q_max = tf.reduce_sum(self.ph_q_next_target * tf.one_hot(tf.argmax(self.ph_q_next, axis=1),
                                                                 config.output.action_size), axis=1)
    else:
        q_max = tf.reduce_max(self.ph_q_next_target, axis=1)

    y = self.ph_reward + tf.cast(1 - self.ph_terminal, tf.float32) * tf.scalar_mul(config.rewards_gamma, q_max)
    return tf.losses.absolute_difference(q_action, y)
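# A hedged NumPy illustration of the two target computations (toy Q-values, not tied
# to the config object above): plain DQN takes the max over the target network's
# Q-values, while double DQN selects the action with the online network and evaluates
# it with the target network.
import numpy as np

q_next = np.array([[1.0, 5.0, 2.0]])         # online net, next state
q_next_target = np.array([[4.0, 3.0, 9.0]])  # target net, next state
reward, terminal, gamma = 1.0, 0, 0.99

q_max_plain = q_next_target.max(axis=1)                   # [9.0]
best_action = q_next.argmax(axis=1)                       # [1], chosen by the online net
q_max_double = q_next_target[np.arange(1), best_action]   # [3.0], evaluated by the target net

y_plain = reward + (1 - terminal) * gamma * q_max_plain   # [9.91]
y_double = reward + (1 - terminal) * gamma * q_max_double # [3.97]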
def attentive_pooling(self, input_left, input_right):
    Q = tf.reshape(input_left, [self.batch_size, self.max_input_left, len(self.filter_sizes) * self.num_filters], name='Q')
    A = tf.reshape(input_right, [self.batch_size, self.max_input_right, len(self.filter_sizes) * self.num_filters], name='A')
    # G = tf.tanh(tf.matmul(tf.matmul(Q, self.U), A, transpose_b=True), name='G')
    first = tf.matmul(tf.reshape(Q, [-1, len(self.filter_sizes) * self.num_filters]), self.U)
    second_step = tf.reshape(first, [self.batch_size, -1, len(self.filter_sizes) * self.num_filters])
    result = tf.matmul(second_step, tf.transpose(A, perm=[0, 2, 1]))
    G = tf.tanh(result)

    # column-wise pooling, row-wise pooling
    row_pooling = tf.reduce_max(G, 1, True, name='row_pooling')
    col_pooling = tf.reduce_max(G, 2, True, name='col_pooling')
    attention_q = tf.nn.softmax(col_pooling, 1, name='attention_q')
    attention_a = tf.nn.softmax(row_pooling, name='attention_a')

    R_q = tf.reshape(tf.matmul(Q, attention_q, transpose_a=1), [self.batch_size, self.num_filters * len(self.filter_sizes), -1], name='R_q')
    R_a = tf.reshape(tf.matmul(attention_a, A), [self.batch_size, self.num_filters * len(self.filter_sizes), -1], name='R_a')
    return R_q, R_a
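# A compact NumPy sketch of the pooling step only (random G, sizes invented for
# illustration): max-pooling G along one axis scores the other side's positions,
# and a softmax turns those scores into attention weights.
import numpy as np

def softmax(z, axis):
    e = np.exp(z - z.max(axis=axis, keepdims=True))
    return e / e.sum(axis=axis, keepdims=True)

G = np.random.rand(2, 10, 12)                 # [batch, question_len, answer_len]
row_pooling = G.max(axis=1, keepdims=True)    # [2, 1, 12] -> scores for answer positions
col_pooling = G.max(axis=2, keepdims=True)    # [2, 10, 1] -> scores for question positions
attention_q = softmax(col_pooling, axis=1)    # sums to 1 over question positions
attention_a = softmax(row_pooling, axis=2)    # sums to 1 over answer positions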
def gradient_binarizing_scalers(grads_and_vars, clip_factor):
    """ Get the scalers."""
    gradients, variables = zip(*grads_and_vars)
    scalers = []
    for gradient in gradients:
        if gradient is None:
            scalers.append(None)
            continue
        if clip_factor > 1.0e-5:
            mean_gradient = tf.reduce_mean(gradient)
            stddev_gradient = tf.sqrt(tf.reduce_mean(tf.square(gradient - mean_gradient)))
            scalers.append(clip_factor * stddev_gradient)
        else:
            scalers.append(tf.reduce_max(tf.abs(gradient)))
    return list(zip(scalers, variables))
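# A NumPy sketch of the two branches (toy gradient, same threshold as above): with
# clipping enabled the scaler is clip_factor times the gradient's standard deviation,
# otherwise it is the gradient's maximum absolute value.
import numpy as np

gradient = np.array([0.2, -1.5, 0.7, -0.1], dtype=np.float32)
clip_factor = 2.5

if clip_factor > 1.0e-5:
    scaler = clip_factor * gradient.std()   # clip_factor * stddev
else:
    scaler = np.abs(gradient).max()         # max |g|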
def average_precision_voc07(precision, recall, name=None):
    """Compute (interpolated) average precision from precision and recall Tensors.

    The implementation follows Pascal 2007 guidelines.
    See also: https://sanchom.wordpress.com/tag/average-precision/
    """
    with tf.name_scope(name, 'average_precision_voc07', [precision, recall]):
        # Convert to float64 to decrease error on cumulated sums.
        precision = tf.cast(precision, dtype=tf.float64)
        recall = tf.cast(recall, dtype=tf.float64)
        # Add zero-limit value to avoid any boundary problem...
        precision = tf.concat([precision, [0.]], axis=0)
        recall = tf.concat([recall, [np.inf]], axis=0)
        # 11-point interpolation: one recall threshold every 0.1 from 0.0 to 1.0.
        l_aps = []
        for t in np.arange(0., 1.1, 0.1):
            mask = tf.greater_equal(recall, t)
            v = tf.reduce_max(tf.boolean_mask(precision, mask))
            l_aps.append(v / 11.)
        ap = tf.add_n(l_aps)
        return ap
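# The same 11-point interpolation in plain NumPy (made-up precision/recall curve):
# at each recall threshold t, take the maximum precision with recall >= t and
# average the eleven values.
import numpy as np

precision = np.array([1.0, 0.8, 0.67, 0.75, 0.6])
recall = np.array([0.25, 0.25, 0.45, 0.65, 0.65])

# Boundary padding, as in the TF version above.
precision = np.concatenate([precision, [0.0]])
recall = np.concatenate([recall, [np.inf]])

ap = 0.0
for t in np.arange(0.0, 1.1, 0.1):
    ap += precision[recall >= t].max() / 11.0
# ap ~= 0.545 (= 6 / 11 for this toy curve)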
def ctc_label_dense_to_sparse(labels, label_lengths, batch_size):
    # The second dimension of labels must be equal to the longest label length in the batch
    correct_shape_assert = tf.assert_equal(tf.shape(labels)[1], tf.reduce_max(label_lengths))
    with tf.control_dependencies([correct_shape_assert]):
        labels = tf.identity(labels)

    label_shape = tf.shape(labels)
    num_batches_tns = tf.stack([label_shape[0]])
    max_num_labels_tns = tf.stack([label_shape[1]])

    def range_less_than(previous_state, current_input):
        return tf.expand_dims(tf.range(label_shape[1]), 0) < current_input

    init = tf.cast(tf.fill(max_num_labels_tns, 0), tf.bool)
    init = tf.expand_dims(init, 0)
    dense_mask = tf.scan(range_less_than, label_lengths, initializer=init, parallel_iterations=1)
    dense_mask = dense_mask[:, 0, :]

    label_array = tf.reshape(tf.tile(tf.range(0, label_shape[1]), num_batches_tns),
                             label_shape)
    label_ind = tf.boolean_mask(label_array, dense_mask)

    batch_array = tf.transpose(tf.reshape(tf.tile(tf.range(0, label_shape[0]), max_num_labels_tns),
                                          tf.reverse(label_shape, [0])))
    batch_ind = tf.boolean_mask(batch_array, dense_mask)

    indices = tf.transpose(tf.reshape(tf.concat([batch_ind, label_ind], 0), [2, -1]))
    shape = [batch_size, tf.reduce_max(label_lengths)]
    vals_sparse = gather_nd(labels, indices, shape)

    return tf.SparseTensor(tf.to_int64(indices), vals_sparse, tf.to_int64(label_shape))
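# A NumPy sketch of what the mask/scan machinery computes (toy labels, hypothetical
# values): keep one (batch, position) index pair per valid label position, where
# tf.reduce_max(label_lengths) fixes the dense width.
import numpy as np

labels = np.array([[3, 7, 2],
                   [5, 1, 0]])               # dense, padded labels
label_lengths = np.array([3, 2])             # labels[1] has one padding entry

assert labels.shape[1] == label_lengths.max()  # the tf.assert_equal above

# Mask of valid positions, equivalent to dense_mask.
dense_mask = np.arange(labels.shape[1])[None, :] < label_lengths[:, None]

batch_ind, label_ind = np.nonzero(dense_mask)
indices = np.stack([batch_ind, label_ind], axis=1)  # [[0,0],[0,1],[0,2],[1,0],[1,1]]
values = labels[dense_mask]                         # [3, 7, 2, 5, 1]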
def create_model(self,
                 model_input,
                 vocab_size,
                 num_frames,
                 **unused_params):
    shape = model_input.get_shape().as_list()
    frames_sum = tf.reduce_sum(tf.abs(model_input), axis=2)
    frames_true = tf.ones(tf.shape(frames_sum))
    frames_false = tf.zeros(tf.shape(frames_sum))
    frames_bool = tf.reshape(tf.where(tf.greater(frames_sum, frames_false), frames_true, frames_false), [-1, shape[1], 1])

    activation_1 = tf.reduce_max(model_input, axis=1)
    activation_2 = tf.reduce_sum(model_input * frames_bool, axis=1) / (tf.reduce_sum(frames_bool, axis=1) + 1e-6)
    activation_3 = tf.reduce_min(model_input, axis=1)

    model_input_1, final_probilities_1 = self.sub_moe(activation_1, vocab_size, scopename="_max")
    model_input_2, final_probilities_2 = self.sub_moe(activation_2, vocab_size, scopename="_mean")
    model_input_3, final_probilities_3 = self.sub_moe(activation_3, vocab_size, scopename="_min")

    final_probilities = tf.stack((final_probilities_1, final_probilities_2, final_probilities_3), axis=1)
    weight2d = tf.get_variable("ensemble_weight2d",
                               shape=[shape[2], 3, vocab_size],
                               regularizer=slim.l2_regularizer(1.0e-8))
    activations = tf.stack((model_input_1, model_input_2, model_input_3), axis=2)
    weight = tf.nn.softmax(tf.einsum("aij,ijk->ajk", activations, weight2d), dim=1)

    result = {}
    result["prediction_frames"] = tf.reshape(final_probilities, [-1, vocab_size])
    result["predictions"] = tf.reduce_sum(final_probilities * weight, axis=1)
    return result
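# A NumPy sketch of the three frame-level summaries above (random input, padded frames
# marked by all-zero feature vectors): the boolean mask excludes padding from the mean,
# while max and min are taken over all frames.
import numpy as np

model_input = np.random.rand(2, 5, 8).astype(np.float32)  # [batch, frames, features]
model_input[0, 3:, :] = 0.0                                # last two frames of sample 0 are padding

frames_bool = (np.abs(model_input).sum(axis=2) > 0).astype(np.float32)[:, :, None]

activation_max = model_input.max(axis=1)
activation_mean = (model_input * frames_bool).sum(axis=1) / (frames_bool.sum(axis=1) + 1e-6)
activation_min = model_input.min(axis=1)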
def create_model(self, model_input, vocab_size, num_frames, l2_penalty=1e-8, **unused_params):
    num_extend = FLAGS.moe_num_extend
    num_layers = num_extend
    lstm_size = FLAGS.lstm_cells
    pool_size = 2
    cnn_input = model_input
    num_filters = [256, 256, 512]
    filter_sizes = [1, 2, 3]
    features_size = sum(num_filters)
    final_probilities = []
    moe_inputs = []

    for layer in range(num_layers):
        cnn_output, num_t = self.cnn(cnn_input, num_filters=num_filters, filter_sizes=filter_sizes, sub_scope="cnn%d" % (layer + 1))
        cnn_output = tf.nn.relu(cnn_output)
        cnn_multiscale = self.rnn(cnn_output, lstm_size, num_frames, sub_scope="rnn%d" % (layer + 1))
        moe_inputs.append(cnn_multiscale)
        final_probility = self.sub_moe(cnn_multiscale, vocab_size, scopename="moe%d" % (layer + 1))
        final_probilities.append(final_probility)
        num_t = pool_size * (num_t // pool_size)
        cnn_output = tf.reshape(cnn_output[:, :num_t, :], [-1, num_t // pool_size, pool_size, features_size])
        cnn_input = tf.reduce_max(cnn_output, axis=2)
        num_frames = tf.maximum(num_frames // pool_size, 1)

    final_probilities = tf.stack(final_probilities, axis=1)
    moe_inputs = tf.stack(moe_inputs, axis=1)
    weight2d = tf.get_variable("ensemble_weight2d",
                               shape=[num_extend, features_size, vocab_size],
                               regularizer=slim.l2_regularizer(1.0e-8))
    weight = tf.nn.softmax(tf.einsum("aij,ijk->aik", moe_inputs, weight2d), dim=1)

    result = {}
    result["prediction_frames"] = tf.reshape(final_probilities, [-1, vocab_size])
    result["predictions"] = tf.reduce_sum(final_probilities * weight, axis=1)
    return result
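# The downsampling step inside the loop above is a strided temporal max pool built
# from a reshape; a NumPy sketch with invented sizes:
import numpy as np

pool_size = 2
cnn_output = np.random.rand(4, 7, 16)                   # [batch, time, features]
num_t = pool_size * (cnn_output.shape[1] // pool_size)  # truncate to 6 steps

pooled = cnn_output[:, :num_t, :].reshape(4, num_t // pool_size, pool_size, 16).max(axis=2)
assert pooled.shape == (4, 3, 16)                       # time axis halved by max pooling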
def create_model(self, model_input, vocab_size, num_frames, distill_labels=None, l2_penalty=1e-8, **unused_params):
    num_extend = FLAGS.moe_num_extend
    num_layers = num_extend
    lstm_size = FLAGS.lstm_cells
    pool_size = 2
    cnn_input = model_input
    cnn_size = FLAGS.cnn_cells
    num_filters = [cnn_size, cnn_size, cnn_size * 2]
    filter_sizes = [1, 2, 3]
    features_size = sum(num_filters)
    final_probilities = []
    moe_inputs = []

    for layer in range(num_layers):
        cnn_output, num_t = self.cnn(cnn_input, num_filters=num_filters, filter_sizes=filter_sizes, sub_scope="cnn%d" % (layer + 1))
        cnn_output = tf.nn.relu(cnn_output)
        cnn_multiscale = self.rnn(cnn_output, lstm_size, num_frames, sub_scope="rnn%d" % (layer + 1))
        moe_inputs.append(cnn_multiscale)
        final_probility = self.sub_moe(cnn_multiscale, vocab_size, distill_labels=distill_labels, scopename="moe%d" % (layer + 1))
        final_probilities.append(final_probility)
        num_t = pool_size * (num_t // pool_size)
        cnn_output = tf.reshape(cnn_output[:, :num_t, :], [-1, num_t // pool_size, pool_size, features_size])
        cnn_input = tf.reduce_max(cnn_output, axis=2)
        num_frames = tf.maximum(num_frames // pool_size, 1)

    final_probilities = tf.stack(final_probilities, axis=1)
    moe_inputs = tf.stack(moe_inputs, axis=1)
    weight2d = tf.get_variable("ensemble_weight2d",
                               shape=[num_extend, lstm_size, vocab_size],
                               regularizer=slim.l2_regularizer(1.0e-8))
    weight = tf.nn.softmax(tf.einsum("aij,ijk->aik", moe_inputs, weight2d), dim=1)

    result = {}
    result["prediction_frames"] = tf.reshape(final_probilities, [-1, vocab_size])
    result["predictions"] = tf.reduce_sum(final_probilities * weight, axis=1)
    return result
def create_model(self, model_input, vocab_size, num_frames, l2_penalty=1e-8, **unused_params):
    num_extend = FLAGS.moe_num_extend
    num_layers = num_extend
    lstm_size = FLAGS.lstm_cells
    pool_size = 2
    cnn_input = model_input
    num_filters = [256, 256, 512]
    filter_sizes = [1, 2, 3]
    features_size = sum(num_filters)
    final_probilities = []
    moe_inputs = []

    for layer in range(num_layers):
        cnn_output, num_t = self.cnn(cnn_input, num_filters=num_filters, filter_sizes=filter_sizes, sub_scope="cnn%d" % (layer + 1))
        cnn_output = tf.nn.relu(cnn_output)
        cnn_multiscale = self.rnn_gate(cnn_output, lstm_size, num_frames, sub_scope="rnn%d" % (layer + 1))
        moe_inputs.append(cnn_multiscale)
        final_probility = self.sub_moe(cnn_multiscale, vocab_size, scopename="moe%d" % (layer + 1))
        final_probilities.append(final_probility)
        num_t = pool_size * (num_t // pool_size)
        cnn_output = tf.reshape(cnn_output[:, :num_t, :], [-1, num_t // pool_size, pool_size, features_size])
        cnn_input = tf.reduce_max(cnn_output, axis=2)
        num_frames = tf.maximum(num_frames // pool_size, 1)

    final_probilities = tf.stack(final_probilities, axis=1)
    moe_inputs = tf.stack(moe_inputs, axis=1)
    weight2d = tf.get_variable("ensemble_weight2d",
                               shape=[num_extend, features_size, vocab_size],
                               regularizer=slim.l2_regularizer(1.0e-8))
    weight = tf.nn.softmax(tf.einsum("aij,ijk->aik", moe_inputs, weight2d), dim=1)

    result = {}
    result["prediction_frames"] = tf.reshape(final_probilities, [-1, vocab_size])
    result["predictions"] = tf.reduce_mean(final_probilities, axis=1)
    return result
def create_model(self, model_input, vocab_size, num_frames, l2_penalty=1e-8, **unused_params):
    num_extend = FLAGS.moe_num_extend
    num_layers = num_extend
    lstm_size = FLAGS.lstm_cells
    pool_size = 2
    cnn_input = model_input
    num_filters = [256, 256, 512]
    filter_sizes = [1, 2, 3]
    features_size = sum(num_filters)
    final_probilities = []
    moe_inputs = []

    for layer in range(num_layers):
        cnn_output, num_t = self.cnn(cnn_input, num_filters=num_filters, filter_sizes=filter_sizes, sub_scope="cnn%d" % (layer + 1))
        cnn_output = tf.nn.relu(cnn_output)
        cnn_multiscale = self.rnn_glu(cnn_output, lstm_size, num_frames, sub_scope="rnn%d" % (layer + 1))
        moe_inputs.append(cnn_multiscale)
        final_probility = self.sub_moe(cnn_multiscale, vocab_size, scopename="moe%d" % (layer + 1))
        final_probilities.append(final_probility)
        num_t = pool_size * (num_t // pool_size)
        cnn_output = tf.reshape(cnn_output[:, :num_t, :], [-1, num_t // pool_size, pool_size, features_size])
        cnn_input = tf.reduce_max(cnn_output, axis=2)
        num_frames = tf.maximum(num_frames // pool_size, 1)

    final_probilities = tf.stack(final_probilities, axis=1)
    moe_inputs = tf.stack(moe_inputs, axis=1)
    weight2d = tf.get_variable("ensemble_weight2d",
                               shape=[num_extend, features_size, vocab_size],
                               regularizer=slim.l2_regularizer(1.0e-8))
    weight = tf.nn.softmax(tf.einsum("aij,ijk->aik", moe_inputs, weight2d), dim=1)

    result = {}
    result["prediction_frames"] = tf.reshape(final_probilities, [-1, vocab_size])
    result["predictions"] = tf.reduce_mean(final_probilities, axis=1)
    return result