def switch(condition, then_tensor, else_tensor):
"""
    Keras' implementation of switch for the TensorFlow backend uses tf.switch, which accepts only scalar conditions.
    It should use tf.select instead.
"""
if K.backend() == 'tensorflow':
import tensorflow as tf
condition_shape = condition.get_shape()
input_shape = then_tensor.get_shape()
if condition_shape[-1] != input_shape[-1] and condition_shape[-1] == 1:
# This means the last dim is an embedding dim. Keras does not mask this dimension. But tf wants
# the condition and the then and else tensors to be the same shape.
condition = K.dot(tf.cast(condition, tf.float32), tf.ones((1, input_shape[-1])))
return tf.select(tf.cast(condition, dtype=tf.bool), then_tensor, else_tensor)
else:
import theano.tensor as T
return T.switch(condition, then_tensor, else_tensor)
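# A minimal usage sketch (the values here are assumptions, not from the original
# source); K is the Keras backend the module above already imports. Note that
# tf.select was removed in TensorFlow 1.0 and its (condition, x, y) behaviour
# moved to tf.where, so newer versions need that rename.
import numpy as np
cond = K.variable(np.array([[1.0], [0.0]], dtype='float32'))   # (batch, 1) mask
then_t = K.variable(np.ones((2, 3), dtype='float32'))          # (batch, embedding_dim)
else_t = K.variable(np.zeros((2, 3), dtype='float32'))
out = switch(cond, then_t, else_t)   # first row taken from then_t, second from else_t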
def sample_from_discretized_mix_logistic(l,nr_mix):
ls = int_shape(l)
xs = ls[:-1] + [3]
# unpack parameters
logit_probs = l[:, :, :, :nr_mix]
l = tf.reshape(l[:, :, :, nr_mix:], xs + [nr_mix*3])
# sample mixture indicator from softmax
sel = tf.one_hot(tf.argmax(logit_probs - tf.log(-tf.log(tf.random_uniform(logit_probs.get_shape(), minval=1e-5, maxval=1. - 1e-5))), 3), depth=nr_mix, dtype=tf.float32)
sel = tf.reshape(sel, xs[:-1] + [1,nr_mix])
# select logistic parameters
means = tf.reduce_sum(l[:,:,:,:,:nr_mix]*sel,4)
log_scales = tf.maximum(tf.reduce_sum(l[:,:,:,:,nr_mix:2*nr_mix]*sel,4), -7.)
coeffs = tf.reduce_sum(tf.nn.tanh(l[:,:,:,:,2*nr_mix:3*nr_mix])*sel,4)
# sample from logistic & clip to interval
# we don't actually round to the nearest 8bit value when sampling
u = tf.random_uniform(means.get_shape(), minval=1e-5, maxval=1. - 1e-5)
x = means + tf.exp(log_scales)*(tf.log(u) - tf.log(1. - u))
x0 = tf.minimum(tf.maximum(x[:,:,:,0], -1.), 1.)
x1 = tf.minimum(tf.maximum(x[:,:,:,1] + coeffs[:,:,:,0]*x0, -1.), 1.)
x2 = tf.minimum(tf.maximum(x[:,:,:,2] + coeffs[:,:,:,1]*x0 + coeffs[:,:,:,2]*x1, -1.), 1.)
return tf.concat(3,[tf.reshape(x0,xs[:-1]+[1]), tf.reshape(x1,xs[:-1]+[1]), tf.reshape(x2,xs[:-1]+[1])])
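# Hypothetical usage sketch (the model tensor and shapes are assumptions): for a
# PixelCNN++-style network over RGB images, the output `l` carries 10 * nr_mix
# channels per pixel (nr_mix mixture logits, plus a mean, log-scale and
# coefficient for each of the 3 colour channels and each mixture component).
nr_mix = 10
l = pixelcnn_out   # assumed tensor of shape [batch, 32, 32, 10 * nr_mix]
x_sample = sample_from_discretized_mix_logistic(l, nr_mix)   # pixel values in [-1, 1]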
def broadcast(tensor, target_tensor):
"""Broadcast a tensor to match the shape of a target tensor.
Args:
tensor (Tensor): tensor to be tiled
target_tensor (Tensor): tensor whose shape is to be matched
"""
rank = lambda t: t.get_shape().ndims
assert rank(tensor) == rank(target_tensor) # TODO: assert that tensors have no overlapping non-unity dimensions
orig_shape = tf.shape(tensor)
target_shape = tf.shape(target_tensor)
# if dim == 1, set it to target_dim
# else, set it to 1
tiling_factor = tf.select(tf.equal(orig_shape, 1), target_shape, tf.ones([rank(tensor)], dtype=tf.int32))
broadcasted = tf.tile(tensor, tiling_factor)
# Add static shape information
broadcasted.set_shape(target_tensor.get_shape())
return broadcasted
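# Small usage sketch (made-up shapes): tile a (batch, 1) column so it matches a
# (batch, seq_len) target; only the size-1 dimensions are expanded.
col = tf.constant([[1.0], [0.0]])    # shape (2, 1)
target = tf.zeros([2, 5])            # shape (2, 5)
tiled = broadcast(col, target)       # shape (2, 5)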
def change_pad_value(values, mask, pad_val):
"""Given a set of values and a pad mask, change the value of all pad entries.
Args:
values (Tensor): of shape [batch_size, seq_length, :, ..., :].
mask (Tensor): binary float tensor of shape [batch_size, seq_length]
pad_val (float): value to set all pad entries to
Returns:
Tensor: a new Tensor of same shape as values
"""
# broadcast the mask to match shape of values
mask = expand_dims_for_broadcast(mask, values) # (batch_size, seq_length, 1, ..., 1)
mask = broadcast(mask, values)
mask = tf.cast(mask, tf.bool) # cast to bool
# broadcast val
broadcast_val = pad_val * tf.ones(tf.shape(values))
new_values = tf.select(mask, values, broadcast_val)
return new_values
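# Usage sketch (shapes are assumptions; expand_dims_for_broadcast is a helper
# defined elsewhere in the same source file): push padded timesteps to a large
# negative value so they never win a max over the sequence dimension.
scores = tf.random_uniform([4, 10, 8])                               # (batch, seq_len, hidden)
seq_mask = tf.sequence_mask([10, 7, 3, 9], maxlen=10, dtype=tf.float32)
masked = change_pad_value(scores, seq_mask, pad_val=-1e9)
best = tf.reduce_max(masked, reduction_indices=[1])                  # (batch, hidden)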
def backward(self):
dx_flat = self.probs
coords = tf.transpose(tf.pack([tf.range(self.N * self.T), self.y_flat]))
binary_mask = tf.sparse_to_dense(coords, dx_flat.get_shape(), 1)
# convert 1/0 to True/False
binary_mask = tf.cast(binary_mask, tf.bool)
decremented = dx_flat - 1
        # build the new dx from the old values or the decremented ones, depending on the mask
dx_flat = tf.select(binary_mask, decremented, dx_flat)
dx_flat /= self.N
dx_flat *= self.mask_flat[:, None]
dx = tf.reshape(dx_flat, [self.N, self.T, self.V])
return dx
def __init__(self, action_bounds):
self.graph = tf.Graph()
with self.graph.as_default():
self.sess = tf.Session()
self.action_size = len(action_bounds[0])
self.action_input = tf.placeholder(tf.float32, [None, self.action_size])
self.pmax = tf.constant(action_bounds[0], dtype = tf.float32)
self.pmin = tf.constant(action_bounds[1], dtype = tf.float32)
self.prange = tf.constant([x - y for x, y in zip(action_bounds[0],action_bounds[1])], dtype = tf.float32)
self.pdiff_max = tf.div(-self.action_input+self.pmax, self.prange)
self.pdiff_min = tf.div(self.action_input - self.pmin, self.prange)
self.zeros_act_grad_filter = tf.zeros([self.action_size])
self.act_grad = tf.placeholder(tf.float32, [None, self.action_size])
self.grad_inverter = tf.select(tf.greater(self.act_grad, self.zeros_act_grad_filter), tf.mul(self.act_grad, self.pdiff_max), tf.mul(self.act_grad, self.pdiff_min))
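# Hypothetical usage sketch (the class name and all numbers are assumptions):
# invert policy gradients for two actions bounded in [-1, 1], with
# action_bounds = [list_of_maxima, list_of_minima].
inverter = GradInverter([[1.0, 1.0], [-1.0, -1.0]])
inverted = inverter.sess.run(
    inverter.grad_inverter,
    feed_dict={inverter.action_input: [[0.9, -0.5]],
               inverter.act_grad: [[0.2, -0.3]]})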
def block_shrinkage_conv(V,mu,rho):
coef = 0.5
V_shape = tf.shape(V); one_val = tf.constant(1.0)
b = tf.div(mu,rho)
V_shape1 = tf.concat(0,[tf.mul(tf.slice(V_shape,[2],[1]),tf.slice(V_shape,[3],[1])),tf.mul(tf.slice(V_shape,[0],[1]),tf.slice(V_shape,[1],[1]))])
V = tf.reshape(tf.transpose(V,perm=[2,3,0,1]),V_shape1)
norm_V = frobenius_norm_block(V,1)
norm_V_per_dimension = tf.div(norm_V,tf.cast(tf.slice(V_shape1,[1],[1]),'float'))
zero_part = tf.zeros(V_shape1)
zero_ind = tf.greater_equal(b,norm_V_per_dimension)
num_zero = tf.reduce_sum(tf.cast(zero_ind,'float'))
# f4 = lambda: tf.greater_equal(tf.truediv(tf.add(tf.reduce_min(fro),tf.reduce_mean(fro)),2.0),fro)
f4 = lambda: tf.greater_equal(tf.reduce_mean(norm_V),norm_V)
f5 = lambda: zero_ind
zero_ind = tf.cond(tf.greater(num_zero,tf.mul(coef,tf.cast(V_shape1[0],'float'))),f4,f5)
G = tf.select(zero_ind,zero_part,tf.mul(tf.sub(one_val,tf.div(b,tf.reshape(norm_V,[-1,1]))),V))
G_shape = tf.concat(0,[tf.slice(V_shape,[2],[1]),tf.slice(V_shape,[3],[1]),tf.slice(V_shape,[0],[1]),tf.slice(V_shape,[1],[1])])
G = tf.transpose(tf.reshape(G,G_shape),perm=[2,3,0,1])
return G,zero_ind
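# Usage sketch (the mu and rho values are made up; frobenius_norm_block is
# defined elsewhere in the original source): apply the group-shrinkage proximal
# step to a conv kernel of shape [height, width, in_channels, out_channels].
W = tf.Variable(tf.truncated_normal([3, 3, 16, 32], stddev=0.1))
W_shrunk, zeroed_groups = block_shrinkage_conv(W, tf.constant(0.01), tf.constant(1.0))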
def block_truncate_conv(V,mu,rho):
coef = 0.5
V_shape = tf.shape(V)
b = tf.sqrt(tf.div(tf.mul(2.,mu),rho)) #threshold
# Reshape the 4D tensor of weights to a 2D matrix with rows containing the conv filters in vectorized form.
V_shape1 = tf.concat(0,[tf.mul(tf.slice(V_shape,[2],[1]),tf.slice(V_shape,[3],[1])),tf.mul(tf.slice(V_shape,[0],[1]),tf.slice(V_shape,[1],[1]))])
V = tf.reshape(tf.transpose(V,perm=[2,3,0,1]),V_shape1)
norm_V = frobenius_norm_block(V,1)
norm_V_per_dimension = tf.div(norm_V,tf.cast(tf.slice(V_shape1,[1],[1]),'float'))
    # Implementation of Eq. 10 in the paper, using an if-condition inside the TensorFlow graph via tf.cond
zero_part = tf.zeros(V_shape1)
zero_ind = tf.greater_equal(b,norm_V_per_dimension)
num_zero = tf.reduce_sum(tf.cast(zero_ind,'float'))
# You can pass parameters to the functions in tf.cond() using lambda
f4 = lambda: tf.greater_equal(tf.reduce_mean(norm_V),norm_V)
f5 = lambda: zero_ind
zero_ind = tf.cond(tf.greater(num_zero,tf.mul(coef,tf.cast(V_shape1[0],'float'))),f4,f5)
G = tf.select(zero_ind,zero_part,V)
G_shape = tf.concat(0,[tf.slice(V_shape,[2],[1]),tf.slice(V_shape,[3],[1]),tf.slice(V_shape,[0],[1]),tf.slice(V_shape,[1],[1])])
G = tf.transpose(tf.reshape(G,G_shape),perm=[2,3,0,1])
return G,zero_ind
def _apply_func(self, activations, is_training, reuse):
'''
apply own functionality
Args:
            activations: the outputs of the wrapped activation function
            is_training: whether or not the network is in training mode
            reuse: whether or not the variables in the network should be reused
        Returns:
            the output of the activation function
'''
with tf.variable_scope('l2_norm', reuse=reuse):
#compute the mean squared value
sig = tf.reduce_mean(tf.square(activations), 1, keep_dims=True)
#divide the input by the mean squared value
normalized = activations/sig
            #if the mean squared value is larger than one, select the normalized
            #value, otherwise select the unnormalised one
return tf.select(tf.greater(tf.reshape(sig, [-1]), 1),
normalized, activations)
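# Standalone sketch of the same idea outside the wrapper class (toy values):
# divide by the mean squared activation only where that mean exceeds one.
acts = tf.constant([[3.0, 4.0], [0.1, 0.2]])
sig = tf.reduce_mean(tf.square(acts), 1, keep_dims=True)     # [[12.5], [0.025]]
capped = tf.select(tf.greater(tf.reshape(sig, [-1]), 1), acts / sig, acts)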
def _compute_huber(predictions, labels, delta=1.0):
predictions.get_shape().assert_is_compatible_with(labels.get_shape())
predictions = tf.to_float(predictions)
labels = tf.to_float(labels)
delta = tf.to_float(delta)
diff = predictions - labels
diff_abs = tf.abs(diff)
delta_fact = 0.5 * tf.square(delta)
condition = tf.less(diff_abs, delta)
left_opt = 0.5 * tf.square(diff)
right_opt = delta * diff_abs - delta_fact
losses_val = tf.select(condition, left_opt, right_opt)
return losses_val
# Returns non-reduced tensor of unweighted losses with batch dimension matching inputs
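# Small usage sketch (values made up): elementwise Huber loss between predictions
# and targets, then reduced to a scalar for training.
preds = tf.constant([0.2, 3.0, -1.5])
targets = tf.constant([0.0, 0.0, -1.0])
loss = tf.reduce_mean(_compute_huber(preds, targets, delta=1.0))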
def yoloconfidloss(y_true, y_pred, t):
real_y_true = tf.select(t, y_true, K.zeros_like(y_true))
pobj = K.sigmoid(y_pred)
lo = K.square(real_y_true-pobj)
value_if_true = lamda_confid_obj*(lo)
value_if_false = lamda_confid_noobj*(lo)
loss1 = tf.select(t, value_if_true, value_if_false)
loss = K.mean(loss1)
#
noobj = tf.select(t, K.zeros_like(y_pred), pobj)
noobjcount = tf.select(t, K.zeros_like(y_pred), K.ones_like(y_pred))
ave_anyobj = K.sum(noobj) / K.sum(noobjcount)
#ave_anyobj = K.mean(pobj)
obj = tf.select(t, pobj, K.zeros_like(y_pred))
objcount = tf.select(t, K.ones_like(y_pred), K.zeros_like(y_pred))
#ave_obj = K.mean( K.sum(obj, axis=1) / (K.sum(objcount, axis=1)+0.000001) ) # prevent div 0
ave_obj = K.sum(obj) / (K.sum(objcount)+0.000001) # prevent div 0
return loss, ave_anyobj, ave_obj
# shape is (gridcells*2,)
def yoloclassloss(y_true, y_pred, t):
lo = K.square(y_true-y_pred)
value_if_true = lamda_class*(lo)
value_if_false = K.zeros_like(y_true)
loss1 = tf.select(t, value_if_true, value_if_false)
# only extract predicted class value at obj location
cat = K.sum(tf.select(t, y_pred, K.zeros_like(y_pred)), axis=1)
# check valid class value
objsum = K.sum(y_true, axis=1)
    # if objsum > 0.5, the cell contains at least one valid object (maybe 1, 2, ... objects)
isobj = K.greater(objsum, 0.5)
# only extract class value at obj location
valid_cat = tf.select(isobj, cat, K.zeros_like(cat))
# prevent div 0
ave_cat = tf.select(K.greater(K.sum(objsum),0.5), K.sum(valid_cat) / K.sum(objsum) , -1)
return K.mean(loss1), ave_cat
def sample(self):
"""Define the computation graph for one hmc sampling."""
accept_rate, new_pos = hmc_sample(
self.pos, self.stepsize, self.num_steps, self.potential_fn
)
new_avg_accept_rate = tf.add(
self.avg_accept_slowness * self.avg_accept_rate,
(1.0 - self.avg_accept_slowness) * accept_rate
)
new_stepsize = tf.select(new_avg_accept_rate > self.target_accept_rate,
self.stepsize * self.stepsize_inc,
self.stepsize * self.stepsize_dec)
new_stepsize = tf.clip_by_value(
new_stepsize, self.stepsize_min, self.stepsize_max
)
updates = [self.pos.assign(new_pos),
self.stepsize.assign(new_stepsize),
self.avg_accept_rate.assign(new_avg_accept_rate)]
return new_pos, updates
# test =================
def p_ternarize(x, p):
x = tf.tanh(x)
shape = x.get_shape()
thre = tf.get_variable('T', trainable=False, collections=[tf.GraphKeys.VARIABLES, 'thresholds'],
initializer=0.05)
flat_x = tf.reshape(x, [-1])
k = int(flat_x.get_shape().dims[0].value * (1 - p))
topK, _ = tf.nn.top_k(tf.abs(flat_x), k)
update_thre = thre.assign(topK[-1])
tf.add_to_collection('update_thre_op', update_thre)
mask = tf.zeros(shape)
mask = tf.select((x > thre) | (x < -thre), tf.ones(shape), mask)
with G.gradient_override_map({"Sign": "Identity", "Mul": "Add"}):
w = tf.sign(x) * tf.stop_gradient(mask)
tf.histogram_summary(w.name, w)
return w
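# Hypothetical usage sketch: ternarize a fully-connected weight matrix inside a
# variable scope so the 'T' threshold variable gets a unique name (G is the graph
# handle the original module uses for gradient_override_map).
with tf.variable_scope('fc1'):
    w = tf.get_variable('W', shape=[256, 128])
    w_t = p_ternarize(w, p=0.95)   # roughly the largest 5% of |w| become +/-1, the rest 0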
def tw_ternarize(x, thre):
shape = x.get_shape()
thre_x = tf.stop_gradient(tf.reduce_max(tf.abs(x)) * thre)
w_p = tf.get_variable('Wp', collections=[tf.GraphKeys.VARIABLES, 'positives'], initializer=1.0)
w_n = tf.get_variable('Wn', collections=[tf.GraphKeys.VARIABLES, 'negatives'], initializer=1.0)
tf.scalar_summary(w_p.name, w_p)
tf.scalar_summary(w_n.name, w_n)
mask = tf.ones(shape)
mask_p = tf.select(x > thre_x, tf.ones(shape) * w_p, mask)
mask_np = tf.select(x < -thre_x, tf.ones(shape) * w_n, mask_p)
mask_z = tf.select((x < thre_x) & (x > - thre_x), tf.zeros(shape), mask)
with G.gradient_override_map({"Sign": "Identity", "Mul": "Add"}):
w = tf.sign(x) * tf.stop_gradient(mask_z)
w = w * mask_np
tf.histogram_summary(w.name, w)
return w
def tf_nan_to_zeros_float32(tensor):
"""
Mask NaN values with zeros
    :param tensor: tensor that might have NaN values
    :return: tensor with NaN values replaced by zeros
"""
return tf.select(tf.is_nan(tensor), tf.zeros(tf.shape(tensor), dtype=tf.float32), tensor)
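# Small usage sketch: scrub the NaN produced by a 0/0 division before summing.
ratios = tf.div(tf.constant([1.0, 0.0, 4.0]), tf.constant([1.0, 0.0, 2.0]))
total = tf.reduce_sum(tf_nan_to_zeros_float32(ratios))   # 1.0 + 0.0 + 2.0 = 3.0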
def huber_loss(x, delta=1.0):
# https://en.wikipedia.org/wiki/Huber_loss
return tf.select(
tf.abs(x) < delta,
tf.square(x) * 0.5,
delta * (tf.abs(x) - 0.5 * delta)
)
def __init__(self, action_bounds):
self.sess = tf.InteractiveSession()
self.action_size = len(action_bounds[0])
self.action_input = tf.placeholder(tf.float32, [None, self.action_size])
self.pmax = tf.constant(action_bounds[0], dtype = tf.float32)
self.pmin = tf.constant(action_bounds[1], dtype = tf.float32)
self.prange = tf.constant([x - y for x, y in zip(action_bounds[0],action_bounds[1])], dtype = tf.float32)
self.pdiff_max = tf.div(-self.action_input+self.pmax, self.prange)
self.pdiff_min = tf.div(self.action_input - self.pmin, self.prange)
self.zeros_act_grad_filter = tf.zeros([self.action_size])
self.act_grad = tf.placeholder(tf.float32, [None, self.action_size])
self.grad_inverter = tf.select(tf.greater(self.act_grad, self.zeros_act_grad_filter), tf.mul(self.act_grad, self.pdiff_max), tf.mul(self.act_grad, self.pdiff_min))
def _relu(self, x, leakiness=0.0):
"""Relu, with optional leaky support."""
return tf.select(tf.less(x, 0.0), leakiness * x, x, name='leaky_relu')
def clipped_error(x):
# Huber loss
try:
return tf.select(tf.abs(x) < 1.0, 0.5 * tf.square(x), tf.abs(x) - 0.5)
except:
return tf.where(tf.abs(x) < 1.0, 0.5 * tf.square(x), tf.abs(x) - 0.5)
# return 0.5 * tf.square(x)
def sample_from_discretized_mix_logistic(l, nr_mix):
ls = int_shape(l)
xs = ls[:-1] + [3]
# unpack parameters
logit_probs = l[:, :, :, :nr_mix]
l = tf.reshape(l[:, :, :, nr_mix:], xs + [nr_mix * 3])
# sample mixture indicator from softmax
sel = tf.one_hot(tf.argmax(logit_probs - tf.log(-tf.log(tf.random_uniform(
logit_probs.get_shape(), minval=1e-5, maxval=1. - 1e-5))), 3), depth=nr_mix, dtype=tf.float32)
sel = tf.reshape(sel, xs[:-1] + [1, nr_mix])
# select logistic parameters
means = tf.reduce_sum(l[:, :, :, :, :nr_mix] * sel, 4)
log_scales = tf.maximum(tf.reduce_sum(
l[:, :, :, :, nr_mix:2 * nr_mix] * sel, 4), -7.)
coeffs = tf.reduce_sum(tf.nn.tanh(
l[:, :, :, :, 2 * nr_mix:3 * nr_mix]) * sel, 4)
# sample from logistic & clip to interval
# we don't actually round to the nearest 8bit value when sampling
u = tf.random_uniform(means.get_shape(), minval=1e-5, maxval=1. - 1e-5)
x = means + tf.exp(log_scales) * (tf.log(u) - tf.log(1. - u))
x0 = tf.minimum(tf.maximum(x[:, :, :, 0], -1.), 1.)
x1 = tf.minimum(tf.maximum(
x[:, :, :, 1] + coeffs[:, :, :, 0] * x0, -1.), 1.)
x2 = tf.minimum(tf.maximum(
x[:, :, :, 2] + coeffs[:, :, :, 1] * x0 + coeffs[:, :, :, 2] * x1, -1.), 1.)
return tf.concat([tf.reshape(x0, xs[:-1] + [1]), tf.reshape(x1, xs[:-1] + [1]), tf.reshape(x2, xs[:-1] + [1])], 3)
def elu(x, alpha=1.):
'''Exponential linear unit.
# Arguments
x: Tensor to compute the activation function for.
alpha: scalar
'''
res = tf.nn.elu(x)
if alpha == 1:
return res
else:
return tf.select(x > 0, res, alpha * res)
def random_binomial(shape, p=0.0, dtype=_FLOATX, seed=None):
if seed is None:
seed = np.random.randint(10e6)
return tf.select(tf.random_uniform(shape, dtype=dtype, seed=seed) <= p,
tf.ones(shape, dtype=dtype),
tf.zeros(shape, dtype=dtype))
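# Usage sketch (shape and keep probability are made up): a dropout-style binary
# mask that keeps each unit with probability 0.7.
keep_mask = random_binomial((4, 8), p=0.7)   # 1.0 with probability 0.7, else 0.0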
# CTC
# tensorflow has a native implementation, but it uses sparse tensors
# and therefore requires a wrapper for Keras. The functions below convert
# dense to sparse tensors and also wrap up the beam search code that is
# in tensorflow's CTC implementation
def random_binomial(self, shape, p=0.0, dtype=_FLOATX):
return tf.select(tf.random_uniform(shape, dtype=dtype) <= p, tf.ones(shape), tf.zeros(shape))
# NUMPY API
def logistic_loss_cond(scores, labels):
# Classification loss as the average of weighed per-score loss
cond = tf.select(tf.equal(labels, tf.zeros(tf.shape(labels))),
tf.zeros(tf.shape(labels)),
tf.nn.sigmoid_cross_entropy_with_logits(logits = scores, labels = labels)
)
cls_loss = tf.reduce_mean(tf.reduce_sum(cond, [1, 2, 3]))
return cls_loss
def certainty(self):
certainty = self.seg_prediction * tf.log(self.seg_prediction)
certainty = -tf.reduce_sum(certainty,reduction_indices=2)
s1 = tf.ones(tf.shape(certainty))
csum = tf.cumsum(s1,axis=1)
mask = tf.less_equal(csum,tf.cast(tf.tile(tf.expand_dims(self._length,1),[1,tf.shape(certainty)[1]]),tf.float32))
mask = tf.select(mask, tf.ones(tf.shape(certainty)),
tf.zeros(tf.shape(certainty)))
certainty *= mask
certainty = tf.reduce_sum(certainty, reduction_indices=1)
return certainty