def huber_loss(x, delta=1.0):
    """Reference: https://en.wikipedia.org/wiki/Huber_loss"""
    return tf.where(
        tf.abs(x) < delta,
        tf.square(x) * 0.5,
        delta * (tf.abs(x) - 0.5 * delta)
    )
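# A minimal sanity check of huber_loss above, assuming a TF 1.x session-style
# runtime; the expected values are worked out by hand from the piecewise
# definition with delta=1.0 (quadratic inside |x|<1, linear outside).
import tensorflow as tf

x = tf.constant([-2.0, -0.5, 0.0, 0.5, 2.0])
loss = huber_loss(x, delta=1.0)
with tf.Session() as sess:
    print(sess.run(loss))  # expected: [1.5, 0.125, 0.0, 0.125, 1.5]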
Example source code for Python abs()
def _smooth_l1_loss(self, bbox_pred, bbox_targets, bbox_inside_weights, bbox_outside_weights, sigma=1.0, dim=[1]):
    sigma_2 = sigma ** 2
    box_diff = bbox_pred - bbox_targets
    in_box_diff = bbox_inside_weights * box_diff
    abs_in_box_diff = tf.abs(in_box_diff)
    smoothL1_sign = tf.stop_gradient(tf.to_float(tf.less(abs_in_box_diff, 1. / sigma_2)))
    in_loss_box = tf.pow(in_box_diff, 2) * (sigma_2 / 2.) * smoothL1_sign \
                  + (abs_in_box_diff - (0.5 / sigma_2)) * (1. - smoothL1_sign)
    out_loss_box = bbox_outside_weights * in_loss_box
    loss_box = tf.reduce_mean(tf.reduce_sum(
        out_loss_box,
        axis=dim
    ))
    return loss_box
def Minibatch_Discriminator(input, num_kernels=100, dim_per_kernel=5, init=False, name='MD'):
    num_inputs = df_dim * 4
    theta = tf.get_variable(name+"/theta", [num_inputs, num_kernels, dim_per_kernel], initializer=tf.random_normal_initializer(stddev=0.05))
    log_weight_scale = tf.get_variable(name+"/lws", [num_kernels, dim_per_kernel], initializer=tf.constant_initializer(0.0))
    W = tf.mul(theta, tf.expand_dims(tf.exp(log_weight_scale)/tf.sqrt(tf.reduce_sum(tf.square(theta), 0)), 0))
    W = tf.reshape(W, [-1, num_kernels*dim_per_kernel])
    x = input
    x = tf.reshape(x, [batchsize, num_inputs])
    activation = tf.matmul(x, W)
    activation = tf.reshape(activation, [-1, num_kernels, dim_per_kernel])
    abs_dif = tf.mul(tf.reduce_sum(tf.abs(tf.sub(tf.expand_dims(activation, 3), tf.expand_dims(tf.transpose(activation, [1, 2, 0]), 0))), 2),
                     1 - tf.expand_dims(tf.constant(np.eye(batchsize), dtype=np.float32), 1))
    f = tf.reduce_sum(tf.exp(-abs_dif), 2) / tf.reduce_sum(tf.exp(-abs_dif))
    print(f.get_shape())
    print(input.get_shape())
    return tf.concat(1, [x, f])
def cyclic_learning_rate(
        learning_rate_min,
        learning_rate_max,
        step_size,
        global_step,
        mode='triangular',
        scope=None):
    with tf.variable_scope(scope, 'CyclicLearningRate'):
        cycle = tf.floor(1 + tf.to_float(global_step) / (2 * step_size))
        if mode == 'triangular':
            scale = 1
        elif mode == 'triangular2':
            scale = 2**(cycle - 1)
        else:
            raise ValueError('Unrecognized mode: {}'.format(mode))
        x = tf.abs(tf.to_float(global_step) / step_size - 2 * cycle + 1)
        lr = learning_rate_min + (learning_rate_max - learning_rate_min) * \
            tf.maximum(0.0, 1 - x) / scale
        return lr
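# A hypothetical check of cyclic_learning_rate in 'triangular' mode, assuming
# TF 1.x. The bounds (0.001, 0.006) and step_size=100 are illustrative only:
# the rate climbs from the minimum at step 0 to the maximum at step 100, then
# falls back to the minimum at step 200.
import tensorflow as tf

global_step = tf.placeholder(tf.int32, [])
lr = cyclic_learning_rate(0.001, 0.006, step_size=100, global_step=global_step)
with tf.Session() as sess:
    for step in [0, 50, 100, 150, 200]:
        print(step, sess.run(lr, feed_dict={global_step: step}))
# expected: 0.001, 0.0035, 0.006, 0.0035, 0.001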
def shrink_soft_threshold(r, rvar, theta):
    """
    Soft threshold function
        y = sign(x) * max(0, abs(x) - theta[0]*sqrt(rvar)) * scaling
    where scaling is theta[1] (default=1).
    In other words, if theta has length 1, the standard soft threshold
    (with no extra scaling) is applied.
    """
    if len(theta.get_shape()) > 0 and theta.get_shape() != (1,):
        lam = theta[0] * tf.sqrt(rvar)
        scale = theta[1]
    else:
        lam = theta * tf.sqrt(rvar)
        scale = None
    lam = tf.maximum(lam, 0)
    arml = tf.abs(r) - lam
    xhat = tf.sign(r) * tf.maximum(arml, 0)
    dxdr = tf.reduce_mean(tf.to_float(arml > 0), 0)
    if scale is not None:
        xhat = xhat * scale
        dxdr = dxdr * scale
    return (xhat, dxdr)
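# A small, hypothetical check of shrink_soft_threshold with a scalar theta
# (no extra scaling), assuming TF 1.x: entries with |r| below theta*sqrt(rvar)
# are shrunk to zero, everything else moves toward zero by that amount.
import tensorflow as tf

r = tf.constant([-3.0, -0.5, 0.0, 0.5, 3.0])
rvar = tf.constant(1.0)
theta = tf.constant(1.0)
xhat, dxdr = shrink_soft_threshold(r, rvar, theta)
with tf.Session() as sess:
    print(sess.run(xhat))  # [-2., 0., 0., 0., 2.]
    print(sess.run(dxdr))  # fraction of entries past the threshold: 0.4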
def shrink_bgest(r, rvar, theta):
    """Bernoulli-Gaussian MMSE estimator.
    Perform MMSE estimation E[x|r]
    for x ~ BernoulliGaussian(lambda, xvar1)
        r|x ~ Normal(x, rvar)
    The parameters theta[0], theta[1] represent
        the variance of nonzero x[i]:
            xvar1 = abs(theta[0])
        the probability of nonzero x[i]:
            lambda = 1/(exp(theta[1])+1)
    """
    xvar1 = abs(theta[..., 0])
    loglam = theta[..., 1]  # log(1/lambda - 1)
    beta = 1 / (1 + rvar / xvar1)
    r2scale = r * r * beta / rvar
    rho = tf.exp(loglam - .5 * r2scale) * tf.sqrt(1 + xvar1 / rvar)
    rho1 = rho + 1
    xhat = beta * r / rho1
    dxdr = beta * ((1 + rho * (1 + r2scale)) / tf.square(rho1))
    dxdr = tf.reduce_mean(dxdr, 0)
    return (xhat, dxdr)
def pwlin_grid(r_, rvar_, theta_, dtheta=.75):
    """Piecewise linear with noise-adaptive grid spacing.
    returns xhat, dxdr
    where
        q = r/dtheta/sqrt(rvar)
        xhat = r * interp(q, theta)
    All but the last dimension of theta must broadcast to r_,
    e.g. r.shape = (500,1000) is compatible with theta.shape = (500,1,7).
    """
    ntheta = int(theta_.get_shape()[-1])
    scale_ = dtheta / tf.sqrt(rvar_)
    ars_ = tf.clip_by_value(tf.expand_dims(tf.abs(r_) * scale_, -1), 0.0, ntheta - 1.0)
    centers_ = tf.constant(np.arange(ntheta), dtype=tf.float32)
    outer_distance_ = tf.maximum(0., 1.0 - tf.abs(ars_ - centers_))  # new dimension for distance to closest bin centers (or center)
    gain_ = tf.reduce_sum(theta_ * outer_distance_, axis=-1)  # apply the gain (learnable)
    xhat_ = gain_ * r_
    dxdr_ = tf.gradients(xhat_, r_)[0]
    return (xhat_, dxdr_)
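# A hypothetical smoke test for pwlin_grid, assuming TF 1.x: with all learnable
# gains set to one, the interpolated gain is 1 everywhere inside the grid, so
# xhat_ should reproduce r_ and dxdr_ should be close to 1. The shapes follow
# the broadcasting rule stated in the docstring.
import numpy as np
import tensorflow as tf

r_ = tf.constant([[0.0, 0.5, 1.5]])
rvar_ = tf.constant(1.0)
theta_ = tf.constant(np.ones((1, 1, 5)), dtype=tf.float32)
xhat_, dxdr_ = pwlin_grid(r_, rvar_, theta_)
with tf.Session() as sess:
    print(sess.run(xhat_))  # ~[[0., 0.5, 1.5]]
    print(sess.run(dxdr_))  # each entry close to 1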
def random_rotation(img: tf.Tensor, max_rotation: float=0.1, crop: bool=True) -> tf.Tensor:  # from SeguinBe
    with tf.name_scope('RandomRotation'):
        rotation = tf.random_uniform([], -max_rotation, max_rotation)
        rotated_image = tf.contrib.image.rotate(img, rotation, interpolation='BILINEAR')
        if crop:
            rotation = tf.abs(rotation)
            original_shape = tf.shape(rotated_image)[:2]
            h, w = original_shape[0], original_shape[1]
            # see https://stackoverflow.com/questions/16702966/rotate-image-and-crop-out-black-borders for formulae
            old_l, old_s = tf.cond(h > w, lambda: [h, w], lambda: [w, h])
            old_l, old_s = tf.cast(old_l, tf.float32), tf.cast(old_s, tf.float32)
            new_l = (old_l * tf.cos(rotation) - old_s * tf.sin(rotation)) / tf.cos(2*rotation)
            new_s = (old_s - tf.sin(rotation) * new_l) / tf.cos(rotation)
            new_h, new_w = tf.cond(h > w, lambda: [new_l, new_s], lambda: [new_s, new_l])
            new_h, new_w = tf.cast(new_h, tf.int32), tf.cast(new_w, tf.int32)
            bb_begin = tf.cast(tf.ceil((h-new_h)/2), tf.int32), tf.cast(tf.ceil((w-new_w)/2), tf.int32)
            rotated_image_crop = rotated_image[bb_begin[0]:h - bb_begin[0], bb_begin[1]:w - bb_begin[1], :]
            # If crop removes the entire image, keep the original image
            rotated_image = tf.cond(tf.equal(tf.size(rotated_image_crop), 0),
                                    true_fn=lambda: img,
                                    false_fn=lambda: rotated_image_crop)
        return rotated_image
def help_generate_np_gives_adversarial_example(self, ord):
    x_val = np.random.rand(100, 2)
    x_val = np.array(x_val, dtype=np.float32)

    x_adv = self.attack.generate_np(x_val, eps=.5, ord=ord,
                                    clip_min=-5, clip_max=5)

    if ord == np.inf:
        delta = np.max(np.abs(x_adv - x_val), axis=1)
    elif ord == 1:
        delta = np.sum(np.abs(x_adv - x_val), axis=1)
    elif ord == 2:
        delta = np.sum(np.square(x_adv - x_val), axis=1)**.5
    self.assertClose(delta, 0.5)

    orig_labs = np.argmax(self.sess.run(self.model(x_val)), axis=1)
    new_labs = np.argmax(self.sess.run(self.model(x_adv)), axis=1)
    self.assertTrue(np.mean(orig_labs == new_labs) < 0.5)
def test_targeted_generate_np_gives_adversarial_example(self):
    x_val = np.random.rand(100, 2)
    x_val = np.array(x_val, dtype=np.float32)

    random_labs = np.random.random_integers(0, 1, 100)
    random_labs_one_hot = np.zeros((100, 2))
    random_labs_one_hot[np.arange(100), random_labs] = 1

    x_adv = self.attack.generate_np(x_val, eps=.5, ord=np.inf,
                                    clip_min=-5, clip_max=5,
                                    y_target=random_labs_one_hot)

    delta = np.max(np.abs(x_adv - x_val), axis=1)
    self.assertClose(delta, 0.5)

    new_labs = np.argmax(self.sess.run(self.model(x_adv)), axis=1)
    self.assertTrue(np.mean(random_labs == new_labs) > 0.7)
def create_generator_loss(disc_output, gene_output, features):
    # I.e. did we fool the discriminator? (logits are the discriminator outputs,
    # the targets are all ones)
    cross_entropy = tf.nn.sigmoid_cross_entropy_with_logits(logits=disc_output, labels=tf.ones_like(disc_output))
    gene_ce_loss = tf.reduce_mean(cross_entropy, name='gene_ce_loss')

    # I.e. does the result look like the feature?
    K = int(gene_output.get_shape()[1])//int(features.get_shape()[1])
    assert K == 2 or K == 4 or K == 8
    downscaled = _downscale(gene_output, K)
    gene_l1_loss = tf.reduce_mean(tf.abs(downscaled - features), name='gene_l1_loss')

    gene_loss = tf.add((1.0 - FLAGS.gene_l1_factor) * gene_ce_loss,
                       FLAGS.gene_l1_factor * gene_l1_loss, name='gene_loss')
    return gene_loss
def smooth_l1_loss(offsets, gt_offsets, scope=None):
    """
    Smooth L1 loss between offsets and encoded_gt
    ARGS
        offsets: [m?, 5], predicted offsets for one example
        gt_offsets: [m?, 5], corresponding groundtruth offsets
    RETURN
        loss: scalar
    """
    with tf.variable_scope(scope or 'smooth_l1_loss'):
        gt_offsets = tf.stop_gradient(gt_offsets)
        diff = tf.abs(offsets - gt_offsets)
        lesser_mask = tf.cast(tf.less(diff, 1.0), tf.float32)
        larger_mask = 1.0 - lesser_mask
        losses = (0.5 * tf.square(diff)) * lesser_mask + (diff - 0.5) * larger_mask
        return tf.reduce_sum(losses, 1)
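# Hypothetical usage of smooth_l1_loss above on two 5-d offset rows, assuming
# TF 1.x: the small error falls in the quadratic branch, the large one in the
# linear branch.
import tensorflow as tf

offsets = tf.constant([[0.5, 0.0, 0.0, 0.0, 0.0],
                       [2.0, 0.0, 0.0, 0.0, 0.0]])
gt_offsets = tf.zeros([2, 5])
losses = smooth_l1_loss(offsets, gt_offsets)
with tf.Session() as sess:
    print(sess.run(losses))  # [0.125, 1.5]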
def __init__(self, actions):
    self.replayMemory = deque()
    self.timeStep = 0
    self.epsilon = INITIAL_EPSILON
    self.actions = actions
    self.files = 0
    self.currentQNet = QNet(len(actions))
    self.targetQNet = QNet(len(actions))

    self.actionInput = tf.placeholder("float", [None, len(actions)], name="actions_one_hot")
    self.yInput = tf.placeholder("float", [None], name="y")

    self.action_mask = tf.multiply(self.currentQNet.QValue, self.actionInput)
    self.Q_action = tf.reduce_sum(self.action_mask, reduction_indices=1)
    self.delta = delta = tf.subtract(self.Q_action, self.yInput)
    self.loss = tf.where(tf.abs(delta) < 1.0, 0.5 * tf.square(delta), tf.abs(delta) - 0.5)
    #self.loss = tf.square(tf.subtract(self.Q_action, self.yInput))
    self.cost = tf.reduce_mean(self.loss)
    self.trainStep = tf.train.RMSPropOptimizer(learning_rate=RMS_LEARNING_RATE, momentum=RMS_MOMENTUM, epsilon=RMS_EPSILON, decay=RMS_DECAY).minimize(
        self.cost)
def _apply_dense(self, grad, var):
    lr_t = tf.cast(self._lr_t, var.dtype.base_dtype)
    beta1_t = tf.cast(self._beta1_t, var.dtype.base_dtype)
    beta2_t = tf.cast(self._beta2_t, var.dtype.base_dtype)
    if var.dtype.base_dtype == tf.float16:
        eps = 1e-7  # Can't use 1e-8 due to underflow -- not sure if it makes a big difference.
    else:
        eps = 1e-8
    v = self.get_slot(var, "v")
    v_t = v.assign(beta1_t * v + (1. - beta1_t) * grad)
    m = self.get_slot(var, "m")
    m_t = m.assign(tf.maximum(beta2_t * m + eps, tf.abs(grad)))
    g_t = v_t / m_t
    var_update = tf.assign_sub(var, lr_t * g_t)
    return tf.group(*[var_update, m_t, v_t])
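# The dense update above is AdaMax-like: v_t is an exponential moving average
# of the gradient and m_t tracks an eps-stabilized running maximum of |grad|.
# A hypothetical NumPy rendering of a single step, with illustrative default
# hyper-parameters (not values taken from the original optimizer class):
import numpy as np

def _numpy_step(var, grad, v, m, lr=0.001, beta1=0.9, beta2=0.999, eps=1e-8):
    v_t = beta1 * v + (1. - beta1) * grad            # first-moment estimate
    m_t = np.maximum(beta2 * m + eps, np.abs(grad))  # running max of |grad|
    var_t = var - lr * v_t / m_t                     # normalized update
    return var_t, v_t, m_t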
def mu_law(x, mu=255, int8=False):
    """A TF implementation of Mu-Law encoding.

    Args:
        x: The audio samples to encode.
        mu: The Mu to use in our Mu-Law.
        int8: Use int8 encoding.

    Returns:
        out: The Mu-Law encoded int8 data.
    """
    out = tf.sign(x) * tf.log(1 + mu * tf.abs(x)) / np.log(1 + mu)
    out = tf.floor(out * 128)
    if int8:
        out = tf.cast(out, tf.int8)
    return out
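# A hypothetical quick check of mu_law on a few samples in [-1, 1], assuming
# TF 1.x: with mu=255 the companding is logarithmic, so small amplitudes get
# proportionally more of the code range than large ones.
import tensorflow as tf

samples = tf.constant([-0.5, -0.1, 0.0, 0.1, 0.5])
encoded = mu_law(samples, mu=255, int8=True)
with tf.Session() as sess:
    print(sess.run(encoded))  # code magnitude grows with |x|, roughly [-113, -76, 0, 75, 112]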
def compute_loss(self):
    """
    Compute the training loss.
    Return:
        loss: scalar
    """
    if not self._use_crf:
        labels = tf.reshape(
            tf.contrib.layers.one_hot_encoding(
                tf.reshape(self.input_label_ph, [-1]), num_classes=self._nb_classes),
            shape=[-1, self._sequence_length, self._nb_classes])
        cross_entropy = -tf.reduce_sum(labels * tf.log(self.logits), axis=2)
        mask = tf.sign(tf.reduce_max(tf.abs(labels), axis=2))
        cross_entropy_masked = tf.reduce_sum(
            cross_entropy*mask, axis=1) / tf.cast(self.sequence_actual_length, tf.float32)
        return tf.reduce_mean(cross_entropy_masked)
    else:
        log_likelihood, self.transition_params = tf.contrib.crf.crf_log_likelihood(
            self.logits, self.input_label_ph, self.sequence_actual_length)
        return tf.reduce_mean(-log_likelihood)
def _apply_dense(self, grad, var):
    lr_t = tf.cast(self._lr_t, var.dtype.base_dtype)
    beta1_t = tf.cast(self._beta1_t, var.dtype.base_dtype)
    beta2_t = tf.cast(self._beta2_t, var.dtype.base_dtype)
    if var.dtype.base_dtype == tf.float16:
        # Can't use 1e-8 due to underflow
        eps = 1e-7
    else:
        eps = 1e-8
    v = self.get_slot(var, "v")
    v_t = v.assign(beta1_t * v + (1. - beta1_t) * grad)
    m = self.get_slot(var, "m")
    m_t = m.assign(tf.maximum(beta2_t * m + eps, tf.abs(grad)))
    g_t = v_t / m_t
    var_update = tf.assign_sub(var, lr_t * g_t)
    return tf.group(*[var_update, m_t, v_t])
def _sample(self, n_samples):
    # samples must be sampled from (-1, 1) rather than [-1, 1)
    loc, scale = self.loc, self.scale
    if not self.is_reparameterized:
        loc = tf.stop_gradient(loc)
        scale = tf.stop_gradient(scale)
    shape = tf.concat([[n_samples], self.batch_shape], 0)
    uniform_samples = tf.random_uniform(
        shape=shape,
        minval=np.nextafter(self.dtype.as_numpy_dtype(-1.),
                            self.dtype.as_numpy_dtype(0.)),
        maxval=1.,
        dtype=self.dtype)
    samples = loc - scale * tf.sign(uniform_samples) * \
        tf.log1p(-tf.abs(uniform_samples))
    static_n_samples = n_samples if isinstance(n_samples, int) else None
    samples.set_shape(
        tf.TensorShape([static_n_samples]).concatenate(
            self.get_batch_shape()))
    return samples
def normal_map(tensor, shape):
    """
    Generate a tangent-space normal map.

    :param Tensor tensor:
    :param list[int] shape:
    :return: Tensor
    """
    height, width, channels = shape
    reference = value_map(tensor, shape, keep_dims=True)
    x = normalize(1 - convolve(ConvKernel.sobel_x, reference, [height, width, 1]))
    y = normalize(convolve(ConvKernel.sobel_y, reference, [height, width, 1]))
    z = 1 - tf.abs(normalize(tf.sqrt(x * x + y * y)) * 2 - 1) * .5 + .5
    return tf.stack([x[:, :, 0], y[:, :, 0], z[:, :, 0]], 2)
def rotate_crop(img, rotation, crop=True, interpolation='NEAREST'):
    with tf.name_scope('RotateCrop'):
        rotated_image = tf_rotate(img, rotation, interpolation)
        if crop:
            rotation = tf.abs(rotation)
            original_shape = tf.shape(rotated_image)[:2]
            h, w = original_shape[0], original_shape[1]
            # see https://stackoverflow.com/questions/16702966/rotate-image-and-crop-out-black-borders for formulae
            old_l, old_s = tf.cond(h > w, lambda: [h, w], lambda: [w, h])
            old_l, old_s = tf.cast(old_l, tf.float32), tf.cast(old_s, tf.float32)
            new_l = (old_l * tf.cos(rotation) - old_s * tf.sin(rotation)) / tf.cos(2 * rotation)
            new_s = (old_s - tf.sin(rotation) * new_l) / tf.cos(rotation)
            new_h, new_w = tf.cond(h > w, lambda: [new_l, new_s], lambda: [new_s, new_l])
            new_h, new_w = tf.cast(new_h, tf.int32), tf.cast(new_w, tf.int32)
            bb_begin = tf.cast(tf.ceil((h - new_h) / 2), tf.int32), tf.cast(tf.ceil((w - new_w) / 2), tf.int32)
            rotated_image_crop = rotated_image[bb_begin[0]:h - bb_begin[0], bb_begin[1]:w - bb_begin[1], :]
            # If crop removes the entire image, keep the original image
            rotated_image = tf.cond(tf.equal(tf.size(rotated_image_crop), 0),
                                    true_fn=lambda: img,
                                    false_fn=lambda: rotated_image_crop)
        return rotated_image
def svr(X, Y):
    """Support vector regressor, kind of..."""
    lambda_ = 1e-4
    eps = 0.01
    lenscale = 1.

    # Specify which kernel to approximate with the random Fourier features
    kern = ab.RBF(lenscale=lenscale)

    net = (
        # ab.InputLayer(name="X", n_samples=n_samples_) >>
        ab.InputLayer(name="X", n_samples=1) >>
        ab.RandomFourier(n_features=50, kernel=kern) >>
        # ab.DropOut(keep_prob=0.9) >>
        ab.DenseMAP(output_dim=1, l2_reg=lambda_, l1_reg=0.)
    )

    f, reg = net(X=X)
    loss = tf.reduce_mean(tf.nn.relu(tf.abs(Y - f) - eps)) + reg
    return f, loss
def L1(tensor, wd=0.001):
    """ L1.
    Computes the L1 norm of a tensor:
        output = sum(|t|) * wd
    Arguments:
        tensor: `Tensor`. The tensor to apply regularization.
        wd: `float`. The decay.
    Returns:
        The regularization `Tensor`.
    """
    return tf.multiply(tf.reduce_sum(tf.abs(tensor)), wd, name='L1-Loss')
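# Hypothetical usage of L1 above as a weight-decay term added to a task loss,
# assuming TF 1.x; the variable and the stand-in data_loss are illustrative only.
import tensorflow as tf

weights = tf.Variable(tf.random_normal([10, 1]), name='weights')
data_loss = tf.constant(0.0)  # stand-in for the actual task loss
total_loss = data_loss + L1(weights, wd=0.001)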
def elu(x):
    """ ELU.
    Exponential Linear Unit.
    Arguments:
        x : A `Tensor` with type `float`, `double`, `int32`, `int64`, `uint8`,
            `int16`, or `int8`.
    Returns:
        A `Tensor` with the same type as `x`.
    References:
        Fast and Accurate Deep Network Learning by Exponential Linear Units,
        Djork-Arné Clevert, Thomas Unterthiner, Sepp Hochreiter. 2015.
    Links:
        [http://arxiv.org/abs/1511.07289](http://arxiv.org/abs/1511.07289)
    """
    return tf.nn.elu(x)
def crelu(x):
    """ CReLU.
    Computes Concatenated ReLU.
    Concatenates a ReLU which selects only the positive part of the activation
    with a ReLU which selects only the negative part of the activation. Note
    that as a result this non-linearity doubles the depth of the activations.
    Arguments:
        x : A `Tensor` with type `float`, `double`, `int32`, `int64`, `uint8`,
            `int16`, or `int8`.
    Returns:
        A `Tensor` with the same type as `x`.
    Links:
        [https://arxiv.org/abs/1603.05201](https://arxiv.org/abs/1603.05201)
    """
    return tf.nn.crelu(x)
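# A quick shape check for crelu, assuming TF 1.x: concatenating the positive
# and negative parts doubles the last (channel) dimension.
import tensorflow as tf

x = tf.constant([[1.0, -2.0, 3.0]])
y = crelu(x)
with tf.Session() as sess:
    print(sess.run(y))  # [[1., 0., 3., 0., 2., 0.]]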
def huber_loss(infer, label, epsilon, layer_name):
    """
    Args:
        infer: predicted values
        label: target values
        epsilon: threshold between the quadratic and linear regions
        layer_name: name of the enclosing variable scope
    """
    with tf.variable_scope(layer_name):
        abs_diff = tf.abs(tf.sub(infer, label))
        index = tf.to_int32(abs_diff <= epsilon, name='partition_index')
        l1_part, l2_part = tf.dynamic_partition(abs_diff, index, 2)
        #l1_loss = tf.reduce_mean(l1_part, name='l1_loss')
        #l2_loss = tf.reduce_mean(tf.square(l2_part), name='l2_loss')
        l1_part_loss = epsilon * (l1_part - 0.5 * epsilon)
        l2_part_loss = 0.5 * tf.square(l2_part)
        hloss = tf.reduce_mean(tf.concat(0, [l1_part_loss, l2_part_loss]),
                               name='huber_loss_sum')
        return hloss
def lp_loss(gen_frames, gt_frames, l_num):
    """
    Calculates the sum of lp losses between the predicted and ground truth frames.

    @param gen_frames: The predicted frames at each scale.
    @param gt_frames: The ground truth frames at each scale.
    @param l_num: 1 or 2 for l1 and l2 loss, respectively.

    @return: The lp loss.
    """
    # calculate the loss for each scale
    scale_losses = []
    for i in xrange(len(gen_frames)):
        scale_losses.append(tf.reduce_sum(tf.abs(gen_frames[i] - gt_frames[i])**l_num))

    # condense into one tensor and avg
    return tf.reduce_mean(tf.pack(scale_losses))
def stochastical_binarize_gradients(grads_and_vars, scalers):
    """Stochastically binarize gradients."""
    gradients, variables = zip(*grads_and_vars)
    binarized_gradients = []
    for gradient, scaler in zip(gradients, scalers):
        if gradient is None:
            binarized_gradients.append(None)
            continue
        if isinstance(gradient, tf.IndexedSlices):
            gradient_shape = gradient.dense_shape
        else:
            gradient_shape = gradient.get_shape()
        zeros = tf.zeros(gradient_shape)
        abs_gradient = tf.abs(gradient)
        sign_gradient = tf.sign(gradient)
        rnd_sample = tf.random_uniform(gradient_shape, 0, scaler)
        where_cond = tf.less(rnd_sample, abs_gradient)
        binarized_gradient = tf.cond(tf.size(gradient) < FLAGS.size_to_binarize,
                                     lambda: gradient,
                                     lambda: tf.where(where_cond, sign_gradient * scaler, zeros))
        binarized_gradients.append(binarized_gradient)
    return list(zip(binarized_gradients, variables))
def gradient_binarizing_scalers(grads_and_vars, clip_factor):
    """Get the scalers."""
    gradients, variables = zip(*grads_and_vars)
    scalers = []
    for gradient in gradients:
        if gradient is None:
            scalers.append(None)
            continue
        if clip_factor > 1.0e-5:
            mean_gradient = tf.reduce_mean(gradient)
            stddev_gradient = tf.sqrt(tf.reduce_mean(tf.square(gradient - mean_gradient)))
            scalers.append(clip_factor * stddev_gradient)
        else:
            scalers.append(tf.reduce_max(tf.abs(gradient)))
    return list(zip(scalers, variables))
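# A hypothetical end-to-end sketch combining the two helpers above, assuming
# TF 1.x: derive per-tensor scalers from the raw gradients, then stochastically
# binarize them before applying. The tiny loss, clip_factor=2.5, and the
# size_to_binarize flag are illustrative assumptions, not values from the
# original project (which defines FLAGS in its own module).
import tensorflow as tf

tf.app.flags.DEFINE_integer('size_to_binarize', 1, 'binarize tensors at least this large')
FLAGS = tf.app.flags.FLAGS

w = tf.Variable([1.0, -2.0, 3.0])
loss = tf.reduce_sum(tf.square(w))
optimizer = tf.train.GradientDescentOptimizer(0.1)
grads_and_vars = optimizer.compute_gradients(loss)

scalers_and_vars = gradient_binarizing_scalers(grads_and_vars, clip_factor=2.5)
scalers = [s for s, _ in scalers_and_vars]
binarized_grads_and_vars = stochastical_binarize_gradients(grads_and_vars, scalers)
train_op = optimizer.apply_gradients(binarized_grads_and_vars)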
def huber_loss(y_true, y_pred, clip_value):
    # Huber loss, see https://en.wikipedia.org/wiki/Huber_loss and
    # https://medium.com/@karpathy/yes-you-should-understand-backprop-e2f06eab496b
    # for details.
    assert clip_value > 0.

    x = y_true - y_pred
    if np.isinf(clip_value):
        # Special case for infinity since TensorFlow has problems
        # if we compare `K.abs(x) < np.inf`.
        return .5 * tf.square(x)

    condition = tf.abs(x) < clip_value
    squared_loss = .5 * tf.square(x)
    linear_loss = clip_value * (tf.abs(x) - .5 * clip_value)
    return tf.where(condition, squared_loss, linear_loss)  # condition, true, false
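# A hypothetical check of the Keras-style huber_loss above, assuming TF 1.x:
# with clip_value=1.0 it matches the tf.where-based definition at the top of
# this page, and with clip_value=np.inf it degenerates to plain squared error.
import numpy as np
import tensorflow as tf

y_true = tf.constant([0.0, 0.0, 0.0])
y_pred = tf.constant([0.5, 2.0, -3.0])
with tf.Session() as sess:
    print(sess.run(huber_loss(y_true, y_pred, clip_value=1.0)))     # [0.125, 1.5, 2.5]
    print(sess.run(huber_loss(y_true, y_pred, clip_value=np.inf)))  # [0.125, 2.0, 4.5]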