def batch_normalization(x, mean, var, beta, gamma, epsilon=1e-3):
"""Applies batch normalization on x given mean, var, beta and gamma.
I.e. returns:
`output = (x - mean) / sqrt(var + epsilon) * gamma + beta`
Arguments:
x: Input tensor or variable.
mean: Mean of batch.
var: Variance of batch.
beta: Tensor with which to center the input.
gamma: Tensor by which to scale the input.
epsilon: Fuzz factor.
Returns:
A tensor.
"""
return nn.batch_normalization(x, mean, var, beta, gamma, epsilon)
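# A minimal NumPy sketch of the same formula, for reference only (the names
# below are illustrative and not part of any backend API).
import numpy as np

def batch_norm_reference(x, mean, var, beta, gamma, epsilon=1e-3):
    # Normalize with the batch statistics, then scale by gamma and shift by beta.
    return (x - mean) / np.sqrt(var + epsilon) * gamma + beta

_x = np.random.randn(8, 4)
_out = batch_norm_reference(_x, _x.mean(axis=0), _x.var(axis=0),
                            beta=np.zeros(4), gamma=np.ones(4))
# Per-feature mean is ~0 and variance is ~1 (up to epsilon).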
# SHAPE OPERATIONS
def lecun_uniform(seed=None):
"""LeCun uniform initializer.
It draws samples from a uniform distribution within [-limit, limit]
where `limit` is `sqrt(3 / fan_in)`
where `fan_in` is the number of input units in the weight tensor.
Arguments:
seed: A Python integer. Used to seed the random generator.
Returns:
An initializer.
References:
LeCun 98, Efficient Backprop,
http://yann.lecun.com/exdb/publis/pdf/lecun-98b.pdf
"""
return VarianceScaling(
scale=1., mode='fan_in', distribution='uniform', seed=seed)
def glorot_normal(seed=None):
"""Glorot normal initializer, also called Xavier normal initializer.
It draws samples from a truncated normal distribution centered on 0
with `stddev = sqrt(2 / (fan_in + fan_out))`
where `fan_in` is the number of input units in the weight tensor
and `fan_out` is the number of output units in the weight tensor.
Arguments:
seed: A Python integer. Used to seed the random generator.
Returns:
An initializer.
References:
Glorot & Bengio, AISTATS 2010
http://jmlr.org/proceedings/papers/v9/glorot10a/glorot10a.pdf
"""
return VarianceScaling(
scale=1., mode='fan_avg', distribution='normal', seed=seed)
def glorot_uniform(seed=None):
"""Glorot uniform initializer, also called Xavier uniform initializer.
It draws samples from a uniform distribution within [-limit, limit]
where `limit` is `sqrt(6 / (fan_in + fan_out))`
where `fan_in` is the number of input units in the weight tensor
and `fan_out` is the number of output units in the weight tensor.
Arguments:
seed: A Python integer. Used to seed the random generator.
Returns:
An initializer.
References:
Glorot & Bengio, AISTATS 2010
http://jmlr.org/proceedings/papers/v9/glorot10a/glorot10a.pdf
"""
return VarianceScaling(
scale=1., mode='fan_avg', distribution='uniform', seed=seed)
def he_normal(seed=None):
"""He normal initializer.
It draws samples from a truncated normal distribution centered on 0
with `stddev = sqrt(2 / fan_in)`
where `fan_in` is the number of input units in the weight tensor.
Arguments:
seed: A Python integer. Used to seed the random generator.
Returns:
An initializer.
References:
He et al., http://arxiv.org/abs/1502.01852
"""
return VarianceScaling(
scale=2., mode='fan_in', distribution='normal', seed=seed)
def he_uniform(seed=None):
"""He uniform variance scaling initializer.
It draws samples from a uniform distribution within [-limit, limit]
where `limit` is `sqrt(6 / fan_in)`
where `fan_in` is the number of input units in the weight tensor.
Arguments:
seed: A Python integer. Used to seed the random generator.
Returns:
An initializer.
References:
He et al., http://arxiv.org/abs/1502.01852
"""
return VarianceScaling(
scale=2., mode='fan_in', distribution='uniform', seed=seed)
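# Hedged sketch: the fan-based limits/stddevs described by the initializers
# above, written out in NumPy. `fan_in`/`fan_out` here are plain integers;
# VarianceScaling derives them from the weight shape internally.
import numpy as np

def lecun_uniform_limit(fan_in):
    return np.sqrt(3.0 / fan_in)

def glorot_normal_stddev(fan_in, fan_out):
    return np.sqrt(2.0 / (fan_in + fan_out))

def glorot_uniform_limit(fan_in, fan_out):
    return np.sqrt(6.0 / (fan_in + fan_out))

def he_normal_stddev(fan_in):
    return np.sqrt(2.0 / fan_in)

def he_uniform_limit(fan_in):
    return np.sqrt(6.0 / fan_in)

# e.g. for a 256 -> 128 dense layer:
# he_uniform_limit(256) ~= 0.153, glorot_uniform_limit(256, 128) ~= 0.125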
# Compatibility aliases
# pylint: disable=invalid-name
def get_updates(self, params, constraints, loss):
grads = self.get_gradients(loss, params)
shapes = [K.int_shape(p) for p in params]
accumulators = [K.zeros(shape) for shape in shapes]
self.weights = accumulators
self.updates = []
lr = self.lr
if self.initial_decay > 0:
lr *= (1. / (1. + self.decay * self.iterations))
self.updates.append(K.update_add(self.iterations, 1))
for p, g, a in zip(params, grads, accumulators):
# update accumulator
new_a = self.rho * a + (1. - self.rho) * K.square(g)
self.updates.append(K.update(a, new_a))
new_p = p - lr * g / (K.sqrt(new_a) + self.epsilon)
# apply constraints
if p in constraints:
c = constraints[p]
new_p = c(new_p)
self.updates.append(K.update(p, new_p))
return self.updates
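# Hedged NumPy sketch of the update rule implemented above (RMSprop-style:
# exponential moving average of squared gradients). Names and defaults are
# illustrative only.
import numpy as np

def rmsprop_step(p, g, a, lr=0.001, rho=0.9, epsilon=1e-8):
    new_a = rho * a + (1.0 - rho) * np.square(g)      # update accumulator
    new_p = p - lr * g / (np.sqrt(new_a) + epsilon)   # scaled gradient step
    return new_p, new_a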
def get_updates(self, params, constraints, loss):
grads = self.get_gradients(loss, params)
shapes = [K.int_shape(p) for p in params]
accumulators = [K.zeros(shape) for shape in shapes]
self.weights = accumulators
self.updates = []
lr = self.lr
if self.initial_decay > 0:
lr *= (1. / (1. + self.decay * self.iterations))
self.updates.append(K.update_add(self.iterations, 1))
for p, g, a in zip(params, grads, accumulators):
new_a = a + K.square(g) # update accumulator
self.updates.append(K.update(a, new_a))
new_p = p - lr * g / (K.sqrt(new_a) + self.epsilon)
# apply constraints
if p in constraints:
c = constraints[p]
new_p = c(new_p)
self.updates.append(K.update(p, new_p))
return self.updates
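# Hedged sketch of the Adagrad-style variant above: the accumulator is a
# running *sum* of squared gradients rather than an exponential average,
# so the effective learning rate decays monotonically. Names are illustrative.
import numpy as np

def adagrad_step(p, g, a, lr=0.01, epsilon=1e-8):
    new_a = a + np.square(g)
    new_p = p - lr * g / (np.sqrt(new_a) + epsilon)
    return new_p, new_a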
def loss(self, data, labels):
"""The loss to minimize while training."""
if self.is_regression:
diff = self.training_inference_graph(data) - math_ops.to_float(labels)
mean_squared_error = math_ops.reduce_mean(diff * diff)
root_mean_squared_error = math_ops.sqrt(mean_squared_error, name="loss")
loss = root_mean_squared_error
else:
loss = math_ops.reduce_mean(
nn_ops.sparse_softmax_cross_entropy_with_logits(
self.training_inference_graph(data),
array_ops.squeeze(math_ops.to_int32(labels))),
name="loss")
if self.regularizer:
loss += layers.apply_regularization(self.regularizer,
variables.trainable_variables())
return loss
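# For the regression branch above, the loss reduces to root-mean-squared error.
# Minimal NumPy equivalent (illustrative names only):
import numpy as np

def rmse(predictions, labels):
    diff = predictions - labels
    return np.sqrt(np.mean(diff * diff))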
def _sample_n(self, n, seed=None):
# We use 2 uniform random floats to generate polar random variates.
# http://dl.acm.org/citation.cfm?id=179631
# Theorem 2. Let G, H be iid variates, uniformly distributed on [0,1].
# Let theta = 2*pi*H, let R = sqrt(df*(G^(-2/df) - 1)) for df > 0.
# Let X = R*cos(theta), and let Y = R*sin(theta).
# Then X ~ t_df and Y ~ t_df.
# The variates X and Y are not independent.
shape = array_ops.concat(0, ([2, n], self.batch_shape()))
uniform = random_ops.random_uniform(shape=shape,
dtype=self.dtype,
seed=seed)
samples_g, samples_h = array_ops.unpack(uniform, num=2)
theta = (2. * math.pi) * samples_h
r = math_ops.sqrt(self.df *
(math_ops.pow(samples_g, -2 / self.df) - 1))
samples = r * math_ops.cos(theta)
return samples * self.sigma + self.mu
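# Hedged NumPy sketch of the polar construction described in the comments:
# with G, H iid Uniform(0, 1), theta = 2*pi*H and R = sqrt(df*(G**(-2/df) - 1)),
# X = R*cos(theta) is Student-t distributed with `df` degrees of freedom.
# The helper name and rng argument are illustrative only.
import numpy as np

def sample_student_t(df, size, mu=0.0, sigma=1.0, rng=np.random.default_rng()):
    g = rng.uniform(size=size)
    h = rng.uniform(size=size)
    theta = 2.0 * np.pi * h
    r = np.sqrt(df * (g ** (-2.0 / df) - 1.0))
    return r * np.cos(theta) * sigma + mu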
def _adaptive_max_norm(norm, std_factor, decay, global_step, epsilon, name):
"""Find max_norm given norm and previous average."""
with vs.variable_scope(name, "AdaptiveMaxNorm", [norm]):
log_norm = math_ops.log(norm + epsilon)
def moving_average(name, value, decay):
moving_average_variable = vs.get_variable(
name, shape=value.get_shape(), dtype=value.dtype,
initializer=init_ops.zeros_initializer, trainable=False)
return moving_averages.assign_moving_average(
moving_average_variable, value, decay, zero_debias=False)
# quicker adaptation at the beginning
if global_step is not None:
n = math_ops.to_float(global_step)
decay = math_ops.minimum(decay, n / (n + 1.))
# update averages
mean = moving_average("mean", log_norm, decay)
sq_mean = moving_average("sq_mean", math_ops.square(log_norm), decay)
variance = sq_mean - math_ops.square(mean)
std = math_ops.sqrt(math_ops.maximum(epsilon, variance))
max_norms = math_ops.exp(mean + std_factor*std)
return max_norms, mean
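# Hedged NumPy sketch of the adaptive max-norm rule above: keep a moving
# mean/variance of log(norm) and clip at exp(mean + std_factor * std).
# `state` stands in for the TF moving-average variables; names are illustrative.
import numpy as np

def adaptive_max_norm_step(norm, state, std_factor=2.0, decay=0.95, epsilon=1e-8):
    mean, sq_mean = state
    log_norm = np.log(norm + epsilon)
    mean = decay * mean + (1.0 - decay) * log_norm
    sq_mean = decay * sq_mean + (1.0 - decay) * log_norm ** 2
    std = np.sqrt(max(epsilon, sq_mean - mean ** 2))
    return np.exp(mean + std_factor * std), (mean, sq_mean)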
def _apply_dense(self, grad, var):
lr = (self._lr_t *
math_ops.sqrt(1 - self._beta2_power)
/ (1 - self._beta1_power))
# m_t = beta1 * m + (1 - beta1) * g_t
m = self.get_slot(var, "m")
m_scaled_g_values = grad * (1 - self._beta1_t)
m_t = m * self._beta1_t
m_t = m_t + m_scaled_g_values
# v_t = beta2 * v + (1 - beta2) * (g_t * g_t)
v = self.get_slot(var, "v")
v_scaled_g_values = tf.pow(grad, 2) * (1 - self._beta2_t)
v_t = v * self._beta2_t
v_t = v_t + v_scaled_g_values
v_sqrt = tf.pow(v_t, self._pow_t)
var_update = state_ops.assign_sub(var,
lr * m_t / (v_sqrt + self._epsilon_t),
use_locking=self._use_locking)
# regularization
var_update = state_ops.assign_sub(var_update,
self._dense_regularization * var,
use_locking=self._use_locking)
return control_flow_ops.group(*[var_update, m_t, v_t])
def _apply_dense(self, g_t, x_tm1, prepare):
""""""
updates = []
if self._mu > 0:
m_and_t = self._dense_moving_average(x_tm1, g_t, 'm', self._mu)
m_bar_t = m_and_t[0]
updates.extend(m_and_t)
else:
m_bar_t = g_t
if self._ups > 0:
v_and_t = self._dense_moving_average(x_tm1, g_t**2, 'v', self._ups)
eps_t = ops.convert_to_tensor(self._eps)
v_bar_t = math_ops.sqrt(v_and_t[0] + eps_t)
updates.extend(v_and_t)
else:
v_bar_t = 1.
s_t = self._lr * m_bar_t / v_bar_t
return [[s_t, x_tm1, g_t]] + updates
#=============================================================
# Source: layers_test.py (project: DeepLearning_VirtualReality_BigData_Project, author: rashmitripathi)
def testUnitNormWithRandomMatrix(self):
height, width = 2, 3
for dim in range(3):
random_seed.set_random_seed(0)
image = random_ops.random_uniform((height, width, 3))
output = _layers.unit_norm(image, dim=dim, epsilon=1e-6)
norms = math_ops.sqrt(
math_ops.reduce_sum(
math_ops.square(output), reduction_indices=dim))
shape = [height, width, 3]
del shape[dim]
expected = np.ones(shape)
with self.test_session():
actual = norms.eval()
self.assertAllClose(expected, actual, 1e-4, 1e-4)
def testKnownRankUnknownDimsSucceeds(self):
height, width = 2, 3
for dim in range(3):
placeholder_value = np.ones((height, width, 3))
shape = [height, width, 3]
del shape[dim]
expected = np.ones(shape)
image = array_ops.placeholder(dtypes.float32, (None, None, 3))
output = _layers.unit_norm(image, dim=dim, epsilon=1e-6)
norms = math_ops.sqrt(
math_ops.reduce_sum(
math_ops.square(output), reduction_indices=dim))
with self.test_session():
actual = norms.eval({image: placeholder_value})
self.assertAllClose(expected, actual, 1e-4, 1e-4)
# TODO(b/28426988): Add separate tests for non-legacy versions.
# Source: bijector.py (project: DeepLearning_VirtualReality_BigData_Project, author: rashmitripathi)
def sqrt_matmul(self, x):
"""Computes `matmul(self, x)`.
Doesn't actually do the sqrt! Named as such to agree with API.
Args:
x: `Tensor`
Returns:
self_times_x: `Tensor`
"""
m_x = math_ops.matmul(self._m, x)
vt_x = math_ops.matmul(self._v, x, adjoint_a=True)
d_vt_x = self._d.matmul(vt_x)
v_d_vt_x = math_ops.matmul(self._v, d_vt_x)
return m_x + v_d_vt_x
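# Hedged NumPy sketch of the same product structure: (M + V D V^T) x is
# evaluated as M x + V (D (V^T x)), never materializing the full low-rank
# update. Shapes: M is (n, n), V is (n, k), D is (k, k). Names are illustrative.
import numpy as np

def low_rank_update_matmul(m, v, d, x):
    vt_x = v.T @ x
    d_vt_x = d @ vt_x
    return m @ x + v @ d_vt_x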
def sqrt_log_abs_det(self):
"""Computes (log o abs o det)(X) for matrix X.
Doesn't actually do the sqrt! Named as such to agree with API.
To compute det(M + V D V.T), we use the matrix determinant lemma:
det(M + V D V.T) = det(C) det(D) det(M)
where C is defined as in `_inverse`, ie,
C = inv(D) + V.T inv(M) V.
See: https://en.wikipedia.org/wiki/Matrix_determinant_lemma
Returns:
log_abs_det: `Tensor`.
"""
log_det_c = math_ops.log(math_ops.abs(
linalg_ops.matrix_determinant(self._woodbury_sandwiched_term())))
# Reduction is ok because we always prepad inputs to this class.
log_det_m = math_ops.reduce_sum(math_ops.log(math_ops.abs(
array_ops.matrix_diag_part(self._m))), reduction_indices=[-1])
return log_det_c + 2. * self._d.sqrt_log_abs_det() + log_det_m
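# Hedged NumPy check of the matrix determinant lemma used above:
# det(M + V D V^T) = det(C) * det(D) * det(M) with C = inv(D) + V^T inv(M) V.
# The matrices below are randomly generated for illustration only.
import numpy as np

rng = np.random.default_rng(0)
m = np.tril(rng.normal(size=(4, 4))) + 4.0 * np.eye(4)   # well-conditioned lower-triangular M
v = rng.normal(size=(4, 2))
d = np.diag(rng.uniform(0.5, 1.5, size=2))
c = np.linalg.inv(d) + v.T @ np.linalg.inv(m) @ v
lhs = np.log(abs(np.linalg.det(m + v @ d @ v.T)))
rhs = (np.log(abs(np.linalg.det(c))) + np.log(abs(np.linalg.det(d)))
       + np.sum(np.log(np.abs(np.diag(m)))))
# lhs and rhs agree to numerical precision.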
def selu(x):
with ops.name_scope('elu') as scope:
alpha = 1.6732632423543772848170429916717
scale = 1.0507009873554804934193349852946
return scale*tf.where(x>=0.0, x, alpha*tf.nn.elu(x))
# (3) initialize weights with stddev sqrt(1/n)
# e.g. use:
def dropout_selu(x, rate, alpha= -1.7580993408473766, fixedPointMean=0.0, fixedPointVar=1.0,
noise_shape=None, seed=None, name=None, training=False):
"""Dropout to a value with rescaling."""
def dropout_selu_impl(x, rate, alpha, noise_shape, seed, name):
keep_prob = 1.0 - rate
x = ops.convert_to_tensor(x, name="x")
if isinstance(keep_prob, numbers.Real) and not 0 < keep_prob <= 1:
raise ValueError("keep_prob must be a scalar tensor or a float in the "
"range (0, 1], got %g" % keep_prob)
keep_prob = ops.convert_to_tensor(keep_prob, dtype=x.dtype, name="keep_prob")
keep_prob.get_shape().assert_is_compatible_with(tensor_shape.scalar())
alpha = ops.convert_to_tensor(alpha, dtype=x.dtype, name="alpha")
alpha.get_shape().assert_is_compatible_with(tensor_shape.scalar())
if tensor_util.constant_value(keep_prob) == 1:
return x
noise_shape = noise_shape if noise_shape is not None else array_ops.shape(x)
random_tensor = keep_prob
random_tensor += random_ops.random_uniform(noise_shape, seed=seed, dtype=x.dtype)
binary_tensor = math_ops.floor(random_tensor)
ret = x * binary_tensor + alpha * (1-binary_tensor)
a = math_ops.sqrt(fixedPointVar / (keep_prob *((1-keep_prob) * math_ops.pow(alpha-fixedPointMean,2) + fixedPointVar)))
b = fixedPointMean - a * (keep_prob * fixedPointMean + (1 - keep_prob) * alpha)
ret = a * ret + b
ret.set_shape(x.get_shape())
return ret
with ops.name_scope(name, "dropout", [x]) as name:
return utils.smart_cond(training,
lambda: dropout_selu_impl(x, rate, alpha, noise_shape, seed, name),
lambda: array_ops.identity(x))
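# Hedged Monte-Carlo check of the idea behind dropout_selu: after dropping
# units to `alpha` and applying the affine correction (a, b) computed above,
# the mean and variance of the activations stay near the fixed point (0, 1).
# This reference helper and its names are illustrative, not part of the API.
import numpy as np

def alpha_dropout_reference(x, rate, alpha=-1.7580993408473766,
                            fixed_point_mean=0.0, fixed_point_var=1.0,
                            rng=np.random.default_rng()):
    keep_prob = 1.0 - rate
    keep = rng.uniform(size=x.shape) < keep_prob
    ret = np.where(keep, x, alpha)
    a = np.sqrt(fixed_point_var / (keep_prob * ((1 - keep_prob)
        * (alpha - fixed_point_mean) ** 2 + fixed_point_var)))
    b = fixed_point_mean - a * (keep_prob * fixed_point_mean + (1 - keep_prob) * alpha)
    return a * ret + b

_out = alpha_dropout_reference(np.random.randn(100000), rate=0.1)
# _out.mean() is ~0 and _out.var() is ~1.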
def std(x, axis=None, keepdims=False):
"""Standard deviation of a tensor, alongside the specified axis.
Arguments:
x: A tensor or variable.
axis: An integer, the axis to compute the standard deviation.
keepdims: A boolean, whether to keep the dimensions or not.
If `keepdims` is `False`, the rank of the tensor is reduced
by 1. If `keepdims` is `True`,
the reduced dimension is retained with length 1.
Returns:
A tensor with the standard deviation of elements of `x`.
"""
return math_ops.sqrt(var(x, axis=axis, keepdims=keepdims))
def sqrt(x):
"""Element-wise square root.
Arguments:
x: Tensor or variable.
Returns:
A tensor.
"""
zero = _to_tensor(0., x.dtype.base_dtype)
inf = _to_tensor(np.inf, x.dtype.base_dtype)
x = clip_ops.clip_by_value(x, zero, inf)
return math_ops.sqrt(x)
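# Note on the clipping above: negative inputs are clipped to zero before the
# square root, so small negative values from floating-point round-off give 0.0
# rather than NaN. A NumPy equivalent, for reference only:
import numpy as np

def safe_sqrt(x):
    return np.sqrt(np.clip(x, 0.0, np.inf))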
def __call__(self, w):
norms = K.sqrt(K.sum(K.square(w), axis=self.axis, keepdims=True))
desired = K.clip(norms, 0, self.max_value)
w *= (desired / (K.epsilon() + norms))
return w
def __call__(self, w):
return w / (
K.epsilon() + K.sqrt(K.sum(K.square(w), axis=self.axis, keepdims=True)))
def get_updates(self, params, constraints, loss):
grads = self.get_gradients(loss, params)
shapes = [K.int_shape(p) for p in params]
accumulators = [K.zeros(shape) for shape in shapes]
delta_accumulators = [K.zeros(shape) for shape in shapes]
self.weights = accumulators + delta_accumulators
self.updates = []
lr = self.lr
if self.initial_decay > 0:
lr *= (1. / (1. + self.decay * self.iterations))
self.updates.append(K.update_add(self.iterations, 1))
for p, g, a, d_a in zip(params, grads, accumulators, delta_accumulators):
# update accumulator
new_a = self.rho * a + (1. - self.rho) * K.square(g)
self.updates.append(K.update(a, new_a))
# use the new accumulator and the *old* delta_accumulator
update = g * K.sqrt(d_a + self.epsilon) / K.sqrt(new_a + self.epsilon)
new_p = p - lr * update
# apply constraints
if p in constraints:
c = constraints[p]
new_p = c(new_p)
self.updates.append(K.update(p, new_p))
# update delta_accumulator
new_d_a = self.rho * d_a + (1 - self.rho) * K.square(update)
self.updates.append(K.update(d_a, new_d_a))
return self.updates
def get_updates(self, params, constraints, loss):
grads = self.get_gradients(loss, params)
self.updates = [K.update_add(self.iterations, 1)]
lr = self.lr
if self.initial_decay > 0:
lr *= (1. / (1. + self.decay * self.iterations))
t = self.iterations + 1
lr_t = lr * (K.sqrt(1. - K.pow(self.beta_2, t)) /
(1. - K.pow(self.beta_1, t)))
shapes = [K.int_shape(p) for p in params]
ms = [K.zeros(shape) for shape in shapes]
vs = [K.zeros(shape) for shape in shapes]
self.weights = [self.iterations] + ms + vs
for p, g, m, v in zip(params, grads, ms, vs):
m_t = (self.beta_1 * m) + (1. - self.beta_1) * g
v_t = (self.beta_2 * v) + (1. - self.beta_2) * K.square(g)
p_t = p - lr_t * m_t / (K.sqrt(v_t) + self.epsilon)
self.updates.append(K.update(m, m_t))
self.updates.append(K.update(v, v_t))
new_p = p_t
# apply constraints
if p in constraints:
c = constraints[p]
new_p = c(new_p)
self.updates.append(K.update(p, new_p))
return self.updates
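# Hedged NumPy sketch of one Adam step as implemented above, including the
# bias-corrected learning rate lr_t (t starts at 1). Names and defaults are
# illustrative only.
import numpy as np

def adam_step(p, g, m, v, t, lr=0.001, beta_1=0.9, beta_2=0.999, epsilon=1e-8):
    lr_t = lr * np.sqrt(1.0 - beta_2 ** t) / (1.0 - beta_1 ** t)
    m_t = beta_1 * m + (1.0 - beta_1) * g
    v_t = beta_2 * v + (1.0 - beta_2) * np.square(g)
    p_t = p - lr_t * m_t / (np.sqrt(v_t) + epsilon)
    return p_t, m_t, v_t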
def call(self, inputs, training=None):
if 0 < self.rate < 1:
def noised():
stddev = np.sqrt(self.rate / (1.0 - self.rate))
return inputs * K.random_normal(
shape=K.shape(inputs), mean=1.0, stddev=stddev)
return K.in_train_phase(noised, inputs, training=training)
return inputs
def unit_norm(inputs, dim, epsilon=1e-7, scope=None):
"""Normalizes the given input across the specified dimension to unit length.
Note that the rank of `input` must be known.
Args:
inputs: A `Tensor` of arbitrary size.
dim: The dimension along which the input is normalized.
epsilon: A small value to add to the inputs to avoid dividing by zero.
scope: Optional scope for variable_scope.
Returns:
The normalized `Tensor`.
Raises:
ValueError: If `dim` is negative or not smaller than the rank of `inputs`, or if the rank of `inputs` is unknown.
"""
with variable_scope.variable_scope(scope, 'UnitNorm', [inputs]):
if not inputs.get_shape():
raise ValueError('The input rank must be known.')
input_rank = len(inputs.get_shape().as_list())
if dim < 0 or dim >= input_rank:
raise ValueError(
'dim must be non-negative and smaller than the input rank.')
lengths = math_ops.sqrt(epsilon + math_ops.reduce_sum(
math_ops.square(inputs), dim, True))
multiples = []
if dim > 0:
multiples.append(array_ops.ones([dim], dtypes.int32))
multiples.append(array_ops.slice(array_ops.shape(inputs), [dim], [1]))
if dim < (input_rank - 1):
multiples.append(array_ops.ones([input_rank - 1 - dim], dtypes.int32))
multiples = array_ops.concat(0, multiples)
return math_ops.div(inputs, array_ops.tile(lengths, multiples))
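# Hedged NumPy sketch of what unit_norm computes: divide by the epsilon-
# regularized L2 norm along `dim`, so the result has unit norm along that axis.
# Names are illustrative only.
import numpy as np

def unit_norm_reference(inputs, dim, epsilon=1e-7):
    lengths = np.sqrt(epsilon + np.sum(np.square(inputs), axis=dim, keepdims=True))
    return inputs / lengths

_x = np.random.rand(2, 3, 3)
_y = unit_norm_reference(_x, dim=1)
# np.sum(_y ** 2, axis=1) is ~1 everywhere.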
def _mean(self):
if self.cholesky_input_output_matrices:
return math_ops.sqrt(self.df) * self.scale_operator_pd.sqrt_to_dense()
return self.df * self.scale_operator_pd.to_dense()
def _variance(self):
x = math_ops.sqrt(self.df) * self.scale_operator_pd.to_dense()
d = array_ops.expand_dims(array_ops.matrix_diag_part(x), -1)
v = math_ops.square(x) + math_ops.batch_matmul(d, d, adj_y=True)
if self.cholesky_input_output_matrices:
return linalg_ops.cholesky(v)
return v