def testDropout(self):
height, width = 10, 10
with self.test_session() as sess:
images = random_ops.random_uniform(
(5, height, width, 3), seed=1, name='images')
num_elem_initial = math_ops.reduce_mean(math_ops.to_float(images > 0))
output = _layers.dropout(images)
num_elem = math_ops.reduce_mean(math_ops.to_float(output > 0))
sess.run(variables_lib.global_variables_initializer())
num_elem, num_elem_initial = sess.run([num_elem, num_elem_initial])
self.assertLess(num_elem, num_elem_initial / 2 + 0.1)
self.assertGreater(num_elem, num_elem_initial / 2 - 0.1)
python类reduce_mean()的实例源码
layers_test.py 文件源码
项目:DeepLearning_VirtualReality_BigData_Project
作者: rashmitripathi
项目源码
文件源码
阅读 25
收藏 0
点赞 0
评论 0
layers_test.py 文件源码
项目:DeepLearning_VirtualReality_BigData_Project
作者: rashmitripathi
项目源码
文件源码
阅读 24
收藏 0
点赞 0
评论 0
def testCreateDropoutNoTraining(self):
height, width = 3, 3
with self.test_session() as sess:
images = random_ops.random_uniform(
(5, height, width, 3), seed=1, name='images')
num_elem_initial = math_ops.reduce_mean(math_ops.to_float(images > 0))
output = _layers.dropout(images, is_training=False)
num_elem = math_ops.reduce_mean(math_ops.to_float(output > 0))
sess.run(variables_lib.global_variables_initializer())
num_elem, num_elem_initial = sess.run([num_elem, num_elem_initial])
self.assertEqual(num_elem, num_elem_initial)
outputs, inputs = sess.run([output, images])
self.assertAllClose(outputs, inputs)
layers_test.py 文件源码
项目:DeepLearning_VirtualReality_BigData_Project
作者: rashmitripathi
项目源码
文件源码
阅读 24
收藏 0
点赞 0
评论 0
def testCreateFCFollowByDropout(self):
height, width = 3, 3
with self.test_session() as sess:
images = random_ops.random_uniform(
(5, height, width, 3), seed=1, name='images')
output = _layers.fully_connected(images, 50)
num_elem_initial = math_ops.reduce_mean(math_ops.to_float(output > 0))
output = _layers.dropout(output)
num_elem = math_ops.reduce_mean(math_ops.to_float(output > 0))
sess.run(variables_lib.global_variables_initializer())
num_elem, num_elem_initial = sess.run([num_elem, num_elem_initial])
self.assertLess(num_elem, num_elem_initial / 2 + 0.1)
self.assertGreater(num_elem, num_elem_initial / 2 - 0.1)
target_column.py 文件源码
项目:DeepLearning_VirtualReality_BigData_Project
作者: rashmitripathi
项目源码
文件源码
阅读 20
收藏 0
点赞 0
评论 0
def training_loss(self, logits, target, features, name="training_loss"):
"""Returns training loss tensor for this head.
Training loss is different from the loss reported on the tensorboard as we
should respect the example weights when computing the gradient.
L = sum_{i} w_{i} * l_{i} / B
where B is the number of examples in the batch, l_{i}, w_{i} are individual
losses, and example weight.
Args:
logits: logits, a float tensor.
target: either a tensor for labels or in multihead case, a dict of string
to target tensor.
features: features dict.
name: Op name.
Returns:
Loss tensor.
"""
target = target[self.name] if isinstance(target, dict) else target
loss_unweighted = self._loss_fn(logits, target)
weight_tensor = self.get_weight_tensor(features)
if weight_tensor is None:
return math_ops.reduce_mean(loss_unweighted, name=name)
loss_weighted = self._weighted_loss(loss_unweighted, weight_tensor)
return math_ops.reduce_mean(loss_weighted, name=name)
target_column.py 文件源码
项目:DeepLearning_VirtualReality_BigData_Project
作者: rashmitripathi
项目源码
文件源码
阅读 23
收藏 0
点赞 0
评论 0
def loss(self, logits, target, features):
"""Returns loss tensor for this head.
The loss returned is the weighted average.
L = sum_{i} w_{i} * l_{i} / sum_{i} w_{i}
Args:
logits: logits, a float tensor.
target: either a tensor for labels or in multihead case, a dict of string
to target tensor.
features: features dict.
Returns:
Loss tensor.
"""
target = target[self.name] if isinstance(target, dict) else target
loss_unweighted = self._loss_fn(logits, target)
weight_tensor = self.get_weight_tensor(features)
if weight_tensor is None:
return math_ops.reduce_mean(loss_unweighted, name="loss")
loss_weighted = self._weighted_loss(loss_unweighted, weight_tensor)
return math_ops.div(math_ops.reduce_sum(loss_weighted),
math_ops.to_float(math_ops.reduce_sum(weight_tensor)),
name="loss")
classification.py 文件源码
项目:DeepLearning_VirtualReality_BigData_Project
作者: rashmitripathi
项目源码
文件源码
阅读 28
收藏 0
点赞 0
评论 0
def accuracy(predictions, labels, weights=None):
"""Computes the percentage of times that predictions matches labels.
Args:
predictions: the predicted values, a `Tensor` whose dtype and shape
matches 'labels'.
labels: the ground truth values, a `Tensor` of any shape and
bool, integer, or string dtype.
weights: None or `Tensor` of float values to reweight the accuracy.
Returns:
Accuracy `Tensor`.
Raises:
ValueError: if dtypes don't match or
if dtype is not bool, integer, or string.
"""
if not (labels.dtype.is_integer or
labels.dtype in (dtypes.bool, dtypes.string)):
raise ValueError(
'Labels should have bool, integer, or string dtype, not %r' %
labels.dtype)
if not labels.dtype.is_compatible_with(predictions.dtype):
raise ValueError('Dtypes of predictions and labels should match. '
'Given: predictions (%r) and labels (%r)' %
(predictions.dtype, labels.dtype))
with ops.name_scope('accuracy', values=[predictions, labels]):
is_correct = math_ops.cast(
math_ops.equal(predictions, labels), dtypes.float32)
if weights is not None:
is_correct = math_ops.multiply(is_correct, weights)
num_values = math_ops.multiply(weights, array_ops.ones_like(is_correct))
return math_ops.div(math_ops.reduce_sum(is_correct),
math_ops.reduce_sum(num_values))
return math_ops.reduce_mean(is_correct)
ops_test.py 文件源码
项目:DeepLearning_VirtualReality_BigData_Project
作者: rashmitripathi
项目源码
文件源码
阅读 25
收藏 0
点赞 0
评论 0
def test_name(self):
actual_lt = ops.reduce_mean(self.original_lt, {'channel'})
self.assertIn('lt_reduce_mean', actual_lt.name)
gamma_test.py 文件源码
项目:DeepLearning_VirtualReality_BigData_Project
作者: rashmitripathi
项目源码
文件源码
阅读 22
收藏 0
点赞 0
评论 0
def testGammaGammaKL(self):
alpha0 = np.array([3.])
beta0 = np.array([1., 2., 3., 1.5, 2.5, 3.5])
alpha1 = np.array([0.4])
beta1 = np.array([0.5, 1., 1.5, 2., 2.5, 3.])
# Build graph.
with self.test_session() as sess:
g0 = gamma_lib.Gamma(alpha=alpha0, beta=beta0)
g1 = gamma_lib.Gamma(alpha=alpha1, beta=beta1)
x = g0.sample(int(1e4), seed=0)
kl_sample = math_ops.reduce_mean(g0.log_prob(x) - g1.log_prob(x), 0)
kl_actual = kullback_leibler.kl(g0, g1)
# Execute graph.
[kl_sample_, kl_actual_] = sess.run([kl_sample, kl_actual])
kl_expected = ((alpha0 - alpha1) * special.digamma(alpha0)
+ special.gammaln(alpha1)
- special.gammaln(alpha0)
+ alpha1 * np.log(beta0)
- alpha1 * np.log(beta1)
+ alpha0 * (beta1 / beta0 - 1.))
self.assertEqual(beta0.shape, kl_actual.get_shape())
self.assertAllClose(kl_expected, kl_actual_, atol=0., rtol=1e-6)
self.assertAllClose(kl_sample_, kl_actual_, atol=0., rtol=1e-2)
dirichlet_test.py 文件源码
项目:DeepLearning_VirtualReality_BigData_Project
作者: rashmitripathi
项目源码
文件源码
阅读 21
收藏 0
点赞 0
评论 0
def testCovarianceFromSampling(self):
alpha = np.array([[1., 2, 3],
[2.5, 4, 0.01]], dtype=np.float32)
with self.test_session() as sess:
dist = dirichlet_lib.Dirichlet(alpha) # batch_shape=[2], event_shape=[3]
x = dist.sample(int(250e3), seed=1)
sample_mean = math_ops.reduce_mean(x, 0)
x_centered = x - sample_mean[None, ...]
sample_cov = math_ops.reduce_mean(math_ops.matmul(
x_centered[..., None], x_centered[..., None, :]), 0)
sample_var = array_ops.matrix_diag_part(sample_cov)
sample_stddev = math_ops.sqrt(sample_var)
[
sample_mean_,
sample_cov_,
sample_var_,
sample_stddev_,
analytic_mean,
analytic_cov,
analytic_var,
analytic_stddev,
] = sess.run([
sample_mean,
sample_cov,
sample_var,
sample_stddev,
dist.mean(),
dist.covariance(),
dist.variance(),
dist.stddev(),
])
self.assertAllClose(sample_mean_, analytic_mean, atol=0., rtol=0.04)
self.assertAllClose(sample_cov_, analytic_cov, atol=0., rtol=0.06)
self.assertAllClose(sample_var_, analytic_var, atol=0., rtol=0.03)
self.assertAllClose(sample_stddev_, analytic_stddev, atol=0., rtol=0.02)
dirichlet_multinomial_test.py 文件源码
项目:DeepLearning_VirtualReality_BigData_Project
作者: rashmitripathi
项目源码
文件源码
阅读 17
收藏 0
点赞 0
评论 0
def testCovarianceFromSampling(self):
# We will test mean, cov, var, stddev on a DirichletMultinomial constructed
# via broadcast between alpha, n.
alpha = np.array([[1., 2, 3],
[2.5, 4, 0.01]], dtype=np.float32)
# Ideally we'd be able to test broadcasting but, the multinomial sampler
# doesn't support different total counts.
n = np.float32(5)
with self.test_session() as sess:
# batch_shape=[2], event_shape=[3]
dist = ds.DirichletMultinomial(n, alpha)
x = dist.sample(int(250e3), seed=1)
sample_mean = math_ops.reduce_mean(x, 0)
x_centered = x - sample_mean[None, ...]
sample_cov = math_ops.reduce_mean(math_ops.matmul(
x_centered[..., None], x_centered[..., None, :]), 0)
sample_var = array_ops.matrix_diag_part(sample_cov)
sample_stddev = math_ops.sqrt(sample_var)
[
sample_mean_,
sample_cov_,
sample_var_,
sample_stddev_,
analytic_mean,
analytic_cov,
analytic_var,
analytic_stddev,
] = sess.run([
sample_mean,
sample_cov,
sample_var,
sample_stddev,
dist.mean(),
dist.covariance(),
dist.variance(),
dist.stddev(),
])
self.assertAllClose(sample_mean_, analytic_mean, atol=0., rtol=0.04)
self.assertAllClose(sample_cov_, analytic_cov, atol=0., rtol=0.05)
self.assertAllClose(sample_var_, analytic_var, atol=0., rtol=0.03)
self.assertAllClose(sample_stddev_, analytic_stddev, atol=0., rtol=0.02)
dirichlet_multinomial_test.py 文件源码
项目:DeepLearning_VirtualReality_BigData_Project
作者: rashmitripathi
项目源码
文件源码
阅读 17
收藏 0
点赞 0
评论 0
def testSampleUnbiasedScalarBatch(self):
with self.test_session() as sess:
dist = ds.DirichletMultinomial(
n=5., alpha=2. * self._rng.rand(4).astype(np.float32))
n = int(5e3)
x = dist.sample(n, seed=0)
sample_mean = math_ops.reduce_mean(x, 0)
x_centered = x - sample_mean # Already transposed to [n, 2].
sample_covariance = math_ops.matmul(
x_centered, x_centered, adjoint_a=True) / n
[
sample_mean_,
sample_covariance_,
actual_mean_,
actual_covariance_,
] = sess.run([
sample_mean,
sample_covariance,
dist.mean(),
dist.covariance(),
])
self.assertAllEqual([4], sample_mean.get_shape())
self.assertAllClose(actual_mean_, sample_mean_, atol=0., rtol=0.05)
self.assertAllEqual([4, 4], sample_covariance.get_shape())
self.assertAllClose(
actual_covariance_, sample_covariance_, atol=0., rtol=0.15)
multinomial_test.py 文件源码
项目:DeepLearning_VirtualReality_BigData_Project
作者: rashmitripathi
项目源码
文件源码
阅读 17
收藏 0
点赞 0
评论 0
def testCovarianceFromSampling(self):
# We will test mean, cov, var, stddev on a DirichletMultinomial constructed
# via broadcast between alpha, n.
theta = np.array([[1., 2, 3],
[2.5, 4, 0.01]], dtype=np.float32)
theta /= np.sum(theta, 1)[..., None]
# Ideally we'd be able to test broadcasting but, the multinomial sampler
# doesn't support different total counts.
n = np.float32(5)
with self.test_session() as sess:
dist = ds.Multinomial(n, theta) # batch_shape=[2], event_shape=[3]
x = dist.sample(int(250e3), seed=1)
sample_mean = math_ops.reduce_mean(x, 0)
x_centered = x - sample_mean[None, ...]
sample_cov = math_ops.reduce_mean(math_ops.matmul(
x_centered[..., None], x_centered[..., None, :]), 0)
sample_var = array_ops.matrix_diag_part(sample_cov)
sample_stddev = math_ops.sqrt(sample_var)
[
sample_mean_,
sample_cov_,
sample_var_,
sample_stddev_,
analytic_mean,
analytic_cov,
analytic_var,
analytic_stddev,
] = sess.run([
sample_mean,
sample_cov,
sample_var,
sample_stddev,
dist.mean(),
dist.covariance(),
dist.variance(),
dist.stddev(),
])
self.assertAllClose(sample_mean_, analytic_mean, atol=0., rtol=0.01)
self.assertAllClose(sample_cov_, analytic_cov, atol=0., rtol=0.01)
self.assertAllClose(sample_var_, analytic_var, atol=0., rtol=0.01)
self.assertAllClose(sample_stddev_, analytic_stddev, atol=0., rtol=0.01)
multinomial_test.py 文件源码
项目:DeepLearning_VirtualReality_BigData_Project
作者: rashmitripathi
项目源码
文件源码
阅读 17
收藏 0
点赞 0
评论 0
def testSampleUnbiasedNonScalarBatch(self):
with self.test_session() as sess:
dist = ds.Multinomial(
total_count=5.,
logits=math_ops.log(2. * self._rng.rand(4, 3, 2).astype(np.float32)))
n = int(3e3)
x = dist.sample(n, seed=0)
sample_mean = math_ops.reduce_mean(x, 0)
# Cyclically rotate event dims left.
x_centered = array_ops.transpose(x - sample_mean, [1, 2, 3, 0])
sample_covariance = math_ops.matmul(
x_centered, x_centered, adjoint_b=True) / n
[
sample_mean_,
sample_covariance_,
actual_mean_,
actual_covariance_,
] = sess.run([
sample_mean,
sample_covariance,
dist.mean(),
dist.covariance(),
])
self.assertAllEqual([4, 3, 2], sample_mean.get_shape())
self.assertAllClose(actual_mean_, sample_mean_, atol=0., rtol=0.07)
self.assertAllEqual([4, 3, 2, 2], sample_covariance.get_shape())
self.assertAllClose(
actual_covariance_, sample_covariance_, atol=0., rtol=0.10)
multinomial_test.py 文件源码
项目:DeepLearning_VirtualReality_BigData_Project
作者: rashmitripathi
项目源码
文件源码
阅读 19
收藏 0
点赞 0
评论 0
def testSampleUnbiasedScalarBatch(self):
with self.test_session() as sess:
dist = ds.Multinomial(
total_count=5.,
logits=math_ops.log(2. * self._rng.rand(4).astype(np.float32)))
n = int(5e3)
x = dist.sample(n, seed=0)
sample_mean = math_ops.reduce_mean(x, 0)
x_centered = x - sample_mean # Already transposed to [n, 2].
sample_covariance = math_ops.matmul(
x_centered, x_centered, adjoint_a=True) / n
[
sample_mean_,
sample_covariance_,
actual_mean_,
actual_covariance_,
] = sess.run([
sample_mean,
sample_covariance,
dist.mean(),
dist.covariance(),
])
self.assertAllEqual([4], sample_mean.get_shape())
self.assertAllClose(actual_mean_, sample_mean_, atol=0., rtol=0.07)
self.assertAllEqual([4, 4], sample_covariance.get_shape())
self.assertAllClose(
actual_covariance_, sample_covariance_, atol=0., rtol=0.10)
misc.py 文件源码
项目:DeepLearning_VirtualReality_BigData_Project
作者: rashmitripathi
项目源码
文件源码
阅读 20
收藏 0
点赞 0
评论 0
def pool_as_vector(images, scope=None):
"""Reduce images to vectors by averaging all pixels."""
with ops.name_scope(scope, "PoolAsVector", [images]):
return math_ops.reduce_mean(images, [1, 2])
def average_gradients(tower_grads):
"""Calculate the mean gradient for each shared variable across all towers.
Note
----
This function provides a synchronization point across all towers.
Parameters
----------
tower_grads: List of lists of (gradient, variable) tuples.
The outer list is over individual gradients. The inner list is
over the gradient calculation for each tower.
Return
------
List of pairs of (gradient, variable) where the gradient has been
averaged across all towers.
"""
average_grads = []
for grads_and_vars in zip(*tower_grads):
# Note that each grads_and_vars looks like the following:
# ((grad0_gpu0, var0_gpu0), ... , (grad0_gpuN, var0_gpuN))
# TODO no need for the loop here
# grad.append(mean(grad_gpu[0..N]), var_gpu0)
grads = []
for g, _ in grads_and_vars:
# Add 0 dimension to the gradients to represent the tower.
expanded_g = tf.expand_dims(g, 0)
# Append on a 'tower' dimension which we will average over below.
grads.append(expanded_g)
# Average over the 'tower' dimension.
grad = tf.concat(axis=0, values=grads)
grad = tf.reduce_mean(grad, 0)
# Keep in mind that the Variables are redundant because they are shared
# across towers. So .. we will just return the first tower's pointer to
# the Variable.
v = grads_and_vars[0][1]
grads_and_vars = (grad, v)
average_grads.append(grads_and_vars)
return average_grads
def get_mean_baseline(ema_decay=0.99, name=None):
"""ExponentialMovingAverage baseline.
EMA initializes to 0, which introduces a bias. This baseline implements the
bias correction term from Adam (section 3 of
https://arxiv.org/pdf/1412.6980v8.pdf), dividing by `1 - ema_decay^t`, where
`t` is the step count.
Args:
ema_decay: decay rate for the ExponentialMovingAverage.
name: name for variable scope of the ExponentialMovingAverage.
Returns:
Callable baseline function that takes the `DistributionTensor` (unused) and
the downstream `loss`, and returns an EMA of the loss.
"""
def mean_baseline(_, loss):
with vs.variable_scope(name, default_name="MeanBaseline"):
reduced_loss = math_ops.reduce_mean(loss)
ema = training.ExponentialMovingAverage(decay=ema_decay)
update_op = ema.apply([reduced_loss])
# The bias correction term requires keeping track of how many times the
# EMA has been updated. Creating a variable here to do so. The global step
# is not used because it may or may not track exactly the number of times
# the EMA is updated.
ema_var = ema.average(reduced_loss)
assert ema_var is not None
with ops.colocate_with(ema_var):
num_updates = vs.get_variable(
"local_ema_step", initializer=0, trainable=False)
num_updates = num_updates.assign_add(1)
bias_correction = 1. - math_ops.pow(ema_decay, math_ops.cast(
num_updates, reduced_loss.dtype))
with ops.control_dependencies([update_op]):
baseline = ema.average(reduced_loss) / bias_correction
return baseline
return mean_baseline
def sdca_classifier_model_fn(features, targets, mode, params):
"""Estimator's linear model_fn."""
feature_columns = params["feature_columns"]
optimizer = params["optimizer"]
weight_column_name = params["weight_column_name"]
loss_type = params["loss_type"]
if not isinstance(optimizer, sdca_optimizer.SDCAOptimizer):
raise ValueError("Optimizer must be of type SDCAOptimizer")
loss_fn = {
"logistic_loss": _log_loss_with_two_classes,
"hinge_loss": _hinge_loss,
}[loss_type]
logits, columns_to_variables, bias = (
layers.weighted_sum_from_feature_columns(
columns_to_tensors=features,
feature_columns=feature_columns,
num_outputs=1))
_add_bias_column(feature_columns, features, bias, targets,
columns_to_variables)
loss = None
if mode != estimator.ModeKeys.INFER:
loss = math_ops.reduce_mean(loss_fn(logits, targets), name="loss")
logging_ops.scalar_summary("loss", loss)
train_op = None
if mode == estimator.ModeKeys.TRAIN:
global_step = contrib_variables.get_global_step()
train_op = optimizer.get_train_step(
columns_to_variables, weight_column_name, loss_type, features,
targets, global_step)
predictions = {}
predictions[_LOGISTIC] = math_ops.sigmoid(logits)
logits = array_ops.concat(1, [array_ops.zeros_like(logits), logits])
predictions[_PROBABILITIES] = nn.softmax(logits)
predictions[_CLASSES] = math_ops.argmax(logits, 1)
return predictions, loss, train_op
# Ensures consistency with LinearComposableModel.
def weighted_resample(inputs, weights, overall_rate, scope=None,
mean_decay=0.999, warmup=10, seed=None):
"""Performs an approximate weighted resampling of `inputs`.
This method chooses elements from `inputs` where each item's rate of
selection is proportional to its value in `weights`, and the average
rate of selection across all inputs (and many invocations!) is
`overall_rate`.
Args:
inputs: A list of tensors whose first dimension is `batch_size`.
weights: A `[batch_size]`-shaped tensor with each batch member's weight.
overall_rate: Desired overall rate of resampling.
scope: Scope to use for the op.
mean_decay: How quickly to decay the running estimate of the mean weight.
warmup: Until the resulting tensor has been evaluated `warmup`
times, the resampling menthod uses the true mean over all calls
as its weight estimate, rather than a decayed mean.
seed: Random seed.
Returns:
A list of tensors exactly like `inputs`, but with an unknown (and
possibly zero) first dimension.
A tensor containing the effective resampling rate used for each output.
"""
# Algorithm: Just compute rates as weights/mean_weight *
# overall_rate. This way the the average weight corresponds to the
# overall rate, and a weight twice the average has twice the rate,
# etc.
with ops.name_scope(scope, 'weighted_resample', inputs) as opscope:
# First: Maintain a running estimated mean weight, with decay
# adjusted (by also maintaining an invocation count) during the
# warmup period so that at the beginning, there aren't too many
# zeros mixed in, throwing the average off.
with variable_scope.variable_scope(scope, 'estimate_mean', inputs):
count_so_far = variable_scope.get_local_variable(
'resample_count', initializer=0)
estimated_mean = variable_scope.get_local_variable(
'estimated_mean', initializer=0.0)
count = count_so_far.assign_add(1)
real_decay = math_ops.minimum(
math_ops.truediv((count - 1), math_ops.minimum(count, warmup)),
mean_decay)
batch_mean = math_ops.reduce_mean(weights)
mean = moving_averages.assign_moving_average(
estimated_mean, batch_mean, real_decay, zero_debias=False)
# Then, normalize the weights into rates using the mean weight and
# overall target rate:
rates = weights * overall_rate / mean
results = resample_at_rate([rates] + inputs, rates,
scope=opscope, seed=seed, back_prop=False)
return (results[1:], results[0])
def moments(x, axes, shift=None, name=None, keep_dims=False):
"""Calculate the mean and variance of `x`.
The mean and variance are calculated by aggregating the contents of `x`
across `axes`. If `x` is 1-D and `axes = [0]` this is just the mean
and variance of a vector.
Note: for numerical stability, when shift=None, the true mean
would be computed and used as shift.
When using these moments for batch normalization (see
`tf.nn.batch_normalization`):
* for so-called "global normalization", used with convolutional filters with
shape `[batch, height, width, depth]`, pass `axes=[0, 1, 2]`.
* for simple batch normalization pass `axes=[0]` (batch only).
Args:
x: A `Tensor`.
axes: Array of ints. Axes along which to compute mean and
variance.
shift: A `Tensor` containing the value by which to shift the data for
numerical stability, or `None` in which case the true mean of the data is
used as shift. A shift close to the true mean provides the most
numerically stable results.
name: Name used to scope the operations that compute the moments.
keep_dims: produce moments with the same dimensionality as the input.
Returns:
Two `Tensor` objects: `mean` and `variance`.
"""
#with ops.name_scope(name, "moments", [x, axes, shift]):
if 1:
# The dynamic range of fp16 is too limited to support the collection of
# sufficient statistics. As a workaround we simply perform the operations
# on 32-bit floats before converting the mean and variance back to fp16
y = math_ops.cast(x, dtypes.float32) if x.dtype == dtypes.float16 else x
if shift is None:
# Compute true mean while keeping the dims for proper broadcasting.
shift = array_ops.stop_gradient(
math_ops.reduce_mean(y, axes, keep_dims=True))
else:
shift = math_ops.cast(shift, y.dtype)
counts, m_ss, v_ss, shift = nn.sufficient_statistics(
y, axes, shift=shift, keep_dims=keep_dims, name=name+'_statistics')
# Reshape shift as needed.
shift = array_ops.reshape(shift, array_ops.shape(m_ss))
shift.set_shape(m_ss.get_shape())
with ops.control_dependencies([counts, m_ss, v_ss]):
mean, variance = normalize_moments(counts, m_ss, v_ss, shift, name=name)
if x.dtype == dtypes.float16:
return (math_ops.cast(mean, dtypes.float16),
math_ops.cast(variance, dtypes.float16))
else:
return (mean, variance)