def _batch_norm(self, name, x):
    """Batch normalization."""
    with tf.variable_scope(name):
        params_shape = [x.get_shape()[-1]]
        # print x.get_shape(), params_shape

        beta = tf.get_variable(
            'beta', params_shape, tf.float32,
            initializer=tf.constant_initializer(0.0, tf.float32))
        gamma = tf.get_variable(
            'gamma', params_shape, tf.float32,
            initializer=tf.constant_initializer(1.0, tf.float32))

        if self.mode == 'train':
            mean, variance = tf.nn.moments(x, [0, 1, 2], name='moments')
            moving_mean = tf.get_variable(
                'moving_mean', params_shape, tf.float32,
                initializer=tf.constant_initializer(0.0, tf.float32),
                trainable=False)
            moving_variance = tf.get_variable(
                'moving_variance', params_shape, tf.float32,
                initializer=tf.constant_initializer(1.0, tf.float32),
                trainable=False)

            self._extra_train_ops.append(moving_averages.assign_moving_average(
                moving_mean, mean, 0.9))
            self._extra_train_ops.append(moving_averages.assign_moving_average(
                moving_variance, variance, 0.9))
        else:
            mean = tf.get_variable(
                'moving_mean', params_shape, tf.float32,
                initializer=tf.constant_initializer(0.0, tf.float32),
                trainable=False)
            variance = tf.get_variable(
                'moving_variance', params_shape, tf.float32,
                initializer=tf.constant_initializer(1.0, tf.float32),
                trainable=False)
            tf.histogram_summary(mean.op.name, mean)
            tf.histogram_summary(variance.op.name, variance)

        # epsilon used to be 1e-5. Maybe 0.001 solves NaN problem in deeper net.
        y = tf.nn.batch_normalization(
            x, mean, variance, beta, gamma, 0.001)
        y.set_shape(x.get_shape())
        return y
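# A minimal sketch, assuming names not present in the snippet above (`optimizer`,
# `grads`, `trainable_variables`, `self._train_op`): these `_batch_norm` variants
# only append their moving-average update ops to `self._extra_train_ops`, so the
# train op is typically built by grouping those ops with the gradient step.
apply_op = optimizer.apply_gradients(
    zip(grads, trainable_variables), global_step=self.global_step)
train_ops = [apply_op] + self._extra_train_ops
self._train_op = tf.group(*train_ops)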
def _batch_norm(self, name, x):
    """Batch normalization."""
    with tf.variable_scope(name):
        params_shape = [x.get_shape()[-1]]

        beta = tf.get_variable(
            'beta', params_shape, tf.float32,
            initializer=tf.constant_initializer(0.0, tf.float32))
        gamma = tf.get_variable(
            'gamma', params_shape, tf.float32,
            initializer=tf.constant_initializer(1.0, tf.float32))

        if self.mode == 'train':
            mean, variance = tf.nn.moments(x, [0, 1, 2], name='moments')
            moving_mean = tf.get_variable(
                'moving_mean', params_shape, tf.float32,
                initializer=tf.constant_initializer(0.0, tf.float32),
                trainable=False)
            moving_variance = tf.get_variable(
                'moving_variance', params_shape, tf.float32,
                initializer=tf.constant_initializer(1.0, tf.float32),
                trainable=False)

            self._extra_train_ops.append(moving_averages.assign_moving_average(
                moving_mean, mean, 0.9))
            self._extra_train_ops.append(moving_averages.assign_moving_average(
                moving_variance, variance, 0.9))
        else:
            mean = tf.get_variable(
                'moving_mean', params_shape, tf.float32,
                initializer=tf.constant_initializer(0.0, tf.float32),
                trainable=False)
            variance = tf.get_variable(
                'moving_variance', params_shape, tf.float32,
                initializer=tf.constant_initializer(1.0, tf.float32),
                trainable=False)
            tf.summary.histogram(mean.op.name, mean)
            tf.summary.histogram(variance.op.name, variance)

        # epsilon used to be 1e-5. Maybe 0.001 solves NaN problem in deeper net.
        x_bn = tf.nn.batch_normalization(x, mean, variance, beta, gamma, 0.001)
        x_bn.set_shape(x.get_shape())
        return x_bn
def _batch_norm(self, name, x):
    """Batch normalization."""
    with tf.variable_scope(name):
        params_shape = [x.get_shape()[-1]]

        beta = tf.get_variable(
            'beta', params_shape, tf.float32,
            initializer=tf.constant_initializer(0.0, tf.float32))
        gamma = tf.get_variable(
            'gamma', params_shape, tf.float32,
            initializer=tf.constant_initializer(1.0, tf.float32))

        if self.mode == 'train':
            mean, variance = tf.nn.moments(x, [0, 1, 2], name='moments')
            moving_mean = tf.get_variable(
                'moving_mean', params_shape, tf.float32,
                initializer=tf.constant_initializer(0.0, tf.float32),
                trainable=False)
            moving_variance = tf.get_variable(
                'moving_variance', params_shape, tf.float32,
                initializer=tf.constant_initializer(1.0, tf.float32),
                trainable=False)

            self._extra_train_ops.append(moving_averages.assign_moving_average(
                moving_mean, mean, 0.9))
            self._extra_train_ops.append(moving_averages.assign_moving_average(
                moving_variance, variance, 0.9))
        else:
            mean = tf.get_variable(
                'moving_mean', params_shape, tf.float32,
                initializer=tf.constant_initializer(0.0, tf.float32),
                trainable=False)
            variance = tf.get_variable(
                'moving_variance', params_shape, tf.float32,
                initializer=tf.constant_initializer(1.0, tf.float32),
                trainable=False)
            tf.summary.histogram(mean.op.name, mean)
            tf.summary.histogram(variance.op.name, variance)

        # epsilon used to be 1e-5. Maybe 0.001 solves NaN problem in deeper net.
        y = tf.nn.batch_normalization(
            x, mean, variance, beta, gamma, 0.001)
        y.set_shape(x.get_shape())
        return y
def _build_update_ops(self, mean, variance, is_training):
    """Builds the moving average update ops when using moving variance.

    Args:
        mean: The mean value to update with.
        variance: The variance value to update with.
        is_training: Boolean Tensor to indicate if we're currently in
            training mode.

    Returns:
        Tuple of `(update_mean_op, update_variance_op)` when `is_training` is or
        could be `True`. Returns `None` when `is_training=False`.
    """

    def build_update_ops():
        """Builds the exponential moving average update ops."""
        update_mean_op = moving_averages.assign_moving_average(
            variable=self._moving_mean,
            value=mean,
            decay=self._decay_rate,
            zero_debias=False,
            name="update_moving_mean").op

        update_variance_op = moving_averages.assign_moving_average(
            variable=self._moving_variance,
            value=variance,
            decay=self._decay_rate,
            zero_debias=False,
            name="update_moving_variance").op

        return update_mean_op, update_variance_op

    def build_no_ops():
        return (tf.no_op(), tf.no_op())

    # Only make the ops if we know that `is_training=True`, or the value of
    # `is_training` is unknown.
    is_training_const = utils.constant_value(is_training)
    if is_training_const is None or is_training_const:
        update_mean_op, update_variance_op = utils.smart_cond(
            is_training,
            build_update_ops,
            build_no_ops,
        )
        return (update_mean_op, update_variance_op)
    else:
        return None
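# A minimal sketch, assuming a module in the style of the snippet above where
# `_build_update_ops` is called while the layer output is built; the
# `normalized` tensor name is illustrative. The returned ops only take effect
# if something forces them to run, typically as control dependencies on the
# normalized output.
update_ops = self._build_update_ops(mean, variance, is_training)
if update_ops is not None:
    with tf.control_dependencies(update_ops):
        normalized = tf.identity(normalized)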
def bn(x, c):
    x_shape = x.get_shape()
    params_shape = x_shape[-1:]

    if c['use_bias']:
        bias = _get_variable('bias', params_shape,
                             initializer=tf.zeros_initializer())
        return x + bias

    axis = list(range(len(x_shape) - 1))

    beta = _get_variable('beta',
                         params_shape,
                         initializer=tf.zeros_initializer())
    gamma = _get_variable('gamma',
                          params_shape,
                          initializer=tf.ones_initializer())

    moving_mean = _get_variable('moving_mean',
                                params_shape,
                                initializer=tf.zeros_initializer(),
                                trainable=False)
    moving_variance = _get_variable('moving_variance',
                                    params_shape,
                                    initializer=tf.ones_initializer(),
                                    trainable=False)

    # These ops will only be performed when training.
    mean, variance = tf.nn.moments(x, axis)
    update_moving_mean = moving_averages.assign_moving_average(moving_mean,
                                                               mean, BN_DECAY)
    update_moving_variance = moving_averages.assign_moving_average(
        moving_variance, variance, BN_DECAY)
    tf.add_to_collection(UPDATE_OPS_COLLECTION, update_moving_mean)
    tf.add_to_collection(UPDATE_OPS_COLLECTION, update_moving_variance)

    mean, variance = control_flow_ops.cond(
        c['is_training'], lambda: (mean, variance),
        lambda: (moving_mean, moving_variance))

    x = tf.nn.batch_normalization(x, mean, variance, beta, gamma, BN_EPSILON)
    # x.set_shape(inputs.get_shape()) ??
    return x
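# A minimal sketch, assuming `loss` is already defined and that
# UPDATE_OPS_COLLECTION / the optimizer choice match the snippet above: the
# `bn` helpers only add their update ops to the collection, so the training
# loop must fetch them and run them alongside the gradient step.
optimizer = tf.train.MomentumOptimizer(learning_rate=0.1, momentum=0.9)
grad_step = optimizer.minimize(loss)  # `loss` is assumed to be defined
batchnorm_updates = tf.get_collection(UPDATE_OPS_COLLECTION)
train_op = tf.group(grad_step, *batchnorm_updates)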
def bn(x, c):
    x_shape = x.get_shape()
    params_shape = x_shape[-1:]
    axis = list(range(len(x_shape) - 1))

    beta = tf.get_variable('beta',
                           shape=params_shape,
                           initializer=tf.zeros_initializer(),
                           dtype='float32',
                           collections=[tf.GraphKeys.GLOBAL_VARIABLES, GC_VARIABLES],
                           trainable=True)
    gamma = tf.get_variable('gamma',
                            shape=params_shape,
                            initializer=tf.ones_initializer(),
                            dtype='float32',
                            collections=[tf.GraphKeys.GLOBAL_VARIABLES, GC_VARIABLES],
                            trainable=True)

    moving_mean = tf.get_variable('moving_mean',
                                  shape=params_shape,
                                  initializer=tf.zeros_initializer(),
                                  dtype='float32',
                                  collections=[tf.GraphKeys.GLOBAL_VARIABLES, GC_VARIABLES],
                                  trainable=False)
    moving_variance = tf.get_variable('moving_variance',
                                      shape=params_shape,
                                      initializer=tf.ones_initializer(),
                                      dtype='float32',
                                      collections=[tf.GraphKeys.GLOBAL_VARIABLES, GC_VARIABLES],
                                      trainable=False)

    # These ops will only be performed when training.
    mean, variance = tf.nn.moments(x, axis)
    update_moving_mean = moving_averages.assign_moving_average(moving_mean,
                                                               mean, BN_DECAY)
    update_moving_variance = moving_averages.assign_moving_average(
        moving_variance, variance, BN_DECAY)
    tf.add_to_collection(UPDATE_OPS_COLLECTION, update_moving_mean)
    tf.add_to_collection(UPDATE_OPS_COLLECTION, update_moving_variance)

    mean, variance = control_flow_ops.cond(
        c['is_training'], lambda: (mean, variance),
        lambda: (moving_mean, moving_variance))

    x = tf.nn.batch_normalization(x, mean, variance, beta, gamma, BN_EPSILON)
    return x
# resnet block
def _batch_norm(self, name, x):
    """Batch normalization."""
    with tf.variable_scope(name):
        params_shape = [x.get_shape()[-1]]

        beta = tf.get_variable(
            'beta', params_shape, tf.float32,
            initializer=tf.constant_initializer(0.0, tf.float32))
        gamma = tf.get_variable(
            'gamma', params_shape, tf.float32,
            initializer=tf.constant_initializer(1.0, tf.float32))

        if self.mode == 'train':
            mean, variance = tf.nn.moments(x, [0, 1, 2], name='moments')
            moving_mean = tf.get_variable(
                'moving_mean', params_shape, tf.float32,
                initializer=tf.constant_initializer(0.0, tf.float32),
                trainable=False)
            moving_variance = tf.get_variable(
                'moving_variance', params_shape, tf.float32,
                initializer=tf.constant_initializer(1.0, tf.float32),
                trainable=False)

            self._extra_train_ops.append(moving_averages.assign_moving_average(
                moving_mean, mean, 0.9))
            self._extra_train_ops.append(moving_averages.assign_moving_average(
                moving_variance, variance, 0.9))
        else:
            mean = tf.get_variable(
                'moving_mean', params_shape, tf.float32,
                initializer=tf.constant_initializer(0.0, tf.float32),
                trainable=False)
            variance = tf.get_variable(
                'moving_variance', params_shape, tf.float32,
                initializer=tf.constant_initializer(1.0, tf.float32),
                trainable=False)
            tf.summary.histogram(mean.op.name, mean)
            tf.summary.histogram(variance.op.name, variance)

        # epsilon used to be 1e-5. Maybe 0.001 solves NaN problem in deeper net.
        y = tf.nn.batch_normalization(
            x, mean, variance, beta, gamma, 0.001)
        y.set_shape(x.get_shape())
        return y
def _bn(x, is_training, hypes):
    x_shape = x.get_shape()
    params_shape = x_shape[-1:]
    axis = list(range(len(x_shape) - 1))

    beta = _get_variable('beta',
                         params_shape,
                         initializer=tf.zeros_initializer())
    gamma = _get_variable('gamma',
                          params_shape,
                          initializer=tf.ones_initializer())

    moving_mean = _get_variable('moving_mean',
                                params_shape,
                                initializer=tf.zeros_initializer(),
                                trainable=False)
    moving_variance = _get_variable('moving_variance',
                                    params_shape,
                                    initializer=tf.ones_initializer(),
                                    trainable=False)

    # These ops will only be performed when training.
    mean, variance = tf.nn.moments(x, axis)
    update_moving_mean = moving_averages.assign_moving_average(moving_mean,
                                                               mean,
                                                               BN_DECAY)
    update_moving_variance = moving_averages.assign_moving_average(
        moving_variance, variance, BN_DECAY)

    if hypes['use_moving_average_bn']:
        tf.add_to_collection(UPDATE_OPS_COLLECTION, update_moving_mean)
        tf.add_to_collection(UPDATE_OPS_COLLECTION, update_moving_variance)

        mean, variance = control_flow_ops.cond(
            is_training, lambda: (mean, variance),
            lambda: (moving_mean, moving_variance))
    else:
        mean, variance = mean, variance

    x = tf.nn.batch_normalization(x, mean, variance, beta, gamma, BN_EPSILON)
    # x.set_shape(inputs.get_shape()) ??
    return x
def _batch_norm(self, name, x):
    """Batch normalization."""
    with tf.variable_scope(name):
        params_shape = [x.get_shape()[-1]]

        beta = tf.get_variable(
            'beta', params_shape, tf.float32,
            initializer=tf.constant_initializer(0.0, tf.float32))
        gamma = tf.get_variable(
            'gamma', params_shape, tf.float32,
            initializer=tf.constant_initializer(1.0, tf.float32))

        if self.mode == 'train':
            mean, variance = tf.nn.moments(x, [0, 1, 2], name='moments')
            moving_mean = tf.get_variable(
                'moving_mean', params_shape, tf.float32,
                initializer=tf.constant_initializer(0.0, tf.float32),
                trainable=False)
            moving_variance = tf.get_variable(
                'moving_variance', params_shape, tf.float32,
                initializer=tf.constant_initializer(1.0, tf.float32),
                trainable=False)

            self._extra_train_ops.append(moving_averages.assign_moving_average(
                moving_mean, mean, 0.9))
            self._extra_train_ops.append(moving_averages.assign_moving_average(
                moving_variance, variance, 0.9))
        else:
            mean = tf.get_variable(
                'moving_mean', params_shape, tf.float32,
                initializer=tf.constant_initializer(0.0, tf.float32),
                trainable=False)
            variance = tf.get_variable(
                'moving_variance', params_shape, tf.float32,
                initializer=tf.constant_initializer(1.0, tf.float32),
                trainable=False)
            tf.summary.histogram(mean.op.name, mean)
            tf.summary.histogram(variance.op.name, variance)

        # epsilon used to be 1e-5. Maybe 0.001 solves NaN problem in deeper net.
        y = tf.nn.batch_normalization(
            x, mean, variance, beta, gamma, 0.001)
        y.set_shape(x.get_shape())
        return y
def bn(x, c):
    x_shape = x.get_shape()
    params_shape = x_shape[-1:]

    if c['use_bias']:
        bias = _get_variable('bias', params_shape,
                             initializer=tf.zeros_initializer)
        return x + bias

    axis = list(range(len(x_shape) - 1))

    beta = _get_variable('beta',
                         params_shape,
                         initializer=tf.zeros_initializer)
    gamma = _get_variable('gamma',
                          params_shape,
                          initializer=tf.ones_initializer)

    moving_mean = _get_variable('moving_mean',
                                params_shape,
                                initializer=tf.zeros_initializer,
                                trainable=False)
    moving_variance = _get_variable('moving_variance',
                                    params_shape,
                                    initializer=tf.ones_initializer,
                                    trainable=False)

    # These ops will only be performed when training.
    mean, variance = tf.nn.moments(x, axis)
    update_moving_mean = moving_averages.assign_moving_average(moving_mean,
                                                               mean, BN_DECAY)
    update_moving_variance = moving_averages.assign_moving_average(
        moving_variance, variance, BN_DECAY)
    tf.add_to_collection(UPDATE_OPS_COLLECTION, update_moving_mean)
    tf.add_to_collection(UPDATE_OPS_COLLECTION, update_moving_variance)

    mean, variance = control_flow_ops.cond(
        c['is_training'], lambda: (mean, variance),
        lambda: (moving_mean, moving_variance))

    x = tf.nn.batch_normalization(x, mean, variance, beta, gamma, BN_EPSILON)
    return x
def _batch_norm(self, name, x):
    """Batch normalization."""
    with tf.variable_scope(name):
        params_shape = [x.get_shape()[-1]]

        beta = tf.get_variable(
            'beta', params_shape, tf.float32,
            initializer=tf.constant_initializer(0.0, tf.float32),
            trainable=False)
        gamma = tf.get_variable(
            'gamma', params_shape, tf.float32,
            initializer=tf.constant_initializer(1.0, tf.float32),
            trainable=False)
        factor = tf.get_variable(
            'factor', 1, tf.float32,
            initializer=tf.constant_initializer(1.0, tf.float32),
            trainable=False)

        if self.bn:
            mean, variance = tf.nn.moments(x, [0, 1, 2], name='moments')
            moving_mean = tf.get_variable(
                'mean', params_shape, tf.float32,
                initializer=tf.constant_initializer(0.0, tf.float32),
                trainable=False)
            moving_variance = tf.get_variable(
                'variance', params_shape, tf.float32,
                initializer=tf.constant_initializer(1.0, tf.float32),
                trainable=False)

            self._extra_train_ops.append(moving_averages.assign_moving_average(
                moving_mean, mean, 0.9))
            self._extra_train_ops.append(moving_averages.assign_moving_average(
                moving_variance, variance, 0.9))
        else:
            mean = tf.get_variable(
                'mean', params_shape, tf.float32,
                initializer=tf.constant_initializer(0.0, tf.float32),
                trainable=False)
            variance = tf.get_variable(
                'variance', params_shape, tf.float32,
                initializer=tf.constant_initializer(1.0, tf.float32),
                trainable=False)

            # inv_factor = tf.reciprocal(factor)
            inv_factor = tf.div(1., factor)
            mean = tf.multiply(inv_factor, mean)
            variance = tf.multiply(inv_factor, variance)

            # tf.summary.histogram(mean.op.name, mean)
            # tf.summary.histogram(variance.op.name, variance)

        # epsilon used to be 1e-5. Maybe 0.001 solves NaN problem in deeper net.
        y = tf.nn.batch_normalization(
            x, mean, variance, beta, gamma, 0.001)
        y.set_shape(x.get_shape())
        return y
def _bn(x, is_training, hypes):
    x_shape = x.get_shape()
    params_shape = x_shape[-1:]
    axis = list(range(len(x_shape) - 1))

    beta = _get_variable('beta',
                         params_shape,
                         initializer=tf.zeros_initializer())
    gamma = _get_variable('gamma',
                          params_shape,
                          initializer=tf.ones_initializer())

    moving_mean = _get_variable('moving_mean',
                                params_shape,
                                initializer=tf.zeros_initializer(),
                                trainable=False)
    moving_variance = _get_variable('moving_variance',
                                    params_shape,
                                    initializer=tf.ones_initializer(),
                                    trainable=False)

    # These ops will only be performed when training.
    mean, variance = tf.nn.moments(x, axis)
    update_moving_mean = moving_averages.assign_moving_average(moving_mean,
                                                               mean,
                                                               BN_DECAY)
    update_moving_variance = moving_averages.assign_moving_average(
        moving_variance, variance, BN_DECAY)

    if hypes['use_moving_average_bn']:
        tf.add_to_collection(UPDATE_OPS_COLLECTION, update_moving_mean)
        tf.add_to_collection(UPDATE_OPS_COLLECTION, update_moving_variance)

        mean, variance = control_flow_ops.cond(
            is_training, lambda: (mean, variance),
            lambda: (moving_mean, moving_variance))
    else:
        mean, variance = mean, variance

    x = tf.nn.batch_normalization(x, mean, variance, beta, gamma, BN_EPSILON)
    # x.set_shape(inputs.get_shape()) ??
    return x
def _batch_norm(self, name, x):
    """Batch normalization."""
    with tf.variable_scope(name):
        params_shape = [x.get_shape()[-1]]

        beta = tf.get_variable(
            'beta', params_shape, tf.float32,
            initializer=tf.constant_initializer(0.0, tf.float32))
        gamma = tf.get_variable(
            'gamma', params_shape, tf.float32,
            initializer=tf.constant_initializer(1.0, tf.float32))

        if self.mode == 'train':
            mean, variance = tf.nn.moments(x, [0, 1, 2], name='moments')
            moving_mean = tf.get_variable(
                'moving_mean', params_shape, tf.float32,
                initializer=tf.constant_initializer(0.0, tf.float32),
                trainable=False)
            moving_variance = tf.get_variable(
                'moving_variance', params_shape, tf.float32,
                initializer=tf.constant_initializer(1.0, tf.float32),
                trainable=False)

            self._extra_train_ops.append(moving_averages.assign_moving_average(
                moving_mean, mean, 0.9))
            self._extra_train_ops.append(moving_averages.assign_moving_average(
                moving_variance, variance, 0.9))
        else:
            mean = tf.get_variable(
                'moving_mean', params_shape, tf.float32,
                initializer=tf.constant_initializer(0.0, tf.float32),
                trainable=False)
            variance = tf.get_variable(
                'moving_variance', params_shape, tf.float32,
                initializer=tf.constant_initializer(1.0, tf.float32),
                trainable=False)
            tf.summary.histogram(mean.op.name, mean)
            tf.summary.histogram(variance.op.name, variance)

        # epsilon used to be 1e-5. Maybe 0.001 solves NaN problem in deeper net.
        y = tf.nn.batch_normalization(
            x, mean, variance, beta, gamma, 0.001)
        y.set_shape(x.get_shape())
        return y
def weighted_resample(inputs, weights, overall_rate, scope=None,
                      mean_decay=0.999, warmup=10, seed=None):
    """Performs an approximate weighted resampling of `inputs`.

    This method chooses elements from `inputs` where each item's rate of
    selection is proportional to its value in `weights`, and the average
    rate of selection across all inputs (and many invocations!) is
    `overall_rate`.

    Args:
        inputs: A list of tensors whose first dimension is `batch_size`.
        weights: A `[batch_size]`-shaped tensor with each batch member's weight.
        overall_rate: Desired overall rate of resampling.
        scope: Scope to use for the op.
        mean_decay: How quickly to decay the running estimate of the mean weight.
        warmup: Until the resulting tensor has been evaluated `warmup`
            times, the resampling method uses the true mean over all calls
            as its weight estimate, rather than a decayed mean.
        seed: Random seed.

    Returns:
        A list of tensors exactly like `inputs`, but with an unknown (and
        possibly zero) first dimension.
        A tensor containing the effective resampling rate used for each output.
    """
    # Algorithm: Just compute rates as weights/mean_weight *
    # overall_rate. This way the average weight corresponds to the
    # overall rate, and a weight twice the average has twice the rate,
    # etc.
    with ops.name_scope(scope, 'weighted_resample', inputs) as opscope:
        # First: Maintain a running estimated mean weight, with decay
        # adjusted (by also maintaining an invocation count) during the
        # warmup period so that at the beginning, there aren't too many
        # zeros mixed in, throwing the average off.
        with variable_scope.variable_scope(scope, 'estimate_mean', inputs):
            count_so_far = variable_scope.get_local_variable(
                'resample_count', initializer=0)
            estimated_mean = variable_scope.get_local_variable(
                'estimated_mean', initializer=0.0)

            count = count_so_far.assign_add(1)
            real_decay = math_ops.minimum(
                math_ops.truediv((count - 1), math_ops.minimum(count, warmup)),
                mean_decay)

            batch_mean = math_ops.reduce_mean(weights)
            mean = moving_averages.assign_moving_average(
                estimated_mean, batch_mean, real_decay, zero_debias=False)

        # Then, normalize the weights into rates using the mean weight and
        # overall target rate:
        rates = weights * overall_rate / mean

        results = resample_at_rate([rates] + inputs, rates,
                                   scope=opscope, seed=seed, back_prop=False)

        return (results[1:], results[0])
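# A minimal usage sketch for the function above; the placeholder tensors
# `images`, `labels`, and `weights` are illustrative, not from the source.
# On average about half of each batch is kept, with per-example selection
# rates proportional to `weights`.
images = tf.placeholder(tf.float32, [None, 28, 28, 1])
labels = tf.placeholder(tf.int64, [None])
weights = tf.placeholder(tf.float32, [None])

(resampled_images, resampled_labels), rates = weighted_resample(
    [images, labels], weights, overall_rate=0.5)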
def bn(x, c):
    x_shape = x.get_shape()
    params_shape = x_shape[-1:]

    if c['use_bias']:
        bias = _get_variable('bias', params_shape,
                             initializer=tf.zeros_initializer)
        return x + bias

    axis = list(range(len(x_shape) - 1))

    beta = _get_variable('beta',
                         params_shape,
                         initializer=tf.zeros_initializer)
    gamma = _get_variable('gamma',
                          params_shape,
                          initializer=tf.ones_initializer)

    moving_mean = _get_variable('moving_mean',
                                params_shape,
                                initializer=tf.zeros_initializer,
                                trainable=False)
    moving_variance = _get_variable('moving_variance',
                                    params_shape,
                                    initializer=tf.ones_initializer,
                                    trainable=False)

    # These ops will only be performed when training.
    mean, variance = tf.nn.moments(x, axis)
    update_moving_mean = moving_averages.assign_moving_average(moving_mean,
                                                               mean, BN_DECAY)
    update_moving_variance = moving_averages.assign_moving_average(
        moving_variance, variance, BN_DECAY)
    tf.add_to_collection(UPDATE_OPS_COLLECTION, update_moving_mean)
    tf.add_to_collection(UPDATE_OPS_COLLECTION, update_moving_variance)

    mean, variance = control_flow_ops.cond(
        c['is_training'], lambda: (mean, variance),
        lambda: (moving_mean, moving_variance))

    x = tf.nn.batch_normalization(x, mean, variance, beta, gamma, BN_EPSILON)
    # x.set_shape(inputs.get_shape()) ??
    return x
def batch_normalization(self, input, name,
                        scale_offset=True,
                        relu=False,
                        decay=0.999,
                        moving_vars='moving_vars'):
    # NOTE: Currently, only inference is supported
    with tf.variable_scope(name):
        axis = list(range(len(input.get_shape()) - 1))
        shape = [input.get_shape()[-1]]

        if scale_offset:
            scale = self.make_var('scale', shape=shape,
                                  initializer=tf.ones_initializer(),
                                  trainable=self.trainable)
            offset = self.make_var('offset', shape=shape,
                                   initializer=tf.zeros_initializer(),
                                   trainable=self.trainable)
        else:
            scale, offset = (None, None)

        # Create moving_mean and moving_variance, and add them to the
        # GraphKeys.MOVING_AVERAGE_VARIABLES collection.
        moving_collections = [moving_vars, tf.GraphKeys.MOVING_AVERAGE_VARIABLES]
        moving_mean = self.make_var('mean',
                                    shape,
                                    initializer=tf.zeros_initializer(),
                                    trainable=False,
                                    collections=moving_collections)
        moving_variance = self.make_var('variance',
                                        shape,
                                        initializer=tf.ones_initializer(),
                                        trainable=False,
                                        collections=moving_collections)

        if self.trainable:
            # Calculate the moments based on the individual batch.
            mean, variance = tf.nn.moments(input, axis)

            update_moving_mean = moving_averages.assign_moving_average(
                moving_mean, mean, decay)
            tf.add_to_collection(UPDATE_OPS_COLLECTION, update_moving_mean)
            update_moving_variance = moving_averages.assign_moving_average(
                moving_variance, variance, decay)
            tf.add_to_collection(UPDATE_OPS_COLLECTION, update_moving_variance)
        else:
            # Just use the moving_mean and moving_variance.
            mean = moving_mean
            variance = moving_variance

        output = tf.nn.batch_normalization(
            input,
            mean=mean,
            variance=variance,
            offset=offset,
            scale=scale,
            # TODO: This is the default Caffe batch norm eps.
            # Get the actual eps from parameters.
            variance_epsilon=1e-5,
            name=name)
        if relu:
            output = tf.nn.relu(output)
        return output
def _batch_norm(self, name, x):
    """Batch normalization."""
    with tf.variable_scope(name):
        params_shape = [x.get_shape()[-1]]

        beta = tf.get_variable(
            'beta', params_shape, tf.float32,
            initializer=tf.constant_initializer(0.0, tf.float32))
        gamma = tf.get_variable(
            'gamma', params_shape, tf.float32,
            initializer=tf.constant_initializer(1.0, tf.float32))
        # beta = Qmf_quan(beta, 4, 7)
        # gamma = Qmf_quan(gamma, 4, 7)

        if self.mode == 'train':
            mean, variance = tf.nn.moments(x, [0, 1, 2], name='moments')
            moving_mean = tf.get_variable(
                'moving_mean', params_shape, tf.float32,
                initializer=tf.constant_initializer(0.0, tf.float32),
                trainable=False)
            moving_variance = tf.get_variable(
                'moving_variance', params_shape, tf.float32,
                initializer=tf.constant_initializer(1.0, tf.float32),
                trainable=False)
            # moving_mean = Qmf_quan(moving_mean, 4, 7)
            # moving_variance = Qmf_quan(moving_variance, 4, 7)

            self._extra_train_ops.append(moving_averages.assign_moving_average(
                moving_mean, mean, 0.9))
            self._extra_train_ops.append(moving_averages.assign_moving_average(
                moving_variance, variance, 0.9))
        else:
            mean = tf.get_variable(
                'moving_mean', params_shape, tf.float32,
                initializer=tf.constant_initializer(0.0, tf.float32),
                trainable=False)
            variance = tf.get_variable(
                'moving_variance', params_shape, tf.float32,
                initializer=tf.constant_initializer(1.0, tf.float32),
                trainable=False)
            # mean = Qmf_quan(mean, 4, 7)
            # variance = Qmf_quan(variance, 4, 7)
            tf.summary.histogram(mean.op.name, mean)
            tf.summary.histogram(variance.op.name, variance)

        # epsilon used to be 1e-5. Maybe 0.001 solves NaN problem in deeper net.
        y = tf.nn.batch_normalization(
            x, mean, variance, beta, gamma, 0.001)
        y.set_shape(x.get_shape())
        return y
def _batch_norm_vec(self, name, x):
    """Batch normalization."""
    with tf.variable_scope(name):
        params_shape = [x.get_shape()[-1]]

        beta = tf.get_variable(
            'beta', params_shape, tf.float32,
            initializer=tf.constant_initializer(0.0, tf.float32))
        gamma = tf.get_variable(
            'gamma', params_shape, tf.float32,
            initializer=tf.constant_initializer(1.0, tf.float32))
        # beta = Qmf_quan(beta, 4, 7)
        # gamma = Qmf_quan(gamma, 4, 7)

        if self.mode == 'train':
            mean, variance = tf.nn.moments(x, [0], name='moments')
            moving_mean = tf.get_variable(
                'moving_mean', params_shape, tf.float32,
                initializer=tf.constant_initializer(0.0, tf.float32),
                trainable=False)
            moving_variance = tf.get_variable(
                'moving_variance', params_shape, tf.float32,
                initializer=tf.constant_initializer(1.0, tf.float32),
                trainable=False)
            # moving_mean = Qmf_quan(moving_mean, 4, 7)
            # moving_variance = Qmf_quan(moving_variance, 4, 7)

            self._extra_train_ops.append(moving_averages.assign_moving_average(
                moving_mean, mean, 0.9))
            self._extra_train_ops.append(moving_averages.assign_moving_average(
                moving_variance, variance, 0.9))
        else:
            mean = tf.get_variable(
                'moving_mean', params_shape, tf.float32,
                initializer=tf.constant_initializer(0.0, tf.float32),
                trainable=False)
            variance = tf.get_variable(
                'moving_variance', params_shape, tf.float32,
                initializer=tf.constant_initializer(1.0, tf.float32),
                trainable=False)
            # mean = Qmf_quan(mean, 4, 7)
            # variance = Qmf_quan(variance, 4, 7)
            tf.summary.histogram(mean.op.name, mean)
            tf.summary.histogram(variance.op.name, variance)

        # epsilon used to be 1e-5. Maybe 0.001 solves NaN problem in deeper net.
        y = tf.nn.batch_normalization(
            x, mean, variance, beta, gamma, 0.001)
        y.set_shape(x.get_shape())
        return y
def batch_norm_new(name, input_var, is_train, decay=0.999, epsilon=1e-5):
    """Batch normalization modified from BatchNormLayer in Tensorlayer.

    Source: <https://github.com/zsdonghao/tensorlayer/blob/master/tensorlayer/layers.py#L2190>
    """
    inputs_shape = input_var.get_shape()
    axis = list(range(len(inputs_shape) - 1))
    params_shape = inputs_shape[-1:]

    with tf.variable_scope(name) as scope:
        # Trainable beta and gamma variables
        beta = tf.get_variable('beta',
                               shape=params_shape,
                               initializer=tf.zeros_initializer)
        gamma = tf.get_variable('gamma',
                                shape=params_shape,
                                initializer=tf.random_normal_initializer(mean=1.0, stddev=0.002))

        # Moving mean and variance updated during training
        moving_mean = tf.get_variable('moving_mean',
                                      params_shape,
                                      initializer=tf.zeros_initializer,
                                      trainable=False)
        moving_variance = tf.get_variable('moving_variance',
                                          params_shape,
                                          initializer=tf.constant_initializer(1.),
                                          trainable=False)

        # Compute mean and variance along axis
        batch_mean, batch_variance = tf.nn.moments(input_var, axis, name='moments')

        # Define ops to update moving_mean and moving_variance
        update_moving_mean = moving_averages.assign_moving_average(
            moving_mean, batch_mean, decay, zero_debias=False)
        update_moving_variance = moving_averages.assign_moving_average(
            moving_variance, batch_variance, decay, zero_debias=False)

        # Define a function that:
        #   1. Updates moving_mean & moving_variance with batch_mean & batch_variance
        #   2. Then returns the batch_mean & batch_variance
        def mean_var_with_update():
            with tf.control_dependencies([update_moving_mean, update_moving_variance]):
                return tf.identity(batch_mean), tf.identity(batch_variance)

        # Perform different ops for training and testing
        if is_train:
            mean, variance = mean_var_with_update()
            normed = tf.nn.batch_normalization(input_var, mean, variance, beta, gamma, epsilon)
        else:
            normed = tf.nn.batch_normalization(input_var, moving_mean, moving_variance, beta, gamma, epsilon)

        # mean, variance = tf.cond(
        #     is_train,
        #     mean_var_with_update,  # Training
        #     lambda: (moving_mean, moving_variance))  # Testing - it will use the moving_mean and
        #                                              # moving_variance (fixed during test) that
        #                                              # are computed during training
        # normed = tf.nn.batch_normalization(input_var, mean, variance, beta, gamma, epsilon)
        return normed
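# A minimal usage sketch for batch_norm_new; `x` and the reuse handling are
# illustrative, not from the source. Because `is_train` is a plain Python bool
# here (the tf.cond variant is left commented out above), the training and
# evaluation branches are chosen at graph-construction time, so both calls
# below share the same 'bn1' variables.
x = tf.placeholder(tf.float32, [None, 32, 32, 3])
h_train = batch_norm_new('bn1', x, is_train=True)       # updates the moving stats
with tf.variable_scope(tf.get_variable_scope(), reuse=True):
    h_eval = batch_norm_new('bn1', x, is_train=False)   # uses the moving stats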
def _bn(x, is_training, hypes):
    x_shape = x.get_shape()
    params_shape = x_shape[-1:]
    axis = list(range(len(x_shape) - 1))

    beta = _get_variable('beta',
                         params_shape,
                         initializer=tf.zeros_initializer())
    gamma = _get_variable('gamma',
                          params_shape,
                          initializer=tf.ones_initializer())

    moving_mean = _get_variable('moving_mean',
                                params_shape,
                                initializer=tf.zeros_initializer(),
                                trainable=False)
    moving_variance = _get_variable('moving_variance',
                                    params_shape,
                                    initializer=tf.ones_initializer(),
                                    trainable=False)

    # These ops will only be performed when training.
    mean, variance = tf.nn.moments(x, axis)
    update_moving_mean = moving_averages.assign_moving_average(moving_mean,
                                                               mean,
                                                               BN_DECAY)
    update_moving_variance = moving_averages.assign_moving_average(
        moving_variance, variance, BN_DECAY)

    if hypes['use_moving_average_bn']:
        tf.add_to_collection(UPDATE_OPS_COLLECTION, update_moving_mean)
        tf.add_to_collection(UPDATE_OPS_COLLECTION, update_moving_variance)

        mean, variance = control_flow_ops.cond(
            is_training, lambda: (mean, variance),
            lambda: (moving_mean, moving_variance))
    else:
        mean, variance = mean, variance

    x = tf.nn.batch_normalization(x, mean, variance, beta, gamma, BN_EPSILON)
    # x.set_shape(inputs.get_shape()) ??
    return x