import numpy as np
import tensorflow as tf  # the snippets below assume TensorFlow 1.x graph mode

def omniglot():
sess = tf.InteractiveSession()
""" def wrapper(v):
return tf.Print(v, [v], message="Printing v")
v = tf.Variable(initial_value=np.arange(0, 36).reshape((6, 6)), dtype=tf.float32, name='Matrix')
sess.run(tf.global_variables_initializer())
sess.run(tf.local_variables_initializer())
temp = tf.Variable(initial_value=np.arange(0, 36).reshape((6, 6)), dtype=tf.float32, name='temp')
temp = wrapper(v)
#with tf.control_dependencies([temp]):
temp.eval()
print('Hello')"""
def update_tensor(V, dim2, val):  # update tensor V at index (:, dim2[:]) with val[:]
    val = tf.cast(val, V.dtype)
    def body(_, elems):
        v, d2, chg = elems  # Python 3: tuple parameters are no longer valid, so unpack inside the body
        d2_int = tf.cast(d2, tf.int32)
        return tf.slice(tf.concat([v[:d2_int], [chg], v[d2_int + 1:]], axis=0),
                        [0], [v.get_shape().as_list()[0]])
    Z = tf.scan(body, elems=(V, dim2, val),
                initializer=tf.constant(1, shape=V.get_shape().as_list()[1:], dtype=tf.float32),
                name="Scan_Update")
    return Z
Python tf.control_dependencies() — example source code
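Before the project-specific snippets, here is a minimal, self-contained sketch of the pattern they all rely on (TF 1.x graph mode; the names below are illustrative only): ops passed to tf.control_dependencies() are guaranteed to run before any op created inside the with block.
counter = tf.Variable(0, dtype=tf.int32, name='counter')
increment = tf.assign_add(counter, 1)

# read_after_increment is created inside the block, so evaluating it forces
# the increment op to run first.
with tf.control_dependencies([increment]):
    read_after_increment = tf.identity(counter)

with tf.Session() as sess:
    sess.run(tf.global_variables_initializer())
    print(sess.run(read_after_increment))  # prints 1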
def value_transition(self, curr_state, next_symbols, batch_size):
first_value_token = self.num_functions + self.num_begin_tokens + self.num_control_tokens
num_value_tokens = self.output_size - first_value_token
with tf.name_scope('grammar_transition'):
adjusted_next_symbols = tf.where(next_symbols >= self.num_control_tokens, next_symbols + (first_value_token - self.num_control_tokens), next_symbols)
assert1 = tf.Assert(tf.reduce_all(tf.logical_and(next_symbols < num_value_tokens, next_symbols >= 0)), [curr_state, next_symbols])
with tf.control_dependencies([assert1]):
transitions = tf.gather(tf.constant(self.transition_matrix), curr_state)
assert transitions.get_shape()[1:] == (self.output_size,)
indices = tf.stack((tf.range(0, batch_size), adjusted_next_symbols), axis=1)
next_state = tf.gather_nd(transitions, indices)
assert2 = tf.Assert(tf.reduce_all(next_state >= 0), [curr_state, adjusted_next_symbols, next_state])
with tf.control_dependencies([assert2]):
return tf.identity(next_state)
def append_apply_gradients_ops(self, gradient_state, opt, grads, training_ops,
loss_scale_params):
device_grads = gradient_state # From 2nd result of preprocess_device_grads.
def get_apply_gradients_ops_func():
"""Returns a list of ops for updating gradients."""
apply_gradients_ops = []
# For each variable, apply the combined gradients for this server on
# the parameter server, and then wait for all other servers to do this.
for i, (g, v) in enumerate(grads):
apply_gradient_op = opt.apply_gradients([(g, v)])
barrier = self.benchmark_cnn.add_sync_queues_and_barrier(
'replicate_variable_%s' % i, [apply_gradient_op])
with tf.control_dependencies([barrier]):
with tf.device(self.benchmark_cnn.cpu_device):
updated_value = v.read_value()
for my_d in range(len(self.benchmark_cnn.devices)):
apply_gradients_ops.append(
device_grads[my_d][i][1].assign(updated_value))
return apply_gradients_ops
variable_mgr_util.append_gradients_with_loss_scale(
training_ops, get_apply_gradients_ops_func, loss_scale_params,
self.grad_has_inf_nan)
def batch_norm_layer(self, to_be_normalized, is_training):
if is_training:
train_phase = tf.constant(1)
else:
train_phase = tf.constant(-1)
beta = tf.Variable(tf.constant(0.0, shape=[to_be_normalized.shape[-1]]), name='beta', trainable=True)
gamma = tf.Variable(tf.constant(1.0, shape=[to_be_normalized.shape[-1]]), name='gamma', trainable=True)
# axes = np.arange(len(to_be_normalized.shape) - 1)  # changed to a fixed list to work with TensorFlow 1.3
axes = [0, 1, 2]
print("start nn.moments")
print("axes: " + str(axes))
batch_mean, batch_var = tf.nn.moments(to_be_normalized, axes, name='moments')
print("nn.moments successful")
ema = tf.train.ExponentialMovingAverage(decay=0.5)
def mean_var_with_update():
ema_apply_op = ema.apply([batch_mean, batch_var])
with tf.control_dependencies([ema_apply_op]):
return tf.identity(batch_mean), tf.identity(batch_var)
mean, var = tf.cond(train_phase > 0, mean_var_with_update, lambda: (ema.average(batch_mean), ema.average(batch_var))) # if is training --> update
normed = tf.nn.batch_normalization(to_be_normalized, mean, var, beta, gamma, 1e-3)
return normed
def input_norm(xs):
fc_mean, fc_var = tf.nn.moments(
xs,
axes=[0],
)
scale = tf.Variable(tf.ones([1]))
shift = tf.Variable(tf.zeros([1]))
epsilon = 0.001
# apply moving average for mean and var when train on batch
ema = tf.train.ExponentialMovingAverage(decay=0.5)
def mean_var_with_update():
ema_apply_op = ema.apply([fc_mean, fc_var])
with tf.control_dependencies([ema_apply_op]):
return tf.identity(fc_mean), tf.identity(fc_var)
mean, var = mean_var_with_update()
xs = tf.nn.batch_normalization(xs, mean, var, shift, scale, epsilon)
return xs
def batch_norm(Wx_plus_b,out_size):
fc_mean, fc_var = tf.nn.moments(
Wx_plus_b,
axes=[0],  # the dimensions to normalize over: [0] (batch) for a fully connected layer;
# for images use [0, 1, 2] (batch, height, width), but not the channel dimension
)
scale = tf.Variable(tf.ones([out_size]))
shift = tf.Variable(tf.zeros([out_size]))
epsilon = 0.001
# apply moving average for mean and var when train on batch
ema = tf.train.ExponentialMovingAverage(decay=0.5)
def mean_var_with_update():
ema_apply_op = ema.apply([fc_mean, fc_var])
with tf.control_dependencies([ema_apply_op]):
return tf.identity(fc_mean), tf.identity(fc_var)
mean, var = mean_var_with_update()
Wx_plus_b = tf.nn.batch_normalization(Wx_plus_b, mean, var, shift, scale, epsilon)
return Wx_plus_b
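A usage sketch for the helper above (hypothetical layer sizes): the normalization is applied to the pre-activation of a fully connected layer, and because mean_var_with_update() is wired in through tf.control_dependencies, evaluating the normalized tensor also refreshes the moving averages.
xs = tf.placeholder(tf.float32, [None, 64])
W = tf.Variable(tf.random_normal([64, 32]))
b = tf.Variable(tf.zeros([32]) + 0.1)

pre_activation = tf.matmul(xs, W) + b
normalized = batch_norm(pre_activation, out_size=32)  # helper defined above
hidden = tf.nn.relu(normalized)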
def build_model(self):
Gen=GeneratorTypes[self.gan_type]
config=self.config
self.gen=Gen(config.batch_size,config.gen_hidden_size,config.gen_z_dim)
with tf.variable_scope('Disc') as scope:
self.D1 = Discriminator(self.data.X, config.disc_hidden_size)
scope.reuse_variables()
self.D2 = Discriminator(self.gen.X, config.disc_hidden_size)
d_var = tf.contrib.framework.get_variables(scope)
d_loss_real=tf.reduce_mean( sxe(self.D1,1) )
d_loss_fake=tf.reduce_mean( sxe(self.D2,0) )
self.loss_d = d_loss_real + d_loss_fake
self.loss_g = tf.reduce_mean( sxe(self.D2,1) )
optimizer=tf.train.AdamOptimizer
g_optimizer=optimizer(self.config.lr_gen)
d_optimizer=optimizer(self.config.lr_disc)
self.opt_d = d_optimizer.minimize(self.loss_d,var_list= d_var)
self.opt_g = g_optimizer.minimize(self.loss_g,var_list= self.gen.tr_var,
global_step=self.gen.step)
with tf.control_dependencies([self.inc_step]):
self.train_op=tf.group(self.opt_d,self.opt_g)
def __call__(self, x, train=True):
shape = x.get_shape().as_list()
if train:
with tf.variable_scope(self.name) as scope:
self.beta = tf.get_variable("beta", [shape[-1]],
initializer=tf.constant_initializer(0.))
self.gamma = tf.get_variable("gamma", [shape[-1]],
initializer=tf.random_normal_initializer(1., 0.02))
batch_mean, batch_var = tf.nn.moments(x, [0, 1, 2], name='moments')
ema_apply_op = self.ema.apply([batch_mean, batch_var])
self.ema_mean, self.ema_var = self.ema.average(batch_mean), self.ema.average(batch_var)
with tf.control_dependencies([ema_apply_op]):
mean, var = tf.identity(batch_mean), tf.identity(batch_var)
else:
mean, var = self.ema_mean, self.ema_var
normed = tf.nn.batch_norm_with_global_normalization(
x, mean, var, self.beta, self.gamma, self.epsilon, scale_after_normalization=True)
return normed
# standard convolution layer
def apply_gradients(self, grads_and_vars, global_step=None):
"""Apply gradients to model variables specified in `grads_and_vars`.
`apply_gradients` returns an op that calls
`tf.train.Optimizer.apply_gradients` and then zeros the gradient
variables stored in `self.grads_and_vars`.
Args:
grads_and_vars (list): list of (gradient, variable) pairs, as returned by compute_gradients().
global_step (None, optional): tensorflow global_step variable.
Returns:
(tf.Operation): Applies gradient update to model followed by an
internal gradient zeroing operation to `self.grads_and_vars`.
"""
self.mini_flag = tf.assign(self.mini_flag, tf.constant([0], dtype = tf.float32))
# grads_and_vars = self.aggregate_gradients(grads_and_vars, method='average')
with tf.control_dependencies([self.mini_flag]):
optimize = self._optimizer.apply_gradients(grads_and_vars,
global_step=global_step)
#return [optimize, self.zero_grad()]
return optimize
def build_all(self, param_avg=False):
"""Build all nodes."""
if self._has_built_all:
raise Exception('Only call build_all or build_eval once.')
self._has_built_all = True
with tf.device(self.get_device_fn()):
with tf.variable_scope(self.name):
inp_var = self.build_input()
output_var = self.build(inp_var)
loss_var = self.build_loss(inp_var, output_var)
train_step = self.build_optim(loss_var)
if param_avg:
ema_op, avg_var = self.get_average_var()
self._avg_var = avg_var
with tf.control_dependencies([train_step, ema_op]):
train_step = tf.no_op(name='train_step')
self.register_var('train_step', train_step)
return self
def build_model(self):
self.build_memory()
self.W = tf.Variable(tf.random_normal([self.edim, self.nwords], stddev=self.init_std))
z = tf.matmul(self.hid[-1], self.W)
self.loss = tf.nn.softmax_cross_entropy_with_logits(logits=z, labels=self.target)
self.lr = tf.Variable(self.current_lr)
self.opt = tf.train.GradientDescentOptimizer(self.lr)
params = [self.A, self.B, self.C, self.T_A, self.T_B, self.W]
grads_and_vars = self.opt.compute_gradients(self.loss,params)
clipped_grads_and_vars = [(tf.clip_by_norm(gv[0], self.max_grad_norm), gv[1]) \
for gv in grads_and_vars]
inc = self.global_step.assign_add(1)
with tf.control_dependencies([inc]):
self.optim = self.opt.apply_gradients(clipped_grads_and_vars)
tf.global_variables_initializer().run()
self.saver = tf.train.Saver()
def normalize(self, x, train=True):
"""
Returns a batch-normalized version of x.
"""
if train:  # plain Python bool: the branch is chosen at graph-construction time
mean, variance = tf.nn.moments(x, [0, 1, 2])
assign_mean = self.mean.assign(mean)
assign_variance = self.variance.assign(variance)
with tf.control_dependencies([assign_mean, assign_variance]):
return tf.nn.batch_norm_with_global_normalization(x, mean, variance, self.beta, self.gamma, self.epsilon, self.scale_after_norm)
else:
mean = self.ewma_trainer.average(self.mean)
variance = self.ewma_trainer.average(self.variance)
local_beta = tf.identity(self.beta)
local_gamma = tf.identity(self.gamma)
return tf.nn.batch_norm_with_global_normalization(x, mean, variance, local_beta, local_gamma, self.epsilon, self.scale_after_norm)
model.py — project: Saliency_Detection_Convolutional_Autoencoder (author: arthurmeyer)
def train(self, loss, global_step):
"""
Return a training step for the tensorflow graph
Args:
loss : loss to do sgd on
global_step : which step are we at
"""
opt = tf.train.AdamOptimizer(self.learning_rate)
grads = opt.compute_gradients(loss)
apply_gradient_op = opt.apply_gradients(grads, global_step=global_step)
variable_averages = tf.train.ExponentialMovingAverage(self.moving_avg_decay, global_step)
variables_averages_op = variable_averages.apply(tf.trainable_variables())
with tf.control_dependencies([apply_gradient_op, variables_averages_op]):
train_op = tf.no_op(name='train')
return train_op
def training_step(loss, optimizer_handle, learning_rate, **kwargs):
'''
Creates the optimisation operation which is executed in each training iteration of the network
:param loss: The loss to be minimised
:param optimizer_handle: A handle to one of the tf optimisers
:param learning_rate: Learning rate
:param momentum: Optionally, you can also pass a momentum term to the optimiser.
:return: The training operation
'''
if 'momentum' in kwargs:
momentum = kwargs.get('momentum')
optimizer = optimizer_handle(learning_rate=learning_rate, momentum=momentum)
else:
optimizer = optimizer_handle(learning_rate=learning_rate)
# The with statement is needed to make sure the tf contrib version of batch norm properly performs its updates
update_ops = tf.get_collection(tf.GraphKeys.UPDATE_OPS)
with tf.control_dependencies(update_ops):
train_op = optimizer.minimize(loss)
return train_op
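A usage sketch (hypothetical model): tf.layers.batch_normalization(training=...) is one of the layers that registers its moving-average updates in tf.GraphKeys.UPDATE_OPS, which is exactly what the control_dependencies block in training_step waits for.
images = tf.placeholder(tf.float32, [None, 32, 32, 1])
labels = tf.placeholder(tf.int32, [None])
is_training = tf.placeholder(tf.bool, [])

x = tf.layers.conv2d(images, 16, 3, padding='same')
x = tf.layers.batch_normalization(x, training=is_training)  # adds its updates to GraphKeys.UPDATE_OPS
x = tf.nn.relu(x)
logits = tf.layers.dense(tf.reshape(x, [-1, 32 * 32 * 16]), 10)

loss = tf.reduce_mean(tf.nn.sparse_softmax_cross_entropy_with_logits(labels=labels, logits=logits))
train_op = training_step(loss, tf.train.MomentumOptimizer, learning_rate=1e-3, momentum=0.9)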
def image_reading(path: str, resized_size: Tuple[int, int]=None, data_augmentation: bool=False,
padding: bool=False) -> Tuple[tf.Tensor, tf.Tensor]:
# Read image
image_content = tf.read_file(path, name='image_reader')
image = tf.cond(tf.equal(tf.string_split([path], '.').values[1], tf.constant('jpg', dtype=tf.string)),
true_fn=lambda: tf.image.decode_jpeg(image_content, channels=1, try_recover_truncated=True), # TODO channels = 3 ?
false_fn=lambda: tf.image.decode_png(image_content, channels=1), name='image_decoding')
# Data augmentation
if data_augmentation:
image = augment_data(image)
# Padding
if padding:
with tf.name_scope('padding'):
image, img_width = padding_inputs_width(image, resized_size, increment=CONST.DIMENSION_REDUCTION_W_POOLING)
# Resize
else:
image = tf.image.resize_images(image, size=resized_size)
img_width = tf.shape(image)[1]
with tf.control_dependencies([tf.assert_equal(image.shape[:2], resized_size)]):
    # re-wrap the tensor inside the block so the assert actually gates the returned value
    image = tf.identity(image)
return image, img_width
def batch_norm(self, X):
train_phase = self.train_phase
with tf.name_scope('bn'):
n_out = X.get_shape()[-1:]
beta = tf.Variable(tf.constant(0.0, shape=n_out), name='beta', trainable=True)
gamma = tf.Variable(tf.constant(1.0, shape=n_out), name='gamma', trainable=True)
# batch_mean, batch_var = tf.nn.moments(X, [0, 1, 2], name='moments')
batch_mean, batch_var = tf.nn.moments(X, [0, 1, 2], name='moments')
ema = tf.train.ExponentialMovingAverage(decay=0.5)
def mean_var_with_update():
ema_apply_op = ema.apply([batch_mean, batch_var])
with tf.control_dependencies([ema_apply_op]):
return tf.identity(batch_mean), tf.identity(batch_var)
mean, var = tf.cond(train_phase, mean_var_with_update,
lambda: (ema.average(batch_mean), ema.average(batch_var)))
normed = tf.nn.batch_normalization(X, mean, var, beta, gamma, 1e-3)
return normed
def batch_norm(x, n_out, phase_train, scope='bn', decay=0.9, eps=1e-5, stddev=0.02):
"""
Code taken from http://stackoverflow.com/a/34634291/2267819
"""
with tf.variable_scope(scope):
beta = tf.get_variable(name='beta', shape=[n_out], initializer=tf.constant_initializer(0.0)
, trainable=True)
gamma = tf.get_variable(name='gamma', shape=[n_out], initializer=tf.random_normal_initializer(1.0, stddev),
trainable=True)
batch_mean, batch_var = tf.nn.moments(x, [0, 1, 2], name='moments')
ema = tf.train.ExponentialMovingAverage(decay=decay)
def mean_var_with_update():
ema_apply_op = ema.apply([batch_mean, batch_var])
with tf.control_dependencies([ema_apply_op]):
return tf.identity(batch_mean), tf.identity(batch_var)
mean, var = tf.cond(phase_train,
mean_var_with_update,
lambda: (ema.average(batch_mean), ema.average(batch_var)))
normed = tf.nn.batch_normalization(x, mean, var, beta, gamma, eps)
return normed
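A usage sketch for the version above (hypothetical tensors): phase_train is a boolean tensor, so a single graph serves both modes and the tf.cond picks batch statistics during training and the exponential moving averages at evaluation time.
images = tf.placeholder(tf.float32, [None, 28, 28, 16])
phase_train = tf.placeholder(tf.bool, name='phase_train')

normed = batch_norm(images, n_out=16, phase_train=phase_train)
# feed {phase_train: True} on training steps and {phase_train: False} for evaluation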
def train_simple(total_loss, global_step):
with tf.variable_scope('train_op'):
# Variables that affect learning rate.
num_batches_per_epoch = NUM_EXAMPLES_PER_EPOCH_FOR_TRAIN / FLAGS.batch_size
decay_steps = int(num_batches_per_epoch * NUM_EPOCHS_PER_DECAY)
# Decay the learning rate exponentially based on the number of steps.
lr = tf.train.exponential_decay(INITIAL_LEARNING_RATE,
global_step,
decay_steps,
LEARNING_RATE_DECAY_FACTOR,
staircase=True)
tf.summary.scalar('learning_rate', lr)
# update_ops = tf.get_collection(tf.GraphKeys.UPDATE_OPS)
# with tf.control_dependencies(update_ops):
# opt = tf.train.MomentumOptimizer(lr, 0.9).minimize(total_loss, global_step=global_step)
opt = tf.train.AdamOptimizer(lr).minimize(total_loss, global_step=global_step)
tf.summary.scalar(total_loss.op.name + ' (raw)', total_loss)
return opt, lr
def __init__(self, inputs, outputs, updates=[]):
assert type(inputs) in {list, tuple}, 'Input to a TensorFlow backend function should be a list or tuple.'
assert type(outputs) in {list, tuple}, 'Output to a TensorFlow backend function should be a list or tuple.'
assert type(updates) in {list, tuple}, 'Updates in a TensorFlow backend function should be a list or tuple.'
self.inputs = list(inputs)
self.outputs = list(outputs)
with tf.control_dependencies(self.outputs):
updates_ops = []
for update in updates:
if type(update) is tuple:
p, new_p = update
updates_ops.append(tf.assign(p, new_p))
else:
# assumed already an op
updates_ops.append(update)
self.updates_op = tf.group(*updates_ops)
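The class above only builds the grouped update op; a minimal sketch (assuming the wrapper is driven with an externally managed session) of how such a wrapper is typically invoked so that the outputs and the updates run in a single session call:
def run_function(fn, input_values, session):
    # fetch the outputs and the grouped update op together; drop the op's (None) result
    feed_dict = dict(zip(fn.inputs, input_values))
    results = session.run(fn.outputs + [fn.updates_op], feed_dict=feed_dict)
    return results[:len(fn.outputs)]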
def get_output_for(self, input, phase='train', **kwargs):
if phase == 'train':
# Calculate the moments based on the individual batch.
mean, variance = tf.nn.moments(input, self.axis, shift=self.moving_mean)
# Update the moving_mean and moving_variance moments.
update_moving_mean = moving_averages.assign_moving_average(
self.moving_mean, mean, self.decay)
update_moving_variance = moving_averages.assign_moving_average(
self.moving_variance, variance, self.decay)
# Make sure the updates are computed here.
with tf.control_dependencies([update_moving_mean,
update_moving_variance]):
output = tf.nn.batch_normalization(
input, mean, variance, self.beta, self.gamma, self.epsilon)
else:
output = tf.nn.batch_normalization(
input, self.moving_mean, self.moving_variance, self.beta, self.gamma, self.epsilon)
output.set_shape(self.input_shape)
return output
def batch_norm_wrapper(inputs, is_training, decay = 0.999):
scale = tf.Variable(tf.ones([inputs.get_shape()[-1]]))
beta = tf.Variable(tf.zeros([inputs.get_shape()[-1]]))
pop_mean = tf.Variable(tf.zeros([inputs.get_shape()[-1]]), trainable=False)
pop_var = tf.Variable(tf.ones([inputs.get_shape()[-1]]), trainable=False)
if is_training:  # plain Python bool: the branch is chosen when the graph is built
    batch_mean, batch_var = tf.nn.moments(inputs, [0])
    train_mean = tf.assign(pop_mean,
                           pop_mean * decay + batch_mean * (1 - decay))
    train_var = tf.assign(pop_var,
                          pop_var * decay + batch_var * (1 - decay))
    with tf.control_dependencies([train_mean, train_var]):
        # normalize with the batch statistics; the assign ops above only refresh the running averages
        return tf.nn.batch_normalization(inputs,
                                         batch_mean, batch_var, beta, scale, epsilon)
else:
return tf.nn.batch_normalization(inputs,
pop_mean, pop_var, beta, scale, epsilon)
## regularization parameter
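A quick check of batch_norm_wrapper above (hypothetical shapes; epsilon is assumed to be a module-level constant defined alongside the snippet): with is_training=True, running the output normalizes with the batch statistics and, through the control dependency, also updates pop_mean and pop_var.
epsilon = 0.001  # assumed module-level constant used by batch_norm_wrapper

x = tf.placeholder(tf.float32, [None, 4])
bn_train = batch_norm_wrapper(x, is_training=True)

with tf.Session() as sess:
    sess.run(tf.global_variables_initializer())
    batch = np.random.randn(32, 4).astype(np.float32) * 3.0 + 5.0
    out = sess.run(bn_train, feed_dict={x: batch})
    print(out.mean(axis=0), out.std(axis=0))  # roughly 0 and 1 per feature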
def apply_gradients(self, grads_and_vars, global_step=None, name=None):
with tf.name_scope(name, self._name) as name:
update_op = self._opt.apply_gradients(
grads_and_vars, global_step=global_step)
add_noise_ops = []
with tf.control_dependencies([update_op]):
for grad, var in grads_and_vars:
if grad is None:
continue
with tf.name_scope("sgld_noise_" + var.op.name):
if isinstance(grad, tf.Tensor):
add_noise_ops.append(self._noise_dense(var))
else:
add_noise_ops.append(self._noise_sparse(grad, var))
## running combined op
return tf.group(*([update_op] + add_noise_ops), name=name)
def apply_gradients(self, grads_and_vars, global_step=None, name=None):
with tf.name_scope(name, self._name) as name:
update_op = self._opt.apply_gradients(
grads_and_vars, global_step=global_step)
add_noise_ops = []
with tf.control_dependencies([update_op]):
for grad, var in grads_and_vars:
if grad is None:
continue
with tf.name_scope("psgld_noise_" + var.op.name):
if isinstance(grad, tf.Tensor):
add_noise_ops.append(self._noise_dense(var))
else:
add_noise_ops.append(self._noise_sparse(grad, var))
## running combined op
return tf.group(*([update_op] + add_noise_ops), name=name)