def _add_train_graph(self):
    """Define the training operation."""
    mc = self.mc
    self.global_step = tf.Variable(0, name='global_step', trainable=False)
    lr = tf.train.exponential_decay(mc.LEARNING_RATE,
                                    self.global_step,
                                    mc.DECAY_STEPS,
                                    mc.LR_DECAY_FACTOR,
                                    staircase=True)
    tf.summary.scalar('learning_rate', lr)
    _add_loss_summaries(self.loss)
    opt = tf.train.MomentumOptimizer(learning_rate=lr, momentum=mc.MOMENTUM)
    grads_vars = opt.compute_gradients(self.loss, tf.trainable_variables())
    with tf.variable_scope('clip_gradient') as scope:
        for i, (grad, var) in enumerate(grads_vars):
            grads_vars[i] = (tf.clip_by_norm(grad, mc.MAX_GRAD_NORM), var)
    apply_gradient_op = opt.apply_gradients(grads_vars, global_step=self.global_step)
    for var in tf.trainable_variables():
        tf.summary.histogram(var.op.name, var)
    for grad, var in grads_vars:
        if grad is not None:
            tf.summary.histogram(var.op.name + '/gradients', grad)
    with tf.control_dependencies([apply_gradient_op]):
        self.train_op = tf.no_op(name='train')
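# A minimal standalone sketch of the idiom above, assuming a toy variable and
# loss (names `toy_w`, `toy_loss` are illustrative, not from the original model):
# grouping tf.no_op() under a control dependency yields a named, fetchable
# "train" target that performs no computation of its own.
def _toy_train_op_sketch():
    toy_w = tf.get_variable('toy_w', [10], initializer=tf.zeros_initializer())
    toy_loss = tf.reduce_mean(tf.square(toy_w - 1.0))
    toy_step = tf.train.GradientDescentOptimizer(0.1).minimize(toy_loss)
    with tf.control_dependencies([toy_step]):
        # Fetching the returned op runs the update but produces no value itself.
        return tf.no_op(name='train')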
# Python tf.no_op() usage examples (collected source snippets)
def split_rnn_outputs(model, rnn_outputs):
    """
    Split the output of dynamic_rnn into the actual RNN outputs and the state update gate.
    """
    if using_skip_rnn(model):
        return rnn_outputs.h, rnn_outputs.state_gate
    else:
        return rnn_outputs, tf.no_op()
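# Hedged caller-side sketch (all names here are illustrative): because the
# non-skip branch returns tf.no_op(), callers can fetch both elements with a
# single sess.run call regardless of the model type; the no_op fetch yields None.
toy_outputs = tf.zeros([2, 3])
outputs, state_gate = toy_outputs, tf.no_op()  # non-skip-RNN case
with tf.Session() as sess:
    out_val, gate_val = sess.run([outputs, state_gate])  # gate_val is None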
def test_sampling(self):
    hook = hooks.TrainSampleHook(
        params={"every_n_steps": 10}, model_dir=self.model_dir,
        run_config=tf.contrib.learn.RunConfig())
    global_step = tf.contrib.framework.get_or_create_global_step()
    no_op = tf.no_op()
    hook.begin()
    with self.test_session() as sess:
        sess.run(tf.global_variables_initializer())
        sess.run(tf.local_variables_initializer())
        sess.run(tf.tables_initializer())
        # pylint: disable=W0212
        mon_sess = monitored_session._HookedSession(sess, [hook])
        # Should trigger for step 0
        sess.run(tf.assign(global_step, 0))
        mon_sess.run(no_op)
        outfile = os.path.join(self.sample_dir, "samples_000000.txt")
        with open(outfile, "rb") as readfile:
            self.assertIn("Prediction followed by Target @ Step 0",
                          readfile.read().decode("utf-8"))
        # Should not trigger for step 9
        sess.run(tf.assign(global_step, 9))
        mon_sess.run(no_op)
        outfile = os.path.join(self.sample_dir, "samples_000009.txt")
        self.assertFalse(os.path.exists(outfile))
        # Should trigger for step 10
        sess.run(tf.assign(global_step, 10))
        mon_sess.run(no_op)
        outfile = os.path.join(self.sample_dir, "samples_000010.txt")
        with open(outfile, "rb") as readfile:
            self.assertIn("Prediction followed by Target @ Step 10",
                          readfile.read().decode("utf-8"))
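# In the test above, tf.no_op() acts as a zero-cost fetch: running it through the
# hooked session advances the hooks without computing anything else. A minimal
# sketch of the same idea with a plain MonitoredSession (names here are
# assumptions, not part of the original test):
tick = tf.no_op(name='tick')
with tf.train.MonitoredSession() as mon_sess:
    mon_sess.run(tick)  # triggers any registered session hooks for this step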
def batch_norm_template(inputs, is_training, scope, moments_dims, bn_decay):
    """ Batch normalization on convolutional maps and beyond...
    Ref.: http://stackoverflow.com/questions/33949786/how-could-i-use-batch-normalization-in-tensorflow
    Args:
        inputs:       Tensor, k-D input ... x C could be BC or BHWC or BDHWC
        is_training:  boolean tf.Variable, true indicates training phase
        scope:        string, variable scope
        moments_dims: a list of ints, indicating dimensions for moments calculation
        bn_decay:     float or float tensor variable, controlling moving average weight
    Return:
        normed:       batch-normalized maps
    """
    with tf.variable_scope(scope) as sc:
        num_channels = inputs.get_shape()[-1].value
        beta = tf.Variable(tf.constant(0.0, shape=[num_channels]),
                           name='beta', trainable=True)
        gamma = tf.Variable(tf.constant(1.0, shape=[num_channels]),
                            name='gamma', trainable=True)
        batch_mean, batch_var = tf.nn.moments(inputs, moments_dims, name='moments')
        decay = bn_decay if bn_decay is not None else 0.9
        ema = tf.train.ExponentialMovingAverage(decay=decay)
        # Operator that maintains moving averages of variables.
        ema_apply_op = tf.cond(is_training,
                               lambda: ema.apply([batch_mean, batch_var]),
                               lambda: tf.no_op())

        # Update moving average and return current batch's avg and var.
        def mean_var_with_update():
            with tf.control_dependencies([ema_apply_op]):
                return tf.identity(batch_mean), tf.identity(batch_var)

        # ema.average returns the Variable holding the average of var.
        mean, var = tf.cond(is_training,
                            mean_var_with_update,
                            lambda: (ema.average(batch_mean), ema.average(batch_var)))
        normed = tf.nn.batch_normalization(inputs, mean, var, beta, gamma, 1e-3)
    return normed
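# Hedged usage sketch for batch_norm_template (placeholder names and shapes are
# assumptions; only the call signature mirrors the helper above):
example_images = tf.placeholder(tf.float32, [None, 32, 32, 16])  # BHWC input
example_is_training = tf.placeholder(tf.bool, shape=[])
example_normed = batch_norm_template(example_images, example_is_training,
                                     scope='bn_example', moments_dims=[0, 1, 2],
                                     bn_decay=0.9)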
def add_sync_queues_and_barrier(self, name_prefix, enqueue_after_list):
    """Adds ops to enqueue on all worker queues.

    Args:
        name_prefix: prefix for the shared_name of ops.
        enqueue_after_list: control dependency from ops.

    Returns:
        an op that should be used as control dependency before starting next step.
    """
    self.sync_queue_counter += 1
    with tf.device(self.sync_queue_devices[(
            self.sync_queue_counter % len(self.sync_queue_devices))]):
        sync_queues = [
            tf.FIFOQueue(self.num_workers, [tf.bool], shapes=[[]],
                         shared_name='%s%s' % (name_prefix, i))
            for i in range(self.num_workers)]
        queue_ops = []
        # For each other worker, add an entry in a queue, signaling that it can
        # finish this step.
        token = tf.constant(False)
        with tf.control_dependencies(enqueue_after_list):
            for i, q in enumerate(sync_queues):
                if i == self.task_index:
                    queue_ops.append(tf.no_op())
                else:
                    queue_ops.append(q.enqueue(token))
        # Drain tokens off queue for this worker, one for each other worker.
        queue_ops.append(
            sync_queues[self.task_index].dequeue_many(len(sync_queues) - 1))
        return tf.group(*queue_ops)
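# Standalone sketch of the barrier idiom above for two workers (queue names and
# constants here are hypothetical): each worker enqueues a token into every
# *other* worker's queue, appends tf.no_op() for its own slot, and then dequeues
# num_workers - 1 tokens to wait for the rest.
num_workers, task_index = 2, 0
barrier_queues = [tf.FIFOQueue(num_workers, [tf.bool], shapes=[[]],
                               shared_name='barrier_%d' % i)
                  for i in range(num_workers)]
barrier_ops = [tf.no_op() if i == task_index else q.enqueue(tf.constant(False))
               for i, q in enumerate(barrier_queues)]
barrier_ops.append(barrier_queues[task_index].dequeue_many(num_workers - 1))
barrier = tf.group(*barrier_ops)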
def _create_train_ops(self, *_):
    tf.no_op(name='train_op_1')
def test_create_from_flags(self):
    tf.flags.FLAGS.mode = plan.Plan.mode_keys.TRAIN
    tf.flags.FLAGS.truncate_examples = 3
    tf.flags.FLAGS.num_multiprocess_processes = 4
    tf.flags.FLAGS.master = 'foo'
    tf.flags.FLAGS.batches_per_epoch = 123
    foo = tf.get_variable('foo', [], tf.float32, tf.constant_initializer(4))
    p = plan.Plan.create_from_flags(_setup_plan(
        compiler=block_compiler.Compiler.create(blocks.Scalar()),
        losses={'foo': foo},
        examples=xrange(5)))
    self.assertEqual(p.num_multiprocess_processes, 4)
    self.assertEqual(p.master, 'foo')
    self.assertEqual(p.batches_per_epoch, 123)
    self.assertEqual(p.compute_summaries, True)
    self.assertEqual(p.is_chief_trainer, True)
    self.assertEqual(p.logdir, os.path.join('/tmp/', 'plan', 'run_0', 'train'))
    self.assertEqual(p.rundir, os.path.join('/tmp/', 'plan', 'run_0'))
    self.assertEqual(p.plandir, os.path.join('/tmp/', 'plan'))
    self.assertEqual([0, 1, 2], list(p.examples))
    with self.test_session() as sess:
        sess.run(tf.global_variables_initializer())
        self.assertEqual(4, p.loss_total.eval())
        sess.run(p.train_op)  # should make loss smaller
        self.assertLess(p.loss_total.eval(), 4)
    tf.flags.FLAGS.num_multiprocess_processes = 0
    tf.flags.FLAGS.task = 42
    train_op = tf.no_op()
    p = plan.Plan.create_from_flags(_setup_plan(
        compiler=block_compiler.Compiler.create(blocks.Scalar()),
        losses={'foo': tf.constant(3.14)},
        train_op=train_op,
        examples=xrange(5)))
    self.assertEqual(p.num_multiprocess_processes, 0)
    self.assertEqual(p.compute_summaries, False)
    self.assertEqual(p.is_chief_trainer, False)
    self.assertEqual(p.train_op, train_op)
def test_create_from_params(self):
    params = plan.plan_default_params()
    params.update({
        'mode': plan.Plan.mode_keys.TRAIN,
        'truncate_examples': 3,
        'num_multiprocess_processes': 4,
        'master': 'foo',
        'batches_per_epoch': 123})
    foo = tf.get_variable('foo', [], tf.float32, tf.constant_initializer(4))
    p = plan.Plan.create_from_params(_setup_plan(
        compiler=block_compiler.Compiler.create(blocks.Scalar()),
        losses={'foo': foo},
        examples=xrange(5)), params)
    self.assertEqual(p.num_multiprocess_processes, 4)
    self.assertEqual(p.master, 'foo')
    self.assertEqual(p.batches_per_epoch, 123)
    self.assertEqual(p.compute_summaries, True)
    self.assertEqual(p.is_chief_trainer, True)
    self.assertEqual(p.logdir, os.path.join('/tmp/', 'plan', 'run_0', 'train'))
    self.assertEqual(p.rundir, os.path.join('/tmp/', 'plan', 'run_0'))
    self.assertEqual(p.plandir, os.path.join('/tmp/', 'plan'))
    self.assertEqual([0, 1, 2], list(p.examples))
    with self.test_session() as sess:
        sess.run(tf.global_variables_initializer())
        self.assertEqual(4, p.loss_total.eval())
        sess.run(p.train_op)  # should make loss smaller
        self.assertLess(p.loss_total.eval(), 4)
    tf.flags.FLAGS.num_multiprocess_processes = 0
    tf.flags.FLAGS.task = 42
    train_op = tf.no_op()
    p = plan.Plan.create_from_flags(_setup_plan(
        compiler=block_compiler.Compiler.create(blocks.Scalar()),
        losses={'foo': tf.constant(3.14)},
        train_op=train_op,
        examples=xrange(5)))
    self.assertEqual(p.num_multiprocess_processes, 0)
    self.assertEqual(p.compute_summaries, False)
    self.assertEqual(p.is_chief_trainer, False)
    self.assertEqual(p.train_op, train_op)
def test_assert_runnable(self):
    p = plan.TrainPlan()
    self.assertRaisesWithLiteralMatch(
        ValueError, 'at least one loss is required', p.assert_runnable)
    p.losses['foo'] = tf.constant(42.0)
    self.assertRaisesWithLiteralMatch(
        ValueError, 'compiler is required', p.assert_runnable)
    p.compiler = block_compiler.Compiler.create(blocks.Scalar())
    self.assertRaisesWithLiteralMatch(
        RuntimeError, 'finalize_stats() has not been called', p.assert_runnable)
    p.finalize_stats()
    self.assertRaisesWithLiteralMatch(
        ValueError, 'logdir is required', p.assert_runnable)
    p.logdir = '/tmp/'
    self.assertRaisesWithLiteralMatch(
        ValueError, 'train_op is required', p.assert_runnable)
    p.train_op = tf.no_op()
    self.assertRaisesWithLiteralMatch(
        ValueError, 'batch_size is required', p.assert_runnable)
    p.batch_size = 10
    self.assertRaisesWithLiteralMatch(
        ValueError, 'either examples or batches_per_epoch is required',
        p.assert_runnable)
    p.examples = xrange(2)
    p.assert_runnable()
    p.examples = None
    self.assertRaises(ValueError, p.assert_runnable)
    p.batches_per_epoch = 42
    p.assert_runnable()
def test_dequeue(self):
    p = plan.TrainPlan()
    p.compiler = block_compiler.Compiler().compile(blocks.Scalar())
    p.is_chief_trainer = True
    p.batch_size = 3
    p.batches_per_epoch = 2
    p.queue_capacity = 12
    p.num_dequeuers = 1
    p.ps_tasks = 1
    q = p._create_queue(0)
    p._setup_dequeuing([q])
    input_batch = list(p.compiler.build_loom_inputs([7])) * 3
    q_enqueue = q.enqueue_many([input_batch * 4])
    p.losses['foo'], = p.compiler.output_tensors
    p.train_op = tf.no_op()
    p.finalize_stats()
    p.logdir = self.get_temp_dir()
    p.epochs = 2
    p.print_file = six.StringIO()
    init_op = tf.global_variables_initializer()
    sv = p.create_supervisor()
    with self.test_session() as sess:
        sess.run(init_op)
        sess.run(q_enqueue)
        p.run(sv, sess)
    expected = '\n'.join(['running train',
                          'train_size: 6',
                          'epoch: 1 train[loss: 7.000e+00]',
                          'epoch: 2 train[loss: 7.000e+00]',
                          'final model saved in file: %s' % p.logdir])
    log_str = p.print_file.getvalue()
    self.assertIn(expected, log_str)
def testMultiGPUPS(self):
    deploy_config = model_deploy.DeploymentConfig(num_clones=2, num_ps_tasks=1)
    self.assertEqual(deploy_config.caching_device()(tf.no_op()), '')
    self.assertDeviceEqual(deploy_config.clone_device(0),
                           '/job:worker/device:GPU:0')
    self.assertDeviceEqual(deploy_config.clone_device(1),
                           '/job:worker/device:GPU:1')
    self.assertEqual(deploy_config.clone_scope(0), 'clone_0')
    self.assertEqual(deploy_config.clone_scope(1), 'clone_1')
    self.assertDeviceEqual(deploy_config.optimizer_device(),
                           '/job:worker/device:CPU:0')
    self.assertDeviceEqual(deploy_config.inputs_device(),
                           '/job:worker/device:CPU:0')
def run_train():
    fout = open('inf.txt', 'w+')
    test_config = ModelConfig()
    test_config.keep_prob = 1.0
    test_config.batch_size = 1

    session_config = tf.ConfigProto(allow_soft_placement=True)
    session_config.gpu_options.allow_growth = True

    with tf.Graph().as_default(), tf.Session(config=session_config) as sess:
        with tf.device('/gpu:0'):
            initializer = tf.random_uniform_initializer(-test_config.init_scale,
                                                        test_config.init_scale)
            train_model = vgg16.Vgg16(FLAGS.vgg16_file_path)
            train_model.build(initializer)

            data_test = dataset.DataSet(FLAGS.file_path_test, FLAGS.data_root_dir,
                                        TEST_SIZE, is_train_set=False)
            test_writer = tf.summary.FileWriter(FLAGS.log_dir + '/test')
            saver = tf.train.Saver(max_to_keep=100)
            last_epoch = load_model(sess, saver, FLAGS.saveModelPath, train_model)
            print('start: ', last_epoch + 1)

            # tf.no_op() stands in for the training op: this epoch only evaluates.
            test_accuracy_1, test_accuracy_5, test_loss = run_epoch(
                sess, test_config.keep_prob, fout, test_config.batch_size,
                train_model, data_test, tf.no_op(), 2, test_writer, istraining=False)
            info = "Final: Test accuracy (top 1): %.4f  Test accuracy (top 5): %.4f  Loss %.4f" % (
                test_accuracy_1, test_accuracy_5, test_loss)
            print(info)
            fout.write(info + '\n')
            fout.flush()

            test_writer.close()
    print("Training step is completed!")
    fout.close()
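# The call above passes tf.no_op() where run_epoch expects a training op, so the
# same epoch loop can be reused for evaluation without applying gradients. A
# hedged sketch of that pattern (run_epoch's real signature is not shown here;
# the helper below is illustrative only):
def _epoch_step_sketch(sess, loss_tensor, train_op):
    # Pass the real training op when training, or tf.no_op() when evaluating;
    # the session-loop code stays identical in both cases.
    loss_val, _ = sess.run([loss_tensor, train_op])
    return loss_val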
def apply_stats(self, statsUpdates):
    """Compute stats and update/apply the new stats to the running average."""

    def updateAccumStats():
        if self._full_stats_init:
            return tf.cond(tf.greater(self.sgd_step, self._cold_iter),
                           lambda: tf.group(*self._apply_stats(
                               statsUpdates, accumulate=True,
                               accumulateCoeff=1. / self._stats_accum_iter)),
                           tf.no_op)
        else:
            return tf.group(*self._apply_stats(
                statsUpdates, accumulate=True,
                accumulateCoeff=1. / self._stats_accum_iter))

    def updateRunningAvgStats(statsUpdates, fac_iter=1):
        # return tf.cond(tf.greater_equal(self.factor_step,
        #     tf.convert_to_tensor(fac_iter)), lambda:
        #     tf.group(*self._apply_stats(stats_list, varlist)), tf.no_op)
        return tf.group(*self._apply_stats(statsUpdates))

    if self._async_stats:
        # Asynchronous stats update via a single-slot queue drained elsewhere.
        update_stats = self._apply_stats(statsUpdates)
        queue = tf.FIFOQueue(1, [item.dtype for item in update_stats],
                             shapes=[item.get_shape() for item in update_stats])
        enqueue_op = queue.enqueue(update_stats)

        def dequeue_stats_op():
            return queue.dequeue()

        self.qr_stats = tf.train.QueueRunner(queue, [enqueue_op])
        update_stats_op = tf.cond(tf.equal(queue.size(), tf.convert_to_tensor(0)),
                                  tf.no_op, lambda: tf.group(*[dequeue_stats_op()]))
    else:
        # Synchronous stats update.
        update_stats_op = tf.cond(tf.greater_equal(self.stats_step, self._stats_accum_iter),
                                  lambda: updateRunningAvgStats(statsUpdates),
                                  updateAccumStats)
    self._update_stats_op = update_stats_op
    return update_stats_op
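# Isolated sketch of the conditional-update idiom used above (counter and
# threshold names are illustrative): tf.cond takes a callable per branch, and
# passing the tf.no_op function itself gives a "do nothing" branch, exactly as
# in apply_stats.
example_counter = tf.Variable(0, dtype=tf.int32, trainable=False)
example_threshold = tf.constant(10)
maybe_update = tf.cond(tf.greater_equal(example_counter, example_threshold),
                       lambda: tf.group(tf.assign_add(example_counter, 1)),
                       tf.no_op)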