def _add_train_graph(self):
    """Define the training operation."""
    mc = self.mc

    self.global_step = tf.Variable(0, name='global_step', trainable=False)

    # Staircase exponential decay:
    # lr = LEARNING_RATE * LR_DECAY_FACTOR ** floor(global_step / DECAY_STEPS)
    lr = tf.train.exponential_decay(mc.LEARNING_RATE,
                                    self.global_step,
                                    mc.DECAY_STEPS,
                                    mc.LR_DECAY_FACTOR,
                                    staircase=True)
    tf.summary.scalar('learning_rate', lr)

    _add_loss_summaries(self.loss)

    # SGD with momentum; gradients are computed explicitly so they can be clipped.
    opt = tf.train.MomentumOptimizer(learning_rate=lr, momentum=mc.MOMENTUM)
    grads_vars = opt.compute_gradients(self.loss, tf.trainable_variables())

    with tf.variable_scope('clip_gradient') as scope:
        for i, (grad, var) in enumerate(grads_vars):
            if grad is not None:  # variables not reached by the loss have grad == None
                grads_vars[i] = (tf.clip_by_norm(grad, mc.MAX_GRAD_NORM), var)

    apply_gradient_op = opt.apply_gradients(grads_vars, global_step=self.global_step)

    # Histogram summaries for all trainable variables and their (clipped) gradients.
    for var in tf.trainable_variables():
        tf.summary.histogram(var.op.name, var)
    for grad, var in grads_vars:
        if grad is not None:
            tf.summary.histogram(var.op.name + '/gradients', grad)

    # train_op is a no-op gated on the gradient update, so running it performs one step.
    with tf.control_dependencies([apply_gradient_op]):
        self.train_op = tf.no_op(name='train')
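For context, here is a minimal sketch of how the resulting train_op could be driven in a TF1-style session loop. The train_loop function, its num_steps and summary_dir arguments, and the model object are hypothetical stand-ins; only train_op, loss, and global_step come from the method above, and the sketch assumes the input pipeline feeds the graph internally (e.g. via queues), so no feed_dict is shown.

import tensorflow as tf

def train_loop(model, num_steps=1000, summary_dir='/tmp/train'):
    # Hypothetical driver; `model` is assumed to expose the attributes built in
    # _add_train_graph (train_op, loss, global_step) plus the summaries added there.
    summary_op = tf.summary.merge_all()
    saver = tf.train.Saver(tf.global_variables())

    with tf.Session() as sess:
        sess.run(tf.global_variables_initializer())
        writer = tf.summary.FileWriter(summary_dir, sess.graph)

        for step in range(num_steps):
            # One optimizer step: train_op carries a control dependency on
            # apply_gradient_op, so running it applies the clipped momentum update.
            _, loss_value = sess.run([model.train_op, model.loss])

            if step % 100 == 0:
                summary_str, g_step = sess.run([summary_op, model.global_step])
                writer.add_summary(summary_str, g_step)
                saver.save(sess, summary_dir + '/model.ckpt', global_step=g_step)
                print('step {}, loss {:.4f}'.format(step, loss_value))

        writer.close()

Because train_op only depends on apply_gradient_op through the control dependency, running it alone is enough to advance global_step and update the weights; the loss is fetched in the same call purely for logging.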