def train(self, loss, global_step):
    # NOTE: `self.num_examples_per_epoch` is used directly as the number of
    # batches per epoch here; if it actually counts examples, divide by the
    # batch size before computing `decay_steps`.
    num_batches_per_epoch = self.num_examples_per_epoch
    decay_steps = int(num_batches_per_epoch * self.num_epochs_per_decay)

    # Decay the learning rate exponentially based on the number of steps.
    lr = tf.train.exponential_decay(self.initial_learning_rate,
                                    global_step,
                                    decay_steps,
                                    self.learning_rate_decay_factor,
                                    staircase=True)
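    # With staircase=True the schedule is piecewise constant:
    #   lr = initial_learning_rate * learning_rate_decay_factor ** (global_step // decay_steps)
    # i.e. the rate drops by a fixed factor once every `num_epochs_per_decay` epochs.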
    tf.summary.scalar('learning_rate', lr)

    # Generate moving averages of all losses and associated summaries.
    loss_averages_op = self._add_loss_summaries(loss)

    # Compute gradients once the loss-averaging op has run.
    with tf.variable_scope('calculate_gradients'):
        with tf.control_dependencies([loss_averages_op]):
            opt = tf.train.AdamOptimizer(lr, epsilon=self.adam_epsilon)
            grads = opt.compute_gradients(loss)
            # Optional NaN scrubbing plus value clipping of the raw gradients:
            # grads = [
            #     (tf.clip_by_value(tf.where(tf.is_nan(grad), tf.zeros_like(grad), grad),
            #                       -1000.0, 1000.0), var) if grad is not None else
            #     (tf.zeros_like(var), var) for grad, var in grads]

    # Apply gradients.
    # grad_check = tf.check_numerics(grads, "NaN or Inf gradients found: ")
    # with tf.control_dependencies([grad_check]):
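    # Note that tf.check_numerics takes a single tensor, not a list of
    # (grad, var) pairs, so the commented-out check above would fail as written.
    # A minimal per-gradient sketch (not part of the original code), which would
    # have to replace `grads` before apply_gradients is called:
    # grads = [(tf.check_numerics(g, 'NaN/Inf in gradient for ' + v.op.name), v)
    #          if g is not None else (g, v) for g, v in grads]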
    apply_gradient_op = opt.apply_gradients(grads,
                                            global_step=global_step)
    # Add histograms for trainable variables.
    # for var in tf.trainable_variables():
    #     tf.summary.histogram(var.op.name, var)

    # Add histograms for gradients.
    # for grad, var in grads:
    #     if grad is not None:
    #         tf.summary.histogram(var.op.name + '/gradients', grad)

    # Track the moving averages of all trainable variables.
    # variable_averages = tf.train.ExponentialMovingAverage(
    #     self.moving_average_decay, global_step)
    # variables_averages_op = variable_averages.apply(
    #     tf.trainable_variables())

    # The returned op does nothing itself; the control dependency forces the
    # gradient update (and the variable averaging, if re-enabled) to run first.
    with tf.control_dependencies(
            [apply_gradient_op]):  # , variables_averages_op]):
        train_op = tf.no_op(name='train')
    return train_op
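
# Usage sketch (not part of the original listing). `Model` and its `inputs`,
# `inference` and `loss` builders are hypothetical stand-ins for whatever class
# defines the `train` method above; the TensorFlow 1.x calls themselves
# (get_or_create_global_step, Session, global_variables_initializer) are standard.
import tensorflow as tf

model = Model()                                       # hypothetical model class
images, labels = model.inputs()                       # assumed input pipeline
global_step = tf.train.get_or_create_global_step()
logits = model.inference(images)                      # assumed forward pass
loss = model.loss(logits, labels)                     # assumed loss builder
train_op = model.train(loss, global_step)

with tf.Session() as sess:
    sess.run(tf.global_variables_initializer())
    for step in range(10000):                         # illustrative step count
        _, loss_value = sess.run([train_op, loss])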