def add_training_op(self, loss):
    """Create the training op that minimizes `loss`.

    Instantiates the optimizer named by ``self.config.optimizer`` (e.g.
    ``"Adam"`` -> ``tf.train.AdamOptimizer``), clips gradients by global
    norm when ``self.config.gradient_clip > 0``, records the post-clip
    gradient norm in ``self.grad_norm``, and returns the apply op.
    """
    optclass = getattr(tf.train, self.config.optimizer + 'Optimizer')
    assert issubclass(optclass, tf.train.Optimizer)
    optimizer = optclass(self.config.learning_rate)

    gradient_var_pairs = optimizer.compute_gradients(loss)
    # compute_gradients returns (None, var) for variables the loss does not
    # depend on; drop them explicitly (clip_by_global_norm/global_norm and
    # apply_gradients all skip None entries anyway, so behavior is unchanged).
    gradient_var_pairs = [(g, v) for g, v in gradient_var_pairs if g is not None]
    # `variables` instead of `vars` to avoid shadowing the builtin.
    variables = [v for _, v in gradient_var_pairs]
    gradients = [g for g, _ in gradient_var_pairs]

    if self.config.gradient_clip > 0:
        clipped, _ = tf.clip_by_global_norm(gradients, self.config.gradient_clip)
    else:
        clipped = gradients
    self.grad_norm = tf.global_norm(clipped)
    train_op = optimizer.apply_gradients(zip(clipped, variables))
    return train_op
Python examples of clip_by_global_norm()
Source file: base_aligner.py
Project: almond-nnparser
Author: Stanford-Mobisocial-IoT-Lab
Project source
File source
Reads: 32
Bookmarks: 0
Likes: 0
Comments: 0
def get_training_tensors(self, learning_rate=0.001, grad_clip=5):
    """Build the cross-entropy loss and a gradient-clipped Adam train op.

    Returns:
        (loss, train_op): the scalar mean loss tensor and the op that
        applies the clipped gradients.
    """
    # Encode integer targets as one-hot and reshape to the logits' shape.
    with tf.name_scope('targets-encode'):
        y_one_hot = tf.one_hot(self.targets, self.n_classes)
        y_reshaped = tf.reshape(y_one_hot, self.logits.get_shape())

    # Mean softmax cross-entropy over all positions.
    with tf.name_scope('loss'):
        loss = tf.reduce_mean(
            tf.nn.softmax_cross_entropy_with_logits(logits=self.logits,
                                                    labels=y_reshaped))
        tf.summary.scalar('loss', loss)

    # Adam update with global-norm gradient clipping.
    with tf.name_scope('optimizer'):
        trainable = tf.trainable_variables()
        clipped, _ = tf.clip_by_global_norm(tf.gradients(loss, trainable),
                                            grad_clip)
        adam = tf.train.AdamOptimizer(learning_rate)
        apply_op = adam.apply_gradients(zip(clipped, trainable))

    return loss, apply_op
def get_optimizer(self, learning_rate=0.001, grad_clip=5):
    """Build the MSE loss and a gradient-clipped Adam train op.

    Returns:
        (train_op, loss): the op applying the clipped gradients and the
        scalar mean-squared-error loss.
    """
    # Mean squared error between targets and model outputs.
    with tf.variable_scope('loss'):
        loss = tf.losses.mean_squared_error(self.target, self.output)

    # Adam update with global-norm gradient clipping.
    with tf.variable_scope('optimizer'):
        trainable = tf.trainable_variables()
        clipped, _ = tf.clip_by_global_norm(tf.gradients(loss, trainable),
                                            grad_clip)
        adam = tf.train.AdamOptimizer(learning_rate)
        apply_op = adam.apply_gradients(zip(clipped, trainable))

    return apply_op, loss
def build_model(self):
    """Instantiate the model selected by FLAGS.model_type and wire up an
    RMSProp training op with gradients clipped to a global norm of 5.

    Side effects: sets self.model, self.global_step, self.optimizer,
    self.updates and self.saver.
    """
    # classmap maps a model-type name to its class; sizes come from the
    # loaded data tensors and the vocab processor's special-token ids.
    self.model = classmap[FLAGS.model_type](hidden_size=FLAGS.hidden,
                                            vocab_size=self.vocab_size,
                                            encoder_in_size=self.data.feats.shape[-1],
                                            encoder_in_length=self.data.feats.shape[1],
                                            # decoder input is shifted by one (teacher forcing)
                                            decoder_in_length=self.data.decoder_in.shape[-1] - 1,
                                            word2vec_weight=self.w2v_W,
                                            embedding_size=FLAGS.embedding_dim,
                                            neg_sample_num=self.sample_num,
                                            start_id=self.vocab_processor._mapping['<BOS>'],
                                            end_id=self.vocab_processor._mapping['<EOS>'],
                                            Bk=FLAGS.K)
    self.global_step = tf.Variable(0, name='global_step', trainable=False)
    self.optimizer = tf.train.RMSPropOptimizer(FLAGS.lr)
    tvars = tf.trainable_variables()
    # clip gradients of the model's cost by global norm (threshold 5)
    grads, _ = tf.clip_by_global_norm(tf.gradients(self.model.cost, tvars), 5)
    self.updates = self.optimizer.apply_gradients(
        zip(grads, tvars), global_step=self.global_step)
    self.saver = tf.train.Saver(tf.global_variables())
def get_update_op(self, loss, opts, global_step=None, max_gradient_norm=None, freeze_variables=None):
    """Return a list of update ops for `loss`, one per optimizer in `opts`.

    Variables whose names match any regex in `freeze_variables` are frozen
    (excluded from the update). Gradients are clipped by global norm when
    `max_gradient_norm` is truthy. Returns None when `loss` is None.
    """
    if loss is None:
        return None
    patterns = freeze_variables or []
    # keep only variables whose name matches none of the freeze patterns
    params = [v for v in tf.trainable_variables()
              if not any(re.match(p, v.name) for p in patterns)]
    self.params = params
    gradients = tf.gradients(loss, params)
    if max_gradient_norm:
        gradients, _ = tf.clip_by_global_norm(gradients, max_gradient_norm)
    scope = 'gradients' if self.name is None else 'gradients_{}'.format(self.name)
    update_ops = []
    for opt in opts:
        with tf.variable_scope(scope):
            update_ops.append(
                opt.apply_gradients(list(zip(gradients, params)),
                                    global_step=global_step))
    return update_ops
def _create_optimizer(self):
    """Create per-bucket SGD train ops with globally clipped gradients.

    Skipped in forward-only mode. Populates self.global_step,
    self.optimizer, self.gradient_norms and self.train_ops (one entry
    per bucket in config.BUCKETS).
    """
    print('Create optimizer... ')
    with tf.variable_scope('training'):
        self.global_step = tf.Variable(
            0, dtype=tf.int32, trainable=False, name='global_step')
        if not self.fw_only:
            self.optimizer = tf.train.GradientDescentOptimizer(config.LR)
            trainable_vars = tf.trainable_variables()
            self.gradient_norms = []
            self.train_ops = []
            start = time.time()
            for bucket_id in range(len(config.BUCKETS)):
                # clip this bucket's gradients by global norm
                clipped_grads, norm = tf.clip_by_global_norm(
                    tf.gradients(self.losses[bucket_id], trainable_vars),
                    config.MAX_GRAD_NORM)
                self.gradient_norms.append(norm)
                self.train_ops.append(self.optimizer.apply_gradients(
                    zip(clipped_grads, trainable_vars),
                    global_step=self.global_step))
                # graph construction per bucket is slow; report timing
                print('Creating opt for bucket {:d} took {:.2f} seconds.'.format(
                    bucket_id, time.time() - start))
                start = time.time()
def init_optimizer(self):
    """Select the optimizer named by self.optimizer (adadelta / adam /
    rmsprop, anything else falls back to plain SGD), clip gradients by
    global norm, and store the update op in self.updates."""
    print("setting optimizer..")
    optimizers = {
        'adadelta': tf.train.AdadeltaOptimizer,
        'adam': tf.train.AdamOptimizer,
        'rmsprop': tf.train.RMSPropOptimizer,
    }
    # unknown names fall back to vanilla gradient descent
    opt_class = optimizers.get(self.optimizer.lower(),
                               tf.train.GradientDescentOptimizer)
    self.opt = opt_class(learning_rate=self.learning_rate)
    trainable_params = tf.trainable_variables()
    # gradients of the loss w.r.t. all trainable variables, clipped by norm
    raw_grads = tf.gradients(self.loss, trainable_params)
    capped_grads, _ = tf.clip_by_global_norm(raw_grads, self.max_gradient_norm)
    self.updates = self.opt.apply_gradients(
        zip(capped_grads, trainable_params), global_step=self.global_step)
def setup_train_op(self):
    """
    Add train_op to self.

    Computes Adam gradients of self.loss, optionally clips them by global
    norm (skipped when config.max_gradient_norm == -1), stores the global
    gradient norm in self.global_grad and the (gradient, variable) pairs
    in self.gradients, then creates self.train_op and self.init.
    """
    with tf.variable_scope("train_step"):
        adam_optimizer = tf.train.AdamOptimizer()
        # `variables` instead of `vars` to avoid shadowing the builtin
        grads, variables = zip(*adam_optimizer.compute_gradients(self.loss))
        clip_val = self.config.max_gradient_norm
        # if -1 then do not perform gradient clipping
        if clip_val != -1:
            grads, _ = tf.clip_by_global_norm(grads, self.config.max_gradient_norm)
        self.global_grad = tf.global_norm(grads)
        # Bug fix: materialize as a list. In Python 3, zip() returns a
        # one-shot iterator that apply_gradients would exhaust, leaving
        # self.gradients empty for any later reader.
        self.gradients = list(zip(grads, variables))
        self.train_op = adam_optimizer.apply_gradients(self.gradients)
    self.init = tf.global_variables_initializer()
def build_graph(self, weights, loss=None, optimizer=None, norm=False, batch_size=None, grad_ys=None):
    """Build the gradient-computation and/or gradient-application subgraphs.

    With `loss`: computes NaN-checked gradients of loss w.r.t. the
    flattened weights (optionally divided by `batch_size`), records the
    pre-clip global norm in self.global_norm, optionally clips, and
    exposes the gradients (re-nested to the weights' structure) as
    self.calculate.

    With `optimizer`: creates placeholders for externally supplied
    gradients (self.ph_gradients) and the apply op (self.apply).

    Note: `norm` doubles as the enable flag and the clipping threshold —
    pass a positive float to clip, or False/0 to skip clipping.
    """
    if loss is not None:
        gradients = tf.gradients(loss.node, list(utils.Utils.flatten(weights.node)), grad_ys)
        # fail fast on NaN/Inf gradients
        gradients = [tf.check_numerics(g, 'gradient_%d' % i) for i, g in enumerate(gradients)]
        if batch_size is not None:
            gradients = [g / float(batch_size) for g in gradients]
        # store gradients global norm before clipping
        self.global_norm = tf.global_norm(gradients)
        # clip gradients after global norm has been stored
        if norm:
            gradients, _ = tf.clip_by_global_norm(gradients, norm)
        self.calculate = graph.TfNode(utils.Utils.reconstruct(gradients, weights.node))
    if optimizer is not None:
        self.ph_gradients = graph.Placeholders(weights)
        self.apply = graph.TfNode(optimizer.node.apply_gradients(
            utils.Utils.izip(self.ph_gradients.checked, weights.node)))
def _add_train_op(self):
    """Sets self._train_op, op to run for training."""
    hps = self._hps
    # exponentially decayed learning rate, floored at hps.min_lr
    self._lr_rate = tf.maximum(
        hps.min_lr,
        tf.train.exponential_decay(hps.lr, self.global_step, 30000, 0.98))
    weights = tf.trainable_variables()
    # gradient computation is pinned to the last GPU
    with tf.device(self._get_gpu(self._num_gpus - 1)):
        grads, global_norm = tf.clip_by_global_norm(
            tf.gradients(self._loss, weights), hps.max_grad_norm)
    tf.summary.scalar('global_norm', global_norm)
    sgd = tf.train.GradientDescentOptimizer(self._lr_rate)
    tf.summary.scalar('learning rate', self._lr_rate)
    self._train_op = sgd.apply_gradients(
        zip(grads, weights), global_step=self.global_step, name='train_step')
def _add_train_op(self):
    """Sets self._train_op, op to run for training."""
    config = self._config
    weights = tf.trainable_variables()
    # compute and clip gradients on the last (reserved) GPU
    with tf.device(self._get_gpu(self._num_gpus - 1)):
        grads, global_norm = tf.clip_by_global_norm(
            tf.gradients(self._loss, weights), config.max_grad_norm)
    tf.summary.scalar('global_norm', global_norm)
    # exponentially decayed learning rate with a lower bound
    lr_rate = tf.maximum(
        config.min_lr,
        tf.train.exponential_decay(config.lr, self.global_step,
                                   config.decay_steps, config.decay_rate))
    optimizer = tf.train.AdamOptimizer(lr_rate, epsilon=config.adam_epsilon)
    tf.summary.scalar('learning_rate', lr_rate)
    self._train_op = optimizer.apply_gradients(
        zip(grads, weights), global_step=self.global_step, name='train_step')
def _add_train_op(self):
    """Sets self._train_op, op to run for training."""
    config = self._config
    trainable = tf.trainable_variables()
    # gradient computation runs on the last (reserved) GPU
    with tf.device(self._get_gpu(self._num_gpus - 1)):
        grads, global_norm = tf.clip_by_global_norm(
            tf.gradients(self._loss, trainable), config.max_grad_norm)
    tf.summary.scalar('global_norm', global_norm)
    # decayed learning rate, never below config.min_lr
    lr_rate = tf.maximum(
        config.min_lr,
        tf.train.exponential_decay(config.lr, self.global_step, 30000, 0.98))
    if config.optimizer == 'adam':
        optimizer = tf.train.AdamOptimizer(lr_rate, epsilon=config.adam_epsilon)
    else:
        assert config.optimizer == 'gradient_descent', config.optimizer
        optimizer = tf.train.GradientDescentOptimizer(lr_rate)
    tf.summary.scalar('learning_rate', lr_rate)
    self._train_op = optimizer.apply_gradients(
        zip(grads, trainable), global_step=self.global_step, name='train_step')
def _add_train_op(self):
    """Create self._train_op: clipped-gradient updates with a decayed LR."""
    config = self._config
    params = tf.trainable_variables()
    # clip gradients by global norm on the last GPU
    with tf.device(self._get_gpu(self._num_gpus - 1)):
        clipped, grad_norm = tf.clip_by_global_norm(
            tf.gradients(self._loss, params), config.max_grad_norm)
    tf.summary.scalar('global_norm', grad_norm)
    decayed = tf.train.exponential_decay(config.lr, self.global_step, 30000, 0.98)
    lr = tf.maximum(config.min_lr, decayed)  # floor the decayed rate
    # choose the optimizer named in the config (only two are supported)
    if config.optimizer == 'adam':
        opt = tf.train.AdamOptimizer(lr, epsilon=config.adam_epsilon)
    else:
        assert config.optimizer == 'gradient_descent', config.optimizer
        opt = tf.train.GradientDescentOptimizer(lr)
    tf.summary.scalar('learning_rate', lr)
    self._train_op = opt.apply_gradients(
        zip(clipped, params), global_step=self.global_step, name='train_step')
def training_graph(loss, learning_rate=1.0, max_grad_norm=5.0):
    """Wire up SGD training for `loss` with global-norm gradient clipping.

    Returns an adict with: learning_rate (a reassignable variable),
    global_step, global_norm (pre-application gradient norm), train_op.
    """
    global_step = tf.Variable(0, name='global_step', trainable=False)
    with tf.variable_scope('SGD_Training'):
        # keep the learning rate in a variable so callers can anneal it
        learning_rate = tf.Variable(learning_rate, trainable=False, name='learning_rate')
        tvars = tf.trainable_variables()
        raw_grads = tf.gradients(loss, tvars)
        grads, global_norm = tf.clip_by_global_norm(raw_grads, max_grad_norm)
        sgd = tf.train.GradientDescentOptimizer(learning_rate)
        train_op = sgd.apply_gradients(zip(grads, tvars), global_step=global_step)
    return adict(
        learning_rate=learning_rate,
        global_step=global_step,
        global_norm=global_norm,
        train_op=train_op)
def training_graph(loss, learning_rate=1.0, max_grad_norm=5.0):
    """Build the SGD training graph for `loss`.

    Gradients over all trainable variables are clipped to `max_grad_norm`
    by global norm. The returned adict exposes the learning-rate variable,
    the global step, the gradient global norm and the train op.
    """
    step = tf.Variable(0, name='global_step', trainable=False)
    with tf.variable_scope('SGD_Training'):
        # SGD learning rate as a non-trainable variable (assignable later)
        lr_var = tf.Variable(learning_rate, trainable=False, name='learning_rate')
        trainable = tf.trainable_variables()
        clipped, grad_norm = tf.clip_by_global_norm(
            tf.gradients(loss, trainable), max_grad_norm)
        update = tf.train.GradientDescentOptimizer(lr_var).apply_gradients(
            zip(clipped, trainable), global_step=step)
    return adict(
        learning_rate=lr_var,
        global_step=step,
        global_norm=grad_norm,
        train_op=update)
def clip_grad_global_norms(tvars, loss, opt, global_norm=1, gate_gradients=1, gradient_noise_scale=4.0, GATE_GRAPH=2, grad_loss=None, agre_method=None, col_grad_ops=False):
    """Compute gradients of `loss` w.r.t. `tvars`, optionally add scaled
    noise, and clip them by global norm.

    Args:
        tvars: trainable variables used for gradient updates
        loss: total loss of the network
        opt: optimizer (NOTE(review): not used inside this function — confirm)
        global_norm: the maximum global norm
        gate_gradients: 1 gates per-op gradients; GATE_GRAPH (2) gates all
            gradients together via tf.tuple
        gradient_noise_scale: noise is added only when this value is > 1
    Returns:
        A list of clipped (gradient, variable) pairs.
    """
    # pre-TF-1.0 API: v.ref() to get a reference to each variable
    var_refs = [v.ref() for v in tvars]
    grads = tf.gradients(loss, var_refs, grad_ys=grad_loss, gate_gradients=(
        gate_gradients == 1), aggregation_method=agre_method, colocate_gradients_with_ops=col_grad_ops)
    if gradient_noise_scale > 1:
        grads = add_scaled_noise_to_gradients(
            list(zip(grads, tvars)), gradient_noise_scale=gradient_noise_scale)
    if gate_gradients == GATE_GRAPH:
        # force all gradients to be computed before any is consumed
        grads = tf.tuple(grads)
    grads, _ = tf.clip_by_global_norm(grads, global_norm)
    grads_and_vars = list(zip(grads, tvars))
    return grads_and_vars
def _clip_grad_global_norms(self, tvars, loss, opt, global_norm=8, gate_gradients=1, gradient_noise_scale=None, GATE_GRAPH=2, grad_loss=None, agre_method=None, col_grad_ops=False):
    """Compute gradients of `loss` w.r.t. `tvars`, optionally add scaled
    noise, and clip them by global norm.

    Args:
        tvars: trainable variables used for gradient updates
        loss: total loss of the network
        opt: optimizer (NOTE(review): not used inside this method — confirm)
        global_norm: the maximum global norm
        gate_gradients: 1 gates per-op gradients; GATE_GRAPH (2) gates all
            gradients together via tf.tuple
        gradient_noise_scale: when not None, noise of this scale is added
    Returns:
        A list of clipped (gradient, variable) pairs.
    """
    # read_value() gives the current value tensor of each variable
    var_refs = [v.read_value() for v in tvars]
    grads = tf.gradients(loss, var_refs, grad_ys=grad_loss, gate_gradients=(
        gate_gradients == 1), aggregation_method=agre_method, colocate_gradients_with_ops=col_grad_ops)
    if gradient_noise_scale is not None:
        grads = self._add_scaled_noise_to_gradients(
            list(zip(grads, tvars)), gradient_noise_scale=gradient_noise_scale)
    if gate_gradients == GATE_GRAPH:
        # force all gradients to be computed before any is consumed
        grads = tf.tuple(grads)
    grads, _ = tf.clip_by_global_norm(grads, global_norm)
    grads_and_vars = list(zip(grads, tvars))
    return grads_and_vars
def training_graph(loss, learning_rate=1.0, max_grad_norm=5.0):
    """Construct SGD training ops for `loss` (gradients clipped by global
    norm) and return them bundled in an adict."""
    global_step = tf.Variable(0, name='global_step', trainable=False)
    with tf.variable_scope('SGD_Training'):
        # rebind the python float to a graph variable of the same name so
        # the rate can be reassigned during training
        learning_rate = tf.Variable(learning_rate, trainable=False, name='learning_rate')
        variables = tf.trainable_variables()
        gradients = tf.gradients(loss, variables)
        gradients, global_norm = tf.clip_by_global_norm(gradients, max_grad_norm)
        optimizer = tf.train.GradientDescentOptimizer(learning_rate)
        train_op = optimizer.apply_gradients(zip(gradients, variables),
                                             global_step=global_step)
    return adict(
        learning_rate=learning_rate,
        global_step=global_step,
        global_norm=global_norm,
        train_op=train_op)
def _build_optimizer(self):
    """Based on the loss tensor, build an optimizer that minimizes the loss.

    RMSProp is used to minimize self._loss; gradients are clipped to
    self.max_gradient_norm by global norm before being applied, to avoid
    overly drastic parameter updates. See also tf.clip_by_global_norm.

    Returns:
        tf.Operation: An operation that updates the model's trainable
        parameters.
    """
    trainable = tf.trainable_variables()
    # clip first, then hand the (gradient, variable) pairs to RMSProp
    clipped, _ = tf.clip_by_global_norm(
        tf.gradients(self._loss, trainable), self.max_gradient_norm)
    rmsprop = tf.train.RMSPropOptimizer(self._learning_rate)
    return rmsprop.apply_gradients(zip(clipped, trainable))
def __init__(self, is_training, config):
    """Build an LSTM regression network (pre-1.0 TensorFlow API:
    rnn_cell / rnn modules, axis-first tf.split / tf.concat).

    One scalar per (batch, timestep) is projected to the hidden size,
    run through a (possibly dropout-wrapped) multi-layer LSTM, and
    projected back to a scalar; the cost is the MSE against the targets.
    The Adam training op with global-norm clipping is only added when
    `is_training` is true.
    """
    self.batch_size = batch_size = config.batch_size
    self.num_steps = num_steps = config.num_steps
    size = config.hidden_size
    # scalar input/target per (batch, timestep)
    self._input_data = tf.placeholder(tf.float32, [batch_size, num_steps])
    self._targets = tf.placeholder(tf.float32, [batch_size, num_steps])
    lstm_cell = rnn_cell.BasicLSTMCell(size, forget_bias=0.0)
    if is_training and config.keep_prob < 1:
        lstm_cell = rnn_cell.DropoutWrapper(lstm_cell, output_keep_prob=config.keep_prob)
    cell = rnn_cell.MultiRNNCell([lstm_cell] * config.num_layers)
    self._initial_state = cell.zero_state(batch_size, tf.float32)
    # input projection: scalar -> hidden size, one tensor per timestep
    iw = tf.get_variable("input_w", [1, size])
    ib = tf.get_variable("input_b", [size])
    inputs = [tf.nn.xw_plus_b(i_, iw, ib) for i_ in tf.split(1, num_steps, self._input_data)]
    if is_training and config.keep_prob < 1:
        inputs = [tf.nn.dropout(input_, config.keep_prob) for input_ in inputs]
    outputs, states = rnn.rnn(cell, inputs, initial_state=self._initial_state)
    # flatten timestep outputs to [batch*num_steps, size] for the output projection
    rnn_output = tf.reshape(tf.concat(1, outputs), [-1, size])
    self._output = output = tf.nn.xw_plus_b(rnn_output,
                                            tf.get_variable("out_w", [size, 1]),
                                            tf.get_variable("out_b", [1]))
    # mean squared error against the flattened targets
    self._cost = cost = tf.reduce_mean(tf.square(output - tf.reshape(self._targets, [-1])))
    self._final_state = states[-1]
    if not is_training:
        return
    self._lr = tf.Variable(0.0, trainable=False)
    tvars = tf.trainable_variables()
    grads, _ = tf.clip_by_global_norm(tf.gradients(cost, tvars), config.max_grad_norm)
    #optimizer = tf.train.GradientDescentOptimizer(self.lr)
    # NOTE(review): uses self.lr (presumably a property over self._lr) — confirm
    optimizer = tf.train.AdamOptimizer(self.lr)
    self._train_op = optimizer.apply_gradients(zip(grads, tvars))
def build_train(self, losses):
    """Create per-bucket SGD update ops with global-norm gradient clipping.

    In forward-only mode, self.updates stays None. Otherwise it becomes a
    list with one apply-gradients op per bucket, and self.gradient_norms
    collects the matching pre-clip global norms.

    NOTE(review): the `losses` argument is not used; gradients are taken
    from self.losses — confirm whether the parameter is vestigial.
    """
    # TODO: modify total_loss to handle buckets
    self.updates = None
    with self.G.as_default():
        # Gradients and SGD update operation for training the model.
        params = tf.trainable_variables()
        if not self.forward_only:
            self.gradient_norms = []
            self.updates = []
            self.opt = tf.train.GradientDescentOptimizer(self.learning_rate)
            # Bug fix: `xrange` is Python 2 only (NameError on Python 3).
            for b in range(len(self.buckets)):
                gradients = tf.gradients(self.losses[b], params)
                clipped_gradients, norm = tf.clip_by_global_norm(gradients,
                                                                 self.max_gradient_norm)
                self.gradient_norms.append(norm)
                self.updates.append(self.opt.apply_gradients(
                    zip(clipped_gradients, params), global_step=self.global_step))
    return self.updates  # note: this is per-bucket
def training_graph(loss, learning_rate=1.0, max_grad_norm=5.0):
    """Assemble the SGD training subgraph for `loss`.

    Returns:
        adict with learning_rate (variable), global_step, global_norm
        (gradient norm before clipping is applied), and train_op.
    """
    counter = tf.Variable(0, name='global_step', trainable=False)
    with tf.variable_scope('SGD_Training'):
        # learning rate lives in a non-trainable variable so it can be annealed
        lr = tf.Variable(learning_rate, trainable=False, name='learning_rate')
        weights = tf.trainable_variables()
        clipped, norm = tf.clip_by_global_norm(
            tf.gradients(loss, weights), max_grad_norm)
        step_op = tf.train.GradientDescentOptimizer(lr).apply_gradients(
            zip(clipped, weights), global_step=counter)
    return adict(
        learning_rate=lr,
        global_step=counter,
        global_norm=norm,
        train_op=step_op)
def _add_train_op(self):
    """Sets self._train_op, op to run for training."""
    hps = self._hps
    self._lr_rate = tf.maximum(
        hps.min_lr,  # never decay below the configured floor
        tf.train.exponential_decay(hps.lr, self.global_step, 30000, 0.98))
    params = tf.trainable_variables()
    # gradient computation is pinned to the last GPU
    with tf.device(self._get_gpu(self._num_gpus - 1)):
        clipped, grad_norm = tf.clip_by_global_norm(
            tf.gradients(self._loss, params), hps.max_grad_norm)
    # pre-1.0 summary API, kept consistent with the rest of this model
    tf.scalar_summary('global_norm', grad_norm)
    opt = tf.train.GradientDescentOptimizer(self._lr_rate)
    tf.scalar_summary('learning rate', self._lr_rate)
    self._train_op = opt.apply_gradients(
        zip(clipped, params), global_step=self.global_step, name='train_step')
def apply_gradients(self, grads_tvars, global_step=None, name=None):
    """Apply (gradient, variable) pairs with optional global-norm clipping,
    then run hyper-parameter bookkeeping.

    Control dependencies enforce: after_apply() -> update_hyper_param()
    -> global-step increment. The gradient application op is grouped into
    the returned op but is not explicitly ordered before after_apply().
    Returns a single tf.group op.
    """
    self._grads, self._tvars = zip(*grads_tvars)
    with tf.variable_scope("apply_updates"):
        if self._clip_thresh_var is not None:
            # the clip threshold is itself a variable, so it can be tuned at runtime
            self._grads_clip, self._grads_norm = tf.clip_by_global_norm(self._grads, self._clip_thresh_var)
            apply_grad_op = \
                self._optimizer.apply_gradients(zip(self._grads_clip, self._tvars) )
        else:
            apply_grad_op = \
                self._optimizer.apply_gradients(zip(self._grads, self._tvars) )
    with tf.variable_scope("after_apply"):
        after_apply_op = self.after_apply()
    with tf.variable_scope("update_hyper"):
        # hyper-parameter update must observe the after-apply bookkeeping
        with tf.control_dependencies( [after_apply_op] ):
            update_hyper_op = self.update_hyper_param()
    # step counter increments only after the hyper-parameter update
    with tf.control_dependencies([update_hyper_op] ):
        self._increment_global_step_op = tf.assign(self._global_step, self._global_step + 1)
    return tf.group(apply_grad_op, after_apply_op, update_hyper_op, self._increment_global_step_op)
def build_optimizer(loss, learning_rate, grad_clip):
    """Create an Adam training op for `loss` with gradient clipping.

    Args:
        loss: scalar loss tensor to minimize.
        learning_rate: Adam learning rate.
        grad_clip: global-norm threshold for gradient clipping.

    Returns:
        The op that applies the clipped gradients.
    """
    trainable = tf.trainable_variables()
    # clip all gradients jointly by their global norm
    clipped, _ = tf.clip_by_global_norm(tf.gradients(loss, trainable), grad_clip)
    adam = tf.train.AdamOptimizer(learning_rate)
    return adam.apply_gradients(zip(clipped, trainable))
def _create_optimizer(self, args):
    """Build the policy's Gaussian negative log-likelihood loss and the
    clipped-gradient Adam training op (stored in self.cost / self.train)."""
    # NLL of targets under N(a_mean, diag(std_a^2)):
    # constant term + log-determinant term + quadratic term.
    std_a = tf.exp(self.a_logstd)
    const_term = 0.5 * tf.to_float(args.action_dim) * np.log(2. * np.pi)
    logdet_term = tf.to_float(args.action_dim) * tf.reduce_sum(tf.log(std_a))
    quad_term = 0.5 * \
        tf.reduce_mean(tf.reduce_sum(
            tf.square((self.targets - self.a_mean) / std_a), 1))
    policy_loss = const_term + logdet_term + quad_term
    # the policy loss is the overall cost
    self.cost = policy_loss
    self.summary_policy = tf.scalar_summary(
        "Policy loss", tf.reduce_mean(policy_loss))
    # parameter update: Adam on globally-clipped gradients
    tvars = tf.trainable_variables()
    grads, _ = tf.clip_by_global_norm(
        tf.gradients(self.cost, tvars), args.grad_clip)
    self.train = tf.train.AdamOptimizer(self.learning_rate).apply_gradients(
        zip(grads, tvars))
def _add_train_op(self):
    """Create the Adam train op with decayed LR; returns (train_op, loss)."""
    hp = self._params
    # decayed learning rate with a lower bound of hp.min_lr
    self._lr_rate = tf.maximum(
        hp.min_lr,
        tf.train.exponential_decay(hp.lr, self._global_step, 30000, 0.98))
    weights = tf.trainable_variables()
    # use reserved gpu for gradient computation
    with tf.device(self._get_gpu(self._num_gpus - 1)):
        clipped, grad_norm = tf.clip_by_global_norm(
            tf.gradients(self._loss, weights), hp.max_grad_norm)
    tf.scalar_summary('global_norm', grad_norm)
    adam = tf.train.AdamOptimizer(self._lr_rate)
    tf.scalar_summary('learning rate', self._lr_rate)
    # apply the update on the next device in the round-robin placement
    with tf.device(self._next_device()):
        self._train_op = adam.apply_gradients(
            zip(clipped, weights), global_step=self._global_step, name='train_step')
    self._summaries = tf.merge_all_summaries()
    return self._train_op, self._loss
def set_optimizer(self, session, learning_rate=0.5, learning_rate_decay_factor=0.99, max_gradient_norm=5.0, load_if_exist=True):
    """Create per-bucket SGD training ops, initialize all variables in
    `session`, and restore the latest checkpoint if one exists.

    NOTE(review): appends to self.gradient_norms / self.updates, which
    must be initialized elsewhere (e.g. in __init__) — confirm.
    """
    self.global_step = tf.Variable(0, trainable=False)
    self.learning_rate = tf.Variable(float(learning_rate), trainable=False)
    # op that decays the learning rate when run
    self.learning_rate_opr = self.learning_rate.assign(self.learning_rate * learning_rate_decay_factor)
    self.optimizer = tf.train.GradientDescentOptimizer(self.learning_rate)
    self.outputs, self.losses = self.calc_loss()
    params = tf.trainable_variables()
    for b in range(len(self.buckets)):
        # one clipped-gradient update op per bucket
        gradients = tf.gradients(self.losses[b], params)
        clipped_gradients, norm = tf.clip_by_global_norm(gradients, max_gradient_norm)
        self.gradient_norms.append(norm)
        self.updates.append(self.optimizer.apply_gradients(zip(clipped_gradients, params), global_step=self.global_step))
    # pre-1.0 API: tf.all_variables / tf.initialize_all_variables
    self.saver = tf.train.Saver(tf.all_variables())
    session.run(tf.initialize_all_variables())
    # restore the most recent checkpoint from train_dir, if present
    if load_if_exist and self.train_dir:
        saved = tf.train.get_checkpoint_state(self.train_dir)
        if saved and tf.gfile.Exists(saved.model_checkpoint_path):
            self.saver.restore(session, saved.model_checkpoint_path)
def set_optimizer(self, session, learning_rate=0.1, learning_rate_decay_factor=0.99, max_gradient_norm=5.0, load_if_exist=True):
    """Build the bucketed SGD training ops (gradients clipped by global
    norm), initialize variables in `session`, and optionally restore the
    newest checkpoint from self.train_dir.

    NOTE(review): self.gradient_norms and self.updates are appended to but
    not created here — presumably initialized in __init__; confirm.
    """
    self.global_step = tf.Variable(0, trainable=False)
    self.learning_rate = tf.Variable(float(learning_rate), trainable=False)
    # running this op multiplies the learning rate by the decay factor
    self.learning_rate_opr = self.learning_rate.assign(self.learning_rate * learning_rate_decay_factor)
    self.optimizer = tf.train.GradientDescentOptimizer(self.learning_rate)
    self.outputs, self.losses = self.calc_loss()
    params = tf.trainable_variables()
    for b in range(len(self.buckets)):
        gradients = tf.gradients(self.losses[b], params)
        clipped_gradients, norm = tf.clip_by_global_norm(gradients, max_gradient_norm)
        self.gradient_norms.append(norm)
        self.updates.append(self.optimizer.apply_gradients(zip(clipped_gradients, params), global_step=self.global_step))
    # deprecated pre-1.0 API: all_variables / initialize_all_variables
    self.saver = tf.train.Saver(tf.all_variables())
    session.run(tf.initialize_all_variables())
    if load_if_exist and self.train_dir:
        saved = tf.train.get_checkpoint_state(self.train_dir)
        if saved and tf.gfile.Exists(saved.model_checkpoint_path):
            self.saver.restore(session, saved.model_checkpoint_path)
Source file: seq2seq_trainer.py
Project: DialogueBreakdownDetection2016
Author: icoxfog417
Project source
File source
Reads: 33
Bookmarks: 0
Likes: 0
Comments: 0
def set_optimizer(self, session, learning_rate=0.5, learning_rate_decay_factor=0.99, max_gradient_norm=5.0, load_if_exist=True):
    """Create one clipped-gradient SGD update op per bucket, initialize
    all variables in `session`, and restore the latest checkpoint from
    self.train_dir when available.

    NOTE(review): relies on self.gradient_norms / self.updates existing
    already (presumably created in __init__) — confirm.
    """
    self.global_step = tf.Variable(0, trainable=False)
    self.learning_rate = tf.Variable(float(learning_rate), trainable=False)
    # op that applies one step of learning-rate decay when run
    self.learning_rate_opr = self.learning_rate.assign(self.learning_rate * learning_rate_decay_factor)
    self.optimizer = tf.train.GradientDescentOptimizer(self.learning_rate)
    self.outputs, self.losses = self.calc_loss()
    params = tf.trainable_variables()
    for b in range(len(self.buckets)):
        # per-bucket gradients, clipped jointly by global norm
        gradients = tf.gradients(self.losses[b], params)
        clipped_gradients, norm = tf.clip_by_global_norm(gradients, max_gradient_norm)
        self.gradient_norms.append(norm)
        self.updates.append(self.optimizer.apply_gradients(zip(clipped_gradients, params), global_step=self.global_step))
    # deprecated pre-1.0 API: all_variables / initialize_all_variables
    self.saver = tf.train.Saver(tf.all_variables())
    session.run(tf.initialize_all_variables())
    if load_if_exist and self.train_dir:
        saved = tf.train.get_checkpoint_state(self.train_dir)
        if saved and tf.gfile.Exists(saved.model_checkpoint_path):
            self.saver.restore(session, saved.model_checkpoint_path)