def learning_rate_decay(hparams, num_worker_replicas=1, num_train_steps=1):
    """Build the learning-rate decay tensor for the configured scheme.

    The scheme is selected by ``hparams.learning_rate_decay_scheme``:

    * ``"noam"``: ``5000 * hidden_size**-0.5 *
      min((step + 1) * warmup**-1.5, (step + 1)**-0.5)``.
    * ``"exp100k"``: ``0.94`` raised to the number of completed
      100000-step intervals.
    * ``"cosine"``: half-cosine over
      ``hparams.learning_rate_cosine_cycle_steps``, restarting each cycle.
    * ``"cyclelinear10x"``: triangular wave with a half-period of
      ``hparams.learning_rate_warmup_steps``.
    * ``"sqrt"``, ``"exp10k"``, ``"exp50k"``, ``"exp500k"``, ``"none"``:
      an exponential warmup that rises from 0.01 to 1.0 over the warmup
      steps, after which the scheme-specific decay is used.

    Args:
        hparams: Hyperparameter object. Reads ``learning_rate_decay_scheme``
            and ``learning_rate_warmup_steps``, plus ``hidden_size`` for
            ``"noam"`` and ``learning_rate_cosine_cycle_steps`` for
            ``"cosine"``.
        num_worker_replicas: Factor by which the configured warmup steps are
            multiplied.
        num_train_steps: Total number of training steps; only the ``exp10k``,
            ``exp50k`` and ``exp500k`` schemes read it.

    Returns:
        A tensor expression derived from the global step.

    Raises:
        ValueError: If the scheme name is not one of those listed above.
    """
    warmup_steps = tf.to_float(
        hparams.learning_rate_warmup_steps * num_worker_replicas)
    step = tf.to_float(tf.train.get_or_create_global_step())
    scheme = hparams.learning_rate_decay_scheme

    # Schemes below define the whole schedule themselves and return early;
    # they do not go through the shared warmup at the end of the function.
    if scheme == "noam":
        scale = 5000.0 * hparams.hidden_size**-0.5
        linear_ramp = (step + 1) * warmup_steps**-1.5
        inverse_sqrt = (step + 1)**-0.5
        return scale * tf.minimum(linear_ramp, inverse_sqrt)

    if scheme == "exp100k":
        return 0.94**(step // 100000)

    if scheme == "cosine":
        period = hparams.learning_rate_cosine_cycle_steps
        phase = np.pi * (step % period) / period
        return 0.5 * (1 + tf.cos(phase))

    if scheme == "cyclelinear10x":
        # Triangular wave: one full up-and-down cycle spans 2 * half_cycle
        # steps. Note the raw hparams value is used here, not the
        # replica-scaled warmup_steps.
        half_cycle = hparams.learning_rate_warmup_steps
        offset = step % (2 * half_cycle)
        # Map the offset onto [-1, 1) ...
        signed = tf.to_float(offset - half_cycle) / float(half_cycle)
        # ... and fold it into a 0 -> 1 -> 0 triangle.
        triangle = 1.0 - tf.abs(signed)
        # Result ranges from 0.3 to 3.3, roughly a 10x swing per cycle.
        return (triangle + 0.1) * 3.0

    # Shared warmup: 0.01 ** ((warmup_steps - step) / warmup_steps), i.e.
    # 0.01 at step 0 rising to 1.0 when step reaches warmup_steps.
    warmup_base = tf.exp(tf.log(0.01) / warmup_steps)
    warmup_factor = warmup_base**(warmup_steps - step)

    # scheme name -> (per-step decay rate, step count subtracted from the
    # remaining training steps before being handed to _exp_decay_after).
    exp_schedules = {
        "exp10k": (0.9995, 10000),
        "exp50k": (0.99995, 50000),
        "exp500k": (0.9999955, 500000),
    }

    if scheme == "sqrt":
        decay = _sqrt_decay(step - warmup_steps)
    elif scheme in exp_schedules:
        rate, tail_steps = exp_schedules[scheme]
        decay = _exp_decay_after(step - warmup_steps, rate,
                                 num_train_steps - warmup_steps - tail_steps)
    elif scheme == "none":
        decay = tf.constant(1.0)
    else:
        raise ValueError("Unrecognized learning rate decay scheme: %s" %
                         scheme)

    # Use the warmup factor before warmup_steps, the chosen decay afterwards.
    return tf.where(step < warmup_steps, warmup_factor, decay)
评论列表
文章目录