def __get_grad_noise_scale(self, gradients):
    """Return the stddev of the gaussian noise to be added to the gradients.

    The schedule is selected by ``cfg.grad_noise_decay``:

    * ``None`` -- constant scale ``cfg.grad_noise_scale``.
    * ``'annealing'`` -- noise with variance annealed over time:
          g_t <- g_t + N(0, sigma_t^2),  sigma_t^2 = eta / (1 + t)^gamma
      with eta selected from {0.01, 0.3, 1.0} and gamma = 0.55.
      See "Adding gradient noise improves learning for very deep
      networks", http://arxiv.org/pdf/1511.06807v1.pdf
    * ``'neural_gpu'`` -- annealed noise additionally modulated by the
      previous error ``self.prev_err``; until a first error is recorded
      the constant scale is used.

    Parameters
    ----------
    gradients :
        Unused by this method; kept for interface compatibility with
        callers that pass the current gradients.

    Returns
    -------
    A python float (constant case) or a scalar tensor with the noise
    stddev for the current ``self.global_step``.

    Raises
    ------
    NotImplementedError
        If ``cfg.grad_noise_decay`` has an unrecognized value.
    """
    if self.cfg.grad_noise_decay is None:
        grad_noise_scale = self.cfg.grad_noise_scale
    elif self.cfg.grad_noise_decay == 'annealing':
        # The formula above gives the *variance* sigma_t^2; this returns
        # the stddev sigma_t = sqrt(eta) * (1 + t)^(-gamma/2), hence the
        # square root of eta and the halved exponent.
        eta = self.cfg.grad_noise_scale ** 0.5
        gamma = 0.55 / 2
        grad_noise_scale = eta * tf.pow(tf.cast(
            self.global_step + 1, self.cfg._FLOATX), -gamma)
    elif self.cfg.grad_noise_decay == 'neural_gpu':
        if self.prev_err is None:
            # No previous error available yet: fall back to the
            # constant scale.
            grad_noise_scale = self.cfg.grad_noise_scale
        else:
            eta = self.cfg.grad_noise_scale
            gamma = 0.55
            grad_noise_scale = eta * tf.sqrt(
                self.prev_err * tf.pow(tf.cast(
                    self.global_step + 1, self.cfg._FLOATX), -gamma))
    else:
        raise NotImplementedError('Unknown value of '
                                  'cfg.grad_noise_decay: %s' %
                                  self.cfg.grad_noise_decay)
    return grad_noise_scale