def _apply_dense(self, grad, var):
    """Build the dense update op for `var` from its gradient `grad`.

    Performs an Adam-style step: first- and second-moment estimates are
    kept in the "m" and "v" slots of `var`, the step size is bias-corrected
    with the beta powers, and the second moment is raised to the power
    `self._pow_t` in the denominator. A second step then subtracts
    `self._dense_regularization * var` from the variable.

    Args:
      grad: Gradient tensor for `var`.
      var: Variable to update; its "m" and "v" slots are read and written.

    Returns:
      An op grouping the variable update with both slot updates.
    """
    # Bias-corrected step size.
    lr = (self._lr_t *
          math_ops.sqrt(1 - self._beta2_power)
          / (1 - self._beta1_power))

    # m_t = beta1 * m + (1 - beta1) * g_t
    # The result is assigned back to the slot so the estimate accumulates
    # across steps instead of being recomputed from the initial slot value.
    m = self.get_slot(var, "m")
    m_scaled_g_values = grad * (1 - self._beta1_t)
    m_t = state_ops.assign(m,
                           m * self._beta1_t + m_scaled_g_values,
                           use_locking=self._use_locking)

    # v_t = beta2 * v + (1 - beta2) * (g_t * g_t)
    # Persisted into the slot for the same reason as m_t.
    v = self.get_slot(var, "v")
    v_scaled_g_values = tf.pow(grad, 2) * (1 - self._beta2_t)
    v_t = state_ops.assign(v,
                           v * self._beta2_t + v_scaled_g_values,
                           use_locking=self._use_locking)

    # Denominator uses v_t ** pow_t rather than a fixed square root.
    v_sqrt = tf.pow(v_t, self._pow_t)
    var_update = state_ops.assign_sub(var,
                                      lr * m_t / (v_sqrt + self._epsilon_t),
                                      use_locking=self._use_locking)

    # regularization
    # NOTE(review): `var` is read here without an explicit control dependency
    # on the update above, so whether the pre- or post-step value is used is
    # not pinned down by this code -- confirm the intended ordering.
    var_update = state_ops.assign_sub(var_update,
                                      self._dense_regularization * var,
                                      use_locking=self._use_locking)
    return control_flow_ops.group(*[var_update, m_t, v_t])
评论列表
文章目录