def dist_to_opt(self):
    """Build the graph ops that estimate the distance to the optimum.

    A single-iteration estimate is formed by dividing the moving average
    of the gradient norm by ``self._grad_norm_squared_avg + EPS``; that
    estimate is then smoothed with the same moving averager.

    Reads ``self._grad_norm_squared`` and ``self._grad_norm_squared_avg``
    (presumably maintained elsewhere in the class -- confirm), plus
    ``self._sparsity_avg`` when ``self._sparsity_debias`` is truthy.
    Sets ``self._grad_norm``, ``self._grad_norm_avg``,
    ``self._dist_to_opt`` and ``self._dist_to_opt_avg``.

    Returns:
        A two-element list: the moving-average update op for the
        gradient norm, followed by the one for the distance estimate.
    """
    # NOTE(review): statement nesting under the two control-dependency
    # scopes below was reconstructed from a paste that had lost its
    # indentation -- confirm against the upstream file.
    averager = self._moving_averager

    # Running average of the gradient norm.
    self._grad_norm = tf.sqrt(self._grad_norm_squared)
    grad_norm_update = averager.apply([self._grad_norm])
    with tf.control_dependencies([grad_norm_update]):
        # Read the average only after it has been refreshed.
        self._grad_norm_avg = averager.average(self._grad_norm)
        # Single-iteration distance estimate.
        # Note that self._grad_norm_avg is per variable.
        denominator = self._grad_norm_squared_avg + EPS
        self._dist_to_opt = self._grad_norm_avg / denominator

    # Running average of the distance estimate.
    dist_update = averager.apply([self._dist_to_opt])
    with tf.control_dependencies([dist_update]):
        # tf.identity creates the read op inside this scope, so the
        # value is tied to the update op above.
        smoothed_dist = tf.identity(averager.average(self._dist_to_opt))
        if self._sparsity_debias:
            smoothed_dist = smoothed_dist / (
                tf.sqrt(self._sparsity_avg) + EPS)
        self._dist_to_opt_avg = smoothed_dist

    return [grad_norm_update, dist_update]
评论列表
文章目录