def _apply_dense(self, grad, weight):
    learning_rate_t = tf.cast(self._lr_t, weight.dtype.base_dtype)
    mu_t = tf.cast(self._mu_t, weight.dtype.base_dtype)
    norm_t = tf.cast(self._norm_t, weight.dtype.base_dtype)
    momentum = self.get_slot(weight, "a")
    norm = self.get_slot(weight, "n")
    # Average the momentum over the second axis for matrix parameters;
    # vectors (and anything else) are used as-is.
    if momentum.get_shape().ndims == 2:
        momentum_mean = tf.reduce_mean(momentum, axis=1, keep_dims=True)
    else:
        momentum_mean = momentum
    # Update the per-variable norm slot (the assign targets the slot
    # variable "n", not the cast tensor).
    norm_update = learning_rate_t / norm + norm
    norm_t = tf.assign(norm, norm_update, use_locking=self._use_locking)
    momentum_update = (grad / norm_t) + (mu_t * momentum_mean)
    momentum_t = tf.assign(momentum, momentum_update,
                           use_locking=self._use_locking)
    weight_update = learning_rate_t * momentum_t
    weight_t = tf.assign_sub(
        weight, weight_update, use_locking=self._use_locking)
    return tf.group(*[weight_t, norm_t, momentum_t])
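The get_slot(weight, "a") / get_slot(weight, "n") calls above assume the optimizer created those slots earlier. A hypothetical _create_slots sketch using the standard tf.train.Optimizer slot helpers; the slot names match the snippet, but the choice of a ones-initialized norm slot is an illustrative assumption, not taken from the original:

def _create_slots(self, var_list):
    # One momentum accumulator "a" and one norm accumulator "n" per variable.
    for v in var_list:
        self._zeros_slot(v, "a", self._name)
        # Illustrative choice: start the norm slot at ones so the first
        # learning_rate_t / norm division is well defined.
        self._get_or_make_slot(
            v, tf.ones_like(v.initialized_value()), "n", self._name)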
Python assign_sub() usage examples (source snippets)
def _apply_dense(self, grad, var):
    lr_t = tf.cast(self._lr_t, var.dtype.base_dtype)
    beta1_t = tf.cast(self._beta1_t, var.dtype.base_dtype)
    beta2_t = tf.cast(self._beta2_t, var.dtype.base_dtype)
    if var.dtype.base_dtype == tf.float16:
        eps = 1e-7  # Can't use 1e-8 due to underflow -- not sure if it makes a big difference.
    else:
        eps = 1e-8
    v = self.get_slot(var, "v")
    v_t = v.assign(beta1_t * v + (1. - beta1_t) * grad)
    m = self.get_slot(var, "m")
    m_t = m.assign(tf.maximum(beta2_t * m + eps, tf.abs(grad)))
    g_t = v_t / m_t
    var_update = tf.assign_sub(var, lr_t * g_t)
    return tf.group(*[var_update, m_t, v_t])
def _apply_dense(self, grad, var):
    lr_t = tf.cast(self._lr_t, var.dtype.base_dtype)
    beta1_t = tf.cast(self._beta1_t, var.dtype.base_dtype)
    beta2_t = tf.cast(self._beta2_t, var.dtype.base_dtype)
    if var.dtype.base_dtype == tf.float16:
        # Can't use 1e-8 due to underflow
        eps = 1e-7
    else:
        eps = 1e-8
    v = self.get_slot(var, "v")
    v_t = v.assign(beta1_t * v + (1. - beta1_t) * grad)
    m = self.get_slot(var, "m")
    m_t = m.assign(tf.maximum(beta2_t * m + eps, tf.abs(grad)))
    g_t = v_t / m_t
    var_update = tf.assign_sub(var, lr_t * g_t)
    return tf.group(*[var_update, m_t, v_t])
def apply_updates(self, model, grads):
    """
    Updates the model parameters based on the given gradients, using momentum.
    """
    update_ops = []
    mom_ops = []
    # Allow either a single learning rate or one learning rate per parameter.
    if isinstance(self._learning_rate, list):
        lrs = self._learning_rate
    else:
        lrs = [self._learning_rate for p in model.model_params]
    with tf.name_scope('CDLearning/updates'):
        for param, grad, mv, lr in zip(model.model_params, grads, self._momentum_vector, lrs):
            mv = tf.assign(mv, self._momentum * mv + grad * lr)
            update_ops.append(tf.assign_sub(param, mv))
    return update_ops, mom_ops
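apply_updates assumes self._momentum_vector already holds one buffer per model parameter. A hypothetical sketch of how those buffers might be set up (method name and variable naming are assumptions; only the shape/dtype matching is implied by the code above):

def _create_momentum_vector(self, model):
    # One zero-initialized, non-trainable buffer per model parameter,
    # matching shape and dtype so the tf.assign in apply_updates lines up.
    self._momentum_vector = [
        tf.Variable(tf.zeros_like(p.initialized_value()),
                    trainable=False,
                    name=p.op.name + '_momentum')
        for p in model.model_params
    ]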
def _apply_dense(self, grad, var):
    lr_t = tf.cast(self._lr_t, var.dtype.base_dtype)
    beta2_t = tf.cast(self._beta2_t, var.dtype.base_dtype)
    if var.dtype.base_dtype == tf.float16:
        eps = 1e-7  # Can't use 1e-8 due to underflow -- not sure if it makes a big difference.
    else:
        eps = 1e-8
    m = self.get_slot(var, "m")
    m_t = m.assign(tf.maximum(beta2_t * m + eps, tf.abs(grad)))
    g_t = grad / m_t
    var_update = tf.assign_sub(var, lr_t * g_t)
    return tf.group(*[var_update, m_t])
def update_sub(x, decrement):
    return tf.assign_sub(x, decrement)
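These one-line wrappers appear to be Keras-style TensorFlow-backend helpers. A minimal, hypothetical TF1 graph-mode usage sketch (the variable and decrement values are illustrative):

import tensorflow as tf

x = tf.Variable([3.0, 5.0])
dec = tf.constant([1.0, 2.0])
op = update_sub(x, dec)          # builds an assign_sub op; nothing runs yet

with tf.Session() as sess:
    sess.run(tf.global_variables_initializer())
    print(sess.run(op))          # -> [2. 3.]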
From tensorflow_backend.py (project: deep-learning-keras-projects, author: jasmeetsb):
def update_sub(x, decrement):
    return tf.assign_sub(x, decrement)
def _initAssignSubFetch(self, x, y, use_gpu=False):
    """Initialize a variable to x, then compute and fetch the result of x -= y."""
    with self.test_session(force_gpu=use_gpu):
        p = tf.Variable(x)
        sub = tf.assign_sub(p, y)
        p.initializer.run()
        new_value = sub.eval()
        return p.eval(), new_value
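A hypothetical test method (name and values are illustrative) showing how this helper would typically be exercised inside a tf.test.TestCase:

def testAssignSub(self):
    var_value, op_value = self._initAssignSubFetch([10.0, 20.0], [1.0, 2.0])
    # Both the fetched result of the assign_sub op and the variable itself
    # should reflect the subtraction.
    self.assertAllEqual([9.0, 18.0], op_value)
    self.assertAllEqual([9.0, 18.0], var_value)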
def testAssignUpdate(self):
    var = state_ops.variable_op([1, 2], tf.complex64)
    added = tf.assign_add(var, [[2.0+2.0j, 3.0+3.0j]])
    self.assertEqual([1, 2], added.get_shape())
    subbed = tf.assign_sub(var, [[12.0+12.0j, 13.0+13.0j]])
    self.assertEqual([1, 2], subbed.get_shape())

def testAssignUpdateNoVarShape(self):
    var = state_ops.variable_op([1, 2], tf.complex64, set_shape=False)
    added = tf.assign_add(var, [[2.0+2.0j, 3.0+3.0j]])
    self.assertEqual([1, 2], added.get_shape())
    subbed = tf.assign_sub(var, [[12.0+12.0j, 13.0+13.0j]])
    self.assertEqual([1, 2], subbed.get_shape())

def testAssignUpdateNoValueShape(self):
    var = state_ops.variable_op([1, 2], tf.complex64)
    added = tf.assign_add(var, self._NewShapelessTensor())
    self.assertEqual([1, 2], added.get_shape())
    subbed = tf.assign_sub(var, self._NewShapelessTensor())
    self.assertEqual([1, 2], subbed.get_shape())

def testAssignUpdateNoShape(self):
    var = state_ops.variable_op([1, 2], tf.complex64, set_shape=False)
    added = tf.assign_add(var, self._NewShapelessTensor())
    self.assertEqual(tensor_shape.unknown_shape(), added.get_shape())
    subbed = tf.assign_sub(var, self._NewShapelessTensor())
    self.assertEqual(tensor_shape.unknown_shape(), subbed.get_shape())
def _clip_dense(self, var):
    with self._maybe_colocate_with(var):
        updated_var_value = var._ref()
        normalized_var = tf.clip_by_norm(
            updated_var_value, self._max_norm, self._vars_to_clip_dims[var])
        delta = updated_var_value - normalized_var
    with tf.colocate_with(var):
        return var.assign_sub(delta, use_locking=self._use_locking)
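Because delta = var - clip_by_norm(var, max_norm), subtracting delta leaves the variable exactly at its norm-clipped value. An illustrative stand-alone sketch of the same "subtract the delta" trick with plain TF1 ops (values and max_norm are assumptions):

import tensorflow as tf

max_norm = 1.0
var = tf.Variable([3.0, 4.0])                 # norm 5.0
clipped = tf.clip_by_norm(var, max_norm)      # [0.6, 0.8]
update = var.assign_sub(var - clipped)        # var ends up equal to clipped

with tf.Session() as sess:
    sess.run(tf.global_variables_initializer())
    print(sess.run(update))                   # ~[0.6, 0.8]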
def _apply_sparse_shared(self, grad, var, indices, scatter_add):
    beta1_power = tf.cast(self._beta1_power, var.dtype.base_dtype)
    beta2_power = tf.cast(self._beta2_power, var.dtype.base_dtype)
    lr_t = tf.cast(self._lr_t, var.dtype.base_dtype)
    beta1_t = tf.cast(self._beta1_t, var.dtype.base_dtype)
    beta2_t = tf.cast(self._beta2_t, var.dtype.base_dtype)
    epsilon_t = tf.cast(self._epsilon_t, var.dtype.base_dtype)
    lr = (lr_t * tf.sqrt(1 - beta2_power) / (1 - beta1_power))
    # m_t = beta1 * m + (1 - beta1) * g_t
    m = self.get_slot(var, "m")
    m_scaled_g_values = grad * (1 - beta1_t)
    m_t = tf.assign(m, m * beta1_t, use_locking=self._use_locking)
    with tf.control_dependencies([m_t]):
        m_t = scatter_add(m, indices, m_scaled_g_values)
    # m_bar = (1 - beta1) * g_t + beta1 * m_t
    m_bar = m_scaled_g_values + beta1_t * m_t
    # v_t = beta2 * v + (1 - beta2) * (g_t * g_t)
    v = self.get_slot(var, "v")
    v_scaled_g_values = (grad * grad) * (1 - beta2_t)
    v_t = tf.assign(v, v * beta2_t, use_locking=self._use_locking)
    with tf.control_dependencies([v_t]):
        v_t = scatter_add(v, indices, v_scaled_g_values)
    v_sqrt = tf.sqrt(v_t)
    var_update = tf.assign_sub(
        var, lr * m_bar / (v_sqrt + epsilon_t), use_locking=self._use_locking)
    return tf.group(*[var_update, m_bar, v_t])
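In the stock Adam-style TF1 optimizers, a shared helper like this is wired up by a thin _apply_sparse wrapper that unpacks the IndexedSlices gradient and supplies the concrete scatter op. A sketch of that wrapper, assuming it lives in the same optimizer class:

def _apply_sparse(self, grad, var):
    # grad is an IndexedSlices; pass its values/indices plus a scatter_add
    # closure so the shared code never has to know which scatter op is used.
    return self._apply_sparse_shared(
        grad.values, var, grad.indices,
        lambda x, i, v: tf.scatter_add(x, i, v, use_locking=self._use_locking))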
def update_sub(x, decrement):
    return tf.assign_sub(x, decrement)
def _finish(self, caches):
    """"""
    if self.clip > 0:
        S_t = [cache['s_t'] for cache in caches]
        S_t, _ = tf.clip_by_global_norm(S_t, self.clip)
        for cache, s_t in zip(caches, S_t):
            cache['s_t'] = s_t
    for cache in caches:
        x_tm1 = cache['x_tm1']
        s_t = cache['s_t']
        updates = cache['updates']
        with tf.name_scope('update_' + x_tm1.op.name), tf.device(x_tm1.device):
            if 'idxs' in cache:
                idxs = cache['idxs']
                x_t = tf.scatter_sub(x_tm1, idxs, s_t)
                if self.chi > 0:
                    x_t_ = tf.gather(x_t, idxs)
                    x_bar_t, t_x_bar = self._sparse_moving_average(x_tm1, idxs, x_t_, 'x', beta=self.chi)
            else:
                x_t = tf.assign_sub(x_tm1, s_t)
                if self.chi > 0:
                    x_bar_t, t_x_bar = self._dense_moving_average(x_tm1, x_t, 'x', beta=self.chi)
            updates.append(x_t)
            if self.chi > 0:
                updates.extend([x_bar_t, t_x_bar])
    update_ops = [tf.group(*cache['updates']) for cache in caches]
    return tf.group(*update_ops, name='update')

#==============================================================
def update_sub(x, decrement):
    return tf.assign_sub(x, decrement)
def _op_apply_delta(self):
    assigns = []
    for var, delta in zip(self.variables, self._unpack(self._op_delta)):
        assigns.append(tf.assign_sub(var, delta))
    return tf.group(*assigns)
def batch_norm(x,
               is_training,
               gamma=None,
               beta=None,
               axes=[0, 1, 2],
               eps=1e-10,
               name="bn_out",
               decay=0.99,
               dtype=tf.float32):
    """Applies batch normalization.

    Collects mean and variance statistics of x over `axes` (by default all
    but the last dimension) and applies normalization as below:
        x_ = gamma * (x - mean) / sqrt(var + eps) + beta

    Args:
        x: Input tensor, [B, ...].
        is_training: Bool; use batch statistics and update the moving
            averages when True, use the stored moving averages when False.
        gamma: Scaling parameter.
        beta: Bias parameter.
        axes: Axes to collect statistics over.
        eps: Denominator bias.
        name: Name of the output op.
        decay: Decay rate of the exponential moving averages.
        dtype: Data type of the moving-average variables.

    Returns:
        normed: Batch-normalized variable.
        ops: List of moving-average update ops when training, else None.
    """
    n_out = x.get_shape()[-1]
    try:
        n_out = int(n_out)
        shape = [n_out]
    except (TypeError, ValueError):
        shape = None
    emean = tf.get_variable(
        "ema_mean",
        shape=shape,
        trainable=False,
        dtype=dtype,
        initializer=tf.constant_initializer(0.0, dtype=dtype))
    evar = tf.get_variable(
        "ema_var",
        shape=shape,
        trainable=False,
        dtype=dtype,
        initializer=tf.constant_initializer(1.0, dtype=dtype))
    if is_training:
        mean, var = tf.nn.moments(x, axes, name="moments")
        # Exponential moving averages written as assign_sub updates:
        # ema -= (ema - batch_stat) * (1 - decay)
        ema_mean_op = tf.assign_sub(emean, (emean - mean) * (1 - decay))
        ema_var_op = tf.assign_sub(evar, (evar - var) * (1 - decay))
        normed = tf.nn.batch_normalization(
            x, mean, var, beta, gamma, eps, name=name)
        return normed, [ema_mean_op, ema_var_op]
    else:
        normed = tf.nn.batch_normalization(
            x, emean, evar, beta, gamma, eps, name=name)
        return normed, None
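A minimal, hypothetical usage sketch for the batch_norm function above; tensor shapes, scope names, the toy loss, and the way the EMA ops are grouped with the train op are all illustrative assumptions:

import tensorflow as tf

x = tf.placeholder(tf.float32, [None, 32, 32, 64])
with tf.variable_scope("conv1_bn"):
    gamma = tf.get_variable("gamma", [64], initializer=tf.ones_initializer())
    beta = tf.get_variable("beta", [64], initializer=tf.zeros_initializer())
    normed, ema_ops = batch_norm(x, is_training=True, gamma=gamma, beta=beta)

# The caller is responsible for running the assign_sub EMA updates alongside
# the training step, e.g. by grouping them into a single train_op.
loss = tf.reduce_mean(tf.square(normed))
opt_step = tf.train.GradientDescentOptimizer(0.1).minimize(loss)
train_op = tf.group(opt_step, *ema_ops)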