def _dense_moving_average(self, x_tm1, a_t, name, beta=.9):
    """Maintain a (bias-corrected) moving average of `a_t`, keyed on `x_tm1`.

    Two accumulators are fetched/created via `self.get_accumulator`: the
    running average itself (under `name`) and a scalar step counter
    (under `name/tm1`). With `beta < 1` the decay is rescaled by
    (1-beta**tm1)/(1-beta**t) — Adam-style debiasing; otherwise the update
    degenerates to a running arithmetic mean (beta_t = tm1/t).

    Returns:
        (b_t, t): the updated average tensor and the incremented step count.
    """
    b_tm1 = self.get_accumulator(x_tm1, '%s' % name)
    # Scalar step counter; `t` is the post-increment value.
    tm1 = self.get_accumulator(x_tm1, '%s/tm1' % name, shape=[])
    t = tf.assign_add(tm1, 1)
    if beta < 1:
        beta_t = tf.convert_to_tensor(beta, name='%s/decay' % name)
        # NOTE(review): `tm1` is read here after the assign_add above; in a
        # TF1 graph the read's ordering relative to the increment is not
        # guaranteed without a control dependency — confirm intended.
        beta_t = beta_t * (1-beta**tm1) / (1-beta**t)
    else:
        beta_t = tm1 / t
    # In-place: b_t = beta_t * b_tm1 + (1 - beta_t) * a_t.
    b_t = tf.assign(b_tm1, beta_t*b_tm1)
    b_t = tf.assign_add(b_t, (1-beta_t)*a_t)
    return b_t, t
#=============================================================
# Example source snippets of the Python tf.assign_add() API (scraped collection)
def tabular_learning_with_lr(init_lr, decay_steps, Qs_t, states_t, actions_t, targets):
    """Build a tabular Q-table update with an exponentially decaying LR.

    Args:
        init_lr: initial learning rate (Python float).
        decay_steps: the LR is halved every `decay_steps` global steps
            (staircase schedule).
        Qs_t: the Q-table variable, indexed by [state, action].
        states_t, actions_t: 1-D int tensors of visited state/action ids.
        targets: tensor of TD targets, one per (state, action) pair.

    Returns:
        (loss, train_op): the reported mean error and the scatter-add
        update op (which also increments the global step).
    """
    reusing_scope = tf.get_variable_scope().reuse
    state_action_pairs = tf.stack([states_t, actions_t], 1)
    estimates = tf.gather_nd(Qs_t, state_action_pairs)
    err_estimates = targets - estimates
    # NOTE(review): this is the mean *signed* TD error, not a squared
    # loss — presumably only used for reporting; confirm intended.
    loss = tf.reduce_mean(err_estimates)
    global_step = tf.Variable(0, trainable=False, name="global_step", collections=[tf.GraphKeys.GLOBAL_STEP, tf.GraphKeys.GLOBAL_VARIABLES])
    lr = tf.train.exponential_decay(tf.constant(init_lr, dtype=tf.float32), global_step, decay_steps, 0.5, staircase=True)
    # Only emit the LR summary the first time the scope is built
    # (`reuse` may be None/True/False, hence the explicit `is False`).
    if reusing_scope is False:
        tf.summary.scalar('lr', lr)
    inc_global_step = global_step.assign_add(1)
    with tf.control_dependencies([inc_global_step]):
        # Tabular update: Q[s, a] += lr * (target - Q[s, a]).
        updates = lr * err_estimates
        train_op = tf.scatter_nd_add(Qs_t, state_action_pairs, updates)
    return loss, train_op
def _dense_moving_average(self, x_tm1, a_t, name, beta=.9):
    """Update a debiased exponential moving average of `a_t` for `x_tm1`.

    The average `b` (accumulator `name`) and a scalar step counter
    (accumulator `name/tm1`) live per-variable via `self.get_accumulator`.
    For `beta < 1` an Adam-style bias-corrected decay is applied; for
    `beta >= 1` the update reduces to a running mean (beta_t = tm1/t).

    Returns:
        (b_t, t): updated average and incremented step count.
    """
    b_tm1 = self.get_accumulator(x_tm1, '%s' % name)
    tm1 = self.get_accumulator(x_tm1, '%s/tm1' % name, shape=[])
    t = tf.assign_add(tm1, 1)
    if beta < 1:
        beta_t = tf.convert_to_tensor(beta, name='%s/decay' % name)
        # NOTE(review): in graph mode the read of `tm1` after the
        # assign_add has no guaranteed ordering without a control
        # dependency — confirm intended.
        beta_t = beta_t * (1-beta**tm1) / (1-beta**t)
    else:
        beta_t = tm1 / t
    # b_t = beta_t * b_tm1 + (1 - beta_t) * a_t, written in place.
    b_t = tf.assign(b_tm1, beta_t*b_tm1)
    b_t = tf.assign_add(b_t, (1-beta_t)*a_t)
    return b_t, t
#=============================================================
def apply_gradients(self, grads):
    """Apply gradients: momentum SGD for the first `self._cold_iter`
    steps ("cold start"), then switch to KFAC.

    Args:
        grads: list of (gradient, variable) pairs.

    Returns:
        (update_op, qr): the conditional training op and the queue runner
        produced by `apply_gradients_kfac`.
    """
    # Cold-phase optimizer; the LR is pre-scaled by (1 - momentum).
    coldOptim = tf.train.MomentumOptimizer(
        self._cold_lr * (1. - self._momentum), self._momentum)

    def coldSGDstart():
        # Count cold steps and apply one momentum-SGD update.
        sgd_step_op = tf.assign_add(self.sgd_step, 1)
        coldOptim_op = coldOptim.apply_gradients(grads)
        if KFAC_DEBUG:
            with tf.control_dependencies([sgd_step_op, coldOptim_op]):
                sgd_step_op = tf.Print(
                    sgd_step_op, [self.sgd_step, tf.convert_to_tensor('doing cold sgd step')])
        return tf.group(*[sgd_step_op, coldOptim_op])

    # KFAC ops are built unconditionally; tf.cond picks which branch runs.
    kfacOptim_op, qr = self.apply_gradients_kfac(grads)

    def warmKFACstart():
        return kfacOptim_op

    return tf.cond(tf.greater(self.sgd_step, self._cold_iter), warmKFACstart, coldSGDstart), qr
def model(features, labels, mode):
    """Linear-regression model_fn for tf.contrib.learn.

    Fits y = W * x + b with a sum-of-squares loss via SGD (lr = 0.01);
    every training step also bumps the global step by one.
    """
    # Trainable parameters of the affine map.
    weight = tf.get_variable("W", [1], dtype=tf.float64)
    bias = tf.get_variable("b", [1], dtype=tf.float64)
    predictions = weight * features['x'] + bias
    # Loss sub-graph: squared error summed over the batch.
    loss = tf.reduce_sum(tf.square(predictions - labels))
    # Training sub-graph: one SGD step fused with a global-step increment.
    step_counter = tf.train.get_global_step()
    sgd = tf.train.GradientDescentOptimizer(0.01)
    train = tf.group(sgd.minimize(loss), tf.assign_add(step_counter, 1))
    # ModelFnOps wires the sub-graphs into the Estimator framework.
    return tf.contrib.learn.ModelFnOps(
        mode=mode, predictions=predictions, loss=loss, train_op=train)
def model(features, labels, mode, params):
    """CPU-pinned linear-regression model_fn for tf.contrib.learn.

    Predicts y = W * features[:, 0] + b, trains with SGD (lr = 0.01), and
    increments the global step alongside each optimizer step.
    """
    with tf.device("/cpu:0"):
        # Affine parameters.
        w = tf.get_variable("W", [1], dtype=tf.float64)
        b = tf.get_variable("b", [1], dtype=tf.float64)
        y = w * features[:, 0] + b
        # Squared-error loss summed over the batch.
        loss = tf.reduce_sum(tf.square(y - labels))
        # One SGD step plus a global-step bump, grouped into one op.
        gstep = tf.train.get_global_step()
        train = tf.group(
            tf.train.GradientDescentOptimizer(0.01).minimize(loss),
            tf.assign_add(gstep, 1))
        # Hand the assembled sub-graphs to the Estimator machinery.
        return tf.contrib.learn.estimators.model_fn.ModelFnOps(
            mode=mode, predictions=y,
            loss=loss,
            train_op=train)
def _dense_moving_average(self, x_tm1, a_t, name, beta=.9):
    """Track a bias-corrected moving average of `a_t`, stored per `x_tm1`.

    Accumulators (the average under `name`, a scalar step counter under
    `name/tm1`) come from `self.get_accumulator`. `beta < 1` applies an
    Adam-style debiased exponential decay; `beta >= 1` falls back to a
    plain running mean (beta_t = tm1/t).

    Returns:
        (b_t, t): updated average tensor and incremented step counter.
    """
    b_tm1 = self.get_accumulator(x_tm1, '%s' % name)
    tm1 = self.get_accumulator(x_tm1, '%s/tm1' % name, shape=[])
    t = tf.assign_add(tm1, 1)
    if beta < 1:
        beta_t = tf.convert_to_tensor(beta, name='%s/decay' % name)
        # NOTE(review): ordering of this read of `tm1` w.r.t. the
        # assign_add above is not pinned by a control dependency — confirm.
        beta_t = beta_t * (1-beta**tm1) / (1-beta**t)
    else:
        beta_t = tm1 / t
    # Effective update: b <- beta_t * b + (1 - beta_t) * a_t.
    b_t = tf.assign(b_tm1, beta_t*b_tm1)
    b_t = tf.assign_add(b_t, (1-beta_t)*a_t)
    return b_t, t
#=============================================================
def __init__(self, env_config, grad_clip_norm=None, log_dir='logs/examples',
             **kwargs):
    """Store configuration and build global-step bookkeeping ops.

    Args:
        env_config: environment configuration carried on the instance.
        grad_clip_norm: optional gradient clipping norm (None disables).
        log_dir: directory used for logs/summaries.
        **kwargs: accepted for subclass flexibility; unused here.
    """
    # Plain configuration.
    self.env_config = env_config
    self.grad_clip_norm = grad_clip_norm
    self.log_dir = log_dir
    # Members populated later during graph construction / training.
    self.placeholders = {}
    self.training_op = None
    self.merged = None
    self._saver = None
    self._writer = None
    self.callbacks = []
    # Global step starts at 1; it is advanced by a caller-chosen amount
    # fed through the `add_to_global_step` placeholder.
    self.global_step_sy = tf.Variable(1, name='global_step', trainable=False)
    ph_config = {'add_to_global_step': [[], tf.int32]}
    self._create_placeholders(ph_config)
    self.increase_global_step_op = tf.assign_add(
        self.global_step_sy,
        self.placeholders['add_to_global_step'],
        name='increase_global_step')
def __init__(self,
             graph,
             exploration_steps,
             total_steps,
             gamma,
             a3c_update_interval,
             action_sampler):
    """A3C trainer state.

    `graph` must expose placeholders named "states", "actions" and
    "returns", operations named "loss_op", "train_op", "probs" and
    "value", and a "global_step" collection whose first entry is the
    shared step counter.
    """
    # Carried configuration.
    self.graph = graph
    self.gamma = gamma
    self.a3c_update_interval = a3c_update_interval
    self.action_sampler = action_sampler
    self.exploration_steps = exploration_steps
    self.total_steps = total_steps
    # Shared global step T, plus an op that advances it by one.
    self.T = graph.get_collection("global_step")[0]
    self.incr_T = tf.assign_add(self.T, 1)
def create_fisher_ops(self):
    """Create accumulators and update ops for the EWC Fisher diagonal."""
    # One zero-initialized, non-trainable slot per parameter (biases
    # first, then weights) holding accumulated squared gradients.
    bias_slots = self.bias_shaped_variables(name='bias_grads2', c=0.0, trainable=False)
    weight_slots = self.weight_shaped_variables(name='weight_grads2', c=0.0, trainable=False)
    self.fisher_diagonal = bias_slots + weight_slots
    # Add each minibatch's contribution into its slot.
    self.fisher_accumulate_op = [
        tf.assign_add(slot, batch_val)
        for slot, batch_val in zip(self.fisher_diagonal, self.fisher_minibatch)
    ]
    # Turn the accumulated sums into an average over all EWC samples.
    scale = 1 / float(self.ewc_batches * self.ewc_batch_size)
    self.fisher_full_batch_average_op = [tf.assign(v, scale * v) for v in self.fisher_diagonal]
    # Reset all slots before the next estimation pass.
    self.fisher_zero_op = [tf.assign(v, tf.zeros_like(v)) for v in self.fisher_diagonal]
def test_some_initialized(self):
    """_init_uninitialized must initialize only not-yet-initialized
    variables and leave existing variable state untouched."""
    with self.test_session() as sess:
        x = tf.Variable(tf.zeros([]))
        # First call: x is uninitialized, so it is initialized and returned.
        self.assertEqual([x], tdc._init_uninitialized(sess))
        self.assertEqual(0, sess.run(x))
        y = tf.assign_add(x, 1)
        # x already initialized: nothing to do, and running y bumps x to 1.
        self.assertEqual([], tdc._init_uninitialized(sess))
        self.assertEqual(1, sess.run(y))
        # Still nothing to initialize — x must NOT have been reset to 0.
        self.assertEqual([], tdc._init_uninitialized(sess))
        # If we had done initialize_all_variables we'd see 1.
        self.assertEqual(2, sess.run(y))
def loss(loss_value):
    """Build streaming mean-loss ops.

    Returns ([update_ops], mean_loss_op): run the update ops once per
    batch, then evaluate the mean op for the aggregate.
    """
    running_total = tf.Variable(0.0, False)
    num_batches = tf.Variable(0, False)
    updates = [
        tf.assign_add(running_total, loss_value),
        tf.assign_add(num_batches, 1),
    ]
    mean_op = running_total / tf.cast(num_batches, tf.float32)
    return updates, mean_op
def accuracy(logits, labels):
    """Build streaming top-1 accuracy ops.

    Returns ([update_ops], accuracy_op): run the update ops per batch,
    then evaluate the accuracy op for the aggregate ratio.
    """
    hits = tf.nn.in_top_k(logits, labels, 1)
    n_correct = tf.reduce_sum(tf.cast(hits, tf.int32))
    n_wrong = tf.reduce_sum(tf.cast(tf.logical_not(hits), tf.int32))
    # Running totals of correct and incorrect predictions.
    correct_total = tf.Variable(0, False)
    wrong_total = tf.Variable(0, False)
    updates = [
        tf.assign_add(correct_total, n_correct),
        tf.assign_add(wrong_total, n_wrong),
    ]
    seen = tf.cast(correct_total + wrong_total, tf.float32)
    accuracy_op = tf.cast(correct_total, tf.float32) / seen
    return updates, accuracy_op
def loss(loss_value):
    """Accumulate per-batch loss and expose the running mean.

    Returns a pair: the list of update ops and the mean-loss op.
    """
    acc_sum = tf.Variable(0.0, False)
    acc_count = tf.Variable(0, False)
    # Fold this batch into the running totals.
    sum_update = tf.assign_add(acc_sum, loss_value)
    count_update = tf.assign_add(acc_count, 1)
    return [sum_update, count_update], acc_sum / tf.cast(acc_count, tf.float32)
def accuracy(logits, labels):
    """Aggregate top-1 accuracy across batches.

    Returns ([update_ops], accuracy_op).
    """
    in_top1 = tf.nn.in_top_k(logits, labels, 1)
    batch_correct = tf.reduce_sum(tf.cast(in_top1, tf.int32))
    batch_incorrect = tf.reduce_sum(tf.cast(tf.logical_not(in_top1), tf.int32))
    # Persistent counters across evaluation batches.
    total_correct = tf.Variable(0, False)
    total_incorrect = tf.Variable(0, False)
    inc_correct = tf.assign_add(total_correct, batch_correct)
    inc_incorrect = tf.assign_add(total_incorrect, batch_incorrect)
    # correct / (correct + incorrect), computed in float32.
    accuracy_op = tf.cast(total_correct, tf.float32) / tf.cast(
        total_correct + total_incorrect, tf.float32)
    return [inc_correct, inc_incorrect], accuracy_op
def loss(loss_value):
    """Streaming mean loss: returns (update_ops, mean_op)."""
    # Running sum of losses and number of batches folded in so far.
    loss_sum = tf.Variable(0.0, False)
    batches_seen = tf.Variable(0, False)
    update_ops = [tf.assign_add(loss_sum, loss_value),
                  tf.assign_add(batches_seen, 1)]
    return update_ops, loss_sum / tf.cast(batches_seen, tf.float32)
def update_add(x, increment):
    """Return an op that adds `increment` to variable `x` in place."""
    update_op = tf.assign_add(x, increment)
    return update_op
def _apply_stats(self, statsUpdates, accumulate=False, accumulateCoeff=0.):
    """Fold freshly computed statistics into the KFAC stats variables.

    Args:
        statsUpdates: dict mapping a stats variable to its newly computed
            stats tensor.
        accumulate: if True, add `accumulateCoeff * new` into each
            variable (superbatch averaging); otherwise apply an
            exponential moving average with decay `self._stats_decay`.
        accumulateCoeff: weight used in accumulation mode.

    Returns:
        A one-element list with the stats-step increment op, which has a
        control dependency on every individual stats update.
    """
    updateOps = []
    # obtain the stats var list
    for stats_var in statsUpdates:
        stats_new = statsUpdates[stats_var]
        if accumulate:
            # simple superbatch averaging
            update_op = tf.assign_add(
                stats_var, accumulateCoeff * stats_new, use_locking=True)
        else:
            # exponential running averaging:
            # var <- decay * var, then var <- var + (1 - decay) * new.
            update_op = tf.assign(
                stats_var, stats_var * self._stats_decay, use_locking=True)
            update_op = tf.assign_add(
                update_op, (1. - self._stats_decay) * stats_new, use_locking=True)
        updateOps.append(update_op)
    # Only advance the stats step once all updates above have run.
    with tf.control_dependencies(updateOps):
        stats_step_op = tf.assign_add(self.stats_step, 1)
        if KFAC_DEBUG:
            stats_step_op = (tf.Print(stats_step_op,
                                      [tf.convert_to_tensor('step:'),
                                       self.global_step,
                                       tf.convert_to_tensor('fac step:'),
                                       self.factor_step,
                                       tf.convert_to_tensor('sgd step:'),
                                       self.sgd_step,
                                       tf.convert_to_tensor('Accum:'),
                                       tf.convert_to_tensor(accumulate),
                                       tf.convert_to_tensor('Accum coeff:'),
                                       tf.convert_to_tensor(accumulateCoeff),
                                       tf.convert_to_tensor('stat step:'),
                                       self.stats_step, updateOps[0], updateOps[1]]))
    return [stats_step_op, ]
def applyStatsEigen(self, eigen_list):
    """Write freshly computed eigen decompositions into their variables.

    Args:
        eigen_list: tensors ordered like `self.eigen_update_list`; each is
            assigned to the stats-eigen variable resolved through
            `self.eigen_reverse_lookup`.

    Returns:
        The list of assign ops, extended with the factor-step increment
        (and a debug print op when KFAC_DEBUG is set).
    """
    updateOps = []
    print(('updating %d eigenvalue/vectors' % len(eigen_list)))
    for i, (tensor, mark) in enumerate(zip(eigen_list, self.eigen_update_list)):
        stats_eigen_var = self.eigen_reverse_lookup[mark]
        updateOps.append(
            tf.assign(stats_eigen_var, tensor, use_locking=True))
    # Only bump the factor step after every assign has executed.
    with tf.control_dependencies(updateOps):
        factor_step_op = tf.assign_add(self.factor_step, 1)
        updateOps.append(factor_step_op)
        if KFAC_DEBUG:
            updateOps.append(tf.Print(tf.constant(
                0.), [tf.convert_to_tensor('updated kfac factors')]))
    return updateOps
def apply_gradients(self, grads):
    """Apply gradients: momentum SGD during the cold-start phase (first
    `self._cold_iter` steps), KFAC afterwards.

    Args:
        grads: list of (gradient, variable) pairs.

    Returns:
        (update_op, qr): the conditional training op and the KFAC queue
        runner from `apply_gradients_kfac`.
    """
    coldOptim = tf.train.MomentumOptimizer(
        self._cold_lr, self._momentum)

    def coldSGDstart():
        sgd_grads, sgd_var = zip(*grads)
        # Optional global-norm clipping during the cold phase.
        # Fixed: identity test `is not None` instead of `!= None`
        # (PEP 8; `!=` could be hijacked by a custom __eq__).
        if self.max_grad_norm is not None:
            sgd_grads, sgd_grad_norm = tf.clip_by_global_norm(sgd_grads, self.max_grad_norm)
        sgd_grads = list(zip(sgd_grads, sgd_var))
        sgd_step_op = tf.assign_add(self.sgd_step, 1)
        coldOptim_op = coldOptim.apply_gradients(sgd_grads)
        if KFAC_DEBUG:
            with tf.control_dependencies([sgd_step_op, coldOptim_op]):
                sgd_step_op = tf.Print(
                    sgd_step_op, [self.sgd_step, tf.convert_to_tensor('doing cold sgd step')])
        return tf.group(*[sgd_step_op, coldOptim_op])

    # KFAC branch is built unconditionally; tf.cond selects at runtime.
    kfacOptim_op, qr = self.apply_gradients_kfac(grads)

    def warmKFACstart():
        return kfacOptim_op

    return tf.cond(tf.greater(self.sgd_step, self._cold_iter), warmKFACstart, coldSGDstart), qr