def _sparse_moving_average(self, x_tm1, idxs, a_t_, name, beta=.9):
    """Updates the moving-average accumulator for `x_tm1` at the rows in `idxs` with `a_t_`."""
    b_tm1 = self.get_accumulator(x_tm1, '%s' % name)
    b_tm1_ = tf.gather(b_tm1, idxs)
    shape = self.get_variable_shape(x_tm1)
    tm1 = self.get_accumulator(x_tm1, '%s/tm1' % name, shape=[shape[0]]+[1]*(len(shape)-1))
    tm1_ = tf.gather(tm1, idxs)
    # Increment the per-row step counter for the rows being updated
    t = tf.scatter_add(tm1, idxs, tf.ones_like(tm1_))
    t_ = tf.gather(t, idxs)
    if beta < 1:
        beta_t = tf.convert_to_tensor(beta, name='%s/decay' % name)
        beta_t_ = beta_t * (1-beta_t**tm1_) / (1-beta_t**t_)
    else:
        beta_t_ = tm1_/t_
    # Decay the stored rows, then add the debiased new contribution
    b_t = tf.scatter_update(b_tm1, idxs, beta_t_*b_tm1_)
    b_t = tf.scatter_add(b_t, idxs, (1-beta_t_)*a_t_)
    return b_t, t
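
# Minimal standalone sketch (not part of the method above; variable names are
# illustrative only) of the tf.scatter_add semantics it relies on: the op adds
# `updates` into the rows of a mutable tf.Variable selected by `indices` and
# returns the updated reference.
import tensorflow as tf

acc = tf.Variable([[0., 0.], [0., 0.], [0., 0.]])
indices = tf.constant([0, 2])
updates = tf.constant([[1., 1.], [2., 2.]])
scattered = tf.scatter_add(acc, indices, updates)

with tf.Session() as sess:
    sess.run(tf.global_variables_initializer())
    print(sess.run(scattered))  # rows 0 and 2 are incremented, row 1 is untouched
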
def outer_product(*inputs):
    """Computes outer product.

    Args:
        inputs: a list of 1-D `Tensor` (vector)
    """
    inputs = list(inputs)
    order = len(inputs)

    for idx, input_ in enumerate(inputs):
        if len(input_.get_shape()) == 1:
            inputs[idx] = tf.reshape(input_, [-1, 1] if idx % 2 == 0 else [1, -1])

    if order == 2:
        output = tf.multiply(inputs[0], inputs[1])
    elif order == 3:
        size = []
        for i in xrange(order):
            size.append(inputs[i].get_shape()[0])
        output = tf.zeros(size)

        u, v, w = inputs[0], inputs[1], inputs[2]
        uv = tf.multiply(inputs[0], inputs[1])
        for i in xrange(int(size[-1])):
            output = tf.scatter_add(output, [0, 0, i], uv)

    return output
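
# Hypothetical usage of outer_product above (order 2): the two vectors are
# reshaped to [3, 1] and [1, 2], so the elementwise multiply broadcasts to the
# [3, 2] outer product.
u = tf.constant([1., 2., 3.])
v = tf.constant([4., 5.])
uv = outer_product(u, v)  # Tensor of shape [3, 2]
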
def make_update_op(self, upd_idxs, upd_keys, upd_vals,
                   batch_size, use_recent_idx, intended_output):
    """Function that creates all the update ops."""
    base_update_op = super(LSHMemory, self).make_update_op(
        upd_idxs, upd_keys, upd_vals,
        batch_size, use_recent_idx, intended_output)

    hash_slot_idxs = self.get_hash_slots(upd_keys)
    update_ops = []
    with tf.control_dependencies([base_update_op]):
        for i, slot_idxs in enumerate(hash_slot_idxs):
            entry_idx = tf.random_uniform([batch_size],
                                          maxval=self.num_per_hash_slot,
                                          dtype=tf.int32)
            entry_mul = 1 - tf.one_hot(entry_idx, self.num_per_hash_slot,
                                       dtype=tf.int32)
            entry_add = (tf.expand_dims(upd_idxs, 1) *
                         tf.one_hot(entry_idx, self.num_per_hash_slot,
                                    dtype=tf.int32))

            mul_op = tf.scatter_mul(self.hash_slots[i], slot_idxs, entry_mul)
            with tf.control_dependencies([mul_op]):
                add_op = tf.scatter_add(self.hash_slots[i], slot_idxs, entry_add)
                update_ops.append(add_op)

    return tf.group(*update_ops)
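
# Standalone sketch (shapes and names are assumptions) of the clear-then-write
# pattern used above: a one-hot mask applied with scatter_mul zeroes one entry
# per hash slot, and the dependent scatter_add then writes the new memory index
# into that entry.
import tensorflow as tf

slots = tf.Variable([[3, 7, 9], [4, 8, 2]], dtype=tf.int32)  # [num_slots, num_per_slot]
slot_idxs = tf.constant([0, 1])      # which slots to touch
entry_idx = tf.constant([2, 0])      # which entry to overwrite in each slot
new_idxs = tf.constant([[5], [6]])   # memory indices to write

entry_mul = 1 - tf.one_hot(entry_idx, 3, dtype=tf.int32)
entry_add = new_idxs * tf.one_hot(entry_idx, 3, dtype=tf.int32)

mul_op = tf.scatter_mul(slots, slot_idxs, entry_mul)
with tf.control_dependencies([mul_op]):
    add_op = tf.scatter_add(slots, slot_idxs, entry_add)
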
def test_state_grads():
    with tf.Session() as sess:
        v = tf.Variable([0., 0., 0.])
        x = tf.ones((3,))

        y0 = tf.assign(v, x)
        y1 = tf.assign_add(v, x)

        grad0 = tf.gradients(y0, [v, x])
        grad1 = tf.gradients(y1, [v, x])

        grad_vals = sess.run((grad0, grad1))

        assert np.allclose(grad_vals[0][0], 0)
        assert np.allclose(grad_vals[0][1], 1)
        assert np.allclose(grad_vals[1][0], 1)
        assert np.allclose(grad_vals[1][1], 1)

    with tf.Session() as sess:
        v = tf.Variable([0., 0., 0.])
        x = tf.ones((1,))

        y0 = tf.scatter_update(v, [0], x)
        y1 = tf.scatter_add(v, [0], x)

        grad0 = tf.gradients(y0, [v._ref(), x])
        grad1 = tf.gradients(y1, [v._ref(), x])

        grad_vals = sess.run((grad0, grad1))

        assert np.allclose(grad_vals[0][0], [0, 1, 1])
        assert np.allclose(grad_vals[0][1], 1)
        assert np.allclose(grad_vals[1][0], 1)
        assert np.allclose(grad_vals[1][1], 1)
def _scatter_f_var(self, dst, src, mode="update"):
    # create a temporary variable for dst so that we can use the sparse
    # variable updates. despite this looking incredibly inefficient, it is
    # actually faster than the scatter_nd approach
    # from tensorflow.python.ops import gen_state_ops
    # var = gen_state_ops._temporary_variable(
    #     self.bases[dst.key].get_shape(), self.bases[dst.key].dtype)
    # var_name = var.op.name
    # var = tf.assign(var, self.bases[dst.key])

    var = self.bases[dst.key]

    # if the slice covers the whole variable, fall back to dense assign/assign_add
    if (dst.as_slice is not None and
            var.get_shape().is_compatible_with(src.get_shape()) and
            dst.indices[0] == 0 and
            dst.indices[-1] == var.get_shape()[0].value - 1 and
            len(dst.indices) == var.get_shape()[0]):
        if mode == "inc":
            result = tf.assign_add(var, src, use_locking=False)
        else:
            result = tf.assign(var, src, use_locking=False)
    elif mode == "inc":
        result = tf.scatter_add(var, dst.tf_indices, src, use_locking=False)
    else:
        result = tf.scatter_update(var, dst.tf_indices, src, use_locking=False)

    # result = gen_state_ops._destroy_temporary_variable(var, var_name)

    return result
def eps_greedy(inputs_t, q_preds_t, nb_actions, N0, min_eps, nb_state=None):
    reusing_scope = tf.get_variable_scope().reuse

    N0_t = tf.constant(N0, tf.float32, name='N0')
    min_eps_t = tf.constant(min_eps, tf.float32, name='min_eps')

    if nb_state is None:
        N = tf.Variable(1., trainable=False, dtype=tf.float32, name='N')
        eps = tf.maximum(N0_t / (N0_t + N), min_eps_t, name="eps")
        update_N = tf.assign(N, N + 1)
        if reusing_scope is False:
            tf.summary.scalar('N', N)
    else:
        N = tf.Variable(tf.ones(shape=[nb_state]), name='N', trainable=False)
        eps = tf.maximum(N0_t / (N0_t + N[inputs_t]), min_eps_t, name="eps")
        # per-state visit counter, incremented sparsely
        update_N = tf.scatter_add(N, inputs_t, 1)
        if reusing_scope is False:
            tf.summary.histogram('N', N)

    cond = tf.greater(tf.random_uniform([], 0, 1), eps)
    pred_action = tf.cast(tf.argmax(q_preds_t, 0), tf.int32)
    random_action = tf.random_uniform([], 0, nb_actions, dtype=tf.int32)
    with tf.control_dependencies([update_N]):  # Force the update call
        action_t = tf.where(cond, pred_action, random_action)

    return action_t
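
# Hypothetical call into eps_greedy above (placeholder names are assumptions):
# with nb_state set, every evaluation of action_t also bumps the visit count of
# the current state through the scatter_add inside update_N.
state_t = tf.placeholder(tf.int32, [], name='state')
q_preds = tf.placeholder(tf.float32, [4], name='q_preds')
action_t = eps_greedy(state_t, q_preds, nb_actions=4, N0=100, min_eps=0.01, nb_state=10)
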
def tabular_eps_greedy(inputs_t, q_preds_t, nb_states, nb_actions, N0, min_eps):
    reusing_scope = tf.get_variable_scope().reuse

    Ns = tf.get_variable('Ns', shape=[nb_states], dtype=tf.float32,
                         trainable=False, initializer=tf.zeros_initializer())
    if reusing_scope is False:
        tf.summary.histogram('Ns', Ns)
    update_Ns = tf.scatter_add(Ns, inputs_t, tf.ones_like(inputs_t, dtype=tf.float32))

    eps = tf.maximum(
        N0 / (N0 + tf.gather(Ns, inputs_t)),
        min_eps,
        name="eps"
    )

    nb_samples = tf.shape(q_preds_t)[0]
    max_actions = tf.cast(tf.argmax(q_preds_t, 1), tf.int32)
    probs_t = tf.sparse_to_dense(
        sparse_indices=tf.stack([tf.range(nb_samples), max_actions], 1),
        output_shape=[nb_samples, nb_actions],
        sparse_values=1 - eps,
        default_value=0.
    ) + tf.expand_dims(eps / nb_actions, 1)

    conditions = tf.greater(tf.random_uniform([nb_samples], 0, 1), eps)
    random_actions = tf.random_uniform(shape=[nb_samples], minval=0, maxval=nb_actions, dtype=tf.int32)
    with tf.control_dependencies([update_Ns]):  # Force the update call
        actions_t = tf.where(conditions, max_actions, random_actions)

    return actions_t, probs_t
def collect_gradients(gradients, variables):
    ops = []
    for grad, var in zip(gradients, variables):
        if isinstance(grad, tf.Tensor):
            ops.append(tf.assign_add(var, grad))
        else:
            # sparse gradients arrive as IndexedSlices; accumulate them row-wise
            ops.append(tf.scatter_add(var, grad.indices, grad.values))
    return tf.group(*ops)
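
# Illustrative sketch (variable names are assumptions): the gradient of an
# embedding lookup comes back as tf.IndexedSlices rather than a dense Tensor,
# so collect_gradients above accumulates it into the buffer variable with
# tf.scatter_add instead of tf.assign_add.
embeddings = tf.get_variable("embeddings", [100, 16])
grad_buffer = tf.Variable(tf.zeros([100, 16]), trainable=False)
ids = tf.constant([3, 17, 42])
loss = tf.reduce_sum(tf.nn.embedding_lookup(embeddings, ids))
(grad,) = tf.gradients(loss, [embeddings])  # IndexedSlices with .indices/.values
accumulate_op = collect_gradients([grad], [grad_buffer])
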
def conv(v, k):
    """Computes circular convolution.

    Args:
        v: a 1-D `Tensor` (vector)
        k: a 1-D `Tensor` (kernel)
    """
    size = int(v.get_shape()[0])
    kernel_size = int(k.get_shape()[0])
    kernel_shift = int(math.floor(kernel_size/2.0))

    def loop(idx):
        if idx < 0: return size + idx
        if idx >= size: return idx - size
        else: return idx

    kernels = []
    for i in xrange(size):
        indices = [loop(i+j) for j in xrange(kernel_shift, -kernel_shift-1, -1)]
        v_ = tf.gather(v, indices)
        kernels.append(tf.reduce_sum(v_ * k, 0))

    # # code with double loop
    # for i in xrange(size):
    #     for j in xrange(kernel_size):
    #         idx = i + kernel_shift - j + 1
    #         if idx < 0: idx = idx + size
    #         if idx >= size: idx = idx - size
    #         w = tf.gather(v, int(idx)) * tf.gather(kernel, j)
    #         output = tf.scatter_add(output, [i], tf.reshape(w, [1, -1]))

    return tf.pack(kernels)
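
# Illustrative use of conv above (shapes are assumptions): circularly convolve a
# length-5 weighting vector with a 3-element shift kernel; the result is again a
# length-5 vector.
w = tf.constant([0.1, 0.7, 0.1, 0.05, 0.05])
s = tf.constant([0.0, 0.9, 0.1])
w_shifted = conv(w, s)
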
def update_contextual_features(contextual_features, indices, updates,
                               flattened_idx_offset):
    first_indices, second_indices = tf.split(1, 2, indices)
    indices = tf.squeeze(first_indices + second_indices)
    indices = indices + flattened_idx_offset
    contextual_features = tf.scatter_add(contextual_features, indices,
                                         updates, use_locking=None)
    return contextual_features
def _thin_stack_lookup_gradient(op, grad_stack1, grad_stack2, grad_buf_top, _):
    stack, buffer, _, _, buffer_cursors, transitions = op.inputs
    stack2_ptrs = op.outputs[3]
    t = op.get_attr("timestep")

    batch_size = buffer_cursors.get_shape().as_list()[0]
    num_tokens = buffer.get_shape().as_list()[0] / batch_size
    batch_range = math_ops.range(batch_size)
    batch_range_i = tf.to_float(batch_range)

    grad_stack_name = "grad_stack_%i_%s" % (t, str(uuid.uuid4())[:15])
    grad_buffer_name = "grad_buffer_%i_%s" % (t, str(uuid.uuid4())[:15])
    grad_stack = gen_state_ops._temporary_variable(stack.get_shape().as_list(), tf.float32, grad_stack_name)
    grad_buffer = gen_state_ops._temporary_variable(buffer.get_shape().as_list(), tf.float32, grad_buffer_name)
    grad_stack = tf.assign(grad_stack, tf.zeros_like(grad_stack))
    grad_buffer = tf.assign(grad_buffer, tf.zeros_like(grad_buffer))

    updates = []

    # Write grad_stack1 into block (t - 1)
    if t >= 1:
        in_cursors = (t - 1) * batch_size + batch_range
        grad_stack = tf.scatter_add(grad_stack, in_cursors, grad_stack1)

    # Write grad_stack2 using stored lookup pointers
    grad_stack = floaty_scatter_add(grad_stack, stack2_ptrs * batch_size + batch_range_i, grad_stack2)

    # Use buffer_cursors to scatter grads into buffer.
    buffer_ptrs = tf.minimum(float(num_tokens * batch_size) - 1.0,
                             buffer_cursors * batch_size + batch_range_i)
    grad_buffer = floaty_scatter_add(grad_buffer, buffer_ptrs, grad_buf_top)

    with tf.control_dependencies([grad_stack, grad_buffer]):
        grad_stack = gen_state_ops._destroy_temporary_variable(grad_stack, grad_stack_name)
        grad_buffer = gen_state_ops._destroy_temporary_variable(grad_buffer, grad_buffer_name)

        with tf.control_dependencies([grad_stack, grad_buffer]):
            return grad_stack, grad_buffer, None, None, None, None
# Deprecated custom gradient op.
#@ops.RegisterGradient("ThinStackLookup")