# The snippets collected on this page target the TensorFlow 1.x-era API; this first one also needs NumPy.
import numpy as np
import tensorflow as tf


def _cumprod(tensor, axis=0):
    """A custom version of cumprod to prevent NaN gradients when there are zeros in `tensor`,
    as reported here: https://github.com/tensorflow/tensorflow/issues/3862

    :param tensor: tf.Tensor
    :param axis: int, axis along which to take the cumulative product
    :return: tf.Tensor
    """
    transpose_permutation = None
    n_dim = len(tensor.get_shape())
    if n_dim > 1 and axis != 0:
        if axis < 0:
            axis += n_dim
        # Swap `axis` with axis 0 so that tf.scan runs along the requested axis.
        transpose_permutation = np.arange(n_dim)
        transpose_permutation[axis], transpose_permutation[0] = 0, axis
        tensor = tf.transpose(tensor, transpose_permutation)

    def prod(acc, x):
        return acc * x

    # Computing the running product with tf.scan keeps the gradient finite at zero entries.
    tensor = tf.scan(prod, tensor)
    if transpose_permutation is not None:
        # The swap permutation is its own inverse, so this restores the original axis order.
        tensor = tf.transpose(tensor, transpose_permutation)
    return tensor
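# A minimal usage sketch (illustrative values and names, not from the original source): the
# scan-based version matches tf.cumprod in the forward pass, while avoiding the NaN gradients
# that the issue linked above reports for tf.cumprod when the input contains zeros.
x = tf.constant([[0.5, 0.0, 2.0]])
safe = _cumprod(x, axis=1)       # [[0.5, 0.0, 0.0]]
builtin = tf.cumprod(x, axis=1)  # same values; its gradient is what the linked issue reports as NaN-prone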
def calculate_allocation_weighting(self, usage_vector):
    """
    :param usage_vector: tensor of shape [batch_size, memory_size]
    :return: allocation tensor of shape [batch_size, memory_size]
    """
    usage_vector = Memory.epsilon + (1 - Memory.epsilon) * usage_vector

    # We sort "-usage_vector" because top_k returns the highest values and we need the lowest.
    highest_usage, inverse_indices = tf.nn.top_k(-usage_vector, k=self.memory_size)
    lowest_usage = -highest_usage

    allocation_scrambled = (1 - lowest_usage) * tf.cumprod(lowest_usage, axis=1, exclusive=True)

    # The allocation is not yet in the correct order: allocation_scrambled[i] holds the value for
    # the i-th slot of the *sorted* usage, so invert the sort permutation for each batch element.
    indices = tf.stack([tf.invert_permutation(batch_indices)
                        for batch_indices in tf.unstack(inverse_indices)])
    allocation = tf.stack([tf.gather(mem, ind)
                           for mem, ind in
                           zip(tf.unstack(allocation_scrambled), tf.unstack(indices))])

    return allocation
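# Worked example of the cumprod step above (hypothetical numbers, a single batch row): if the
# usages sorted ascending are [0.1, 0.5, 0.9], then
#   tf.cumprod([0.1, 0.5, 0.9], exclusive=True) == [1.0, 0.1, 0.05]
# and (1 - usage) * that product == [0.9, 0.05, 0.005], so nearly all of the allocation
# weight goes to the least-used memory slot.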
def unpool(pool, ind, shape, ksize=[1, 2, 2, 1], scope=None):
    with tf.name_scope(scope):
        input_shape = tf.shape(pool)
        output_shape = [input_shape[0], input_shape[1] * ksize[1],
                        input_shape[2] * ksize[2], input_shape[3]]

        flat_input_size = tf.cumprod(input_shape)[-1]
        flat_output_shape = tf.stack([output_shape[0],
                                      output_shape[1] * output_shape[2] * output_shape[3]])

        # Build (batch, flat position) index pairs for every pooled value, then scatter the
        # values back into the flattened unpooled tensor.
        pool_ = tf.reshape(pool, tf.stack([flat_input_size]))
        batch_range = tf.reshape(tf.range(tf.cast(output_shape[0], tf.int64), dtype=ind.dtype),
                                 shape=tf.stack([input_shape[0], 1, 1, 1]))
        b = tf.ones_like(ind) * batch_range
        b = tf.reshape(b, tf.stack([flat_input_size, 1]))
        ind_ = tf.reshape(ind, tf.stack([flat_input_size, 1]))
        ind_ = tf.concat([b, ind_], 1)

        ret = tf.scatter_nd(ind_, pool_, shape=tf.cast(flat_output_shape, tf.int64))
        ret = tf.reshape(ret, tf.stack(output_shape))
        ret = tf.reshape(ret, shape=shape)
        return ret
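# Note on the cumprod above (hypothetical shape, just arithmetic): the last entry of the running
# product of a shape is the total element count, e.g. tf.cumprod([4, 32, 32, 3]) evaluates to
# [4, 128, 4096, 12288], so tf.cumprod(input_shape)[-1] equals tf.reduce_prod(input_shape).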
def preturn_network(rewards, discounts, values):
    # First reward must be zero, first discount must be one
    first_reward = tf.Assert(
        tf.reduce_all(tf.equal(rewards[:, 0, :], 0.0)), [rewards[:, 0, :]])
    first_discount = tf.Assert(
        tf.reduce_all(tf.equal(discounts[:, 0, :], 1.0)), [discounts[:, 0, :]])

    with tf.control_dependencies([first_reward, first_discount]):
        with tf.variable_scope('preturn'):
            accum_value_discounts = tf.cumprod(discounts, axis=1, exclusive=False)
            accum_reward_discounts = tf.cumprod(discounts, axis=1, exclusive=True)
            discounted_values = values * accum_value_discounts
            discounted_rewards = rewards * accum_reward_discounts
            cumulative_rewards = tf.cumsum(discounted_rewards, axis=1)
            preturns = cumulative_rewards + discounted_values

            util.activation_summary(preturns)
            return preturns
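# Worked example of the two cumprod calls above (hypothetical discounts for one batch row):
#   discounts                            = [1.0, 0.9, 0.9]
#   exclusive=False (applied to values)  = [1.0, 0.9, 0.81]
#   exclusive=True  (applied to rewards) = [1.0, 1.0, 0.9]
# so each value is discounted one step further than the reward received at the same index.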
def get_marginal_likelihood(yt, mean_yt, xt, s, alpha, beta, eta_mu, eta_sigma, eps, sigma_px, epsilon=1e-8):
    yt_expand = tf.expand_dims(yt, 0)
    mean_yt = tf.reshape(mean_yt, [s, FLAGS.batch_size, 784])
    xt = tf.reshape(xt, [1, s, FLAGS.batch_size, FLAGS.hidden_size])
    # p_ygivenx = tf.reduce_prod(tf.pow(mean_yt, yt_expand) * tf.pow(1 - mean_yt, 1 - yt_expand), axis=2)

    # Stick-breaking mixture weights: pi_k = v_k * prod_{j<k} (1 - v_j); the final component
    # takes the leftover mass.
    v = alpha / (alpha + beta)
    pi = tf.concat(0, [v, [1.0]]) * tf.concat(0, [[1.0], tf.cumprod(1 - v)])
    p_x = gaussian_mixture_pdf(eta_mu, tf.square(eta_sigma) + tf.square(sigma_px), xt, pi)
    log_p_y_s = tf.reduce_sum(yt_expand * tf.log(mean_yt + epsilon)
                              + (1.0 - yt_expand) * tf.log(1.0 - mean_yt + epsilon), 2) \
        + tf.log(p_x) \
        + 0.5 * tf.reduce_sum(tf.square(eps), 2)
    log_p_y_s_max = tf.reduce_max(log_p_y_s, reduction_indices=0)
    log_p_y = tf.log(tf.reduce_mean(tf.exp(log_p_y_s - log_p_y_s_max), 0)) + log_p_y_s_max
    return tf.reduce_mean(log_p_y)
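# Stick-breaking check for the pi construction above (hypothetical v): with v = [0.5, 0.5],
#   tf.cumprod(1 - v) == [0.5, 0.25]
#   pi == [0.5, 0.5, 1.0] * [1.0, 0.5, 0.25] == [0.5, 0.25, 0.25]
# which sums to one, as mixture weights should.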
# Taken from: https://github.com/tensorflow/tensorflow/issues/6322
def sample(self, n=None):
    if self._bernoulli is None:
        self._bernoulli = Bernoulli(self._steps_probs)

    sample = self._bernoulli.sample(n)
    # The cumulative product zeroes out every draw after the first failure, so summing along the
    # last axis counts the number of consecutive successes, i.e. the sampled number of steps.
    sample = tf.cumprod(sample, tf.rank(sample) - 1)
    sample = tf.reduce_sum(sample, -1)
    return sample
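# Illustration of the counting trick above (hypothetical 0/1 draws along the last axis):
#   draws                  = [1, 1, 0, 1]
#   tf.cumprod(draws, -1)  = [1, 1, 0, 0]
#   tf.reduce_sum(..., -1) = 2
# i.e. the sampled "number of steps" is the length of the initial run of successes.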
def crf_loss(y, y_, transitions, nums_tags, batch_size):
    tag_scores = y
    nums_steps = len(tf.unstack(tag_scores, axis=1))
    masks = tf.cast(tf.sign(y_), dtype=tf.float32)
    lengths = tf.reduce_sum(tf.sign(y_), axis=1)
    tag_ids = y_
    b_id = tf.stack([[nums_tags]] * batch_size)
    # e_id = tf.pack([[0]] * batch_size)
    padded_tag_ids = tf.concat(axis=1, values=[b_id, tag_ids])
    idx_tag_ids = tf.stack([tf.slice(padded_tag_ids, [0, i], [-1, 2]) for i in range(nums_steps)], axis=1)
    tag_ids = tf.contrib.layers.one_hot_encoding(tag_ids, nums_tags)
    point_score = tf.reduce_sum(tag_scores * tag_ids, axis=2)
    point_score *= masks

    # Save for future
    # trans_score = tf.gather_nd(transitions, idx_tag_ids)
    trans_sh = tf.stack(transitions.get_shape())
    trans_sh = tf.cumprod(trans_sh, exclusive=True, reverse=True)
    flat_tag_ids = tf.reduce_sum(trans_sh * idx_tag_ids, axis=2)
    trans_score = tf.gather(tf.reshape(transitions, [-1]), flat_tag_ids)
    ##
    # extend_mask = tf.concat(1, [tf.ones([batch_size, 1]), masks])
    extend_mask = masks
    trans_score *= extend_mask

    target_path_score = tf.reduce_sum(point_score) + tf.reduce_sum(trans_score)
    total_path_score, _, _ = Forward(tag_scores, transitions, nums_tags, lengths, batch_size)()
    return -(target_path_score - total_path_score)
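# Worked example of the stride trick above (hypothetical transition matrix of shape [5, 5]):
#   tf.cumprod([5, 5], exclusive=True, reverse=True) == [5, 1]
# so a tag pair (i, j) flattens to 5*i + j, which indexes the transitions tensor reshaped to [-1].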
def _allocation(self, usage):
    r"""Computes allocation by sorting `usage`.

    This corresponds to the value a = a_t[\phi_t[j]] in the paper.

    Args:
      usage: tensor of shape `[batch_size, memory_size]` indicating current
          memory usage. This is equal to u_t in the paper when we only have one
          write head, but for multiple write heads, one should update the usage
          while iterating through the write heads to take into account the
          allocation returned by this function.

    Returns:
      Tensor of shape `[batch_size, memory_size]` corresponding to allocation.
    """
    with tf.name_scope('allocation'):
        # Ensure values are not too small prior to cumprod.
        usage = _EPSILON + (1 - _EPSILON) * usage

        nonusage = 1 - usage
        sorted_nonusage, indices = tf.nn.top_k(
            nonusage, k=self._memory_size, name='sort')
        sorted_usage = 1 - sorted_nonusage
        prod_sorted_usage = tf.cumprod(sorted_usage, axis=1, exclusive=True)
        sorted_allocation = sorted_nonusage * prod_sorted_usage
        inverse_indices = util.batch_invert_permutation(indices)

        # This final line "unsorts" sorted_allocation, so that the indexing
        # corresponds to the original indexing of `usage`.
        return util.batch_gather(sorted_allocation, inverse_indices)
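# Numeric illustration (hypothetical sorted usages for one batch row): with
# sorted_usage = [0.2, 0.6, 1.0], prod_sorted_usage == [1.0, 0.2, 0.12] and
# sorted_allocation == [0.8, 0.08, 0.0], so a fully used slot receives no new allocation.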
def lambda_preturn_network(preturns, lambdas):
    # Final lambda must be zero
    final_lambda = tf.Assert(
        tf.reduce_all(tf.equal(lambdas[:, -1, :], 0.0)), [lambdas[:, -1, :]])

    with tf.control_dependencies([final_lambda]):
        with tf.variable_scope('lambda_preturn'):
            accum_lambda = tf.cumprod(lambdas, axis=1, exclusive=True)
            lambda_bar = (1 - lambdas) * accum_lambda  # This should always sum to 1
            lambda_preturn = tf.reduce_sum(
                lambda_bar * preturns, reduction_indices=1)

            util.activation_summary(lambda_preturn)
            return lambda_preturn
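# Why lambda_bar sums to one (hypothetical lambdas for one batch row, final value zero as asserted):
#   lambdas                             = [0.8, 0.5, 0.0]
#   tf.cumprod(lambdas, exclusive=True) = [1.0, 0.8, 0.4]
#   (1 - lambdas) * (that product)      = [0.2, 0.4, 0.4]
# and the weights telescope to 1 whenever the final lambda is exactly zero.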
def cumprod(x, axis=0):
    """Cumulative product of the values in a tensor, alongside the specified axis.

    # Arguments
        x: A tensor or variable.
        axis: An integer, the axis to compute the product.

    # Returns
        A tensor of the cumulative product of values of `x` along `axis`.
    """
    axis = _normalize_axis(axis, ndim(x))
    return tf.cumprod(x, axis=axis)
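# Usage sketch for the backend wrapper above (assumes the surrounding module provides the
# `ndim` and `_normalize_axis` helpers it calls):
x = tf.constant([[1., 2., 3.],
                 [4., 5., 6.]])
cumprod(x, axis=1)   # [[1., 2., 6.], [4., 20., 120.]]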
def unravel_index(indices, shape):
    with tf.name_scope('unravel_index'):
        indices = tf.expand_dims(indices, 0)
        shape = tf.expand_dims(shape, 1)
        strides_shifted = tf.cumprod(shape, exclusive=True, reverse=True)
        res = (indices // strides_shifted) % shape
        return tf.transpose(res, (1, 0))
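# Usage sketch (hypothetical [3, 4] grid): flat indices [0, 5, 11] unravel to the coordinate
# pairs [[0, 0], [1, 1], [2, 3]].
coords = unravel_index(tf.constant([0, 5, 11]), tf.constant([3, 4]))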
# The following snippet is from fft_tree_constrained_inference.py in the wip-constrained-extractor
# project (author: brain-research).
def padded_gather_nd(params, indices, r, idx_rank):
    """Version of gather_nd that supports gradients and blank indices.

    Works like gather_nd, but if an index is given as -1, a 0 will be inserted
    in that spot in the output tensor.

    Args:
      params: tensor from which to gather (see gather_nd).
      indices: tensor of indices (see gather_nd).
      r: rank of params
      idx_rank: rank of indices

    Returns:
      result: tensor shaped like indices containing things gathered from params
    """
    # treats -1 indices as always gathering zeros
    # pad 0 onto beginning of final dim of params
    broadcasted_shift = tf.reshape(
        tf.one_hot([r - 1], r, dtype=tf.int32), [1] * (idx_rank - 1) + [r])
    shifted_idx = indices + broadcasted_shift
    # unused indices might contain garbage, just 0 this out
    shifted_idx = tf.maximum(0, shifted_idx)
    padded_params = tf.pad(params, [[0, 0]] * (r - 1) + [[1, 0]])

    # no gather_nd for now because gradient doesn't work
    # return tf.gather_nd(padded_params, shifted_idx)

    # HACK: work around lack of gradient for gather_nd
    # params has shape of rank r
    # indices has shape of rank idx_rank
    params_shape = [d.value for d in padded_params.get_shape()]
    idx_shape = [d.value for d in shifted_idx.get_shape()]
    flat_params_x_size = 1
    for dim in params_shape:
        flat_params_x_size *= dim
    flat_idx_x_size = 1
    for dim in idx_shape[:-1]:
        flat_idx_x_size *= dim

    index_strides = tf.concat(
        0, [tf.cumprod(params_shape[1:], reverse=True), [1]])
    index_strides = tf.reshape(index_strides, [1] * (idx_rank - 1) + [-1])
    flat_idx = tf.reduce_sum(shifted_idx * index_strides, idx_rank - 1)
    flat_idx = tf.reshape(flat_idx, [flat_idx_x_size])
    flat_params = tf.reshape(padded_params, [flat_params_x_size])

    result = tf.gather(flat_params, flat_idx)
    result = tf.reshape(result, idx_shape[:-1])
    return result
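# Worked example of the stride computation above (hypothetical padded params of shape [2, 3, 5]):
#   tf.cumprod([3, 5], reverse=True) == [15, 5], and appending [1] gives strides [15, 5, 1],
# so an index triple (i, j, k) flattens to 15*i + 5*j + k for the final tf.gather.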