Python add_n() usage examples (source code)
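Before the project snippets below, a minimal sketch of what add_n does (math_ops.add_n is exposed publicly as tf.add_n; TensorFlow 1.x graph mode is assumed): it sums a list of tensors that share one shape and dtype in a single fused op, rather than chaining binary adds.

import tensorflow as tf

a = tf.constant([1.0, 2.0])
b = tf.constant([3.0, 4.0])
c = tf.constant([5.0, 6.0])
total = tf.add_n([a, b, c])  # same result as a + b + c, but one AddN op

with tf.Session() as sess:
    print(sess.run(total))  # [ 9. 12.]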
def sequence_classifier(decoding, labels, sampling_decoding=None, name=None):
"""Returns predictions and loss for sequence of predictions.
Args:
decoding: List of Tensors with predictions.
labels: List of Tensors with labels.
sampling_decoding: Optional list of Tensors with predictions to be used
in sampling, e.g. they shouldn't have a dependency on outputs.
If not provided, `decoding` is used.
name: Operation name.
Returns:
Predictions and losses tensors.
"""
with ops.name_scope(name, "sequence_classifier", [decoding, labels]):
predictions, xent_list = [], []
for i, pred in enumerate(decoding):
xent_list.append(nn.softmax_cross_entropy_with_logits(
pred, labels[i],
name="sequence_loss/xent_raw{0}".format(i)))
if sampling_decoding:
predictions.append(nn.softmax(sampling_decoding[i]))
else:
predictions.append(nn.softmax(pred))
xent = math_ops.add_n(xent_list, name="sequence_loss/xent")
loss = math_ops.reduce_sum(xent, name="sequence_loss")
return array_ops_.pack(predictions, axis=1), loss
def apply_regularization(regularizer, weights_list=None):
"""Returns the summed penalty by applying `regularizer` to the `weights_list`.
Adding a regularization penalty over the layer weights and embedding weights
can help prevent overfitting the training data. Regularization over layer
biases is less common/useful, but assuming proper data preprocessing/mean
subtraction, it usually shouldn't hurt much either.
Args:
regularizer: A function that takes a single `Tensor` argument and returns
a scalar `Tensor` output.
weights_list: List of weights `Tensors` or `Variables` to apply
`regularizer` over. Defaults to the `GraphKeys.WEIGHTS` collection if
`None`.
Returns:
A scalar representing the overall regularization penalty.
Raises:
ValueError: If `regularizer` does not return a scalar output.
"""
if not weights_list:
weights_list = ops.get_collection(ops.GraphKeys.WEIGHTS)
with ops.op_scope(weights_list, 'get_regularization_penalty') as scope:
penalties = [regularizer(w) for w in weights_list]
for p in penalties:
if p.get_shape().ndims != 0:
raise ValueError('regularizer must return a scalar Tensor instead of a '
'Tensor with rank %d.' % p.get_shape().ndims)
summed_penalty = math_ops.add_n(penalties, name=scope)
ops.add_to_collection(ops.GraphKeys.REGULARIZATION_LOSSES, summed_penalty)
return summed_penalty
grl_seq2seq.py (project: Deep-Reinforcement-Learning-for-Dialogue-Generation-in-tensorflow, author: liuyuemaicha)
def sequence_loss_by_example(logits, targets, weights,
average_across_timesteps=True,
softmax_loss_function=None,
name=None):
if len(targets) != len(logits) or len(weights) != len(logits):
raise ValueError("Lengths of logits, weights, and targets must be the same "
"%d, %d, %d." % (len(logits), len(weights), len(targets)))
with ops.name_scope(name, "sequence_loss_by_example", logits + targets + weights):
log_perp_list = []
for logit, target, weight in zip(logits, targets, weights):
if softmax_loss_function is None:
# TODO(irving,ebrevdo): This reshape is needed because
# sequence_loss_by_example is called with scalars sometimes, which
# violates our general scalar strictness policy.
target = array_ops.reshape(target, [-1])
crossent = nn_ops.sparse_softmax_cross_entropy_with_logits(
logit, target)
else:
crossent = softmax_loss_function(logit, target)
log_perp_list.append(crossent * weight)
log_perps = math_ops.add_n(log_perp_list)
if average_across_timesteps:
total_size = math_ops.add_n(weights)
total_size += 1e-12 # Just to avoid division by 0 for all-0 weights.
log_perps /= total_size
return log_perps
stochastic_graph.py (project: DeepLearning_VirtualReality_BigData_Project, author: rashmitripathi)
def _add_n_or_sum(terms):
# add_n works for Tensors of the same dtype and shape
shape = terms[0].get_shape()
dtype = terms[0].dtype
if all(term.get_shape().is_fully_defined() and
term.get_shape().is_compatible_with(shape) and term.dtype == dtype
for term in terms):
return math_ops.add_n(terms)
else:
return sum(terms)
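A small illustration of why the fallback above exists (a sketch, not part of the original file): add_n requires every term to have the same shape and dtype, while Python's built-in sum() lowers to a chain of add ops that can broadcast.

import tensorflow as tf

x = tf.ones([2, 3])
y = tf.ones([2, 3])
z = tf.ones([3])  # broadcastable against [2, 3], but a different shape

same_shape_total = tf.add_n([x, y])  # fine: identical shapes and dtypes
mixed_total = sum([x, y, z])         # fine: 0 + x + y + z with broadcasting
# tf.add_n([x, y, z])                # fails: AddN does not broadcast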
stochastic_graph_test.py (project: DeepLearning_VirtualReality_BigData_Project, author: rashmitripathi)
def _testSurrogateLoss(self, session, losses, expected_addl_terms, xs):
surrogate_loss = sg.surrogate_loss(losses)
expected_surrogate_loss = math_ops.add_n(losses + expected_addl_terms)
self.assertAllClose(*session.run([surrogate_loss, expected_surrogate_loss]))
# Test backprop
expected_grads = gradients_impl.gradients(ys=expected_surrogate_loss, xs=xs)
surrogate_grads = gradients_impl.gradients(ys=surrogate_loss, xs=xs)
self.assertEqual(len(expected_grads), len(surrogate_grads))
grad_values = session.run(expected_grads + surrogate_grads)
n_grad = len(expected_grads)
self.assertAllClose(grad_values[:n_grad], grad_values[n_grad:])
sharded_mutable_dense_hashtable.py (project: DeepLearning_VirtualReality_BigData_Project, author: rashmitripathi)
def size(self, name=None):
with ops.name_scope(name, 'sharded_mutable_hash_table_size'):
sizes = [
self._table_shards[i].size() for i in range(self._num_shards)
]
return math_ops.add_n(sizes)
sdca_ops.py (project: DeepLearning_VirtualReality_BigData_Project, author: rashmitripathi)
def _l2_loss(self, l2):
"""Computes the (un-normalized) l2 loss of the model."""
with name_scope('sdca/l2_loss'):
sums = []
for name in ['sparse_features_weights', 'dense_features_weights']:
for weights in self._convert_n_to_tensor(self._variables[name]):
with ops.device(weights.device):
sums.append(
math_ops.reduce_sum(
math_ops.square(math_ops.cast(weights, dtypes.float64))))
sum = math_ops.add_n(sums)
# SDCA L2 regularization cost is: l2 * sum(weights^2) / 2
return l2 * sum / 2.0
sdca_ops.py (project: DeepLearning_VirtualReality_BigData_Project, author: rashmitripathi)
def approximate_duality_gap(self):
"""Add operations to compute the approximate duality gap.
Returns:
An Operation that computes the approximate duality gap over all
examples.
"""
with name_scope('sdca/approximate_duality_gap'):
_, values_list = self._hashtable.export_sharded()
shard_sums = []
for values in values_list:
with ops.device(values.device):
# For large tables to_double() below allocates a large temporary
# tensor that is freed once the sum operation completes. To reduce
# peak memory usage in cases where we have multiple large tables on a
# single device, we serialize these operations.
# Note that we need double precision to get accurate results.
with ops.control_dependencies(shard_sums):
shard_sums.append(
math_ops.reduce_sum(math_ops.to_double(values), 0))
summed_values = math_ops.add_n(shard_sums)
primal_loss = summed_values[1]
dual_loss = summed_values[2]
example_weights = summed_values[3]
# Note: we return NaN if there are no weights or all weights are 0, e.g.
# if no examples have been processed.
return (primal_loss + dual_loss + self._l1_loss() +
(2.0 * self._l2_loss(self._symmetric_l2_regularization()))
) / example_weights
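The control_dependencies trick in the loop above serializes the per-shard reductions so that only one large float64 temporary is alive at a time. A standalone sketch of that pattern (the shard tensors here are random placeholders, not the SDCA hash-table values):

import tensorflow as tf

shards = [tf.random_uniform([1000, 1000]) for _ in range(4)]  # stand-ins for large tables
partial_sums = []
for shard in shards:
    # Each new cast/reduce must wait for all previous ones, so their float64
    # temporaries never coexist in memory.
    with tf.control_dependencies(partial_sums):
        partial_sums.append(tf.reduce_sum(tf.cast(shard, tf.float64)))
total = tf.add_n(partial_sums)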
odes.py (project: DeepLearning_VirtualReality_BigData_Project, author: rashmitripathi)
def _scaled_dot_product(scale, xs, ys, name=None):
"""Calculate a scaled, vector inner product between lists of Tensors."""
with ops.name_scope(name, 'scaled_dot_product', [scale, xs, ys]) as scope:
# Some of the parameters in our Butcher tableau include zeros. Using
# _possibly_nonzero lets us avoid wasted computation.
return math_ops.add_n([(scale * x) * y for x, y in zip(xs, ys)
if _possibly_nonzero(x) or _possibly_nonzero(y)],
name=scope)
odes.py (project: DeepLearning_VirtualReality_BigData_Project, author: rashmitripathi)
def _dot_product(xs, ys, name=None):
"""Calculate the vector inner product between two lists of Tensors."""
with ops.name_scope(name, 'dot_product', [xs, ys]) as scope:
return math_ops.add_n([x * y for x, y in zip(xs, ys)], name=scope)
seq2seq_ops.py (project: DeepLearning_VirtualReality_BigData_Project, author: rashmitripathi)
def sequence_classifier(decoding, labels, sampling_decoding=None, name=None):
"""Returns predictions and loss for sequence of predictions.
Args:
decoding: List of Tensors with predictions.
labels: List of Tensors with labels.
sampling_decoding: Optional list of Tensors with predictions to be used
in sampling, e.g. they shouldn't have a dependency on outputs.
If not provided, `decoding` is used.
name: Operation name.
Returns:
Predictions and losses tensors.
"""
with ops.name_scope(name, "sequence_classifier", [decoding, labels]):
predictions, xent_list = [], []
for i, pred in enumerate(decoding):
xent_list.append(nn.softmax_cross_entropy_with_logits(
labels=labels[i], logits=pred,
name="sequence_loss/xent_raw{0}".format(i)))
if sampling_decoding:
predictions.append(nn.softmax(sampling_decoding[i]))
else:
predictions.append(nn.softmax(pred))
xent = math_ops.add_n(xent_list, name="sequence_loss/xent")
loss = math_ops.reduce_sum(xent, name="sequence_loss")
return array_ops.stack(predictions, axis=1), loss
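A usage sketch for sequence_classifier as defined above; the shapes below are illustrative only.

import tensorflow as tf

batch, num_classes, steps = 4, 5, 3
decoding = [tf.random_normal([batch, num_classes]) for _ in range(steps)]
labels = [tf.one_hot(tf.random_uniform([batch], maxval=num_classes, dtype=tf.int32),
                     num_classes) for _ in range(steps)]
predictions, loss = sequence_classifier(decoding, labels)
# predictions: [batch, steps, num_classes]; loss: scalar sum of per-step cross-entropy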
regularizers.py (project: DeepLearning_VirtualReality_BigData_Project, author: rashmitripathi)
def apply_regularization(regularizer, weights_list=None):
"""Returns the summed penalty by applying `regularizer` to the `weights_list`.
Adding a regularization penalty over the layer weights and embedding weights
can help prevent overfitting the training data. Regularization over layer
biases is less common/useful, but assuming proper data preprocessing/mean
subtraction, it usually shouldn't hurt much either.
Args:
regularizer: A function that takes a single `Tensor` argument and returns
a scalar `Tensor` output.
weights_list: List of weights `Tensors` or `Variables` to apply
`regularizer` over. Defaults to the `GraphKeys.WEIGHTS` collection if
`None`.
Returns:
A scalar representing the overall regularization penalty.
Raises:
ValueError: If `regularizer` does not return a scalar output, or if we find
no weights.
"""
if not weights_list:
weights_list = ops.get_collection(ops.GraphKeys.WEIGHTS)
if not weights_list:
raise ValueError('No weights to regularize.')
with ops.name_scope('get_regularization_penalty',
values=weights_list) as scope:
penalties = [regularizer(w) for w in weights_list]
penalties = [
p if p is not None else constant_op.constant(0.0) for p in penalties
]
for p in penalties:
if p.get_shape().ndims != 0:
raise ValueError('regularizer must return a scalar Tensor instead of a '
'Tensor with rank %d.' % p.get_shape().ndims)
summed_penalty = math_ops.add_n(penalties, name=scope)
ops.add_to_collection(ops.GraphKeys.REGULARIZATION_LOSSES, summed_penalty)
return summed_penalty
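A usage sketch, assuming the TF 1.x tf.contrib.layers namespace where this helper and l2_regularizer are exposed; the variables are illustrative.

import tensorflow as tf

w1 = tf.get_variable('w1', shape=[10, 20])
w2 = tf.get_variable('w2', shape=[20, 5])
l2 = tf.contrib.layers.l2_regularizer(scale=1e-4)
penalty = tf.contrib.layers.apply_regularization(l2, weights_list=[w1, w2])
# The penalty is also added to GraphKeys.REGULARIZATION_LOSSES, so a total loss is
# typically model_loss + tf.add_n(tf.get_collection(tf.GraphKeys.REGULARIZATION_LOSSES)).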
embedding_ops_test.py (project: DeepLearning_VirtualReality_BigData_Project, author: rashmitripathi)
def test_distributive_property(self):
"""Verifies the distributive property of matrix multiplication."""
with self.test_session():
params = constant_op.constant([.1, .2, .3])
sp_values_a = sparse_tensor_lib.SparseTensor(
values=["a"], indices=[[0, 0]], dense_shape=[3, 1])
sp_values_b = sparse_tensor_lib.SparseTensor(
values=["b"], indices=[[2, 0]], dense_shape=[3, 1])
sp_values_c = sparse_tensor_lib.SparseTensor(
values=["c"], indices=[[2, 0]], dense_shape=[3, 1])
sp_values = sparse_tensor_lib.SparseTensor(
values=["a", "b", "c"],
indices=[[0, 0], [2, 0], [2, 1]],
dense_shape=[3, 2])
result_a = embedding_ops._sampled_scattered_embedding_lookup_sparse(
params, sp_values_a, dimension=4, hash_key=self._hash_key)
result_b = embedding_ops._sampled_scattered_embedding_lookup_sparse(
params, sp_values_b, dimension=4, hash_key=self._hash_key)
result_c = embedding_ops._sampled_scattered_embedding_lookup_sparse(
params, sp_values_c, dimension=4, hash_key=self._hash_key)
result = embedding_ops._sampled_scattered_embedding_lookup_sparse(
params, sp_values, dimension=4, hash_key=self._hash_key)
result_abc = math_ops.add_n([result_a, result_b, result_c])
self.assertAllClose(result.eval(), result_abc.eval())
clustering_ops.py (project: DeepLearning_VirtualReality_BigData_Project, author: rashmitripathi)
def _full_batch_training_op(self, inputs, cluster_idx_list, cluster_centers):
"""Creates an op for training for full batch case.
Args:
inputs: list of input Tensors.
cluster_idx_list: A vector (or list of vectors). Each element in the
vector corresponds to an input row in 'inp' and specifies the cluster id
corresponding to the input.
cluster_centers: Tensor Ref of cluster centers.
Returns:
An op that performs a full-batch update of the cluster centers.
"""
cluster_sums = []
cluster_counts = []
epsilon = constant_op.constant(1e-6, dtype=inputs[0].dtype)
for inp, cluster_idx in zip(inputs, cluster_idx_list):
with ops.colocate_with(inp):
cluster_sums.append(
math_ops.unsorted_segment_sum(inp, cluster_idx, self._num_clusters))
cluster_counts.append(
math_ops.unsorted_segment_sum(
array_ops.reshape(
array_ops.ones(
array_ops.reshape(array_ops.shape(inp)[0], [-1])),
[-1, 1]), cluster_idx, self._num_clusters))
with ops.colocate_with(cluster_centers):
new_clusters_centers = math_ops.add_n(cluster_sums) / (math_ops.cast(
math_ops.add_n(cluster_counts), cluster_sums[0].dtype) + epsilon)
if self._clusters_l2_normalized():
new_clusters_centers = nn_impl.l2_normalize(new_clusters_centers, dim=1)
return state_ops.assign(cluster_centers, new_clusters_centers)
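A tiny worked example of the segment-sum bookkeeping above (toy data, not from the original): points are summed and counted per cluster id, and the new centers are the per-cluster means.

import tensorflow as tf

points = tf.constant([[1., 1.], [2., 2.], [3., 3.]])
cluster_idx = tf.constant([0, 1, 0])  # points 0 and 2 belong to cluster 0
num_clusters = 2
sums = tf.unsorted_segment_sum(points, cluster_idx, num_clusters)             # [[4, 4], [2, 2]]
counts = tf.unsorted_segment_sum(tf.ones([3, 1]), cluster_idx, num_clusters)  # [[2], [1]]
new_centers = sums / counts  # [[2, 2], [2, 2]]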
def sequence_loss_by_example(logits,
targets,
weights,
average_across_timesteps=True,
softmax_loss_function=None,
name=None):
"""Weighted cross-entropy loss for a sequence of logits (per example).
Args:
logits: List of 2D Tensors of shape [batch_size x num_decoder_symbols].
targets: List of 1D batch-sized int32 Tensors of the same length as logits.
weights: List of 1D batch-sized float-Tensors of the same length as logits.
average_across_timesteps: If set, divide the returned cost by the total
label weight.
softmax_loss_function: Function (labels-batch, inputs-batch) -> loss-batch
to be used instead of the standard softmax (the default if this is None).
name: Optional name for this operation, default: "sequence_loss_by_example".
Returns:
1D batch-sized float Tensor: The log-perplexity for each sequence.
Raises:
ValueError: If len(logits) is different from len(targets) or len(weights).
"""
if len(targets) != len(logits) or len(weights) != len(logits):
raise ValueError("Lengths of logits, weights, and targets must be the same "
"%d, %d, %d." % (len(logits), len(weights), len(targets)))
with ops.name_scope(name, "sequence_loss_by_example",
logits + targets + weights):
log_perp_list = []
for logit, target, weight in zip(logits, targets, weights):
if softmax_loss_function is None:
# TODO(irving,ebrevdo): This reshape is needed because
# sequence_loss_by_example is called with scalars sometimes, which
# violates our general scalar strictness policy.
target = array_ops.reshape(target, [-1])
crossent = nn_ops.sparse_softmax_cross_entropy_with_logits(
labels=target, logits=logit)
else:
crossent = softmax_loss_function(target, logit)
log_perp_list.append(crossent * weight)
log_perps = math_ops.add_n(log_perp_list)
if average_across_timesteps:
total_size = math_ops.add_n(weights)
total_size += 1e-12 # Just to avoid division by 0 for all-0 weights.
log_perps /= total_size
return log_perps
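A usage sketch for the function above; the toy shapes follow its docstring.

import tensorflow as tf

batch, vocab, steps = 4, 10, 3
logits = [tf.random_normal([batch, vocab]) for _ in range(steps)]
targets = [tf.random_uniform([batch], maxval=vocab, dtype=tf.int32) for _ in range(steps)]
weights = [tf.ones([batch]) for _ in range(steps)]
per_example_loss = sequence_loss_by_example(logits, targets, weights)
# One log-perplexity value per batch element, averaged over the timestep weights.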
def sequence_loss_by_example(logits, targets, weights,
average_across_timesteps=True,
softmax_loss_function=None, name=None):
"""Weighted cross-entropy loss for a sequence of logits (per example).
Args:
logits: List of 2D Tensors of shape [batch_size x num_decoder_symbols].
targets: List of 1D batch-sized int32 Tensors of the same length as logits.
weights: List of 1D batch-sized float-Tensors of the same length as logits.
average_across_timesteps: If set, divide the returned cost by the total
label weight.
softmax_loss_function: Function (inputs-batch, labels-batch) -> loss-batch
to be used instead of the standard softmax (the default if this is None).
name: Optional name for this operation, default: "sequence_loss_by_example".
Returns:
1D batch-sized float Tensor: The log-perplexity for each sequence.
Raises:
ValueError: If len(logits) is different from len(targets) or len(weights).
"""
if len(targets) != len(logits) or len(weights) != len(logits):
raise ValueError("Lengths of logits, weights, and targets must be the same "
"%d, %d, %d." % (len(logits), len(weights), len(targets)))
with ops.op_scope(logits + targets + weights, name,
"sequence_loss_by_example"):
log_perp_list = []
for logit, target, weight in zip(logits, targets, weights):
if softmax_loss_function is None:
# TODO(irving,ebrevdo): This reshape is needed because
# sequence_loss_by_example is called with scalars sometimes, which
# violates our general scalar strictness policy.
target = array_ops.reshape(target, [-1])
crossent = nn_ops.sparse_softmax_cross_entropy_with_logits(logits=logit,
labels=target)
else:
crossent = softmax_loss_function(logit, target)
log_perp_list.append(crossent * weight)
log_perps = math_ops.add_n(log_perp_list)
if average_across_timesteps:
total_size = math_ops.add_n(weights)
total_size += 1e-12 # Just to avoid division by 0 for all-0 weights.
log_perps /= total_size
return log_perps