def _broadcast_weights(weights, values):
"""Broadcast `weights` to the same shape as `values`.
This returns a version of `weights` following the same broadcast rules as
`mul(weights, values)`. When computing a weighted average, use this function
to broadcast `weights` before summing them; e.g.,
`reduce_sum(w * v) / reduce_sum(_broadcast_weights(w, v))`.
Args:
weights: `Tensor` whose shape is broadcastable to `values`.
values: `Tensor` of any shape.
Returns:
`weights` broadcast to `values` shape.
"""
weights_shape = weights.get_shape()
values_shape = values.get_shape()
if(weights_shape.is_fully_defined() and
values_shape.is_fully_defined() and
weights_shape.is_compatible_with(values_shape)):
return weights
return math_ops.mul(
weights, array_ops.ones_like(values), name='broadcast_weights')
# =========================================================================== #
# TF Extended metrics: TP and FP arrays.
# =========================================================================== #
python类mul()的实例源码
def _broadcast_weights(weights, values):
"""Broadcast `weights` to the same shape as `values`.
This returns a version of `weights` following the same broadcast rules as
`mul(weights, values)`. When computing a weighted average, use this function
to broadcast `weights` before summing them; e.g.,
`reduce_sum(w * v) / reduce_sum(_broadcast_weights(w, v))`.
Args:
weights: `Tensor` whose shape is broadcastable to `values`.
values: `Tensor` of any shape.
Returns:
`weights` broadcast to `values` shape.
"""
weights_shape = weights.get_shape()
values_shape = values.get_shape()
if(weights_shape.is_fully_defined() and
values_shape.is_fully_defined() and
weights_shape.is_compatible_with(values_shape)):
return weights
return math_ops.mul(
weights, array_ops.ones_like(values), name='broadcast_weights')
# =========================================================================== #
# TF Extended metrics: TP and FP arrays.
# =========================================================================== #
def _scale_losses(losses, weight):
"""Computes the scaled loss.
Args:
losses: A `Tensor` of size [batch_size, d1, ... dN].
weight: A `Tensor` of size [1], [batch_size] or [batch_size, d1, ... dN].
The `losses` are reduced (tf.reduce_sum) until its dimension matches
that of `weight` at which point the reduced `losses` are element-wise
multiplied by `weight` and a final reduce_sum is computed on the result.
Conceptually, this operation is equivalent to broadcasting (tiling)
`weight` to be the same size as `losses`, performing an element-wise
multiplication, and summing the result.
Returns:
A scalar tf.float32 `Tensor` whose value represents the sum of the scaled
`losses`.
"""
# First, compute the sum of the losses over all elements:
start_index = max(0, weight.get_shape().ndims)
reduction_indices = list(range(start_index, losses.get_shape().ndims))
reduced_losses = math_ops.reduce_sum(losses,
reduction_indices=reduction_indices)
reduced_losses = math_ops.mul(reduced_losses, weight)
return math_ops.reduce_sum(reduced_losses)
def hinge_loss(logits, target, scope=None):
"""Method that returns the loss tensor for hinge loss.
Args:
logits: The logits, a float tensor.
target: The ground truth output tensor. Its shape should match the shape of
logits. The values of the tensor are expected to be 0.0 or 1.0.
scope: The scope for the operations performed in computing the loss.
Returns:
A `Tensor` of same shape as logits and target representing the loss values
across the batch.
Raises:
ValueError: If the shapes of `logits` and `target` don't match.
"""
with ops.name_scope(scope, "hinge_loss", [logits, target]) as scope:
logits.get_shape().assert_is_compatible_with(target.get_shape())
# We first need to convert binary labels to -1/1 labels (as floats).
target = math_ops.to_float(target)
all_ones = array_ops.ones_like(target)
labels = math_ops.sub(2 * target, all_ones)
losses = nn_ops.relu(math_ops.sub(all_ones, math_ops.mul(labels, logits)))
return losses
def _scale_losses(losses, weights):
"""Computes the scaled loss.
Args:
losses: A `Tensor` of size [batch_size, d1, ... dN].
weights: A `Tensor` of size [1], [batch_size] or [batch_size, d1, ... dN].
The `losses` are reduced (tf.reduce_sum) until its dimension matches
that of `weights` at which point the reduced `losses` are element-wise
multiplied by `weights` and a final reduce_sum is computed on the result.
Conceptually, this operation is equivalent to broadcasting (tiling)
`weights` to be the same size as `losses`, performing an element-wise
multiplication, and summing the result.
Returns:
A scalar tf.float32 `Tensor` whose value represents the sum of the scaled
`losses`.
"""
# First, compute the sum of the losses over all elements:
start_index = max(0, weights.get_shape().ndims)
reduction_indices = list(range(start_index, losses.get_shape().ndims))
reduced_losses = math_ops.reduce_sum(losses,
reduction_indices=reduction_indices)
reduced_losses = math_ops.mul(reduced_losses, weights)
return math_ops.reduce_sum(reduced_losses)
def hinge_loss(logits, labels=None, scope=None, target=None):
"""Method that returns the loss tensor for hinge loss.
Args:
logits: The logits, a float tensor.
labels: The ground truth output tensor. Its shape should match the shape of
logits. The values of the tensor are expected to be 0.0 or 1.0.
scope: The scope for the operations performed in computing the loss.
target: Deprecated alias for `labels`.
Returns:
A `Tensor` of same shape as logits and target representing the loss values
across the batch.
Raises:
ValueError: If the shapes of `logits` and `labels` don't match.
"""
labels = _labels(labels, target)
with ops.name_scope(scope, "hinge_loss", [logits, labels]) as scope:
logits.get_shape().assert_is_compatible_with(labels.get_shape())
# We first need to convert binary labels to -1/1 labels (as floats).
labels = math_ops.to_float(labels)
all_ones = array_ops.ones_like(labels)
labels = math_ops.sub(2 * labels, all_ones)
return nn_ops.relu(math_ops.sub(all_ones, math_ops.mul(labels, logits)))
def _linear_predictions(self, examples):
"""Returns predictions of the form w*x."""
with name_scope('sdca/prediction'):
sparse_variables = self._convert_n_to_tensor(self._variables[
'sparse_features_weights'])
result = 0.0
for sfc, sv in zip(examples['sparse_features'], sparse_variables):
# TODO(sibyl-Aix6ihai): following does not take care of missing features.
result += math_ops.segment_sum(
math_ops.mul(
array_ops.gather(sv, sfc.feature_indices), sfc.feature_values),
sfc.example_indices)
dense_features = self._convert_n_to_tensor(examples['dense_features'])
dense_variables = self._convert_n_to_tensor(self._variables[
'dense_features_weights'])
for i in range(len(dense_variables)):
result += math_ops.matmul(dense_features[i], array_ops.expand_dims(
dense_variables[i], -1))
# Reshaping to allow shape inference at graph construction time.
return array_ops.reshape(result, [-1])
def _broadcast_weights(weights, values):
"""Broadcast `weights` to the same shape as `values`.
This returns a version of `weights` following the same broadcast rules as
`mul(weights, values)`. When computing a weighted average, use this function
to broadcast `weights` before summing them; e.g.,
`reduce_sum(w * v) / reduce_sum(_broadcast_weights(w, v))`.
Args:
weights: `Tensor` whose shape is broadcastable to `values`.
values: `Tensor` of any shape.
Returns:
`weights` broadcast to `values` shape.
"""
weights_shape = weights.get_shape()
values_shape = values.get_shape()
if (weights_shape.is_fully_defined() and
values_shape.is_fully_defined() and
weights_shape.is_compatible_with(values_shape)):
return weights
return math_ops.mul(
weights, array_ops.ones_like(values), name='broadcast_weights')
def _broadcast_weights(weights, values):
"""Broadcast `weights` to the same shape as `values`.
This returns a version of `weights` following the same broadcast rules as
`mul(weights, values)`. When computing a weighted average, use this function
to broadcast `weights` before summing them; e.g.,
`reduce_sum(w * v) / reduce_sum(_broadcast_weights(w, v))`.
Args:
weights: `Tensor` whose shape is broadcastable to `values`.
values: `Tensor` of any shape.
Returns:
`weights` broadcast to `values` shape.
"""
weights_shape = weights.get_shape()
values_shape = values.get_shape()
if(weights_shape.is_fully_defined() and
values_shape.is_fully_defined() and
weights_shape.is_compatible_with(values_shape)):
return weights
return math_ops.mul(
weights, array_ops.ones_like(values), name='broadcast_weights')
# =========================================================================== #
# TF Extended metrics: TP and FP arrays.
# =========================================================================== #
def __call__(self, inputs, state, scope=None):
"""Gated recurrent unit (GRU) with nunits cells."""
with vs.variable_scope(scope or type(self).__name__): # "GRUCell"
with vs.variable_scope("Gates"): # Reset gate and update gate.
# We start with bias of 1.0 to not reset and not update.
r, u, g = array_ops.split(1, 3, _linear([inputs, state],
3 * self._num_units, True, 1.0))
r, u, g = sigmoid(r), sigmoid(u), sigmoid(g)
with vs.variable_scope("Candidate"):
c = self._activation(_linear([inputs, r * state],
self._num_units, True))
new_h = u * state + (1 - u) * c
eps = 1e-13
temp = math_ops.div(math_ops.reduce_sum(math_ops.mul(new_h, state),1), \
math_ops.reduce_sum(math_ops.mul(state,state),1) + eps)
m = array_ops.transpose(g)
t1 = math_ops.mul(m , temp)
t1 = array_ops.transpose(t1)
distract_h = new_h - state * t1
return distract_h, distract_h
def _broadcast_weights(weights, values):
"""Broadcast `weights` to the same shape as `values`.
This returns a version of `weights` following the same broadcast rules as
`mul(weights, values)`. When computing a weighted average, use this function
to broadcast `weights` before summing them; e.g.,
`reduce_sum(w * v) / reduce_sum(_broadcast_weights(w, v))`.
Args:
weights: `Tensor` whose shape is broadcastable to `values`.
values: `Tensor` of any shape.
Returns:
`weights` broadcast to `values` shape.
"""
weights_shape = weights.get_shape()
values_shape = values.get_shape()
if(weights_shape.is_fully_defined() and
values_shape.is_fully_defined() and
weights_shape.is_compatible_with(values_shape)):
return weights
return math_ops.mul(
weights, array_ops.ones_like(values), name='broadcast_weights')
# =========================================================================== #
# TF Extended metrics: TP and FP arrays.
# =========================================================================== #
def log_loss(predictions, targets, weight=1.0, epsilon=1e-7, scope=None):
"""Adds a Log Loss term to the training procedure.
`weight` acts as a coefficient for the loss. If a scalar is provided, then the
loss is simply scaled by the given value. If `weight` is a tensor of size
[batch_size], then the total loss for each sample of the batch is rescaled
by the corresponding element in the `weight` vector. If the shape of
`weight` matches the shape of `predictions`, then the loss of each
measurable element of `predictions` is scaled by the corresponding value of
`weight`.
Args:
predictions: The predicted outputs.
targets: The ground truth output tensor, same dimensions as 'predictions'.
weight: Coefficients for the loss a scalar, a tensor of shape
[batch_size] or a tensor whose shape matches `predictions`.
epsilon: A small increment to add to avoid taking a log of zero.
scope: The scope for the operations performed in computing the loss.
Returns:
A scalar `Tensor` representing the loss value.
Raises:
ValueError: If the shape of `predictions` doesn't match that of `targets` or
if the shape of `weight` is invalid.
"""
with ops.name_scope(scope, "log_loss",
[predictions, targets]) as scope:
predictions.get_shape().assert_is_compatible_with(targets.get_shape())
if weight is None:
raise ValueError("`weight` cannot be None")
predictions = math_ops.to_float(predictions)
targets = math_ops.to_float(targets)
losses = -math_ops.mul(
targets,
math_ops.log(predictions + epsilon)) - math_ops.mul(
(1 - targets), math_ops.log(1 - predictions + epsilon))
return compute_weighted_loss(losses, weight)
def cosine_distance(predictions, targets, dim, weight=1.0, scope=None):
"""Adds a cosine-distance loss to the training procedure.
Note that the function assumes that the predictions and targets are already
unit-normalized.
Args:
predictions: An arbitrary matrix.
targets: A `Tensor` whose shape matches 'predictions'
dim: The dimension along which the cosine distance is computed.
weight: Coefficients for the loss a scalar, a tensor of shape
[batch_size] or a tensor whose shape matches `predictions`.
scope: The scope for the operations performed in computing the loss.
Returns:
A scalar `Tensor` representing the loss value.
Raises:
ValueError: If predictions.shape doesn't match targets.shape, if the ignore
mask is provided and its shape doesn't match targets.shape or if
the ignore mask is not boolean valued.
"""
with ops.name_scope(scope, "cosine_distance_loss",
[predictions, targets]) as scope:
predictions.get_shape().assert_is_compatible_with(targets.get_shape())
if weight is None:
raise ValueError("`weight` cannot be None")
predictions = math_ops.to_float(predictions)
targets = math_ops.to_float(targets)
radial_diffs = math_ops.mul(predictions, targets)
losses = 1 - math_ops.reduce_sum(radial_diffs, reduction_indices=[dim,])
return compute_weighted_loss(losses, weight)
def softmax_classifier(tensor_in,
labels,
weights,
biases,
class_weight=None,
name=None):
"""Returns prediction and loss for softmax classifier.
Args:
tensor_in: Input tensor, [batch_size, feature_size], features.
labels: Tensor, [batch_size, n_classes], labels of the output classes.
weights: Tensor, [batch_size, feature_size], linear transformation
matrix.
biases: Tensor, [batch_size], biases.
class_weight: Tensor, optional, [n_classes], weight for each class.
If not given, all classes are supposed to have weight one.
name: Operation name.
Returns:
Prediction and loss tensors.
"""
with ops.name_scope(name, "softmax_classifier", [tensor_in, labels]):
logits = nn.xw_plus_b(tensor_in, weights, biases)
if class_weight is not None:
logits = math_ops.mul(logits, class_weight)
return nn.softmax(logits), loss_ops.softmax_cross_entropy(logits, labels)
def _weighted_loss(loss, weight_tensor):
unweighted_loss = array_ops.reshape(loss, shape=(-1,))
weighted_loss = math_ops.mul(
unweighted_loss, array_ops.reshape(weight_tensor, shape=(-1,)))
return math_ops.div(
math_ops.reduce_sum(weighted_loss),
math_ops.to_float(math_ops.reduce_sum(weight_tensor)),
name="loss")
def _weighted_loss(self, loss, weight_tensor):
"""Returns cumulative weighted loss."""
unweighted_loss = array_ops.reshape(loss, shape=(-1,))
weighted_loss = math_ops.mul(unweighted_loss,
array_ops.reshape(
weight_tensor, shape=(-1,)))
return weighted_loss
def _count_condition(values, weights=None, metrics_collections=None,
updates_collections=None):
"""Sums the weights of cases where the given values are True.
If `weights` is `None`, weights default to 1. Use weights of 0 to mask values.
Args:
values: A `bool` `Tensor` of arbitrary size.
weights: An optional `Tensor` whose shape is broadcastable to `values`.
metrics_collections: An optional list of collections that the metric
value variable should be added to.
updates_collections: An optional list of collections that the metric update
ops should be added to.
Returns:
value_tensor: A tensor representing the current value of the metric.
update_op: An operation that accumulates the error from a batch of data.
Raises:
ValueError: If `weights` is not `None` and its shape doesn't match `values`,
or if either `metrics_collections` or `updates_collections` are not a list
or tuple.
"""
check_ops.assert_type(values, dtypes.bool)
count = _create_local('count', shape=[])
values = math_ops.to_float(values)
if weights is not None:
weights = math_ops.to_float(weights)
values = math_ops.mul(values, weights)
value_tensor = array_ops.identity(count)
update_op = state_ops.assign_add(count, math_ops.reduce_sum(values))
if metrics_collections:
ops.add_to_collections(metrics_collections, value_tensor)
if updates_collections:
ops.add_to_collections(updates_collections, update_op)
return value_tensor, update_op
def accuracy(predictions, labels, weights=None):
"""Computes the percentage of times that predictions matches labels.
Args:
predictions: the predicted values, a `Tensor` whose dtype and shape
matches 'labels'.
labels: the ground truth values, a `Tensor` of any shape and
bool, integer, or string dtype.
weights: None or `Tensor` of float values to reweight the accuracy.
Returns:
Accuracy `Tensor`.
Raises:
ValueError: if dtypes don't match or
if dtype is not bool, integer, or string.
"""
if not (labels.dtype.is_integer or
labels.dtype in (dtypes.bool, dtypes.string)):
raise ValueError(
'Labels should have bool, integer, or string dtype, not %r' %
labels.dtype)
if not labels.dtype.is_compatible_with(predictions.dtype):
raise ValueError('Dtypes of predictions and labels should match. '
'Given: predictions (%r) and labels (%r)' %
(predictions.dtype, labels.dtype))
with ops.name_scope('accuracy', values=[predictions, labels]):
is_correct = math_ops.cast(
math_ops.equal(predictions, labels), dtypes.float32)
if weights is not None:
is_correct = math_ops.mul(is_correct, weights)
return math_ops.reduce_mean(is_correct)
def cosine_distance(
predictions, labels=None, dim=None, weights=_WEIGHT_SENTINEL, scope=None,
targets=None, weight=_WEIGHT_SENTINEL):
"""Adds a cosine-distance loss to the training procedure.
Note that the function assumes that `predictions` and `labels` are already
unit-normalized.
Args:
predictions: An arbitrary matrix.
labels: A `Tensor` whose shape matches 'predictions'
dim: The dimension along which the cosine distance is computed.
weights: Coefficients for the loss a scalar, a tensor of shape
[batch_size] or a tensor whose shape matches `predictions`.
scope: The scope for the operations performed in computing the loss.
targets: Deprecated alias for `labels`.
weight: Deprecated alias for `weights`.
Returns:
A scalar `Tensor` representing the loss value.
Raises:
ValueError: If `predictions` shape doesn't match `labels` shape, or
`weights` is `None`.
"""
labels = _labels(labels, targets)
weights = _weights(weights, weight)
if dim is None:
raise ValueError("`dim` cannot be None.")
with ops.name_scope(scope, "cosine_distance_loss",
[predictions, labels, weights]) as scope:
predictions.get_shape().assert_is_compatible_with(labels.get_shape())
predictions = math_ops.to_float(predictions)
labels = math_ops.to_float(labels)
radial_diffs = math_ops.mul(predictions, labels)
losses = 1 - math_ops.reduce_sum(radial_diffs, reduction_indices=[dim,])
return compute_weighted_loss(losses, weights)
def _weighted_loss(loss, weight):
"""Returns cumulative weighted loss."""
unweighted_loss = array_ops.reshape(loss, shape=(-1,))
weighted_loss = math_ops.mul(unweighted_loss,
array_ops.reshape(
weight, shape=(-1,)))
return weighted_loss
def _weighted_loss(self, loss, weight_tensor):
"""Returns cumulative weighted loss."""
unweighted_loss = array_ops.reshape(loss, shape=(-1,))
weighted_loss = math_ops.mul(unweighted_loss,
array_ops.reshape(
weight_tensor, shape=(-1,)))
return weighted_loss
def _count_condition(values, weights=None, metrics_collections=None,
updates_collections=None):
"""Sums the weights of cases where the given values are True.
If `weights` is `None`, weights default to 1. Use weights of 0 to mask values.
Args:
values: A `bool` `Tensor` of arbitrary size.
weights: An optional `Tensor` whose shape is broadcastable to `values`.
metrics_collections: An optional list of collections that the metric
value variable should be added to.
updates_collections: An optional list of collections that the metric update
ops should be added to.
Returns:
value_tensor: A tensor representing the current value of the metric.
update_op: An operation that accumulates the error from a batch of data.
Raises:
ValueError: If `weights` is not `None` and its shape doesn't match `values`,
or if either `metrics_collections` or `updates_collections` are not a list
or tuple.
"""
check_ops.assert_type(values, dtypes.bool)
count = _create_local('count', shape=[])
values = math_ops.to_float(values)
if weights is not None:
weights = math_ops.to_float(weights)
values = math_ops.mul(values, weights)
value_tensor = array_ops.identity(count)
update_op = state_ops.assign_add(count, math_ops.reduce_sum(values))
if metrics_collections:
ops.add_to_collections(metrics_collections, value_tensor)
if updates_collections:
ops.add_to_collections(updates_collections, update_op)
return value_tensor, update_op
def accuracy(predictions, labels, weights=None):
"""Computes the percentage of times that predictions matches labels.
Args:
predictions: the predicted values, a `Tensor` whose dtype and shape
matches 'labels'.
labels: the ground truth values, a `Tensor` of any shape and
bool, integer, or string dtype.
weights: None or `Tensor` of float values to reweight the accuracy.
Returns:
Accuracy `Tensor`.
Raises:
ValueError: if dtypes don't match or
if dtype is not bool, integer, or string.
"""
if not (labels.dtype.is_integer or
labels.dtype in (dtypes.bool, dtypes.string)):
raise ValueError(
'Labels should have bool, integer, or string dtype, not %r' %
labels.dtype)
if not labels.dtype.is_compatible_with(predictions.dtype):
raise ValueError('Dtypes of predictions and labels should match. '
'Given: predictions (%r) and labels (%r)' %
(predictions.dtype, labels.dtype))
with ops.name_scope('accuracy', values=[predictions, labels]):
is_correct = math_ops.cast(
math_ops.equal(predictions, labels), dtypes.float32)
if weights is not None:
is_correct = math_ops.mul(is_correct, weights)
num_values = math_ops.mul(weights, array_ops.ones_like(is_correct))
return math_ops.div(math_ops.reduce_sum(is_correct),
math_ops.reduce_sum(num_values))
return math_ops.reduce_mean(is_correct)
def __mul__(self, other):
return mul(self, other)
def __rmul__(self, other):
return mul(other, self)
def __call__(self, inputs, state, scope=None):
"""Gated recurrent unit (GRU) with nunits cells."""
with vs.variable_scope(scope or type(self).__name__): # "GRUCell"
with vs.variable_scope("Gates"): # Reset gate and update gate.
# We start with bias of 1.0 to not reset and not update.
r, u = array_ops.split(1, 2, _linear([inputs, state],
2 * self._num_units, True, 1.0))
r, u = sigmoid(r), sigmoid(u)
with vs.variable_scope("Candidate"):
c = self._activation(_linear([inputs, r * state],
self._num_units, True))
new_h = u * state + (1 - u) * c
eps = 1e-13
temp = math_ops.div(math_ops.reduce_sum(math_ops.mul(new_h, state), 1), \
math_ops.reduce_sum(math_ops.mul(state,state), 1) + eps)
dummy = array_ops.transpose(state)
t1 = math_ops.mul(dummy, temp)
t1 = array_ops.transpose(t1)
distract_h = new_h - state * t1
return distract_h, distract_h
def __call__(self, inputs, state, scope=None):
"""Long short-term memory cell (LSTM)."""
with vs.variable_scope(scope or type(self).__name__):
# Parameters of gates are concatenated into one multiply for efficiency.
if self._state_is_tuple:
c, h = state
else:
c, h = array_ops.split(1, 2, state)
concat = _linear([inputs, h], 5 * self._num_units, True)
# i = input_gate, j = new_input, f = forget_gate, o = output_gate, g= distract_gate
i, j, f, o, g = array_ops.split(1, 5, concat)
new_c = (c * sigmoid(f + self._forget_bias) + sigmoid(i) *
self._activation(j))
eps = 1e-13
temp = math_ops.div(math_ops.reduce_sum(math_ops.mul(c, new_c),1),math_ops.reduce_sum(math_ops.mul(c,c),1) + eps)
m = array_ops.transpose(sigmoid(g))
t1 = math_ops.mul(m , temp)
t1 = array_ops.transpose(t1)
distract_c = new_c - c * t1
new_h = self._activation(distract_c) * sigmoid(o)
if self._state_is_tuple:
new_state = LSTMStateTuple(new_c, new_h)
else:
new_state = array_ops.concat(1, [new_c, new_h])
return new_h, new_state
def __call__(self, inputs, state, scope=None):
"""Long short-term memory cell (LSTM)."""
with vs.variable_scope(scope or type(self).__name__):
# Parameters of gates are concatenated into one multiply for efficiency.
if self._state_is_tuple:
c, h = state
else:
c, h = array_ops.split(1, 2, state)
concat = _linear([inputs, h], 4 * self._num_units, True)
# i = input_gate, j = new_input, f = forget_gate, o = output_gate
i, j, f, o = array_ops.split(1, 4, concat)
new_c = (c * sigmoid(f + self._forget_bias) + sigmoid(i) *
self._activation(j))
eps = 1e-13
temp = math_ops.div(math_ops.reduce_sum(math_ops.mul(c, new_c),1),math_ops.reduce_sum(math_ops.mul(c,c),1) + eps)
dummy = array_ops.transpose(c)
t1 = math_ops.mul(dummy, temp)
t1 = array_ops.transpose(t1)
distract_c = new_c - t1
new_h = self._activation(distract_c) * sigmoid(o)
if self._state_is_tuple:
new_state = LSTMStateTuple(new_c, new_h)
else:
new_state = array_ops.concat(1, [new_c, new_h])
return new_h, new_state
def _num_present(losses, weight, per_batch=False):
"""Computes the number of elements in the loss function induced by `weight`.
A given weight tensor induces different numbers of usable elements in the
`losses` tensor. The `weight` tensor is broadcast across `losses` for all
possible dimensions. For example, if `losses` is a tensor of dimension
[4, 5, 6, 3] and weight is a tensor of size [4, 5], then weight is, in effect,
tiled to match the size of `losses`. Following this effective tile, the total
number of present elements is the number of non-zero weights.
Args:
losses: A tensor of size [batch_size, d1, ... dN].
weight: A tensor of size [1] or [batch_size, d1, ... dK] where K < N.
per_batch: Whether to return the number of elements per batch or as a sum
total.
Returns:
The number of present (non-zero) elements in the losses tensor. If
`per_batch` is True, the value is returned as a tensor of size
[batch_size]. Otherwise, a single scalar tensor is returned.
"""
# To ensure that dims of [2, 1] gets mapped to [2,]
weight = array_ops.squeeze(weight)
# If the weight is a scalar, its easy to compute:
if weight.get_shape().ndims == 0:
batch_size = array_ops.reshape(array_ops.slice(array_ops.shape(losses),
[0], [1]), [])
num_per_batch = math_ops.div(math_ops.to_float(array_ops.size(losses)),
math_ops.to_float(batch_size))
num_per_batch = math_ops.select(math_ops.equal(weight, 0),
0.0, num_per_batch)
num_per_batch = math_ops.mul(array_ops.ones(
array_ops.reshape(batch_size, [1])), num_per_batch)
return num_per_batch if per_batch else math_ops.reduce_sum(num_per_batch)
# First, count the number of nonzero weights:
if weight.get_shape().ndims >= 1:
reduction_indices = list(range(1, weight.get_shape().ndims))
num_nonzero_per_batch = math_ops.reduce_sum(
math_ops.to_float(math_ops.not_equal(weight, 0)),
reduction_indices=reduction_indices)
# Next, determine the number of elements that weight would broadcast to:
broadcast_dims = array_ops.slice(array_ops.shape(losses),
[weight.get_shape().ndims], [-1])
num_to_broadcast = math_ops.to_float(math_ops.reduce_prod(broadcast_dims))
num_per_batch = math_ops.mul(num_nonzero_per_batch, num_to_broadcast)
return num_per_batch if per_batch else math_ops.reduce_sum(num_per_batch)
def unregularized_loss(self, examples):
"""Add operations to compute the loss (without the regularization loss).
Args:
examples: Examples to compute unregularized loss on.
Returns:
An Operation that computes mean (unregularized) loss for given set of
examples.
Raises:
ValueError: if examples are not well defined.
"""
self._assertSpecified(['example_labels', 'example_weights',
'sparse_features', 'dense_features'], examples)
self._assertList(['sparse_features', 'dense_features'], examples)
with name_scope('sdca/unregularized_loss'):
predictions = math_ops.cast(
self._linear_predictions(examples), dtypes.float64)
labels = math_ops.cast(
convert_to_tensor(examples['example_labels']), dtypes.float64)
weights = math_ops.cast(
convert_to_tensor(examples['example_weights']), dtypes.float64)
if self._options['loss_type'] == 'logistic_loss':
return math_ops.reduce_sum(math_ops.mul(
sigmoid_cross_entropy_with_logits(predictions, labels),
weights)) / math_ops.reduce_sum(weights)
if self._options['loss_type'] in ['hinge_loss', 'smooth_hinge_loss']:
# hinge_loss = max{0, 1 - y_i w*x} where y_i \in {-1, 1}. So, we need to
# first convert 0/1 labels into -1/1 labels.
all_ones = array_ops.ones_like(predictions)
adjusted_labels = math_ops.sub(2 * labels, all_ones)
# Tensor that contains (unweighted) error (hinge loss) per
# example.
error = nn_ops.relu(math_ops.sub(all_ones, math_ops.mul(adjusted_labels,
predictions)))
weighted_error = math_ops.mul(error, weights)
return math_ops.reduce_sum(weighted_error) / math_ops.reduce_sum(
weights)
# squared loss
err = math_ops.sub(labels, predictions)
weighted_squared_err = math_ops.mul(math_ops.square(err), weights)
# SDCA squared loss function is sum(err^2) / (2*sum(weights))
return (math_ops.reduce_sum(weighted_squared_err) /
(2.0 * math_ops.reduce_sum(weights)))