def next_inputs(self, time, outputs, state, sample_ids, name=None):
with ops.name_scope(name, "ScheduledEmbeddingTrainingHelperSample",
[time, outputs, state, sample_ids]):
(finished, base_next_inputs, state) = (
super(ScheduledEmbeddingTrainingHelper, self).next_inputs(
time=time,
outputs=outputs,
state=state,
sample_ids=sample_ids,
name=name))
def maybe_sample():
"""Perform scheduled sampling."""
where_sampling = math_ops.cast(
array_ops.where(sample_ids > -1), dtypes.int32)
where_not_sampling = math_ops.cast(
array_ops.where(sample_ids <= -1), dtypes.int32)
where_sampling_flat = array_ops.reshape(where_sampling, [-1])
where_not_sampling_flat = array_ops.reshape(where_not_sampling, [-1])
sample_ids_sampling = array_ops.gather(sample_ids, where_sampling_flat)
inputs_not_sampling = array_ops.gather(
base_next_inputs, where_not_sampling_flat)
sampled_next_inputs = self._embedding_fn(sample_ids_sampling)
base_shape = array_ops.shape(base_next_inputs)
return (array_ops.scatter_nd(indices=where_sampling,
updates=sampled_next_inputs,
shape=base_shape)
+ array_ops.scatter_nd(indices=where_not_sampling,
updates=inputs_not_sampling,
shape=base_shape))
all_finished = math_ops.reduce_all(finished)
next_inputs = control_flow_ops.cond(
all_finished, lambda: base_next_inputs, maybe_sample)
return (finished, next_inputs, state)
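A minimal sketch of the scatter_nd merge performed inside maybe_sample, written against the public TensorFlow API with toy shapes (the tensors and embedding table below are assumed for illustration, not taken from the source above):

import tensorflow as tf

sample_ids = tf.constant([3, -1, 7, -1])        # -1 marks "do not sample" positions
base_next_inputs = tf.zeros([4, 2])             # [batch_size, embedding_dim], teacher-forced inputs
embedding_matrix = tf.reshape(tf.range(20, dtype=tf.float32), [10, 2])

where_sampling = tf.cast(tf.where(sample_ids > -1), tf.int32)
where_not_sampling = tf.cast(tf.where(sample_ids <= -1), tf.int32)
sample_ids_sampling = tf.gather(sample_ids, tf.reshape(where_sampling, [-1]))
sampled_next_inputs = tf.nn.embedding_lookup(embedding_matrix, sample_ids_sampling)
inputs_not_sampling = tf.gather(base_next_inputs, tf.reshape(where_not_sampling, [-1]))
base_shape = tf.shape(base_next_inputs)
next_inputs = (tf.scatter_nd(where_sampling, sampled_next_inputs, base_shape) +
               tf.scatter_nd(where_not_sampling, inputs_not_sampling, base_shape))
# Rows 0 and 2 now hold the embeddings of ids 3 and 7; rows 1 and 3 keep base_next_inputs.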
def sample(self, time, outputs, state, name=None):
with ops.name_scope(name, "ScheduledOutputTrainingHelperSample",
[time, outputs, state]):
sampler = bernoulli.Bernoulli(probs=self._sampling_probability)
return math_ops.cast(
sampler.sample(sample_shape=self.batch_size, seed=self._seed),
dtypes.bool)
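The sampler above draws a boolean mask of shape [batch_size]; True marks batch entries whose next input will be taken from the decoder output rather than from the ground truth. A tiny equivalent sketch (toy values assumed; it uses a uniform comparison instead of the distributions module, which gives the same Bernoulli(p) mask):

import tensorflow as tf

sampling_probability = 0.25   # assumed stand-in for self._sampling_probability
batch_size = 8
select_sample = tf.random.uniform([batch_size]) < sampling_probability   # bool, True with prob. 0.25 per entry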
def sample(self, time, outputs, state, name=None):
"""sample for GreedyEmbeddingHelper."""
del time, state # unused by sample_fn
# Outputs are logits, use argmax to get the most probable id
if not isinstance(outputs, ops.Tensor):
raise TypeError("Expected outputs to be a single Tensor, got: %s" %
type(outputs))
sample_ids = math_ops.cast(
math_ops.argmax(outputs, axis=-1), dtypes.int32)
return sample_ids
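Greedy sampling is just an argmax over the vocabulary axis, cast to int32 so the resulting ids can be fed back through the embedding. A toy sketch with assumed logits:

import tensorflow as tf

logits = tf.constant([[0.1, 2.0, -1.0],
                      [3.0, 0.0,  0.5]])                      # [batch_size, vocab_size]
sample_ids = tf.cast(tf.argmax(logits, axis=-1), tf.int32)    # -> [1, 0]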
def sequence_loss(logits, targets, weights,
average_across_timesteps=True, average_across_batch=True,
softmax_loss_function=None, name=None):
"""Weighted cross-entropy loss for a sequence of logits, batch-collapsed.
Args:
logits: List of 2D Tensors of shape [batch_size x num_decoder_symbols].
targets: List of 1D batch-sized int32 Tensors of the same length as logits.
weights: List of 1D batch-sized float-Tensors of the same length as logits.
average_across_timesteps: If set, divide the returned cost by the total
label weight.
average_across_batch: If set, divide the returned cost by the batch size.
softmax_loss_function: Function (inputs-batch, labels-batch) -> loss-batch
to be used instead of the standard softmax (the default if this is None).
name: Optional name for this operation, defaults to "sequence_loss".
Returns:
A scalar float Tensor: The average log-perplexity per symbol (weighted).
Raises:
ValueError: If len(logits) is different from len(targets) or len(weights).
"""
with ops.name_scope(name, "sequence_loss", logits + targets + weights):
cost = math_ops.reduce_sum(sequence_loss_by_example(
logits, targets, weights,
average_across_timesteps=average_across_timesteps,
softmax_loss_function=softmax_loss_function))
if average_across_batch:
batch_size = array_ops.shape(targets[0])[0]
return cost / math_ops.cast(batch_size, dtypes.float32)
else:
return cost
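An illustrative usage sketch, assuming the TF 1.x legacy API where this function ships as tf.contrib.legacy_seq2seq.sequence_loss (shapes and values below are made up):

import tensorflow as tf

batch_size, vocab_size, num_steps = 2, 5, 3
logits = [tf.random_normal([batch_size, vocab_size]) for _ in range(num_steps)]
targets = [tf.constant([1, 2]) for _ in range(num_steps)]
weights = [tf.ones([batch_size]) for _ in range(num_steps)]
loss = tf.contrib.legacy_seq2seq.sequence_loss(logits, targets, weights)
# loss is a scalar: the summed weighted cross-entropy, averaged over label weight and batch size.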
def sequence_loss(logits,
targets,
weights,
average_across_timesteps=True,
average_across_batch=True,
softmax_loss_function=None,
name=None):
"""Weighted cross-entropy loss for a sequence of logits, batch-collapsed.
Args:
logits: List of 2D Tensors of shape [batch_size x num_decoder_symbols].
targets: List of 1D batch-sized int32 Tensors of the same length as logits.
weights: List of 1D batch-sized float-Tensors of the same length as logits.
average_across_timesteps: If set, divide the returned cost by the total
label weight.
average_across_batch: If set, divide the returned cost by the batch size.
softmax_loss_function: Function (labels-batch, inputs-batch) -> loss-batch
to be used instead of the standard softmax (the default if this is None).
name: Optional name for this operation, defaults to "sequence_loss".
Returns:
A scalar float Tensor: The average log-perplexity per symbol (weighted).
Raises:
ValueError: If len(logits) is different from len(targets) or len(weights).
"""
with ops.name_scope(name, "sequence_loss", logits + targets + weights):
cost = math_ops.reduce_sum(
sequence_loss_by_example(
logits,
targets,
weights,
average_across_timesteps=average_across_timesteps,
softmax_loss_function=softmax_loss_function))
if average_across_batch:
batch_size = array_ops.shape(targets[0])[0]
return cost / math_ops.cast(batch_size, cost.dtype)
else:
return cost
def _apply_sparse(self, grad, var):
lr_t = math_ops.cast(self._lr_t, var.dtype.base_dtype)
beta1_t = math_ops.cast(self._beta1_t, var.dtype.base_dtype)
beta2_t = math_ops.cast(self._beta2_t, var.dtype.base_dtype)
epsilon_t = math_ops.cast(self._epsilon_t, var.dtype.base_dtype)
clip_multiplier_t = math_ops.cast(self.clip_multiplier_t, var.dtype.base_dtype)
clip_epsilon_t = math_ops.cast(self.clip_epsilon_t, var.dtype.base_dtype)
v = self.get_slot(var, "v")
v_slice = array_ops.gather(v, grad.indices)
# Clip the gradient so that each value exceeds its previous maximum by no more than clip_multiplier.
clipped_values = grad.values
if self.clip_gradients:
clipVal = v_slice * clip_multiplier_t + clip_epsilon_t
clipped_values = clip_ops.clip_by_value(grad.values, -clipVal, clipVal)
# m := beta1 * m + (1 - beta1) * g_t
m = self.get_slot(var, "m")
m_t_values = beta1_t * array_ops.gather(m, grad.indices) + (1 - beta1_t) * clipped_values
m_t = state_ops.scatter_update(m, grad.indices, m_t_values, use_locking=self._use_locking)
# v := max(beta2 * v , abs(grad))
v_t_values = math_ops.maximum(beta2_t * v_slice, math_ops.abs(clipped_values))
v_t = state_ops.scatter_update(v, grad.indices, v_t_values, use_locking=self._use_locking)
# variable -= learning_rate * m_t / (epsilon_t + v_t)
# We do not use a bias-correction term for the first moment; it gives no observable benefit.
var_update = state_ops.scatter_sub(var, grad.indices,
lr_t * m_t_values / (v_t_values + epsilon_t),
use_locking=self._use_locking)
return control_flow_ops.group(var_update, v_t, m_t)
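For reference, a dense NumPy sketch of the same per-element update rule (clip the gradient against the running maximum, update the first moment m and the running max v, then apply var -= lr * m / (v + eps)); all constants below are assumed toy values:

import numpy as np

lr, beta1, beta2, eps = 0.002, 0.9, 0.999, 1e-8
clip_multiplier, clip_epsilon = 1.2, 1e-3

var = np.array([0.5, -0.3])
m = np.zeros_like(var)
v = np.zeros_like(var)
grad = np.array([0.4, -0.1])

clip_val = v * clip_multiplier + clip_epsilon
clipped = np.clip(grad, -clip_val, clip_val)
m = beta1 * m + (1 - beta1) * clipped
v = np.maximum(beta2 * v, np.abs(clipped))
var -= lr * m / (v + eps)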
def sequence_loss_by_batch(logits, targets, weights, average_across_timesteps=True,
softmax_loss_function=None, name=None):
"""Weighted cross-entropy loss for a sequence of logits, batch-collapsed (averaged).
Args:
logits: List of 2D Tensors of shape [batch_size x num_decoder_symbols].
targets: List of 1D batch-sized int32 Tensors of the same length as logits.
weights: List of 1D batch-sized float-Tensors of the same length as logits.
average_across_timesteps: If set, divide the returned cost by the total
label weight.
softmax_loss_function: Function (inputs-batch, labels-batch) -> loss-batch
to be used instead of the standard softmax (the default if this is None).
name: Optional name for this operation, defaults to "sequence_loss".
Returns:
A scalar float Tensor: The average log-perplexity per symbol (weighted).
Raises:
ValueError: If len(logits) is different from len(targets) or len(weights).
"""
with ops.op_scope(logits + targets + weights, name, "sequence_loss_by_batch"):
cost = math_ops.reduce_sum(sequence_loss_by_example(
logits, targets, weights,
average_across_timesteps=average_across_timesteps,
softmax_loss_function=softmax_loss_function))
batch_size = array_ops.shape(targets[0])[0]
return cost / math_ops.cast(batch_size, dtypes.float32)
def sample(self, time, outputs, name=None, **unused_kwargs):
with ops.name_scope(name, "TrainingHelperSample", [time, outputs]):
sample_ids = math_ops.cast(
math_ops.argmax(outputs, axis=-1), dtypes.int32)
return sample_ids
def sequence_loss(targets,
logits,
weights,
average_across_timesteps=True,
average_across_batch=True,
softmax_loss_function=None,
name=None):
"""Weighted cross-entropy loss for a sequence of logits, batch-collapsed.
Args:
logits: List of 2D Tensors of shape [batch_size x num_decoder_symbols].
targets: List of 1D batch-sized int32 Tensors of the same length as logits.
weights: List of 1D batch-sized float-Tensors of the same length as logits.
average_across_timesteps: If set, divide the returned cost by the total
label weight.
average_across_batch: If set, divide the returned cost by the batch size.
softmax_loss_function: Function (labels-batch, inputs-batch) -> loss-batch
to be used instead of the standard softmax (the default if this is None).
name: Optional name for this operation, defaults to "sequence_loss".
Returns:
A scalar float Tensor: The average log-perplexity per symbol (weighted).
Raises:
ValueError: If len(logits) is different from len(targets) or len(weights).
"""
with ops.name_scope(name, "sequence_loss", logits + targets + weights):
cost = math_ops.reduce_sum(
sequence_loss_by_example(
targets,
logits,
weights,
average_across_timesteps=average_across_timesteps,
softmax_loss_function=softmax_loss_function))
if average_across_batch:
batch_size = array_ops.shape(targets[0])[0]
return cost / math_ops.cast(batch_size, cost.dtype)
else:
return cost
def sequence_loss(logits,
targets,
weights,
name):
"""TODO(nh2tran): docstring.
Weighted cross-entropy loss for a sequence of logits, batch-collapsed.
Args:
logits: List of 2D Tensors of shape [batch_size x num_decoder_symbols].
targets: List of 1D batch-sized int32 Tensors of the same length as logits.
weights: List of 1D batch-sized float-Tensors of the same length as logits.
name: Name for this operation.
Returns:
A scalar float Tensor: The average log-perplexity per symbol (weighted).
Raises:
ValueError: If len(logits) is different from len(targets) or len(weights).
"""
#~ with tf.name_scope(name=name,
#~ values=logits + targets + weights):
with ops.op_scope(logits + targets + weights, name):
cost = math_ops.reduce_sum(sequence_loss_per_sample(logits,
targets,
weights))
batch_size = array_ops.shape(targets[0])[0]
return cost / math_ops.cast(batch_size, dtypes.float32)
def _reverse_seq(input_seq, lengths):
"""Reverse a list of Tensors up to specified lengths.
Args:
input_seq: Sequence of seq_len tensors of dimension (batch_size, depth)
lengths: A tensor of dimension batch_size, containing lengths for each
sequence in the batch. If "None" is specified, simply reverses
the list.
Returns:
time-reversed sequence
"""
if lengths is None:
return list(reversed(input_seq))
input_shape = tensor_shape.matrix(None, None)
for input_ in input_seq:
input_shape.merge_with(input_.get_shape())
input_.set_shape(input_shape)
# Join into (time, batch_size, depth)
s_joined = array_ops.pack(input_seq)
# TODO(schuster, ebrevdo): Remove cast when reverse_sequence takes int32
if lengths is not None:
lengths = math_ops.to_int64(lengths)
# Reverse along dimension 0
s_reversed = array_ops.reverse_sequence(s_joined, lengths, 0, 1)
# Split again into list
result = array_ops.unpack(s_reversed)
for r in result:
r.set_shape(input_shape)
return result
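A small sketch of what the length-aware reversal does, using the public tf.reverse_sequence (toy shapes assumed; keyword names as in recent TF releases):

import tensorflow as tf

# [time, batch, depth], i.e. the layout produced by packing the per-step tensors
s_joined = tf.reshape(tf.range(12, dtype=tf.float32), [3, 2, 2])
lengths = tf.constant([3, 2], dtype=tf.int64)
s_reversed = tf.reverse_sequence(s_joined, lengths, seq_axis=0, batch_axis=1)
# Batch entry 0 is reversed over all 3 steps; entry 1 only over its first 2 steps, its padding step stays put.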
def sequence_loss(logits, targets, weights,
average_across_timesteps=True, average_across_batch=True,
softmax_loss_function=None, name=None):
"""Weighted cross-entropy loss for a sequence of logits, batch-collapsed.
Args:
logits: List of 2D Tensors of shape [batch_size x num_decoder_symbols].
targets: List of 1D batch-sized int32 Tensors of the same length as logits.
weights: List of 1D batch-sized float-Tensors of the same length as logits.
average_across_timesteps: If set, divide the returned cost by the total
label weight.
average_across_batch: If set, divide the returned cost by the batch size.
softmax_loss_function: Function (inputs-batch, labels-batch) -> loss-batch
to be used instead of the standard softmax (the default if this is None).
name: Optional name for this operation, defaults to "sequence_loss".
Returns:
A scalar float Tensor: The average log-perplexity per symbol (weighted).
Raises:
ValueError: If len(logits) is different from len(targets) or len(weights).
"""
with ops.op_scope(logits + targets + weights, name, "sequence_loss"):
cost = math_ops.reduce_sum(sequence_loss_by_example(
logits, targets, weights,
average_across_timesteps=average_across_timesteps,
softmax_loss_function=softmax_loss_function))
if average_across_batch:
batch_size = array_ops.shape(targets[0])[0]
return cost / math_ops.cast(batch_size, dtypes.float32)
else:
return cost
def sequence_loss(logits, targets, weights,
average_across_timesteps=True, average_across_batch=True,
softmax_loss_function=None, name=None):
"""Weighted cross-entropy loss for a sequence of logits, batch-collapsed.
Args:
logits: List of 2D Tensors of shape [batch_size x num_decoder_symbols].
targets: List of 1D batch-sized int32 Tensors of the same length as logits.
weights: List of 1D batch-sized float-Tensors of the same length as logits.
average_across_timesteps: If set, divide the returned cost by the total
label weight.
average_across_batch: If set, divide the returned cost by the batch size.
softmax_loss_function: Function (inputs-batch, labels-batch) -> loss-batch
to be used instead of the standard softmax (the default if this is None).
name: Optional name for this operation, defaults to "sequence_loss".
Returns:
A scalar float Tensor: The average log-perplexity per symbol (weighted).
Raises:
ValueError: If len(logits) is different from len(targets) or len(weights).
"""
with ops.name_scope(name, "sequence_loss", logits + targets + weights):
cost = math_ops.reduce_sum(sequence_loss_by_example(
logits, targets, weights,
average_across_timesteps=average_across_timesteps,
softmax_loss_function=softmax_loss_function))
if average_across_batch:
batch_size = array_ops.shape(targets[0])[0]
return cost / math_ops.cast(batch_size, cost.dtype)
else:
return cost
def create_queues(hypes, phase):
"""Create Queues."""
arch = hypes['arch']
dtypes = [tf.float32, tf.int32]
shape_known = hypes['jitter']['reseize_image'] or hypes['jitter']['crop_patch']
if shape_known:
if hypes['jitter']['crop_patch']:
height = hypes['jitter']['patch_height']
width = hypes['jitter']['patch_width']
else:
height = hypes['jitter']['image_height']
width = hypes['jitter']['image_width']
channel = hypes['arch']['num_channels']
num_classes = hypes['arch']['num_classes']
shapes = [[height, width, channel],
[height, width, num_classes]]
else:
shapes = None
capacity = 50
q = tf.FIFOQueue(capacity=capacity, dtypes=dtypes, shapes=shapes)
tf.summary.scalar("queue/%s/fraction_of_%d_full" %
(q.name + "_" + phase, capacity),
math_ops.cast(q.size(), tf.float32) * (1. / capacity))
return q
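The fraction-of-full summary is simply the queue size cast to float and scaled by 1/capacity. A standalone sketch, assuming TF 1.x graph mode where tf.FIFOQueue and tf.summary.scalar are available:

import tensorflow as tf

capacity = 50
q = tf.FIFOQueue(capacity=capacity, dtypes=[tf.float32], shapes=[[]])
tf.summary.scalar("queue/fraction_of_%d_full" % capacity,
                  tf.cast(q.size(), tf.float32) * (1.0 / capacity))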
def _to_tensor(x, dtype):
"""Convert the input `x` to a tensor of type `dtype`.
Arguments:
x: An object to be converted (numpy array, list, tensors).
dtype: The destination type.
Returns:
A tensor.
"""
x = ops.convert_to_tensor(x)
if x.dtype != dtype:
x = math_ops.cast(x, dtype)
return x
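A quick usage sketch of the helper above, assuming _to_tensor is in scope (the values are made up; runs in eager or graph mode):

import tensorflow as tf

x1 = _to_tensor([1, 2, 3], tf.float32)                        # int list -> converted, then cast to float32
x2 = _to_tensor(tf.ones([2], dtype=tf.float32), tf.float32)   # already float32 -> returned unchanged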
def cast(x, dtype):
"""Casts a tensor to a different dtype and returns it.
You can cast a Keras variable but it still returns a Keras tensor.
Arguments:
x: Keras tensor (or variable).
dtype: String, either (`'float16'`, `'float32'`, or `'float64'`).
Returns:
Keras tensor with dtype `dtype`.
Example:
```python
>>> from keras import backend as K
>>> input = K.placeholder((2, 3), dtype='float32')
>>> input
<tf.Tensor 'Placeholder_2:0' shape=(2, 3) dtype=float32>
# It doesn't work in-place as below.
>>> K.cast(input, dtype='float16')
<tf.Tensor 'Cast_1:0' shape=(2, 3) dtype=float16>
>>> input
<tf.Tensor 'Placeholder_2:0' shape=(2, 3) dtype=float32>
# you need to assign it.
>>> input = K.cast(input, dtype='float16')
>>> input
<tf.Tensor 'Cast_2:0' shape=(2, 3) dtype=float16>
"""
return math_ops.cast(x, dtype)
# UPDATES OPS
```
def all(x, axis=None, keepdims=False):
"""Bitwise reduction (logical AND).
Arguments:
x: Tensor or variable.
axis: axis along which to perform the reduction.
keepdims: whether to drop or broadcast the reduction axes.
Returns:
A bool tensor (the result of the logical AND reduction).
"""
axis = _normalize_axis(axis, ndim(x))
x = math_ops.cast(x, dtypes_module.bool)
return math_ops.reduce_all(x, reduction_indices=axis, keep_dims=keepdims)
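A toy example of the reduction above (values assumed): non-zero entries count as True, and reduce_all applies a logical AND along the requested axis.

import tensorflow as tf

x = tf.constant([[1, 1, 0],
                 [1, 1, 1]])
tf.reduce_all(tf.cast(x, tf.bool), axis=1)   # -> [False, True]
tf.reduce_all(tf.cast(x, tf.bool))           # -> False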