def create_metric_ops(self, _inputs, labels, predictions):
"""Creates the metric op"""
loss_mask = tf.sequence_mask(
lengths=tf.to_int32(labels["target_len"] - 1),
maxlen=tf.to_int32(tf.shape(predictions["losses"])[1]))
return metrics.streaming_mean(predictions["losses"], loss_mask)
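# A minimal eager sketch of the masking idea above, with made-up values.
# It assumes TensorFlow 2.x for readability; the snippet itself uses TF 1.x
# ops such as tf.to_int32 and streaming metric ops.
import tensorflow as tf

losses = tf.constant([[0.5, 0.7, 0.1, 0.0],
                      [0.2, 0.3, 0.0, 0.0]])   # [batch, time]
target_len = tf.constant([4, 3])               # includes the final target token

loss_mask = tf.sequence_mask(target_len - 1, maxlen=tf.shape(losses)[1])
mask_f = tf.cast(loss_mask, tf.float32)
masked_mean = tf.reduce_sum(losses * mask_f) / tf.reduce_sum(mask_f)
# loss_mask: [[True, True, True, False], [True, True, False, False]]
# masked_mean: ~0.36 (mean over the unmasked positions only)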
def _create_position_embedding(embedding_dim, num_positions, lengths, maxlen):
"""Creates position embeddings.
Args:
embedding_dim: Dimensionality of the embeddings. An integer.
num_positions: The number of positions to be embedded. For example,
if you have inputs of length up to 100, this should be 100. An integer.
lengths: The lengths of the inputs to create position embeddings for.
An int32 tensor of shape `[batch_size]`.
maxlen: The maximum length of the input sequence to create position
embeddings for. An int32 tensor.
Returns:
A tensor of shape `[batch_size, maxlen, embedding_dim]` that contains
embeddings for each position. All elements past `lengths` are zero.
"""
# Create constant position encodings
position_encodings = tf.constant(
position_encoding(num_positions, embedding_dim),
name="position_encoding")
# Slice to size of current sequence
pe_slice = position_encodings[:maxlen, :]
# Replicate encodings for each element in the batch
batch_size = tf.shape(lengths)[0]
pe_batch = tf.tile([pe_slice], [batch_size, 1, 1])
# Mask out positions that are padded
positions_mask = tf.sequence_mask(
lengths=lengths, maxlen=maxlen, dtype=tf.float32)
positions_embed = pe_batch * tf.expand_dims(positions_mask, 2)
return positions_embed
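# A small eager example of the masking step above, using hypothetical shapes
# (TensorFlow 2.x; the original is TF 1.x graph code).
import tensorflow as tf

lengths = tf.constant([2, 4])                    # [batch_size]
maxlen, embedding_dim = 4, 3
pe_batch = tf.ones([2, maxlen, embedding_dim])   # stand-in for the tiled encodings

positions_mask = tf.sequence_mask(lengths, maxlen=maxlen, dtype=tf.float32)
positions_embed = pe_batch * tf.expand_dims(positions_mask, 2)
# positions_embed[0] has its last two rows zeroed out, because lengths[0] == 2.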
def _mask(hypothesis, hypothesis_length, premise, premise_length):
p_mask = tf.sequence_mask(premise_length, tf.shape(premise)[1], dtype=tf.float32)
h_mask = tf.sequence_mask(hypothesis_length, tf.shape(hypothesis)[1], dtype=tf.float32)
premise *= tf.expand_dims(p_mask, 2)
hypothesis *= tf.expand_dims(h_mask, 2)
return hypothesis, premise
def mask_3d(sequences, sequence_lengths, mask_value, dimension=2):
"""
Given a batch of matrices, each with shape m x n, mask the values in each
row after the positions indicated in sentence_sizes.
This function is supposed to mask the last columns in the raw attention
matrix (e_{i, j}) in cases where the sentence2 is smaller than the
maximum.
Args:
sequences: tensor with shape (batch_size, m, n)
sequence_lengths: tensor with shape (batch_size) containing the sentence sizes that
should be limited
mask_value: scalar value to assign to items after sentence size
dimension: over which dimension to mask values
Returns:
A tensor with the same shape as `values`
"""
if dimension == 1:
sequences = tf.transpose(sequences, [0, 2, 1])
time_steps1, time_steps2 = tf.shape(sequences)[1], tf.shape(sequences)[2]
ones = tf.ones_like(sequences, dtype=tf.int32)
pad_values = mask_value * tf.cast(ones, tf.float32)
mask = tf.sequence_mask(sequence_lengths, time_steps2)
# mask is (batch_size, sentence2_size). we have to tile it for 3d
mask3d = tf.tile(tf.expand_dims(mask, 1), (1, time_steps1, 1))
masked = tf.where(mask3d, sequences, pad_values)
return tf.transpose(masked, [0, 2, 1]) if dimension == 1 else masked
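# Hypothetical usage of mask_3d on a raw attention matrix, assuming the
# function above is in scope and running eagerly under TensorFlow 2.x.
import tensorflow as tf

attention = tf.random.uniform([2, 3, 5])   # [batch, m, n] raw scores e_{i, j}
sentence2_sizes = tf.constant([5, 2])      # valid columns per batch element

# Columns at or beyond sentence2_sizes become a large negative number, so they
# contribute ~0 weight after a softmax over the last dimension.
masked = mask_3d(attention, sentence2_sizes, mask_value=-1e9, dimension=2)
weights = tf.nn.softmax(masked, axis=-1)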
def average_attention(hidden_states, encoder_input_length, *args, **kwargs):
# attention with fixed weights (average of all hidden states)
lengths = tf.to_float(tf.expand_dims(encoder_input_length, axis=1))
mask = tf.sequence_mask(encoder_input_length, maxlen=tf.shape(hidden_states)[1])
weights = tf.to_float(mask) / lengths
weighted_average = tf.reduce_sum(hidden_states * tf.expand_dims(weights, axis=2), axis=1)
return weighted_average, weights
def _create_position_embedding(self, lengths, maxlen):
# Slice to size of current sequence
pe_slice = self.pos_embed[2:maxlen+2, :]
# Replicate encodings for each element in the batch
batch_size = tf.shape(lengths)[0]
pe_batch = tf.tile([pe_slice], [batch_size, 1, 1])
# Mask out positions that are padded
positions_mask = tf.sequence_mask(
lengths=lengths, maxlen=maxlen, dtype=tf.float32)
positions_embed = pe_batch * tf.expand_dims(positions_mask, 2)
return positions_embed
def _mask_by_length(t, length):
maxlen = t.get_shape().as_list()[1]
mask = tf.sequence_mask(length, maxlen=maxlen)
mask = tf.expand_dims(tf.cast(mask, tf.float32), -1)
return t * mask
def _create_position_embedding(embedding_dim, num_positions, lengths, maxlen):
"""Creates position embeddings.
Args:
embedding_dim: Dimensionality of the embeddings. An integer.
num_positions: The number of positions to be embedded. For example,
if you have inputs of length up to 100, this should be 100. An integer.
lengths: The lengths of the inputs to create position embeddings for.
An int32 tensor of shape `[batch_size]`.
maxlen: The maximum length of the input sequence to create position
embeddings for. An int32 tensor.
Returns:
A tensor of shape `[batch_size, maxlen, embedding_dim]` that contains
embeddings for each position. All elements past `lengths` are zero.
"""
# Create constant position encodings
position_encodings = tf.constant(
_position_encoding(num_positions, embedding_dim),
name="position_encoding")
# Slice to size of current sequence
pe_slice = position_encodings[:maxlen, :]
# Replicate encodings for each element in the batch
batch_size = tf.shape(lengths)[0]
pe_batch = tf.tile([pe_slice], [batch_size, 1, 1])
# Mask out positions that are padded
positions_mask = tf.sequence_mask(
lengths=lengths, maxlen=maxlen, dtype=tf.float32)
positions_embed = pe_batch * tf.expand_dims(positions_mask, 2)
return positions_embed
def create_mask_for_keys(self, keys, keys_length):
# batch_size x keys_l
mask = 1 - tf.sequence_mask(lengths=keys_length, maxlen=keys.get_shape().as_list()[1], dtype=tf.float32)
mask *= -2 ** 30
mask = tf.expand_dims(tf.expand_dims(mask, 1), 1) # batch_size x 1 x 1 x keys_l
return mask
def create_mask_for_queries(self, queries, queries_len):
# batch_size x queries_l
mask = tf.sequence_mask(lengths=queries_len, maxlen=queries.get_shape().as_list()[1], dtype=tf.float32)
mask = tf.expand_dims(tf.expand_dims(mask, 1), -1) # batch_size x 1 x queries x 1
return mask
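# Sketch of how these additive masks are typically combined with attention
# logits before a softmax (hypothetical shapes, TensorFlow 2.x eager).
import tensorflow as tf

batch, heads, q_len, k_len = 2, 4, 5, 6
logits = tf.random.normal([batch, heads, q_len, k_len])

keys_length = tf.constant([6, 3])
key_mask = 1.0 - tf.sequence_mask(keys_length, maxlen=k_len, dtype=tf.float32)
key_mask *= -2.0 ** 30                               # large negative at padded keys
key_mask = key_mask[:, tf.newaxis, tf.newaxis, :]    # [batch, 1, 1, k_len]

weights = tf.nn.softmax(logits + key_mask, axis=-1)  # padded keys get ~0 weight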
def _build(self, inputs, sequence_length, labels, encoder_output, encoder_sequence_length, embedding_lookup=None):
if embedding_lookup is None:
output = PositionnalEmbedding(**self.embed_params)(inputs)
else:
output = embedding_lookup(inputs)
output = tf.layers.dropout(
output, self.params.dropout_rate)
for _ in range(self.params.num_blocks):
output = DecoderBlock(**self.block_params)(output, sequence_length,
encoder_output, encoder_sequence_length)
logits = tf.contrib.layers.fully_connected(
output, self.params.vocab_size)
max_sequence_length = tf.shape(inputs)[1]
one_hot_labels = tf.one_hot(labels, self.params.vocab_size, axis=-1)
with tf.name_scope("loss"):
mask_loss = tf.sequence_mask(sequence_length, maxlen=max_sequence_length, dtype=tf.float32)
one_hot_labels = tf.reshape(one_hot_labels, [-1, self.params.vocab_size])
loss = tf.nn.softmax_cross_entropy_with_logits(logits=tf.reshape(logits, [-1, self.params.vocab_size]),
labels=one_hot_labels)
loss = tf.reshape(loss, [-1, max_sequence_length])
loss *= mask_loss
loss = tf.reduce_sum(loss, 1) / tf.reduce_sum(mask_loss, 1)
mean_loss = tf.reduce_mean(loss)
pred = tf.argmax(logits, axis=-1)
acc = tf.equal(pred, labels)
acc = tf.reduce_sum(tf.to_float(acc) * mask_loss, 1) / tf.reduce_sum(mask_loss, 1)
acc = tf.reduce_mean(acc, name="accuracy")
return mean_loss, tf.nn.log_softmax(logits)
def get_mention_emb(self, text_emb, text_outputs, mention_starts, mention_ends):
mention_emb_list = []
mention_start_emb = tf.gather(text_outputs, mention_starts) # [num_mentions, emb]
mention_emb_list.append(mention_start_emb)
mention_end_emb = tf.gather(text_outputs, mention_ends) # [num_mentions, emb]
mention_emb_list.append(mention_end_emb)
mention_width = 1 + mention_ends - mention_starts # [num_mentions]
if self.config["use_features"]:
mention_width_index = mention_width - 1 # [num_mentions]
mention_width_emb = tf.gather(tf.get_variable("mention_width_embeddings", [self.config["max_mention_width"], self.config["feature_size"]]), mention_width_index) # [num_mentions, emb]
mention_width_emb = tf.nn.dropout(mention_width_emb, self.dropout)
mention_emb_list.append(mention_width_emb)
if self.config["model_heads"]:
mention_indices = tf.expand_dims(tf.range(self.config["max_mention_width"]), 0) + tf.expand_dims(mention_starts, 1) # [num_mentions, max_mention_width]
mention_indices = tf.minimum(util.shape(text_outputs, 0) - 1, mention_indices) # [num_mentions, max_mention_width]
mention_text_emb = tf.gather(text_emb, mention_indices) # [num_mentions, max_mention_width, emb]
self.head_scores = util.projection(text_outputs, 1) # [num_words, 1]
mention_head_scores = tf.gather(self.head_scores, mention_indices) # [num_mentions, max_mention_width, 1]
mention_mask = tf.expand_dims(tf.sequence_mask(mention_width, self.config["max_mention_width"], dtype=tf.float32), 2) # [num_mentions, max_mention_width, 1]
mention_attention = tf.nn.softmax(mention_head_scores + tf.log(mention_mask), dim=1) # [num_mentions, max_mention_width, 1]
mention_head_emb = tf.reduce_sum(mention_attention * mention_text_emb, 1) # [num_mentions, emb]
mention_emb_list.append(mention_head_emb)
mention_emb = tf.concat(mention_emb_list, 1) # [num_mentions, emb]
return mention_emb
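# Small eager illustration of the log-mask softmax trick used above for
# mention head attention (hypothetical numbers, TensorFlow 2.x).
import tensorflow as tf

mention_width = tf.constant([2, 3])      # [num_mentions]
max_mention_width = 3
scores = tf.constant([[1.0, 2.0, 3.0],
                      [0.5, 0.5, 0.5]])  # [num_mentions, max_mention_width]

mask = tf.sequence_mask(mention_width, max_mention_width, dtype=tf.float32)
attention = tf.nn.softmax(scores + tf.math.log(mask), axis=1)
# Positions past each mention's width get log(0) = -inf and therefore weight 0.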
def mask_attn_score(score, memory_sequence_length, score_mask_value = -1e8):
score_mask = tf.sequence_mask(
memory_sequence_length, maxlen=score.shape[1])
score_mask_values = score_mask_value * tf.ones_like(score)
return tf.where(score_mask, score, score_mask_values)
def cross_entropy_sequence_loss(logits, targets, sequence_length):
with tf.name_scope("cross_entropy_sequence_loss"):
losses = tf.nn.sparse_softmax_cross_entropy_with_logits(
logits=logits, labels=targets)
# Mask out the losses we don't care about
loss_mask = tf.sequence_mask(
tf.to_int32(sequence_length), tf.to_int32(tf.shape(targets)[0]))
losses = losses * tf.transpose(tf.to_float(loss_mask), [1, 0])
return losses
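# A minimal time-major illustration of the loss mask above, run eagerly with
# TensorFlow 2.x (the snippet itself uses TF 1.x ops such as tf.to_int32).
import tensorflow as tf

targets = tf.constant([[1, 4], [2, 5], [3, 0]])   # [time, batch]
sequence_length = tf.constant([3, 2])             # valid steps per example

loss_mask = tf.sequence_mask(sequence_length, tf.shape(targets)[0])  # [batch, time]
mask_time_major = tf.transpose(tf.cast(loss_mask, tf.float32), [1, 0])
# mask_time_major:
# [[1., 1.],
#  [1., 1.],
#  [1., 0.]]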
def apply(self, is_train, x, mask=None):
mask = tf.sequence_mask(mask, tf.shape(x)[1])
output = weight_layers(1, x, mask, self.l2_coef, do_layer_norm=self.layer_norm,
use_top_only=self.top_layer_only)["weighted_ops"][0]
return output
def compute_attention_mask(x_mask, mem_mask, x_word_dim, key_word_dim):
""" computes a (batch, x_word_dim, key_word_dim) bool mask for clients that want masking """
if x_mask is None and mem_mask is None:
return None
elif x_mask is None or mem_mask is None:
raise NotImplementedError()
x_mask = tf.sequence_mask(x_mask, x_word_dim)
mem_mask = tf.sequence_mask(mem_mask, key_word_dim)
join_mask = tf.logical_and(tf.expand_dims(x_mask, 2), tf.expand_dims(mem_mask, 1))
return join_mask
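# A hedged example of the joint mask construction above, with made-up lengths
# (TensorFlow 2.x eager; x_mask / mem_mask hold sequence lengths, as in the code).
import tensorflow as tf

x_lengths = tf.constant([3, 1])      # passed in as x_mask
mem_lengths = tf.constant([2, 4])    # passed in as mem_mask
x_word_dim, key_word_dim = 3, 4

x_mask = tf.sequence_mask(x_lengths, x_word_dim)        # [batch, x_word_dim]
mem_mask = tf.sequence_mask(mem_lengths, key_word_dim)  # [batch, key_word_dim]
join_mask = tf.logical_and(tf.expand_dims(x_mask, 2), tf.expand_dims(mem_mask, 1))
# join_mask has shape (2, 3, 4) and is True only where both positions are valid.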
def get_mean_logit(self):
logits = (self.start_logits + self.end_logits) / 2.0
bol_mask = tf.sequence_mask(self.mask, tf.shape(self.start_logits)[1])
bol_mask = tf.cast(bol_mask, tf.float32)
return tf.reduce_sum(logits*bol_mask, axis=[1]) / tf.reduce_sum(bol_mask, axis=[1])