Python examples of sequence_mask()
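tf.sequence_mask(lengths, maxlen) builds a boolean tensor of shape [batch_size, maxlen] whose entry [i, j] is True exactly when j < lengths[i]; passing a dtype yields a 0/1 mask instead. A minimal sketch (TensorFlow 1.x, as used in the snippets below):

import tensorflow as tf

lengths = tf.constant([1, 3, 2])
mask = tf.sequence_mask(lengths, maxlen=4)                        # bool, shape [3, 4]
float_mask = tf.sequence_mask(lengths, maxlen=4, dtype=tf.float32)

with tf.Session() as sess:
    print(sess.run(mask))
    # [[ True False False False]
    #  [ True  True  True False]
    #  [ True  True False False]]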

metric_specs.py (project: seq2seq, author: google)
def create_metric_ops(self, _inputs, labels, predictions):
    """Creates the metric op"""
    loss_mask = tf.sequence_mask(
        lengths=tf.to_int32(labels["target_len"] - 1),
        maxlen=tf.to_int32(tf.shape(predictions["losses"])[1]))
    return metrics.streaming_mean(predictions["losses"], loss_mask)
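Here predictions["losses"] is assumed to be a [batch, time] tensor of per-token losses; the mask becomes the weights of the streaming mean, so padded positions do not affect the metric. The lengths are target_len - 1, presumably because the losses are computed on targets shifted past the initial start token. A toy check of the mask (values are illustrative):

import tensorflow as tf

losses = tf.constant([[0.5, 0.7, 0.1],
                      [0.2, 0.4, 0.0]])        # [batch=2, time=3]; last entry is padding
target_len = tf.constant([4, 3])
loss_mask = tf.sequence_mask(
    lengths=tf.to_int32(target_len - 1),
    maxlen=tf.to_int32(tf.shape(losses)[1]))
# loss_mask == [[True, True, True], [True, True, False]]
# so the padded loss at [1, 2] gets zero weight in the streaming mean.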
pooling_encoder.py (project: seq2seq, author: google)
def _create_position_embedding(embedding_dim, num_positions, lengths, maxlen):
  """Creates position embeddings.

  Args:
    embedding_dim: Dimensionality of the embeddings. An integer.
    num_positions: The number of positions to be embedded. For example,
      if you have inputs of length up to 100, this should be 100. An integer.
    lengths: The lengths of the inputs to create position embeddings for.
      An int32 tensor of shape `[batch_size]`.
    maxlen: The maximum length of the input sequence to create position
      embeddings for. An int32 tensor.

  Returns:
    A tensor of shape `[batch_size, maxlen, embedding_dim]` that contains
    embeddings for each position. All elements past `lengths` are zero.
  """
  # Create constant position encodings
  position_encodings = tf.constant(
      position_encoding(num_positions, embedding_dim),
      name="position_encoding")

  # Slice to size of current sequence
  pe_slice = position_encodings[:maxlen, :]
  # Replicate encodings for each element in the batch
  batch_size = tf.shape(lengths)[0]
  pe_batch = tf.tile([pe_slice], [batch_size, 1, 1])

  # Mask out positions that are padded
  positions_mask = tf.sequence_mask(
      lengths=lengths, maxlen=maxlen, dtype=tf.float32)
  positions_embed = pe_batch * tf.expand_dims(positions_mask, 2)

  return positions_embed
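The tile-and-mask pattern above can be exercised in isolation. The sketch below substitutes a tiny hand-written table for position_encoding (purely illustrative values):

import tensorflow as tf

# Stand-in for position_encoding(num_positions, embedding_dim): 4 positions, dim 2
position_encodings = tf.constant([[1., 1.], [2., 2.], [3., 3.], [4., 4.]])

lengths = tf.constant([1, 3])                                    # batch of two sequences
maxlen = 3
pe_slice = position_encodings[:maxlen, :]                        # [3, 2]
pe_batch = tf.tile([pe_slice], [tf.shape(lengths)[0], 1, 1])     # [2, 3, 2]
positions_mask = tf.sequence_mask(lengths, maxlen, dtype=tf.float32)
positions_embed = pe_batch * tf.expand_dims(positions_mask, 2)
# Row 0 keeps only position 0; row 1 keeps positions 0..2; everything else is zeroed.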
prediction_layer.py (project: jack, author: uclmr)
def _mask(hypothesis, hypothesis_length, premise, premise_length):
    p_mask = tf.sequence_mask(premise_length, tf.shape(premise)[1], dtype=tf.float32)
    h_mask = tf.sequence_mask(hypothesis_length, tf.shape(hypothesis)[1], dtype=tf.float32)

    premise *= tf.expand_dims(p_mask, 2)
    hypothesis *= tf.expand_dims(h_mask, 2)
    return hypothesis, premise
masking.py (project: jack, author: uclmr)
def mask_3d(sequences, sequence_lengths, mask_value, dimension=2):
    """
    Given a batch of matrices, each with shape m x n, mask the values in each
    row after the positions indicated in sentence_sizes.
    This function is supposed to mask the last columns in the raw attention
    matrix (e_{i, j}) in cases where the sentence2 is smaller than the
    maximum.

    Args:
        sequences: tensor with shape (batch_size, m, n)
        sequence_lengths: tensor with shape (batch_size) containing the sentence sizes that
           should be limited
        mask_value: scalar value to assign to items after sentence size
        dimension: over which dimension to mask values
    Returns:
        A tensor with the same shape as `values`
    """
    if dimension == 1:
        sequences = tf.transpose(sequences, [0, 2, 1])
    time_steps1, time_steps2 = tf.shape(sequences)[1], tf.shape(sequences)[2]
    ones = tf.ones_like(sequences, dtype=tf.int32)
    pad_values = mask_value * tf.cast(ones, tf.float32)
    mask = tf.sequence_mask(sequence_lengths, time_steps2)
    # mask is (batch_size, sentence2_size). we have to tile it for 3d
    mask3d = tf.tile(tf.expand_dims(mask, 1), (1, time_steps1, 1))
    masked = tf.where(mask3d, sequences, pad_values)
    return tf.transpose(masked, [0, 2, 1]) if dimension == 1 else masked
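A hedged usage sketch: masking the trailing columns of a batch of raw attention scores with a large negative value so that a subsequent softmax gives them ~zero weight (shapes and values are illustrative):

import tensorflow as tf

scores = tf.random_normal([2, 4, 5])            # [batch, m, n] raw attention scores
sentence2_sizes = tf.constant([5, 3])           # valid columns per example
masked = mask_3d(scores, sentence2_sizes, mask_value=-1e9, dimension=2)
attention = tf.nn.softmax(masked, dim=-1)       # padded columns get ~0 probability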
models.py (project: seq2seq, author: eske)
def average_attention(hidden_states, encoder_input_length, *args, **kwargs):
    # attention with fixed weights (average of all hidden states)
    lengths = tf.to_float(tf.expand_dims(encoder_input_length, axis=1))
    mask = tf.sequence_mask(encoder_input_length, maxlen=tf.shape(hidden_states)[1])
    weights = tf.to_float(mask) / lengths
    weighted_average = tf.reduce_sum(hidden_states * tf.expand_dims(weights, axis=2), axis=1)
    return weighted_average, weights
conv_decoder_fairseq.py (project: conv_seq2seq, author: tobyyouup)
def _create_position_embedding(self, lengths, maxlen):

    # Slice to size of current sequence
    pe_slice = self.pos_embed[2:maxlen+2, :]
    # Replicate encodings for each element in the batch
    batch_size = tf.shape(lengths)[0]
    pe_batch = tf.tile([pe_slice], [batch_size, 1, 1])

    # Mask out positions that are padded
    positions_mask = tf.sequence_mask(
        lengths=lengths, maxlen=maxlen, dtype=tf.float32)
    positions_embed = pe_batch * tf.expand_dims(positions_mask, 2)

    return positions_embed
conv_decoder_fairseq_bs.py (project: conv_seq2seq, author: tobyyouup)
def _create_position_embedding(self, lengths, maxlen):

    # Slice to size of current sequence
    pe_slice = self.pos_embed[2:maxlen+2, :]
    # Replicate encodings for each element in the batch
    batch_size = tf.shape(lengths)[0]
    pe_batch = tf.tile([pe_slice], [batch_size, 1, 1])

    # Mask out positions that are padded
    positions_mask = tf.sequence_mask(
        lengths=lengths, maxlen=maxlen, dtype=tf.float32)
    positions_embed = pe_batch * tf.expand_dims(positions_mask, 2)

    return positions_embed
metric_specs.py (project: conv_seq2seq, author: tobyyouup)
def create_metric_ops(self, _inputs, labels, predictions):
    """Creates the metric op"""
    loss_mask = tf.sequence_mask(
        lengths=tf.to_int32(labels["target_len"] - 1),
        maxlen=tf.to_int32(tf.shape(predictions["losses"])[1]))
    return metrics.streaming_mean(predictions["losses"], loss_mask)
losses.py (project: tefla, author: openAGI)
def _mask_by_length(t, length):
    """Zeroes out entries of `t` past each sequence's length along axis 1."""
    maxlen = t.get_shape().as_list()[1]
    mask = tf.sequence_mask(length, maxlen=maxlen)
    mask = tf.expand_dims(tf.cast(mask, tf.float32), -1)  # [batch, time, 1]
    return t * mask
metrics.py (project: tefla, author: openAGI)
def create_metric_ops(self, _inputs, labels, predictions):
        """Creates the metric op"""
        loss_mask = tf.sequence_mask(
            lengths=tf.to_int32(labels["target_len"] - 1),
            maxlen=tf.to_int32(tf.shape(predictions["losses"])[1]))
        return metrics.streaming_mean(predictions["losses"], loss_mask)
encoder.py (project: tefla, author: openAGI)
def _create_position_embedding(embedding_dim, num_positions, lengths, maxlen):
    """Creates position embeddings.

    Args:
      embedding_dim: Dimensionality of the embeddings. An integer.
      num_positions: The number of positions to be embedded. For example,
        if you have inputs of length up to 100, this should be 100. An integer.
      lengths: The lengths of the inputs to create position embeddings for.
        An int32 tensor of shape `[batch_size]`.
      maxlen: The maximum length of the input sequence to create position
        embeddings for. An int32 tensor.

    Returns:
      A tensor of shape `[batch_size, maxlen, embedding_dim]` that contains
      embeddings for each position. All elements past `lengths` are zero.
    """
    # Create constant position encodings
    position_encodings = tf.constant(
        _position_encoding(num_positions, embedding_dim),
        name="position_encoding")

    # Slice to size of current sequence
    pe_slice = position_encodings[:maxlen, :]
    # Replicate encodings for each element in the batch
    batch_size = tf.shape(lengths)[0]
    pe_batch = tf.tile([pe_slice], [batch_size, 1, 1])

    # Mask out positions that are padded
    positions_mask = tf.sequence_mask(
        lengths=lengths, maxlen=maxlen, dtype=tf.float32)
    positions_embed = pe_batch * tf.expand_dims(positions_mask, 2)

    return positions_embed
multihead_attention.py (project: attention, author: louishenrifranc)
def create_mask_for_keys(self, keys, keys_length):
        # batch_size x keys_l
        mask = 1 - tf.sequence_mask(lengths=keys_length, maxlen=keys.get_shape().as_list()[1], dtype=tf.float32)
        mask *= -2 ** 30
        mask = tf.expand_dims(tf.expand_dims(mask, 1), 1)  # batch_size x 1 x 1 x keys_l
        return mask
multihead_attention.py (project: attention, author: louishenrifranc)
def create_mask_for_queries(self, queries, queries_len):
        # batch_size x queries_l
        mask = tf.sequence_mask(lengths=queries_len, maxlen=queries.get_shape().as_list()[1], dtype=tf.float32)
        mask = tf.expand_dims(tf.expand_dims(mask, 1), -1)  # batch_size x 1 x queries x 1
        return mask
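A hedged sketch of how these two masks are typically combined with multi-head attention logits of shape [batch, heads, queries_l, keys_l]: the key mask is added before the softmax (padded keys receive a ~-2^30 logit), and the query mask is multiplied in afterwards to zero the rows of padded queries. Shapes and names below are assumptions, not the project's exact call sites:

import tensorflow as tf

batch, heads, queries_l, keys_l = 8, 4, 10, 12
logits = tf.random_normal([batch, heads, queries_l, keys_l])
keys_length = tf.constant([12, 12, 12, 12, 7, 7, 7, 7])      # illustrative lengths
queries_len = tf.constant([10, 10, 10, 10, 5, 5, 5, 5])

# Same construction as create_mask_for_keys / create_mask_for_queries above
key_mask = 1 - tf.sequence_mask(keys_length, maxlen=keys_l, dtype=tf.float32)
key_mask *= -2 ** 30
key_mask = tf.expand_dims(tf.expand_dims(key_mask, 1), 1)       # [batch, 1, 1, keys_l]

query_mask = tf.sequence_mask(queries_len, maxlen=queries_l, dtype=tf.float32)
query_mask = tf.expand_dims(tf.expand_dims(query_mask, 1), -1)  # [batch, 1, queries_l, 1]

weights = tf.nn.softmax(logits + key_mask, dim=-1)              # padded keys get ~0 weight
weights *= query_mask                                           # padded query rows zeroed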
decoder.py (project: attention, author: louishenrifranc)
def _build(self, inputs, sequence_length, labels, encoder_output, encoder_sequence_length, embedding_lookup=None):
        if embedding_lookup is None:
            output = PositionnalEmbedding(**self.embed_params)(inputs)
        else:
            output = embedding_lookup(inputs)
        output = tf.layers.dropout(
            output, self.params.dropout_rate)

        for _ in range(self.params.num_blocks):
            output = DecoderBlock(**self.block_params)(output, sequence_length,
                                                       encoder_output, encoder_sequence_length)

        logits = tf.contrib.layers.fully_connected(
            output, self.params.vocab_size)

        max_sequence_length = tf.shape(inputs)[1]
        one_hot_labels = tf.one_hot(labels, self.params.vocab_size, axis=-1)
        with tf.name_scope("loss"):
            mask_loss = tf.sequence_mask(sequence_length, maxlen=max_sequence_length, dtype=tf.float32)
            one_hot_labels = tf.reshape(one_hot_labels, [-1, self.params.vocab_size])
            loss = tf.nn.softmax_cross_entropy_with_logits(logits=tf.reshape(logits, [-1, self.params.vocab_size]),
                                                           labels=one_hot_labels)
            loss = tf.reshape(loss, [-1, max_sequence_length])
            loss *= mask_loss
            loss = tf.reduce_sum(loss, 1) / tf.reduce_sum(mask_loss, 1)
            mean_loss = tf.reduce_mean(loss)

            pred = tf.argmax(logits, axis=-1)
            acc = tf.equal(pred, labels)
            acc = tf.reduce_sum(tf.to_float(acc) * mask_loss, 1) / tf.reduce_sum(mask_loss, 1)
            acc = tf.reduce_mean(acc, name="accuracy")
        return mean_loss, tf.nn.log_softmax(logits)
coref_model.py (project: e2e-coref, author: kentonl)
def get_mention_emb(self, text_emb, text_outputs, mention_starts, mention_ends):
    mention_emb_list = []

    mention_start_emb = tf.gather(text_outputs, mention_starts) # [num_mentions, emb]
    mention_emb_list.append(mention_start_emb)

    mention_end_emb = tf.gather(text_outputs, mention_ends) # [num_mentions, emb]
    mention_emb_list.append(mention_end_emb)

    mention_width = 1 + mention_ends - mention_starts # [num_mentions]
    if self.config["use_features"]:
      mention_width_index = mention_width - 1 # [num_mentions]
      mention_width_emb = tf.gather(tf.get_variable("mention_width_embeddings", [self.config["max_mention_width"], self.config["feature_size"]]), mention_width_index) # [num_mentions, emb]
      mention_width_emb = tf.nn.dropout(mention_width_emb, self.dropout)
      mention_emb_list.append(mention_width_emb)

    if self.config["model_heads"]:
      mention_indices = tf.expand_dims(tf.range(self.config["max_mention_width"]), 0) + tf.expand_dims(mention_starts, 1) # [num_mentions, max_mention_width]
      mention_indices = tf.minimum(util.shape(text_outputs, 0) - 1, mention_indices) # [num_mentions, max_mention_width]
      mention_text_emb = tf.gather(text_emb, mention_indices) # [num_mentions, max_mention_width, emb]
      self.head_scores = util.projection(text_outputs, 1) # [num_words, 1]
      mention_head_scores = tf.gather(self.head_scores, mention_indices) # [num_mentions, max_mention_width, 1]
      mention_mask = tf.expand_dims(tf.sequence_mask(mention_width, self.config["max_mention_width"], dtype=tf.float32), 2) # [num_mentions, max_mention_width, 1]
      mention_attention = tf.nn.softmax(mention_head_scores + tf.log(mention_mask), dim=1) # [num_mentions, max_mention_width, 1]
      mention_head_emb = tf.reduce_sum(mention_attention * mention_text_emb, 1) # [num_mentions, emb]
      mention_emb_list.append(mention_head_emb)

    mention_emb = tf.concat(mention_emb_list, 1) # [num_mentions, emb]
    return mention_emb
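The tf.log(mention_mask) term above is the additive form of masking: log(1) = 0 leaves valid positions untouched, and log(0) = -inf pushes padded positions to zero weight after the softmax. The trick in isolation:

import tensorflow as tf

scores = tf.constant([[2.0, 1.0, 3.0]])                          # [1, max_width]
mask = tf.sequence_mask([2], maxlen=3, dtype=tf.float32)         # [[1., 1., 0.]]
attn = tf.nn.softmax(scores + tf.log(mask), dim=1)               # last position gets 0 weight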
layers.py (project: R-net, author: minsangkim142)
def mask_attn_score(score, memory_sequence_length, score_mask_value = -1e8):
    score_mask = tf.sequence_mask(
        memory_sequence_length, maxlen=score.shape[1])
    score_mask_values = score_mask_value * tf.ones_like(score)
    return tf.where(score_mask, score, score_mask_values)
test_seq2seq.py (project: sequencing, author: SwordYork)
def cross_entropy_sequence_loss(logits, targets, sequence_length):
    with tf.name_scope("cross_entropy_sequence_loss"):
        losses = tf.nn.sparse_softmax_cross_entropy_with_logits(
            logits=logits, labels=targets)

        # Mask out the losses we don't care about
        loss_mask = tf.sequence_mask(
            tf.to_int32(sequence_length), tf.to_int32(tf.shape(targets)[0]))
        losses = losses * tf.transpose(tf.to_float(loss_mask), [1, 0])

        return losses
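Note that this loss expects time-major inputs: logits are [time, batch, vocab] and targets are [time, batch], which is why the mask is built with maxlen = tf.shape(targets)[0] (the time dimension) and then transposed to [time, batch] before the multiplication. A hedged shape check:

import tensorflow as tf

time_steps, batch, vocab = 5, 3, 20
logits = tf.random_normal([time_steps, batch, vocab])
targets = tf.zeros([time_steps, batch], dtype=tf.int32)
sequence_length = tf.constant([5, 2, 4])

losses = cross_entropy_sequence_loss(logits, targets, sequence_length)  # [time, batch]
# Entries beyond each sequence_length are multiplied by 0 and drop out of the loss.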
elmo.py (project: document-qa, author: allenai)
def apply(self, is_train, x, mask=None):
        mask = tf.sequence_mask(mask, tf.shape(x)[1])
        output = weight_layers(1, x, mask, self.l2_coef, do_layer_norm=self.layer_norm,
                               use_top_only=self.top_layer_only)["weighted_ops"][0]
        return output
similarity_layers.py (project: document-qa, author: allenai)
def compute_attention_mask(x_mask, mem_mask, x_word_dim, key_word_dim):
    """ computes a (batch, x_word_dim, key_word_dim) bool mask for clients that want masking """
    if x_mask is None and mem_mask is None:
        return None
    elif x_mask is None or mem_mask is None:
        raise NotImplementedError()

    x_mask = tf.sequence_mask(x_mask, x_word_dim)
    mem_mask = tf.sequence_mask(mem_mask, key_word_dim)
    join_mask = tf.logical_and(tf.expand_dims(x_mask, 2), tf.expand_dims(mem_mask, 1))
    return join_mask
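A hedged shape sketch: x_mask and mem_mask here are length vectors ([batch] int tensors), and the joint mask broadcasts them into a [batch, x_word_dim, key_word_dim] boolean tensor:

import tensorflow as tf

x_len = tf.constant([3, 1])       # lengths of the "x" sequences
mem_len = tf.constant([2, 4])     # lengths of the memory/key sequences
join_mask = compute_attention_mask(x_len, mem_len, x_word_dim=3, key_word_dim=4)
# join_mask[b, i, j] is True iff i < x_len[b] and j < mem_len[b]; shape [2, 3, 4]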
span_prediction.py (project: document-qa, author: allenai)
def get_mean_logit(self):
        logits = (self.start_logits + self.end_logits) / 2.0
        bol_mask = tf.sequence_mask(self.mask, tf.shape(self.start_logits)[1])
        bol_mask = tf.cast(bol_mask, tf.float32)
        return tf.reduce_sum(logits*bol_mask, axis=[1]) / tf.reduce_sum(bol_mask, axis=[1])

