Python reduce_logsumexp() usage examples (source code)
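
All of the snippets below use tf.reduce_logsumexp(), which computes log(sum(exp(x))) along the given axis in a numerically stable way by factoring out the maximum before exponentiating. Here is a minimal sketch of the identity in the same TF1-style API as the snippets; the tensor values are made up for illustration:

import tensorflow as tf

x = tf.constant([[1000.0, 1000.0], [0.0, 0.0]])

naive = tf.log(tf.reduce_sum(tf.exp(x), axis=1))  # overflows to inf in the first row
stable = tf.reduce_logsumexp(x, axis=1)           # [1000 + log(2), log(2)]

# manual equivalent, mirroring the logaddexp helper in the first snippet below
mx = tf.reduce_max(x, axis=1, keep_dims=True)
manual = tf.reshape(tf.log(tf.reduce_sum(tf.exp(x - mx), axis=1, keep_dims=True)) + mx, [-1])

with tf.Session() as sess:
    print(sess.run([naive, stable, manual]))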

parser.py (project: arc-swift, author: qipeng)
def ASw_transition_loss_pred(self, i, j, combined_head, combined_dep, transition_logit, SHIFT):
        # extract relevant portions of params
        rel_trans_feat_ids = self.trans_feat_ids[i*self.args.beam_size+j] if not self.train else self.trans_feat_ids[i, j]
        rel_trans_feat_size = self.trans_feat_sizes[i*self.args.beam_size+j] if not self.train else self.trans_feat_sizes[i, j]

        # core computations
        has_shift = tf.cond(tf.equal(rel_trans_feat_ids[0, 0], SHIFT), lambda: tf.constant(1), lambda: tf.constant(0))
        arc_trans_count = rel_trans_feat_size - has_shift

        arc_trans_feat_ids = tf.gather(rel_trans_feat_ids, tf.range(has_shift, rel_trans_feat_size))
        rel_head = tf.reshape(tf.gather(combined_head, arc_trans_feat_ids[:, 1]), [arc_trans_count, self.args.rel_emb_dim])
        rel_dep  = tf.reshape(tf.gather(combined_dep,  arc_trans_feat_ids[:, 2]), [arc_trans_count, self.args.rel_emb_dim])

        rel_hid = self.rel_merge(rel_head, rel_dep)
        rel_logit = self.rel_dense(rel_hid)
        arc_logit = tf.reshape(rel_logit, [-1])

        def logaddexp(a, b):
            mx = tf.maximum(a, b)
            return tf.log(tf.exp(a-mx) + tf.exp(b-mx)) + mx

        if self.train:
            # compute a loss and return it
            log_partition = tf.reduce_logsumexp(arc_logit)
            log_partition = tf.cond(tf.greater(has_shift, 0),
                    lambda: logaddexp(log_partition, transition_logit[rel_trans_feat_ids[0, 3]]),
                    lambda: log_partition)
            arc_logit = log_partition - arc_logit

            res = tf.cond(tf.greater(has_shift, 0),
                        lambda: tf.cond(tf.greater(self.trans_labels[i, j], 0),
                            lambda: arc_logit[self.trans_labels[i, j]-1],
                            lambda: log_partition - transition_logit[rel_trans_feat_ids[0, 3]]),
                        lambda: arc_logit[self.trans_labels[i, j]])

            return res
        else:
            # just return predictions
            arc_logit = tf.reshape(rel_logit, [-1])
            log_partition = tf.reduce_logsumexp(arc_logit)
            log_partition = tf.cond(tf.greater(has_shift, 0),
                    lambda: logaddexp(log_partition, transition_logit[rel_trans_feat_ids[0, 3]]),
                    lambda: log_partition)
            arc_logit = log_partition - arc_logit

            arc_pred = tf.cond(tf.greater(has_shift, 0),
                lambda: tf.concat([tf.reshape(log_partition - transition_logit[rel_trans_feat_ids[0, 3]], (-1,1)),
                         tf.reshape(arc_logit, (-1,1))], 0),
                lambda: tf.reshape(arc_logit, (-1, 1)))

            # pad the output up to the fixed size self.pred_output_size; the padding
            # values are huge because these scores are negative log-probabilities
            current_output_shape = has_shift + arc_trans_count * rel_logit.get_shape()[1]
            arc_pred = tf.concat([arc_pred, 1e20 * tf.ones((tf.subtract(self.pred_output_size, current_output_shape), 1), dtype=tf.float32)], 0)
            arc_pred = tf.reshape(arc_pred, [-1])

            return arc_pred
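
The training branch above turns the per-transition scores into a cross-entropy by normalizing in log space: the partition function over all candidate transitions is a reduce_logsumexp over the arc logits, optionally combined with the SHIFT logit via logaddexp, and the loss for the gold transition is log_partition minus its logit. A standalone sketch of that pattern with made-up toy scores (TF1-style):

import tensorflow as tf

def logaddexp(a, b):
    mx = tf.maximum(a, b)
    return tf.log(tf.exp(a - mx) + tf.exp(b - mx)) + mx

arc_logit = tf.constant([2.0, 0.5, -1.0])  # scores of the arc-inducing transitions
shift_logit = tf.constant(1.0)             # score of an optional SHIFT transition

log_partition = logaddexp(tf.reduce_logsumexp(arc_logit), shift_logit)

gold = 0                               # index of the gold arc transition (toy value)
nll = log_partition - arc_logit[gold]  # negative log-likelihood of that transition

with tf.Session() as sess:
    print(sess.run([log_partition, nll]))
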
library.py (project: text2text, author: google)
def bow_loss_by_example(logits,
                        targets,
                        weights,
                        average_across_timesteps=False):
  """Loss for a bow of logits (per example).

  As opposed to sequence loss, this is supposed to ignore the order.
  Does not seem to work yet.

  Args:
    logits: List of 2D Tensors of shape [batch_size x num_decoder_symbols].
    targets: List of 1D batch-sized int32 Tensors of the same length as
      logits.
    weights: List of 1D batch-sized float-Tensors of the same length as
      logits.
    average_across_timesteps: If set, divide the returned cost by the total
      label weight.

  Returns:
    1D batch-sized float Tensor: The loss for each bow.

  Raises:
    ValueError: If len(logits) is different from len(targets) or len(weights).
  """
  if len(targets) != len(logits) or len(weights) != len(logits):
    raise ValueError('Lengths of logits, weights, and targets must be the same '
                     '%d, %d, %d.' % (len(logits), len(weights), len(targets)))

  batch_size = logits[0].shape[0]
  vocab_size = logits[0].shape[1]

  logitssum = tf.zeros((batch_size, vocab_size), tf.float32)
  targetset = tf.zeros((batch_size, vocab_size), tf.float32)
  for target, weight in zip(targets, weights):
    targetset += (tf.one_hot(target, vocab_size) * weight[:, None])
  weight = tf.ones((batch_size), tf.float32)
  for logit in logits:
    softmax = tf.nn.softmax(logit)
    # accumulate the weight-scaled softmax distribution for this position
    logitssum += (softmax * weight[:, None])
    weight = tf.maximum(0.0, weight - softmax[:, 3])

  # logitssum = tf.minimum(logitssum, 1.0)
  # targetset = tf.minimum(targetset, 1.0)
  # loss = tf.nn.sigmoid_cross_entropy_with_logits(
  #     labels=targetset, logits=logitssum)

  loss = tf.reduce_sum(tf.squared_difference(logitssum, targetset), axis=1)

  # crossent = tf.maximum(logitssum, 0.0) - (
  #     logitssum * targetset) + tf.log(1.0 + tf.exp(-1.0 * tf.abs(logitssum)))
  # log_perps = tf.reduce_logsumexp(crossent, axis=1)

  if average_across_timesteps:
    total_size = tf.add_n(weights)
    total_size += 1e-12  # Just to avoid division by 0 for all-0 weights.
    loss /= total_size

  return loss
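
A hypothetical call of bow_loss_by_example with toy shapes (TF1-style). The lists of logits, targets, and weights must have equal length; token id 3, whose softmax mass decays the running weight inside the loop, is assumed here to be the model's stop symbol:

import tensorflow as tf

batch_size, vocab_size, steps = 2, 10, 3
logits = [tf.random_normal([batch_size, vocab_size]) for _ in range(steps)]
targets = [tf.constant([4, 7]), tf.constant([1, 2]), tf.constant([3, 3])]
weights = [tf.ones([batch_size]) for _ in range(steps)]

loss = bow_loss_by_example(logits, targets, weights)  # one loss value per example

with tf.Session() as sess:
    print(sess.run(loss))  # shape (2,)
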
loss.py (project: tensorflow-layer-library, author: bioinf-jku)
def blurred_cross_entropy(output, target, filter_size=11, sampling_range=3.5, pixel_weights=None):
    """
    Apply a Gaussian smoothing filter to the target probabilities (i.e. the one-hot 
    representation of target) and compute the cross entropy loss between softmax(output) 
    and the blurred target probabilities. 

    :param output: A rank-4 or rank-5 tensor with shape=(samples, [sequence_position,] x, y, num_classes) 
        representing the network input of the output layer (not activated)
    :param target: A rank-3 or rank-4 tensor with shape=(samples, [sequence_position,] x, y) representing 
        the target labels. It must contain int values in 0..num_classes-1. 
    :param filter_size: An int or length-2 list of ints specifying the size of the Gaussian filter that
        will be applied to the target probabilities.
    :param sampling_range: Sampling range of the Gaussian filter, forwarded to gaussian_blur().
    :param pixel_weights: A rank-3 or rank-4 tensor with shape=(samples, [sequence_position,] x, y)
        representing factors that will be applied to the loss of the corresponding pixels. This can be
        used, e.g., to void certain pixels by weighting them with 0, i.e. to suppress their error contribution.
    :return: A scalar operation representing the blurred cross entropy loss. 
    """
    # convert target to one-hot
    output_shape = output.shape.as_list()
    one_hot = tf.one_hot(target, output_shape[-1], dtype=tf.float32)

    if (len(output_shape) > 4):
        one_hot = tf.reshape(one_hot, [np.prod(output_shape[:-3])] + output_shape[-3:])

    # blur target probabilities
    #gauss_filter = weight_gauss_conv2d(filter_size + [output_shape[-1], 1])
    #blurred_target = tf.nn.depthwise_conv2d(one_hot, gauss_filter, [1, 1, 1, 1], 'SAME')
    blurred_target = gaussian_blur(one_hot, filter_size, sampling_range)

    if (len(output_shape) > 4):
        blurred_target = tf.reshape(blurred_target, output_shape)

    # compute log softmax predictions and cross entropy
    log_pred = output - tf.reduce_logsumexp(output, axis=[len(output_shape) - 1], keep_dims=True)

    # Apply pixel-wise weighting
    if pixel_weights is not None:
        log_pred *= pixel_weights

    cross_entropy = -tf.reduce_sum(blurred_target * log_pred, axis=[len(output_shape)-1])

    if pixel_weights is not None:
        loss = tf.reduce_sum(cross_entropy) / tf.reduce_sum(pixel_weights)
    else:
        loss = tf.reduce_mean(cross_entropy)

    return loss
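
The log_pred line above is the standard numerically stable log-softmax: subtracting the per-class reduce_logsumexp from the logits gives log(softmax(logits)) without ever forming the softmax explicitly. A minimal sketch with made-up logits (TF1-style):

import tensorflow as tf

logits = tf.constant([[2.0, 1.0, 0.1]])

log_pred_a = logits - tf.reduce_logsumexp(logits, axis=-1, keep_dims=True)
log_pred_b = tf.log(tf.nn.softmax(logits))  # same values, but less stable for extreme logits

with tf.Session() as sess:
    print(sess.run([log_pred_a, log_pred_b]))
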
variational_inference.py (project: proximity_vi, author: altosaar)
def build_elbo(self, n_samples, training=False):
    cfg = self.config
    reuse = False
    if training:
      reuse = True
    z = self.variational.sample(self.data, n_samples=n_samples, reuse=reuse)
    log_q_z = self.variational.log_prob(z, reuse=reuse)
    self.log_q_z = log_q_z
    log_p_x_z = self.model.log_prob(self.data, z, reuse=reuse)
    if cfg['optim/deterministic_annealing'] and training:
      self.build_magnitude()
      tf.summary.scalar('c/magnitude', self.magnitude)
      magnitude = tf.maximum(1., self.magnitude)
      elbo = log_p_x_z - magnitude * log_q_z
    else:
      elbo = log_p_x_z - log_q_z
    if training:
      self.elbo_loss = elbo
      _, variance = tf.nn.moments(elbo, [0])
      self.elbo_variance = tf.reduce_mean(variance)
      self.log_q_z_loss = log_q_z
      self.variational.build_entropy(z)
      self.q_z_sample = z
      slim.summarize_collection('variational')
      slim.summarize_collection('model')
      slim.summarize_activations('variational')
      slim.summarize_activations('model')
    else:
      self.elbo = elbo
      self.log_q_z = log_q_z
      self.log_p_x_hat = (tf.reduce_logsumexp(elbo, [0], keep_dims=True) -
                          tf.log(float(cfg['q/n_samples_stats'])))
      tf.summary.scalar('o/log_p_x_hat', tf.reduce_mean(self.log_p_x_hat))

      def sum_mean(x): return tf.reduce_sum(tf.reduce_mean(x, 0))
      self.elbo_sum = sum_mean(elbo)
      self.q_entropy = -sum_mean(log_q_z)
      self.E_log_lik = sum_mean(log_p_x_z)
      tf.summary.scalar('o/elbo_sum', sum_mean(elbo))
      tf.summary.scalar('o/elbo_mean', sum_mean(elbo) / cfg['batch_size'])
      tf.summary.scalar('o/E_log_q_z', sum_mean(log_q_z))
      tf.summary.scalar('o/E_log_p_x_z', self.E_log_lik)
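
In the non-training branch above, log_p_x_hat is the importance-sampled estimate of the marginal log-likelihood: the n importance weights exp(elbo) are averaged in log space, i.e. reduce_logsumexp over the sample axis minus log(n). A standalone sketch with made-up sample values (TF1-style; shapes are hypothetical):

import tensorflow as tf

n_samples = 4
# elbo[k, i] = log p(x_i, z_k) - log q(z_k | x_i) for sample k and data point i
elbo = tf.random_normal([n_samples, 8])

log_p_x_hat = (tf.reduce_logsumexp(elbo, [0], keep_dims=True)
               - tf.log(float(n_samples)))

with tf.Session() as sess:
    print(sess.run(tf.reduce_mean(log_p_x_hat)))
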
span_prediction.py (project: document-qa, author: allenai)
def predict(self, answer, start_logits, end_logits, mask) -> Prediction:
        bound = self.bound
        f1_weight = self.f1_weight
        aggregate = self.aggregate
        masked_logits1 = exp_mask(start_logits, mask)
        masked_logits2 = exp_mask(end_logits, mask)

        span_logits = []
        for i in range(self.bound):
            if i == 0:
                span_logits.append(masked_logits1 + masked_logits2)
            else:
                span_logits.append(masked_logits1[:, :-i] + masked_logits2[:, i:])
        span_logits = tf.concat(span_logits, axis=1)
        l = tf.shape(start_logits)[1]

        if len(answer) == 1:
            answer = answer[0]
            if answer.dtype == tf.int32:
                if f1_weight == 0:
                    answer_ix = to_packed_coordinates(answer, l, bound)
                    loss = tf.reduce_mean(
                        tf.nn.sparse_softmax_cross_entropy_with_logits(logits=span_logits, labels=answer_ix))
                else:
                    f1_mask = packed_span_f1_mask(answer, l, bound)
                    if f1_weight < 1:
                        f1_mask *= f1_weight
                        f1_mask += (1 - f1_weight) * tf.one_hot(to_packed_coordinates(answer, l, bound), l)
                    # TODO can we stay in log space?  (actually its tricky since f1_mask can have zeros...)
                    probs = tf.nn.softmax(span_logits)
                    loss = -tf.reduce_mean(tf.log(tf.reduce_sum(probs * f1_mask, axis=1)))
            else:
                log_norm = tf.reduce_logsumexp(span_logits, axis=1)
                if aggregate == "sum":
                    log_score = tf.reduce_logsumexp(
                        span_logits + VERY_NEGATIVE_NUMBER * (1 - tf.cast(answer, tf.float32)),
                        axis=1)
                elif aggregate == "max":
                    log_score = tf.reduce_max(span_logits + VERY_NEGATIVE_NUMBER * (1 - tf.cast(answer, tf.float32)),
                                              axis=1)
                else:
                    raise NotImplementedError()
                loss = tf.reduce_mean(-(log_score - log_norm))
        else:
            raise NotImplementedError()

        tf.add_to_collection(tf.GraphKeys.LOSSES, loss)
        return PackedSpanPrediction(span_logits, l, bound)
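
In the multi-answer branch above (answer given as a span indicator rather than int32 coordinates, with aggregate == "sum"), the loss is the negative log of the total probability mass assigned to the correct spans, computed entirely in log space: log_norm is a reduce_logsumexp over every candidate span, and log_score a reduce_logsumexp restricted to the correct spans by pushing the rest down with a very negative constant. A toy sketch with made-up logits (TF1-style; VERY_NEGATIVE_NUMBER is defined locally here):

import tensorflow as tf

VERY_NEGATIVE_NUMBER = -1e29
span_logits = tf.constant([[2.0, 0.5, -1.0, 0.0]])  # one example, four candidate spans
answer = tf.constant([[1, 0, 0, 1]])                # spans 0 and 3 are both correct

log_norm = tf.reduce_logsumexp(span_logits, axis=1)
log_score = tf.reduce_logsumexp(
    span_logits + VERY_NEGATIVE_NUMBER * (1 - tf.cast(answer, tf.float32)),
    axis=1)
loss = tf.reduce_mean(-(log_score - log_norm))  # cross-entropy against the span set

with tf.Session() as sess:
    print(sess.run(loss))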

