def ASw_transition_loss_pred(self, i, j, combined_head, combined_dep, transition_logit, SHIFT):
    # extract the relevant portions of the transition features for this (i, j) position
    rel_trans_feat_ids = self.trans_feat_ids[i*self.args.beam_size+j] if not self.train else self.trans_feat_ids[i, j]
    rel_trans_feat_size = self.trans_feat_sizes[i*self.args.beam_size+j] if not self.train else self.trans_feat_sizes[i, j]

    # core computations: split the optional SHIFT transition off from the arc transitions
    has_shift = tf.cond(tf.equal(rel_trans_feat_ids[0, 0], SHIFT), lambda: tf.constant(1), lambda: tf.constant(0))
    arc_trans_count = rel_trans_feat_size - has_shift
    arc_trans_feat_ids = tf.gather(rel_trans_feat_ids, tf.range(has_shift, rel_trans_feat_size))

    # score each candidate arc from its head and dependent representations
    rel_head = tf.reshape(tf.gather(combined_head, arc_trans_feat_ids[:, 1]), [arc_trans_count, self.args.rel_emb_dim])
    rel_dep = tf.reshape(tf.gather(combined_dep, arc_trans_feat_ids[:, 2]), [arc_trans_count, self.args.rel_emb_dim])
    rel_hid = self.rel_merge(rel_head, rel_dep)
    rel_logit = self.rel_dense(rel_hid)
    arc_logit = tf.reshape(rel_logit, [-1])

    def logaddexp(a, b):
        # numerically stable log(exp(a) + exp(b))
        mx = tf.maximum(a, b)
        return tf.log(tf.exp(a - mx) + tf.exp(b - mx)) + mx

    if self.train:
        # compute the negative log-likelihood of the gold transition and return it
        log_partition = tf.reduce_logsumexp(arc_logit)
        log_partition = tf.cond(tf.greater(has_shift, 0),
                                lambda: logaddexp(log_partition, transition_logit[rel_trans_feat_ids[0, 3]]),
                                lambda: log_partition)
        arc_logit = log_partition - arc_logit
        res = tf.cond(tf.greater(has_shift, 0),
                      lambda: tf.cond(tf.greater(self.trans_labels[i, j], 0),
                                      lambda: arc_logit[self.trans_labels[i, j]-1],
                                      lambda: log_partition - transition_logit[rel_trans_feat_ids[0, 3]]),
                      lambda: arc_logit[self.trans_labels[i, j]])
        return res
    else:
        # just return predictions: negative log-probabilities for every candidate transition
        log_partition = tf.reduce_logsumexp(arc_logit)
        log_partition = tf.cond(tf.greater(has_shift, 0),
                                lambda: logaddexp(log_partition, transition_logit[rel_trans_feat_ids[0, 3]]),
                                lambda: log_partition)
        arc_logit = log_partition - arc_logit
        arc_pred = tf.cond(tf.greater(has_shift, 0),
                           lambda: tf.concat([tf.reshape(log_partition - transition_logit[rel_trans_feat_ids[0, 3]], (-1, 1)),
                                              tf.reshape(arc_logit, (-1, 1))], 0),
                           lambda: tf.reshape(arc_logit, (-1, 1)))
        # pad with large values (i.e. near-zero probability) up to the fixed prediction size
        current_output_shape = has_shift + arc_trans_count * rel_logit.get_shape()[1]
        arc_pred = tf.concat([arc_pred, 1e20 * tf.ones((tf.subtract(self.pred_output_size, current_output_shape), 1), dtype=tf.float32)], 0)
        arc_pred = tf.reshape(arc_pred, [-1])
        return arc_pred
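The logaddexp helper above is the standard max-shift trick that also underlies tf.reduce_logsumexp: log(exp(a) + exp(b)) is evaluated as max(a, b) + log(exp(a - max) + exp(b - max)) so the exponentials never overflow, and subtracting the resulting log-partition from the logits yields normalized log-probabilities. A minimal NumPy sketch of the same identity (a standalone illustration, not part of the parser code):

import numpy as np

def logaddexp(a, b):
    # max-shift keeps the exponentials in a safe range
    mx = np.maximum(a, b)
    return np.log(np.exp(a - mx) + np.exp(b - mx)) + mx

logits = np.array([1000.0, 1001.0, 999.0])
naive = np.log(np.exp(logits).sum())                              # overflows to inf
stable = logits.max() + np.log(np.exp(logits - logits.max()).sum())
print(naive, stable)                                              # inf vs. ~1001.41
log_probs = logits - stable                                       # normalized log-probabilities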
Python tf.reduce_logsumexp() usage examples (source code)
def bow_loss_by_example(logits,
                        targets,
                        weights,
                        average_across_timesteps=False):
    """Loss for a bow of logits (per example).
    As opposed to sequence loss this is supposed to ignore the order.
    Does not seem to work yet.
    Args:
      logits: List of 2D Tensors of shape [batch_size x num_decoder_symbols].
      targets: List of 1D batch-sized int32 Tensors of the same length as logits.
      weights: List of 1D batch-sized float-Tensors of the same length as logits.
      average_across_timesteps: If set, divide the returned cost by the total
        label weight.
    Returns:
      1D batch-sized float Tensor: The loss for each bow.
    Raises:
      ValueError: If len(logits) is different from len(targets) or len(weights).
    """
    if len(targets) != len(logits) or len(weights) != len(logits):
        raise ValueError('Lengths of logits, weights, and targets must be the same '
                         '%d, %d, %d.' % (len(logits), len(weights), len(targets)))
    batch_size = logits[0].shape[0]
    vocab_size = logits[0].shape[1]
    logitssum = tf.zeros((batch_size, vocab_size), tf.float32)
    targetset = tf.zeros((batch_size, vocab_size), tf.float32)
    # accumulate a weighted bag-of-words representation of the targets
    for target, weight in zip(targets, weights):
        targetset += (tf.one_hot(target, vocab_size) * weight[:, None])
    # accumulate the predicted token probabilities, decaying the per-example weight
    # by the probability mass placed on token id 3 at each step
    weight = tf.ones((batch_size), tf.float32)
    for logit in logits:
        softmax = tf.nn.softmax(logit)
        logitssum += (softmax * weight[:, None])
        weight = tf.maximum(0.0, weight - softmax[:, 3])
    # logitssum = tf.minimum(logitssum, 1.0)
    # targetset = tf.minimum(targetset, 1.0)
    # loss = tf.nn.sigmoid_cross_entropy_with_logits(
    #     labels=targetset, logits=logitssum)
    loss = tf.reduce_sum(tf.squared_difference(logitssum, targetset), axis=1)
    # crossent = tf.maximum(logitssum, 0.0) - (
    #     logitssum * targetset) + tf.log(1.0 + tf.exp(-1.0 * tf.abs(logitssum)))
    # log_perps = tf.reduce_logsumexp(crossent, axis=1)
    if average_across_timesteps:
        total_size = tf.add_n(weights)
        total_size += 1e-12  # Just to avoid division by 0 for all-0 weights.
        loss /= total_size
    return loss
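The two loops above build order-free representations: the target loop sums weighted one-hot vectors into a per-example bag-of-words, and the logits loop accumulates the predicted token probabilities the same way. A small NumPy sketch of the target accumulation, with a made-up toy vocabulary and padding weights, purely for illustration:

import numpy as np

vocab_size = 6
targets = [np.array([2, 4]), np.array([4, 1])]            # two timesteps, batch of 2
weights = [np.array([1.0, 1.0]), np.array([1.0, 0.0])]    # second example padded at t=1

targetset = np.zeros((2, vocab_size))
for target, weight in zip(targets, weights):
    one_hot = np.eye(vocab_size)[target]                  # shape (batch, vocab)
    targetset += one_hot * weight[:, None]
print(targetset)
# example 0 counts tokens 2 and 4 once each; example 1 only counts token 4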
def blurred_cross_entropy(output, target, filter_size=11, sampling_range=3.5, pixel_weights=None):
    """
    Apply a Gaussian smoothing filter to the target probabilities (i.e. the one-hot
    representation of target) and compute the cross entropy loss between softmax(output)
    and the blurred target probabilities.
    :param output: A rank-4 or rank-5 tensor with shape=(samples, [sequence_position,] x, y, num_classes)
        representing the network input of the output layer (not activated)
    :param target: A rank-3 or rank-4 tensor with shape=(samples, [sequence_position,] x, y) representing
        the target labels. It must contain int values in 0..num_classes-1.
    :param filter_size: A length-2 list of int specifying the size of the Gaussian filter that will be
        applied to the target probabilities.
    :param sampling_range: Sampling range of the Gaussian filter applied to the target probabilities.
    :param pixel_weights: A rank-3 or rank-4 tensor with shape=(samples, [sequence_position,] x, y)
        representing factors that will be applied to the loss of the corresponding pixel. This can be
        used, e.g., to mask out certain pixels by weighting them with 0, i.e. to suppress their
        contribution to the error.
    :return: A scalar operation representing the blurred cross entropy loss.
    """
    # convert target to one-hot
    output_shape = output.shape.as_list()
    one_hot = tf.one_hot(target, output_shape[-1], dtype=tf.float32)
    if len(output_shape) > 4:
        # fold the sequence dimension into the batch dimension for the 2D blur
        one_hot = tf.reshape(one_hot, [np.prod(output_shape[:-3])] + output_shape[-3:])
    # blur target probabilities
    # gauss_filter = weight_gauss_conv2d(filter_size + [output_shape[-1], 1])
    # blurred_target = tf.nn.depthwise_conv2d(one_hot, gauss_filter, [1, 1, 1, 1], 'SAME')
    blurred_target = gaussian_blur(one_hot, filter_size, sampling_range)
    if len(output_shape) > 4:
        blurred_target = tf.reshape(blurred_target, output_shape)
    # compute log softmax predictions and cross entropy
    log_pred = output - tf.reduce_logsumexp(output, axis=[len(output_shape) - 1], keep_dims=True)
    # apply pixel-wise weighting
    if pixel_weights is not None:
        log_pred *= pixel_weights
    cross_entropy = -tf.reduce_sum(blurred_target * log_pred, axis=[len(output_shape) - 1])
    if pixel_weights is not None:
        loss = tf.reduce_sum(cross_entropy) / tf.reduce_sum(pixel_weights)
    else:
        loss = tf.reduce_mean(cross_entropy)
    return loss
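The log-softmax here is computed directly from the identity log softmax(x) = x - logsumexp(x), which is more stable than exponentiating and taking the log of the resulting probabilities. A quick NumPy check of that identity (illustrative only):

import numpy as np

x = np.array([2.0, -1.0, 0.5])
log_softmax = x - (x.max() + np.log(np.exp(x - x.max()).sum()))
reference = np.log(np.exp(x) / np.exp(x).sum())
print(np.allclose(log_softmax, reference))  # True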
def build_elbo(self, n_samples, training=False):
    cfg = self.config
    reuse = False
    if training:
        reuse = True
    # sample z ~ q(z | x) and evaluate the variational and model log-densities
    z = self.variational.sample(self.data, n_samples=n_samples, reuse=reuse)
    log_q_z = self.variational.log_prob(z, reuse=reuse)
    self.log_q_z = log_q_z
    log_p_x_z = self.model.log_prob(self.data, z, reuse=reuse)
    if cfg['optim/deterministic_annealing'] and training:
        # deterministic annealing: scale the log q(z | x) term by an annealed magnitude (at least 1)
        self.build_magnitude()
        tf.summary.scalar('c/magnitude', self.magnitude)
        magnitude = tf.maximum(1., self.magnitude)
        elbo = log_p_x_z - magnitude * log_q_z
    else:
        elbo = log_p_x_z - log_q_z
    if training:
        self.elbo_loss = elbo
        _, variance = tf.nn.moments(elbo, [0])
        self.elbo_variance = tf.reduce_mean(variance)
        self.log_q_z_loss = log_q_z
        self.variational.build_entropy(z)
        self.q_z_sample = z
        slim.summarize_collection('variational')
        slim.summarize_collection('model')
        slim.summarize_activations('variational')
        slim.summarize_activations('model')
    else:
        self.elbo = elbo
        self.log_q_z = log_q_z
        # importance-sampling estimate of log p(x): logsumexp over samples minus log K
        self.log_p_x_hat = (tf.reduce_logsumexp(elbo, [0], keep_dims=True) -
                            tf.log(float(cfg['q/n_samples_stats'])))
        tf.summary.scalar('o/log_p_x_hat', tf.reduce_mean(self.log_p_x_hat))

        def sum_mean(x):
            return tf.reduce_sum(tf.reduce_mean(x, 0))

        self.elbo_sum = sum_mean(elbo)
        self.q_entropy = -sum_mean(log_q_z)
        self.E_log_lik = sum_mean(log_p_x_z)
        tf.summary.scalar('o/elbo_sum', sum_mean(elbo))
        tf.summary.scalar('o/elbo_mean', sum_mean(elbo) / cfg['batch_size'])
        tf.summary.scalar('o/E_log_q_z', sum_mean(log_q_z))
        tf.summary.scalar('o/E_log_p_x_z', self.E_log_lik)
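In the evaluation branch, log_p_x_hat is the usual importance-sampling estimate of the marginal likelihood: with K samples z_k ~ q(z | x), log p(x) ≈ logsumexp_k [log p(x, z_k) - log q(z_k | x)] - log K. A toy NumPy sketch of that estimator on a model where the true marginal is known (the Gaussian model and proposal below are made up for illustration):

import numpy as np

# toy model: z ~ N(0, 1), x | z ~ N(z, 1), proposal q(z) = N(0, 1) (the prior)
rng = np.random.default_rng(0)
x, K = 1.5, 100000
z = rng.normal(0.0, 1.0, size=K)

def log_normal(v, mean, var):
    return -0.5 * (np.log(2.0 * np.pi * var) + (v - mean) ** 2 / var)

# per-sample ELBO term: log p(x, z_k) - log q(z_k); here q is the prior, so those terms cancel
elbo = log_normal(x, z, 1.0) + log_normal(z, 0.0, 1.0) - log_normal(z, 0.0, 1.0)
log_p_x_hat = np.logaddexp.reduce(elbo) - np.log(K)   # logsumexp over samples minus log K
print(log_p_x_hat, log_normal(x, 0.0, 2.0))           # both are close to log N(x; 0, 2)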
def predict(self, answer, start_logits, end_logits, mask) -> Prediction:
    bound = self.bound
    f1_weight = self.f1_weight
    aggregate = self.aggregate
    masked_logits1 = exp_mask(start_logits, mask)
    masked_logits2 = exp_mask(end_logits, mask)

    # score every span of length <= bound as start_logit + end_logit
    span_logits = []
    for i in range(self.bound):
        if i == 0:
            span_logits.append(masked_logits1 + masked_logits2)
        else:
            span_logits.append(masked_logits1[:, :-i] + masked_logits2[:, i:])
    span_logits = tf.concat(span_logits, axis=1)
    l = tf.shape(start_logits)[1]

    if len(answer) == 1:
        answer = answer[0]
        if answer.dtype == tf.int32:
            if f1_weight == 0:
                # single correct span: plain cross-entropy over packed span coordinates
                answer_ix = to_packed_coordinates(answer, l, bound)
                loss = tf.reduce_mean(
                    tf.nn.sparse_softmax_cross_entropy_with_logits(logits=span_logits, labels=answer_ix))
            else:
                # soft targets: weight each span by its F1 overlap with the answer
                f1_mask = packed_span_f1_mask(answer, l, bound)
                if f1_weight < 1:
                    f1_mask *= f1_weight
                    f1_mask += (1 - f1_weight) * tf.one_hot(to_packed_coordinates(answer, l, bound), l)
                # TODO can we stay in log space? (actually its tricky since f1_mask can have zeros...)
                probs = tf.nn.softmax(span_logits)
                loss = -tf.reduce_mean(tf.log(tf.reduce_sum(probs * f1_mask, axis=1)))
        else:
            # answer given as a mask over spans: marginalize (or take the max) over the marked spans
            log_norm = tf.reduce_logsumexp(span_logits, axis=1)
            if aggregate == "sum":
                log_score = tf.reduce_logsumexp(
                    span_logits + VERY_NEGATIVE_NUMBER * (1 - tf.cast(answer, tf.float32)),
                    axis=1)
            elif aggregate == "max":
                log_score = tf.reduce_max(span_logits + VERY_NEGATIVE_NUMBER * (1 - tf.cast(answer, tf.float32)),
                                          axis=1)
            else:
                raise NotImplementedError()
            loss = tf.reduce_mean(-(log_score - log_norm))
    else:
        raise NotImplementedError()

    tf.add_to_collection(tf.GraphKeys.LOSSES, loss)
    return PackedSpanPrediction(span_logits, l, bound)
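For the "sum" aggregate, the loss marginalizes over all correct spans: it is the negative difference between the logsumexp over the correct-span logits and the logsumexp over all span logits, i.e. the negative log of the total softmax probability assigned to any correct span. A small NumPy sketch of that computation, with toy span logits, a toy answer mask, and VERY_NEGATIVE_NUMBER taken as -1e30 for illustration:

import numpy as np

VERY_NEGATIVE_NUMBER = -1e30
span_logits = np.array([2.0, 0.5, -1.0, 1.5])        # scores for 4 candidate spans
answer = np.array([1, 0, 0, 1], dtype=np.float32)    # spans 0 and 3 are correct

def logsumexp(v):
    m = v.max()
    return m + np.log(np.exp(v - m).sum())

log_norm = logsumexp(span_logits)
log_score = logsumexp(span_logits + VERY_NEGATIVE_NUMBER * (1.0 - answer))
loss = -(log_score - log_norm)
# identical to -log(sum of softmax probabilities of the correct spans)
probs = np.exp(span_logits - log_norm)
print(loss, -np.log(probs[answer > 0].sum()))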