def forward(self, inputs):
# set up batch size
batch_size = inputs.size(0)
# initialize the hidden and cell states with zeros
hidden = Variable(torch.zeros(self.num_layers * 2, batch_size, self.hidden_size).cuda())
cell = Variable(torch.zeros(self.num_layers * 2, batch_size, self.hidden_size).cuda())
hidden_cell = (hidden, cell)
# recurrent neural networks
outputs, _ = self.rnn(inputs, hidden_cell)
outputs = outputs.contiguous().view(-1, self.hidden_size * 2)
# compute classifications by outputs
outputs = self.classifier(outputs)
outputs = F.softmax(outputs, dim=1)
outputs = outputs.view(batch_size, -1, self.num_classes)
return outputs
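# A minimal, self-contained sketch (hypothetical sizes, not the original model)
# of the reshape-then-softmax pattern above: flatten (batch, seq, 2*hidden) to
# 2-D, classify, take a softmax over classes, then restore the sequence axis.
import torch
import torch.nn as nn
import torch.nn.functional as F

batch_size, seq_len, hidden_size, num_classes = 4, 7, 16, 5
rnn_out = torch.randn(batch_size, seq_len, hidden_size * 2)  # stand-in for the BiRNN output
classifier = nn.Linear(hidden_size * 2, num_classes)

flat = rnn_out.contiguous().view(-1, hidden_size * 2)        # (batch*seq, 2*hidden)
probs = F.softmax(classifier(flat), dim=1)                   # normalize over the class dim
probs = probs.view(batch_size, -1, num_classes)              # (batch, seq, num_classes)
assert torch.allclose(probs.sum(dim=2), torch.ones(batch_size, seq_len))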
def forward(self, hidden, encoder_outputs):
# hidden.size() = (B, H), encoder_outputs.size() = (B, S, H)
batch_size, encoder_outputs_len, _ = encoder_outputs.size()
# Create variable to store attention energies
# attn_energies.size() = (B, S)
attn_energies = Variable(torch.zeros((batch_size, encoder_outputs_len))) # B x S
if Config.use_cuda: attn_energies = attn_energies.cuda()
# Calculate energies for each encoder output
# attn_energies.size() = (B, S)
for i in range(encoder_outputs_len):
attn_energies[:, i] = self.score(hidden, encoder_outputs[:, i])
# print attn_energies[:, i]
# Normalize energies to weights in range 0 to 1
return F.softmax(attn_energies)
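# The loop above scores one encoder position at a time. If score() were a plain
# dot product (an assumption -- the original score() is not shown here), the same
# (B, S) energies could be computed in a single batched matmul:
import torch
import torch.nn.functional as F

B, S, H = 2, 6, 8                                      # hypothetical sizes
hidden = torch.randn(B, H)
encoder_outputs = torch.randn(B, S, H)

attn_energies = torch.bmm(encoder_outputs, hidden.unsqueeze(2)).squeeze(2)  # (B, S)
attn_weights = F.softmax(attn_energies, dim=1)         # normalize over source positions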
def forward(self, input, source_hids):
# input: bsz x input_embed_dim
# source_hids: srclen x bsz x output_embed_dim
# x: bsz x output_embed_dim
x = self.input_proj(input)
# compute attention
attn_scores = (source_hids * x.unsqueeze(0)).sum(dim=2)
attn_scores = F.softmax(attn_scores.t()).t() # srclen x bsz
# sum weighted sources
x = (attn_scores.unsqueeze(2) * source_hids).sum(dim=0)
x = F.tanh(self.output_proj(torch.cat((x, input), dim=1)))
return x, attn_scores
def forward(self, x, target_embedding, encoder_out):
residual = x
# attention
x = (self.in_projection(x) + target_embedding) * math.sqrt(0.5)
x = self.bmm(x, encoder_out[0])
# softmax over last dim
sz = x.size()
x = F.softmax(x.view(sz[0] * sz[1], sz[2]))
x = x.view(sz)
attn_scores = x
x = self.bmm(x, encoder_out[1])
# scale attention output; s * sqrt(1/s) == sqrt(s), counteracting the variance
# reduction from averaging over the s source positions
s = encoder_out[1].size(1)
x = x * (s * math.sqrt(1.0 / s))
# project back
x = (self.out_projection(x) + residual) * math.sqrt(0.5)
return x, attn_scores
def forward(self, x, y, x_mask):
"""
x = batch * len * h1
y = batch * h2
x_mask = batch * len
"""
Wy = self.linear(y) if self.linear is not None else y
xWy = x.bmm(Wy.unsqueeze(2)).squeeze(2)
xWy.data.masked_fill_(x_mask.data, -float('inf'))
if self.training:
# In training we output log-softmax for NLL
alpha = F.log_softmax(xWy)
else:
# ...Otherwise 0-1 probabilities
alpha = F.softmax(xWy)
return alpha
def softmax(x, dim=-1):
"""
TODO: change to use the default pyTorch implementation when available
Source: https://discuss.pytorch.org/t/why-softmax-function-cant-specify-the-dimension-to-operate/2637
:param x: tensor
:param dim: Dimension along which to apply the softmax. The elements along this
dimension are rescaled so that they sum to 1.
:return: tensor with the same shape as `x`, rescaled along `dim`
"""
input_size = x.size()
trans_input = x.transpose(dim, len(input_size) - 1)
trans_size = trans_input.size()
input_2d = trans_input.contiguous().view(-1, trans_size[-1])
try:
soft_max_2d = F.softmax(input_2d, 1)
except TypeError:
# Support older pytorch 0.2 release.
soft_max_2d = F.softmax(input_2d)
soft_max_nd = soft_max_2d.view(*trans_size)
return soft_max_nd.transpose(dim, len(input_size) - 1)
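# Illustrative use of the dim-agnostic softmax() helper above (hypothetical
# sizes): the requested dimension is rescaled so that it sums to 1.
import torch
import torch.nn.functional as F  # used inside the helper

x = torch.randn(2, 3, 4)
p = softmax(x, dim=1)
assert p.shape == x.shape
assert torch.allclose(p.sum(dim=1), torch.ones(2, 4))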
def getAttnOutput(input, attnScorer, winSize=0):
# Attention output following [Liu and Lane, Interspeech 2016]. `input` is
# seqlen x batchsize x dim; if winSize is 0, all time steps are used for the
# weighted averaging.
attnSeq = []
for i in range(input.size(0)):
curSeq = []
if i > 0:
leftBegin = 0
if winSize > 0:
leftBegin = max(0, i-winSize)
curSeq.append(input[leftBegin:i])
if i < input.size(0):
leftEnd = input.size(0)
if winSize > 0:
leftEnd = min(i+winSize+1, input.size(0))
curSeq.append(input[i:leftEnd])
curSeq = torch.cat(curSeq, 0)
cur = input[i:i+1].expand_as(curSeq)
attnScores = attnScorer( torch.cat([cur, curSeq], 2).view(-1, 2*input.size(2)) ) # get attention scores
transAttnScores = attnScores.view(curSeq.size(0), input.size(1)).transpose(0, 1) # batchSize X curSeqLen
smOut = F.softmax(transAttnScores).transpose(0, 1)
smOutSeq = smOut.unsqueeze(2).expand_as(curSeq)
weightedAvgSeq = (curSeq * smOutSeq).sum(0)
attnSeq.append(weightedAvgSeq)
attnSeq = torch.cat(attnSeq, 0)
return torch.cat([input, attnSeq], 2)
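# Note on shapes: torch.cat([input, attnSeq], 2) above expects each
# weightedAvgSeq to keep its leading time dimension, i.e. it assumes
# (curSeq * smOutSeq).sum(0) returns a 1 x batchSize x dim tensor (the
# pre-0.2 PyTorch reduction behaviour; newer versions would need
# sum(0, keepdim=True)). The returned tensor is then seqlen x batchSize x 2*dim.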
def sample(self, inputs, max_length):
targets, init_states = self.initialize(inputs, eval=False)
emb, output, hidden, context = init_states
outputs = []
samples = []
batch_size = targets.size(1)
num_eos = targets[0].data.byte().new(batch_size).zero_()
for i in range(max_length):
output, hidden = self.decoder.step(emb, output, hidden, context)
outputs.append(output)
dist = F.softmax(self.generator(output))
sample = dist.multinomial(1, replacement=False).view(-1).data
samples.append(sample)
# Stop if all sentences reach EOS.
num_eos |= (sample == lib.Constants.EOS)
if num_eos.sum() == batch_size: break
emb = self.decoder.word_lut(Variable(sample))
outputs = torch.stack(outputs)
samples = torch.stack(samples)
return samples, outputs
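# A minimal sketch (hypothetical sizes) of the per-step sampling above:
# softmax the generator logits, then draw one token id per batch element.
import torch
import torch.nn.functional as F

batch_size, vocab_size = 4, 10
logits = torch.randn(batch_size, vocab_size)     # stand-in for self.generator(output)
dist = F.softmax(logits, dim=1)
sample = torch.multinomial(dist, 1, replacement=False).view(-1)  # (batch_size,) token ids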
def test_forward_backward(self):
import torch
import torch.nn.functional as F
from torch.autograd import Variable
from reid.loss import OIMLoss
criterion = OIMLoss(3, 3, scalar=1.0, size_average=False)
criterion.lut = torch.eye(3)
x = Variable(torch.randn(3, 3), requires_grad=True)
y = Variable(torch.range(0, 2).long())
loss = criterion(x, y)
loss.backward()
probs = F.softmax(x)
grads = probs.data - torch.eye(3)
abs_diff = torch.abs(grads - x.grad.data)
self.assertEquals(torch.log(probs).diag().sum(), -loss)
self.assertTrue(torch.max(abs_diff) < 1e-6)
def forward(self, im_data, im_info, gt_boxes=None, gt_ishard=None, dontcare_areas=None):
features, rois = self.rpn(im_data, im_info, gt_boxes, gt_ishard, dontcare_areas)
if self.training:
roi_data = self.proposal_target_layer(rois, gt_boxes, gt_ishard, dontcare_areas, self.n_classes)
rois = roi_data[0]
# roi pool
pooled_features = self.roi_pool(features, rois)
x = pooled_features.view(pooled_features.size()[0], -1)
x = self.fc6(x)
x = F.dropout(x, training=self.training)
x = self.fc7(x)
x = F.dropout(x, training=self.training)
cls_score = self.score_fc(x)
cls_prob = F.softmax(cls_score)
bbox_pred = self.bbox_fc(x)
if self.training:
self.cross_entropy, self.loss_box = self.build_loss(cls_score, bbox_pred, roi_data)
return cls_prob, bbox_pred, rois
def _decode(self, tokens, encoder_outs):
# wrap in Variable
tokens = Variable(tokens, volatile=True)
avg_probs = None
avg_attn = None
for model, encoder_out in zip(self.models, encoder_outs):
decoder_out, attn = model.decoder(tokens, encoder_out)
probs = F.softmax(decoder_out[:, -1, :]).data
attn = attn[:, -1, :].data
if avg_probs is None or avg_attn is None:
avg_probs = probs
avg_attn = attn
else:
avg_probs.add_(probs)
avg_attn.add_(attn)
avg_probs.div_(len(self.models))
avg_probs.log_()
avg_attn.div_(len(self.models))
return avg_probs, avg_attn
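# The ensembling step above in isolation (hypothetical sizes): average the
# per-model softmax distributions, then take the log of the average.
import torch
import torch.nn.functional as F

vocab = 10
model_logits = [torch.randn(3, vocab) for _ in range(2)]   # two stand-in models
avg_probs = sum(F.softmax(l, dim=1) for l in model_logits) / len(model_logits)
avg_log_probs = avg_probs.log()
assert torch.allclose(avg_probs.sum(dim=1), torch.ones(3))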
def forward(self, x, y, x_mask):
"""
Args:
x: batch * len * hdim1
y: batch * hdim2
x_mask: batch * len (1 for padding, 0 for true)
Output:
alpha = batch * len
"""
Wy = self.linear(y) if self.linear is not None else y
xWy = x.bmm(Wy.unsqueeze(2)).squeeze(2)
xWy.data.masked_fill_(x_mask.data, -float('inf'))
if self.normalize:
if self.training:
# In training we output log-softmax for NLL
alpha = F.log_softmax(xWy)
else:
# ...Otherwise 0-1 probabilities
alpha = F.softmax(xWy)
else:
alpha = xWy.exp()
return alpha
def forward(self, x, x_mask):
"""
Args:
x: batch * len * hdim
x_mask: batch * len (1 for padding, 0 for true)
Output:
alpha: batch * len
"""
x_flat = x.view(-1, x.size(-1))
scores = self.linear(x_flat).view(x.size(0), x.size(1))
scores.data.masked_fill_(x_mask.data, -float('inf'))
alpha = F.softmax(scores)
return alpha
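# A minimal sketch of the masking trick used above: padded positions are filled
# with -inf before the softmax, so their attention weights come out exactly zero.
import torch
import torch.nn.functional as F

scores = torch.randn(2, 4)
x_mask = torch.tensor([[False, False, True, True],
                       [False, False, False, True]])   # True marks padding
masked = scores.masked_fill(x_mask, -float('inf'))
alpha = F.softmax(masked, dim=1)
assert bool((alpha[x_mask] == 0).all())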
# ------------------------------------------------------------------------------
# Functional
# ------------------------------------------------------------------------------
def object_detection_gt_boxes(self, image_path, gt_boxes):
min_score = 1/150.
image = cv2.imread(image_path)
# print 'image.shape', image.shape
im_data, im_scales = self.get_image_blob_noscale(image)
gt_boxes[:, :4] = gt_boxes[:, :4] * im_scales[0]
# print 'im_data.shape', im_data.shape
# print 'im_scales', im_scales
im_info = np.array(
[[im_data.shape[1], im_data.shape[2], im_scales[0]]],
dtype=np.float32)
object_result = self(im_data, im_info, gt_boxes)[0]
cls_prob_object, bbox_object, object_rois = object_result[:]
prob_object = F.softmax(cls_prob_object)
prob = prob_object.cpu().data
top_5_cls = torch.topk(prob[:, 1:], 5, dim=1)
# print 'im_scales[0]', im_scales[0]
return top_5_cls[1].numpy()
def forward(self, v, u):
"""
Input:
- v: N x D x H x W
- u: N x D
Returns:
- next_u: N x D
"""
N, K = v.size(0), self.hidden_dim
D, H, W = v.size(1), v.size(2), v.size(3)
v_proj = self.Wv(v) # N x K x H x W
u_proj = self.Wu(u) # N x K
u_proj_expand = u_proj.view(N, K, 1, 1).expand(N, K, H, W)
h = F.tanh(v_proj + u_proj_expand)
p = F.softmax(self.Wp(h).view(N, H * W)).view(N, 1, H, W)
self.attention_maps = p.data.clone()
v_tilde = (p.expand_as(v) * v).sum(2).sum(3).view(N, D)
next_u = u + v_tilde
return next_u
def new_att_module(self):
class NewAttModule(nn.Module):
def __init__(self):
super(NewAttModule, self).__init__()
def forward(self, linput, rinput):
self.lPad = linput.view(-1, linput.size(0), linput.size(1))
self.lPad = linput # self.lPad = Padding(0, 0)(linput) TODO: figure out why padding?
self.M_r = torch.mm(self.lPad, rinput.t())
self.alpha = F.softmax(self.M_r.transpose(0, 1))
self.Yl = torch.mm(self.alpha, self.lPad)
return self.Yl
att_module = NewAttModule()
if getattr(self, "att_module_master", None):
for (tar_param, src_param) in zip(att_module.parameters(), self.att_module_master.parameters()):
tar_param.grad.data = src_param.grad.data.clone()
return att_module
def forward(self, x):
x = F.relu(self.conv11(x))
x = self.bn11(x)
x = F.relu(self.conv12(x))
x = self.bn12(x)
x = F.relu(self.conv21(x))
x = self.bn21(x)
x = F.relu(self.conv22(x))
x = self.bn22(x)
x = F.avg_pool2d(x, kernel_size=[x.size(2), x.size(3)])
x = self.fc(x.view(x.size()[:2]))
x = F.softmax(x)
return x
def forward(self, x):
x = F.relu(self.conv11(x))
x = self.bn11(x)
x = self.offset12(x)
x = F.relu(self.conv12(x))
x = self.bn12(x)
x = self.offset21(x)
x = F.relu(self.conv21(x))
x = self.bn21(x)
x = self.offset22(x)
x = F.relu(self.conv22(x))
x = self.bn22(x)
x = F.avg_pool2d(x, kernel_size=[x.size(2), x.size(3)])
x = self.fc(x.view(x.size()[:2]))
x = F.softmax(x)
return x
def forward(self, x, trainable, fast=True):
q, img, ans, prog = x #Need ans for reinforce
if not trainable: ans = None #Safety
p = self.ProgramGenerator(q)
#Finicky handling of PG-EE transition
batch, sLen, v = p.size()
p = p.view(-1, v)
p = F.softmax(p)
p = p.view(batch, sLen, v)
p, pInds = t.max(p, 2)
pInds = pInds[:, :, 0]
p= p[:, :, 0]
a = self.ExecutionEngine((pInds, p, img), fast=fast)
return a
def forward(self, logits, labels):
softmaxes = F.softmax(logits)
confidences, predictions = torch.max(softmaxes, 1)
accuracies = predictions.eq(labels)
ece = Variable(torch.zeros(1)).type_as(logits)
for bin_lower, bin_upper in zip(self.bin_lowers, self.bin_uppers):
# Compute |confidence - accuracy| in each bin
in_bin = confidences.gt(bin_lower) * confidences.le(bin_upper)
prop_in_bin = in_bin.float().mean()
if prop_in_bin.data[0] > 0:
accuracy_in_bin = accuracies[in_bin].float().mean()
avg_confidence_in_bin = confidences[in_bin].mean()
ece += torch.abs(avg_confidence_in_bin- accuracy_in_bin) * prop_in_bin
return ece
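# The bin edges (self.bin_lowers / self.bin_uppers) are not shown in this
# snippet; presumably (an assumption) they come from an even partition of
# [0, 1], along these lines:
import torch

n_bins = 15                                    # hypothetical bin count
bin_boundaries = torch.linspace(0, 1, n_bins + 1)
bin_lowers = bin_boundaries[:-1]
bin_uppers = bin_boundaries[1:]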
def forward(self, x, y, x_mask):
"""
x = batch * len * h1
y = batch * h2
x_mask = batch * len
"""
Wy = self.linear(y) if self.linear is not None else y
xWy = x.bmm(Wy.unsqueeze(2)).squeeze(2)
xWy.data.masked_fill_(x_mask.data, -float('inf'))
if self.training:
# In training we output log-softmax for NLL
alpha = F.log_softmax(xWy, dim=1)
else:
# ...Otherwise 0-1 probabilities
alpha = F.softmax(xWy, dim=1)
return alpha
def forward(self, X):
# input X is a 3D (BxDxN) or 4D (BxDxHxW) tensor
assert X.size(1) == self.D, "Encoding Layer wrong channels!"
if X.dim() == 3:
# BxDxN
B, N, K, D = X.size(0), X.size(2), self.K, self.D
X = X.transpose(1,2).contiguous()
elif X.dim() == 4:
# BxDxHxW
B, N, K, D = X.size(0), X.size(2)*X.size(3), self.K, self.D
X = X.view(B,D,-1).transpose(1,2).contiguous()
else:
raise RuntimeError('Encoding Layer unknown input dims!')
# assignment weights, B x N x K (normalized over the codewords)
A = F.softmax(scaledL2()(X, self.codewords, self.scale), dim=2)
# aggregate
E = aggregate()(A, X, self.codewords)
return E
def assign(R, S):
r"""
Calculate assignment weights for given residuals (:math:`R`) and scale (:math:`S`)
.. math::
a_{ik} = \frac{\exp(-s_k\|r_{ik}\|^2)}{\sum_{j=1}^K \exp(-s_j\|r_{ij}\|^2)}
Shape:
- Input: :math:`R\in\mathcal{R}^{B\times N\times K\times D}`, :math:`S\in\mathcal{R}^K` (where :math:`B` is batch, :math:`N` is the total number of features, :math:`K` is the number of codewords, and :math:`D` is the feature dimension.)
- Output: :math:`A\in\mathcal{R}^{B\times N\times K}`
"""
L = square_squeeze()(R)
K = S.size(0)
SL = L * S.view(1,1,K)
return F.softmax(SL, dim=2)  # normalize over the K codewords
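# A plain-tensor sketch of the same assignment rule, assuming square_squeeze()
# returns the squared L2 norm of each residual over D (an assumption -- it is
# not defined in this snippet; the sign of the learned scales determines the
# correspondence with the exp(-s_k ...) form in the docstring).
import torch
import torch.nn.functional as F

B, N, K, D = 2, 5, 4, 3                        # hypothetical sizes
R = torch.randn(B, N, K, D)                    # residuals r_ik
S = torch.randn(K)                             # per-codeword scales s_k
L = R.pow(2).sum(3)                            # squared norms, B x N x K
A = F.softmax(L * S.view(1, 1, K), dim=2)      # normalize over the K codewords
assert torch.allclose(A.sum(2), torch.ones(B, N))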
def forward(self, X):
if isinstance(X, tuple) or isinstance(X, list):
# for self-parallel mode, please see encoding.nn
return my_data_parallel(self, X)
elif not isinstance(X, Variable):
raise RuntimeError('unknown input type')
# input X is a 3D (BxDxN) or 4D (BxDxHxW) tensor
assert(X.size(1)==self.D)
if X.dim() == 3:
# BxDxN
B, N, K, D = X.size(0), X.size(2), self.K, self.D
X = X.transpose(1,2).contiguous()
elif X.dim() == 4:
# BxDxHxW
B, N, K, D = X.size(0), X.size(2)*X.size(3), self.K, self.D
X = X.view(B,D,-1).transpose(1,2).contiguous()
else:
raise RuntimeError('Encoding Layer unknown input dims!')
# assignment weights, B x N x K (normalized over the codewords)
A = F.softmax(scaledL2(X, self.codewords, self.scale), dim=2)
# aggregate
E = aggregate(A, X, self.codewords)
return E
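# A plain-tensor sketch of the scaledL2/aggregate pair used above (hypothetical
# sizes; scaledL2 and aggregate themselves are custom functions not shown here):
import torch
import torch.nn.functional as F

B, N, K, D = 2, 6, 4, 3
X = torch.randn(B, N, D)                       # N descriptors per sample
C = torch.randn(K, D)                          # K codewords
s = torch.randn(K)                             # per-codeword smoothing factors

R = X.unsqueeze(2) - C.view(1, 1, K, D)        # residuals, B x N x K x D
SL = s.view(1, 1, K) * R.pow(2).sum(3)         # scaled squared distances, B x N x K
A = F.softmax(SL, dim=2)                       # assignment weights over the codewords
E = (A.unsqueeze(3) * R).sum(1)                # aggregated encoding, B x K x D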
def forward(self, input, compute_loss=False, avg_loss=True):
# compute posterior
en1 = F.softplus(self.en1_fc(input)) # en1_fc output
en2 = F.softplus(self.en2_fc(en1)) # encoder2 output
en2 = self.en2_drop(en2)
posterior_mean = self.mean_bn (self.mean_fc (en2)) # posterior mean
posterior_logvar = self.logvar_bn(self.logvar_fc(en2)) # posterior log variance
posterior_var = posterior_logvar.exp()
# take sample
eps = Variable(input.data.new().resize_as_(posterior_mean.data).normal_()) # noise
z = posterior_mean + posterior_var.sqrt() * eps # reparameterization
p = F.softmax(z) # mixture probability
p = self.p_drop(p)
# do reconstruction
recon = F.softmax(self.decoder_bn(self.decoder(p))) # reconstructed distribution over vocabulary
if compute_loss:
return recon, self.loss(input, recon, posterior_mean, posterior_logvar, posterior_var, avg_loss)
else:
return recon
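# The reparameterization step above in isolation (hypothetical sizes):
# z = mean + sqrt(var) * eps with eps ~ N(0, 1), then a softmax gives the
# mixture proportions fed to the decoder.
import torch
import torch.nn.functional as F

posterior_mean = torch.randn(4, 10)
posterior_logvar = torch.randn(4, 10)
eps = torch.randn_like(posterior_mean)
z = posterior_mean + posterior_logvar.exp().sqrt() * eps
p = F.softmax(z, dim=1)                        # mixture probability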