def forward(self, premise, hypothesis, training=False):
    '''
    Encode the premise and hypothesis, apply word-by-word attention, and
    return per-class log-probabilities.

    inputs:
        premise : batch x T   token-id tensor; id 0 is treated as padding
        hypothesis : batch x T   token-id tensor; id 0 is treated as padding
        training : bool — enables dropout and puts the module in train mode
    outputs:
        pred : batch x num_classes   log-probabilities (log_softmax output)
    '''
    self.train(training)
    batch_size = premise.size(0)
    # Padding masks: 1.0 at real tokens, 0.0 where the id is the pad id (0).
    # NOTE(review): `dtype` is a module-level name defined outside this view.
    mask_p = torch.ne(premise, 0).type(dtype)
    mask_h = torch.ne(hypothesis, 0).type(dtype)
    # Embed and regularize both sequences.
    encoded_p = self.embedding(premise)  # batch x T x n_embed
    encoded_p = F.dropout(encoded_p, p=self.options['DROPOUT'], training=training)
    encoded_h = self.embedding(hypothesis)  # batch x T x n_embed
    encoded_h = F.dropout(encoded_h, p=self.options['DROPOUT'], training=training)
    # The custom GRU loops expect time-major layout.
    encoded_p = encoded_p.transpose(1, 0)  # T x batch x n_embed
    encoded_h = encoded_h.transpose(1, 0)  # T x batch x n_embed
    mask_p = mask_p.transpose(1, 0)  # T x batch
    mask_h = mask_h.transpose(1, 0)  # T x batch
    h_p_0, h_n_0 = self.init_hidden(batch_size)  # 1 x batch x n_dim
    # Encode the premise, then condition the hypothesis encoder on the
    # premise's final hidden state (h_n is chained between the two GRUs).
    o_p, h_n = self._gru_forward(self.p_gru, encoded_p, mask_p, h_p_0)  # o_p : T x batch x n_dim
    # h_n : 1 x batch x n_dim
    o_h, h_n = self._gru_forward(self.h_gru, encoded_h, mask_h, h_n_0)  # o_h : T x batch x n_dim
    # h_n : 1 x batch x n_dim
    # Word-by-word attention of hypothesis states over premise states.
    r_0 = self.attn_gru_init_hidden(batch_size)
    h_star, alpha_vec = self._attn_gru_forward(o_h, mask_h, r_0, o_p, mask_p)
    h_star = self.out(h_star)  # batch x num_classes
    if self.options['LAST_NON_LINEAR']:
        h_star = F.leaky_relu(h_star)  # Non-linear projection
    # FIX: log_softmax requires an explicit dim — the implicit form is
    # deprecated and its inferred axis is version-dependent. Normalize over
    # the class axis of the batch x num_classes logits.
    pred = F.log_softmax(h_star, dim=-1)
    return pred
mgru_rte_model.py 文件源码
python
阅读 29
收藏 0
点赞 0
评论 0
评论列表
文章目录