def _attend(self, p):
    """Return softmax-normalised attention weights for the query ``p``.

    The raw scores are the batched matrix product of
    ``self.source_hiddens`` with ``p``. Positions where ``self.mask`` is
    false have their score replaced by the corresponding entry of
    ``self.minf`` before the softmax is applied.

    NOTE(review): ``F`` is presumably ``chainer.functions`` and
    ``self.minf`` presumably holds a large negative fill value so masked
    positions receive ~0 weight -- confirm against the enclosing class.
    Tensor shapes are not visible from this method.

    Args:
        p: Query passed as the second operand of ``F.batch_matmul``.

    Returns:
        The result of ``F.softmax`` over the masked scores.
    """
    scores = F.batch_matmul(self.source_hiddens, p)
    # Keep the score where the mask is true; otherwise use the fill value.
    masked_scores = F.where(self.mask, scores, self.minf)
    return F.softmax(masked_scores)