def backward(self, grad_output):
    """Compute gradients for an addbmm-style op: out = alpha*add_matrix + beta*sum_b(batch1[b] @ batch2[b]).

    Args:
        grad_output: 2-D gradient w.r.t. the output (same shape as add_matrix);
            it is broadcast across the batch dimension for the batched products.

    Returns:
        Tuple (grad_add_matrix, grad_batch1, grad_batch2); entries are None
        when the corresponding input does not require gradient.

    NOTE(review): alpha scales the add_matrix term and beta scales the bmm
    term here (the legacy torch.addbmm_(alpha, mat, beta, b1, b2) argument
    order) — confirm against the matching forward().
    """
    batch1, batch2 = self.saved_tensors
    grad_add_matrix = grad_batch1 = grad_batch2 = None

    if self.needs_input_grad[0]:
        # d(out)/d(add_matrix) is identity scaled by alpha; avoid an extra
        # copy when alpha == 1 by reusing grad_output directly.
        grad_add_matrix = grad_output
        if self.alpha != 1:
            grad_add_matrix = grad_add_matrix.mul(self.alpha)

    if any(self.needs_input_grad[1:]):
        # Broadcast the 2-D grad_output to (batch, n, p) so it can enter the
        # batched matrix products below. Computed once, shared by both grads.
        batch_grad_output = (grad_output
                             .unsqueeze(0)
                             .expand(batch1.size(0), batch1.size(1), batch2.size(2)))
        if self.needs_input_grad[1]:
            # d(out)/d(batch1) = grad @ batch2^T, scaled by beta.
            grad_batch1 = torch.bmm(batch_grad_output, batch2.transpose(1, 2))
            if self.beta != 1:
                grad_batch1 *= self.beta
        if self.needs_input_grad[2]:
            # d(out)/d(batch2) = batch1^T @ grad, scaled by beta.
            grad_batch2 = torch.bmm(batch1.transpose(1, 2), batch_grad_output)
            if self.beta != 1:
                grad_batch2 *= self.beta

    return grad_add_matrix, grad_batch1, grad_batch2
# (stray page-scrape footer, not part of the code)
# 评论列表 = "comment list"; 文章目录 = "article table of contents"