def backward(ctx, grad_output):
    """Compute gradients for the three tensor inputs of the forward pass.

    Reads ``batch1`` and ``batch2`` from ``ctx.saved_variables`` and the
    scalars ``ctx.alpha`` / ``ctx.beta``.  A gradient is produced only for
    inputs flagged in ``ctx.needs_input_grad``; the rest stay ``None``.

    The forward pass is not visible here; presumably it is an addbmm-style
    op (``alpha * add_matrix + beta * sum_b(batch1[b] @ batch2[b])``) --
    TODO confirm against the matching ``forward``.

    Args:
        ctx: autograd context providing ``saved_variables``,
            ``needs_input_grad``, ``alpha`` and ``beta``.
        grad_output: gradient w.r.t. the forward output; it must be
            expandable to ``(batch1.size(0), batch1.size(1),
            batch2.size(2))`` after a leading dimension is added.

    Returns:
        A 6-tuple ``(grad_add_matrix, grad_batch1, grad_batch2, None, None,
        None)``.
    """
    # NOTE(review): newer PyTorch releases appear to expose this as
    # ``ctx.saved_tensors`` -- confirm the targeted version before changing.
    batch1, batch2 = ctx.saved_variables
    grad_add_matrix = grad_batch1 = grad_batch2 = None

    if ctx.needs_input_grad[0]:
        # The added matrix receives the output gradient, scaled by alpha.
        grad_add_matrix = grad_output
        if ctx.alpha != 1:
            grad_add_matrix = grad_add_matrix.mul(ctx.alpha)

    if any(ctx.needs_input_grad[1:]):
        # Both batch gradients need the output gradient once per batch
        # entry, so build it a single time and share it.
        batch_grad_output = (grad_output
                             .unsqueeze(0)
                             .expand(batch1.size(0), batch1.size(1), batch2.size(2)))

    if ctx.needs_input_grad[1]:
        # Per batch entry: grad_output @ batch2[b]^T
        grad_batch1 = torch.bmm(batch_grad_output, batch2.transpose(1, 2))
        if ctx.beta != 1:
            grad_batch1 *= ctx.beta

    if ctx.needs_input_grad[2]:
        # Per batch entry: batch1[b]^T @ grad_output
        grad_batch2 = torch.bmm(batch1.transpose(1, 2), batch_grad_output)
        if ctx.beta != 1:
            grad_batch2 *= ctx.beta

    # Trailing Nones: the remaining three forward arguments get no gradient.
    return grad_add_matrix, grad_batch1, grad_batch2, None, None, None
# Comment list
# Table of contents