def backward(self, grad_output):
    tensors = self.saved_tensors
    if len(tensors) == 2:
        input, weight = tensors
        bias = None
    else:
        input, weight, bias = tensors

    grad_input = grad_weight = grad_bias = None
    if self.needs_input_grad[0]:
        grad_input = torch.mm(grad_output, weight)
    if self.needs_input_grad[1]:
        grad_weight = torch.mm(grad_output.t(), input)
    if bias is not None and self.needs_input_grad[2]:
        grad_bias = torch.mv(grad_output.t(), self.add_buffer)

    if bias is not None:
        return grad_input, grad_weight, grad_bias
    else:
        return grad_input, grad_weight
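In this old-style Linear backward, `self.add_buffer` is (as set up in the corresponding forward pass) a vector of ones of length batch size, so the `torch.mv` call reduces `grad_output` over the batch. A minimal sketch of that identity, with the ones buffer written out explicitly as an assumption:

import torch

# Sketch: with a ones "add_buffer" (assumed, as the forward pass fills it with 1s),
# torch.mv(grad_output.t(), ones) is just a sum of grad_output over the batch dimension.
grad_output = torch.randn(8, 5)                    # (batch_size, out_features)
add_buffer = torch.ones(8)                         # stand-in for self.add_buffer
grad_bias = torch.mv(grad_output.t(), add_buffer)
assert torch.allclose(grad_bias, grad_output.sum(0))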
Python mv() example source code
def backward(self, grad_output):
    matrix, vector = self.saved_tensors
    grad_add_vector = grad_matrix = grad_vector = None
    if self.needs_input_grad[0]:
        grad_add_vector = grad_output
        if self.alpha != 1:
            grad_add_vector = grad_add_vector.mul(self.alpha)
    if self.needs_input_grad[1]:
        grad_matrix = torch.ger(grad_output, vector)
        if self.beta != 1:
            grad_matrix *= self.beta
    if self.needs_input_grad[2]:
        grad_vector = torch.mv(matrix.t(), grad_output)
        if self.beta != 1:
            grad_vector *= self.beta
    return grad_add_vector, grad_matrix, grad_vector
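The closed forms used here (outer product for the matrix gradient, transposed matrix-vector product for the vector gradient) can be checked against autograd. A small self-contained sketch, assuming the scaling factors are 1 so the alpha/beta branches are skipped:

import torch

# Check the gradient formulas for y = t + M @ v against autograd.
M = torch.randn(3, 4, requires_grad=True)
v = torch.randn(4, requires_grad=True)
t = torch.randn(3, requires_grad=True)
g = torch.randn(3)                                  # plays the role of grad_output

y = t + torch.mv(M, v)
y.backward(g)

assert torch.allclose(t.grad, g)                    # gradient of the added vector
assert torch.allclose(M.grad, torch.ger(g, v))      # outer product, as in the code above
assert torch.allclose(v.grad, torch.mv(M.t(), g))   # M^T @ grad_output, as in the code above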
def backward(self, grad_output):
    vector1, vector2 = self.saved_tensors
    grad_add_matrix = grad_vector1 = grad_vector2 = None
    if self.needs_input_grad[0]:
        grad_add_matrix = grad_output
        if self.alpha != 1:
            grad_add_matrix = grad_add_matrix.mul(self.alpha)
    if self.needs_input_grad[1]:
        grad_vector1 = torch.mv(grad_output, vector2)
        if self.beta != 1:
            grad_vector1 *= self.beta
    if self.needs_input_grad[2]:
        # TODO: maybe it's better to do transpose + mv + transpose
        # squeeze(0) restores the 1-D shape of vector2's gradient (as in the other variants below)
        grad_vector2 = torch.mm(vector1.unsqueeze(0), grad_output).squeeze(0)
        if self.beta != 1:
            grad_vector2 *= self.beta
    return grad_add_matrix, grad_vector1, grad_vector2
def backward(ctx, grad_output):
    matrix, vector = ctx.saved_variables
    grad_add_vector = grad_matrix = grad_vector = None
    if ctx.needs_input_grad[0]:
        grad_add_vector = grad_output
        if ctx.alpha != 1:
            grad_add_vector = grad_add_vector.mul(ctx.alpha)
    if ctx.needs_input_grad[1]:
        grad_matrix = torch.ger(grad_output, vector)
        if ctx.beta != 1:
            grad_matrix *= ctx.beta
    if ctx.needs_input_grad[2]:
        grad_vector = torch.mv(matrix.t(), grad_output)
        if ctx.beta != 1:
            grad_vector *= ctx.beta
    return grad_add_vector, grad_matrix, grad_vector, None, None, None
def backward(ctx, grad_output):
    vector1, vector2 = ctx.saved_variables
    grad_add_matrix = grad_vector1 = grad_vector2 = None
    if ctx.needs_input_grad[0]:
        grad_add_matrix = grad_output
        if ctx.alpha != 1:
            grad_add_matrix = grad_add_matrix.mul(ctx.alpha)
    if ctx.needs_input_grad[1]:
        grad_vector1 = torch.mv(grad_output, vector2)
        if ctx.beta != 1:
            grad_vector1 *= ctx.beta
    if ctx.needs_input_grad[2]:
        # TODO: maybe it's better to do transpose + mv + transpose
        grad_vector2 = torch.mm(vector1.unsqueeze(0), grad_output).squeeze(0)
        if ctx.beta != 1:
            grad_vector2 *= ctx.beta
    return grad_add_matrix, grad_vector1, grad_vector2, None, None, None
def updateOutput(self, input):
    M, v = input
    assert M.ndimension() == 2 or M.ndimension() == 3

    if M.ndimension() == 2:
        assert v.ndimension() == 1
        if self.trans:
            M = M.transpose(0, 1)
        self.output.resize_(M.size(0))
        torch.mv(M, v, out=self.output)
    else:
        assert v.ndimension() == 2
        if self.trans:
            M = M.transpose(1, 2)
        self.output.resize_(M.size(0), M.size(1), 1)
        torch.bmm(M, v.view(v.size(0), v.size(1), 1), out=self.output).resize_(M.size(0), M.size(1))

    return self.output
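For reference, the same dispatch can be written as a standalone function: `torch.mv` for a single matrix/vector pair and `torch.bmm` with an unsqueezed vector for the batched case. This is an illustrative sketch, not the module's API:

import torch

def mv_forward(M, v, trans=False):
    """Illustrative equivalent of the MV module above (names are ours, not the module's)."""
    if M.dim() == 2:                                  # single matrix-vector product
        if trans:
            M = M.t()
        return torch.mv(M, v)
    if trans:                                         # batched case: (B, n, m) paired with (B, m)
        M = M.transpose(1, 2)
    return torch.bmm(M, v.unsqueeze(2)).squeeze(2)

out = mv_forward(torch.randn(4, 3, 5), torch.randn(4, 5))   # -> shape (4, 3)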
def dot_nd(query, candidates):
    """
    Perform a dot product between a query and n-dimensional candidates.

    Args:
        query (Variable): A vector to query, whose size is
            (query_dim,)
        candidates (Variable): A n-dimensional tensor to be multiplied
            by query, whose size is (d0, d1, ..., dn, query_dim)

    Returns:
        output: The result of the dot product, whose size is
            (d0, d1, ..., dn)
    """
    cands_size = candidates.size()
    cands_flat = candidates.view(-1, cands_size[-1])
    output_flat = torch.mv(cands_flat, query)
    output = output_flat.view(*cands_size[:-1])
    return output
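A quick usage example for `dot_nd` (written with plain tensors here; the docstring's `Variable`s behave the same way for this purpose):

import torch

query = torch.randn(16)                 # (query_dim,)
candidates = torch.randn(4, 7, 16)      # (d0, d1, query_dim)
scores = dot_nd(query, candidates)      # shape (4, 7); scores[i, j] is the dot product of candidates[i, j] with query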
def backward(ctx, grad_output):
    matrix, vector = ctx.saved_variables
    grad_add_vector = grad_matrix = grad_vector = None
    if ctx.needs_input_grad[0]:
        grad_add_vector = grad_output
        if ctx.alpha != 1:
            grad_add_vector = grad_add_vector.mul(ctx.alpha)
    if ctx.needs_input_grad[1]:
        grad_matrix = torch.ger(grad_output, vector)
        if ctx.beta != 1:
            grad_matrix *= ctx.beta
    if ctx.needs_input_grad[2]:
        grad_vector = torch.mv(matrix.t(), grad_output)
        if ctx.beta != 1:
            grad_vector *= ctx.beta
    return grad_add_vector, grad_matrix, grad_vector, None, None, None
def backward(ctx, grad_output):
    vector1, vector2 = ctx.saved_variables
    grad_add_matrix = grad_vector1 = grad_vector2 = None
    if ctx.needs_input_grad[0]:
        grad_add_matrix = grad_output
        if ctx.alpha != 1:
            grad_add_matrix = grad_add_matrix.mul(ctx.alpha)
    if ctx.needs_input_grad[1]:
        grad_vector1 = torch.mv(grad_output, vector2)
        if ctx.beta != 1:
            grad_vector1 *= ctx.beta
    if ctx.needs_input_grad[2]:
        # TODO: maybe it's better to do transpose + mv + transpose
        grad_vector2 = torch.mm(vector1.unsqueeze(0), grad_output).squeeze(0)
        if ctx.beta != 1:
            grad_vector2 *= ctx.beta
    return grad_add_matrix, grad_vector1, grad_vector2, None, None, None
def updateOutput(self, input):
    M, v = input
    assert M.ndimension() == 2 or M.ndimension() == 3

    if M.ndimension() == 2:
        assert v.ndimension() == 1
        if self.trans:
            M = M.transpose(0, 1)
        self.output.resize_(M.size(0))
        torch.mv(M, v, out=self.output)
    else:
        assert v.ndimension() == 2
        if self.trans:
            M = M.transpose(1, 2)
        self.output.resize_(M.size(0), M.size(1), 1)
        torch.bmm(M, v.view(v.size(0), v.size(1), 1), out=self.output).resize_(M.size(0), M.size(1))

    return self.output
def backward(ctx, grad_output):
    matrix, vector = ctx.saved_variables
    grad_add_vector = grad_matrix = grad_vector = None
    if ctx.needs_input_grad[0]:
        grad_add_vector = maybe_unexpand(grad_output, ctx.add_vector_size)
        if ctx.alpha != 1:
            grad_add_vector = grad_add_vector.mul(ctx.alpha)
    if ctx.needs_input_grad[1]:
        grad_matrix = torch.ger(grad_output, vector)
        if ctx.beta != 1:
            grad_matrix *= ctx.beta
    if ctx.needs_input_grad[2]:
        grad_vector = torch.mv(matrix.t(), grad_output)
        if ctx.beta != 1:
            grad_vector *= ctx.beta
    return grad_add_vector, grad_matrix, grad_vector, None, None, None
def backward(ctx, grad_output):
    vector1, vector2 = ctx.saved_variables
    grad_add_matrix = grad_vector1 = grad_vector2 = None
    if ctx.needs_input_grad[0]:
        grad_add_matrix = maybe_unexpand(grad_output, ctx.add_matrix_size)
        if ctx.alpha != 1:
            grad_add_matrix = grad_add_matrix.mul(ctx.alpha)
    if ctx.needs_input_grad[1]:
        grad_vector1 = torch.mv(grad_output, vector2)
        if ctx.beta != 1:
            grad_vector1 *= ctx.beta
    if ctx.needs_input_grad[2]:
        # TODO: maybe it's better to do transpose + mv + transpose
        grad_vector2 = torch.mm(vector1.unsqueeze(0), grad_output).squeeze(0)
        if ctx.beta != 1:
            grad_vector2 *= ctx.beta
    return grad_add_matrix, grad_vector1, grad_vector2, None, None, None
def updateOutput(self, input):
    M, v = input
    assert M.ndimension() == 2 or M.ndimension() == 3

    if M.ndimension() == 2:
        assert v.ndimension() == 1
        if self.trans:
            M = M.transpose(0, 1)
        self.output.resize_(M.size(0))
        torch.mv(M, v, out=self.output)
    else:
        assert v.ndimension() == 2
        if self.trans:
            M = M.transpose(1, 2)
        self.output.resize_(M.size(0), M.size(1), 1)
        torch.bmm(M, v.view(v.size(0), v.size(1), 1), out=self.output).resize_(M.size(0), M.size(1))

    return self.output
def backward(self, grad_output):
    input, weight, bias = self.saved_tensors
    grad_input = grad_weight = grad_bias = None
    if self.needs_input_grad[0]:
        grad_output = grad_output.squeeze()
        grad_input = torch.mm(grad_output, weight)
    if self.needs_input_grad[1]:
        grad_weight = torch.mm(grad_output.t(), input)
    if bias is not None and self.needs_input_grad[2]:
        grad_bias = torch.mv(grad_output.t(), self.add_buffer)
    if bias is not None:
        return grad_input, grad_weight, grad_bias
    else:
        return grad_input, grad_weight
def __matmul__(self, other):
    dim_self = self.dim()
    dim_other = other.dim()
    # TODO: should this really be dot product?
    # if dim_self == 1 and dim_other == 1:
    #     return self.dot(other)
    if dim_self == 2 and dim_other == 1:
        return torch.mv(self, other)
    elif dim_self == 2 and dim_other == 2:
        return torch.mm(self, other)
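A short check of the dispatch above using the `@` operator (a sketch of the intended behaviour for 2-D operands):

import torch

M = torch.randn(3, 4)
v = torch.randn(4)
N = torch.randn(4, 2)

assert torch.allclose(M @ v, torch.mv(M, v))   # 2-D @ 1-D -> matrix-vector product
assert torch.allclose(M @ N, torch.mm(M, N))   # 2-D @ 2-D -> matrix-matrix product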
def accGradParameters(self, input, gradOutput, scale=1):
    self.network.accGradParameters([input, self.partition], gradOutput, scale)
    if self.bias is not None:
        if self.buffer is None:
            self.buffer = input.new()
        self.buffer.resize_(gradOutput.size(1))
        # Older calling convention: the destination buffer is passed as the first argument
        # (the later variants of this snippet use out=self.buffer instead).
        torch.mv(self.buffer, gradOutput.t(), self.addBuffer).mul_(scale)
        self.gradBias.index_add_(
            1, self.partition, self.buffer.view(1, self.buffer.nelement())
        )
def test_mv(self):
    m1 = torch.randn(100, 100)
    v1 = torch.randn(100)

    res1 = torch.mv(m1, v1)
    res2 = res1.clone().zero_()
    for i, j in iter_indices(m1):
        res2[i] += m1[i][j] * v1[j]

    self.assertEqual(res1, res2)
def gmm_batch_model(data):
    p = pyro.param("p", Variable(torch.Tensor([0.3]), requires_grad=True))
    p = torch.cat([p, 1 - p])
    sigma = pyro.param("sigma", Variable(torch.Tensor([1.0]), requires_grad=True))
    mus = Variable(torch.Tensor([-1, 1]))
    with pyro.iarange("data", len(data)) as batch:
        n = len(batch)
        z = pyro.sample("z", dist.Categorical(p.unsqueeze(0).expand(n, 2)))
        assert z.size() == (n, 2)
        mu = torch.mv(z, mus)
        pyro.observe("x", dist.Normal(mu, sigma.expand(n)), data[batch])
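Here `z` has shape `(n, 2)` (see the assert above), i.e. one row per data point, encoded one-hot in this older Pyro version, so `torch.mv(z, mus)` simply selects the component mean for each data point. A small sketch of that selection with hand-written one-hot rows:

import torch

mus = torch.tensor([-1.0, 1.0])
z = torch.tensor([[1.0, 0.0],      # component 0
                  [0.0, 1.0],      # component 1
                  [1.0, 0.0]])     # component 0
mu = torch.mv(z, mus)
print(mu)                          # tensor([-1.,  1., -1.])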
def test_ip_forward():
    p_t, Q_t, G_t, A_t, z0_t, s0_t = [torch.Tensor(x) for x in [p, Q, G, A, z0, s0]]
    b = torch.mv(A_t, z0_t) if neq > 0 else None
    h = torch.mv(G_t, z0_t) + s0_t
    L_Q, L_S, R = aip.pre_factor_kkt(Q_t, G_t, A_t)
    zhat_ip, nu_ip, lam_ip = aip.forward_single(p_t, Q_t, G_t, A_t, b, h, L_Q, L_S, R)

    # Unnecessary clones here because of a pytorch bug when calling numpy
    # on a tensor with a non-zero offset.
    npt.assert_allclose(zhat, zhat_ip.clone().numpy(), rtol=RTOL, atol=ATOL)
    if neq > 0:
        npt.assert_allclose(nu, nu_ip.clone().numpy(), rtol=RTOL, atol=ATOL)
    npt.assert_allclose(lam, lam_ip.clone().numpy(), rtol=RTOL, atol=ATOL)
def prof_instance(nz, neq, nineq, nIter, cuda):
    L = np.tril(npr.uniform(0, 1, (nz, nz))) + np.eye(nz, nz)
    G = npr.randn(nineq, nz)
    A = npr.randn(neq, nz)
    z0 = npr.randn(nz)
    s0 = np.ones(nineq)
    p = npr.randn(nz)

    p, L, G, A, z0, s0 = [torch.Tensor(x) for x in [p, L, G, A, z0, s0]]
    Q = torch.mm(L, L.t()) + 0.001 * torch.eye(nz).type_as(L)
    if cuda:
        p, L, Q, G, A, z0, s0 = [x.cuda() for x in [p, L, Q, G, A, z0, s0]]

    af = adact.AdactFunction()

    start = time.time()
    # One-time cost for numpy conversion.
    p_np, L_np, G_np, A_np, z0_np, s0_np = [adact.toNp(v) for v in [p, L, G, A, z0, s0]]
    cp = time.time() - start
    for i in range(nIter):
        start = time.time()
        zhat, nu, lam = af.forward_single_np(p_np, L_np, G_np, A_np, z0_np, s0_np)
        cp += time.time() - start

    b = torch.mv(A, z0) if neq > 0 else None
    h = torch.mv(G, z0) + s0
    L_Q, L_S, R = aip.pre_factor_kkt(Q, G, A, nineq, neq)
    pdipm = []
    for i in range(nIter):
        start = time.time()
        zhat_ip, nu_ip, lam_ip = aip.forward_single(p, Q, G, A, b, h, L_Q, L_S, R)
        pdipm.append(time.time() - start)

    return cp, np.sum(pdipm)
def backward(ctx, grad_output):
    input, weight, bias = ctx.saved_variables
    grad_input = grad_weight = grad_bias = None
    if ctx.needs_input_grad[0]:
        grad_input = torch.mm(grad_output, weight)
    if ctx.needs_input_grad[1]:
        grad_weight = torch.mm(grad_output.t(), input)
    if bias is not None and ctx.needs_input_grad[2]:
        grad_bias = torch.mv(grad_output.t(), Variable(ctx.add_buffer))
    if bias is not None:
        return grad_input, grad_weight, grad_bias
    else:
        return grad_input, grad_weight
def accGradParameters(self, input, gradOutput, scale=1):
    self.network.accGradParameters([input, self.partition], gradOutput, scale)
    if self.bias is not None:
        if self.buffer is None:
            self.buffer = input.new()
        self.buffer.resize_(gradOutput.size(1))
        torch.mv(gradOutput.t(), self.addBuffer, out=self.buffer).mul_(scale)
        self.gradBias.index_add_(
            1, self.partition, self.buffer.view(1, self.buffer.nelement())
        )
def test_mv(self):
    m1 = torch.randn(100, 100)
    v1 = torch.randn(100)

    res1 = torch.mv(m1, v1)
    res2 = res1.clone().zero_()
    for i, j in iter_indices(m1):
        res2[i] += m1[i][j] * v1[j]

    self.assertEqual(res1, res2)
def accGradParameters(self, input, gradOutput, scale=1):
    self.network.accGradParameters([input, self.partition], gradOutput, scale)
    if self.bias is not None:
        if self.buffer is None:
            self.buffer = input.new()
        self.buffer.resize_(gradOutput.size(1))
        torch.mv(gradOutput.t(), self.addBuffer, out=self.buffer).mul_(scale)
        self.gradBias.index_add_(
            1, self.partition, self.buffer.view(1, self.buffer.nelement())
        )
def test_functional_blas(self):
    def compare(fn, *args):
        unpacked_args = tuple(arg.data if isinstance(arg, Variable) else arg
                              for arg in args)
        self.assertEqual(fn(*args).data, fn(*unpacked_args))

    def test_blas_add(fn, x, y, z):
        # Checks all signatures
        compare(fn, x, y, z)
        compare(fn, 0.5, x, y, z)
        compare(fn, 0.5, x, 0.25, y, z)

    def test_blas(fn, x, y):
        compare(fn, x, y)

    test_blas(torch.mm, Variable(torch.randn(2, 10)),
              Variable(torch.randn(10, 4)))
    test_blas_add(torch.addmm, Variable(torch.randn(2, 4)),
                  Variable(torch.randn(2, 10)), Variable(torch.randn(10, 4)))
    test_blas(torch.bmm, Variable(torch.randn(4, 2, 10)),
              Variable(torch.randn(4, 10, 4)))
    test_blas_add(torch.addbmm, Variable(torch.randn(2, 4)),
                  Variable(torch.randn(4, 2, 10)), Variable(torch.randn(4, 10, 4)))
    test_blas_add(torch.baddbmm, Variable(torch.randn(4, 2, 4)),
                  Variable(torch.randn(4, 2, 10)), Variable(torch.randn(4, 10, 4)))
    test_blas(torch.mv, Variable(torch.randn(2, 10)),
              Variable(torch.randn(10)))
    test_blas_add(torch.addmv, Variable(torch.randn(2)),
                  Variable(torch.randn(2, 10)), Variable(torch.randn(10)))
    test_blas(torch.ger, Variable(torch.randn(5)),
              Variable(torch.randn(6)))
    test_blas_add(torch.addr, Variable(torch.randn(5, 6)),
                  Variable(torch.randn(5)), Variable(torch.randn(6)))
def forward(self, h_temporal, h_spatials):
    '''
    Forward pass for the model
    params:
        h_temporal : Hidden state of the temporal edgeRNN
        h_spatials : Hidden states of all spatial edgeRNNs connected to the node.
    '''
    # Number of spatial edges
    num_edges = h_spatials.size()[0]

    # Embed the temporal edgeRNN hidden state
    temporal_embed = self.temporal_edge_layer(h_temporal)
    temporal_embed = temporal_embed.squeeze(0)

    # Embed the spatial edgeRNN hidden states
    spatial_embed = self.spatial_edge_layer(h_spatials)

    # Dot based attention
    attn = torch.mv(spatial_embed, temporal_embed)

    # Variable length
    temperature = num_edges / np.sqrt(self.attention_size)
    attn = torch.mul(attn, temperature)

    # Softmax
    attn = torch.nn.functional.softmax(attn)

    # Compute weighted value
    weighted_value = torch.mv(torch.t(h_spatials), attn)

    return weighted_value, attn
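The attention step above is plain dot-product attention: score each spatial embedding against the temporal embedding with `torch.mv`, normalize with a softmax, then take an attention-weighted sum of the spatial hidden states with a second `torch.mv`. A standalone sketch with illustrative names (not the model's API):

import torch
import torch.nn.functional as F

def dot_attention(temporal_embed, spatial_embed, h_spatials, attention_size=64):
    num_edges = spatial_embed.size(0)
    attn = torch.mv(spatial_embed, temporal_embed)        # (num_edges,) dot-product scores
    attn = attn * (num_edges / attention_size ** 0.5)     # temperature scaling, as above
    attn = F.softmax(attn, dim=0)
    weighted_value = torch.mv(h_spatials.t(), attn)       # attention-weighted sum of hidden states
    return weighted_value, attn

w, a = dot_attention(torch.randn(64), torch.randn(5, 64), torch.randn(5, 128))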
def accGradParameters(self, input, gradOutput, scale=1):
    self.network.accGradParameters([input, self.partition], gradOutput, scale)
    if self.bias is not None:
        if self.buffer is None:
            self.buffer = input.new()
        self.buffer.resize_(gradOutput.size(1))
        torch.mv(gradOutput.t(), self.addBuffer, out=self.buffer).mul_(scale)
        self.gradBias.index_add_(
            1, self.partition, self.buffer.view(1, self.buffer.nelement())
        )
def solve_kkt(U_Q, d, G, A, U_S, rx, rs, rz, ry, dbg=False):
    """ Solve KKT equations for the affine step"""
    nineq, nz, neq, _ = get_sizes(G, A)

    invQ_rx = torch.potrs(rx.view(-1, 1), U_Q).view(-1)
    if neq > 0:
        h = torch.cat([torch.mv(A, invQ_rx) - ry,
                       torch.mv(G, invQ_rx) + rs / d - rz], 0)
    else:
        h = torch.mv(G, invQ_rx) + rs / d - rz

    w = -torch.potrs(h.view(-1, 1), U_S).view(-1)

    g1 = -rx - torch.mv(G.t(), w[neq:])
    if neq > 0:
        g1 -= torch.mv(A.t(), w[:neq])
    g2 = -rs - w[neq:]

    dx = torch.potrs(g1.view(-1, 1), U_Q).view(-1)
    ds = g2 / d
    dz = w[neq:]
    dy = w[:neq] if neq > 0 else None

    # if np.all(np.array([x.norm() for x in [rx, rs, rz, ry]]) != 0):
    if dbg:
        import IPython
        import sys
        IPython.embed()
        sys.exit(-1)
    # if rs.norm() > 0: import IPython, sys; IPython.embed(); sys.exit(-1)

    return dx, ds, dz, dy
def factor_solve_kkt(Q, D, G, A, rx, rs, rz, ry):
    nineq, nz, neq, _ = get_sizes(G, A)

    if neq > 0:
        H_ = torch.cat([torch.cat([Q, torch.zeros(nz, nineq).type_as(Q)], 1),
                        torch.cat([torch.zeros(nineq, nz).type_as(Q), D], 1)], 0)
        A_ = torch.cat([torch.cat([G, torch.eye(nineq).type_as(Q)], 1),
                        torch.cat([A, torch.zeros(neq, nineq).type_as(Q)], 1)], 0)
        g_ = torch.cat([rx, rs], 0)
        h_ = torch.cat([rz, ry], 0)
    else:
        H_ = torch.cat([torch.cat([Q, torch.zeros(nz, nineq).type_as(Q)], 1),
                        torch.cat([torch.zeros(nineq, nz).type_as(Q), D], 1)], 0)
        A_ = torch.cat([G, torch.eye(nineq).type_as(Q)], 1)
        g_ = torch.cat([rx, rs], 0)
        h_ = rz

    U_H_ = torch.potrf(H_)

    invH_A_ = torch.potrs(A_.t(), U_H_)
    invH_g_ = torch.potrs(g_.view(-1, 1), U_H_).view(-1)

    S_ = torch.mm(A_, invH_A_)
    U_S_ = torch.potrf(S_)
    t_ = torch.mv(A_, invH_g_).view(-1, 1) - h_
    w_ = -torch.potrs(t_, U_S_).view(-1)
    v_ = torch.potrs(-g_.view(-1, 1) - torch.mv(A_.t(), w_), U_H_).view(-1)

    return v_[:nz], v_[nz:], w_[:nineq], w_[nineq:] if neq > 0 else None