def updateOutput(self, input):
assert input.dim() == 2
inputSize = self.weight.size(1)
outputSize = self.weight.size(0)
if self._weightNorm is None:
    self._weightNorm = self.weight.new()
if self._inputNorm is None:
    self._inputNorm = self.weight.new()
# y_j = (w_j * x) / ( || w_j || * || x || )
torch.norm(self.weight, 2, 1, out=self._weightNorm).add_(1e-12)
batchSize = input.size(0)
nelement = self.output.nelement()
self.output.resize_(batchSize, outputSize)
if self.output.nelement() != nelement:
self.output.zero_()
self.output.addmm_(0., 1., input, self.weight.t())
torch.norm(input, 2, 1, out=self._inputNorm).add_(1e-12)
self.output.div_(self._weightNorm.view(1, outputSize).expand_as(self.output))
self.output.div_(self._inputNorm.expand_as(self.output))
return self.output
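The layer above computes the cosine of each input row against each weight row. As a cross-check, the same quantity can be written with plain tensor ops; this is a minimal sketch assuming a recent PyTorch where torch.norm accepts dim/keepdim keywords, not part of the original module:
import torch

def cosine_layer_reference(input, weight, eps=1e-12):
    # input: (batchSize, inputSize), weight: (outputSize, inputSize)
    # y[i, j] = (w_j . x_i) / (||w_j|| * ||x_i||), with eps matching the 1e-12 stabiliser above
    w_norm = torch.norm(weight, 2, dim=1, keepdim=True).add_(eps)   # (outputSize, 1)
    x_norm = torch.norm(input, 2, dim=1, keepdim=True).add_(eps)    # (batchSize, 1)
    return input.mm(weight.t()) / (x_norm * w_norm.t())             # (batchSize, outputSize)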
Python torch.norm() usage examples (source snippets)
def cosine_similarity(x1, x2, dim=1, eps=1e-8):
r"""Returns cosine similarity between x1 and x2, computed along dim.
Args:
x1 (Variable): First input.
x2 (Variable): Second input (of size matching x1).
dim (int, optional): Dimension of vectors. Default: 1
eps (float, optional): Small value to avoid division by zero. Default: 1e-8
Shape:
- Input: :math:`(\ast_1, D, \ast_2)` where D is at position `dim`.
- Output: :math:`(\ast_1, \ast_2)` where 1 is at position `dim`.
"""
w12 = torch.sum(x1 * x2, dim)
w1 = torch.norm(x1, 2, dim)
w2 = torch.norm(x2, 2, dim)
return (w12 / (w1 * w2).clamp(min=eps)).squeeze()
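A quick illustrative call; the shapes and tensors here are made up, not from the original source:
import torch
a = torch.randn(5, 128)
b = torch.randn(5, 128)
sim = cosine_similarity(a, b, dim=1)   # shape (5,), each value in [-1, 1]
# (on older PyTorch versions the inputs would be wrapped in Variable, as the docstring indicates)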
def normalize(input, p=2, dim=1, eps=1e-12):
r"""Performs :math:`L_p` normalization of inputs over specified dimension.
Does:
.. math::
v = \frac{v}{\max(\lVert v \rVert_p, \epsilon)}
for each subtensor v over dimension dim of input. Each subtensor is flattened into a vector,
i.e. :math:`\lVert v \rVert_p` is not a matrix norm.
With default arguments normalizes over the second dimension with Euclidean norm.
Args:
input: input tensor of any shape
p (float): the exponent value in the norm formulation
dim (int): the dimension to reduce
eps (float): small value to avoid division by zero
"""
return input / input.norm(p, dim, True).clamp(min=eps).expand_as(input)
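An illustrative use of the normalize defined above, checking that each row ends up with unit L2 norm (the tensor is made up):
import torch
x = torch.randn(4, 10)
x_unit = normalize(x, p=2, dim=1)
print(torch.norm(x_unit, 2, 1))   # each of the 4 entries is ~1.0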
def normalize(input, p=2, dim=1, eps=1e-12):
r"""Performs :math:`L_p` normalization of inputs over specified dimension.
Does:
.. math::
v = \frac{v}{\max(\lVert v \rVert_p, \epsilon)}
for each subtensor v over dimension dim of input. Each subtensor is
flattened into a vector, i.e. :math:`\lVert v \rVert_p` is not a matrix
norm.
With default arguments normalizes over the second dimension with Euclidean
norm.
Args:
input: input tensor of any shape
p (float): the exponent value in the norm formulation. Default: 2
dim (int): the dimension to reduce. Default: 1
eps (float): small value to avoid division by zero. Default: 1e-12
"""
return input / input.norm(p, dim, True).clamp(min=eps).expand_as(input)
def test_computes_radial_basis_function_gradient():
a = torch.Tensor([4, 2, 8]).view(3, 1)
b = torch.Tensor([0, 2, 2]).view(3, 1)
lengthscale = 2
kernel = RBFKernel().initialize(log_lengthscale=math.log(lengthscale))
kernel.eval()
param = Variable(torch.Tensor(3, 3).fill_(math.log(lengthscale)), requires_grad=True)
diffs = Variable(a.expand(3, 3) - b.expand(3, 3).transpose(0, 1))
actual_output = (-(diffs ** 2) / param.exp()).exp()
actual_output.backward(torch.eye(3))
actual_param_grad = param.grad.data.sum()
output = kernel(Variable(a), Variable(b))
output.backward(gradient=torch.eye(3))
res = kernel.log_lengthscale.grad.data
assert(torch.norm(res - actual_param_grad) < 1e-5)
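The hand-computed target in this test uses the RBF form k(a, b) = exp(-(a - b)^2 / exp(log_lengthscale)); a standalone sketch of that formula in plain torch (function and argument names here are illustrative, not gpytorch's API):
import math
import torch

def rbf_reference(a, b, log_lengthscale):
    # pairwise squared differences, scaled by the exponentiated log-lengthscale
    diffs = a.view(-1, 1) - b.view(1, -1)
    return torch.exp(-(diffs ** 2) / math.exp(log_lengthscale))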
def test_inv_matmul():
c_1 = Variable(torch.Tensor([4, 1, 1]), requires_grad=True)
c_2 = Variable(torch.Tensor([4, 1, 1]), requires_grad=True)
T_1 = Variable(torch.zeros(3, 3))
for i in range(3):
for j in range(3):
T_1[i, j] = c_1[abs(i - j)]
T_2 = gpytorch.lazy.ToeplitzLazyVariable(c_2)
B = Variable(torch.randn(3, 4))
res_1 = gpytorch.inv_matmul(T_1, B).sum()
res_2 = gpytorch.inv_matmul(T_2, B).sum()
res_1.backward()
res_2.backward()
assert(torch.norm(res_1.data - res_2.data) < 1e-4)
assert(torch.norm(c_1.grad.data - c_2.grad.data) < 1e-4)
def test_exact_posterior():
train_mean = Variable(torch.randn(4))
train_y = Variable(torch.randn(4))
test_mean = Variable(torch.randn(4))
# Test case
c1_var = Variable(torch.Tensor([5, 1, 2, 0]), requires_grad=True)
c2_var = Variable(torch.Tensor([6, 0, 1, -1]), requires_grad=True)
indices = Variable(torch.arange(0, 4).long().view(4, 1))
values = Variable(torch.ones(4).view(4, 1))
toeplitz_1 = InterpolatedLazyVariable(ToeplitzLazyVariable(c1_var), indices, values, indices, values)
toeplitz_2 = InterpolatedLazyVariable(ToeplitzLazyVariable(c2_var), indices, values, indices, values)
sum_lv = toeplitz_1 + toeplitz_2
# Actual case
actual = sum_lv.evaluate()
# Test forward
actual_alpha = gpytorch.posterior_strategy(actual).exact_posterior_alpha(train_mean, train_y)
actual_mean = gpytorch.posterior_strategy(actual).exact_posterior_mean(test_mean, actual_alpha)
sum_lv_alpha = sum_lv.posterior_strategy().exact_posterior_alpha(train_mean, train_y)
sum_lv_mean = sum_lv.posterior_strategy().exact_posterior_mean(test_mean, sum_lv_alpha)
assert(torch.norm(actual_mean.data - sum_lv_mean.data) < 1e-4)
def test_gp_prior_and_likelihood():
gp_model = ExactGPModel()
gp_model.covar_module.initialize(log_lengthscale=0) # This shouldn't really do anything now
gp_model.mean_module.initialize(constant=1) # Let's have a mean of 1
gp_model.likelihood.initialize(log_noise=math.log(0.5))
gp_model.eval()
# Let's see how our model does, not conditioned on any data
# The GP prior should predict a mean of 1; with the 0.5 likelihood noise the predictive variance is 1.5
function_predictions = gp_model(train_x)
assert(torch.norm(function_predictions.mean().data - 1) < 1e-5)
assert(torch.norm(function_predictions.var().data - 1.5) < 1e-5)
# The covariance between the furthest apart points should be 1/e
least_covar = function_predictions.covar().data[0, -1]
assert(math.fabs(least_covar - math.exp(-1)) < 1e-5)
def test_backward_inv_mv():
a = torch.Tensor([
[5, -3, 0],
[-3, 5, 0],
[0, 0, 2],
])
b = torch.ones(3, 3).fill_(2)
c = torch.randn(3)
actual_a_grad = -torch.ger(a.inverse().mul_(0.5).mv(torch.ones(3)), a.inverse().mul_(0.5).mv(c)) * 2 * 2
actual_c_grad = (a.inverse() / 2).t().mv(torch.ones(3)) * 2
a_var = Variable(a, requires_grad=True)
c_var = Variable(c, requires_grad=True)
out_var = a_var.mul(Variable(b))
out_var = gpytorch.inv_matmul(out_var, c_var)
out_var = out_var.sum() * 2
out_var.backward()
a_res = a_var.grad.data
c_res = c_var.grad.data
assert(torch.norm(actual_a_grad - a_res) < 1e-4)
assert(torch.norm(actual_c_grad - c_res) < 1e-4)
def get_grads(nBatch=1, nz=10, neq=1, nineq=3, Qscale=1.,
Gscale=1., hscale=1., Ascale=1., bscale=1.):
assert(nBatch == 1)
npr.seed(1)
L = np.random.randn(nz, nz)
Q = Qscale * L.dot(L.T)
G = Gscale * npr.randn(nineq, nz)
# h = hscale*npr.randn(nineq)
z0 = npr.randn(nz)
s0 = npr.rand(nineq)
h = G.dot(z0) + s0
A = Ascale * npr.randn(neq, nz)
# b = bscale*npr.randn(neq)
b = A.dot(z0)
p = npr.randn(nBatch, nz)
# print(np.linalg.norm(p))
truez = npr.randn(nBatch, nz)
Q, p, G, h, A, b, truez = [x.astype(np.float64) for x in
[Q, p, G, h, A, b, truez]]
_, zhat, nu, lam, slacks = qp_cvxpy.forward_single_np(Q, p[0], G, h, A, b)
grads = get_grads_torch(Q, p, G, h, A, b, truez)
return [p[0], Q, G, h, A, b, truez], grads
def th_matrixcorr(x, y):
"""
return a correlation matrix between
columns of x and columns of y.
So, if X.size() == (1000,4) and Y.size() == (1000,5),
then the result will be of size (4,5) with the
(i,j) value equal to the pearsonr correlation coeff
between column i in X and column j in Y
"""
mean_x = th.mean(x, 0)
mean_y = th.mean(y, 0)
xm = x.sub(mean_x.expand_as(x))
ym = y.sub(mean_y.expand_as(y))
r_num = xm.t().mm(ym)
r_den1 = th.norm(xm,2,0)
r_den2 = th.norm(ym,2,0)
r_den = r_den1.t().mm(r_den2)
r_mat = r_num.div(r_den)
return r_mat
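For comparison, the same column-wise Pearson correlation written against a recent PyTorch API, where keepdim=True stands in for the older reduction behaviour the snippet relies on; a sketch, not the author's code:
import torch as th

def matrixcorr_reference(x, y):
    xm = x - x.mean(0, keepdim=True)
    ym = y - y.mean(0, keepdim=True)
    r_num = xm.t().mm(ym)                                  # (x_cols, y_cols)
    r_den = th.norm(xm, 2, 0, keepdim=True).t().mm(th.norm(ym, 2, 0, keepdim=True))
    return r_num / r_den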
def normalized_cross_correlation(self):
w = self.weight.view(self.weight.size(0), -1)
t_norm = torch.norm(w, p=2, dim=1)
if self.in_channels == 1 and sum(self.kernel_size) == 1:
ncc = w.squeeze() / torch.norm(self.t0_norm, p=2)
ncc = ncc - self.start_ncc
return ncc
#mean = torch.mean(w, dim=1).unsqueeze(1).expand_as(w)
mean = torch.mean(w, dim=1).unsqueeze(1) # 0.2 broadcasting
t_factor = w - mean
h_product = self.t0_factor * t_factor
cov = torch.sum(h_product, dim=1) # (w.size(1) - 1)
# had normalization code commented out
denom = self.t0_norm * t_norm
ncc = cov / denom
ncc = ncc - self.start_ncc
return ncc
def normalize(input, p=2, dim=1, eps=1e-12):
r"""Performs :math:`L_p` normalization of inputs over specified dimension.
Does:
.. math::
v = \frac{v}{\max(\lVert v \rVert_p, \epsilon)}
for each subtensor v over dimension dim of input. Each subtensor is
flattened into a vector, i.e. :math:`\lVert v \rVert_p` is not a matrix
norm.
With default arguments normalizes over the second dimension with Euclidean
norm.
Args:
input: input tensor of any shape
p (float): the exponent value in the norm formulation. Default: 2
dim (int): the dimension to reduce. Default: 1
eps (float): small value to avoid division by zero. Default: 1e-12
"""
return input / input.norm(p, dim, True).clamp(min=eps).expand_as(input)
def cosine_similarity(x1, x2, dim=1, eps=1e-8):
r"""Returns cosine similarity between x1 and x2, computed along dim.
Args:
x1 (Variable): First input.
x2 (Variable): Second input (of size matching x1).
dim (int, optional): Dimension of vectors. Default: 1
eps (float, optional): Small value to avoid division by zero. Default: 1e-8
Shape:
- Input: :math:`(\ast_1, D, \ast_2)` where D is at position `dim`.
- Output: :math:`(\ast_1, \ast_2)` where 1 is at position `dim`.
"""
w12 = torch.sum(x1 * x2, dim)
w1 = torch.norm(x1, 2, dim)
w2 = torch.norm(x2, 2, dim)
return (w12 / (w1 * w2).clamp(min=eps)).squeeze()
def normalize(input, p=2, dim=1, eps=1e-12):
r"""Performs :math:`L_p` normalization of inputs over specified dimension.
Does:
.. math::
v = \frac{v}{\max(\lVert v \rVert_p, \epsilon)}
for each subtensor v over dimension dim of input. Each subtensor is
flattened into a vector, i.e. :math:`\lVert v \rVert_p` is not a matrix
norm.
With default arguments normalizes over the second dimension with Euclidean
norm.
Args:
input: input tensor of any shape
p (float): the exponent value in the norm formulation
dim (int): the dimension to reduce
eps (float): small value to avoid division by zero
"""
return input / torch.norm(input, p, dim).clamp(min=eps).expand_as(input)
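The variant above calls torch.norm(input, p, dim) without keeping the reduced dimension, which relies on older reduction semantics; on a recent PyTorch the expand_as would fail because the norm comes back with one dimension fewer. A hedged modern equivalent:
import torch

def normalize_keepdim(input, p=2, dim=1, eps=1e-12):
    # keepdim=True keeps the reduced dimension so the division broadcasts against input
    return input / torch.norm(input, p, dim, keepdim=True).clamp(min=eps)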
Source: utils.py, from project restricted-boltzmann-machine-deep-belief-network-deep-boltzmann-machine-in-pytorch (author: wmingwei)
def joint_train(dbm, lr = 1e-3, epoch = 100, batch_size = 50, input_data = None, weight_decay = 0, k_positive=10, k_negative=10, alpha = [1e-1,1e-1,1]):
u1 = nn.Parameter(torch.zeros(1))
u2 = nn.Parameter(torch.zeros(1))
# optimizer = optim.Adam(dbm.parameters(), lr = lr, weight_decay = weight_decay)
optimizer = optim.SGD(dbm.parameters(), lr = lr, momentum = 0.5)
train_set = torch.utils.data.dataset.TensorDataset(input_data, torch.zeros(input_data.size()[0]))
train_loader = torch.utils.data.DataLoader(train_set, batch_size = batch_size, shuffle=True)
optimizer_u = optim.Adam([u1,u2], lr = lr/1000, weight_decay = weight_decay)
for _ in range(epoch):
print("training epoch %i with u1 = %.4f, u2 = %.4f"%(_, u1.data.numpy()[0], u2.data.numpy()[0]))
for batch_idx, (data, target) in enumerate(train_loader):
data = Variable(data)
positive_phase, negative_phase= dbm(v_input = data, k_positive = k_positive, k_negative=k_negative, greedy = False)
loss = (energy(dbm=dbm, layer=positive_phase) - energy(dbm=dbm, layer=negative_phase)
        + alpha[0] * torch.norm(torch.norm(dbm.W[0], 2, 1) - u1.repeat(dbm.W[0].size()[0], 1)) ** 2
        + alpha[1] * torch.norm(torch.norm(dbm.W[1], 2, 1) - u2.repeat(dbm.W[1].size()[0], 1)) ** 2
        + alpha[2] * (u1 - u2) ** 2)
loss.backward()
optimizer.step()
optimizer.zero_grad()
optimizer_u.step()
optimizer_u.zero_grad()
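The regulariser in the loss above pulls the per-row L2 norms of each weight matrix toward the learned scalars u1 and u2 (and ties u1 to u2 via the last term). A sketch of a single such term, with illustrative names:
import torch

def row_norm_penalty(W, u, alpha):
    # alpha * sum_i (||W[i]|| - u)^2, the same composition of torch.norm calls as in the loss
    row_norms = torch.norm(W, 2, 1)
    return alpha * torch.norm(row_norms - u) ** 2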
def PairwiseConfusion(features):
batch_size = features.size(0)
if float(batch_size) % 2 != 0:
raise Exception('Incorrect batch size provided')
batch_left = features[:int(0.5*batch_size)]
batch_right = features[int(0.5*batch_size):]
loss = torch.norm((batch_left - batch_right).abs(),2, 1).sum() / float(batch_size)
return loss
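Illustrative usage with a made-up feature batch (the batch size must be even, or the function raises):
import torch
features = torch.randn(8, 256)
loss = PairwiseConfusion(features)
# sum of row-wise L2 distances between the two half-batches, divided by the full batch size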
def updateOutput(self, input):
# lazy initialize buffers
if self._input is None:
    self._input = input.new()
if self._weight is None:
    self._weight = self.weight.new()
if self._expand is None:
    self._expand = self.output.new()
if self._expand2 is None:
    self._expand2 = self.output.new()
if self._repeat is None:
    self._repeat = self.output.new()
if self._repeat2 is None:
    self._repeat2 = self.output.new()
inputSize, outputSize = self.weight.size(0), self.weight.size(1)
# y_j = || w_j - x || = || x - w_j ||
assert input.dim() == 2
batchSize = input.size(0)
self._view(self._input, input, batchSize, inputSize, 1)
self._expand = self._input.expand(batchSize, inputSize, outputSize)
# make the expanded tensor contiguous (requires lots of memory)
self._repeat.resize_as_(self._expand).copy_(self._expand)
self._weight = self.weight.view(1, inputSize, outputSize)
self._expand2 = self._weight.expand_as(self._repeat)
if torch.typename(input) == 'torch.cuda.FloatTensor':
# TODO: after adding new allocators this can be changed
# requires lots of memory, but minimizes cudaMallocs and loops
self._repeat2.resize_as_(self._expand2).copy_(self._expand2)
self._repeat.add_(-1, self._repeat2)
else:
self._repeat.add_(-1, self._expand2)
torch.norm(self._repeat, 2, 1, out=self.output)
self.output.resize_(batchSize, outputSize)
return self.output
def normalize(data, p=2, dim=1, eps=1e-12):
return data / torch.norm(data, p, dim).clamp(min=eps).expand_as(data)
def test_importance_guide(self):
posterior = pyro.infer.Importance(self.model, guide=self.guide, num_samples=10000)
marginal = pyro.infer.Marginal(posterior)
posterior_samples = [marginal() for i in range(1000)]
posterior_mean = torch.mean(torch.cat(posterior_samples))
posterior_stddev = torch.std(torch.cat(posterior_samples), 0)
self.assertEqual(0, torch.norm(posterior_mean - self.mu_mean).data[0],
prec=0.01)
self.assertEqual(0, torch.norm(posterior_stddev - self.mu_stddev).data[0],
prec=0.1)
def test_importance_prior(self):
posterior = pyro.infer.Importance(self.model, guide=None, num_samples=10000)
marginal = pyro.infer.Marginal(posterior)
posterior_samples = [marginal() for i in range(1000)]
posterior_mean = torch.mean(torch.cat(posterior_samples))
posterior_stddev = torch.std(torch.cat(posterior_samples), 0)
self.assertEqual(0, torch.norm(posterior_mean - self.mu_mean).data[0],
prec=0.01)
self.assertEqual(0, torch.norm(posterior_stddev - self.mu_stddev).data[0],
prec=0.1)
def eq(x, y, prec=1e-10):
return (torch.norm(x - y).data[0] < prec)
# XXX name is a bit silly
def EPE(input_flow, target_flow, sparse=False, mean=True):
EPE_map = torch.norm(target_flow-input_flow,2,1)
if sparse:
EPE_map = EPE_map[target_flow != 0]
if mean:
return EPE_map.mean()
else:
return EPE_map.sum()
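A hedged usage sketch with dummy optical-flow tensors of shape (batch, 2, H, W); the shapes are illustrative:
import torch
pred = torch.randn(4, 2, 32, 32)
target = torch.randn(4, 2, 32, 32)
err = EPE(pred, target)   # mean per-pixel endpoint error (L2 norm over the two flow channels)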
def _penalty(self, A):
return torch.norm(torch.mm(A, A.t()) - self.I) ** 2
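The penalty is the squared Frobenius norm ||A A^T - I||^2; a standalone sketch of the same computation, where the identity is built to match A's row count rather than taken from self.I:
import torch

def orthogonality_penalty(A):
    # squared Frobenius norm of A @ A.T minus the identity; zero when A's rows are orthonormal
    I = torch.eye(A.size(0))
    return torch.norm(A.mm(A.t()) - I) ** 2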
Source: compare-pytorch-and-torch-grads.py, from project densenet.pytorch (author: bamos)
def printnorm_f(self, input, output):
print('{} norm: {}'.format(self.__class__.__name__, output.data.norm()))
# def printnorm_back(self, grad_input, grad_output):
# import IPython, sys; IPython.embed(); sys.exit(-1)
# print('{} grad_out norm: {}'.format(self.__class__.__name__, self.weight.grad.data.norm()))
def printM(mods):
for m in mods:
if isinstance(m, legacy.nn.SpatialConvolution):
print('Conv2d norm: {}'.format(torch.norm(m.output)))
elif isinstance(m, legacy.nn.Linear):
pass
elif isinstance(m, legacy.nn.Concat) or \
isinstance(m, legacy.nn.Sequential):
printM(m.modules)
# printM(net_th.modules)
def getM(mods):
for m in mods:
if isinstance(m, legacy.nn.SpatialConvolution):
m.gradWeight[m.gradWeight.ne(m.gradWeight)] = 0  # zero out NaN gradients (NaN != NaN)
l.append(torch.norm(m.gradWeight))
elif isinstance(m, legacy.nn.Linear):
l.append(torch.norm(m.gradWeight))
elif isinstance(m, legacy.nn.Concat) or \
isinstance(m, legacy.nn.Sequential):
getM(m.modules)
def updateOutput(self, input):
assert input.dim() == 2
inputSize = self.weight.size(1)
outputSize = self.weight.size(0)
if self._weightNorm is None:
self._weightNorm = self.weight.new()
if self._inputNorm is None:
self._inputNorm = self.weight.new()
# y_j = (w_j * x) / ( || w_j || * || x || )
torch.norm(self.weight, 2, 1, out=self._weightNorm).add_(1e-12)
batchSize = input.size(0)
nelement = self.output.nelement()
self.output.resize_(batchSize, outputSize)
if self.output.nelement() != nelement:
self.output.zero_()
self.output.addmm_(0., 1., input, self.weight.t())
torch.norm(input, 2, 1, out=self._inputNorm).add_(1e-12)
self.output.div_(self._weightNorm.view(1, outputSize).expand_as(self.output))
self.output.div_(self._inputNorm.expand_as(self.output))
return self.output
def updateOutput(self, input):
# lazy initialize buffers
if self._input is None:
self._input = input.new()
if self._weight is None:
self._weight = self.weight.new()
if self._expand is None:
self._expand = self.output.new()
if self._expand2 is None:
self._expand2 = self.output.new()
if self._repeat is None:
self._repeat = self.output.new()
if self._repeat2 is None:
self._repeat2 = self.output.new()
inputSize, outputSize = self.weight.size(0), self.weight.size(1)
# y_j = || w_j - x || = || x - w_j ||
assert input.dim() == 2
batchSize = input.size(0)
self._view(self._input, input, batchSize, inputSize, 1)
self._expand = self._input.expand(batchSize, inputSize, outputSize)
# make the expanded tensor contiguous (requires lots of memory)
self._repeat.resize_as_(self._expand).copy_(self._expand)
self._weight = self.weight.view(1, inputSize, outputSize)
self._expand2 = self._weight.expand_as(self._repeat)
if torch.typename(input) == 'torch.cuda.FloatTensor':
# TODO: after adding new allocators this can be changed
# requires lots of memory, but minimizes cudaMallocs and loops
self._repeat2.resize_as_(self._expand2).copy_(self._expand2)
self._repeat.add_(-1, self._repeat2)
else:
self._repeat.add_(-1, self._expand2)
torch.norm(self._repeat, 2, 1, out=self.output)
self.output.resize_(batchSize, outputSize)
return self.output
def pairwise_ranking_loss(margin, x, v):
zero = torch.zeros(1)
diag_margin = margin * torch.eye(x.size(0))
if not args.no_cuda:
zero, diag_margin = zero.cuda(), diag_margin.cuda()
zero, diag_margin = Variable(zero), Variable(diag_margin)
x = x / torch.norm(x, 2, 1, keepdim=True)
v = v / torch.norm(v, 2, 1, keepdim=True)
prod = torch.matmul(x, v.transpose(0, 1))
diag = torch.diag(prod)
for_x = torch.max(zero, margin - torch.unsqueeze(diag, 1) + prod) - diag_margin
for_v = torch.max(zero, margin - torch.unsqueeze(diag, 0) + prod) - diag_margin
return (torch.sum(for_x) + torch.sum(for_v)) / x.size(0)
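For intuition, a self-contained version of the same bidirectional margin-ranking loss on unit-normalised embeddings, with the args.no_cuda / Variable plumbing of the snippet left out (names are illustrative):
import torch

def ranking_loss_reference(margin, x, v):
    # normalise rows, then penalise non-matching pairs that score within `margin` of the matching pair
    x = x / torch.norm(x, 2, 1, keepdim=True)
    v = v / torch.norm(v, 2, 1, keepdim=True)
    prod = x.mm(v.t())                        # (N, N) similarities; the diagonal holds matching pairs
    diag = torch.diag(prod)
    diag_margin = margin * torch.eye(x.size(0))
    zero = torch.zeros(1)
    for_x = torch.max(zero, margin - diag.unsqueeze(1) + prod) - diag_margin
    for_v = torch.max(zero, margin - diag.unsqueeze(0) + prod) - diag_margin
    return (for_x.sum() + for_v.sum()) / x.size(0)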