def test_multi_backward(self):
    x = Variable(torch.randn(5, 5), requires_grad=True)
    y = Variable(torch.randn(5, 5), requires_grad=True)
    q = Variable(torch.randn(5, 5), requires_grad=True)
    a = Variable(torch.randn(5, 5), requires_grad=True)
    b = Variable(torch.randn(5, 5), requires_grad=True)
    q2 = q * 2
    z = x + y + q2
    c = a * b + q2
    grad_z = torch.randn(5, 5)
    grad_c = torch.randn(5, 5)
    torch.autograd.backward([z, c], [grad_z, grad_c])
    self.assertEqual(x.grad.data, grad_z)
    self.assertEqual(y.grad.data, grad_z)
    self.assertEqual(a.grad.data, grad_c * b.data)
    self.assertEqual(b.grad.data, grad_c * a.data)
    self.assertEqual(q.grad.data, (grad_c + grad_z) * 2)
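The last assertion holds because q2 = q * 2 feeds both z and c, so the two upstream gradients accumulate on q before the factor of 2 from the multiplication. A minimal sketch of the same check written against the current tensor API (not part of the original test):

import torch

q = torch.randn(5, 5, requires_grad=True)
grad_z = torch.randn(5, 5)
grad_c = torch.randn(5, 5)
q2 = q * 2
z = q2 + torch.randn(5, 5)   # stand-in for x + y + q2
c = q2 + torch.randn(5, 5)   # stand-in for a * b + q2
torch.autograd.backward([z, c], [grad_z, grad_c])
assert torch.allclose(q.grad, (grad_z + grad_c) * 2)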
def shapes_all(data):
    """
    Recursively walks the data (tuples, lists, or dicts) and replaces each
    tensor it encounters with that tensor's shape tuple.
    """
    if isinstance(data, (tuple, list)):
        ans = map(shapes_all, data)
        return type(data)(ans)
    elif isinstance(data, dict):
        return {k: shapes_all(v) for k, v in data.items()}
    elif (isinstance(data, np.ndarray)
          or torch.is_tensor(data)
          or isinstance(data, torch.autograd.Variable)
          or isinstance(data, torch.nn.Parameter)):
        return shape(data)
    else:
        return data
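A hedged usage sketch for shapes_all; the helper shape() is not shown above, so a stand-in that returns the plain dimension tuple is assumed here:

import numpy as np
import torch

def shape(t):  # stand-in for the helper used above (assumed to return a tuple)
    return tuple(t.shape)

nested = {'img': torch.zeros(3, 32, 32), 'meta': [np.ones(4), 'label', 7]}
print(shapes_all(nested))
# {'img': (3, 32, 32), 'meta': [(4,), 'label', 7]}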
def uniform(tensor, a=0, b=1):
    """Fills the input Tensor or Variable with values drawn from the uniform
    distribution :math:`U(a, b)`.

    Args:
        tensor: an n-dimensional torch.Tensor or autograd.Variable
        a: the lower bound of the uniform distribution
        b: the upper bound of the uniform distribution

    Examples:
        >>> w = torch.Tensor(3, 5)
        >>> nn.init.uniform(w)
    """
    if isinstance(tensor, Variable):
        uniform(tensor.data, a=a, b=b)
        return tensor
    return tensor.uniform_(a, b)
def normal(tensor, mean=0, std=1):
    """Fills the input Tensor or Variable with values drawn from the normal
    distribution :math:`N(mean, std)`.

    Args:
        tensor: an n-dimensional torch.Tensor or autograd.Variable
        mean: the mean of the normal distribution
        std: the standard deviation of the normal distribution

    Examples:
        >>> w = torch.Tensor(3, 5)
        >>> nn.init.normal(w)
    """
    if isinstance(tensor, Variable):
        normal(tensor.data, mean=mean, std=std)
        return tensor
    return tensor.normal_(mean, std)
def constant(tensor, val):
    """Fills the input Tensor or Variable with the value `val`.

    Args:
        tensor: an n-dimensional torch.Tensor or autograd.Variable
        val: the value to fill the tensor with

    Examples:
        >>> w = torch.Tensor(3, 5)
        >>> nn.init.constant(w, 0.3)
    """
    if isinstance(tensor, Variable):
        constant(tensor.data, val)
        return tensor
    return tensor.fill_(val)
def eye(tensor):
    """Fills the 2-dimensional input Tensor or Variable with the identity matrix.
    Preserves the identity of the inputs in Linear layers, where as many inputs
    are preserved as possible.

    Args:
        tensor: a 2-dimensional torch.Tensor or autograd.Variable

    Examples:
        >>> w = torch.Tensor(3, 5)
        >>> nn.init.eye(w)
    """
    if tensor.ndimension() != 2:
        raise ValueError("Only tensors with 2 dimensions are supported")
    if isinstance(tensor, Variable):
        eye(tensor.data)
        return tensor
    return tensor.copy_(torch.eye(tensor.size(0), tensor.size(1)))
def xavier_normal(tensor, gain=1):
    """Fills the input Tensor or Variable with values according to the method
    described in "Understanding the difficulty of training deep feedforward
    neural networks" - Glorot, X. & Bengio, Y. (2010), using a normal
    distribution. The resulting tensor will have values sampled from
    :math:`N(0, std)` where :math:`std = gain \\times \sqrt{2 / (fan\_in + fan\_out)}`.
    Also known as Glorot initialisation.

    Args:
        tensor: an n-dimensional torch.Tensor or autograd.Variable
        gain: an optional scaling factor

    Examples:
        >>> w = torch.Tensor(3, 5)
        >>> nn.init.xavier_normal(w)
    """
    if isinstance(tensor, Variable):
        xavier_normal(tensor.data, gain=gain)
        return tensor
    fan_in, fan_out = _calculate_fan_in_and_fan_out(tensor)
    std = gain * math.sqrt(2.0 / (fan_in + fan_out))
    return tensor.normal_(0, std)
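Worked example of the formula above for the docstring's w = torch.Tensor(3, 5): fan_in = 5, fan_out = 3, so with gain = 1 the sampling standard deviation is sqrt(2 / 8) = 0.5.

import math

gain, fan_in, fan_out = 1, 5, 3
std = gain * math.sqrt(2.0 / (fan_in + fan_out))
print(std)  # 0.5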
def kaiming_uniform(tensor, a=0, mode='fan_in'):
    """Fills the input Tensor or Variable with values according to the method
    described in "Delving deep into rectifiers: Surpassing human-level
    performance on ImageNet classification" - He, K. et al. (2015), using a
    uniform distribution. The resulting tensor will have values sampled from
    :math:`U(-bound, bound)` where
    :math:`bound = \sqrt{2 / ((1 + a^2) \\times fan\_in)} \\times \sqrt{3}`.
    Also known as He initialisation.

    Args:
        tensor: an n-dimensional torch.Tensor or autograd.Variable
        a: the negative slope of the rectifier used after this layer (0 for ReLU by default)
        mode: either 'fan_in' (default) or 'fan_out'. Choosing `fan_in` preserves the
            magnitude of the variance of the weights in the forward pass. Choosing
            `fan_out` preserves the magnitudes in the backwards pass.

    Examples:
        >>> w = torch.Tensor(3, 5)
        >>> nn.init.kaiming_uniform(w, mode='fan_in')
    """
    if isinstance(tensor, Variable):
        kaiming_uniform(tensor.data, a=a, mode=mode)
        return tensor
    fan = _calculate_correct_fan(tensor, mode)
    gain = calculate_gain('leaky_relu', a)
    std = gain / math.sqrt(fan)
    bound = math.sqrt(3.0) * std  # Calculate uniform bounds from standard deviation
    return tensor.uniform_(-bound, bound)
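Worked example for the docstring's w = torch.Tensor(3, 5) with a = 0 and mode='fan_in': gain = sqrt(2), std = sqrt(2 / 5), and the uniform bound is sqrt(3) * std ≈ 1.095.

import math

a, fan = 0, 5
gain = math.sqrt(2.0 / (1 + a ** 2))      # calculate_gain('leaky_relu', 0) == sqrt(2)
bound = math.sqrt(3.0) * gain / math.sqrt(fan)
print(round(bound, 3))  # 1.095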
def kaiming_normal(tensor, a=0, mode='fan_in'):
    """Fills the input Tensor or Variable with values according to the method
    described in "Delving deep into rectifiers: Surpassing human-level
    performance on ImageNet classification" - He, K. et al. (2015), using a
    normal distribution. The resulting tensor will have values sampled from
    :math:`N(0, std)` where :math:`std = \sqrt{2 / ((1 + a^2) \\times fan\_in)}`.
    Also known as He initialisation.

    Args:
        tensor: an n-dimensional torch.Tensor or autograd.Variable
        a: the negative slope of the rectifier used after this layer (0 for ReLU by default)
        mode: either 'fan_in' (default) or 'fan_out'. Choosing `fan_in` preserves the
            magnitude of the variance of the weights in the forward pass. Choosing
            `fan_out` preserves the magnitudes in the backwards pass.

    Examples:
        >>> w = torch.Tensor(3, 5)
        >>> nn.init.kaiming_normal(w, mode='fan_out')
    """
    if isinstance(tensor, Variable):
        kaiming_normal(tensor.data, a=a, mode=mode)
        return tensor
    fan = _calculate_correct_fan(tensor, mode)
    gain = calculate_gain('leaky_relu', a)
    std = gain / math.sqrt(fan)
    return tensor.normal_(0, std)
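The gain used above is calculate_gain('leaky_relu', a) = sqrt(2 / (1 + a^2)); for a leaky slope of a = 0.2 and fan = 5 this gives std ≈ 0.62.

import math

a, fan = 0.2, 5
std = math.sqrt(2.0 / (1 + a ** 2)) / math.sqrt(fan)
print(round(std, 3))  # 0.62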
def test_grad_nonleaf_many_outputs(self):
    # This checks an edge case for function callbacks
    # We want to capture two grads of a function, but can only
    # register a single callback.
    x = Variable(torch.randn(4, 2), requires_grad=True)
    a, b = x.chunk(2)

    def hook(*grads):
        hook_called[0] = True
    hook_called = [False]
    x.register_hook(hook)

    go = torch.randn(2, 2)
    grad_a, grad_b = torch.autograd.grad(
        (a + 2 * b), [a, b], grad_outputs=go, create_graph=True)
    self.assertEqual(grad_a.data, go)
    self.assertEqual(grad_b.data, go * 2)
    self.assertFalse(hook_called[0])
    self.assertIsNone(x.grad)
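The hook on x never fires because torch.autograd.grad only walks the graph back to the requested inputs (a and b) and stops there, so x is never reached and x.grad stays None. A hedged contrast sketch using the current tensor API (not part of the original test): a full backward() does reach the leaf and fires the hook.

import torch

hook_called = [False]

def note_call(grad):
    hook_called[0] = True

x = torch.randn(4, 2, requires_grad=True)
x.register_hook(note_call)
a, b = x.chunk(2)
go = torch.randn(2, 2)

torch.autograd.grad(a + 2 * b, [a, b], grad_outputs=go, retain_graph=True)
assert not hook_called[0] and x.grad is None      # grad() stopped at a and b

(a + 2 * b).backward(go)
assert hook_called[0] and x.grad is not None      # backward() reached the leaf x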
def batch_predictions(self, images):
    # lazy import
    import torch
    from torch.autograd import Variable

    images = self._process_input(images)
    n = len(images)
    images = torch.from_numpy(images)
    if self.cuda:  # pragma: no cover
        images = images.cuda()
    images = Variable(images, volatile=True)
    predictions = self._model(images)
    predictions = predictions.data
    if self.cuda:  # pragma: no cover
        predictions = predictions.cpu()
    predictions = predictions.numpy()
    assert predictions.ndim == 2
    assert predictions.shape == (n, self.num_classes())
    return predictions
def _loss_fn(self, image, label):
    # lazy import
    import torch
    import torch.nn as nn
    from torch.autograd import Variable

    image = self._process_input(image)
    target = np.array([label])
    target = torch.from_numpy(target)
    if self.cuda:  # pragma: no cover
        target = target.cuda()
    target = Variable(target)
    images = torch.from_numpy(image[None])
    if self.cuda:  # pragma: no cover
        images = images.cuda()
    images = Variable(images, volatile=True)
    predictions = self._model(images)
    ce = nn.CrossEntropyLoss()
    loss = ce(predictions, target)
    loss = loss.data
    if self.cuda:  # pragma: no cover
        loss = loss.cpu()
    loss = loss.numpy()
    return loss
# Source: softmax_with_cross_entropy_loss.py (project: pytorch-misc, author: Jiaming-Liu)
def backward(self, sth):
    input, label = self.saved_tensors
    grad_fs = grad_label = None
    if self.needs_input_grad[0]:
        fs = torch.nn.Softmax()(
            torch.autograd.Variable(input, requires_grad=False)
        ).data
        # neg. one hot label
        y = input.new().resize_as_(input).zero_()
        for i, l in enumerate(label):
            y[i, l] = -1.
        fs.add_(y).mul_(1. / len(label))
        grad_fs = fs
    if self.needs_input_grad[1]:
        raise NotImplementedError()
    return grad_fs, grad_label
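The gradient assembled above is the textbook softmax-with-cross-entropy result: d(loss)/d(logits) = (softmax(logits) - one_hot(label)) / batch_size, the division matching the 1. / len(label) scaling in backward(). A small NumPy-only sketch of the same formula for reference:

import numpy as np

def softmax(x):
    e = np.exp(x - x.max(axis=1, keepdims=True))
    return e / e.sum(axis=1, keepdims=True)

logits = np.random.randn(4, 3)
labels = np.array([0, 2, 1, 2])
one_hot = np.zeros_like(logits)
one_hot[np.arange(len(labels)), labels] = 1.0
grad = (softmax(logits) - one_hot) / len(labels)  # same value grad_fs holds above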
def eval_model(dataset_loader, encoding, model):
    model.eval()
    print("evaluating model...")
    top1 = imSituTensorEvaluation(1, 3, encoding)
    top5 = imSituTensorEvaluation(5, 3, encoding)
    mx = len(dataset_loader)
    for i, (index, input, target) in enumerate(dataset_loader):
        print("{}/{} batches\r".format(i + 1, mx), end='')
        input_var = torch.autograd.Variable(input.cuda(), volatile=True)
        target_var = torch.autograd.Variable(target.cuda(), volatile=True)
        (scores, predictions) = model.forward_max(input_var)
        (s_sorted, idx) = torch.sort(scores, 1, True)
        top1.add_point(target, predictions.data, idx.data)
        top5.add_point(target, predictions.data, idx.data)
    print("\ndone.")
    return (top1, top5)
def _viterbi_decode(self, feats):
    backpointers = []
    init_alphas = torch.Tensor(self.tagset_size, 1).fill_(0.).type(self.dtype)
    forward_var = autograd.Variable(init_alphas).type(self.dtype)
    for ix, feat in enumerate(feats):
        if ix == 0:
            forward_var += feat.view(self.tagset_size, 1) + self.initial_weights
        else:
            viterbi_vars, viterbi_idx = torch.max(self.transitions + torch.transpose(forward_var.repeat(1, self.tagset_size), 0, 1), 1)
            forward_var = feat.view(self.tagset_size, 1) + viterbi_vars
            backpointers.append(viterbi_idx)
    terminal_var = forward_var + self.final_weights
    _, best_tag_id = torch.max(terminal_var, 0)
    best_tag_id = to_scalar(best_tag_id)
    path_score = terminal_var[best_tag_id]
    best_path = [best_tag_id]
    for bptrs_t in reversed(backpointers):
        best_tag_id = to_scalar(bptrs_t[best_tag_id])
        best_path.append(best_tag_id)
    best_path.reverse()
    return path_score, best_path
def _viterbi_decode(self, feats):
    backpointers = []
    init_vvars = torch.Tensor(self.tagset_size, 1).fill_(-10000.).type(self.dtype)
    init_vvars[self.tag_to_ix[self.START_TAG]][0] = 0
    forward_var = autograd.Variable(init_vvars).type(self.dtype)
    for feat in feats:
        viterbi_vars, viterbi_idx = torch.max(self.transitions + torch.transpose(forward_var.expand(forward_var.size(0), self.tagset_size), 0, 1), 1)
        forward_var = feat.view(self.tagset_size, 1) + viterbi_vars
        backpointers.append(viterbi_idx)
    terminal_var = forward_var + self.transitions[self.tag_to_ix[self.STOP_TAG]].view(self.tagset_size, 1)
    _, best_tag_id = torch.max(terminal_var, 0, keepdim=True)
    best_tag_id = to_scalar(best_tag_id)
    path_score = terminal_var[best_tag_id]
    best_path = [best_tag_id]
    for bptrs_t in reversed(backpointers):
        best_tag_id = to_scalar(bptrs_t[best_tag_id])
        best_path.append(best_tag_id)
    start = best_path.pop()
    assert start == self.tag_to_ix[self.START_TAG]  # Sanity check
    best_path.reverse()
    return path_score, best_path
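Both decoders above implement the standard Viterbi max-product recursion over a transition matrix whose entry [i, j] scores moving from tag j to tag i. A self-contained NumPy sketch of the same recursion, independent of the class state above (scores are made up for illustration):

import numpy as np

def viterbi(emissions, transitions):
    # emissions: (T, K) per-step tag scores; transitions[i, j]: score of tag j -> tag i
    T, K = emissions.shape
    forward = emissions[0].copy()
    backpointers = []
    for t in range(1, T):
        scores = transitions + forward[None, :]   # (K, K): candidate score for each (next, prev)
        backpointers.append(scores.argmax(axis=1))
        forward = emissions[t] + scores.max(axis=1)
    best_tag = int(forward.argmax())
    path = [best_tag]
    for bp in reversed(backpointers):
        best_tag = int(bp[best_tag])
        path.append(best_tag)
    path.reverse()
    return float(forward.max()), path

score, path = viterbi(np.random.randn(6, 4), np.random.randn(4, 4))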
def init_hidden(self, gpu, last_batch_size=None):
    if last_batch_size is None: lstm_hidden_batch_size = self.batch_size
    else: lstm_hidden_batch_size = last_batch_size
    dims = (self.lstm_layer, lstm_hidden_batch_size, self.lstm_hidden_dim)
    if self.bilstm_flag:
        dims = (2 * self.lstm_layer, lstm_hidden_batch_size, self.lstm_hidden_dim)
    init_value = torch.Tensor(np.random.uniform(-0.01, 0.01, dims))
    # init_value = torch.zeros(dims)
    h0 = autograd.Variable(init_value)
    c0 = autograd.Variable(init_value)
    if gpu:
        h0 = h0.cuda()
        c0 = c0.cuda()
    return (h0, c0)

# from: Variable of batch_size*sent_length*embedding_dim
# to:   Variable of batch_size*embedding_dim*sent_length
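A minimal sketch of the permutation the two comment lines describe (the original helper is not shown here): swap the sentence-length and embedding dimensions, e.g. before feeding a Conv1d.

import torch

x = torch.randn(8, 20, 100)          # batch_size * sent_length * embedding_dim
x = x.transpose(1, 2).contiguous()   # batch_size * embedding_dim * sent_length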