def calc_gradient_penalty(self, netD, real_data, fake_data):
    # WGAN-GP: penalize the critic's gradient norm at random interpolates
    # between real and fake samples (Gulrajani et al., 2017).
    alpha = torch.rand(1, 1)
    alpha = alpha.expand(real_data.size())
    alpha = alpha.cuda()
    # Random point on the line segment between real and fake data.
    interpolates = alpha * real_data + ((1 - alpha) * fake_data)
    interpolates = interpolates.cuda()
    interpolates = Variable(interpolates, requires_grad=True)
    disc_interpolates = netD(interpolates)
    # dD(x_hat)/dx_hat, keeping the graph so the penalty itself is differentiable.
    gradients = autograd.grad(outputs=disc_interpolates, inputs=interpolates,
                              grad_outputs=torch.ones(disc_interpolates.size()).cuda(),
                              create_graph=True, retain_graph=True, only_inputs=True)[0]
    # Penalize deviation of the per-sample gradient norm from 1.
    gradient_penalty = ((gradients.norm(2, dim=1) - 1) ** 2).mean() * self.LAMBDA
    return gradient_penalty
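# A minimal standalone sketch (not from the original repository) of how the
# penalty above plugs into a WGAN-GP critic loss. The tiny critic, LAMBDA and
# batch shapes are illustrative assumptions, and a per-sample alpha is used
# here (the method above shares one alpha across the batch); the Variable-era
# API is kept to match the snippet.
import torch
import torch.nn as nn
from torch import autograd
from torch.autograd import Variable

netD = nn.Sequential(nn.Linear(16, 64), nn.ReLU(), nn.Linear(64, 1))
real_data, fake_data = torch.randn(8, 16), torch.randn(8, 16)
LAMBDA = 10.0

alpha = torch.rand(8, 1).expand(real_data.size())
interpolates = Variable(alpha * real_data + (1 - alpha) * fake_data,
                        requires_grad=True)
disc_interpolates = netD(interpolates)
gradients = autograd.grad(outputs=disc_interpolates, inputs=interpolates,
                          grad_outputs=torch.ones(disc_interpolates.size()),
                          create_graph=True, retain_graph=True, only_inputs=True)[0]
gradient_penalty = ((gradients.norm(2, dim=1) - 1) ** 2).mean() * LAMBDA
d_loss = netD(Variable(fake_data)).mean() - netD(Variable(real_data)).mean() + gradient_penalty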
def xavier_uniform(tensor, gain=1):
"""Fills the input Tensor or Variable with values according to the method
described in "Understanding the difficulty of training deep feedforward
neural networks" - Glorot, X. & Bengio, Y. (2010), using a uniform
distribution. The resulting tensor will have values sampled from
:math:`U(-a, a)` where
:math:`a = gain \\times \sqrt{2 / (fan\_in + fan\_out)} \\times \sqrt{3}`.
Also known as Glorot initialisation.
Args:
tensor: an n-dimensional torch.Tensor or autograd.Variable
gain: an optional scaling factor
Examples:
>>> w = torch.Tensor(3, 5)
>>> nn.init.xavier_uniform(w, gain=nn.init.calculate_gain('relu'))
"""
if isinstance(tensor, Variable):
xavier_uniform(tensor.data, gain=gain)
return tensor
fan_in, fan_out = _calculate_fan_in_and_fan_out(tensor)
std = gain * math.sqrt(2.0 / (fan_in + fan_out))
a = math.sqrt(3.0) * std # Calculate uniform bounds from standard deviation
return tensor.uniform_(-a, a)
def xavier_normal(tensor, gain=1):
"""Fills the input Tensor or Variable with values according to the method
described in "Understanding the difficulty of training deep feedforward
neural networks" - Glorot, X. & Bengio, Y. (2010), using a normal
distribution. The resulting tensor will have values sampled from
:math:`N(0, std)` where
:math:`std = gain \\times \sqrt{2 / (fan\_in + fan\_out)}`.
Also known as Glorot initialisation.
Args:
tensor: an n-dimensional torch.Tensor or autograd.Variable
gain: an optional scaling factor
Examples:
>>> w = torch.Tensor(3, 5)
>>> nn.init.xavier_normal(w)
"""
if isinstance(tensor, Variable):
xavier_normal(tensor.data, gain=gain)
return tensor
fan_in, fan_out = _calculate_fan_in_and_fan_out(tensor)
std = gain * math.sqrt(2.0 / (fan_in + fan_out))
return tensor.normal_(0, std)
def forward(self, x, lengths, hidden):
# Basket Encoding
ub_seqs = [] # users' basket sequence
for user in x: # x: nested lists of shape (batch of users, time_steps, product indices)
embed_baskets = []
for basket in user:
basket = torch.LongTensor(basket).resize_(1, len(basket))
basket = basket.cuda() if self.config.cuda else basket # use cuda for acceleration
basket = self.encode(torch.autograd.Variable(basket)) # shape: 1, len(basket), embedding_dim
embed_baskets.append(self.pool(basket, dim = 1))
# concat current user's all baskets and append it to users' basket sequence
ub_seqs.append(torch.cat(embed_baskets, 1)) # shape: 1, num_basket, embedding_dim
# Input for rnn
ub_seqs = torch.cat(ub_seqs, 0).cuda() if self.config.cuda else torch.cat(ub_seqs, 0) # shape: batch_size, max_len, embedding_dim
packed_ub_seqs = torch.nn.utils.rnn.pack_padded_sequence(ub_seqs, lengths, batch_first=True) # packed sequence as required by pytorch
# RNN
output, h_u = self.rnn(packed_ub_seqs, hidden)
dynamic_user, _ = torch.nn.utils.rnn.pad_packed_sequence(output, batch_first=True) # shape: batch_size, max_len, embedding_dim
return dynamic_user, h_u
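# A standalone sketch (assumed layer and sizes, not the original model) of the
# per-basket encoding step in the loop above: each basket of item indices is
# embedded, max-pooled over its items, and the pooled vectors are stacked into
# the user's basket sequence.
import torch
import torch.nn as nn
from torch.autograd import Variable

encode = nn.Embedding(num_embeddings=100, embedding_dim=32)  # illustrative sizes
user = [[3, 17, 42], [5, 9]]                                 # one user's two baskets

embed_baskets = []
for basket in user:
    idx = Variable(torch.LongTensor(basket).view(1, -1))     # 1 x len(basket)
    emb = encode(idx)                                        # 1 x len(basket) x 32
    embed_baskets.append(emb.max(dim=1, keepdim=True)[0])    # pool over items -> 1 x 1 x 32
ub_seq = torch.cat(embed_baskets, 1)                         # 1 x num_baskets x 32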
def test_forward_works_even_with_empty_sequences(self):
lstm = LSTM(bidirectional=True, num_layers=3, input_size=3, hidden_size=11, batch_first=True)
encoder = PytorchSeq2VecWrapper(lstm)
tensor = torch.autograd.Variable(torch.rand([5, 7, 3]))
tensor[1, 6:, :] = 0
tensor[2, :, :] = 0
tensor[3, 2:, :] = 0
tensor[4, :, :] = 0
mask = torch.autograd.Variable(torch.ones(5, 7))
mask[1, 6:] = 0
mask[2, :] = 0
mask[3, 2:] = 0
mask[4, :] = 0
results = encoder(tensor, mask)
for i in (0, 1, 3):
assert not (results[i] == 0.).data.all()
for i in (2, 4):
assert (results[i] == 0.).data.all()
def test_forward_works_even_with_empty_sequences(self):
lstm = LSTM(bidirectional=True, num_layers=3, input_size=3, hidden_size=7, batch_first=True)
encoder = PytorchSeq2SeqWrapper(lstm)
tensor = torch.autograd.Variable(torch.rand([5, 7, 3]))
tensor[1, 6:, :] = 0
tensor[2, :, :] = 0
tensor[3, 2:, :] = 0
tensor[4, :, :] = 0
mask = torch.autograd.Variable(torch.ones(5, 7))
mask[1, 6:] = 0
mask[2, :] = 0
mask[3, 2:] = 0
mask[4, :] = 0
results = encoder(tensor, mask)
for i in (0, 1, 3):
assert not (results[i] == 0.).data.all()
for i in (2, 4):
assert (results[i] == 0.).data.all()
def _initializer_wrapper(init_function: Callable[..., None]) -> Type[Initializer]:
class Init(Initializer):
def __init__(self, **kwargs):
self._init_function = init_function
self._kwargs = kwargs
def __call__(self, tensor: torch.autograd.Variable) -> None:
self._init_function(tensor, **self._kwargs)
def __repr__(self):
return 'Init: %s, with params: %s' % (self._init_function, self._kwargs)
@classmethod
def from_params(cls, params: Params):
return cls(**params.as_dict())
return Init
# There are no classes to decorate, so we hack these into Registrable._registry
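# Usage sketch for the wrapper above (illustrative; assumes the surrounding
# AllenNLP ``Initializer`` base class is in scope, and uses the old
# non-underscore ``nn.init`` API to match the rest of this page).
import torch.nn as nn
from torch.nn import init as nn_init

XavierInit = _initializer_wrapper(nn_init.xavier_uniform)
initializer = XavierInit(gain=nn_init.calculate_gain('relu'))
layer = nn.Linear(10, 20)
initializer(layer.weight)   # equivalent to nn_init.xavier_uniform(layer.weight, gain=...)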
def get_dropout_mask(dropout_probability: float, tensor_for_masking: torch.autograd.Variable):
"""
Computes and returns an element-wise dropout mask for a given tensor, where
each element in the mask is dropped out with probability dropout_probability.
Note that the mask is NOT applied to the tensor - the tensor is passed to retain
the correct CUDA tensor type for the mask.
Parameters
----------
dropout_probability : float, required.
Probability of dropping a dimension of the input.
tensor_for_masking : torch.autograd.Variable, required.
Returns
-------
A torch.FloatTensor consisting of the binary mask scaled by 1/ (1 - dropout_probability).
This scaling ensures expected values and variances of the output of applying this mask
and the original tensor are the same.
"""
binary_mask = tensor_for_masking.clone()
binary_mask.data.copy_(torch.rand(tensor_for_masking.size()) > dropout_probability)
# Scale mask by 1/keep_prob to preserve output statistics.
dropout_mask = binary_mask.float().div(1.0 - dropout_probability)
return dropout_mask
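# Minimal usage sketch (Variable-era API kept to match the function above):
import torch
from torch.autograd import Variable

x = Variable(torch.randn(4, 10))
mask = get_dropout_mask(0.5, x)   # binary mask scaled by 1 / (1 - 0.5)
dropped = x * mask                # dropout applied by hand; E[dropped] == E[x]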
def mlpg(means, variances, windows):
"""Maximum Liklihood Paramter Generation (MLPG).
The parameters are almost same as :func:`nnmnkwii.paramgen.mlpg` expects.
The differences are:
- The function assumes ``means`` as :obj:`torch.autograd.Variable`
instead of :obj:`numpy.ndarray`.
- The fucntion assumes ``variances_frames`` as :obj:`torch.FloatTensor`?
instead of :obj:`numpy.ndarray`.
Args:
means (torch.autograd.Variable): Means
variances (torch.FloatTensor): Variances
windows (list): A sequence of window specification
See also:
:obj:`nnmnkwii.autograd.MLPG`, :func:`nnmnkwii.paramgen.mlpg`
"""
T, D = means.size()
if variances.dim() == 1 and variances.shape[0] == D:
variances = variances.expand(T, D)
assert means.size() == variances.size()
return MLPG(variances, windows)(means)
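# Usage sketch (illustrative shapes; the window tuples follow nnmnkwii's
# (left_width, right_width, coefficients) convention, so treat the exact
# values as assumptions rather than the library's documented example):
import numpy as np
import torch
from torch.autograd import Variable

windows = [
    (0, 0, np.array([1.0])),              # static
    (1, 1, np.array([-0.5, 0.0, 0.5])),   # delta
]
T, static_dim = 10, 24
means = Variable(torch.randn(T, static_dim * len(windows)), requires_grad=True)
variances = torch.ones(static_dim * len(windows))   # broadcast to (T, D) inside mlpg
y = mlpg(means, variances, windows)                 # (T, static_dim), differentiable w.r.t. means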
def unit_variance_mlpg(R, means):
"""Special case of MLPG assuming data is normalized to have unit variance.
Args:
means (torch.autograd.Variable): Means, of shape (``T x D``) or
(``T*num_windows x static_dim``). See
:func:`nnmnkwii.paramgen.reshape_means` to reshape means from
(``T x D``) to (``T*num_windows x static_dim``).
R (torch.FloatTensor): MLPG matrix.
See also:
:obj:`nnmnkwii.autograd.UnitVarianceMLPG`,
:func:`nnmnkwii.paramgen.unit_variance_mlpg_matrix`,
:func:`reshape_means`.
"""
return UnitVarianceMLPG(R)(means)
def _eq(x, y):
"""
Equality comparison for nested data structures with tensors.
"""
if type(x) is not type(y):
return False
elif isinstance(x, dict):
if set(x.keys()) != set(y.keys()):
return False
return all(_eq(x_val, y[key]) for key, x_val in x.items())
elif isinstance(x, (np.ndarray, torch.Tensor)):
return (x == y).all()
elif isinstance(x, torch.autograd.Variable):
return (x.data == y.data).all()
else:
return x == y
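# Quick illustration of the nested comparison (standalone, plain torch only):
import torch

a = {'w': torch.ones(2, 2), 'meta': {'n': 3}}
b = {'w': torch.ones(2, 2), 'meta': {'n': 3}}
assert _eq(a, b)                                                # same structure, equal tensors
assert not _eq(a, {'w': torch.zeros(2, 2), 'meta': {'n': 3}})   # tensors differ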
def _dist_and_values(self, *args, **kwargs):
# XXX currently this whole object is very inefficient
values, logits = [], []
for value, logit in self._gen_weighted_samples(*args, **kwargs):
ix = _index(values, value)
if ix == -1:
# Value is new.
values.append(value)
logits.append(logit)
else:
# Value has already been seen.
logits[ix] = util.log_sum_exp(torch.stack([logits[ix], logit]).squeeze())
logits = torch.stack(logits).squeeze()
logits -= util.log_sum_exp(logits)
if not isinstance(logits, torch.autograd.Variable):
logits = Variable(logits)
logits = logits - util.log_sum_exp(logits)
d = dist.Categorical(logits=logits, one_hot=False)
return d, values
def enumerate_support(self):
"""
Returns the Bernoulli distribution's support, as a tensor along the first dimension.
Note that this returns support values of all the batched RVs in lock-step, rather
than the full cartesian product. To iterate over the cartesian product, you must
construct univariate Bernoullis and use itertools.product() over all univariate
variables (may be expensive).
:return: torch variable enumerating the support of the Bernoulli distribution.
Each item in the return value, when enumerated along the first dimension, yields a
value from the distribution's support with the same dimension as a value returned by
``sample()``.
:rtype: torch.autograd.Variable.
"""
return Variable(torch.stack([torch.Tensor([t]).expand_as(self.ps) for t in [0, 1]]))
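# Shape illustration (standalone, mirroring the expression above rather than
# calling the distribution class): for batched Bernoulli parameters ``ps`` of
# shape (3,), the stacked support is a (2, 3) Variable whose rows are the
# all-zeros and all-ones outcomes for each batched RV.
import torch
from torch.autograd import Variable

ps = torch.Tensor([0.1, 0.5, 0.9])
support = Variable(torch.stack([torch.Tensor([t]).expand_as(ps) for t in [0, 1]]))
print(support.size())   # torch.Size([2, 3])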
def log_pdf(self, y, *args, **kwargs):
"""
:param y: a value sampled from the transformed distribution
:type y: torch.autograd.Variable
:returns: the score (the log pdf) of y
:rtype: torch.autograd.Variable
Scores the sample by inverting the bijector(s) and computing the score using the score
of the base distribution and the log det jacobian
"""
inverses = []
next_to_invert = y
for bijector in reversed(self.bijectors):
inverse = bijector.inverse(next_to_invert)
inverses.append(inverse)
next_to_invert = inverse
log_pdf_base = self.base_dist.log_pdf(inverses[-1], *args, **kwargs)
log_det_jacobian = self.bijectors[-1].log_det_jacobian(y, *args, **kwargs)
for bijector, inverse in zip(list(reversed(self.bijectors))[1:], inverses[:-1]):
log_det_jacobian += bijector.log_det_jacobian(inverse, *args, **kwargs)
return log_pdf_base - log_det_jacobian
def log_beta(t):
"""
Computes log Beta function.
:param t:
:type t: torch.autograd.Variable of dimension 1 or 2
:rtype: torch.autograd.Variable of float (if t.dim() == 1) or torch.Tensor (if t.dim() == 2)
"""
assert t.dim() in (1, 2)
if t.dim() == 1:
numer = torch.sum(log_gamma(t))
denom = log_gamma(torch.sum(t))
else:
numer = torch.sum(log_gamma(t), 1)
denom = log_gamma(torch.sum(t, 1))
return numer - denom
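# Sanity-check sketch for the 1-D case (``log_gamma`` above is the module's own
# helper; scipy here only provides the reference value):
# for t = [2.0, 3.0], log B(2, 3) = log(Gamma(2) * Gamma(3) / Gamma(5))
#                                 = log(2 / 24) ~= -2.4849
import torch
from torch.autograd import Variable
from scipy.special import betaln

t = Variable(torch.Tensor([2.0, 3.0]))
print(log_beta(t))        # ~= -2.4849
print(betaln(2.0, 3.0))   # ~= -2.4849 (reference)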
def sample(self):
"""
Draws either a single sample (if alpha.dim() == 1), or one sample per param (if alpha.dim() == 2).
(Un-reparameterized).
:param torch.autograd.Variable alpha:
"""
alpha_np = self.alpha.data.cpu().numpy()
if self.alpha.dim() == 1:
x_np = spr.dirichlet.rvs(alpha_np)[0]
else:
x_np = np.empty_like(alpha_np)
for i in range(alpha_np.shape[0]):
x_np[i, :] = spr.dirichlet.rvs(alpha_np[i, :])[0]
x = Variable(type(self.alpha.data)(x_np))
return x
def batch_log_pdf(self, x):
"""
Evaluates log probability density over one or a batch of samples.
Each of alpha and x can be either a single value or a batch of values batched along dimension 0.
If they are both batches, their batch sizes must agree.
In any case, the rightmost size must agree.
:param torch.autograd.Variable x: A value (if x.dim() == 1) or a batch of values (if x.dim() == 2).
:param alpha: A vector of concentration parameters.
:type alpha: torch.autograd.Variable or None.
:return: log probability densities of each element in the batch.
:rtype: torch.autograd.Variable of torch.Tensor of dimension 1.
"""
alpha = self.alpha.expand(self.shape(x))
x_sum = torch.sum(torch.mul(alpha - 1, torch.log(x)), -1)
beta = log_beta(alpha)
batch_log_pdf_shape = self.batch_shape(x) + (1,)
return (x_sum - beta).contiguous().view(batch_log_pdf_shape)
def test_multi_gpu(self):
import torch
from torch.autograd import Variable
import torch.nn as nn
from torch.nn.parallel.data_parallel import data_parallel
from inferno.extensions.containers.graph import Graph
input_shape = [8, 1, 3, 128, 128]
model = Graph() \
.add_input_node('input') \
.add_node('conv0', nn.Conv3d(1, 10, 3, padding=1), previous='input') \
.add_node('conv1', nn.Conv3d(10, 1, 3, padding=1), previous='conv0') \
.add_output_node('output', previous='conv1')
model.cuda()
input = Variable(torch.rand(*input_shape).cuda())
output = data_parallel(model, input, device_ids=[0, 1, 2, 3])
def test_forward(self):
import torch
from torch.autograd import Variable
from reid.models.inception import InceptionNet
# model = Inception(num_classes=5, num_features=256, dropout=0.5)
# x = Variable(torch.randn(10, 3, 144, 56), requires_grad=False)
# y = model(x)
# self.assertEquals(y.size(), (10, 5))
model = InceptionNet(num_features=8, norm=True, dropout=0)
x = Variable(torch.randn(10, 3, 144, 56), requires_grad=False)
y = model(x)
self.assertEquals(y.size(), (10, 8))
self.assertEquals(y.norm(2, 1).max(), 1)
self.assertEquals(y.norm(2, 1).min(), 1)
def test_forward_backward(self):
import torch
import torch.nn.functional as F
from torch.autograd import Variable
from reid.loss import OIMLoss
criterion = OIMLoss(3, 3, scalar=1.0, size_average=False)
criterion.lut = torch.eye(3)
x = Variable(torch.randn(3, 3), requires_grad=True)
y = Variable(torch.range(0, 2).long())
loss = criterion(x, y)
loss.backward()
probs = F.softmax(x)
grads = probs.data - torch.eye(3)
abs_diff = torch.abs(grads - x.grad.data)
self.assertEquals(torch.log(probs).diag().sum(), -loss)
self.assertTrue(torch.max(abs_diff) < 1e-6)
def _forward_alg(self, feats):
# calculate in log domain
# feats is len(sentence) * tagset_size
# initialize alpha with a Tensor with values all equal to -10000.
init_alphas = torch.Tensor(1, self.tagset_size).fill_(-10000.)
init_alphas[0][self.tag_to_ix[START_TAG]] = 0.
forward_var = autograd.Variable(init_alphas)
if self.use_gpu:
forward_var = forward_var.cuda()
for feat in feats:
emit_score = feat.view(-1, 1)
tag_var = forward_var + self.transitions + emit_score
max_tag_var, _ = torch.max(tag_var, dim=1)
tag_var = tag_var - max_tag_var.view(-1, 1)
forward_var = max_tag_var + torch.log(torch.sum(torch.exp(tag_var), dim=1)).view(1, -1)
terminal_var = (forward_var + self.transitions[self.tag_to_ix[STOP_TAG]]).view(1, -1)
alpha = log_sum_exp(terminal_var)
# Z(x)
return alpha
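# The ``log_sum_exp`` helper applied to ``terminal_var`` above is typically the
# same max-shift trick used inside the loop. A minimal sketch of such a helper
# (an assumption; the original definition is not shown on this page):
import torch

def log_sum_exp(vec):
    # vec: 1 x tagset_size scores; returns a numerically stable log(sum(exp(vec)))
    max_score = vec.max()
    return max_score + torch.log(torch.sum(torch.exp(vec - max_score)))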
def reinforce_backward(self, reward, output_mask=None):
"""
If output_mask is not None, then it should be a FloatTensor of shape (N, T)
giving a multiplier to the output.
"""
assert self.multinomial_outputs is not None, 'Must call reinforce_sample first'
grad_output = []
def gen_hook(mask):
def hook(grad):
return grad * mask.contiguous().view(-1, 1).expand_as(grad)
return hook
if output_mask is not None:
for t, probs in enumerate(self.multinomial_probs):
mask = Variable(output_mask[:, t])
probs.register_hook(gen_hook(mask))
for sampled_output in self.multinomial_outputs:
sampled_output.reinforce(reward)
grad_output.append(None)
torch.autograd.backward(self.multinomial_outputs, grad_output, retain_variables=True)
def uniform(tensor, a=0, b=1):
"""Fills the input Tensor or Variable with values drawn from the uniform distribution :math:`U(a, b)`.
Args:
tensor: an n-dimensional torch.Tensor or autograd.Variable
a: the lower bound of the uniform distribution
b: the upper bound of the uniform distribution
Examples:
>>> w = torch.Tensor(3, 5)
>>> nn.init.uniform(w)
"""
if isinstance(tensor, Variable):
uniform(tensor.data, a=a, b=b)
return tensor
return tensor.uniform_(a, b)
def normal(tensor, mean=0, std=1):
"""Fills the input Tensor or Variable with values drawn from the normal distribution :math:`N(mean, std)`.
Args:
tensor: an n-dimensional torch.Tensor or autograd.Variable
mean: the mean of the normal distribution
std: the standard deviation of the normal distribution
Examples:
>>> w = torch.Tensor(3, 5)
>>> nn.init.normal(w)
"""
if isinstance(tensor, Variable):
normal(tensor.data, mean=mean, std=std)
return tensor
return tensor.normal_(mean, std)
def constant(tensor, val):
"""Fills the input Tensor or Variable with the value `val`.
Args:
tensor: an n-dimensional torch.Tensor or autograd.Variable
val: the value to fill the tensor with
Examples:
>>> w = torch.Tensor(3, 5)
>>> nn.init.constant(w, 0.3)
"""
if isinstance(tensor, Variable):
constant(tensor.data, val)
return tensor
return tensor.fill_(val)
def eye(tensor):
"""Fills the 2-dimensional input Tensor or Variable with the identity matrix. Preserves the identity of the inputs in
Linear layers, where as many inputs are preserved as possible.
Args:
tensor: a 2-dimensional torch.Tensor or autograd.Variable
Examples:
>>> w = torch.Tensor(3, 5)
>>> nn.init.eye(w)
"""
if tensor.ndimension() != 2:
raise ValueError("Only tensors with 2 dimensions are supported")
if isinstance(tensor, Variable):
eye(tensor.data)
return tensor
return tensor.copy_(torch.eye(tensor.size(0), tensor.size(1)))
def xavier_normal(tensor, gain=1):
"""Fills the input Tensor or Variable with values according to the method described in "Understanding the
difficulty of training deep feedforward neural networks" - Glorot, X. & Bengio, Y. (2010), using a normal
distribution. The resulting tensor will have values sampled from :math:`N(0, std)` where
:math:`std = gain \\times \sqrt{2 / (fan\_in + fan\_out)}`. Also known as Glorot initialisation.
Args:
tensor: an n-dimensional torch.Tensor or autograd.Variable
gain: an optional scaling factor
Examples:
>>> w = torch.Tensor(3, 5)
>>> nn.init.xavier_normal(w)
"""
if isinstance(tensor, Variable):
xavier_normal(tensor.data, gain=gain)
return tensor
fan_in, fan_out = _calculate_fan_in_and_fan_out(tensor)
std = gain * math.sqrt(2.0 / (fan_in + fan_out))
return tensor.normal_(0, std)
def kaiming_uniform(tensor, a=0, mode='fan_in'):
"""Fills the input Tensor or Variable with values according to the method described in "Delving deep into
rectifiers: Surpassing human-level performance on ImageNet classification" - He, K. et al. (2015), using a uniform
distribution. The resulting tensor will have values sampled from :math:`U(-bound, bound)` where
:math:`bound = \sqrt{2 / ((1 + a^2) \\times fan\_in)} \\times \sqrt{3}`. Also known as He initialisation.
Args:
tensor: an n-dimensional torch.Tensor or autograd.Variable
a: the negative slope of the rectifier used after this layer (0 for ReLU by default)
mode: either 'fan_in' (default) or 'fan_out'. Choosing `fan_in` preserves the magnitude of the variance of the
weights in the forward pass. Choosing `fan_out` preserves the magnitudes in the backwards pass.
Examples:
>>> w = torch.Tensor(3, 5)
>>> nn.init.kaiming_uniform(w, mode='fan_in')
"""
if isinstance(tensor, Variable):
kaiming_uniform(tensor.data, a=a, mode=mode)
return tensor
fan = _calculate_correct_fan(tensor, mode)
gain = calculate_gain('leaky_relu', a)
std = gain / math.sqrt(fan)
bound = math.sqrt(3.0) * std # Calculate uniform bounds from standard deviation
return tensor.uniform_(-bound, bound)
def kaiming_normal(tensor, a=0, mode='fan_in'):
"""Fills the input Tensor or Variable with values according to the method described in "Delving deep into
rectifiers: Surpassing human-level performance on ImageNet classification" - He, K. et al. (2015), using a normal
distribution. The resulting tensor will have values sampled from :math:`N(0, std)` where
:math:`std = \sqrt{2 / ((1 + a^2) \\times fan\_in)}`. Also known as He initialisation.
Args:
tensor: an n-dimensional torch.Tensor or autograd.Variable
a: the negative slope of the rectifier used after this layer (0 for ReLU by default)
mode: either 'fan_in' (default) or 'fan_out'. Choosing `fan_in` preserves the magnitude of the variance of the
weights in the forward pass. Choosing `fan_out` preserves the magnitudes in the backwards pass.
Examples:
>>> w = torch.Tensor(3, 5)
>>> nn.init.kaiming_normal(w, mode='fan_out')
"""
if isinstance(tensor, Variable):
kaiming_normal(tensor.data, a=a, mode=mode)
return tensor
fan = _calculate_correct_fan(tensor, mode)
gain = calculate_gain('leaky_relu', a)
std = gain / math.sqrt(fan)
return tensor.normal_(0, std)
def test_grad_nonleaf_many_outputs(self):
# This checks an edge case for function callbacks
# We want to capture two grads of a function, but can only
# register a single callback.
x = Variable(torch.randn(4, 2), requires_grad=True)
a, b = x.chunk(2)
def hook(*grads):
hook_called[0] = True
hook_called = [False]
x.register_hook(hook)
go = torch.randn(2, 2)
grad_a, grad_b = torch.autograd.grad(
(a + 2 * b), [a, b], grad_outputs=go, create_graph=True)
self.assertEqual(grad_a, go)
self.assertEqual(grad_b, go * 2)
self.assertFalse(hook_called[0])
self.assertIsNone(x.grad)