def update_state(self, step, src_seq, enc_outputs, un_dones):
    input_pos = torch.arange(1, step + 1).unsqueeze(0)
    input_pos = input_pos.repeat(un_dones, 1)
    input_pos = Variable(input_pos.long(), volatile=True)
    src_seq_beam = Variable(src_seq.data.repeat(un_dones, 1))
    enc_outputs_beam = [Variable(enc_output.data.repeat(un_dones, 1, 1)) for enc_output in enc_outputs]
    return input_pos, src_seq_beam, enc_outputs_beam
Python arange() example source code
def decode(self, seq, pos):
    def length_penalty(step, len_penalty_w=1.):
        # log of the GNMT-style length penalty ((5 + step) / 6) ** len_penalty_w
        return (torch.log(torch.FloatTensor([5 + step])) - torch.log(torch.FloatTensor([6]))) * len_penalty_w

    top_seqs = [([BOS], 0)] * self.beam_size
    enc_outputs = self.model.enc(seq, pos)
    seq_beam = Variable(seq.data.repeat(self.beam_size, 1))
    enc_outputs_beam = [Variable(enc_output.data.repeat(self.beam_size, 1, 1)) for enc_output in enc_outputs]
    input_data = self.init_input()
    input_pos = torch.arange(1, 2).unsqueeze(0)
    input_pos = input_pos.repeat(self.beam_size, 1)
    input_pos = Variable(input_pos.long(), volatile=True)
    for step in range(1, self.args.max_word_len + 1):
        if self.cuda:
            input_pos = input_pos.cuda()
            input_data = input_data.cuda()
        dec_output = self.model.dec(enc_outputs_beam,
                                    seq_beam, input_data, input_pos)
        dec_output = dec_output[:, -1, :]  # word-level feature
        out = F.log_softmax(self.model.linear(dec_output))
        lp = length_penalty(step)
        top_seqs, all_done, un_dones = self.beam_search(out.data + lp, top_seqs)
        if all_done:
            break
        input_data = self.update_input(top_seqs)
        # narrow the beam-repeated state to the unfinished beams
        input_pos, seq_beam, enc_outputs_beam = self.update_state(step + 1, seq, enc_outputs, un_dones)
    tgts = []
    for seq in top_seqs:
        cor_idxs, score = seq
        cor_idxs = cor_idxs[1:-1]  # strip BOS/EOS
        tgts += [(" ".join([self.src_idx2word[idx] for idx in cor_idxs]), score)]
    return tgts
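
# A minimal standalone sketch (not from the repo above) checking that the value
# returned by length_penalty equals log(((5 + step) / 6) ** w), i.e. the
# GNMT-style length penalty expressed in log space. gnmt_length_penalty is my own
# helper name, introduced only for this check.
import math
import torch

def gnmt_length_penalty(step, w=1.0):
    return ((5.0 + step) / 6.0) ** w

for step in (1, 5, 20):
    log_lp = (torch.log(torch.FloatTensor([5 + step])) - torch.log(torch.FloatTensor([6]))) * 1.0
    assert abs(float(log_lp[0]) - math.log(gnmt_length_penalty(step))) < 1e-5
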
def make_positions(tokens, padding_idx, left_pad, offset=0):
    seqlen = tokens.size(1)
    if not hasattr(make_positions, 'range'):
        make_positions.range = tokens.new()
    if make_positions.range.numel() < offset + seqlen:
        # offset positions by the padding index
        torch.arange(padding_idx + 1, padding_idx + 1 + offset + seqlen,
                     out=make_positions.range)
    mask = tokens.ne(padding_idx)
    positions = make_positions.range[offset:offset + seqlen].expand_as(tokens)
    if left_pad:
        positions = positions - mask.size(1) + mask.long().sum(dim=1).unsqueeze(1)
    return tokens.clone().masked_scatter_(mask, positions[mask])
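
# Hypothetical usage sketch (not part of the snippet above): with padding_idx = 1,
# real tokens are numbered from padding_idx + 1 onwards and padded slots keep the
# padding index.
import torch

tokens = torch.LongTensor([[1, 1, 5, 6],
                           [1, 7, 8, 9]])
positions = make_positions(tokens, padding_idx=1, left_pad=True)
# expected result:
# [[1, 1, 2, 3],
#  [1, 2, 3, 4]]
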
def test_gmm_iter_discrete_traces(model, data_size, graph_type):
    pyro.clear_param_store()
    data = Variable(torch.arange(0, data_size))
    traces = list(iter_discrete_traces(graph_type, model, data=data, verbose=True))
    # This non-vectorized version is exponential in data_size:
    assert len(traces) == 2 ** data_size

# A Gaussian mixture model, with vectorized batching.
def test_gmm_batch_iter_discrete_traces(model, data_size, graph_type):
    pyro.clear_param_store()
    data = Variable(torch.arange(0, data_size))
    traces = list(iter_discrete_traces(graph_type, model, data=data))
    # This vectorized version is independent of data_size:
    assert len(traces) == 2
def setUp(self):
    self.v = Variable(torch.Tensor([3]))
    self.vs = Variable(torch.Tensor([[0], [1], [2], [3]]))
    self.vs_expanded = self.vs.expand(4, 3)
    self.test_data = Variable(torch.Tensor([[3], [3], [3]]))
    self.batch_test_data_1 = Variable(torch.arange(0, 4).unsqueeze(1).expand(4, 3))
    self.batch_test_data_2 = Variable(torch.arange(4, 8).unsqueeze(1).expand(4, 3))
    self.batch_test_data_3 = Variable(torch.Tensor([[3], [3], [3], [3]]))
    self.expected_support = [[0], [1], [2], [3]]
    self.expected_support_non_vec = [3]
    self.analytic_mean = 3
    self.analytic_var = 0
    self.n_samples = 10
def enumerate_support(self):
    """
    Returns the categorical distribution's support, as a tensor along the first dimension.

    Note that this returns support values of all the batched RVs in lock-step, rather
    than the full cartesian product. To iterate over the cartesian product, you must
    construct univariate Categoricals and use itertools.product() over all univariate
    variables (but this is very expensive).

    :param ps: Tensor where the last dimension denotes the event probabilities, *p_k*,
        which must sum to 1. The remaining dimensions are considered batch dimensions.
    :type ps: torch.autograd.Variable
    :param vs: Optional parameter, enumerating the items in the support. This could have
        either a numeric or string type. This should have the same dimension as ``ps``.
    :type vs: list or numpy.ndarray or torch.autograd.Variable
    :param one_hot: Denotes whether one-hot encoding is enabled. This is True by default.
        When set to False and no explicit ``vs`` is provided, the last dimension holds
        the index of the support value rather than its one-hot encoding.
    :type one_hot: boolean
    :return: Torch variable or numpy array enumerating the support of the categorical
        distribution. Each item in the return value, when enumerated along the first
        dimension, yields a value from the distribution's support which has the same
        dimension as would be returned by sample. If ``one_hot=True``, the last dimension
        is used for the one-hot encoding.
    :rtype: torch.autograd.Variable or numpy.ndarray.
    """
    sample_shape = self.batch_shape() + (1,)
    support_samples_size = self.event_shape() + sample_shape
    vs = self.vs
    if vs is not None:
        if isinstance(vs, np.ndarray):
            return vs.transpose().reshape(*support_samples_size)
        else:
            return torch.transpose(vs, 0, -1).contiguous().view(support_samples_size)
    if self.one_hot:
        return Variable(torch.stack([t.expand_as(self.ps) for t in torch_eye(*self.event_shape())]))
    else:
        LongTensor = torch.cuda.LongTensor if self.ps.is_cuda else torch.LongTensor
        return Variable(
            torch.stack([LongTensor([t]).expand(sample_shape)
                         for t in torch.arange(0, *self.event_shape()).long()]))
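
# Standalone illustration (my own sketch, not the pyro API) of what the two branches
# above produce for a 3-class categorical with a batch of 2: one-hot support rows via
# an identity matrix, index-valued support via torch.arange.
import torch

ps = torch.FloatTensor([[0.2, 0.3, 0.5],
                        [0.1, 0.1, 0.8]])   # batch_shape = (2,), event_shape = (3,)
one_hot_support = torch.stack([row.expand_as(ps) for row in torch.eye(3)])
# shape (3, 2, 3): entry i is the one-hot vector e_i, repeated for every batch element
index_support = torch.stack([torch.LongTensor([i]).expand(2, 1)
                             for i in torch.arange(0, 3).long().tolist()])
# shape (3, 2, 1): entry i is the index i, repeated for every batch element
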
def mask_for_lengths(length, max_length=None, mask_right=True, value=-1e6):
    max_length = max_length or length.max().data[0]
    mask = torch.cuda.IntTensor() if length.is_cuda else torch.IntTensor()
    mask = torch.arange(0, max_length, 1, out=mask)
    mask = torch.autograd.Variable(mask).type_as(length)
    # broadcast positions over the batch: one row per length; integer division
    # maps positions past the length to values >= 1
    mask = mask / length.unsqueeze(1)
    mask = mask.clamp(0, 1)
    mask = mask.float()
    if not mask_right:
        mask = 1.0 - mask
    mask *= value
    return mask
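
# Hypothetical usage sketch (not from the snippet above), assuming integer lengths
# and the old integer-division semantics this code targets: positions at or beyond
# each length get the large negative value, valid positions get 0.
import torch

lengths = torch.autograd.Variable(torch.LongTensor([2, 4]))
m = mask_for_lengths(lengths, max_length=4)
# expected:
# [[0, 0, -1e6, -1e6],
#  [0, 0,    0,    0]]
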
def get_scores(self):
    self.model.eval()
    num_classes = self.dataset_cls.NUM_CLASSES
    predict_classes = torch.arange(1, num_classes + 1).expand(self.batch_size, num_classes)
    test_kl_div_loss = 0
    predictions = []
    true_labels = []

    for batch in self.data_loader:
        output = self.model(batch.sentence_1, batch.sentence_2, batch.ext_feats)
        test_kl_div_loss += F.kl_div(output, batch.label, size_average=False).data[0]
        # handle last batch which might have smaller size
        if len(predict_classes) != len(batch.sentence_1):
            predict_classes = torch.arange(1, num_classes + 1).expand(len(batch.sentence_1), num_classes)
        if self.data_loader.device != -1:
            with torch.cuda.device(self.device):
                predict_classes = predict_classes.cuda()
        true_labels.append((predict_classes * batch.label.data).sum(dim=1))
        predictions.append((predict_classes * output.data.exp()).sum(dim=1))
        del output

    predictions = torch.cat(predictions).cpu().numpy()
    true_labels = torch.cat(true_labels).cpu().numpy()
    test_kl_div_loss /= len(batch.dataset.examples)
    pearson_r = pearsonr(predictions, true_labels)[0]
    spearman_r = spearmanr(predictions, true_labels)[0]
    return [pearson_r, spearman_r, test_kl_div_loss], ['pearson_r', 'spearman_r', 'KL-divergence loss']
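
# Standalone sketch (my own, not from the repo above) of the expectation trick used
# for `predictions`: with classes 1..5 and log-probability outputs, the predicted
# score is the expected class value sum_k k * p_k.
import torch
import torch.nn.functional as F

log_probs = F.log_softmax(torch.FloatTensor([[0.1, 0.2, 0.3, 0.2, 0.2]]), dim=1)
classes = torch.arange(1, 6).float().expand(1, 5)   # [[1., 2., 3., 4., 5.]]
score = (classes * log_probs.exp()).sum(dim=1)      # one expected-value score per row
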
def get_scores(self):
    self.model.eval()
    num_classes = self.dataset_cls.NUM_CLASSES
    predict_classes = torch.arange(0, num_classes).expand(self.batch_size, num_classes)
    test_kl_div_loss = 0
    predictions = []
    true_labels = []

    for batch in self.data_loader:
        output = self.model(batch.sentence_1, batch.sentence_2, batch.ext_feats)
        test_kl_div_loss += F.kl_div(output, batch.label, size_average=False).data[0]
        # handle last batch which might have smaller size
        if len(predict_classes) != len(batch.sentence_1):
            predict_classes = torch.arange(0, num_classes).expand(len(batch.sentence_1), num_classes)
        if self.data_loader.device != -1:
            with torch.cuda.device(self.device):
                predict_classes = predict_classes.cuda()
        true_labels.append((predict_classes * batch.label.data).sum(dim=1))
        predictions.append((predict_classes * output.data.exp()).sum(dim=1))
        del output

    predictions = torch.cat(predictions).cpu().numpy()
    true_labels = torch.cat(true_labels).cpu().numpy()
    test_kl_div_loss /= len(batch.dataset.examples)
    pearson_r = pearsonr(predictions, true_labels)[0]
    return [pearson_r, test_kl_div_loss], ['pearson_r', 'KL-divergence loss']
def forward(self, ft, scaling, seg_split):
    x1 = seg_split[0]
    x2 = seg_split[1]
    n_seg = seg_split[2]
    ft_dim = ft.size()[1]
    src = ft.view(-1, n_seg, ft_dim)
    scaling = scaling.view(-1, 2)
    n_sample = src.size()[0]

    def get_stage_stpp(stage_ft, stage_parts, norm_num, scaling):
        stage_stpp = []
        stage_len = stage_ft.size(1)
        for n_part in stage_parts:
            ticks = torch.arange(0, stage_len + 1e-5, stage_len / n_part)
            for i in range(n_part):
                part_ft = stage_ft[:, int(ticks[i]):int(ticks[i + 1]), :].mean(dim=1) / norm_num
                if scaling is not None:
                    part_ft = part_ft * scaling.resize(n_sample, 1)
                stage_stpp.append(part_ft)
        return stage_stpp

    feature_parts = []
    feature_parts.extend(get_stage_stpp(src[:, :x1, :], self.parts[0], self.norm_num[0], scaling[:, 0]))   # starting
    feature_parts.extend(get_stage_stpp(src[:, x1:x2, :], self.parts[1], self.norm_num[1], None))          # course
    feature_parts.extend(get_stage_stpp(src[:, x2:, :], self.parts[2], self.norm_num[2], scaling[:, 1]))   # ending
    stpp_ft = torch.cat(feature_parts, dim=1)
    if not self.sc:
        return stpp_ft, stpp_ft
    else:
        course_ft = src[:, x1:x2, :].mean(dim=1)
        return course_ft, stpp_ft
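
# Small standalone illustration (my own sketch) of the tick computation above:
# a stage of 10 snippets split by a two-level pyramid (1 part, then 2 parts).
import torch

stage_len = 10
for n_part in (1, 2):
    ticks = torch.arange(0, stage_len + 1e-5, stage_len / n_part)
    print(n_part, ticks.tolist())
# expected:
# 1 [0.0, 10.0]        -> one slice covering snippets 0:10
# 2 [0.0, 5.0, 10.0]   -> slices 0:5 and 5:10
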
def updateGradInput(self, input, gradOutput):
    input, mask = input
    if input.type() == 'torch.cuda.FloatTensor':
        torch.arange(0, mask.nelement(), out=self._maskIndexBufferCPU).resize_(mask.size())
        self._maskIndexBuffer.resize_(self._maskIndexBufferCPU.size()).copy_(self._maskIndexBufferCPU)
    else:
        torch.arange(0, mask.nelement(), out=self._maskIndexBuffer).resize_(mask.size())
    torch.masked_select(self._maskIndexBuffer, mask, out=self._maskIndices)
    self._gradBuffer.resize_(input.nelement()).zero_()
    self._gradBuffer.scatter_(0, self._maskIndices, gradOutput)
    self._gradBuffer.resize_(input.size())
    self.gradInput = [self._gradBuffer, self._gradMask.resize_(mask.size()).fill_(0)]
    return self.gradInput
def test_load_state_dict(self):
    l = nn.Linear(5, 5)
    block = nn.Module()
    block.conv1 = nn.Conv2d(3, 3, 3, bias=True)
    block.conv2 = nn.Conv2d(3, 3, 3, bias=False)
    net = nn.Module()
    net.linear1 = l
    net.linear2 = l
    net.bn = nn.BatchNorm2d(2)
    net.block = block
    net.add_module('empty', None)

    state_dict = net.state_dict()
    state_dict.update({
        'linear1.weight': torch.ones(5, 5),
        'block.conv1.bias': torch.arange(1, 4),
        'bn.running_mean': torch.randn(2),
    })
    net.load_state_dict(state_dict)
    self.assertEqual(net.linear1.weight.data, state_dict['linear1.weight'])
    self.assertEqual(net.block.conv1.bias.data, state_dict['block.conv1.bias'])
    self.assertEqual(net.bn.running_mean, state_dict['bn.running_mean'])

    state_dict = net.state_dict()
    state_dict.update({'extra': torch.ones(5)})
    self.assertRaises(KeyError, lambda: net.load_state_dict(state_dict))

    state_dict = net.state_dict()
    del state_dict['linear1.weight']
    self.assertRaises(KeyError, lambda: net.load_state_dict(state_dict))
def test_indexing(self):
    x = torch.arange(1, 17).resize_(4, 4)
    y = Variable(x, requires_grad=True)

    def check_index(idx):
        if y.grad is not None:
            y.grad.data.zero_()
        indexed_tensor = x[idx]
        indexed_var = y[idx]
        indexed_var_t = indexed_var.data
        if not torch.is_tensor(indexed_tensor):
            indexed_var_t = indexed_var_t[0]
        self.assertEqual(indexed_tensor, indexed_var)
        indexed_var.sum().backward()
        expected_grad = torch.zeros(4, 4)
        expected_grad[idx] = 1
        self.assertEqual(y.grad.data, expected_grad)

    check_index(1)
    check_index((1, 1))
    check_index(slice(1, None))
    check_index(slice(None, 2))
    check_index((slice(None, 2), 2))
    check_index((slice(1, 2), 2))
    check_index((1, slice(2, None)))
    check_index((slice(None, None), slice(2, None)))
    check_index(torch.LongTensor([0, 2]))
    check_index(torch.rand(4, 4).bernoulli().byte())
    check_index((Ellipsis, slice(2, None)))
def autograd_sharing(queue, ready, master_modified):
    var = queue.get()
    ready.set()
    master_modified.wait()

    expected_var = torch.arange(1, 26).view(5, 5)
    expected_var[0, 0] = 1000
    is_ok = var.data.equal(expected_var)
    var.data[:] = torch.ones(5, 5)

    is_ok &= var.grad is None
    var._grad = Variable(torch.ones(5, 5), requires_grad=False)
    queue.put(is_ok)
def test_variable_sharing(self):
    configs = [
        (True, False),
        (False, False),
        (False, True),
    ]
    for requires_grad, volatile in configs:
        var = Variable(torch.arange(1, 26).view(5, 5),
                       requires_grad=requires_grad,
                       volatile=volatile)
        self._test_autograd_sharing(var)
def test_parameter_sharing(self):
    param = Parameter(torch.arange(1, 26).view(5, 5))
    self._test_autograd_sharing(param)
def small_1d_lapack(t):
    return t(1, 3).copy_(torch.arange(1, 4).view(3))