def pad_batch(mini_batch):
    """Zero-pad a mini-batch of sentence pairs into two rectangular matrices.

    Args:
        mini_batch: sequence of pairs; each element x has two token-id
            sequences x[0] and x[1] (possibly of different lengths).
    Returns:
        [m1, m2]: two Variables wrapping LongTensors of shape
        (batch, max_len1) and (batch, max_len2), zero-padded on the right.
    """
    batch_size = len(mini_batch)
    max_sent_len1 = int(np.max([len(x[0]) for x in mini_batch]))
    max_sent_len2 = int(np.max([len(x[1]) for x in mini_batch]))
    # np.int was removed from NumPy; int64 keeps torch.from_numpy -> LongTensor
    main_matrix1 = np.zeros((batch_size, max_sent_len1), dtype=np.int64)
    main_matrix2 = np.zeros((batch_size, max_sent_len2), dtype=np.int64)
    for row, pair in enumerate(mini_batch):
        # BUGFIX: the original wrote main_matrix[i, j] where i was the whole
        # pair and j the token value; the row/column indices are what's needed.
        for col, token in enumerate(pair[0]):
            main_matrix1[row, col] = token
        for col, token in enumerate(pair[1]):
            main_matrix2[row, col] = token
    return [Variable(torch.from_numpy(main_matrix1)),
            Variable(torch.from_numpy(main_matrix2))]
# def pad_batch(mini_batch):
# # print mini_batch
# # print type(mini_batch)
# # print mini_batch.shape
# # for i, _ in enumerate(mini_batch):
# # print i, _
# return [Variable(torch.from_numpy(np.asarray(_))) for _ in mini_batch[0]]
# Example usages of the Python class Variable() (caption translated from Chinese)
def calc_gradient_penalty(self, netD, real_data, fake_data):
    """WGAN-GP gradient penalty (Gulrajani et al., 2017).

    Interpolates between real and fake batches with a random alpha and returns
    self.LAMBDA * mean((||grad_x D(x_hat)||_2 - 1)^2) as a scalar Variable.

    NOTE(review): CUDA is required (unconditional .cuda() calls), and the
    (1, 1) alpha expand assumes real_data is 2-D -- confirm for other shapes.
    """
    alpha = torch.rand(1, 1)
    alpha = alpha.expand(real_data.size())
    alpha = alpha.cuda()
    interpolates = alpha * real_data + ((1 - alpha) * fake_data)
    interpolates = interpolates.cuda()
    interpolates = Variable(interpolates, requires_grad=True)
    # Call the module itself (not .forward()) so registered hooks still run.
    disc_interpolates = netD(interpolates)
    gradients = autograd.grad(outputs=disc_interpolates, inputs=interpolates,
                              grad_outputs=torch.ones(disc_interpolates.size()).cuda(),
                              create_graph=True, retain_graph=True, only_inputs=True)[0]
    gradient_penalty = ((gradients.norm(2, dim=1) - 1) ** 2).mean() * self.LAMBDA
    return gradient_penalty
def xavier_uniform(tensor, gain=1):
    r"""Fills the input Tensor or Variable with values according to the method
    described in "Understanding the difficulty of training deep feedforward
    neural networks" - Glorot, X. & Bengio, Y. (2010), using a uniform
    distribution. The resulting tensor will have values sampled from
    :math:`U(-a, a)` where
    :math:`a = gain \times \sqrt{2 / (fan\_in + fan\_out)} \times \sqrt{3}`.
    Also known as Glorot initialisation.

    Args:
        tensor: an n-dimensional torch.Tensor or autograd.Variable
        gain: an optional scaling factor

    Examples:
        >>> w = torch.Tensor(3, 5)
        >>> nn.init.xavier_uniform(w, gain=nn.init.calculate_gain('relu'))
    """
    # On modern PyTorch every Tensor satisfies isinstance(_, Variable) and
    # tensor.data is again a Tensor, so the original self-recursive dispatch
    # recursed forever. Unwrap once instead; the docstring is now raw to avoid
    # the invalid \s escape-sequence warnings of the original.
    data = tensor.data if isinstance(tensor, Variable) else tensor
    fan_in, fan_out = _calculate_fan_in_and_fan_out(data)
    std = gain * math.sqrt(2.0 / (fan_in + fan_out))
    a = math.sqrt(3.0) * std  # Calculate uniform bounds from standard deviation
    data.uniform_(-a, a)
    return tensor
def xavier_normal(tensor, gain=1):
    r"""Fills the input Tensor or Variable with values according to the method
    described in "Understanding the difficulty of training deep feedforward
    neural networks" - Glorot, X. & Bengio, Y. (2010), using a normal
    distribution. The resulting tensor will have values sampled from
    :math:`N(0, std)` where
    :math:`std = gain \times \sqrt{2 / (fan\_in + fan\_out)}`.
    Also known as Glorot initialisation.

    Args:
        tensor: an n-dimensional torch.Tensor or autograd.Variable
        gain: an optional scaling factor

    Examples:
        >>> w = torch.Tensor(3, 5)
        >>> nn.init.xavier_normal(w)
    """
    # Same fix as xavier_uniform: unwrap .data once instead of recursing, which
    # loops forever on modern PyTorch where Tensors pass the Variable check;
    # raw docstring avoids the original's invalid escape sequences.
    data = tensor.data if isinstance(tensor, Variable) else tensor
    fan_in, fan_out = _calculate_fan_in_and_fan_out(data)
    std = gain * math.sqrt(2.0 / (fan_in + fan_out))
    data.normal_(0, std)
    return tensor
def forward(self, x, lengths, hidden):
    """Encode each user's baskets, then run the basket sequence through the RNN.

    Args:
        x: nested lists of shape (batch of users, time_step, item indices per
            basket) -- each innermost list is one basket of product ids.
        lengths: per-user sequence lengths; pack_padded_sequence requires them
            sorted descending -- TODO confirm the caller guarantees this.
        hidden: initial hidden state for self.rnn.
    Returns:
        (dynamic_user, h_u): padded per-timestep user representations of shape
        (batch_size, max_len, embedding_dim) and the final RNN hidden state.
    """
    # Basket Encoding
    ub_seqs = [] # users' basket sequence
    for user in x: # x shape (batch of user, time_step, indice of product) nested lists
        embed_baskets = []
        for basket in user:
            basket = torch.LongTensor(basket).resize_(1, len(basket))
            basket = basket.cuda() if self.config.cuda else basket # use cuda for acceleration
            basket = self.encode(torch.autograd.Variable(basket)) # shape: 1, len(basket), embedding_dim
            # pool token embeddings within the basket down to one vector
            embed_baskets.append(self.pool(basket, dim = 1))
        # concat current user's all baskets and append it to users' basket sequence
        ub_seqs.append(torch.cat(embed_baskets, 1)) # shape: 1, num_basket, embedding_dim
    # Input for rnn
    ub_seqs = torch.cat(ub_seqs, 0).cuda() if self.config.cuda else torch.cat(ub_seqs, 0) # shape: batch_size, max_len, embedding_dim
    packed_ub_seqs = torch.nn.utils.rnn.pack_padded_sequence(ub_seqs, lengths, batch_first=True) # packed sequence as required by pytorch
    # RNN
    output, h_u = self.rnn(packed_ub_seqs, hidden)
    dynamic_user, _ = torch.nn.utils.rnn.pad_packed_sequence(output, batch_first=True) # shape: batch_size, max_len, embedding_dim
    return dynamic_user, h_u
# Source listing: train.py
# Project: Structured-Self-Attentive-Sentence-Embedding
# Author: ExplorerFreda
# (scraped page stats, translated from Chinese: 20 reads, 0 favorites, 0 likes, 0 comments)
def package(data, volatile=False):
    """Package data for training / evaluation.

    Args:
        data: iterable of JSON strings, each object carrying a 'text' token
            list and a 'label'.
        volatile: forwarded to Variable (legacy-torch inference flag).
    Returns:
        (dat.t(), targets): token-id LongTensor transposed to (seq_len, batch)
        and a LongTensor of labels.
    Relies on a module-level `dictionary` exposing word2idx (incl. '<pad>').
    """
    # Materialize as lists: on Python 3, map() returns one-shot iterators that
    # the original code then indexed and re-iterated, which silently broke.
    data = [json.loads(x) for x in data]
    dat = [[dictionary.word2idx[y] for y in x['text']] for x in data]
    targets = [x['label'] for x in data]
    maxlen = 0
    for item in dat:
        maxlen = max(maxlen, len(item))
    maxlen = min(maxlen, 500)  # cap sequence length at 500 tokens
    for i in range(len(data)):
        if maxlen < len(dat[i]):
            dat[i] = dat[i][:maxlen]
        else:
            for j in range(maxlen - len(dat[i])):
                dat[i].append(dictionary.word2idx['<pad>'])
    dat = Variable(torch.LongTensor(dat), volatile=volatile)
    targets = Variable(torch.LongTensor(targets), volatile=volatile)
    return dat.t(), targets
def trainBatch(net, criterion, optimizer):
    """Run one CRNN/CTC training step on the next batch of `train_iter`.

    Relies on module-level globals: train_iter, converter, crnn, utils, and the
    pre-allocated image/text/length buffers. `net` is unused (kept for the
    caller's signature).
    Returns the (already batch-averaged) CTC cost for logging.
    """
    # next() works with both Python 2 and 3 iterators; .next() is Py2-only.
    data = next(train_iter)
    cpu_images, cpu_texts = data
    batch_size = cpu_images.size(0)
    utils.loadData(image, cpu_images)
    t, l = converter.encode(cpu_texts)
    utils.loadData(text, t)
    utils.loadData(length, l)
    preds = crnn(image)
    # every sample shares the same number of prediction timesteps
    preds_size = Variable(torch.IntTensor([preds.size(0)] * batch_size))
    cost = criterion(preds, text, preds_size, length) / batch_size
    crnn.zero_grad()
    cost.backward()
    optimizer.step()
    return cost
def test():
    """Evaluate the global `model` on `test_loader`; print mean NLL and accuracy."""
    model.eval()
    total_loss = 0
    n_correct = 0
    for inputs, labels in test_loader:
        if args.cuda:
            inputs, labels = inputs.cuda(), labels.cuda()
        inputs = Variable(inputs, volatile=True)
        labels = Variable(labels)
        logits = model(inputs)
        # accumulate the summed (not averaged) batch loss
        total_loss += F.nll_loss(logits, labels, size_average=False).data[0]
        # argmax over log-probabilities gives the predicted class
        predicted = logits.data.max(1)[1]
        n_correct += predicted.eq(labels.data.view_as(predicted)).cpu().sum()
    total_loss /= len(test_loader.dataset)
    print('\nTest set: Average loss: {:.4f}, Accuracy: {}/{} ({:.0f}%)\n'.format(
        total_loss, n_correct, len(test_loader.dataset),
        100. * n_correct / len(test_loader.dataset)))
def test():
    """Evaluate the global `model` on `test_loader`; print mean NLL loss and accuracy.

    Relies on module-level globals: model, test_loader, args (cuda flag), F.
    NOTE(review): byte-for-byte duplicate of the test() defined earlier in this file.
    """
    model.eval()
    test_loss = 0
    correct = 0
    for data, target in test_loader:
        if args.cuda:
            data, target = data.cuda(), target.cuda()
        # volatile=True: legacy-torch flag disabling graph construction for inference
        data, target = Variable(data, volatile=True), Variable(target)
        output = model(data)
        test_loss += F.nll_loss(output, target, size_average=False).data[0] # sum up batch loss
        pred = output.data.max(1)[1] # get the index of the max log-probability
        correct += pred.eq(target.data.view_as(pred)).cpu().sum()
    test_loss /= len(test_loader.dataset)
    print('\nTest set: Average loss: {:.4f}, Accuracy: {}/{} ({:.0f}%)\n'.format(
        test_loss, correct, len(test_loader.dataset),
        100. * correct / len(test_loader.dataset)))
def printnorm_backward(self, input_, output):
    """Backward-hook logger: print types/sizes of the gradients flowing through
    a module and stash the first input gradient (as numpy) in the module-level
    `backward_grad` for later inspection."""
    global backward_grad
    module_name = self.__class__.__name__
    print('Inside ' + module_name + ' backward')
    print('')
    print('input: ', type(input_))
    print('input[0]: ', type(input_[0]))
    print('output: ', type(output))
    print('output[0]: ', type(output[0]))
    print('')
    first_in = input_[0]
    first_out = output[0]
    print('input size:', first_in.size())
    print('output size:', len(output))
    print('output[0] size:', first_out.size())
    print('output norm:', first_out.data.norm())
    backward_grad = first_in.data.numpy()
# This could be useful for using the features produced by a pretrained network
# If all you care about is this feature vector, then use a Variable with volatile=True to speed up inference
def train(epoch):
    """Run one training epoch over `train_loader`, plotting gradient histograms live.

    Relies on module-level globals: model, optimizer, train_loader, plt, and the
    forward_grad / backward_grad numpy arrays populated by registered hooks.

    Args:
        epoch: epoch index, used only for the progress printout.
    """
    model.train()
    for batch_idx, (data, target) in enumerate(train_loader):
        # requires_grad on the input so input-gradients exist for the hook plots
        data, target = Variable(data, requires_grad=True), Variable(target)
        optimizer.zero_grad()
        output = model(data)
        loss = F.cross_entropy(output, target)
        loss.backward()
        optimizer.step()
        # Display the gradients
        plt.clf()
        plt.subplot(211); plt.hist(forward_grad.ravel()); plt.title("Features magnitude")
        plt.subplot(212); plt.hist(backward_grad.ravel()); plt.title("Gradients")
        plt.show(block=False)
        plt.pause(0.01)  # brief pause lets the GUI event loop redraw
        print('Train Epoch: {} [{}/{} ({:.0f}%)]\tLoss: {:.6f}'.format(
            epoch, batch_idx * len(data), len(train_loader.dataset),
            100. * batch_idx / len(train_loader), loss.data[0]))
def Saliency_map(image, model, preprocess, ground_truth, use_gpu=False, method=util.GradType.GUIDED):
    """Gradient saliency map for the ground-truth class.

    Backpropagates output[0, ground_truth] to the input and returns the
    channel-wise max of |gradient| as an (H, W) numpy array.

    Relies on module-level vis_param_dict / util (gradient-type switch read by
    registered hooks).
    """
    vis_param_dict['method'] = method
    img_tensor = preprocess(image)
    img_tensor.unsqueeze_(0)  # add batch dimension
    if use_gpu:
        img_tensor = img_tensor.cuda()
    input = Variable(img_tensor, requires_grad=True)
    if input.grad is not None:
        input.grad.data.zero_()
    model.zero_grad()
    output = model(input)
    if isinstance(ground_truth, np.int64):
        # np.asscalar() was removed in NumPy 1.23; .item() is the replacement.
        ground_truth = ground_truth.item()
    # (dropped the original's unused `ind` LongTensor/Variable)
    energy = output[0, ground_truth]
    energy.backward()
    grad = input.grad
    if use_gpu:
        return np.abs(grad.data.cpu().numpy()[0]).max(axis=0)
    return np.abs(grad.data.numpy()[0]).max(axis=0)
def classifyOneImage(model, img_pil, preprocess):
    """Classify a single PIL image and return softmax class probabilities.

    Relies on the module-level `use_gpu` flag.
    NOTE(review): the original returned raw logits on the CPU path but softmax
    probabilities on the GPU path; both paths now return softmax output.
    """
    model.eval()
    img_tensor = preprocess(img_pil)
    img_tensor.unsqueeze_(0)  # add batch dimension
    if use_gpu:
        img_tensor = img_tensor.cuda()
    img_variable = Variable(img_tensor)
    out = model(img_variable)
    m = nn.Softmax()
    probs = m(out)
    if use_gpu:
        return probs.cpu()
    return probs
#method == util.GradType.NAIVE or util.GradType.GUIDED
def Occlusion_exp(image, occluding_size, occluding_stride, model, preprocess, classes, groundTruth):
    """Occlusion-sensitivity map (Zeiler & Fergus): slide a zeroed square over
    the image, classify every occluded copy, and return the ground-truth class
    probability per occluder position as an (output_height, output_width) array.

    Relies on the module-level `use_gpu` flag. NOTE(review): `classes` is unused.
    """
    img = np.copy(image)
    height, width, _ = img.shape
    output_height = int(math.ceil((height - occluding_size) / occluding_stride + 1))
    output_width = int(math.ceil((width - occluding_size) / occluding_stride + 1))
    ocludedImages = []
    for h in range(output_height):
        for w in range(output_width):
            # occluder region, clipped at the image border
            h_start = h * occluding_stride
            w_start = w * occluding_stride
            h_end = min(height, h_start + occluding_size)
            w_end = min(width, w_start + occluding_size)
            input_image = copy.copy(img)
            input_image[h_start:h_end, w_start:w_end, :] = 0
            ocludedImages.append(preprocess(Image.fromarray(input_image)))
    L = np.empty(output_height * output_width)
    L.fill(groundTruth)
    L = torch.from_numpy(L)
    tensor_images = torch.stack(ocludedImages)
    dataset = torch.utils.data.TensorDataset(tensor_images, L)
    dataloader = torch.utils.data.DataLoader(dataset, batch_size=5, shuffle=False, num_workers=8)
    heatmap = np.empty(0)
    model.eval()
    for data in dataloader:
        images, labels = data
        if use_gpu:
            # `async` became a reserved word in Python 3.7; non_blocking=True is
            # the torch >= 0.4 spelling of the same flag.
            images, labels = images.cuda(), labels.cuda(non_blocking=True)
        outputs = model(Variable(images))
        m = nn.Softmax()
        outputs = m(outputs)
        # BUGFIX: the original only bound `outs` under use_gpu, raising
        # NameError on the CPU path.
        outs = outputs.cpu() if use_gpu else outputs
        heatmap = np.concatenate((heatmap, outs[0:outs.size()[0], groundTruth].data.numpy()))
    return heatmap.reshape((output_height, output_width))
def children(self):
    """Yield (node, pos) for each non-empty child of this Node.

    pos is 0 for the left subnode and 1 for the right. A child counts as
    non-empty when it is truthy and its ``data`` attribute is not None.
    """
    for child, pos in ((self.left, 0), (self.right, 1)):
        if child and child.data is not None:
            yield child, pos
def test(self, nb_episodes=1, maximum_episode_length=5000000):
    """Average total reward over `nb_episodes` greedy rollouts in self.env.

    Each rollout is capped at maximum_episode_length steps; actions come from
    self.choose_action on the embedded observation with epsilon 0.
    """
    def run_episode():
        total = 0
        obs = self.env.reset()
        for _ in range(maximum_episode_length):
            state = self.embedding_network(Variable(Tensor(obs)).unsqueeze(0))
            action = self.choose_action(state, 0)
            obs, step_reward, done, info = self.env.step(action)
            total += step_reward
            if done:
                break
        return total

    return sum(run_episode() for _ in range(nb_episodes)) / nb_episodes
def forward(self, inputs):
    """Run the bidirectional RNN and map its last timestep to feature vectors.

    NOTE(review): allocates the initial states with an unconditional .cuda().
    """
    batch_size = inputs.size(0)
    # zeroed (h0, c0) for a bidirectional stack: num_layers * 2 directions
    state_shape = (self.num_layers * 2, batch_size, self.hidden_size)
    h0 = Variable(torch.zeros(*state_shape).cuda())
    c0 = Variable(torch.zeros(*state_shape).cuda())
    outputs, _ = self.rnn.forward(inputs, (h0, c0))
    # keep only the final timestep's output for the feature head
    last_step = outputs[:, -1, :].contiguous()
    return self.feature.forward(last_step)
def forward(self, inputs):
    """Per-timestep class probabilities from the bidirectional RNN.

    Returns shape (batch_size, time, num_classes).
    NOTE(review): allocates the initial states with an unconditional .cuda().
    """
    batch_size = inputs.size(0)
    # zeroed (h0, c0) for a bidirectional stack: num_layers * 2 directions
    state_shape = (self.num_layers * 2, batch_size, self.hidden_size)
    hidden_cell = (Variable(torch.zeros(*state_shape).cuda()),
                   Variable(torch.zeros(*state_shape).cuda()))
    rnn_out, _ = self.rnn.forward(inputs, hidden_cell)
    # flatten time so the classifier sees one row per timestep
    flat = rnn_out.contiguous().view(-1, self.hidden_size * 2)
    scores = F.softmax(self.classifier.forward(flat))
    return scores.view(batch_size, -1, self.num_classes)
def calc_gradient_penalty(netD, real_data, fake_data, sketch):
    """WGAN-GP penalty for a sketch-conditioned discriminator.

    Returns opt.gpW * mean((||grad_x D(x_hat, sketch)||_2 - 1)^2) where x_hat
    is a per-sample random mix of real and fake. Relies on module-level `opt`.
    """
    mix = torch.rand(opt.batchSize, 1, 1, 1)
    if opt.cuda:
        mix = mix.cuda()
    interp = mix * real_data + ((1 - mix) * fake_data)
    if opt.cuda:
        interp = interp.cuda()
    interp = Variable(interp, requires_grad=True)
    # discriminator returns a tuple; the first element is the critic score
    d_scores = netD(interp, Variable(sketch))[0]
    ones = torch.ones(d_scores.size())
    if opt.cuda:
        ones = ones.cuda()
    grads = grad(outputs=d_scores, inputs=interp, grad_outputs=ones,
                 create_graph=True, retain_graph=True, only_inputs=True)[0]
    return ((grads.norm(2, dim=1) - 1) ** 2).mean() * opt.gpW
def calc_gradient_penalty(netD, real_data, fake_data):
    """WGAN-GP penalty: opt.gpW * mean((||grad_x D(x_hat)||_2 - 1)^2) where
    x_hat is a per-sample random convex mix of real and fake batches.

    Relies on module-level `opt` (batchSize, cuda, gpW). The (N, 1, 1, 1) alpha
    broadcast assumes real_data is 4-D (N, C, H, W) -- TODO confirm.
    """
    # print "real_data: ", real_data.size(), fake_data.size()
    alpha = torch.rand(opt.batchSize, 1, 1, 1)
    # alpha = alpha.expand(opt.batchSize, real_data.nelement() / opt.batchSize).contiguous().view(opt.batchSize, 3, 64,
    #                                                                                            64)
    alpha = alpha.cuda() if opt.cuda else alpha
    interpolates = alpha * real_data + ((1 - alpha) * fake_data)
    if opt.cuda:
        interpolates = interpolates.cuda()
    # requires_grad so we can differentiate D's output w.r.t. the mix point
    interpolates = Variable(interpolates, requires_grad=True)
    disc_interpolates = netD(interpolates)
    gradients = grad(outputs=disc_interpolates, inputs=interpolates,
                     grad_outputs=torch.ones(disc_interpolates.size()).cuda() if opt.cuda else torch.ones(
                         disc_interpolates.size()),
                     create_graph=True, retain_graph=True, only_inputs=True)[0]
    gradient_penalty = ((gradients.norm(2, dim=1) - 1) ** 2).mean() * opt.gpW
    return gradient_penalty
def calc_gradient_penalty(netD, real_data, fake_data):
    """WGAN-GP gradient penalty (duplicate of the variant defined just above).

    Computes opt.gpW * mean((||grad_x D(x_hat)||_2 - 1)^2) on a random convex
    mix of real and fake samples; relies on module-level `opt`.
    """
    # print "real_data: ", real_data.size(), fake_data.size()
    alpha = torch.rand(opt.batchSize, 1, 1, 1)
    # alpha = alpha.expand(opt.batchSize, real_data.nelement() / opt.batchSize).contiguous().view(opt.batchSize, 3, 64,
    #                                                                                            64)
    alpha = alpha.cuda() if opt.cuda else alpha
    interpolates = alpha * real_data + ((1 - alpha) * fake_data)
    if opt.cuda:
        interpolates = interpolates.cuda()
    # requires_grad so we can differentiate D's output w.r.t. the mix point
    interpolates = Variable(interpolates, requires_grad=True)
    disc_interpolates = netD(interpolates)
    gradients = grad(outputs=disc_interpolates, inputs=interpolates,
                     grad_outputs=torch.ones(disc_interpolates.size()).cuda() if opt.cuda else torch.ones(
                         disc_interpolates.size()),
                     create_graph=True, retain_graph=True, only_inputs=True)[0]
    gradient_penalty = ((gradients.norm(2, dim=1) - 1) ** 2).mean() * opt.gpW
    return gradient_penalty
def get_target_tensor(self, input, target_is_real):
    """Return a GAN label tensor (all real_label or all fake_label) shaped like
    `input`, rebuilding the cached Variable only when the element count changes."""
    if target_is_real:
        cached = self.real_label_var
        if cached is None or cached.numel() != input.numel():
            filled = self.Tensor(input.size()).fill_(self.real_label)
            self.real_label_var = Variable(filled, requires_grad=False)
        return self.real_label_var
    cached = self.fake_label_var
    if cached is None or cached.numel() != input.numel():
        filled = self.Tensor(input.size()).fill_(self.fake_label)
        self.fake_label_var = Variable(filled, requires_grad=False)
    return self.fake_label_var
def get_target_tensor(self, input, target_is_real):
    """Return a GAN label tensor (all real_label or all fake_label) sized like
    `input`, caching the Variable per label kind and rebuilding only when the
    element count changes. (Duplicate of the identical method defined above.)
    """
    if target_is_real:
        # rebuild cache when absent or when the batch shape changed
        create_label = ((self.real_label_var is None) or
                        (self.real_label_var.numel() != input.numel()))
        if create_label:
            real_tensor = self.Tensor(input.size()).fill_(self.real_label)
            self.real_label_var = Variable(real_tensor, requires_grad=False)
        target_tensor = self.real_label_var
    else:
        create_label = ((self.fake_label_var is None) or
                        (self.fake_label_var.numel() != input.numel()))
        if create_label:
            fake_tensor = self.Tensor(input.size()).fill_(self.fake_label)
            self.fake_label_var = Variable(fake_tensor, requires_grad=False)
        target_tensor = self.fake_label_var
    return target_tensor
def init_hidden(self, height, width):
    """Allocate zeroed LSTM cell/hidden states for a height*width batch.

    Records height, width and batch on self; moves the states to GPU when
    self.on_gpu is set.
    """
    self.height = height
    self.width = width
    self.batch = height * width
    state_shape = (self.lstm_layer, self.batch, self.hidden_dim)
    self.cell_state = Variable(torch.zeros(*state_shape))
    self.hidden_state = Variable(torch.zeros(*state_shape))
    if self.on_gpu:
        self.cell_state = self.cell_state.cuda()
        self.hidden_state = self.hidden_state.cuda()
def train(e, model, opt, dataset, arg, cuda=False):
    """Train `model` for one epoch of MSE regression; return per-batch losses.

    Args:
        e: epoch index (logging only).
        model, opt: network and its optimizer.
        dataset: exposes get_batcher(shuffle, augment) yielding numpy (x, y).
        arg: options namespace (verbose flag).
        cuda: NOTE(review) ignored -- .cuda() is called unconditionally.
    """
    model.train()
    criterion = nn.MSELoss()
    losses = []
    batcher = dataset.get_batcher(shuffle=True, augment=True)
    for b, (x, y) in enumerate(batcher, 1):
        x = V(th.from_numpy(x).float()).cuda()
        y = V(th.from_numpy(y).float()).cuda()
        opt.zero_grad()
        logit = model(x)
        loss = criterion(logit, y)
        loss.backward()
        opt.step()
        losses.append(loss.data[0])
        if arg.verbose and b % 50 == 0:
            # mean of the MOST RECENT 50 losses; the original's losses[:-49]
            # averaged everything except the recent batches.
            loss_t = np.mean(losses[-50:])
            print('[train] [e]:%s [b]:%s - [loss]:%s' % (e, b, loss_t))
    return losses
def validate(models, dataset, arg, cuda=False):
    """Mean MSE of the ensemble-averaged prediction over the validation batches.

    `models` is a sequence of (model, _) pairs; predictions are averaged before
    scoring. NOTE(review): .cuda() is unconditional; `cuda`/`arg` are unused.
    """
    criterion = nn.MSELoss()
    losses = []
    batcher = dataset.get_batcher(shuffle=True, augment=False)
    for step, (bx, by) in enumerate(batcher, 1):
        bx = V(th.from_numpy(bx).float()).cuda()
        by = V(th.from_numpy(by).float()).cuda()
        # Ensemble average: sum every model's prediction, then divide
        summed = None
        for model, _ in models:
            model.eval()
            summed = model(bx) if summed is None else summed + model(bx)
        averaged = th.div(summed, len(models))
        losses.append(criterion(averaged, by).data[0])
    return np.mean(losses)
def predict(models, dataset, arg, cuda=False):
    """Write ensemble-averaged predictions, one per line, to save/predictions.txt.

    `models` is a sequence of (model, _) pairs; the first scalar of each
    averaged batch output is written. NOTE(review): .cuda() is unconditional,
    so the `cuda` flag is ignored.
    """
    batcher = dataset.get_batcher(shuffle=False, augment=False)
    # with-block guarantees the file is flushed and closed; the original opened
    # it and left the close() commented out.
    with open('save/predictions.txt', 'w') as prediction_file:
        for b, (x, _) in enumerate(batcher, 1):
            x = V(th.from_numpy(x).float()).cuda()
            # Ensemble average
            logit = None
            for model, _ in models:
                model.eval()
                logit = model(x) if logit is None else logit + model(x)
            logit = th.div(logit, len(models))
            prediction = logit.cpu().data[0][0]
            prediction_file.write('%s\n' % prediction)
            if arg.verbose and b % 100 == 0:
                print('[predict] [b]:%s - prediction: %s' % (b, prediction))
def bn_hat_z_layers(self, hat_z_layers, z_pre_layers):
    """Normalize each decoder hat_z with the batch mean/variance of the matching
    clean-encoder pre-activation z_pre (ladder-network decoder normalization).

    Args:
        hat_z_layers: per-layer decoder reconstructions.
        z_pre_layers: per-layer clean pre-activations, same length/order.
    Returns:
        List of normalized hat_z Variables, one per layer.
    """
    # TODO: Calculate batchnorm using GPU Tensors.
    assert len(hat_z_layers) == len(z_pre_layers)
    hat_z_layers_normalized = []
    for i, (hat_z, z_pre) in enumerate(zip(hat_z_layers, z_pre_layers)):
        # column of ones used to broadcast the (1, dim) mean/std over the batch
        if self.use_cuda:
            ones = Variable(torch.ones(z_pre.size()[0], 1).cuda())
        else:
            ones = Variable(torch.ones(z_pre.size()[0], 1))
        mean = torch.mean(z_pre, 0)
        # tiny gaussian jitter keeps the variance strictly positive
        noise_var = np.random.normal(loc=0.0, scale=1 - 1e-10, size=z_pre.size())
        # variance is computed in numpy, hence the .cpu() round-trip on GPU
        if self.use_cuda:
            var = np.var(z_pre.data.cpu().numpy() + noise_var, axis=0).reshape(1, z_pre.size()[1])
        else:
            var = np.var(z_pre.data.numpy() + noise_var, axis=0).reshape(1, z_pre.size()[1])
        var = Variable(torch.FloatTensor(var))
        # normalize on CPU (var lives there), then move back to GPU below
        if self.use_cuda:
            hat_z = hat_z.cpu()
            ones = ones.cpu()
            mean = mean.cpu()
        hat_z_normalized = torch.div(hat_z - ones.mm(mean), ones.mm(torch.sqrt(var + 1e-10)))
        if self.use_cuda:
            hat_z_normalized = hat_z_normalized.cuda()
        hat_z_layers_normalized.append(hat_z_normalized)
    return hat_z_layers_normalized
def evaluate_performance(ladder, valid_loader, e, agg_cost_scaled, agg_supervised_cost_scaled,
                         agg_unsupervised_cost_scaled, args):
    """Compute validation accuracy of the ladder's clean encoder and print the
    epoch summary (costs are passed in pre-scaled by the caller)."""
    correct = 0.
    total = 0.
    for step, (data, target) in enumerate(valid_loader):
        if args.cuda:
            data = data.cuda()
        data, target = Variable(data), Variable(target)
        output = ladder.forward_encoders_clean(data)
        # argmax happens in numpy, so pull GPU tensors back to the host first
        if args.cuda:
            output = output.cpu()
            target = target.cpu()
        scores = output.data.numpy()
        preds = np.argmax(scores, axis=1)
        labels = target.data.numpy()
        correct += np.sum(labels == preds)
        total += labels.shape[0]
    print("Epoch:", e + 1, "\t",
          "Total Cost:", "{:.4f}".format(agg_cost_scaled), "\t",
          "Supervised Cost:", "{:.4f}".format(agg_supervised_cost_scaled), "\t",
          "Unsupervised Cost:", "{:.4f}".format(agg_unsupervised_cost_scaled), "\t",
          "Validation Accuracy:", correct / total)
def forward_noise(self, tilde_h):
    """Noisy encoder pass: linear -> batchnorm -> +gaussian noise -> gamma/beta
    -> activation. Stores the noisy pre-activation in self.buffer_tilde_z for
    the decoder's reconstruction cost."""
    # z_pre feeds the decoder cost via the normalized, noised version below
    z_pre = self.linear(tilde_h)
    normalized = self.bn_normalize(z_pre)
    # Add gaussian corruption with the layer's configured noise level
    sampled = np.random.normal(loc=0.0, scale=self.noise_level, size=normalized.size())
    if self.use_cuda:
        noise = Variable(torch.cuda.FloatTensor(sampled))
    else:
        noise = Variable(torch.FloatTensor(sampled))
    tilde_z = normalized + noise
    # stash for the decoder's reconstruction target
    self.buffer_tilde_z = tilde_z
    return self.activation(self.bn_gamma_beta(tilde_z))