# Common imports assumed by the snippets on this page (PyTorch 0.3-era API):
import torch
import torch.nn.functional as F
from torch.autograd import Variable

def train(epoch):
    model.train()
    for batch_idx, (data, target) in enumerate(train_loader):
        if args.cuda:
            data, target = data.cuda(), target.cuda()
        data, target = Variable(data), Variable(target)
        optimizer.zero_grad()
        output = model(data)
        loss = F.nll_loss(output, target)
        loss.backward()
        optimizer.step()
        if batch_idx % args.log_interval == 0:
            print('Train Epoch: {} [{}/{} ({:.0f}%)]\tLoss: {:.6f}'.format(
                epoch, batch_idx * len(data), len(train_loader.dataset),
                100. * batch_idx / len(train_loader), loss.data[0]))
    # 1. Save the model every epoch
    torch.save(model.state_dict(), "mnist_model_{0:03d}.pwf".format(epoch))
Python F.nll_loss() example source code
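All of the examples below assume the model's forward pass ends in F.log_softmax, because F.nll_loss expects log-probabilities rather than raw logits. As a minimal sanity check (a standalone sketch, not taken from any of the snippets, written with the current PyTorch API for brevity), log_softmax followed by nll_loss matches cross_entropy on the raw logits:

import torch
import torch.nn.functional as F

logits = torch.randn(4, 10)          # batch of 4, 10 classes
target = torch.tensor([1, 0, 3, 9])  # class indices

# nll_loss consumes log-probabilities...
log_probs = F.log_softmax(logits, dim=1)
loss_a = F.nll_loss(log_probs, target)

# ...so it is equivalent to cross_entropy applied to the logits.
loss_b = F.cross_entropy(logits, target)
assert torch.allclose(loss_a, loss_b)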
def test():
    model.eval()
    test_loss = 0
    correct = 0
    for data, target in test_loader:
        if args.cuda:
            data, target = data.cuda(), target.cuda()
        data, target = Variable(data, volatile=True), Variable(target)
        output = model(data)
        test_loss += F.nll_loss(output, target, size_average=False).data[0]  # sum up batch loss
        pred = output.data.max(1)[1]  # get the index of the max log-probability
        correct += pred.eq(target.data.view_as(pred)).cpu().sum()
    test_loss /= len(test_loader.dataset)
    print('\nTest set: Average loss: {:.4f}, Accuracy: {}/{} ({:.0f}%)\n'.format(
        test_loss, correct, len(test_loader.dataset),
        100. * correct / len(test_loader.dataset)))
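Passing size_average=False makes nll_loss return the summed loss over the batch, so accumulating it and dividing once by len(test_loader.dataset) gives an exact per-sample average even when the final batch is smaller than the rest. In current PyTorch the same thing is spelled reduction='sum'; a minimal sketch of the equivalent modern accumulation (reusing the model and test_loader names from above):

total_loss, n = 0.0, 0
with torch.no_grad():
    for data, target in test_loader:
        output = model(data)
        total_loss += F.nll_loss(output, target, reduction='sum').item()
        n += target.size(0)
avg_loss = total_loss / n  # exact per-sample average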
def test():
    model.eval()
    test_loss = 0
    correct = 0
    for data, target in test_loader:
        if args.cuda:
            data, target = data.cuda(), target.cuda()
        data, target = Variable(data, volatile=True), Variable(target)
        output = model(data)
        test_loss += F.nll_loss(output, target, size_average=False).data[0]  # sum up batch loss
        pred = output.data.max(1, keepdim=True)[1]  # get the index of the max log-probability
        correct += pred.eq(target.data.view_as(pred)).cpu().sum()
    test_loss /= len(test_loader.dataset)
    print('\nTest set: Average loss: {:.4f}, Accuracy: {}/{} ({:.0f}%)\n'.format(
        test_loss, correct, len(test_loader.dataset),
        100. * correct / len(test_loader.dataset)))
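The keepdim=True variant keeps the reduced class dimension, so pred has shape (N, 1) and target.data.view_as(pred) lines up directly; the earlier variant without keepdim produces a flat (N,) index vector instead. A shape-only sketch:

output = torch.randn(4, 10)
pred_flat = output.max(1)[1]                # shape: (4,)
pred_kept = output.max(1, keepdim=True)[1]  # shape: (4, 1)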
def train(args, epoch, net, trainLoader, optimizer, trainF):
    net.train()
    nProcessed = 0
    nTrain = len(trainLoader.dataset)
    for batch_idx, (data, target) in enumerate(trainLoader):
        if args.cuda:
            data, target = data.cuda(), target.cuda()
        data, target = Variable(data), Variable(target)
        optimizer.zero_grad()
        output = net(data)
        loss = F.nll_loss(output, target)
        # make_graph.save('/tmp/t.dot', loss.creator); assert(False)
        loss.backward()
        optimizer.step()
        nProcessed += len(data)
        pred = output.data.max(1)[1]  # get the index of the max log-probability
        incorrect = pred.ne(target.data).cpu().sum()
        err = 100. * incorrect / len(data)
        partialEpoch = epoch + batch_idx / len(trainLoader) - 1
        print('Train Epoch: {:.2f} [{}/{} ({:.0f}%)]\tLoss: {:.6f}\tError: {:.6f}'.format(
            partialEpoch, nProcessed, nTrain, 100. * batch_idx / len(trainLoader),
            loss.data[0], err))
        trainF.write('{},{},{}\n'.format(partialEpoch, loss.data[0], err))
        trainF.flush()
def test(args, epoch, net, testLoader, optimizer, testF):
    net.eval()
    test_loss = 0
    incorrect = 0
    for data, target in testLoader:
        if args.cuda:
            data, target = data.cuda(), target.cuda()
        data, target = Variable(data, volatile=True), Variable(target)
        output = net(data)
        test_loss += F.nll_loss(output, target).data[0]
        pred = output.data.max(1)[1]  # get the index of the max log-probability
        incorrect += pred.ne(target.data).cpu().sum()
    test_loss /= len(testLoader)  # loss function already averages over batch size
    nTotal = len(testLoader.dataset)
    err = 100. * incorrect / nTotal
    print('\nTest set: Average loss: {:.4f}, Error: {}/{} ({:.0f}%)\n'.format(
        test_loss, incorrect, nTotal, err))
    testF.write('{},{},{}\n'.format(epoch, test_loss, err))
    testF.flush()
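Note that dividing the accumulated per-batch means by len(testLoader) (the number of batches) is only an exact per-sample average when every batch has the same size; with drop_last=False, the final smaller batch is slightly over-weighted. The size_average=False / reduction='sum' pattern shown earlier avoids this. A toy illustration of the discrepancy (hypothetical batch sizes):

# Two batches of per-sample losses: 4 samples, then 1 leftover sample.
batch_means = [sum([1.0, 1.0, 1.0, 1.0]) / 4, sum([3.0]) / 1]
mean_of_means = sum(batch_means) / len(batch_means)  # 2.0
true_mean = (1.0 * 4 + 3.0) / 5                      # 1.4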
def test():
    model.eval()
    test_loss = 0
    correct = 0
    for data, target in test_loader:
        if args.cuda:
            data, target = data.cuda(), target.cuda()
        data, target = Variable(data, volatile=True), Variable(target)
        output = model(data)
        # sum up batch loss
        test_loss += F.nll_loss(output, target, size_average=False).data[0]
        # get the index of the max log-probability
        pred = output.data.max(1, keepdim=True)[1]
        correct += pred.eq(target.data.view_as(pred)).cpu().sum()
    test_loss /= len(test_loader.dataset)
    print('\nTest set: Average loss: {:.4f}, Accuracy: {}/{} ({:.0f}%)\n'
          .format(test_loss, correct, len(test_loader.dataset),
                  100. * correct / len(test_loader.dataset)))
def train_(self, epoch):
    self.train()
    for batch_idx, (data, target) in enumerate(train_loader):
        correct = 0
        if args.cuda:
            data, target = data.cuda(), target.cuda()
        data, target = Variable(data), Variable(target)
        self.optimizer.zero_grad()
        output = self(data)
        loss = F.nll_loss(output, target)
        loss.backward()
        pred = output.data.max(1)[1]  # get the index of the max log-probability
        correct += pred.eq(target.data).cpu().sum()
        accuracy = 100. * correct / len(data)
        self.optimizer.step()
        if batch_idx % args.log_interval == 0:
            print('Train Epoch: {} [{}/{} ({:.0f}%)]\tLoss: {:.6f}, Accuracy: {}/{} ({:.4f}%)'.format(
                epoch, batch_idx * len(data), len(train_loader.dataset),
                100. * batch_idx / len(train_loader), loss.data[0],
                correct, len(data),
                accuracy))
        self.train_acc.append(accuracy)
def test_(self, epoch):
    self.eval()
    test_loss = 0
    correct = 0
    for data, target in test_loader:
        if args.cuda:
            data, target = data.cuda(), target.cuda()
        data, target = Variable(data, volatile=True), Variable(target)
        output = self(data)
        test_loss += F.nll_loss(output, target).data[0]
        pred = output.data.max(1)[1]  # get the index of the max log-probability
        correct += pred.eq(target.data).cpu().sum()
    test_loss /= len(test_loader)  # loss function already averages over batch size
    accuracy = 100. * correct / len(test_loader.dataset)
    print('\nTest set: Average loss: {:.4f}, Accuracy: {}/{} ({:.4f}%)\n'.format(
        test_loss, correct, len(test_loader.dataset),
        accuracy))
    self.test_acc.append(accuracy)
def test_normal(model, test_loader):
    test_loss = 0
    correct = 0
    for data, target in test_loader:
        if cuda:
            data, target = data.cuda(), target.cuda()
        data, target = Variable(data), Variable(target)
        output = model(data)
        test_loss += F.nll_loss(output, target).data[0]
        pred = output.data.max(1)[1]
        correct += pred.eq(target.data).cpu().sum()
    test_loss /= len(test_loader)
    print('\nTest set: Average loss: {:.4f}, Accuracy: {}/{} ({:.0f}%)\n'.format(
        test_loss, correct, len(test_loader.dataset),
        100 * correct / len(test_loader.dataset)))
    return test_loss
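Returning the averaged loss makes this variant easy to hook into validation-driven learning-rate scheduling. A hedged usage sketch (the scheduler wiring and the epochs name are not part of the original snippet):

from torch.optim.lr_scheduler import ReduceLROnPlateau

scheduler = ReduceLROnPlateau(optimizer, mode='min', factor=0.1, patience=5)
for epoch in range(1, epochs + 1):
    train(epoch)
    val_loss = test_normal(model, test_loader)
    scheduler.step(val_loss)  # lower the LR when the test loss plateaus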
def test_normal(model):
    test_loss = 0
    correct = 0
    for data, target in test_loader:
        if cuda:
            data, target = data.cuda(), target.cuda()
        data, target = Variable(data), Variable(target)
        output = model(data)
        test_loss += F.nll_loss(output, target).data[0]
        pred = output.data.max(1)[1]
        correct += pred.eq(target.data).cpu().sum()
    test_loss /= len(test_loader)
    print('\nTest set: Average loss: {:.4f}, Accuracy: {}/{} ({:.0f}%)\n'.format(
        test_loss, correct, len(test_loader.dataset),
        100 * correct / len(test_loader.dataset)))
    return test_loss
def test(epoch):
    model.eval()
    test_loss = 0
    correct = 0
    for data, target in test_loader:
        if args.cuda:
            data, target = data.cuda(), target.cuda()
        data, target = Variable(data, volatile=True), Variable(target)
        output = model(data)
        test_loss += F.nll_loss(output, target).data[0]
        pred = output.data.max(1)[1]  # get the index of the max log-probability
        correct += pred.eq(target.data).cpu().sum()
    test_loss /= len(test_loader)  # loss function already averages over batch size
    print('\nTest set: Average loss: {:.4f}, Accuracy: {}/{} ({:.0f}%)\n'.format(
        test_loss, correct, len(test_loader.dataset),
        100. * correct / len(test_loader.dataset)))
def train(epoch):
    model.train()
    for batch_idx, (data, target) in enumerate(train_loader):
        if args.cuda:
            data, target = data.cuda(), target.cuda()
        data, target = Variable(data), Variable(target)
        optimizer.zero_grad()
        output = model(data)
        loss = F.nll_loss(output, target)
        loss.backward()
        optimizer.step()
        if batch_idx % args.log_interval == 0:
            print('Train Epoch: {} [{}/{} ({:.0f}%)]\tLoss: {:.6f}'.format(
                epoch, batch_idx * len(data), len(train_loader.dataset),
                100. * batch_idx / len(train_loader), loss.data[0]))
def test(epoch):
    model.eval()
    test_loss = 0
    correct = 0
    for data, target in test_loader:
        if args.cuda:
            data, target = data.cuda(), target.cuda()
        data, target = Variable(data, volatile=True), Variable(target)
        output = model(data)
        test_loss += F.nll_loss(output, target).data[0]
        pred = output.data.max(1)[1]  # get the index of the max log-probability
        correct += pred.eq(target.data).cpu().sum()
    test_loss /= len(test_loader)  # loss function already averages over batch size
    print('\nTest set: Average loss: {:.4f}, Accuracy: {}/{} ({:.0f}%)\n'.format(
        test_loss, correct, len(test_loader.dataset),
        100. * correct / len(test_loader.dataset)))
def loss(self, batch_data, test=False):
    # unpack data
    (source, targets), conds = batch_data, None
    if self.conds is not None:
        (source, *conds), (targets, *_) = source, targets
    # get hidden from previous batch (if stored)
    hidden = self.hidden_state.get('hidden', None)
    # run RNN
    output, hidden, _ = self(source, hidden=hidden, conds=conds)
    # store hidden for next batch
    self.hidden_state['hidden'] = u.repackage_hidden(hidden)
    # compute loss and backward
    loss = F.nll_loss(output, targets.view(-1), size_average=True)
    if not test:
        loss.backward()
    return (loss.data[0], ), source.nelement()
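The u.repackage_hidden helper is not defined in this snippet; its usual job in recurrent language-model training loops is to detach the hidden state from the previous batch's graph so that backpropagation is truncated at batch boundaries. A common implementation looks roughly like this (a sketch, not this snippet's actual utility module):

def repackage_hidden(h):
    """Detach hidden state(s) from their computation history."""
    if isinstance(h, tuple):  # e.g. an LSTM returns (h_n, c_n)
        return tuple(repackage_hidden(v) for v in h)
    return h.detach()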
def cross_entropy2d(input, target, weight=None, size_average=True):
    # input: (n, c, h, w), target: (n, h, w)
    n, c, h, w = input.size()
    # log_p: (n, c, h, w)
    log_p = F.log_softmax(input)
    # log_p: (n*h*w, c)
    log_p = log_p.transpose(1, 2).transpose(2, 3).contiguous().view(-1, c)
    log_p = log_p[target.view(n, h, w, 1).repeat(1, 1, 1, c) >= 0]
    log_p = log_p.view(-1, c)
    # target: (n*h*w,)
    mask = target >= 0
    target = target[mask]
    loss = F.nll_loss(log_p, target, weight=weight, size_average=False)
    if size_average:
        loss /= mask.data.sum()
    return loss
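This flattens a dense-prediction (segmentation) problem into the (N, C) shape nll_loss expects, masking out pixels whose label is negative. For reference, a shape sketch of the inputs plus the modern one-call equivalent (current PyTorch accepts K-dimensional inputs in cross_entropy directly; the names and shapes here are made up, and -1 is assumed to be the only ignore label in use):

scores = torch.randn(2, 21, 64, 64)          # (n, c, h, w) segmentation logits
labels = torch.randint(-1, 21, (2, 64, 64))  # (n, h, w); negative labels are ignored

# Modern equivalent of cross_entropy2d(scores, labels):
loss = F.cross_entropy(scores, labels, ignore_index=-1)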
def nllloss_no_reduce_weights_ignore_index_test():
    t = Variable(torch.Tensor(15).uniform_().mul(10).floor().long())
    weight = torch.rand(10)

    def kwargs(i):
        return {'weight': weight.type_as(i), 'reduce': False,
                'ignore_index': 2}

    return dict(
        fullname='NLLLoss_no_reduce_weights_ignore_index',
        constructor=wrap_functional(
            lambda i: F.nll_loss(i, t.type_as(i).long(), **kwargs(i.data))),
        input_fn=lambda: torch.rand(15, 10).add(1e-2).log(),
        reference_fn=lambda i, _:
            loss_reference_fns['NLLLoss'](i, t.type_as(i).long(), **kwargs(i)),
        pickle=False)
def nllloss_no_reduce_weights_ignore_index_neg_test():
    t = Variable(torch.Tensor(15).uniform_().mul(10).floor().long())
    weight = torch.rand(10)

    def kwargs(i):
        return {'weight': weight.type_as(i), 'reduce': False,
                'ignore_index': -1}

    return dict(
        fullname='NLLLoss_no_reduce_weights_ignore_index_neg',
        constructor=wrap_functional(
            lambda i: F.nll_loss(i, t.type_as(i).long(), **kwargs(i.data))),
        input=torch.rand(15, 10).add(1e-2).log(),
        reference_fn=lambda i, _:
            loss_reference_fns['NLLLoss'](i, t.type_as(i).long(), **kwargs(i)),
        pickle=False)
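These two builders come from a loss-function test harness: with reduce=False (reduction='none' in the current API), nll_loss returns one loss per sample, and entries whose target equals ignore_index come back as exact zeros. A small sketch of that behaviour in the modern spelling:

log_probs = F.log_softmax(torch.randn(5, 10), dim=1)
target = torch.tensor([2, 4, 2, 0, 9])

per_sample = F.nll_loss(log_probs, target, reduction='none', ignore_index=2)
# Entries whose target equals ignore_index contribute a loss of exactly 0:
assert per_sample[0] == 0 and per_sample[2] == 0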
def train(epoch):
    model.train()
    for batch_idx, (data, target) in enumerate(train_loader):
        if use_cuda:
            data, target = data.cuda(), target.cuda()
        data, target = Variable(data), Variable(target)
        optimizer.zero_grad()
        output = model(data)
        loss = F.nll_loss(output, target)
        loss.backward()
        optimizer.step()
        if batch_idx % 500 == 0:
            print('Train Epoch: {} [{}/{} ({:.0f}%)]\tLoss: {:.6f}'.format(
                epoch, batch_idx * len(data), len(train_loader.dataset),
                100. * batch_idx / len(train_loader), loss.data[0]))
#
# A simple test procedure to measure the STN's performance on MNIST.
#
def train(epoch):
    model.train()
    for batch_idx, (data, target) in enumerate(train_loader):
        if args.cuda:
            data, target = data.cuda(), target.cuda()
        data, target = Variable(data), Variable(target)
        optimizer.zero_grad()
        output = model(data)
        loss = F.nll_loss(output, target)
        loss.backward()
        optimizer.step()
        if batch_idx % args.log_interval == 0:
            print('Train Epoch: {} [{}/{} ({:.0f}%)]\tLoss: {:.6f}'.format(
                epoch, batch_idx * len(data), len(train_loader.dataset),
                100. * batch_idx / len(train_loader), loss.data[0]))
def train(epoch):
    model.train()
    for batch_idx, (data, target) in enumerate(train_loader):
        data, target = Variable(data), Variable(target)
        optimizer.zero_grad()
        output = model(data)
        loss = F.nll_loss(output, target)
        loss.backward()
        optimizer.step()
        if batch_idx % args.log_interval == 0:
            print('Train Epoch: {} [{}/{} ({:.0f}%)]\tLoss: {:.6f}'.format(
                epoch, batch_idx * len(data), len(train_loader.dataset),
                100. * batch_idx / len(train_loader), loss.data[0]))
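All of the snippets above use pre-0.4 PyTorch idioms: Variable wrappers, volatile=True for inference, loss.data[0] for scalar extraction, and size_average= for reduction control. On current PyTorch the same train/test loop needs none of these; a minimal sketch of the modern equivalents (device and log_interval are assumed names):

def train(epoch):
    model.train()
    for batch_idx, (data, target) in enumerate(train_loader):
        data, target = data.to(device), target.to(device)  # replaces .cuda() + Variable
        optimizer.zero_grad()
        loss = F.nll_loss(model(data), target)
        loss.backward()
        optimizer.step()
        if batch_idx % log_interval == 0:
            print('epoch {} batch {} loss {:.6f}'.format(
                epoch, batch_idx, loss.item()))            # replaces loss.data[0]

def test():
    model.eval()
    test_loss, correct = 0.0, 0
    with torch.no_grad():                                  # replaces volatile=True
        for data, target in test_loader:
            data, target = data.to(device), target.to(device)
            output = model(data)
            test_loss += F.nll_loss(output, target, reduction='sum').item()
            correct += (output.argmax(dim=1) == target).sum().item()
    test_loss /= len(test_loader.dataset)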