def fit(self, observations, labels):
    """Fit ``self.model`` to ``(observations, labels)`` with an L-BFGS step.

    Up to ten attempts are made. The first uses ``self.lr`` and each
    following attempt halves the learning rate. An attempt is accepted as
    soon as the resulting parameters contain no NaN; otherwise the
    parameters are rolled back to their pre-fit values and the next, smaller
    learning rate is tried.

    If every attempt diverges the method returns normally with the model
    left at its pre-fit parameters.

    Args:
        observations: inputs forwarded to ``self.predict``.
        labels: targets passed to ``self.loss_fn`` together with the
            predictions.

    Side effects:
        Rebinds ``self.optimizer`` to a fresh ``optim.LBFGS`` instance on
        every attempt and prints a message whenever an attempt diverges.
    """
    def closure():
        # L-BFGS calls this repeatedly: recompute the loss and its gradients.
        predicted = self.predict(observations)
        loss = self.loss_fn(predicted, labels)
        self.optimizer.zero_grad()
        loss.backward()
        return loss

    # Private snapshot of the pre-fit weights, never handed to the model.
    old_params = parameters_to_vector(self.model.parameters()).data.clone()
    for lr in self.lr * .5**np.arange(10):
        self.optimizer = optim.LBFGS(self.model.parameters(), lr=lr)
        self.optimizer.step(closure)
        current_params = parameters_to_vector(self.model.parameters())
        # Vectorised NaN test instead of iterating the array in Python.
        diverged = np.isnan(current_params.data.cpu().numpy()).any()
        if not diverged:
            return
        print("LBFGS optimization diverged. Rolling back update...")
        # Restore from a fresh copy: vector_to_parameters points every
        # parameter at a slice of the vector it is given, so passing the
        # snapshot itself would let the next in-place optimizer step
        # overwrite it and make later rollbacks restore NaNs.
        vector_to_parameters(old_params.clone(), self.model.parameters())
python类LBFGS的实例源码
def get_input_param_optimizer(input_img):
    """Wrap the input image as a trainable parameter and attach L-BFGS to it.

    Args:
        input_img: object exposing a ``.data`` tensor holding the image.

    Returns:
        A ``(parameter, optimizer)`` pair: the ``nn.Parameter`` built from
        ``input_img.data`` and an ``optim.LBFGS`` instance whose only
        optimized tensor is that parameter.
    """
    # Wrapping the data in a Parameter marks the image itself as the thing
    # that requires a gradient.
    image_param = nn.Parameter(input_img.data)
    return image_param, optim.LBFGS([image_param])
######################################################################
# **Last step**: the loop of gradient descent. At each step, we must feed
# the network with the updated input in order to compute the new losses,
# we must run the ``backward`` methods of each loss to dynamically compute
# their gradients and perform the step of gradient descent. The optimizer
# requires as argument a "closure": a function that reevaluates the model
# and returns the loss.
#
# However, there's a small catch. The optimized image may take values
# between :math:`-\infty` and :math:`+\infty` instead of staying between 0
# and 1. In other words, the image might be well optimized and still have
# absurd values. In fact, we must perform the optimization under constraints
# in order to keep valid values in our input image. There is a simple
# solution: at each step, correct the image so that its values stay within
# the 0-1 interval.
#
def test_lbfgs(self):
    """Exercise ``optim.LBFGS`` through the shared optimizer test helpers.

    ``_test_rosenbrock`` is called twice, each time with an ``optim.LBFGS``
    factory and the legacy ``old_optim.lbfgs`` wrapped by ``wrap_old_fn``:
    first with default settings, then with ``lr=5e-2`` / ``max_iter=5`` and
    the matching ``learningRate`` / ``maxIter`` legacy arguments.
    ``_test_basic_cases`` is then called with a weight/bias factory and
    ``ignore_multidevice=True``.
    """
    def make_default(params):
        return optim.LBFGS(params)

    legacy_default = wrap_old_fn(old_optim.lbfgs)
    self._test_rosenbrock(make_default, legacy_default)

    def make_tuned(params):
        return optim.LBFGS(params, lr=5e-2, max_iter=5)

    legacy_tuned = wrap_old_fn(old_optim.lbfgs, learningRate=5e-2, maxIter=5)
    self._test_rosenbrock(make_tuned, legacy_tuned)

    def make_for_weight_and_bias(weight, bias):
        return optim.LBFGS([weight, bias])

    self._test_basic_cases(make_for_weight_and_bias, ignore_multidevice=True)
def lbfgs(w, lr=1, max_iter=20, max_eval=None, tol_grad=1e-05,
          tol_change=1e-09, hist_size=100, line_search_fun=None):
    """Create an ``nn.LBFGS`` object from abbreviated argument names.

    Each argument is forwarded under the keyword ``nn.LBFGS`` expects:

    * ``w`` -> ``params``
    * ``lr`` -> ``lr``
    * ``max_iter`` -> ``max_iter``
    * ``max_eval`` -> ``max_eval``
    * ``tol_grad`` -> ``tolerance_grad``
    * ``tol_change`` -> ``tolerance_change``
    * ``hist_size`` -> ``history_size``
    * ``line_search_fun`` -> ``line_search_fn``

    Returns:
        Whatever ``nn.LBFGS`` returns for those keyword arguments.
    """
    options = dict(
        params=w,
        lr=lr,
        max_iter=max_iter,
        max_eval=max_eval,
        tolerance_grad=tol_grad,
        tolerance_change=tol_change,
        history_size=hist_size,
        line_search_fn=line_search_fun,
    )
    return nn.LBFGS(**options)
def test_lbfgs(self):
    """Exercise ``optim.LBFGS`` through the shared optimizer test helpers.

    ``_test_rosenbrock`` is called twice, each time with an ``optim.LBFGS``
    factory and the legacy ``old_optim.lbfgs`` wrapped by ``wrap_old_fn``:
    first with default settings, then with ``lr=5e-2`` / ``max_iter=5`` and
    the matching ``learningRate`` / ``maxIter`` legacy arguments.
    ``_test_basic_cases`` is then called with a weight/bias factory and
    ``ignore_multidevice=True``.
    """
    def make_default(params):
        return optim.LBFGS(params)

    legacy_default = wrap_old_fn(old_optim.lbfgs)
    self._test_rosenbrock(make_default, legacy_default)

    def make_tuned(params):
        return optim.LBFGS(params, lr=5e-2, max_iter=5)

    legacy_tuned = wrap_old_fn(old_optim.lbfgs, learningRate=5e-2, maxIter=5)
    self._test_rosenbrock(make_tuned, legacy_tuned)

    def make_for_weight_and_bias(weight, bias):
        return optim.LBFGS([weight, bias])

    self._test_basic_cases(make_for_weight_and_bias, ignore_multidevice=True)
def test_lbfgs(self):
    """Exercise ``optim.LBFGS`` through the shared optimizer test helpers.

    ``_test_rosenbrock`` is called twice, each time with an ``optim.LBFGS``
    factory and the legacy ``old_optim.lbfgs`` wrapped by ``wrap_old_fn``:
    first with default settings, then with ``lr=5e-2`` / ``max_iter=5`` and
    the matching ``learningRate`` / ``maxIter`` legacy arguments.
    ``_test_basic_cases`` is then called with a weight/bias factory and
    ``ignore_multidevice=True``.
    """
    def make_default(params):
        return optim.LBFGS(params)

    legacy_default = wrap_old_fn(old_optim.lbfgs)
    self._test_rosenbrock(make_default, legacy_default)

    def make_tuned(params):
        return optim.LBFGS(params, lr=5e-2, max_iter=5)

    legacy_tuned = wrap_old_fn(old_optim.lbfgs, learningRate=5e-2, maxIter=5)
    self._test_rosenbrock(make_tuned, legacy_tuned)

    def make_for_weight_and_bias(weight, bias):
        return optim.LBFGS([weight, bias])

    self._test_basic_cases(make_for_weight_and_bias, ignore_multidevice=True)
def set_temperature(self, valid_loader):
    """
    Tune the temperature of the model (using the validation set).

    The temperature is fitted by minimising the negative log-likelihood
    (NLL) of the temperature-scaled validation logits with L-BFGS. NLL and
    ECE are printed before and after the fit, together with the fitted
    temperature.

    Args:
        valid_loader (DataLoader): validation set loader yielding
            ``(input, label)`` batches.

    Returns:
        ``self``, so that calls can be chained.
    """
    def as_float(result):
        # Flatten before indexing so both 0-dim and 1-element results can be
        # read; a bare ``result.data[0]`` raises on 0-dim tensors in current
        # PyTorch releases.
        return float(result.data.view(-1)[0])

    self.cuda()
    nll_criterion = nn.CrossEntropyLoss().cuda()
    ece_criterion = _ECELoss().cuda()

    # First: collect all the logits and labels for the validation set
    logits_list = []
    labels_list = []
    for inputs, label in valid_loader:
        # NOTE(review): ``volatile`` is ignored (with a warning) by
        # PyTorch >= 0.4; switch to ``torch.no_grad()`` once support for
        # older releases is dropped.
        input_var = Variable(inputs, volatile=True).cuda()
        logits_var = self.model(input_var)
        # Keep only the raw data so no autograd history is retained.
        logits_list.append(logits_var.data)
        labels_list.append(label)
    logits = torch.cat(logits_list).cuda()
    labels = torch.cat(labels_list).cuda()
    logits_var = Variable(logits)
    labels_var = Variable(labels)

    # Calculate NLL and ECE before temperature scaling
    before_temperature_nll = as_float(nll_criterion(logits_var, labels_var))
    before_temperature_ece = as_float(ece_criterion(logits_var, labels_var))
    print('Before temperature - NLL: %.3f, ECE: %.3f' % (before_temperature_nll, before_temperature_ece))

    # Next: optimize the temperature w.r.t. NLL
    optimizer = optim.LBFGS([self.temperature], lr=0.01, max_iter=50)

    def nll_closure():
        # L-BFGS evaluates the closure many times within a single step;
        # without clearing them, gradients from earlier evaluations would
        # accumulate into the current one.
        optimizer.zero_grad()
        loss = nll_criterion(self.temperature_scale(logits_var), labels_var)
        loss.backward()
        return loss

    optimizer.step(nll_closure)

    # Calculate NLL and ECE after temperature scaling
    after_temperature_nll = as_float(nll_criterion(self.temperature_scale(logits_var), labels_var))
    after_temperature_ece = as_float(ece_criterion(self.temperature_scale(logits_var), labels_var))
    print('Optimal temperature: %.3f' % as_float(self.temperature))
    print('After temperature - NLL: %.3f, ECE: %.3f' % (after_temperature_nll, after_temperature_ece))

    return self
def test_lbfgs(self):
    """Exercise ``optim.LBFGS`` through the shared optimizer test helpers.

    ``_test_rosenbrock`` is called twice, each time with an ``optim.LBFGS``
    factory and the legacy ``old_optim.lbfgs`` wrapped by ``wrap_old_fn``:
    first with default settings, then with ``lr=5e-2`` / ``max_iter=5`` and
    the matching ``learningRate`` / ``maxIter`` legacy arguments.
    ``_test_basic_cases`` is then called with a weight/bias factory and
    ``ignore_multidevice=True``.
    """
    def make_default(params):
        return optim.LBFGS(params)

    legacy_default = wrap_old_fn(old_optim.lbfgs)
    self._test_rosenbrock(make_default, legacy_default)

    def make_tuned(params):
        return optim.LBFGS(params, lr=5e-2, max_iter=5)

    legacy_tuned = wrap_old_fn(old_optim.lbfgs, learningRate=5e-2, maxIter=5)
    self._test_rosenbrock(make_tuned, legacy_tuned)

    def make_for_weight_and_bias(weight, bias):
        return optim.LBFGS([weight, bias])

    self._test_basic_cases(make_for_weight_and_bias, ignore_multidevice=True)
def get_input_param_optimizer(input_img):
    """Turn the input image into an optimizable parameter with its optimizer.

    Args:
        input_img: object exposing a ``.data`` tensor holding the image.

    Returns:
        A ``(parameter, optimizer)`` pair: the ``nn.Parameter`` built from
        ``input_img.data`` and an ``optim.LBFGS`` instance created over a
        list containing only that parameter.
    """
    trainable_image = nn.Parameter(input_img.data)
    lbfgs_optimizer = optim.LBFGS([trainable_image])
    return trainable_image, lbfgs_optimizer