def test_adadelta(self):
    # Compare the new optim.Adadelta against the legacy implementation
    # (via wrap_old_fn) on the Rosenbrock function, then run basic sanity cases.
    self._test_rosenbrock(
        lambda params: optim.Adadelta(params),
        wrap_old_fn(old_optim.adadelta)
    )
    self._test_rosenbrock(
        lambda params: optim.Adadelta(params, rho=0.95),
        wrap_old_fn(old_optim.adadelta, rho=0.95)
    )
    self._test_rosenbrock(
        lambda params: optim.Adadelta(params, weight_decay=1e-2),
        wrap_old_fn(old_optim.adadelta, weightDecay=1e-2)
    )
    self._test_basic_cases(
        lambda weight, bias: optim.Adadelta([weight, bias])
    )
    self._test_basic_cases(
        lambda weight, bias: optim.Adadelta(
            self._build_params_dict(weight, bias, rho=0.95))
    )
def _makeOptimizer(self):
    if self.method == 'sgd':
        self.optimizer = optim.SGD(self.params, lr=self.lr)
    elif self.method == 'adagrad':
        self.optimizer = optim.Adagrad(self.params, lr=self.lr)
    elif self.method == 'adadelta':
        self.optimizer = optim.Adadelta(self.params, lr=self.lr)
    elif self.method == 'adam':
        self.optimizer = optim.Adam(self.params, lr=self.lr)
    else:
        raise RuntimeError("Invalid optim method: " + self.method)
def __init__(self, config, model):
    # assert isinstance(model, Model)
    self.config = config
    self.model = model
    # self.optimizer = O.Adadelta(config.init_lr)
def set_parameters(self, params):
    self.params = list(params)  # careful: params may be a generator
    if self.method == 'sgd':
        self.optimizer = optim.SGD(self.params, lr=self.lr)
    elif self.method == 'adagrad':
        self.optimizer = optim.Adagrad(self.params, lr=self.lr)
    elif self.method == 'adadelta':
        self.optimizer = optim.Adadelta(self.params, lr=self.lr)
    elif self.method == 'adam':
        self.optimizer = optim.Adam(self.params, lr=self.lr)
    else:
        raise RuntimeError("Invalid optim method: " + self.method)
def adadelta(w, lr=1.0, rho=0.9, eps=1e-06, w_decay=0):
    # Thin wrapper; the defaults mirror torch.optim.Adadelta's own.
    return optim.Adadelta(params=w, lr=lr,
                          rho=rho, eps=eps,
                          weight_decay=w_decay)
def _makeOptimizer(self):
    if self.method == 'sgd':
        self.optimizer = optim.SGD(self.params, lr=self.lr)
    elif self.method == 'adagrad':
        self.optimizer = optim.Adagrad(self.params, lr=self.lr)
    elif self.method == 'adadelta':
        self.optimizer = optim.Adadelta(self.params, lr=self.lr)
    elif self.method == 'adam':
        self.optimizer = optim.Adam(self.params, lr=self.lr, betas=(0.5, 0.999))
    else:
        raise RuntimeError("Invalid optim method: " + self.method)
def _makeOptimizer(self):
    if self.method == 'sgd':
        self.optimizer = optim.SGD(self.params, lr=self.lr)
    elif self.method == 'adagrad':
        self.optimizer = optim.Adagrad(self.params, lr=self.lr)
    elif self.method == 'adadelta':
        self.optimizer = optim.Adadelta(self.params, lr=self.lr)
    elif self.method == 'adam':
        self.optimizer = optim.Adam(self.params, lr=self.lr)
    elif self.method == 'rmsprop':
        self.optimizer = optim.RMSprop(self.params, lr=self.lr)
    else:
        raise RuntimeError("Invalid optim method: " + self.method)
def set_parameters(self, params):
    self.params = [p for p in params if p.requires_grad]
    if self.method == 'sgd':
        self.optimizer = optim.SGD(self.params, lr=self.lr)
    elif self.method == 'adagrad':
        self.optimizer = optim.Adagrad(self.params, lr=self.lr)
        # Reset Adagrad's per-parameter squared-gradient accumulator
        # to the configured initial value.
        for group in self.optimizer.param_groups:
            for p in group['params']:
                self.optimizer.state[p]['sum'] = self.optimizer\
                    .state[p]['sum'].fill_(self.adagrad_accum)
    elif self.method == 'adadelta':
        self.optimizer = optim.Adadelta(self.params, lr=self.lr)
    elif self.method == 'adam':
        self.optimizer = optim.Adam(self.params, lr=self.lr,
                                    betas=self.betas, eps=1e-9)
    else:
        raise RuntimeError("Invalid optim method: " + self.method)
# We use the default parameters for Adam that are suggested by
# the original paper https://arxiv.org/pdf/1412.6980.pdf.
# These values are also used by other established implementations,
# e.g. https://www.tensorflow.org/api_docs/python/tf/train/AdamOptimizer
# and https://keras.io/optimizers/.
# Slightly different values were used recently in the paper
# "Attention Is All You Need" (https://arxiv.org/pdf/1706.03762.pdf),
# notably beta2=0.98; however, beta2=0.999 is still arguably the more
# established value, so we use that here as well.
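As a minimal illustration of those defaults (a sketch, not part of the original sources; it assumes the usual `import torch.optim as optim`):

import torch
import torch.optim as optim

params = [torch.nn.Parameter(torch.zeros(3))]

# Spelling out the paper's suggested defaults explicitly...
opt_explicit = optim.Adam(params, lr=1e-3, betas=(0.9, 0.999), eps=1e-8)

# ...matches torch.optim.Adam's own defaults, so this is equivalent:
opt_default = optim.Adam(params)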
def __init__(self, args, dataloader_train, dataloader_dev, char_embedding_config, word_embedding_config,
             sentence_encoding_config, pair_encoding_config, self_matching_config, pointer_config):
    # for validation
    expected_version = "1.1"
    with open(args.dev_json) as dataset_file:
        dataset_json = json.load(dataset_file)
        if dataset_json['version'] != expected_version:
            print('Evaluation expects v-' + expected_version +
                  ', but got dataset with v-' + dataset_json['version'],
                  file=sys.stderr)
        self.dev_dataset = dataset_json['data']

    self.dataloader_train = dataloader_train
    self.dataloader_dev = dataloader_dev

    self.model = RNet.Model(args, char_embedding_config, word_embedding_config, sentence_encoding_config,
                            pair_encoding_config, self_matching_config, pointer_config)
    self.parameters_trainable = list(
        filter(lambda p: p.requires_grad, self.model.parameters()))
    self.optimizer = optim.Adadelta(self.parameters_trainable, rho=0.95)
    self.best_f1 = 0
    self.step = 0
    self.start_epoch = args.start_epoch
    self.name = args.name
    self.start_time = datetime.datetime.now().strftime('%b-%d_%H-%M')

    if args.resume:
        if os.path.isfile(args.resume):
            print("=> loading checkpoint '{}'".format(args.resume))
            checkpoint = torch.load(args.resume)
            self.start_epoch = checkpoint['epoch']
            self.best_f1 = checkpoint['best_f1']
            self.name = checkpoint['name']
            self.step = checkpoint['step']
            self.model.load_state_dict(checkpoint['state_dict'])
            self.optimizer.load_state_dict(checkpoint['optimizer'])
            self.start_time = checkpoint['start_time']
            print("=> loaded checkpoint '{}' (epoch {})"
                  .format(args.resume, checkpoint['epoch']))
        else:
            raise ValueError("=> no checkpoint found at '{}'".format(args.resume))
    else:
        self.name += "_" + self.start_time

    # use whichever device is available
    if torch.cuda.is_available():
        self.model = self.model.cuda(args.device_id)
    else:
        self.model = self.model.cpu()

    self.loss_fn = torch.nn.CrossEntropyLoss()

    configure("log/%s" % (self.name), flush_secs=5)
    self.checkpoint_path = os.path.join(args.checkpoint_path, self.name)
    make_dirs(self.checkpoint_path)
def parse_args():
    parser = argparse.ArgumentParser(
        description='A Simple Demo of Generative Adversarial Networks with 2D Samples',
        formatter_class=argparse.ArgumentDefaultsHelpFormatter)
    parser.add_argument('input_path',
                        help='Image or directory containing images to define distribution')
    parser.add_argument('--z_dim',
                        help='Dimensionality of latent space',
                        type=int, default=2)
    parser.add_argument('--iterations',
                        help='Num of training iterations',
                        type=int, default=2000)
    parser.add_argument('--batch_size',
                        help='Batch size of each kind',
                        type=int, default=2000)
    parser.add_argument('--optimizer',
                        help='Optimizer: Adadelta/Adam/RMSprop/SGD',
                        type=str, default='Adadelta')
    parser.add_argument('--d_lr',
                        help='Learning rate of discriminator; for Adadelta it is the base learning rate',
                        type=float, default=1)
    parser.add_argument('--g_lr',
                        help='Learning rate of generator; for Adadelta it is the base learning rate',
                        type=float, default=1)
    parser.add_argument('--d_steps',
                        help='Steps of discriminator in each iteration',
                        type=int, default=3)
    parser.add_argument('--g_steps',
                        help='Steps of generator in each iteration',
                        type=int, default=1)
    parser.add_argument('--d_hidden_size',
                        help='Num of hidden units in discriminator',
                        type=int, default=100)
    parser.add_argument('--g_hidden_size',
                        help='Num of hidden units in generator',
                        type=int, default=50)
    parser.add_argument('--display_interval',
                        help='Interval of iterations to display/export images',
                        type=int, default=10)
    parser.add_argument('--no_display',
                        help='Do not show plots during training', action='store_true')
    parser.add_argument('--export',
                        help='Export images', action='store_true')
    parser.add_argument('--cpu',
                        help='Set to CPU mode', action='store_true')
    args = parser.parse_args()

    args.input_path = args.input_path.rstrip(os.sep)
    args.optimizer = OPTIMIZERS[args.optimizer.lower()]
    return args
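parse_args indexes an OPTIMIZERS table defined elsewhere in that script. A plausible reconstruction (hypothetical, inferred only from the --optimizer help string and the .lower() call) would be:

import torch.optim as optim

# Hypothetical: maps the lowercased --optimizer value to a torch.optim class.
OPTIMIZERS = {
    'adadelta': optim.Adadelta,
    'adam': optim.Adam,
    'rmsprop': optim.RMSprop,
    'sgd': optim.SGD,
}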
def get_optimizer(s):
    """
    Parse optimizer parameters.
    Input should be of the form:
        - "sgd,lr=0.01"
        - "adagrad,lr=0.1,lr_decay=0.05"
    """
    if "," in s:
        method = s[:s.find(',')]
        optim_params = {}
        for x in s[s.find(',') + 1:].split(','):
            split = x.split('=')
            assert len(split) == 2
            assert re.match(r"^[+-]?(\d+(\.\d*)?|\.\d+)$", split[1]) is not None
            optim_params[split[0]] = float(split[1])
    else:
        method = s
        optim_params = {}

    if method == 'adadelta':
        optim_fn = optim.Adadelta
    elif method == 'adagrad':
        optim_fn = optim.Adagrad
    elif method == 'adam':
        optim_fn = optim.Adam
    elif method == 'adamax':
        optim_fn = optim.Adamax
    elif method == 'asgd':
        optim_fn = optim.ASGD
    elif method == 'rmsprop':
        optim_fn = optim.RMSprop
    elif method == 'rprop':
        optim_fn = optim.Rprop
    elif method == 'sgd':
        optim_fn = optim.SGD
        assert 'lr' in optim_params
    else:
        raise Exception('Unknown optimization method: "%s"' % method)

    # check that we give good parameters to the optimizer
    expected_args = inspect.getfullargspec(optim_fn.__init__)[0]
    assert expected_args[:2] == ['self', 'params']
    if not all(k in expected_args[2:] for k in optim_params.keys()):
        raise Exception('Unexpected parameters: expected "%s", got "%s"' % (
            str(expected_args[2:]), str(optim_params.keys())))
    return optim_fn, optim_params
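A usage sketch (the `model` variable is a stand-in for any nn.Module; the call pattern follows from the function returning the optimizer class and its keyword arguments separately):

# `model` is hypothetical here; any nn.Module works.
optim_fn, optim_params = get_optimizer("adadelta,lr=1.0,rho=0.9")
optimizer = optim_fn(model.parameters(), **optim_params)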
def get_optimizer(model, s):
    """
    Parse optimizer parameters.
    Input should be of the form:
        - "sgd,lr=0.01"
        - "adagrad,lr=0.1,lr_decay=0.05"
    """
    if "," in s:
        method = s[:s.find(',')]
        optim_params = {}
        for x in s[s.find(',') + 1:].split(','):
            split = x.split('=')
            assert len(split) == 2
            assert re.match(r"^[+-]?(\d+(\.\d*)?|\.\d+)$", split[1]) is not None
            optim_params[split[0]] = float(split[1])
    else:
        method = s
        optim_params = {}

    if method == 'adadelta':
        optim_fn = optim.Adadelta
    elif method == 'adagrad':
        optim_fn = optim.Adagrad
    elif method == 'adam':
        optim_fn = optim.Adam
        # Fold beta1/beta2 from the spec string into Adam's `betas` tuple.
        optim_params['betas'] = (optim_params.get('beta1', 0.5),
                                 optim_params.get('beta2', 0.999))
        optim_params.pop('beta1', None)
        optim_params.pop('beta2', None)
    elif method == 'adamax':
        optim_fn = optim.Adamax
    elif method == 'asgd':
        optim_fn = optim.ASGD
    elif method == 'rmsprop':
        optim_fn = optim.RMSprop
    elif method == 'rprop':
        optim_fn = optim.Rprop
    elif method == 'sgd':
        optim_fn = optim.SGD
        assert 'lr' in optim_params
    else:
        raise Exception('Unknown optimization method: "%s"' % method)

    # check that we give good parameters to the optimizer
    expected_args = inspect.getfullargspec(optim_fn.__init__)[0]
    assert expected_args[:2] == ['self', 'params']
    if not all(k in expected_args[2:] for k in optim_params.keys()):
        raise Exception('Unexpected parameters: expected "%s", got "%s"' % (
            str(expected_args[2:]), str(optim_params.keys())))
    return optim_fn(model.parameters(), **optim_params)
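Unlike the previous variant, this one constructs the optimizer directly, e.g. (with a hypothetical `model`):

optimizer = get_optimizer(model, "adam,lr=0.001,beta1=0.9,beta2=0.999")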