def make_optimizer(self, env, q_func):
    opt = optimizers.Adam()
    opt.setup(q_func)
    return opt
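# A minimal, self-contained sketch of the Adam()/setup() pattern used above,
# assuming plain Chainer. The toy Linear link and the random data are made up
# for illustration and are not part of the original code.
import numpy as np
import chainer.functions as F
import chainer.links as L
from chainer import optimizers

q_func = L.Linear(4, 2)          # stand-in for a real Q-function link
opt = optimizers.Adam()          # defaults: alpha=1e-3, beta1=0.9, beta2=0.999
opt.setup(q_func)                # bind the optimizer to the link's parameters

x = np.random.rand(8, 4).astype(np.float32)
t = np.random.randint(0, 2, size=8).astype(np.int32)
loss = F.softmax_cross_entropy(q_func(x), t)

q_func.cleargrads()              # clear stale gradients
loss.backward()                  # backprop into q_func's parameters
opt.update()                     # apply one Adam step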
def create_agent(self, env):
    model = agents.a3c.A3CSeparateModel(
        pi=create_stochastic_policy_for_env(env),
        v=create_v_function_for_env(env))
    opt = optimizers.Adam()
    opt.setup(model)
    return agents.A3C(model, opt, t_max=1, gamma=0.99)
def create_agent(self, env):
    model = agents.acer.ACERSeparateModel(
        pi=create_stochastic_policy_for_env(env),
        q=create_state_q_function_for_env(env))
    opt = optimizers.Adam()
    opt.setup(model)
    rbuf = replay_buffer.EpisodicReplayBuffer(10 ** 4)
    return agents.ACER(model, opt, t_max=1, gamma=0.99,
                       replay_buffer=rbuf)
def create_agent(self, env):
    model = create_state_q_function_for_env(env)
    rbuf = replay_buffer.ReplayBuffer(10 ** 5)
    opt = optimizers.Adam()
    opt.setup(model)
    explorer = explorers.ConstantEpsilonGreedy(
        0.2, random_action_func=lambda: env.action_space.sample())
    return agents.DoubleDQN(
        model, opt, rbuf, gamma=0.99, explorer=explorer)
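# A rough sketch of how an agent built by create_agent() above could be
# exercised, assuming the surrounding code is ChainerRL (whose agents expose
# act_and_train / stop_episode_and_train) and a Gym-style env. The episode
# loop itself is illustrative and not part of the original code.
def run_episode(agent, env, max_steps=200):
    obs = env.reset()
    reward = 0.0
    done = False
    for _ in range(max_steps):
        action = agent.act_and_train(obs, reward)  # epsilon-greedy via the explorer
        obs, reward, done, _ = env.step(action)
        if done:
            break
    agent.stop_episode_and_train(obs, reward, done=done)  # flush the episode
    return agent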
def create_agent(self, env):
    model = create_state_q_function_for_env(env)
    opt = optimizers.Adam()
    opt.setup(model)
    explorer = explorers.ConstantEpsilonGreedy(
        0.2, random_action_func=lambda: env.action_space.sample())
    return agents.NSQ(
        q_function=model,
        optimizer=opt,
        t_max=1,
        gamma=0.99,
        i_target=100,
        explorer=explorer)
def create_agent(self, env):
    model = agents.ddpg.DDPGModel(
        policy=create_deterministic_policy_for_env(env),
        q_func=create_state_action_q_function_for_env(env))
    rbuf = replay_buffer.ReplayBuffer(10 ** 5)
    opt_a = optimizers.Adam()
    opt_a.setup(model.policy)
    opt_b = optimizers.Adam()
    opt_b.setup(model.q_function)
    explorer = explorers.AdditiveGaussian(scale=1)
    return agents.DDPG(model, opt_a, opt_b, rbuf, gamma=0.99,
                       explorer=explorer)
def create_agent(self, env):
    model = agents.ddpg.DDPGModel(
        policy=create_stochastic_policy_for_env(env),
        q_func=create_state_action_q_function_for_env(env))
    rbuf = replay_buffer.ReplayBuffer(10 ** 5)
    opt_a = optimizers.Adam()
    opt_a.setup(model.policy)
    opt_b = optimizers.Adam()
    opt_b.setup(model.q_function)
    explorer = explorers.AdditiveGaussian(scale=1)
    return agents.PGT(model, opt_a, opt_b, rbuf, gamma=0.99,
                      explorer=explorer)
def __init__(self, model, target, device_id=-1,
             learning_rate=0.00025, momentum=.9,
             minibatch_size=32, update_interval=10000):
    assert isinstance(model, ChainerModel), \
        'model should inherit from ChainerModel'
    super(QNeuralNetwork, self).__init__(model.input_shape,
                                         model.output_shape)
    self._gpu_device = None
    self._loss_val = 0

    # Target model update method
    self._steps = 0
    self._target_update_interval = update_interval

    # Setup model and target network
    self._minibatch_size = minibatch_size
    self._model = model
    self._target = target
    self._target.copyparams(self._model)

    # If GPU move to GPU memory
    if device_id >= 0:
        with cuda.get_device(device_id) as device:
            self._gpu_device = device
            self._model.to_gpu(device)
            self._target.to_gpu(device)

    # Setup optimizer
    self._optimizer = Adam(learning_rate, momentum, 0.999)
    self._optimizer.setup(self._model)
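# For reference: the positional Adam(learning_rate, momentum, 0.999) call above
# maps onto Chainer's (alpha, beta1, beta2) arguments, so it is equivalent to
# the keyword form below (eps keeps Chainer's default of 1e-8):
#     self._optimizer = Adam(alpha=learning_rate, beta1=momentum, beta2=0.999)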
def __init__(self):
    print("Initializing DQN...")
    self.exploration_rate = config.rl_initial_exploration
    self.fcl_eliminated = len(config.q_fc_hidden_units) == 0

    # Q Network
    conv, fc = build_q_network(config)
    self.conv = conv
    if not self.fcl_eliminated:
        self.fc = fc
    self.load()
    self.update_target()

    # Optimizer
    ## RMSProp, Adam, AdaGrad, AdaDelta, ...
    ## See http://docs.chainer.org/en/stable/reference/optimizers.html
    self.optimizer_conv = optimizers.Adam(alpha=config.rl_learning_rate, beta1=config.rl_gradient_momentum)
    self.optimizer_conv.setup(self.conv)
    if not self.fcl_eliminated:
        self.optimizer_fc = optimizers.Adam(alpha=config.rl_learning_rate, beta1=config.rl_gradient_momentum)
        self.optimizer_fc.setup(self.fc)

    # Replay Memory
    ## (state, action, reward, next_state, episode_ends)
    shape_state = (config.rl_replay_memory_size,
                   config.rl_agent_history_length * config.ale_screen_channels,
                   config.ale_scaled_screen_size[1],
                   config.ale_scaled_screen_size[0])
    shape_action = (config.rl_replay_memory_size,)
    self.replay_memory = [
        np.zeros(shape_state, dtype=np.float32),
        np.zeros(shape_action, dtype=np.uint8),
        np.zeros(shape_action, dtype=np.int8),
        np.zeros(shape_state, dtype=np.float32),
        np.zeros(shape_action, dtype=np.bool_),
    ]
    self.total_replay_memory = 0
    self.no_op_count = 0
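# A minimal sketch of how a minibatch could be drawn from the parallel-array
# replay memory laid out above (state, action, reward, next_state,
# episode_ends); the sampling helper itself is illustrative, not original code.
def sample_minibatch(replay_memory, total_filled, batch_size=32):
    indices = np.random.randint(0, total_filled, size=batch_size)
    states       = replay_memory[0][indices]  # float32, stacked screens
    actions      = replay_memory[1][indices]  # uint8 action indices
    rewards      = replay_memory[2][indices]  # int8 clipped rewards
    next_states  = replay_memory[3][indices]
    episode_ends = replay_memory[4][indices]  # True where the episode terminated
    return states, actions, rewards, next_states, episode_ends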
def get_learning_rate(opt):
    if isinstance(opt, optimizers.NesterovAG):
        return opt.lr
    if isinstance(opt, optimizers.MomentumSGD):
        return opt.lr
    if isinstance(opt, optimizers.SGD):
        return opt.lr
    if isinstance(opt, optimizers.Adam):
        return opt.alpha
    raise NotImplementedError()
def set_learning_rate(opt, lr):
    if isinstance(opt, optimizers.NesterovAG):
        opt.lr = lr
        return
    if isinstance(opt, optimizers.MomentumSGD):
        opt.lr = lr
        return
    if isinstance(opt, optimizers.SGD):
        opt.lr = lr
        return
    if isinstance(opt, optimizers.Adam):
        opt.alpha = lr
        return
    raise NotImplementedError()
def set_momentum(opt, momentum):
    if isinstance(opt, optimizers.NesterovAG):
        opt.momentum = momentum
        return
    if isinstance(opt, optimizers.MomentumSGD):
        opt.momentum = momentum
        return
    if isinstance(opt, optimizers.SGD):
        return
    if isinstance(opt, optimizers.Adam):
        opt.beta1 = momentum
        return
    raise NotImplementedError()
def get_optimizer(name, lr, momentum):
    if name == "sgd":
        return optimizers.SGD(lr=lr)
    if name == "msgd":
        return optimizers.MomentumSGD(lr=lr, momentum=momentum)
    if name == "nesterov":
        return optimizers.NesterovAG(lr=lr, momentum=momentum)
    if name == "adam":
        return optimizers.Adam(alpha=lr, beta1=momentum)
    raise NotImplementedError()
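# A short usage sketch for the helpers above: build an optimizer by name and
# anneal its learning rate each epoch. The decay factor (0.97) is an arbitrary
# example, and `model` is assumed to be some chainer.Link defined elsewhere.
opt = get_optimizer("adam", lr=1e-3, momentum=0.9)
opt.setup(model)
for epoch in range(10):
    # ... run one epoch of training with opt.update() ...
    set_learning_rate(opt, get_learning_rate(opt) * 0.97)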
def get_current_learning_rate(opt):
    if isinstance(opt, optimizers.NesterovAG):
        return opt.lr
    if isinstance(opt, optimizers.MomentumSGD):
        return opt.lr
    if isinstance(opt, optimizers.SGD):
        return opt.lr
    if isinstance(opt, optimizers.Adam):
        return opt.alpha
    raise NotImplementedError()
def __init__(self, d, batchsize, n_train_epoch, n_val_epoch, n_units):
    self.d = d
    self.batchsize = batchsize
    self.n_train_epoch = n_train_epoch
    self.n_val_epoch = n_val_epoch
    self.n_units = n_units
    self.model = L.Classifier(MLP(self.d, self.n_units, 2))
    self.model.o = optimizers.Adam()
    self.model.o.setup(self.model)
def __init__(self, d, f, R):
    self.d = d
    self.f = f
    self.R = R
    g = ChainList(*[L.Linear(1, f) for i in six.moves.range(AtomIdMax)])
    H = ChainList(*[ChainList(*[L.Linear(f, f)
                                for i in six.moves.range(R)])
                    for j in six.moves.range(5)])
    W = ChainList(*[L.Linear(f, d) for i in six.moves.range(R)])
    self.model = Chain(H=H, W=W, g=g)
    self.optimizer = optimizers.Adam()
    self.optimizer.setup(self.model)
def __init__(self, d, batchsize, n_train_epoch, n_val_epoch, n_units, gpu):
    self.d = d
    self.batchsize = batchsize
    self.n_train_epoch = n_train_epoch
    self.n_val_epoch = n_val_epoch
    self.n_units = n_units
    self.optimizer = optimizers.Adam()
    self.model = L.Classifier(MLP(self.d, self.n_units, 2))
    if gpu:
        self.model.to_gpu(0)
    self.optimizer.setup(self.model)
def __init__(self):
    Model.__init__(self)
    self.fc = self.build_network(output_dim=len(config.actions))
    self.optimizer_fc = optimizers.Adam(alpha=config.rl_learning_rate, beta1=config.rl_gradient_momentum)
    self.optimizer_fc.setup(self.fc)
    self.optimizer_fc.add_hook(optimizer.GradientClipping(10.0))
    self.load()
    self.update_target()
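# A rough sketch of one optimization step driven by the Adam optimizer set up
# above: GradientClipping(10.0) rescales gradients whose global L2 norm exceeds
# 10 before the Adam update is applied. This helper method is illustrative and
# assumes `loss` is a Chainer Variable computed from self.fc elsewhere.
def update_step(self, loss):
    self.fc.cleargrads()         # reset accumulated gradients
    loss.backward()              # backprop through the fully connected network
    self.optimizer_fc.update()   # clipping hook runs, then Adam applies the step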