def create_agent(self, env):
    """Assemble and return a PGT agent for ``env``.

    The agent wraps a ``DDPGModel`` whose policy and Q-function are
    produced by ``create_stochastic_policy_for_env`` and
    ``create_state_action_q_function_for_env``. The policy and the
    Q-function each get their own ``Adam`` optimizer, exploration uses
    ``AdditiveGaussian(scale=1)``, and the agent is created with
    ``gamma=0.99``.

    Args:
        env: Environment handed to the policy / Q-function factories.

    Returns:
        The configured ``agents.PGT`` instance.
    """
    # Build the policy before the Q-function, mirroring the evaluation
    # order of the keyword arguments in a single constructor call.
    policy = create_stochastic_policy_for_env(env)
    q_func = create_state_action_q_function_for_env(env)
    ddpg_model = agents.ddpg.DDPGModel(policy=policy, q_func=q_func)

    # Presumably the buffer capacity -- confirm against ReplayBuffer.
    buffer_size = 10 ** 5
    memory = replay_buffer.ReplayBuffer(buffer_size)

    # One independent optimizer per sub-network of the model.
    policy_optimizer = optimizers.Adam()
    policy_optimizer.setup(ddpg_model.policy)
    q_optimizer = optimizers.Adam()
    q_optimizer.setup(ddpg_model.q_function)

    gaussian_explorer = explorers.AdditiveGaussian(scale=1)

    return agents.PGT(
        ddpg_model,
        policy_optimizer,
        q_optimizer,
        memory,
        gamma=0.99,
        explorer=gaussian_explorer)
评论列表
文章目录