def create_agent(self, env):
    """Build and return a DoubleDQN agent for ``env``.

    The agent is assembled from:

    * a Q-function obtained from ``create_state_q_function_for_env(env)``,
    * a replay buffer constructed with ``10 ** 5`` (presumably its
      capacity -- confirm against ``replay_buffer.ReplayBuffer``),
    * an Adam optimizer created with default arguments and set up on the
      Q-function,
    * a constant epsilon-greedy explorer constructed with ``0.2`` whose
      random actions are sampled from ``env.action_space``.

    Args:
        env: Environment object; must expose ``action_space.sample()``.

    Returns:
        The ``agents.DoubleDQN`` instance built with ``gamma=0.99``.
    """
    q_func = create_state_q_function_for_env(env)
    buffer = replay_buffer.ReplayBuffer(10 ** 5)

    adam = optimizers.Adam()
    adam.setup(q_func)

    def sample_random_action():
        # Look up the action space at call time, as the explorer may
        # invoke this repeatedly while the agent is acting.
        return env.action_space.sample()

    eps_greedy = explorers.ConstantEpsilonGreedy(
        0.2,
        random_action_func=sample_random_action,
    )

    agent = agents.DoubleDQN(
        q_func,
        adam,
        buffer,
        gamma=0.99,
        explorer=eps_greedy,
    )
    return agent
评论列表
文章目录