def create_agent(self, env):
    """Build an ACER agent for ``env``.

    The objects returned by ``create_stochastic_policy_for_env`` and
    ``create_state_q_function_for_env`` are combined into an
    ``ACERSeparateModel``. That model is registered with an ``Adam``
    optimizer created with its default arguments, and the agent is given
    a fresh ``EpisodicReplayBuffer``.

    Args:
        env: Environment handed to both helper factories.

    Returns:
        The ``agents.ACER`` instance, constructed with ``t_max=1`` and
        ``gamma=0.99``.
    """
    policy = create_stochastic_policy_for_env(env)
    q_function = create_state_q_function_for_env(env)
    acer_model = agents.acer.ACERSeparateModel(pi=policy, q=q_function)

    adam = optimizers.Adam()
    adam.setup(acer_model)

    # NOTE(review): 10 ** 4 is presumably the buffer capacity -- confirm
    # against the replay_buffer API.
    episodic_buffer = replay_buffer.EpisodicReplayBuffer(10 ** 4)

    return agents.ACER(
        acer_model,
        adam,
        t_max=1,
        gamma=0.99,
        replay_buffer=episodic_buffer,
    )
评论列表
文章目录