def create_agent(self, env):
    """Construct and return an ``agents.NSQ`` agent for ``env``.

    The agent is assembled from three pieces:

    * a Q-function obtained from ``create_state_q_function_for_env(env)``,
    * an ``optimizers.Adam`` instance (default arguments) set up on that
      Q-function,
    * an ``explorers.ConstantEpsilonGreedy`` explorer constructed with
      ``0.2`` as its first argument and a random-action callback that
      samples from ``env.action_space``.

    Args:
        env: Environment object; it must expose ``action_space.sample()``
            and be accepted by ``create_state_q_function_for_env``.

    Returns:
        The ``agents.NSQ`` instance built with ``t_max=1``, ``gamma=0.99``
        and ``i_target=100``.
    """

    def sample_random_action():
        # Resolve ``env.action_space`` on every call rather than binding
        # the method once, so the lookup happens at exploration time.
        return env.action_space.sample()

    q_func = create_state_q_function_for_env(env)

    adam = optimizers.Adam()
    adam.setup(q_func)

    eps_greedy = explorers.ConstantEpsilonGreedy(
        0.2,
        random_action_func=sample_random_action,
    )

    return agents.NSQ(
        q_function=q_func,
        optimizer=adam,
        explorer=eps_greedy,
        t_max=1,
        gamma=0.99,
        i_target=100,
    )
评论列表
文章目录