def test_dqn(env='Chain-v0', episodes=100000, log_every=100):
    """Train a ``BootstrappedDQNAg`` agent on a gym environment, printing progress.

    Builds the environment with ``gym.make``, defines a shared feature
    network and an output head out of fully connected layers, and then
    plays ``episodes`` episodes, printing the episode return periodically.

    Args:
        env: Environment id passed to ``gym.make``.
        episodes: Number of episodes to play (default matches the
            previously hard-coded 100000).
        log_every: Print the return every ``log_every`` episodes, starting
            with episode 0. A falsy value (``0`` or ``None``) disables
            printing.

    Returns:
        None. Progress is reported on stdout only.
    """
    # Imported for its side effects only -- presumably it registers custom
    # environments such as 'Chain-v0' with gym; TODO confirm.
    import gym_mix  # noqa: F401

    # Keep the id string in `env` and bind the environment object to its own
    # name instead of rebinding the parameter to a different type.
    environment = gym.make(env)

    def pp(x):
        # Feature network: two fully connected layers of 32 units each.
        x = layers.fully_connected(x, 32)
        x = layers.fully_connected(x, 32)
        return x

    def head(x):
        # Output head: one 32-unit hidden layer followed by a linear layer
        # (activation_fn=None) with one output per discrete action. The
        # final weights are drawn from a normal initializer with arguments
        # (0, 1e-4), so the initial outputs start close to zero.
        x = layers.fully_connected(x, 32)
        x = layers.fully_connected(
            x,
            environment.action_space.n,
            activation_fn=None,
            weights_initializer=tf.random_normal_initializer(0, 1e-4),
        )
        return x

    agent = BootstrappedDQNAg(environment, pp, head, replay_start=64)

    for ep in range(episodes):
        # play_episode() returns a pair; only the first element (the
        # episode return) is used here.
        R, _ = agent.play_episode()
        if log_every and ep % log_every == 0:
            print(f'Return after episode {ep} is {R}')
评论列表
文章目录