def test_duel_dqn():
env = TwoRoundDeterministicRewardEnv()
np.random.seed(123)
env.seed(123)
random.seed(123)
nb_actions = env.action_space.n
# Next, we build a very simple model.
model = Sequential()
model.add(Dense(16, input_shape=(1,)))
model.add(Activation('relu'))
model.add(Dense(nb_actions, activation='linear'))
memory = SequentialMemory(limit=1000, window_length=1)
policy = EpsGreedyQPolicy(eps=.1)
dqn = DQNAgent(model=model, nb_actions=nb_actions, memory=memory, nb_steps_warmup=50,
target_model_update=1e-1, policy=policy, enable_double_dqn=False, enable_dueling_network=True)
dqn.compile(Adam(lr=1e-3))
dqn.fit(env, nb_steps=2000, visualize=False, verbose=0)
policy.eps = 0.
h = dqn.test(env, nb_episodes=20, visualize=False)
assert_allclose(np.mean(h.history['episode_reward']), 3.)
评论列表
文章目录