import gym
import tensorflow as tf
from tensorflow.contrib import layers

# tt, DqnAgent and ObservationShapeWrapper come from the surrounding project;
# their exact import paths depend on the package layout.


def dqn_test(env='OneRoundDeterministicReward-v0'):
    env = gym.make(env)
    env = ObservationShapeWrapper(env)

    # Q-network with one hidden layer; the tracker maintains an exponential
    # moving average of the weights (decay 0.99), which DQN typically uses
    # as the target network.
    @tt.model(tracker=tf.train.ExponentialMovingAverage(1 - .01),
              optimizer=tf.train.AdamOptimizer(.01))
    def q_network(x):
        x = layers.fully_connected(x, 32)
        x = layers.fully_connected(x, env.action_space.n, activation_fn=None,
                                   weights_initializer=tf.random_normal_initializer(0, 1e-4))
        return x

    agent = DqnAgent(env, q_network, double_dqn=False,
                     replay_start=100, annealing_time=100)

    rs = []
    for ep in range(10000):
        r, _ = agent.play_episode()
        rs.append(r)
        if ep % 100 == 0:
            print(f'Average return after episode {ep} is {sum(rs) / len(rs)}')
            rs = []
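The ObservationShapeWrapper used above is defined elsewhere in the project. Judging from the call site, its job is to turn the scalar observations of OneRoundDeterministicReward-v0 into 1-D float vectors that layers.fully_connected can consume. A minimal sketch under that assumption (the class body below is illustrative, not the project's actual code; on very old gym versions the hook is named _observation instead of observation):

import gym
import numpy as np


class ObservationShapeWrapper(gym.ObservationWrapper):
    """Illustrative sketch: reshape scalar observations to shape-(1,) float32."""

    def __init__(self, env):
        super().__init__(env)
        # Assumption: every observation is a single scalar.
        self.observation_space = gym.spaces.Box(low=-np.inf, high=np.inf, shape=(1,))

    def observation(self, observation):
        return np.asarray([observation], dtype=np.float32)

With a wrapper like this in scope, dqn_test() can be run directly, for example from an if __name__ == '__main__': guard.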