import gym
from gym import wrappers
import tensorflow as tf
from tensorflow.contrib import layers

# Assumed imports: `model` and `DdpgAgent` are taken here to come from the
# chi RL library this test exercises; adjust the paths to your project layout.
from chi import model
from chi.rl import DdpgAgent


def test_ddpg():
    import gym_mix  # registers ContinuousCopyRand-v0 with gym
    env = gym.make('ContinuousCopyRand-v0')
    # With gym's TimeLimit, a cap of 0 truncates every episode after its
    # first step, which suits this one-shot copy task.
    env = wrappers.TimeLimit(env, max_episode_steps=0)
    # Actor: maps observations to actions; the tracker maintains an
    # exponential moving average of the weights for the target network.
    @model(optimizer=tf.train.AdamOptimizer(0.0001),
           tracker=tf.train.ExponentialMovingAverage(1 - 0.001))
    def actor(x):
        x = layers.fully_connected(x, 50, biases_initializer=layers.xavier_initializer())
        a = layers.fully_connected(x, env.action_space.shape[0], None,
                                   weights_initializer=tf.random_normal_initializer(0, 1e-4))
        return a
    # Critic: estimates Q(s, a); the action is concatenated in after the
    # first hidden layer, as in the original DDPG architecture.
    @model(optimizer=tf.train.AdamOptimizer(0.001),
           tracker=tf.train.ExponentialMovingAverage(1 - 0.001))
    def critic(x, a):
        x = layers.fully_connected(x, 300, biases_initializer=layers.xavier_initializer())
        x = tf.concat([x, a], axis=1)
        x = layers.fully_connected(x, 300, biases_initializer=layers.xavier_initializer())
        x = layers.fully_connected(x, 300, biases_initializer=layers.xavier_initializer())
        q = layers.fully_connected(x, 1, None,
                                   weights_initializer=tf.random_normal_initializer(0, 1e-4))
        return tf.squeeze(q, 1)
    agent = DdpgAgent(env, actor, critic)

    for ep in range(10000):
        R, _ = agent.play_episode()
        if ep % 100 == 0:
            print(f'Return after episode {ep} is {R}')