# Assumes: import os, import tensorflow as tf (1.x), from gym import wrappers,
# and the project's EnvRunner helper.
def __init__(self, env, monitor_path, video=True, **usercfg):
    super(A2C, self).__init__(**usercfg)
    self.monitor_path = monitor_path
    # Wrap the environment in a Gym Monitor; disable video recording if requested.
    self.env = wrappers.Monitor(env, monitor_path, force=True, video_callable=(None if video else False))
    self.env_runner = EnvRunner(self.env, self, usercfg)
    # Default hyperparameters; user-supplied settings override them below.
    self.config.update(dict(
        timesteps_per_batch=10000,
        trajectories_per_batch=10,
        batch_update="timesteps",
        n_iter=100,
        gamma=0.99,
        actor_learning_rate=0.01,
        critic_learning_rate=0.05,
        actor_n_hidden=20,
        critic_n_hidden=20,
        repeat_n_actions=1,
        save_model=False
    ))
    self.config.update(usercfg)
    self.build_networks()
    init = tf.global_variables_initializer()
    # Launch the graph.
    self.session = tf.Session()
    self.session.run(init)
    if self.config["save_model"]:
        tf.add_to_collection("action", self.action)
        tf.add_to_collection("states", self.states)
        self.saver = tf.train.Saver()
    # TensorBoard summaries: actor/critic losses, episode rewards and episode lengths.
    self.rewards = tf.placeholder("float", name="Rewards")
    self.episode_lengths = tf.placeholder("float", name="Episode_lengths")
    summary_actor_loss = tf.summary.scalar("Actor_loss", self.summary_actor_loss)
    summary_critic_loss = tf.summary.scalar("Critic_loss", self.summary_critic_loss)
    summary_rewards = tf.summary.scalar("Rewards", self.rewards)
    summary_episode_lengths = tf.summary.scalar("Episode_lengths", self.episode_lengths)
    self.summary_op = tf.summary.merge([summary_actor_loss, summary_critic_loss, summary_rewards, summary_episode_lengths])
    self.writer = tf.summary.FileWriter(os.path.join(self.monitor_path, "summaries"), self.session.graph)
    return
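
# Minimal standalone sketch (not part of the class above) of how the merged
# summary op built in __init__ is typically evaluated and written to
# TensorBoard with TensorFlow 1.x. The placeholder names mirror the ones
# above; the fed values and the "/tmp/summaries" log directory are
# hypothetical stand-ins for quantities produced by the training loop.
import tensorflow as tf

rewards = tf.placeholder("float", name="Rewards")
episode_lengths = tf.placeholder("float", name="Episode_lengths")
summary_op = tf.summary.merge([
    tf.summary.scalar("Rewards", rewards),
    tf.summary.scalar("Episode_lengths", episode_lengths),
])
with tf.Session() as sess:
    writer = tf.summary.FileWriter("/tmp/summaries", sess.graph)
    # Feed scalar statistics for one iteration and append them to the event file.
    summary = sess.run(summary_op, feed_dict={rewards: 100.0, episode_lengths: 200.0})
    writer.add_summary(summary, global_step=0)
    writer.flush()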