def __init__(self, env, monitor_path, video=True, **usercfg):
    super(REINFORCE, self).__init__(**usercfg)
    self.env = wrappers.Monitor(env, monitor_path, force=True, video_callable=(None if video else False))
    self.env_runner = EnvRunner(self.env, self, usercfg)
    self.monitor_path = monitor_path
    # Default configuration. Can be overwritten using keyword arguments.
    self.config.update(dict(
        batch_update="timesteps",
        timesteps_per_batch=1000,
        n_iter=100,
        gamma=0.99,  # Discount factor applied to future rewards
        decay=0.9,  # Decay of the RMSProp optimizer
        epsilon=1e-9,  # Epsilon of the RMSProp optimizer
        learning_rate=0.05,
        n_hidden_units=20,
        repeat_n_actions=1,
        save_model=False
    ))
    self.config.update(usercfg)
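    # Build the policy network and its training operation, then initialize all TensorFlow variables.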
    self.build_network()
    self.make_trainer()
    init = tf.global_variables_initializer()
    # Launch the graph.
    self.session = tf.Session()
    self.session.run(init)
    if self.config["save_model"]:
        tf.add_to_collection("action", self.action)
        tf.add_to_collection("states", self.states)
        self.saver = tf.train.Saver()
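    # Placeholders and scalar summaries so the loss, rewards and episode lengths can be logged to TensorBoard.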
    self.rewards = tf.placeholder("float", name="Rewards")
    self.episode_lengths = tf.placeholder("float", name="Episode_lengths")
    summary_loss = tf.summary.scalar("Loss", self.summary_loss)
    summary_rewards = tf.summary.scalar("Rewards", self.rewards)
    summary_episode_lengths = tf.summary.scalar("Episode_lengths", self.episode_lengths)
    self.summary_op = tf.summary.merge([summary_loss, summary_rewards, summary_episode_lengths])
    self.writer = tf.summary.FileWriter(os.path.join(self.monitor_path, "task0"), self.session.graph)
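# Minimal usage sketch (not part of the original listing); the environment id, output
# directory and hyperparameter override below are illustrative assumptions:
#   env = gym.make("CartPole-v0")
#   agent = REINFORCE(env, monitor_path="/tmp/reinforce", video=False, n_iter=200)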