def __init__(self, master, thread_id, clip_gradients=True):
    """Set up one worker: environment, per-thread network ops, summary writer and runner.

    Args:
        master: Coordinating object. This constructor reads its ``env_name``,
            ``config``, ``monitor``, ``monitor_path``, ``video``,
            ``global_step`` and ``session`` attributes.
        thread_id: Identifier of this worker. It is forwarded as ``name`` to
            the parent constructor and used to name the variable scope and
            the summary directory.
        clip_gradients: Flag stored on the instance as ``clip_gradients``.
    """
    # NOTE(review): the indentation of this method was reconstructed; the
    # extent of the ``with tf.variable_scope`` body below should be confirmed.
    super(A3CThread, self).__init__(name=thread_id)
    self.thread_id = thread_id
    self.clip_gradients = clip_gradients
    self.env = make_environment(master.env_name)
    self.master = master
    self.config = master.config

    is_first_thread = thread_id == 0

    # Only the worker with id 0 gets its environment wrapped in a Monitor.
    if is_first_thread and master.monitor:
        # video_callable is None when master.video is truthy, False otherwise
        # (presumably "default recording schedule" vs. "no recording" in
        # gym's Monitor -- confirm against the gym version in use).
        if master.video:
            video_callable = None
        else:
            video_callable = False
        self.env = wrappers.Monitor(
            self.env,
            master.monitor_path,
            force=True,
            video_callable=video_callable,
        )

    # Only used (and overwritten) by agents that use an RNN
    self.initial_features = None

    # Build actor and critic networks inside a scope named after this worker.
    scope_name = "t{}_net".format(thread_id)
    with tf.variable_scope(scope_name):
        (
            self.action,
            self.value,
            self.actor_states,
            self.critic_states,
            self.actions_taken,
            self.losses,
            self.adv,
            self.r,
            self.n_steps,
        ) = self.build_networks()
        self.sync_net = self.create_sync_net_op()
        # Running the train op also advances the master's global step by n_steps.
        step_increment = master.global_step.assign_add(self.n_steps)
        trainer = self.make_trainer()
        self.train_op = tf.group(trainer, step_increment)

    # Write the summary of each thread in a different directory
    summary_dir = os.path.join(master.monitor_path, "thread{}".format(thread_id))
    self.writer = tf.summary.FileWriter(summary_dir, master.session.graph)

    self.runner = RunnerThread(self.env, self, 20, is_first_thread and master.video)
评论列表
文章目录