def setup_summaries(self):
    """Build the TensorFlow ops used to log per-episode statistics.

    A zero-initialised variable plus a scalar summary is created for every
    tracked statistic.  When ``self.alg_type`` is ``"a3c"`` only the episode
    reward is tracked; for any other algorithm type the max Q value and
    epsilon are tracked as well.  Each summary tag is suffixed with
    ``self.actor_id``.

    Returns:
        A tuple ``(summary_placeholders, update_ops, summary_ops)``:
        one float placeholder per tracked statistic, the assign ops that
        copy each placeholder into its variable (same order), and the
        merged summary op, which is created under a control dependency
        on those assign ops.
    """

    def tracked_scalar(tag_prefix):
        # One backing variable per statistic, exposed through a scalar summary.
        holder = tf.Variable(0.)
        tf.scalar_summary(tag_prefix + str(self.actor_id), holder)
        return holder

    summary_vars = [tracked_scalar("Episode Reward ")]
    if self.alg_type != "a3c":
        # Value-based learners additionally log their Q estimate and epsilon.
        summary_vars.append(tracked_scalar("Max Q Value "))
        summary_vars.append(tracked_scalar("Epsilon "))

    # Values are fed through placeholders and copied into the variables.
    summary_placeholders = [tf.placeholder("float") for _ in summary_vars]
    update_ops = [
        variable.assign(feed)
        for variable, feed in zip(summary_vars, summary_placeholders)
    ]

    # NOTE(review): merge_all_summaries presumably gathers every summary
    # registered in the default graph, not just the ones created above --
    # confirm against the TensorFlow version in use.
    with tf.control_dependencies(update_ops):
        summary_ops = tf.merge_all_summaries()

    return summary_placeholders, update_ops, summary_ops
# Web-page residue, not part of the code: "Comment list" / "Article table of contents".