def log(self, rewards, v_l, p_l, e_l, g_n, v_n, mean_advantages_m):
    """Write one episode's training metrics to TensorBoard (TF 1.x Summary API)."""
    print(f"{self.name} episode_count {self.episode_count}")
    summary = tf.Summary()
    # Performance metrics.
    summary.value.add(tag='Perf/Reward', simple_value=float(rewards))
    # Left disabled in the original; would need mean_length / mean_value passed in.
    # summary.value.add(tag='Perf/Length', simple_value=float(mean_length))
    # summary.value.add(tag='Perf/Value', simple_value=float(mean_value))
    # Optimization metrics: losses, entropy, and gradient/variable norms.
    summary.value.add(tag='Losses/Value Loss', simple_value=float(v_l))
    summary.value.add(tag='Losses/Policy Loss', simple_value=float(p_l))
    summary.value.add(tag='Losses/Entropy', simple_value=float(e_l))
    summary.value.add(tag='Losses/Grad Norm', simple_value=float(g_n))
    summary.value.add(tag='Losses/Var Norm', simple_value=float(v_n))
    summary.value.add(tag='Losses/Mean Advantage', simple_value=float(mean_advantages_m))
    # Use the episode count as the global step; flush so TensorBoard updates immediately.
    self.summary_writer.add_summary(summary, self.episode_count)
    self.summary_writer.flush()
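This method assumes that self.summary_writer was created elsewhere as a TF 1.x tf.summary.FileWriter and that self.episode_count is maintained by the worker. A minimal sketch of that assumed setup follows; the Worker class name, its constructor arguments, and the log directory are hypothetical placeholders, not part of the original code:

import tensorflow as tf  # TF 1.x; under TF 2.x the same calls live in tf.compat.v1

class Worker:
    def __init__(self, name, log_dir='./train'):
        self.name = name
        self.episode_count = 0
        # One FileWriter per worker gives each worker its own curve in TensorBoard.
        self.summary_writer = tf.summary.FileWriter('%s/%s' % (log_dir, name))

With that in place, calling log() once per finished episode produces per-worker curves under the Perf/ and Losses/ tags in TensorBoard.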