def on_step_begin(self, step, logs):
if self.step % self.interval == 0:
if len(self.episode_rewards) > 0:
metrics = np.array(self.metrics)
assert metrics.shape == (self.interval, len(self.metrics_names))
formatted_metrics = ''
if not np.isnan(metrics).all(): # not all values are means
means = np.nanmean(self.metrics, axis=0)
assert means.shape == (len(self.metrics_names),)
for name, mean in zip(self.metrics_names, means):
formatted_metrics += ' - {}: {:.3f}'.format(name, mean)
formatted_infos = ''
if len(self.infos) > 0:
infos = np.array(self.infos)
if not np.isnan(infos).all(): # not all values are means
means = np.nanmean(self.infos, axis=0)
assert means.shape == (len(self.info_names),)
for name, mean in zip(self.info_names, means):
formatted_infos += ' - {}: {:.3f}'.format(name, mean)
print('{} episodes - episode_reward: {:.3f} [{:.3f}, {:.3f}]{}{}'.format(len(self.episode_rewards), np.mean(self.episode_rewards), np.min(self.episode_rewards), np.max(self.episode_rewards), formatted_metrics, formatted_infos))
print('')
self.reset()
print('Interval {} ({} steps performed)'.format(self.step // self.interval + 1, self.step))
评论列表
文章目录