def evaluate_and_update_max_score(self, t, episodes):
eval_stats = eval_performance(
self.env, self.agent, self.n_runs,
max_episode_len=self.max_episode_len, explorer=self.explorer,
logger=self.logger)
elapsed = time.time() - self.start_time
custom_values = tuple(tup[1] for tup in self.agent.get_statistics())
mean = eval_stats['mean']
values = (t,
episodes,
elapsed,
mean,
eval_stats['median'],
eval_stats['stdev'],
eval_stats['max'],
eval_stats['min']) + custom_values
record_stats(self.outdir, values)
if mean > self.max_score:
update_best_model(self.agent, self.outdir, t, self.max_score, mean,
logger=self.logger)
self.max_score = mean
return mean
评论列表
文章目录