def evaluate_and_update_max_score(self, t, episodes, env, agent):
    """Evaluate the agent, record the scores, and update the best score.

    Calls ``eval_performance`` for ``self.n_runs`` runs, writes one row of
    statistics via ``record_stats``, and, when the mean score exceeds the
    stored maximum, calls ``update_best_model`` and stores the new maximum.

    Args:
        t: Written as the first column of the recorded row and passed to
            ``update_best_model`` (presumably the current step count;
            confirm against the caller).
        episodes: Written as the second column of the recorded row.
        env: Environment handed to ``eval_performance``.
        agent: Agent to evaluate; its ``get_statistics()`` pairs supply
            the trailing columns of the recorded row.

    Returns:
        The ``'mean'`` entry of the evaluation statistics.
    """
    stats = eval_performance(
        env,
        agent,
        self.n_runs,
        max_episode_len=self.max_episode_len,
        explorer=self.explorer,
        logger=self.logger,
    )
    seconds_since_start = time.time() - self.start_time

    # Only the second element of each statistics entry is recorded.
    agent_columns = tuple(entry[1] for entry in agent.get_statistics())

    mean = stats['mean']
    score_columns = tuple(
        stats[key] for key in ('median', 'stdev', 'max', 'min'))
    row = (t, episodes, seconds_since_start, mean)
    row = row + score_columns + agent_columns
    record_stats(self.outdir, row)

    # The compare-and-set on the stored maximum runs entirely under the
    # lock exposed by ``self._max_score`` (presumably a shared value that
    # several evaluating workers may touch; confirm against __init__).
    with self._max_score.get_lock():
        previous_best = self._max_score.value
        if mean > previous_best:
            update_best_model(
                agent, self.outdir, t, previous_best, mean,
                logger=self.logger)
            self._max_score.value = mean

    return mean
评论列表
文章目录