def test(self, episodes_num=None, deterministic=True):
    """Run evaluation episodes, log summary statistics and return raw results.

    Args:
        episodes_num: Number of episodes to run. When ``None``,
            ``self.test_episodes_per_epoch`` is used instead.
        deterministic: Forwarded unchanged to ``self.run_episode`` as its
            ``deterministic`` argument.

    Returns:
        A tuple ``(test_rewards, test_actions, test_frameskips)``.
        ``test_rewards`` holds one total reward per episode; the other two
        lists are the concatenation of the per-episode ``actions`` and
        ``frameskips`` values returned by ``run_episode``. All three are
        empty when no episode was run.
    """
    if episodes_num is None:
        episodes_num = self.test_episodes_per_epoch

    # perf_counter() is monotonic, so the measured duration cannot be skewed
    # by system clock adjustments the way a time.time() difference can.
    test_start_time = time.perf_counter()
    test_rewards = []
    test_actions = []
    test_frameskips = []
    for _ in trange(episodes_num, desc="Testing", file=sys.stdout,
                    leave=False, disable=not self.enable_progress_bar):
        total_reward, actions, frameskips, _ = self.run_episode(
            deterministic=deterministic, return_stats=True)
        test_rewards.append(total_reward)
        test_actions.extend(actions)
        test_frameskips.extend(frameskips)

    # NOTE(review): the original indentation was lost; the resets below are
    # assumed to run once after the loop rather than per episode - confirm.
    self.doom_wrapper.reset()
    if self.local_network.has_state():
        self.local_network.reset_state()

    test_duration = time.perf_counter() - test_start_time

    if not test_rewards:
        # np.min / np.max raise ValueError on an empty sequence, so skip the
        # statistics entirely when zero episodes were requested.
        log("TEST: no episodes were run, test time: {}".format(
            sec_to_str(test_duration)))
        return test_rewards, test_actions, test_frameskips

    min_score = np.min(test_rewards)
    max_score = np.max(test_rewards)
    mean_score = np.mean(test_rewards)
    score_std = np.std(test_rewards)
    log(
        "TEST: mean: {}, min: {}, max: {}, test time: {}".format(
            green("{:0.3f}±{:0.2f}".format(mean_score, score_std)),
            red("{:0.3f}".format(min_score)),
            blue("{:0.3f}".format(max_score)),
            sec_to_str(test_duration)))
    return test_rewards, test_actions, test_frameskips
# Comment list
# Table of contents