async_learner.py 文件源码-python代码片段

def test(self, episodes_num=None, deterministic=True):
        if episodes_num is None:
            episodes_num = self.test_episodes_per_epoch

        test_start_time = time.time()
        test_rewards = []
        test_actions = []
        test_frameskips = []
        for _ in trange(episodes_num, desc="Testing", file=sys.stdout,
                        leave=False, disable=not self.enable_progress_bar):
            total_reward, actions, frameskips, _ = self.run_episode(deterministic=deterministic, return_stats=True)
            test_rewards.append(total_reward)
            test_actions += actions
            test_frameskips += frameskips

        self.doom_wrapper.reset()
        if self.local_network.has_state():
            self.local_network.reset_state()

        test_end_time = time.time()
        test_duration = test_end_time - test_start_time
        min_score = np.min(test_rewards)
        max_score = np.max(test_rewards)
        mean_score = np.mean(test_rewards)
        score_std = np.std(test_rewards)
        log(
            "TEST: mean: {}, min: {}, max: {}, test time: {}".format(
                green("{:0.3f}±{:0.2f}".format(mean_score, score_std)),
                red("{:0.3f}".format(min_score)),
                blue("{:0.3f}".format(max_score)),
                sec_to_str(test_duration)))
        return test_rewards, test_actions, test_frameskips