Example source code for the Python Monitor() class
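
Every snippet below wraps a gym environment in a Monitor so that episode statistics, and optionally videos, are written to a results directory. As orientation, here is a minimal sketch of the common pattern, assuming the classic pre-0.20 gym wrappers.Monitor API; the output directory is illustrative:

import gym
from gym import wrappers

env = gym.make('CartPole-v0')
# force=True clears stale monitor files in the target directory (illustrative path);
# video_callable=False disables video recording, None keeps the default cadence.
env = wrappers.Monitor(env, '/tmp/cartpole-monitor', force=True, video_callable=False)

observation = env.reset()
done = False
while not done:
    # Random actions; the monitor records rewards and episode lengths on its own.
    observation, reward, done, info = env.step(env.action_space.sample())
env.close()  # flush the monitor's manifest and stats files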

training.py (project: drl.pth, author: seba-1511)
def test(args, env, agent):
    if args.record:
        if 'env' in vars(args):
            env = wrappers.Monitor(env, './videos/' + args.env + str(time()) + '/')
        else:
            env = wrappers.Monitor(env, './videos/' + str(time()) + '/')
    test_rewards = []
    test_start = time()
    test_steps = 0
    for iteration in range(1, 1 + args.n_test_iter):
        state = env.reset()
        iter_rewards = 0.0
        done = False
        while not done:
            test_steps += 1
            action, _ = agent.forward(state)
            state, reward, done, _ = env.step(action)
            iter_rewards += reward
        test_rewards.append(iter_rewards)
    print_stats('Test', test_rewards, args.n_test_iter,
                time() - test_start, test_steps, 0, agent)
    return test_rewards
gym.py (project: malmo-challenge, author: Kaixhin)
def __init__(self, env_name, state_builder=ALEStateBuilder(), repeat_action=4, no_op=30, monitoring_path=None):
        assert isinstance(state_builder, StateBuilder), 'state_builder should inherit from StateBuilder'
        assert isinstance(repeat_action, (int, tuple)), 'repeat_action should be int or tuple'
        if isinstance(repeat_action, int):
            assert repeat_action >= 1, "repeat_action should be >= 1"
        elif isinstance(repeat_action, tuple):
            assert len(repeat_action) == 2, 'repeat_action should be a length-2 tuple: (min frameskip, max frameskip)'
            assert repeat_action[0] < repeat_action[1], 'repeat_action[0] should be < repeat_action[1]'

        super(GymEnvironment, self).__init__()

        self._state_builder = state_builder
        self._env = gym.make(env_name)
        self._env.env.frameskip = repeat_action
        self._no_op = max(0, no_op)
        self._done = True

        if monitoring_path is not None:
            self._env = Monitor(self._env, monitoring_path, video_callable=need_record)
gym.py (project: malmo-challenge, author: Microsoft)
def __init__(self, env_name, state_builder=ALEStateBuilder(), repeat_action=4, no_op=30, monitoring_path=None):
        assert isinstance(state_builder, StateBuilder), 'state_builder should inherit from StateBuilder'
        assert isinstance(repeat_action, (int, tuple)), 'repeat_action should be int or tuple'
        if isinstance(repeat_action, int):
            assert repeat_action >= 1, "repeat_action should be >= 1"
        elif isinstance(repeat_action, tuple):
            assert len(repeat_action) == 2, 'repeat_action should be a length-2 tuple: (min frameskip, max frameskip)'
            assert repeat_action[0] < repeat_action[1], 'repeat_action[0] should be < repeat_action[1]'

        super(GymEnvironment, self).__init__()

        self._state_builder = state_builder
        self._env = gym.make(env_name)
        self._env.env.frameskip = repeat_action
        self._no_op = max(0, no_op)
        self._done = True

        if monitoring_path is not None:
            self._env = Monitor(self._env, monitoring_path, video_callable=need_record)
a3c.py (project: DeepRL, author: arnomoonens)
def __init__(self, master, thread_id, clip_gradients=True):
        super(A3CThread, self).__init__(name=thread_id)
        self.thread_id = thread_id
        self.clip_gradients = clip_gradients
        self.env = make_environment(master.env_name)
        self.master = master
        self.config = master.config
        if thread_id == 0 and self.master.monitor:
            self.env = wrappers.Monitor(self.env, master.monitor_path, force=True, video_callable=(None if self.master.video else False))

        # Only used (and overwritten) by agents that use an RNN
        self.initial_features = None

        # Build actor and critic networks
        with tf.variable_scope("t{}_net".format(self.thread_id)):
            self.action, self.value, self.actor_states, self.critic_states, self.actions_taken, self.losses, self.adv, self.r, self.n_steps = self.build_networks()
            self.sync_net = self.create_sync_net_op()
            inc_step = self.master.global_step.assign_add(self.n_steps)
            self.train_op = tf.group(self.make_trainer(), inc_step)
        # Write the summary of each thread in a different directory
        self.writer = tf.summary.FileWriter(os.path.join(self.master.monitor_path, "thread" + str(self.thread_id)), self.master.session.graph)

        self.runner = RunnerThread(self.env, self, 20, thread_id == 0 and self.master.video)
sarsa_fa.py (project: DeepRL, author: arnomoonens)
def __init__(self, env, monitor_path, video=True, **usercfg):
        super(SarsaFA, self).__init__()
        self.env = env
        self.env = wrappers.Monitor(self.env, monitor_path, force=True, video_callable=(None if video else False))
        m = usercfg.get("m", 10)  # Number of tilings
        self.config = dict(
            m=m,
            n_x_tiles=9,
            n_y_tiles=9,
            Lambda=0.9,
            epsilon=0,  # fully greedy in this case
            alpha=(0.05 * (0.5 / m)),
            gamma=1,
            n_iter=1000,
            steps_per_episode=env.spec.tags.get("wrapper_config.TimeLimit.max_episode_steps")  # Maximum number of allowed steps per episode, as determined (for this environment) by the gym library
        )
        self.config.update(usercfg)
        O = env.observation_space
        self.x_low, self.y_low = O.low
        self.x_high, self.y_high = O.high

        self.nA = env.action_space.n
        self.policy = EGreedy(self.config["epsilon"])
        self.function_approximation = TileCoding(self.x_low, self.x_high, self.y_low, self.y_high, m, self.config["n_x_tiles"], self.config["n_y_tiles"], self.nA)
karpathy.py (project: DeepRL, author: arnomoonens)
def __init__(self, env, monitor_path, video=True, **usercfg):
        super(Karpathy, self).__init__(**usercfg)
        self.env = wrappers.Monitor(env, monitor_path, force=True, video_callable=(None if video else False))
        self.nA = self.env.action_space.n
        # Default configuration. Can be overwritten using keyword arguments.
        self.config.update(dict(
            # timesteps_per_batch=10000,
            # n_iter=100,
            episode_max_length=env.spec.tags.get("wrapper_config.TimeLimit.max_episode_steps"),
            gamma=0.99,
            learning_rate=0.05,
            batch_size=10,  # Amount of episodes after which to adapt gradients
            decay_rate=0.99,  # Used for RMSProp
            n_hidden_units=20,
            draw_frequency=50,  # Draw a plot every 50 episodes
            repeat_n_actions=1
        ))
        self.config.update(usercfg)
        self.build_network()
karpathy_cnn.py (project: DeepRL, author: arnomoonens)
def __init__(self, env, monitor_path, video=True, **usercfg):
        super(KarpathyCNN, self).__init__(**usercfg)
        self.env = wrappers.Monitor(env, monitor_path, force=True, video_callable=(None if video else False))
        self.nA = env.action_space.n
        self.monitor_path = monitor_path
        # Default configuration. Can be overwritten using keyword arguments.
        self.config.update(
            dict(
                # timesteps_per_batch=10000,
                # n_iter=100,
                n_hidden_units=200,
                learning_rate=1e-3,
                batch_size=10,  # Amount of episodes after which to adapt gradients
                gamma=0.99,  # Discount past rewards by a percentage
                decay=0.99,  # Decay of RMSProp optimizer
                epsilon=1e-9,  # Epsilon of RMSProp optimizer
                draw_frequency=50  # Draw a plot every 50 episodes
            )
        )
        self.config.update(usercfg)
        self.build_network()
        if self.config["save_model"]:
            tf.add_to_collection("action", self.action)
            tf.add_to_collection("states", self.states)
            self.saver = tf.train.Saver()
test_monitor.py (project: gym, author: openai)
def test_env_reuse():
    with helpers.tempdir() as temp:
        env = gym.make('Autoreset-v0')
        env = Monitor(env, temp)

        env.reset()

        _, _, done, _ = env.step(None)
        assert not done
        _, _, done, _ = env.step(None)
        assert done

        _, _, done, _ = env.step(None)
        assert not done
        _, _, done, _ = env.step(None)
        assert done

        env.close()
test_monitor.py (project: gym, author: openai)
def test_steps_limit_restart():
    with helpers.tempdir() as temp:
        env = gym.make('test.StepsLimitCartpole-v0')
        env = Monitor(env, temp, video_callable=False)
        env.reset()

        # Episode has started
        _, _, done, info = env.step(env.action_space.sample())
        assert not done

        # Limit reached, now we get a done signal and the env resets itself
        _, _, done, info = env.step(env.action_space.sample())
        assert done
        assert env.episode_id == 1

        env.close()
loop_example.py (project: htm-agents, author: ryanjmccall)
def main():
    episodeCount = 20
    stepsPerEpisode = 100

    env = gym.make("CartPole-v0")
    env = wrappers.Monitor(env, "/tmp/cartpole-experiment-1")
    for episode in range(episodeCount):
        observation = env.reset()
        for t in range(stepsPerEpisode):
            env.render()
            print(observation)
            action = env.action_space.sample()
            observation, reward, done, info = env.step(action)
            if done:
                print("Episode finished after {} timesteps".format(t + 1))
                break
test_monitor.py (project: AI-Fight-the-Landlord, author: YoungGer)
def test_env_reuse():
    with helpers.tempdir() as temp:
        env = gym.make('Autoreset-v0')
        env = Monitor(env, temp)

        env.reset()

        _, _, done, _ = env.step(None)
        assert not done
        _, _, done, _ = env.step(None)
        assert done

        _, _, done, _ = env.step(None)
        assert not done
        _, _, done, _ = env.step(None)
        assert done

        env.close()
test_monitor.py (project: AI-Fight-the-Landlord, author: YoungGer)
def test_steps_limit_restart():
    with helpers.tempdir() as temp:
        env = gym.make('test.StepsLimitCartpole-v0')
        env = Monitor(env, temp, video_callable=False)
        env.reset()

        # Episode has started
        _, _, done, info = env.step(env.action_space.sample())
        assert not done

        # Limit reached, now we get a done signal and the env resets itself
        _, _, done, info = env.step(env.action_space.sample())
        assert done
        assert env.episode_id == 1

        env.close()
gym.py (project: malmo-challenge, author: rhaps0dy)
def __init__(self, env_name, state_builder=ALEStateBuilder(), repeat_action=4, no_op=30, monitoring_path=None):
        assert isinstance(state_builder, StateBuilder), 'state_builder should inherit from StateBuilder'
        assert isinstance(repeat_action, (int, tuple)), 'repeat_action should be int or tuple'
        if isinstance(repeat_action, int):
            assert repeat_action >= 1, "repeat_action should be >= 1"
        elif isinstance(repeat_action, tuple):
            assert len(repeat_action) == 2, 'repeat_action should be a length-2 tuple: (min frameskip, max frameskip)'
            assert repeat_action[0] < repeat_action[1], 'repeat_action[0] should be < repeat_action[1]'

        super(GymEnvironment, self).__init__()

        self._state_builder = state_builder
        self._env = gym.make(env_name)
        self._env.env.frameskip = repeat_action
        self._no_op = max(0, no_op)
        self._done = True

        if monitoring_path is not None:
            self._env = Monitor(self._env, monitoring_path, video_callable=need_record)
viz.py (project: evolution-strategies-starter, author: openai)
def main(env_id, policy_file, record, stochastic, extra_kwargs):
    import gym
    from gym import wrappers
    import tensorflow as tf
    from es_distributed.policies import MujocoPolicy
    import numpy as np

    env = gym.make(env_id)
    if record:
        import uuid
        env = wrappers.Monitor(env, '/tmp/' + str(uuid.uuid4()), force=True)

    if extra_kwargs:
        import json
        extra_kwargs = json.loads(extra_kwargs)

    with tf.Session():
        pi = MujocoPolicy.Load(policy_file, extra_kwargs=extra_kwargs)
        while True:
            rews, t = pi.rollout(env, render=True, random_stream=np.random if stochastic else None)
            print('return={:.4f} len={}'.format(rews.sum(), t))

            if record:
                env.close()
                return
base_agent.py (project: gymmeforce, author: lgvaz)
def _create_env(self, monitor_dir, record_freq=None, max_episode_steps=None,
                    **kwargs):
        monitor_path = os.path.join(self.log_dir, monitor_dir)
        env = gym.make(self.env_name)
        if max_episode_steps is not None:
            env._max_episode_steps = max_episode_steps
        monitored_env = wrappers.Monitor(
            env=env,
            directory=monitor_path,
            resume=True,
            video_callable=lambda x: record_freq is not None and x % record_freq == 0)  # record every record_freq-th episode
        if self.env_wrapper is not None:
            env = self.env_wrapper.wrap_env(monitored_env)
        else:
            env = monitored_env

        return monitored_env, env
gym.py (project: demeter, author: evancasey)
def __init__(self, name,
             log_dir,
             obs_f=None,
             reward_f=None,
             clamp_actions=False,
             monitor=False):

        self._env = gym.make(name)
        self.log_dir = log_dir
        if monitor:
            self._env = wrappers.Monitor(self._env, log_dir, force=True)

        self.obs_f = obs_f
        self.reward_f = reward_f
        self.clamp_actions = clamp_actions

        self.monitor = monitor
run_dqn_ram.py (project: deep-q-learning, author: alvinwan)
def get_env(seed):
    env = gym.make('Pong-ram-v0')

    set_global_seeds(seed)
    env.seed(seed)

    expt_dir = '/tmp/hw3_vid_dir/'
    env = wrappers.Monitor(env, osp.join(expt_dir, "gym"), force=True)
    env = wrap_deepmind_ram(env)

    return env
run_dqn_atari.py (project: deep-q-learning, author: alvinwan)
def get_env(env_id, seed):
    env = gym.make(env_id)

    set_global_seeds(seed)
    env.seed(seed)

    expt_dir = './tmp/hw3_vid_dir2/'
    env = wrappers.Monitor(env, osp.join(expt_dir, "gym"), force=True)
    env = wrap_deepmind(env)

    return env
run_dqn_atari.py (project: deep-q-learning, author: alvinwan)
def get_custom_env(env_id, seed):
    env = gym.make(env_id)

    set_global_seeds(seed)
    env.seed(seed)

    expt_dir = './tmp/hw3_vid_dir2/'
    env = wrappers.Monitor(env, osp.join(expt_dir, "gym"), force=True)
    env = wrap_custom(env)

    return env
run-soccer.py (project: nesgym, author: codescv)
def get_env():
    env = gym.make('nesgym/NekketsuSoccerPK-v0')
    env = nesgym.wrap_nes_env(env)
    expt_dir = '/tmp/soccer/'
    env = wrappers.Monitor(env, os.path.join(expt_dir, "gym"), force=True)
    return env
run_dqn_ram.py (project: rl_algorithms, author: DanielTakeshi)
def get_env(seed):
    env = gym.make('Pong-ram-v0')

    set_global_seeds(seed)
    env.seed(seed)

    expt_dir = '/tmp/hw3_vid_dir/'
    env = wrappers.Monitor(env, osp.join(expt_dir, "gym"), force=True)
    env = wrap_deepmind_ram(env)

    return env
run_dqn_atari.py (project: rl_algorithms, author: DanielTakeshi)
def get_env(task, seed):
    env_id = task.env_id
    env = gym.make(env_id)
    set_global_seeds(seed)
    env.seed(seed)
    expt_dir = '/tmp/hw3_vid_dir2/'
    env = wrappers.Monitor(env, osp.join(expt_dir, "gym"), force=True)
    env = wrap_deepmind(env)
    return env
es.py (project: rl_algorithms, author: DanielTakeshi)
def test(self, just_one=True):
        """ This is for test-time evaluation. No training is done here. By
        default, iterate through every snapshot.  If `just_one` is true, this
        only runs one set of weights, to ensure that we record right away since
        OpenAI will only record subsets and less frequently.  Changing the loop
        over snapshots is also needed.
        """
        os.makedirs(self.args.directory+'/videos')
        self.env = wrappers.Monitor(self.env, self.args.directory+'/videos', force=True)

        headdir = self.args.directory+'/snapshots/'
        snapshots = os.listdir(headdir)
        snapshots.sort()
        num_rollouts = 10
        if just_one:
            num_rollouts = 1

        for sn in snapshots:
            print("\n***** Currently on snapshot {} *****".format(sn))

            ### Add your own criteria here.
            # if "800" not in sn:
            #     continue
            ###

            with open(headdir+sn, 'rb') as f:
                weights = pickle.load(f)
            self.sess.run(self.set_params_op, 
                          feed_dict={self.new_weights_v: weights})
            returns = []
            for i in range(num_rollouts):
                returns.append( self._compute_return(test=True) )
            print("mean: \t{}".format(np.mean(returns)))
            print("std: \t{}".format(np.std(returns)))
            print("max: \t{}".format(np.max(returns)))
            print("min: \t{}".format(np.min(returns)))
            print("returns:\n{}".format(returns))
run_model.py (project: DeepRL, author: arnomoonens)
def main():
    args = parser.parse_args()
    env = make_environment(args.environment)
    runner = ModelRunner(env, args.model_directory, args.save_directory, n_iter=args.iterations)
    try:
        runner.env = wrappers.Monitor(runner.env, args.save_directory, video_callable=False, force=True)
        runner.run()
    except KeyboardInterrupt:
        pass
a2c.py (project: DeepRL, author: arnomoonens)
def __init__(self, env, monitor_path, video=True, **usercfg):
        super(A2C, self).__init__(**usercfg)
        self.monitor_path = monitor_path

        self.env = wrappers.Monitor(env, monitor_path, force=True, video_callable=(None if video else False))
        self.env_runner = EnvRunner(self.env, self, usercfg)

        self.config.update(dict(
            timesteps_per_batch=10000,
            trajectories_per_batch=10,
            batch_update="timesteps",
            n_iter=100,
            gamma=0.99,
            actor_learning_rate=0.01,
            critic_learning_rate=0.05,
            actor_n_hidden=20,
            critic_n_hidden=20,
            repeat_n_actions=1,
            save_model=False
        ))
        self.config.update(usercfg)
        self.build_networks()
        init = tf.global_variables_initializer()
        # Launch the graph.
        self.session = tf.Session()
        self.session.run(init)
        if self.config["save_model"]:
            tf.add_to_collection("action", self.action)
            tf.add_to_collection("states", self.states)
            self.saver = tf.train.Saver()
        self.rewards = tf.placeholder("float", name="Rewards")
        self.episode_lengths = tf.placeholder("float", name="Episode_lengths")
        summary_actor_loss = tf.summary.scalar("Actor_loss", self.summary_actor_loss)
        summary_critic_loss = tf.summary.scalar("Critic_loss", self.summary_critic_loss)
        summary_rewards = tf.summary.scalar("Rewards", self.rewards)
        summary_episode_lengths = tf.summary.scalar("Episode_lengths", self.episode_lengths)
        self.summary_op = tf.summary.merge([summary_actor_loss, summary_critic_loss, summary_rewards, summary_episode_lengths])
        self.writer = tf.summary.FileWriter(os.path.join(self.monitor_path, "summaries"), self.session.graph)
        return
reinforce.py (project: DeepRL, author: arnomoonens)
def __init__(self, env, monitor_path, video=True, **usercfg):
        super(REINFORCE, self).__init__(**usercfg)
        self.env = wrappers.Monitor(env, monitor_path, force=True, video_callable=(None if video else False))
        self.env_runner = EnvRunner(self.env, self, usercfg)
        self.monitor_path = monitor_path
        # Default configuration. Can be overwritten using keyword arguments.
        self.config.update(dict(
            batch_update="timesteps",
            timesteps_per_batch=1000,
            n_iter=100,
            gamma=0.99,  # Discount past rewards by a percentage
            decay=0.9,  # Decay of RMSProp optimizer
            epsilon=1e-9,  # Epsilon of RMSProp optimizer
            learning_rate=0.05,
            n_hidden_units=20,
            repeat_n_actions=1,
            save_model=False
        ))
        self.config.update(usercfg)

        self.build_network()
        self.make_trainer()

        init = tf.global_variables_initializer()
        # Launch the graph.
        self.session = tf.Session()
        self.session.run(init)
        if self.config["save_model"]:
            tf.add_to_collection("action", self.action)
            tf.add_to_collection("states", self.states)
            self.saver = tf.train.Saver()
        self.rewards = tf.placeholder("float", name="Rewards")
        self.episode_lengths = tf.placeholder("float", name="Episode_lengths")
        summary_loss = tf.summary.scalar("Loss", self.summary_loss)
        summary_rewards = tf.summary.scalar("Rewards", self.rewards)
        summary_episode_lengths = tf.summary.scalar("Episode_lengths", self.episode_lengths)
        self.summary_op = tf.summary.merge([summary_loss, summary_rewards, summary_episode_lengths])
        self.writer = tf.summary.FileWriter(os.path.join(self.monitor_path, "task0"), self.session.graph)
test_wrappers.py (project: gym, author: openai)
def test_no_double_wrapping():
    temp = tempfile.mkdtemp()
    try:
        env = gym.make("FrozenLake-v0")
        env = wrappers.Monitor(env, temp)
        try:
            env = wrappers.Monitor(env, temp)
        except error.DoubleWrapperError:
            pass
        else:
            assert False, "Should not allow double wrapping"
        env.close()
    finally:
        shutil.rmtree(temp)
test_benchmark.py (project: gym, author: openai)
def test():
    benchmark = registration.Benchmark(
        id='MyBenchmark-v0',
        scorer=scoring.ClipTo01ThenAverage(),
        tasks=[
            {'env_id': 'CartPole-v0',
             'trials': 1,
             'max_timesteps': 5
            },
            {'env_id': 'CartPole-v0',
             'trials': 1,
             'max_timesteps': 100,
            }])

    with helpers.tempdir() as temp:
        env = gym.make('CartPole-v0')
        env = wrappers.Monitor(env, directory=temp, video_callable=False)
        env.seed(0)

        env.set_monitor_mode('evaluation')
        rollout(env)

        env.set_monitor_mode('training')
        for i in range(2):
            rollout(env)

        env.set_monitor_mode('evaluation')
        rollout(env, good=True)

        env.close()
        results = monitoring.load_results(temp)
        evaluation_score = benchmark.score_evaluation('CartPole-v0', results['data_sources'], results['initial_reset_timestamps'], results['episode_lengths'], results['episode_rewards'], results['episode_types'], results['timestamps'])
        benchmark_score = benchmark.score_benchmark({
            'CartPole-v0': evaluation_score['scores'],
        })

        assert np.all(np.isclose(evaluation_score['scores'], [0.00089999999999999998, 0.0054000000000000003])), "evaluation_score={}".format(evaluation_score)
        assert np.isclose(benchmark_score, 0.00315), "benchmark_score={}".format(benchmark_score)
test_monitor.py (project: gym, author: openai)
def test_monitor_filename():
    with helpers.tempdir() as temp:
        env = gym.make('CartPole-v0')
        env = Monitor(env, directory=temp)
        env.close()

        manifests = glob.glob(os.path.join(temp, '*.manifest.*'))
        assert len(manifests) == 1
test_monitor.py (project: gym, author: openai)
def test_write_upon_reset_false():
    with helpers.tempdir() as temp:
        env = gym.make('CartPole-v0')
        env = Monitor(env, directory=temp, video_callable=False, write_upon_reset=False)
        env.reset()

        files = glob.glob(os.path.join(temp, '*'))
        assert not files, "Files: {}".format(files)

        env.close()
        files = glob.glob(os.path.join(temp, '*'))
        assert len(files) > 0

