python类Monitor()的实例源码

test_monitor.py 文件源码 项目:gym 作者: openai 项目源码 文件源码 阅读 24 收藏 0 点赞 0 评论 0
def test_write_upon_reset_true():
    """Check that ``write_upon_reset=True`` leaves files behind after a reset.

    The monitor directory must be non-empty both right after ``reset()`` and
    again after ``close()``.
    """
    with helpers.tempdir() as temp:
        everything = os.path.join(temp, '*')
        monitored = Monitor(
            gym.make('CartPole-v0'),
            directory=temp,
            video_callable=False,
            write_upon_reset=True,
        )
        monitored.reset()

        after_reset = glob.glob(everything)
        assert len(after_reset) > 0, "Files: {}".format(after_reset)

        monitored.close()
        after_close = glob.glob(everything)
        assert len(after_close) > 0
test_monitor.py 文件源码 项目:gym 作者: openai 项目源码 文件源码 阅读 19 收藏 0 点赞 0 评论 0
def test_video_callable_false_does_not_record():
    """Check that no videos are listed in the results when ``video_callable=False``."""
    with helpers.tempdir() as temp:
        monitored = Monitor(gym.make('CartPole-v0'), temp, video_callable=False)
        monitored.reset()
        monitored.close()

        recorded = monitoring.load_results(temp)['videos']
        assert len(recorded) == 0
test_monitor.py 文件源码 项目:gym 作者: openai 项目源码 文件源码 阅读 23 收藏 0 点赞 0 评论 0
def test_video_callable_records_videos():
    """Check that one reset with the default ``video_callable`` yields exactly one video."""
    with helpers.tempdir() as temp:
        monitored = Monitor(gym.make('CartPole-v0'), temp)
        monitored.reset()
        monitored.close()

        recorded = monitoring.load_results(temp)['videos']
        assert len(recorded) == 1, "Videos: {}".format(recorded)
test_monitor.py 文件源码 项目:gym 作者: openai 项目源码 文件源码 阅读 20 收藏 0 点赞 0 评论 0
def test_semisuper_succeeds():
    """Regression test: a monitored semi-supervised env can reset, step and close.

    The test has no explicit assertions; it passes as long as nothing raises.
    """
    with helpers.tempdir() as temp:
        monitored = Monitor(gym.make('SemisuperPendulumDecay-v0'), temp)
        monitored.reset()
        random_action = monitored.action_space.sample()
        monitored.step(random_action)
        monitored.close()
test_monitor.py 文件源码 项目:gym 作者: openai 项目源码 文件源码 阅读 25 收藏 0 点赞 0 评论 0
def test_no_monitor_reset_unless_done():
    """Check when a Monitor-wrapped env accepts ``reset()``.

    The sequence below asserts that resets are accepted before any step and
    after an episode has finished, and that a reset issued mid-episode raises
    ``error.Error``.
    """
    def assert_reset_raises(env):
        # Passes only when reset() raises error.Error.
        try:
            env.reset()
        except error.Error:
            return
        assert False, "Env allowed a reset when it shouldn't have"

    def random_step(env):
        # Take one step with a randomly sampled action.
        return env.step(env.action_space.sample())

    with helpers.tempdir() as temp:
        # An unmonitored env may be reset at any point.
        raw_env = gym.make('CartPole-v0')
        raw_env.reset()
        random_step(raw_env)
        random_step(raw_env)
        raw_env.reset()

        # First reset right after wrapping is allowed ...
        env = Monitor(raw_env, temp, video_callable=False)
        env.reset()

        # ... and so are back-to-back resets with no step in between.
        env.reset()
        env.reset()

        # Mid-episode reset must be rejected.
        random_step(env)
        random_step(env)
        assert_reset_raises(env)

        # Run the episode to completion; resets are allowed again afterwards.
        finished = False
        while not finished:
            _, _, finished, _ = random_step(env)

        env.reset()
        env.reset()

        random_step(env)
        assert_reset_raises(env)

        env.close()
eval.py 文件源码 项目:deep-rl 作者: xinghai-sun 项目源码 文件源码 阅读 27 收藏 0 点赞 0 评论 0
def create_env(conf, monitor_on=False):
    """Build the gym environment described by ``conf``.

    Args:
        conf: Mapping read for the keys ``'env'``, ``'monitor_dir'`` and
            ``'use_atari_wrapper'``.
        monitor_on: When true and ``conf['monitor_dir']`` is not the empty
            string, the env is wrapped in ``wrappers.Monitor`` (``force=True``).

    Returns:
        The environment, optionally monitored and, if
        ``conf['use_atari_wrapper']`` is truthy, wrapped in
        ``AtariRescale42x42Wrapper`` and then ``NormalizeWrapper``.
    """
    env = gym.make(conf['env'])

    monitor_dir = conf['monitor_dir']
    if monitor_dir != '' and monitor_on:
        env = wrappers.Monitor(env, monitor_dir, force=True)

    if not conf['use_atari_wrapper']:
        return env
    return NormalizeWrapper(AtariRescale42x42Wrapper(env))
run.py 文件源码 项目:deep-rl 作者: xinghai-sun 项目源码 文件源码 阅读 27 收藏 0 点赞 0 评论 0
def create_env(conf):
    """Build the gym environment described by ``conf``.

    Args:
        conf: Mapping read for the keys ``'env'``, ``'monitor_dir'`` and
            ``'use_atari_wrapper'``.

    Returns:
        The environment, wrapped in ``wrappers.Monitor`` (``force=True``) when
        ``conf['monitor_dir']`` is truthy and, if ``conf['use_atari_wrapper']``
        is truthy, in ``AtariRescale42x42Wrapper`` and then ``NormalizeWrapper``.
    """
    env = gym.make(conf['env'])

    monitor_dir = conf['monitor_dir']
    if monitor_dir:
        env = wrappers.Monitor(env, monitor_dir, force=True)

    if not conf['use_atari_wrapper']:
        return env
    return NormalizeWrapper(AtariRescale42x42Wrapper(env))
test_wrappers.py 文件源码 项目:AI-Fight-the-Landlord 作者: YoungGer 项目源码 文件源码 阅读 25 收藏 0 点赞 0 评论 0
def test_no_double_wrapping():
    """Check that wrapping an already-monitored env raises ``DoubleWrapperError``.

    The monitor is closed in a ``finally`` block so that it is released before
    the temporary directory is removed, even when the assertion fails.
    """
    temp = tempfile.mkdtemp()
    try:
        env = gym.make("FrozenLake-v0")
        env = wrappers.Monitor(env, temp)
        try:
            try:
                env = wrappers.Monitor(env, temp)
            except error.DoubleWrapperError:
                pass
            else:
                assert False, "Should not allow double wrapping"
        finally:
            # Runs on the failure path too, before rmtree removes the directory.
            env.close()
    finally:
        shutil.rmtree(temp)
test_benchmark.py 文件源码 项目:AI-Fight-the-Landlord 作者: YoungGer 项目源码 文件源码 阅读 19 收藏 0 点赞 0 评论 0
def test():
    """Score a monitored CartPole run against a two-task benchmark.

    Runs one evaluation rollout, two training rollouts and one more evaluation
    rollout (with ``good=True``), then checks the per-task evaluation scores
    and the aggregate benchmark score against fixed expected values.
    """
    short_task = {'env_id': 'CartPole-v0', 'trials': 1, 'max_timesteps': 5}
    long_task = {'env_id': 'CartPole-v0', 'trials': 1, 'max_timesteps': 100}
    benchmark = registration.Benchmark(
        id='MyBenchmark-v0',
        scorer=scoring.ClipTo01ThenAverage(),
        tasks=[short_task, long_task],
    )

    with helpers.tempdir() as temp:
        env = wrappers.Monitor(gym.make('CartPole-v0'), directory=temp, video_callable=False)
        env.seed(0)

        env.set_monitor_mode('evaluation')
        rollout(env)

        env.set_monitor_mode('training')
        for _ in range(2):
            rollout(env)

        env.set_monitor_mode('evaluation')
        rollout(env, good=True)

        env.close()
        results = monitoring.load_results(temp)

        # Positional order expected by score_evaluation after the env id.
        result_keys = (
            'data_sources',
            'initial_reset_timestamps',
            'episode_lengths',
            'episode_rewards',
            'episode_types',
            'timestamps',
        )
        evaluation_score = benchmark.score_evaluation(
            'CartPole-v0', *[results[key] for key in result_keys])
        benchmark_score = benchmark.score_benchmark({
            'CartPole-v0': evaluation_score['scores'],
        })

        expected_scores = [0.00089999999999999998, 0.0054000000000000003]
        scores_match = np.isclose(evaluation_score['scores'], expected_scores)
        assert np.all(scores_match), "evaluation_score={}".format(evaluation_score)
        assert np.isclose(benchmark_score, 0.00315), "benchmark_score={}".format(benchmark_score)
test_monitor.py 文件源码 项目:AI-Fight-the-Landlord 作者: YoungGer 项目源码 文件源码 阅读 20 收藏 0 点赞 0 评论 0
def test_monitor_filename():
    """Check that closing a fresh monitor leaves exactly one ``*.manifest.*`` file."""
    with helpers.tempdir() as temp:
        monitored = Monitor(gym.make('CartPole-v0'), directory=temp)
        monitored.close()

        manifest_pattern = os.path.join(temp, '*.manifest.*')
        found = glob.glob(manifest_pattern)
        assert len(found) == 1
test_monitor.py 文件源码 项目:AI-Fight-the-Landlord 作者: YoungGer 项目源码 文件源码 阅读 20 收藏 0 点赞 0 评论 0
def test_write_upon_reset_false():
    """Check that ``write_upon_reset=False`` defers writing until ``close()``.

    The monitor directory must be empty right after ``reset()`` and non-empty
    after ``close()``.
    """
    with helpers.tempdir() as temp:
        everything = os.path.join(temp, '*')
        monitored = Monitor(
            gym.make('CartPole-v0'),
            directory=temp,
            video_callable=False,
            write_upon_reset=False,
        )
        monitored.reset()

        after_reset = glob.glob(everything)
        assert not after_reset, "Files: {}".format(after_reset)

        monitored.close()
        after_close = glob.glob(everything)
        assert len(after_close) > 0
test_monitor.py 文件源码 项目:AI-Fight-the-Landlord 作者: YoungGer 项目源码 文件源码 阅读 19 收藏 0 点赞 0 评论 0
def test_write_upon_reset_true():
    """Check that ``write_upon_reset=True`` leaves files behind after a reset.

    The monitor directory must be non-empty both right after ``reset()`` and
    again after ``close()``.
    """
    with helpers.tempdir() as temp:
        everything = os.path.join(temp, '*')
        monitored = Monitor(
            gym.make('CartPole-v0'),
            directory=temp,
            video_callable=False,
            write_upon_reset=True,
        )
        monitored.reset()

        after_reset = glob.glob(everything)
        assert len(after_reset) > 0, "Files: {}".format(after_reset)

        monitored.close()
        after_close = glob.glob(everything)
        assert len(after_close) > 0
test_monitor.py 文件源码 项目:AI-Fight-the-Landlord 作者: YoungGer 项目源码 文件源码 阅读 19 收藏 0 点赞 0 评论 0
def test_video_callable_false_does_not_record():
    """Check that no videos are listed in the results when ``video_callable=False``."""
    with helpers.tempdir() as temp:
        monitored = Monitor(gym.make('CartPole-v0'), temp, video_callable=False)
        monitored.reset()
        monitored.close()

        recorded = monitoring.load_results(temp)['videos']
        assert len(recorded) == 0
test_monitor.py 文件源码 项目:AI-Fight-the-Landlord 作者: YoungGer 项目源码 文件源码 阅读 22 收藏 0 点赞 0 评论 0
def test_video_callable_records_videos():
    """Check that one reset with the default ``video_callable`` yields exactly one video."""
    with helpers.tempdir() as temp:
        monitored = Monitor(gym.make('CartPole-v0'), temp)
        monitored.reset()
        monitored.close()

        recorded = monitoring.load_results(temp)['videos']
        assert len(recorded) == 1, "Videos: {}".format(recorded)
test_monitor.py 文件源码 项目:AI-Fight-the-Landlord 作者: YoungGer 项目源码 文件源码 阅读 22 收藏 0 点赞 0 评论 0
def test_semisuper_succeeds():
    """Regression test: a monitored semi-supervised env can reset, step and close.

    The test has no explicit assertions; it passes as long as nothing raises.
    """
    with helpers.tempdir() as temp:
        monitored = Monitor(gym.make('SemisuperPendulumDecay-v0'), temp)
        monitored.reset()
        random_action = monitored.action_space.sample()
        monitored.step(random_action)
        monitored.close()
test_monitor.py 文件源码 项目:AI-Fight-the-Landlord 作者: YoungGer 项目源码 文件源码 阅读 22 收藏 0 点赞 0 评论 0
def test_no_monitor_reset_unless_done():
    """Check when a Monitor-wrapped env accepts ``reset()``.

    The sequence below asserts that resets are accepted before any step and
    after an episode has finished, and that a reset issued mid-episode raises
    ``error.Error``.
    """
    def assert_reset_raises(env):
        # Passes only when reset() raises error.Error.
        try:
            env.reset()
        except error.Error:
            return
        assert False, "Env allowed a reset when it shouldn't have"

    def random_step(env):
        # Take one step with a randomly sampled action.
        return env.step(env.action_space.sample())

    with helpers.tempdir() as temp:
        # An unmonitored env may be reset at any point.
        raw_env = gym.make('CartPole-v0')
        raw_env.reset()
        random_step(raw_env)
        random_step(raw_env)
        raw_env.reset()

        # First reset right after wrapping is allowed ...
        env = Monitor(raw_env, temp, video_callable=False)
        env.reset()

        # ... and so are back-to-back resets with no step in between.
        env.reset()
        env.reset()

        # Mid-episode reset must be rejected.
        random_step(env)
        random_step(env)
        assert_reset_raises(env)

        # Run the episode to completion; resets are allowed again afterwards.
        finished = False
        while not finished:
            _, _, finished, _ = random_step(env)

        env.reset()
        env.reset()

        random_step(env)
        assert_reset_raises(env)

        env.close()
DQfD_CartPole.py 文件源码 项目:DQfD 作者: go2sea 项目源码 文件源码 阅读 24 收藏 0 点赞 0 评论 0
def get_demo_data(env):
    """Train an agent on ``env`` and pickle the episodes that reach a score of 500.

    A ``DQfDDDQN`` agent is trained episode by episode. Every transition is fed
    to the agent, and transitions from episodes whose total score equals 500
    are added to ``agent.demo_buffer`` after ``set_n_step`` post-processing.
    Once the buffer holds at least ``Config.demo_buffer_size`` entries it is
    truncated to that size and pickled to ``Config.DEMO_DATA_PATH``.

    Args:
        env: Environment exposing ``reset()`` and a ``step(action)`` that
            returns ``(next_state, reward, done, info)``.
    """
    # env = wrappers.Monitor(env, '/tmp/CartPole-v0', force=True)
    # agent.restore_model()
    with tf.variable_scope('get_demo_data'):
        agent = DQfDDDQN(env, DDQNConfig())

    e = 0
    while True:
        done = False
        score = 0  # sum of reward in one episode
        state = env.reset()
        demo = []
        # Truthiness, not `done is False`: an identity test is also false for
        # falsy non-bool flags (e.g. numpy.bool_), which would end the episode
        # after a single step.
        while not done:
            action = agent.egreedy_action(state)  # e-greedy action for train
            next_state, reward, done, _ = env.step(action)
            score += reward
            # Penalise the terminating step unless the running score is 499.
            # NOTE(review): score is updated before this check while the
            # expert test below uses 500 -- confirm 499 is intended.
            reward = reward if not done or score == 499 else -100
            agent.perceive([state, action, reward, next_state, done, 0.0])  # 0. means it is not a demo data
            demo.append([state, action, reward, next_state, done, 1.0])  # record the data that could be expert-data
            agent.train_Q_network(update=False)
            state = next_state

        # The loop above only exits once `done` is truthy, so the episode is over.
        if score == 500:  # expert demo data
            demo = set_n_step(demo, Config.trajectory_n)
            agent.demo_buffer.extend(demo)
        agent.sess.run(agent.update_target_net)
        print("episode:", e, "  score:", score, "  demo_buffer:", len(agent.demo_buffer),
              "  memory length:", len(agent.replay_buffer), "  epsilon:", agent.epsilon)
        if len(agent.demo_buffer) >= Config.demo_buffer_size:
            # Keep only the first demo_buffer_size entries, then stop collecting.
            agent.demo_buffer = deque(itertools.islice(agent.demo_buffer, 0, Config.demo_buffer_size))
            break
        e += 1

    with open(Config.DEMO_DATA_PATH, 'wb') as f:
        pickle.dump(agent.demo_buffer, f, protocol=2)
dqn.py 文件源码 项目:MLAlgorithms 作者: rushter 项目源码 文件源码 阅读 23 收藏 0 点赞 0 评论 0
def init_environment(self, name='CartPole-v0', monitor=False):
        """Create the gym environment and the empty experience-replay list.

        Args:
            name: Environment id passed to ``gym.make``; also used as the
                Monitor output directory when ``monitor`` is true.
            monitor: When true, wrap the env in ``wrappers.Monitor`` with
                ``force=True`` and video recording disabled.
        """
        self.env = gym.make(name)
        if monitor:
            self.env = wrappers.Monitor(
                self.env,
                name,
                force=True,
                video_callable=False,
            )

        env = self.env
        # Sizes come from the first observation dimension and the action count.
        self.n_states = env.observation_space.shape[0]
        self.n_actions = env.action_space.n

        # Experience replay starts out empty.
        self.replay = list()
atari_game_wrapper.py 文件源码 项目:DeepRL 作者: ChiWeiHsiao 项目源码 文件源码 阅读 24 收藏 0 点赞 0 评论 0
def __init__(self, game_name, histoy_length, render=False):
        """Create the gym env for ``game_name`` and set up the wrapper's state.

        Args:
            game_name: Environment id passed to ``gym.make``.
            histoy_length: Number of frames that make up one state.
            render: Stored on the instance as ``self.render``.
        """
        self.env = gym.make(game_name)
        # self.env = wrappers.Monitor(self.env, 'records/atari-experiment-1')
        env = self.env
        self.render = render
        self.n_actions = env.action_space.n
        self.n_observation = len(env.observation_space.high)
        # Frame size used when resizing.
        self.resize_width = 80
        self.resize_height = 80
        # One state contains 'histoy_length' frames.
        self.histoy_length = histoy_length
        # Buffer keeps 'histoy_length-1' frames.
        self.state_buffer = deque()
        self.show_game_info()
game_wrapper.py 文件源码 项目:DeepRL 作者: ChiWeiHsiao 项目源码 文件源码 阅读 21 收藏 0 点赞 0 评论 0
def __init__(self, game_name, histoy_length, render=False):
        """Create the gym env for ``game_name`` and set up the wrapper's state.

        Args:
            game_name: Environment id passed to ``gym.make``.
            histoy_length: Number of observations that make up one state.
            render: Stored on the instance as ``self.render``.
        """
        self.env = gym.make(game_name)
        # self.env = wrappers.Monitor(self.env, 'records/atari-experiment-1')
        env = self.env
        self.render = render
        self.n_actions = env.action_space.n
        self.n_observation = len(env.observation_space.high)
        # One state contains 'histoy_length' observations.
        self.histoy_length = histoy_length
        # Buffer keeps 'histoy_length-1' observations.
        self.state_buffer = deque()
        self.show_game_info()


问题


面经


文章

微信
公众号

扫码关注公众号