def main():
env = gym.make('Stochastic-4x4-FrozenLake-v0')
policy = learn_with_mdp_model(env)
render_single(env, policy)
# for i in range(10):
# print('\n%d' % i)
# env.render()
# print(env.step(env.action_space.sample()))
# env.render()
# for init_state in env.P.keys():
# for action in env.P[init_state]:
# print("\nState: %d, action: %d" % (init_state, action))
# for next_state in env.P[init_state][action]:
# print(next_state)
# for _ in range(10):
# env.render()
# env.step(env.action_space.sample())
Python gym.make() example source code
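Before the project-specific snippets, here is a minimal sketch of the gym.make() workflow they all share. CartPole-v0 is used only as a stand-in environment and the random policy is a placeholder; the sketch follows the classic obs/reward/done/info step API that the snippets below assume.
import gym

def minimal_rollout(env_id="CartPole-v0", episodes=1):
    # gym.make() looks up the registered spec and returns a (possibly wrapped) env
    env = gym.make(env_id)
    for _ in range(episodes):
        obs = env.reset()
        done = False
        total_reward = 0.0
        while not done:
            action = env.action_space.sample()  # random policy as a placeholder
            obs, reward, done, info = env.step(action)
            total_reward += reward
        print("episode reward: %.2f" % total_reward)
    env.close()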
def init(self):
gym.configuration.undo_logger_setup()
self.env = gym.make(self.env_name)
self.n_inputs, self.input_handler = self._init_space(
self.env.action_space)
self.inputs = np.empty(self.n_inputs)
self.n_outputs, _ = self._init_space(self.env.observation_space)
self.outputs = np.empty(self.n_outputs)
if self.seed is not None:
self.env.seed(self.seed)
self.logger = get_logger(self, self.log_to_file, self.log_to_stdout)
if self.log_to_stdout or self.log_to_file:
self.logger.info("Number of inputs: %d" % self.n_inputs)
self.logger.info("Number of outputs: %d" % self.n_outputs)
def __init__(self, env_name, state_builder=ALEStateBuilder(), repeat_action=4, no_op=30, monitoring_path=None):
assert isinstance(state_builder, StateBuilder), 'state_builder should inherit from StateBuilder'
assert isinstance(repeat_action, (int, tuple)), 'repeat_action should be int or tuple'
if isinstance(repeat_action, int):
assert repeat_action >= 1, "repeat_action should be >= 1"
elif isinstance(repeat_action, tuple):
assert len(repeat_action) == 2, 'repeat_action should be a length-2 tuple: (min frameskip, max frameskip)'
assert repeat_action[0] < repeat_action[1], 'repeat_action[0] should be < repeat_action[1]'
super(GymEnvironment, self).__init__()
self._state_builder = state_builder
self._env = gym.make(env_name)
self._env.env.frameskip = repeat_action
self._no_op = max(0, no_op)
self._done = True
if monitoring_path is not None:
self._env = Monitor(self._env, monitoring_path, video_callable=need_record)
def deterministic_grid_test():
env = gym.make("deterministic-grid-world-v0")
prev_state = env.state
for _ in range(100): env.step(0)  # no-op
assert env.state == prev_state
while env.state[0] > 0:
env.step(1)
assert env.state[0] == 0
env.step(1)
assert env.state[0] == 0
while env.state[1] < env.board_size[1] - 1:
env.step(3)
assert env.state[1] == env.board_size[1] - 1
env.step(3)
assert env.state[1] == env.board_size[1] - 1
def __init__(self, n_options=10, logger=None, plotting=False,
log_tf_graph=False):
if logger is None:
logger = logging.getLogger("logger")
logger.setLevel(logging.INFO)
self.logger = logger
self.n_options = n_options
self.env = gym.make("deterministic-grid-world-v0")
self.n_actions = self.env.action_space.n
self.n_states = 1 + reduce(lambda x, y: x*y,
map(lambda x: x.n, self.env.observation_space.spaces))
if plotting:
self.plot_robots = [PlotRobot('dqn loss', 0, log_scale=True),
PlotRobot('q loss', 1), PlotRobot('rewards', 2)]
else:
self.plot_robots = [None] * 3
self.plotting = self.plot_robots[2]
self.colors = list('bgrcmyk') + ['magenta', 'lime', 'gray']
self.build_graph(log_tf_graph)
def execute(symbol, begin, end, days, plot, model_path, random):
print(model_path)
model = load_model(model_path)
env = gym.make('trading-v0').env
env.initialise(symbol=symbol, start=begin, end=end, days=days, random=random)
state_size = env.observation_space.shape[0]
state = env.reset()
done = False
while not done:
state = state.reshape(1, state_size)
# state = state.reshape(1, 1, state_size)
qval = model.predict(state, batch_size=1)
action = (np.argmax(qval))
state, _, done, info = env.step(action)
# log.info("%s,%s,%s,%s",state, _, done, info)
# log.info("\n%s", env.sim.to_df())
if plot:
env.render()
def __init__(self, game="MsPacman-v0"):
self.screen_h = Config.SCREEN_H
self.screen_w = Config.SCREEN_W
self.screen_shape = Config.SCREEN_SHAPE
self.frame_per_row = Config.FRAME_PER_ROW
self.frame_buffer = None
self.action_space = 9
# meta
self.total_episode_run = 0
self.steps_in_episode = 0
self.max_steps_in_episode = 0
self.env = gym.make(game)
self.reset()
RoboschoolInvertedPendulum_v0_2017may.py (project: roboschool, author: openai)
def demo_run():
env = gym.make("RoboschoolInvertedPendulum-v1")
pi = SmallReactivePolicy(env.observation_space, env.action_space)
while 1:
frame = 0
score = 0
restart_delay = 0
obs = env.reset()
while 1:
a = pi.act(obs)
obs, r, done, _ = env.step(a)
score += r
frame += 1
still_open = env.render("human")
if still_open==False:
return
if not done: continue
if restart_delay==0:
print("score=%0.2f in %i frames" % (score, frame))
restart_delay = 60*2 # 2 sec at 60 fps
else:
restart_delay -= 1
if restart_delay==0: break
RoboschoolHumanoidFlagrun_v0_2017may.py (project: roboschool, author: openai)
def demo_run():
env = gym.make("RoboschoolHumanoidFlagrun-v1")
pi = SmallReactivePolicy(env.observation_space, env.action_space)
while 1:
frame = 0
score = 0
restart_delay = 0
obs = env.reset()
while 1:
a = pi.act(obs)
obs, r, done, _ = env.step(a)
score += r
frame += 1
still_open = env.render("human")
if still_open==False:
return
if not done: continue
if restart_delay==0:
print("score=%0.2f in %i frames" % (score, frame))
restart_delay = 60*2 # 2 sec at 60 fps
else:
restart_delay -= 1
if restart_delay==0: break
def demo_run():
env = gym.make("RoboschoolAnt-v1")
pi = SmallReactivePolicy(env.observation_space, env.action_space)
while 1:
frame = 0
score = 0
restart_delay = 0
obs = env.reset()
while 1:
a = pi.act(obs)
obs, r, done, _ = env.step(a)
score += r
frame += 1
still_open = env.render("human")
if still_open==False:
return
if not done: continue
if restart_delay==0:
print("score=%0.2f in %i frames" % (score, frame))
restart_delay = 60*2 # 2 sec at 60 fps
else:
restart_delay -= 1
if restart_delay==0: break
def demo_run():
env = gym.make("RoboschoolReacher-v1")
pi = SmallReactivePolicy(env.observation_space, env.action_space)
while 1:
frame = 0
score = 0
obs = env.reset()
while 1:
a = pi.act(obs)
obs, r, done, _ = env.step(a)
score += r
frame += 1
still_open = env.render("human")
if still_open==False:
return
if not done: continue
print("score=%0.2f in %i frames" % (score, frame))
break
def demo_run():
env = gym.make("RoboschoolHopper-v1")
pi = SmallReactivePolicy(env.observation_space, env.action_space)
while 1:
frame = 0
score = 0
restart_delay = 0
obs = env.reset()
while 1:
a = pi.act(obs)
obs, r, done, _ = env.step(a)
score += r
frame += 1
still_open = env.render("human")
if still_open==False:
return
if not done: continue
if restart_delay==0:
print("score=%0.2f in %i frames" % (score, frame))
restart_delay = 60*2 # 2 sec at 60 fps
else:
restart_delay -= 1
if restart_delay==0: break
def demo_run():
env = gym.make("RoboschoolWalker2d-v1")
pi = SmallReactivePolicy(env.observation_space, env.action_space)
while 1:
frame = 0
score = 0
restart_delay = 0
obs = env.reset()
while 1:
a = pi.act(obs)
obs, r, done, _ = env.step(a)
score += r
frame += 1
still_open = env.render("human")
if still_open==False:
return
if not done: continue
if restart_delay==0:
print("score=%0.2f in %i frames" % (score, frame))
restart_delay = 60*2 # 2 sec at 60 fps
else:
restart_delay -= 1
if restart_delay==0: break
def multiplayer(self, env, game_server_guid, player_n):
"""
Call this between gym.make() and the first env.reset() to connect to a multiplayer server.
game_server_guid -- an id that server and client use to identify themselves as belonging to the same session.
player_n -- integer, up to scene.players_count.
Note that env._reset() gets overwritten here: calling env.reset() will no longer create a
single-player scene on your side (as it usually does), but will instead ask the server to reset
the environment there. The same applies to step() and render(). A usage sketch follows this function.
"""
self.shmem_client_init(game_server_guid, player_n)
env._step = self.shmem_client_step # replace the real function with a stub that talks to the environment on the server
env._reset = self.shmem_client_reset
env._render = self.shmem_client_rgb_array
self.shmem_client_send_env_id()
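A hedged sketch of the client-side call order the docstring above describes, assuming multiplayer() is exposed on the unwrapped env as in the roboschool agent-zoo examples; the environment id, session guid, and player index are placeholders.
env = gym.make("RoboschoolPong-v1")  # placeholder multiplayer-capable env id
env.unwrapped.multiplayer(env, game_server_guid="my_session", player_n=0)
obs = env.reset()  # now resets the scene on the server instead of building one locally
obs, rew, done, info = env.step(env.action_space.sample())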
def read_env_id_and_create_env(self):
self.sh_pipe_actready = open(self.sh_pipe_actready_filename, "rt")
self.sh_pipe_obsready = os.open(self.sh_pipe_obsready_filename, os.O_WRONLY)
env_id = self.sh_pipe_actready.readline()[:-1]
if env_id.find("-v")==-1:
raise ValueError("multiplayer client %s sent an invalid environment id '%s'" % (self.prefix, env_id))
#
# And at this point we know env_id.
#
print("Player %i connected, wants to operate %s in this scene" % (self.player_n, env_id))
self.env = gym.make(env_id) # gym.make() adds at least the TimeLimit wrapper, which we need.
self.env.unwrapped.scene = self.scene
self.env.unwrapped.player_n = self.player_n
assert isinstance(self.env.observation_space, gym.spaces.Box)
assert isinstance(self.env.action_space, gym.spaces.Box)
self.sh_obs = np.memmap(self.prefix + "_obs", mode="w+", shape=self.env.observation_space.shape, dtype=np.float32)
self.sh_act = np.memmap(self.prefix + "_act", mode="w+", shape=self.env.action_space.shape, dtype=np.float32)
self.sh_rew = np.memmap(self.prefix + "_rew", mode="w+", shape=(1,), dtype=np.float32)
self.sh_rgb = np.memmap(self.prefix + "_rgb", mode="w+", shape=(self.env.unwrapped.VIDEO_H,self.env.unwrapped.VIDEO_W,3), dtype=np.uint8)
os.write(self.sh_pipe_obsready, b'accepted\n')
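For context, a sketch of the client side of this handshake as the server code above implies it: write the environment id plus a newline into the "actready" pipe, then wait for the "accepted" line on the "obsready" pipe. The pipe filenames and the helper name are assumptions, not the project's actual API.
import os

def send_env_id(prefix, env_id):
    # illustrative counterpart of shmem_client_send_env_id(); pipe filenames are assumed
    sh_pipe_actready = os.open(prefix + "_actready", os.O_WRONLY)  # server reads the env id here
    sh_pipe_obsready = open(prefix + "_obsready", "rt")            # server answers here
    os.write(sh_pipe_actready, (env_id + "\n").encode("ascii"))
    assert sh_pipe_obsready.readline().strip() == "accepted"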
def main():
parser = argparse.ArgumentParser()
parser.add_argument("envid")
parser.add_argument("outfile")
parser.add_argument("--gymdir")
args = parser.parse_args()
if args.gymdir:
sys.path.insert(0, args.gymdir)
import gym
from gym import utils
print(utils.colorize("gym directory: %s" % path.dirname(gym.__file__), "yellow"))
env = gym.make(args.envid)
agent = RandomAgent(env.action_space)
alldata = {}
for i in range(2):
np.random.seed(i)
data = rollout(env, agent, env.spec.max_episode_steps)
for (k, v) in data.items():
alldata["%i-%s"%(i, k)] = v
np.savez(args.outfile, **alldata)
def __init__(self, name, globalAC, config, mutex):
self.mutex = mutex
self.config = config
self.env = gym.make(self.config.GAME).unwrapped # .unwrapped strips wrappers (e.g. the TimeLimit added by the registered -v0 id)
self.name = name
self.AC = ACNet(name, config, globalAC)
def __init__(self, death_penalty=True, deterministic=True, v=3, **kwargs):
env_id = "MsPacman"
if deterministic:
env_id += "Deterministic"
env_id += "-v%d" % v
env = gym.make(env_id)
super(Pacman, self).__init__(env)
self.observation_space = gym.spaces.Box(0.0, 1.0, [42, 42, 1])
self.death_penalty = death_penalty