def main():
env = gym.make('Stochastic-4x4-FrozenLake-v0')
policy = learn_with_mdp_model(env)
render_single(env, policy)
# for i in range(10):
# print('\n%d' % i)
# env.render()
# print(env.step(env.action_space.sample()))
# env.render()
# for init_state in env.P.keys():
# for action in env.P[init_state]:
# print("\nState: %d, action: %d" % (init_state, action))
# for next_state in env.P[init_state][action]:
# print(next_state)
# for _ in range(10):
# env.render()
# env.step(env.action_space.sample())
Python gym.make() example source code
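Before the project-specific snippets, here is a minimal sketch of the gym.make() workflow they all share. CartPole-v0 is used only as a stand-in environment and the random policy is a placeholder; the sketch follows the classic obs/reward/done/info step API that the snippets below assume.
import gym

def minimal_rollout(env_id="CartPole-v0", episodes=1):
    # gym.make() looks up the registered spec and returns a (possibly wrapped) env
    env = gym.make(env_id)
    for _ in range(episodes):
        obs = env.reset()
        done = False
        total_reward = 0.0
        while not done:
            action = env.action_space.sample()  # random policy as a placeholder
            obs, reward, done, info = env.step(action)
            total_reward += reward
        print("episode reward: %.2f" % total_reward)
    env.close()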
def init(self):
gym.configuration.undo_logger_setup()
self.env = gym.make(self.env_name)
self.n_inputs, self.input_handler = self._init_space(
self.env.action_space)
self.inputs = np.empty(self.n_inputs)
self.n_outputs, _ = self._init_space(self.env.observation_space)
self.outputs = np.empty(self.n_outputs)
if self.seed is not None:
self.env.seed(self.seed)
self.logger = get_logger(self, self.log_to_file, self.log_to_stdout)
if self.log_to_stdout or self.log_to_file:
self.logger.info("Number of inputs: %d" % self.n_inputs)
self.logger.info("Number of outputs: %d" % self.n_outputs)
def __init__(self, env_name, state_builder=ALEStateBuilder(), repeat_action=4, no_op=30, monitoring_path=None):
assert isinstance(state_builder, StateBuilder), 'state_builder should inherit from StateBuilder'
assert isinstance(repeat_action, (int, tuple)), 'repeat_action should be int or tuple'
if isinstance(repeat_action, int):
assert repeat_action >= 1, "repeat_action should be >= 1"
elif isinstance(repeat_action, tuple):
assert len(repeat_action) == 2, 'repeat_action should be a length-2 tuple: (min frameskip, max frameskip)'
assert repeat_action[0] < repeat_action[1], 'repeat_action[0] should be < repeat_action[1]'
super(GymEnvironment, self).__init__()
self._state_builder = state_builder
self._env = gym.make(env_name)
self._env.env.frameskip = repeat_action
self._no_op = max(0, no_op)
self._done = True
if monitoring_path is not None:
self._env = Monitor(self._env, monitoring_path, video_callable=need_record)
def deterministic_grid_test():
env = gym.make("deterministic-grid-world-v0")
prev_state = env.state
for _ in range(100): env.step(0)  # no-op
assert env.state == prev_state
while env.state[0] > 0:
env.step(1)
assert env.state[0] == 0
env.step(1)
assert env.state[0] == 0
while env.state[1] < env.board_size[1] - 1:
env.step(3)
assert env.state[1] == env.board_size[1] - 1
env.step(3)
assert env.state[1] == env.board_size[1] - 1
def __init__(self, n_options=10, logger=None, plotting=False,
log_tf_graph=False):
if logger is None:
logger = logging.getLogger("logger")
logger.setLevel(logging.INFO)
self.logger = logger
self.n_options = n_options
self.env = gym.make("deterministic-grid-world-v0")
self.n_actions = self.env.action_space.n
self.n_states = 1 + reduce(lambda x, y: x*y,
map(lambda x: x.n, self.env.observation_space.spaces))
if plotting:
self.plot_robots = [PlotRobot('dqn loss', 0, log_scale=True),
PlotRobot('q loss', 1), PlotRobot('rewards', 2)]
else:
self.plot_robots = [None] * 3
self.plotting = self.plot_robots[2]
self.colors = list('bgrcmyk') + ['magenta', 'lime', 'gray']
self.build_graph(log_tf_graph)
def execute(symbol, begin, end, days, plot, model_path, random):
print(model_path)
model = load_model(model_path)
env = gym.make('trading-v0').env
env.initialise(symbol=symbol, start=begin, end=end, days=days, random=random)
state_size = env.observation_space.shape[0]
state = env.reset()
done = False
while not done:
state = state.reshape(1, state_size)
# state = state.reshape(1, 1, state_size)
qval = model.predict(state, batch_size=1)
action = (np.argmax(qval))
state, _, done, info = env.step(action)
# log.info("%s,%s,%s,%s",state, _, done, info)
# log.info("\n%s", env.sim.to_df())
if plot:
env.render()
def __init__(self, game="MsPacman-v0"):
self.screen_h = Config.SCREEN_H
self.screen_w = Config.SCREEN_W
self.screen_shape = Config.SCREEN_SHAPE
self.frame_per_row = Config.FRAME_PER_ROW
self.frame_buffer = None
self.action_space = 9
# meta
self.total_episode_run = 0
self.steps_in_episode = 0
self.max_steps_in_episode = 0
self.env = gym.make(game)
self.reset()
RoboschoolInvertedPendulum_v0_2017may.py (project: roboschool, author: openai)
def demo_run():
env = gym.make("RoboschoolInvertedPendulum-v1")
pi = SmallReactivePolicy(env.observation_space, env.action_space)
while 1:
frame = 0
score = 0
restart_delay = 0
obs = env.reset()
while 1:
a = pi.act(obs)
obs, r, done, _ = env.step(a)
score += r
frame += 1
still_open = env.render("human")
if still_open==False:
return
if not done: continue
if restart_delay==0:
print("score=%0.2f in %i frames" % (score, frame))
restart_delay = 60*2 # 2 sec at 60 fps
else:
restart_delay -= 1
if restart_delay==0: break
RoboschoolHumanoidFlagrun_v0_2017may.py (project: roboschool, author: openai)
def demo_run():
env = gym.make("RoboschoolHumanoidFlagrun-v1")
pi = SmallReactivePolicy(env.observation_space, env.action_space)
while 1:
frame = 0
score = 0
restart_delay = 0
obs = env.reset()
while 1:
a = pi.act(obs)
obs, r, done, _ = env.step(a)
score += r
frame += 1
still_open = env.render("human")
if still_open==False:
return
if not done: continue
if restart_delay==0:
print("score=%0.2f in %i frames" % (score, frame))
restart_delay = 60*2 # 2 sec at 60 fps
else:
restart_delay -= 1
if restart_delay==0: break
def demo_run():
env = gym.make("RoboschoolAnt-v1")
pi = SmallReactivePolicy(env.observation_space, env.action_space)
while 1:
frame = 0
score = 0
restart_delay = 0
obs = env.reset()
while 1:
a = pi.act(obs)
obs, r, done, _ = env.step(a)
score += r
frame += 1
still_open = env.render("human")
if still_open==False:
return
if not done: continue
if restart_delay==0:
print("score=%0.2f in %i frames" % (score, frame))
restart_delay = 60*2 # 2 sec at 60 fps
else:
restart_delay -= 1
if restart_delay==0: break
def demo_run():
env = gym.make("RoboschoolReacher-v1")
pi = SmallReactivePolicy(env.observation_space, env.action_space)
while 1:
frame = 0
score = 0
obs = env.reset()
while 1:
a = pi.act(obs)
obs, r, done, _ = env.step(a)
score += r
frame += 1
still_open = env.render("human")
if still_open==False:
return
if not done: continue
print("score=%0.2f in %i frames" % (score, frame))
break
def demo_run():
env = gym.make("RoboschoolHopper-v1")
pi = SmallReactivePolicy(env.observation_space, env.action_space)
while 1:
frame = 0
score = 0
restart_delay = 0
obs = env.reset()
while 1:
a = pi.act(obs)
obs, r, done, _ = env.step(a)
score += r
frame += 1
still_open = env.render("human")
if still_open==False:
return
if not done: continue
if restart_delay==0:
print("score=%0.2f in %i frames" % (score, frame))
restart_delay = 60*2 # 2 sec at 60 fps
else:
restart_delay -= 1
if restart_delay==0: break
def demo_run():
env = gym.make("RoboschoolWalker2d-v1")
pi = SmallReactivePolicy(env.observation_space, env.action_space)
while 1:
frame = 0
score = 0
restart_delay = 0
obs = env.reset()
while 1:
a = pi.act(obs)
obs, r, done, _ = env.step(a)
score += r
frame += 1
still_open = env.render("human")
if still_open==False:
return
if not done: continue
if restart_delay==0:
print("score=%0.2f in %i frames" % (score, frame))
restart_delay = 60*2 # 2 sec at 60 fps
else:
restart_delay -= 1
if restart_delay==0: break
def multiplayer(self, env, game_server_guid, player_n):
"""
Call this between gym.make() and the first env.reset() to connect to a multiplayer server.
game_server_guid -- an id that server and client use to identify themselves as belonging to the same session.
player_n -- integer, up to scene.players_count.
Note that env._reset() gets overwritten here: calling env.reset() will no longer create a
single-player scene on your side (as it usually does), but will instead ask the server to reset
the environment there. The same applies to step() and render(). A usage sketch follows this function.
"""
self.shmem_client_init(game_server_guid, player_n)
env._step = self.shmem_client_step # replace the real function with a stub that talks to the environment on the server
env._reset = self.shmem_client_reset
env._render = self.shmem_client_rgb_array
self.shmem_client_send_env_id()
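A hedged sketch of the client-side call order the docstring above describes, assuming multiplayer() is exposed on the unwrapped env as in the roboschool agent-zoo examples; the environment id, session guid, and player index are placeholders.
env = gym.make("RoboschoolPong-v1")  # placeholder multiplayer-capable env id
env.unwrapped.multiplayer(env, game_server_guid="my_session", player_n=0)
obs = env.reset()  # now resets the scene on the server instead of building one locally
obs, rew, done, info = env.step(env.action_space.sample())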
def read_env_id_and_create_env(self):
self.sh_pipe_actready = open(self.sh_pipe_actready_filename, "rt")
self.sh_pipe_obsready = os.open(self.sh_pipe_obsready_filename, os.O_WRONLY)
env_id = self.sh_pipe_actready.readline()[:-1]
if env_id.find("-v")==-1:
raise ValueError("multiplayer client %s sent an invalid environment id '%s'" % (self.prefix, env_id))
#
# And at this point we know env_id.
#
print("Player %i connected, wants to operate %s in this scene" % (self.player_n, env_id))
self.env = gym.make(env_id) # gym.make() adds at least the TimeLimit wrapper, which we need.
self.env.unwrapped.scene = self.scene
self.env.unwrapped.player_n = self.player_n
assert isinstance(self.env.observation_space, gym.spaces.Box)
assert isinstance(self.env.action_space, gym.spaces.Box)
self.sh_obs = np.memmap(self.prefix + "_obs", mode="w+", shape=self.env.observation_space.shape, dtype=np.float32)
self.sh_act = np.memmap(self.prefix + "_act", mode="w+", shape=self.env.action_space.shape, dtype=np.float32)
self.sh_rew = np.memmap(self.prefix + "_rew", mode="w+", shape=(1,), dtype=np.float32)
self.sh_rgb = np.memmap(self.prefix + "_rgb", mode="w+", shape=(self.env.unwrapped.VIDEO_H,self.env.unwrapped.VIDEO_W,3), dtype=np.uint8)
os.write(self.sh_pipe_obsready, b'accepted\n')
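For context, a sketch of the client side of this handshake as the server code above implies it: write the environment id plus a newline into the "actready" pipe, then wait for the "accepted" line on the "obsready" pipe. The pipe filenames and the helper name are assumptions, not the project's actual API.
import os

def send_env_id(prefix, env_id):
    # illustrative counterpart of shmem_client_send_env_id(); pipe filenames are assumed
    sh_pipe_actready = os.open(prefix + "_actready", os.O_WRONLY)  # server reads the env id here
    sh_pipe_obsready = open(prefix + "_obsready", "rt")            # server answers here
    os.write(sh_pipe_actready, (env_id + "\n").encode("ascii"))
    assert sh_pipe_obsready.readline().strip() == "accepted"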
def main():
parser = argparse.ArgumentParser()
parser.add_argument("envid")
parser.add_argument("outfile")
parser.add_argument("--gymdir")
args = parser.parse_args()
if args.gymdir:
sys.path.insert(0, args.gymdir)
import gym
from gym import utils
print(utils.colorize("gym directory: %s" % path.dirname(gym.__file__), "yellow"))
env = gym.make(args.envid)
agent = RandomAgent(env.action_space)
alldata = {}
for i in range(2):
np.random.seed(i)
data = rollout(env, agent, env.spec.max_episode_steps)
for (k, v) in data.items():
alldata["%i-%s"%(i, k)] = v
np.savez(args.outfile, **alldata)
def __init__(self, name, globalAC, config, mutex):
self.mutex = mutex
self.config = config
self.env = gym.make(self.config.GAME).unwrapped # .unwrapped strips wrappers (e.g. the TimeLimit added by the registered -v0 id)
self.name = name
self.AC = ACNet(name, config, globalAC)
def __init__(self, death_penalty=True, deterministic=True, v=3, **kwargs):
env_id = "MsPacman"
if deterministic:
env_id += "Deterministic"
env_id += "-v%d" % v
env = gym.make(env_id)
super(Pacman, self).__init__(env)
self.observation_space = gym.spaces.Box(0.0, 1.0, [42, 42, 1])
self.death_penalty = death_penalty