def main():
    """Build the FrozenLake environment, learn a policy, and render it.

    Creates the ``Stochastic-4x4-FrozenLake-v0`` gym environment, passes it
    to ``learn_with_mdp_model`` to obtain a policy, and then hands the
    environment and the policy to ``render_single``.
    """
    env = gym.make('Stochastic-4x4-FrozenLake-v0')
    policy = learn_with_mdp_model(env)
    render_single(env, policy)

    # Disabled exploration snippets, kept for reference. The nesting of these
    # lines was lost in the original text and is reconstructed here as a best
    # guess -- verify before re-enabling.
    #
    # Take ten random steps, printing the result of each one:
    # for i in range(10):
    #     print('\n%d' % i)
    #     env.render()
    #     print(env.step(env.action_space.sample()))
    # env.render()
    #
    # Dump every entry of env.P, keyed by state and then by action:
    # for init_state in env.P.keys():
    #     for action in env.P[init_state]:
    #         print("\nState: %d, action: %d" % (init_state, action))
    #         for next_state in env.P[init_state][action]:
    #             print(next_state)
    #
    # Render the environment while taking ten random steps:
    # for _ in range(10):
    #     env.render()
    #     env.step(env.action_space.sample())
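# (Web page residue, not part of the program: "Comment list")
# (Web page residue, not part of the program: "Article table of contents")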