def __init__(self, name, horizon, gamma):
    """
    Constructor.

    Args:
        name (str): gym id of the environment;
        horizon (int): the horizon of the MDP;
        gamma (float): the discount factor.

    """
    self.__name__ = name

    # MDP creation
    self.env = gym.make(self.__name__)
    env = self.env
    # Hack to ignore gym time limit.
    env._max_episode_steps = np.inf

    # MDP properties: MultiDiscrete spaces are rejected up front, for the
    # observation space first and then for the action space.
    assert not isinstance(env.observation_space, gym_spaces.MultiDiscrete)
    assert not isinstance(env.action_space, gym_spaces.MultiDiscrete)

    action_space = self._convert_gym_space(env.action_space)
    observation_space = self._convert_gym_space(env.observation_space)
    mdp_info = MDPInfo(observation_space, action_space, gamma, horizon)

    # Actions go through the conversion function only when the converted
    # action space is Discrete; otherwise they are passed through.
    has_discrete_actions = isinstance(action_space, Discrete)
    self._convert_action = (
        self._convert_action_function
        if has_discrete_actions
        else self._no_convert
    )

    # States go through the conversion function only when the converted
    # observation space is Discrete and its size has more than one entry.
    has_multi_dim_discrete_states = (
        isinstance(observation_space, Discrete)
        and len(observation_space.size) > 1
    )
    self._convert_state = (
        self._convert_state_function
        if has_multi_dim_discrete_states
        else self._no_convert
    )

    super(Gym, self).__init__(mdp_info)
评论列表
文章目录