def __init__(self,
             dimensions=(4, 12),
             start_state=(3, 0),
             goal_state=(3, 11),
             cliff_states=None,
             cliff_reward=-100,
             step_reward=-1,
             max_steps=100):
    """Store the grid configuration, validate it, and start an episode.

    The set of valid states is every index tuple produced by
    ``np.ndindex(dimensions)``.

    Args:
        dimensions: Grid shape handed to ``np.ndindex``.
        start_state: Index tuple of the start cell. Must be a valid state
            and must not be one of ``cliff_states``.
        goal_state: Index tuple of the goal cell. Must be a valid state.
        cliff_states: Collection of index tuples marking cliff cells; each
            must be a valid state. ``None`` (the default) selects
            ``(3, 1)`` through ``(3, 10)``.
        cliff_reward: Stored as ``self.cliff_reward``; presumably the
            reward for entering a cliff cell -- confirm against the
            step logic elsewhere in the class.
        step_reward: Stored as ``self.step_reward``; presumably the reward
            for an ordinary move -- confirm against the step logic.
        max_steps: Stored as ``self.max_steps``; presumably the per-episode
            step limit -- confirm against the step logic.

    Raises:
        AssertionError: If any of the state checks above fails. Note that
            these checks are skipped when Python runs with ``-O``.
    """
    if cliff_states is None:
        # Build the default per instance: a list literal in the signature
        # would be created once and shared (and mutable) across every
        # instance. ``range`` is used instead of ``xrange`` so the code
        # runs on both Python 2 and Python 3.
        cliff_states = [(3, col) for col in range(1, 11)]

    self.dimensions = dimensions
    self.states = set(np.ndindex(dimensions))
    self.start_state = start_state
    self.goal_state = goal_state
    self.cliff_states = cliff_states
    self.cliff_reward = cliff_reward
    self.step_reward = step_reward
    self.max_steps = max_steps

    # Sanity-check the configuration before the first episode begins.
    assert self.is_state(self.start_state), "start_state is not a valid state"
    assert self.is_state(self.goal_state), "goal_state is not a valid state"
    assert all(self.is_state(state) for state in self.cliff_states), \
        "cliff_states contains an invalid state"
    assert self.start_state not in self.cliff_states, \
        "start_state must not be a cliff state"

    self.start_episode()
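# Comment list  (appears to be page-navigation text from the web page this code was copied from)
# Table of contents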