def __init__(self,
             dimensions=(3, 4),
             start_state=(0, 0),
             end_states=[(0, 3), (1, 3)],
             nonstates=[(1, 1)],
             state_rewards={(0, 3): 1, (1, 3): -1},
             step_reward=-0.1,
             max_steps=100):
    """Configure the grid, validate the configuration and start an episode.

    Args:
        dimensions: Shape of the grid; every index tuple produced by
            ``np.ndindex(dimensions)`` is a candidate state.
        start_state: Index tuple stored as ``self.start_state``; must
            satisfy ``self.is_state``.
        end_states: Iterable of index tuples, each of which must satisfy
            ``self.is_state``. Presumably the terminal cells -- confirm
            against the episode/step logic.
        nonstates: Iterable of index tuples removed from the candidate
            states (none of them may satisfy ``self.is_state``).
        state_rewards: Mapping from index tuple to a reward value.
            Presumably the reward for entering that cell -- confirm
            against the step logic.
        step_reward: Stored as ``self.step_reward``; presumably the
            reward applied on every step -- confirm against the step logic.
        max_steps: Stored as ``self.max_steps``; presumably the episode
            length cap -- confirm against the step logic.

    Raises:
        AssertionError: If the start state or an end state is not a valid
            state, or if an entry of ``nonstates`` is still reported as a
            state.
    """
    self.dimensions = dimensions
    # All grid cells minus the excluded ones.
    self.states = set(np.ndindex(dimensions)).difference(set(nonstates))
    self.start_state = start_state
    # Store copies: the default list/dict objects are created once at
    # function-definition time, so keeping a reference to them would make
    # every default-constructed instance share (and be able to corrupt)
    # the same containers. Copying also decouples us from caller-owned
    # containers.
    self.end_states = list(end_states)
    self.state_rewards = dict(state_rewards)
    self.step_reward = step_reward
    self.max_steps = max_steps

    assert self.is_state(self.start_state), \
        "start_state is not a valid state"
    assert all(self.is_state(state) for state in self.end_states), \
        "every entry of end_states must be a valid state"
    assert all(not self.is_state(state) for state in nonstates), \
        "entries of nonstates must not be valid states"

    self.start_episode()
评论列表
文章目录