def __init__(self):
self.shape = (7, 10)
nS = np.prod(self.shape)
nA = 4
# Wind strength
winds = np.zeros(self.shape)
winds[:,[3,4,5,8]] = 1
winds[:,[6,7]] = 2
# Calculate transition probabilities
P = {}
for s in range(nS):
position = np.unravel_index(s, self.shape)
P[s] = { a : [] for a in range(nA) }
P[s][UP] = self._calculate_transition_prob(position, [-1, 0], winds)
P[s][RIGHT] = self._calculate_transition_prob(position, [0, 1], winds)
P[s][DOWN] = self._calculate_transition_prob(position, [1, 0], winds)
P[s][LEFT] = self._calculate_transition_prob(position, [0, -1], winds)
# We always start in state (3, 0)
isd = np.zeros(nS)
isd[np.ravel_multi_index((3,0), self.shape)] = 1.0
super(WindyGridworldEnv, self).__init__(nS, nA, P, isd)
评论列表
文章目录