def plot(self, ax, idx1, idx2, range1, range2, n=100):
    """Draw ``self.compute_reward`` over a 2-D slice of its inputs onto ``ax``.

    An ``n`` x ``n`` grid is laid over ``range1`` x ``range2``. The grid
    coordinates are written into columns ``idx1`` and ``idx2`` of an
    otherwise all-zero input batch, the batch is passed through
    ``self.compute_reward``, and the result is rendered as a ``pcolormesh``
    with contour lines drawn on top.

    When ``self.action_space`` is a ``ContinuousSpace`` the column indices
    address the concatenation ``[observation features | action]``; otherwise
    they address the observation features only and the action column is
    fixed to integer zero.

    Args:
        ax: Object whose ``pcolormesh`` and ``contour`` methods are called
            (presumably a matplotlib ``Axes`` -- confirm with callers).
        idx1: Input column that receives the first grid coordinate.
        idx2: Input column that receives the second grid coordinate; must
            differ from ``idx1``.
        range1: Length-2 sequence ``(start, stop)`` for the first coordinate.
        range2: Length-2 sequence ``(start, stop)`` for the second coordinate.
        n: Number of grid points along each axis.

    Returns:
        None. The only effect is drawing on ``ax``.
    """
    assert len(range1) == len(range2) == 2 and idx1 != idx2

    # A complex step tells np.mgrid "this many points, stop inclusive"
    # instead of "this stride".
    point_count = n + 0j
    x, y = np.mgrid[
        slice(range1[0], range1[1], point_count),
        slice(range2[0], range2[1], point_count),
    ]
    x_flat = x.ravel()
    y_flat = y.ravel()
    obs_dim = self.obsfeat_space.storage_size

    if isinstance(self.action_space, ContinuousSpace):
        # Observation features and action share one (n*n, obs_dim + act_dim)
        # buffer so idx1/idx2 may select a column from either part.
        act_dim = self.action_space.storage_size
        inputs = np.zeros((n * n, obs_dim + act_dim))
        inputs[:, idx1] = x_flat
        inputs[:, idx2] = y_flat
        obs_feats = inputs[:, :obs_dim]
        actions = inputs[:, obs_dim:]
        assert actions.shape[1] == act_dim
    else:
        # Only the observation features vary; every row uses action 0.
        obs_feats = np.zeros((n * n, obs_dim))
        obs_feats[:, idx1] = x_flat
        obs_feats[:, idx2] = y_flat
        actions = np.zeros((obs_feats.shape[0], 1), dtype=np.int32)  # XXX make customizable

    # Third compute_reward argument (``t_B`` in the file's naming, presumably
    # a per-row timestep -- confirm) is held at zero for every row.
    t_zeros = np.zeros(actions.shape[0])  # XXX make customizable
    rewards = self.compute_reward(obs_feats, actions, t_zeros)
    z = rewards.reshape(x.shape)

    ax.pcolormesh(x, y, z, cmap='viridis')
    # Ten contour levels running from log(2) to log(3).
    contour_levels = np.log(np.linspace(2., 3., 10))
    ax.contour(x, y, z, levels=contour_levels)
    # Disabled alternative kept from the original, which shades the
    # high-reward region:
    # ax.contourf(x, y, z, levels=[np.log(2.), np.log(2.)+.5], alpha=.5)
评论列表
文章目录