def step(self, action):
    """Advance the environment by one control interval.

    Parameters
    ----------
    action : sequence of int
        One-element container holding the index into
        ``self._discrete_actions``.

    Returns
    -------
    tuple
        ``(state, reward, absorbing, info)`` where ``reward`` is ``-1`` and
        ``absorbing`` is ``True`` once the pole angle leaves ``(-pi/2, pi/2)``,
        otherwise ``0`` / ``False``; ``info`` is always an empty dict.
    """
    # Translate the discrete action index into its torque value and perturb
    # it with uniform noise (models actuator noise).
    torque = self._discrete_actions[action[0]] + np.random.uniform(-10., 10.)

    # The applied torque is appended to the state so the dynamics callback
    # _dpds can read it while odeint integrates over one time step.
    augmented = np.append(self._state, torque)
    trajectory = odeint(self._dpds, augmented, [0, self._dt])

    # Keep only the final integration point and drop the appended torque,
    # then wrap the angle component back into range.
    self._state = trajectory[-1, :-1]
    self._state[0] = self._range_pi(self._state[0])

    # The episode terminates with a penalty once the pole falls past +/- pi/2.
    fallen = bool(np.abs(self._state[0]) > np.pi / 2.)
    reward = -1 if fallen else 0

    return self._state, reward, fallen, {}
# NOTE(review): removed scraped-webpage residue ("评论列表" = comment list,
# "文章目录" = article table of contents) — not code; it broke Python syntax.