def step(self, action):
    """Apply one discrete action and advance the environment by one step.

    The action index ``action[0]`` is looked up in
    ``self._discrete_actions``; the resulting value is appended to the
    current state and the combined vector is integrated with ``odeint``
    using ``self._dpds`` over the interval ``[0, self._dt]``. The last
    integration row, minus its final (action) component, becomes the new
    ``self._state``.

    Args:
        action: indexable whose first element selects an entry of
            ``self._discrete_actions``.

    Returns:
        A tuple ``(state, reward, absorbing, info)`` where ``state`` is the
        updated ``self._state``, ``info`` is an empty dict, and:

        * ``reward = -1``, ``absorbing = True`` if ``state[0]`` is below
          ``-self.max_pos`` or ``|state[1]|`` exceeds
          ``self.max_velocity``;
        * ``reward = 1``, ``absorbing = True`` if ``state[0]`` is above
          ``self.max_pos`` while ``|state[1]|`` is within
          ``self.max_velocity``;
        * ``reward = 0``, ``absorbing = False`` otherwise.
    """
    control = self._discrete_actions[action[0]]

    # The action rides along as an extra component of the integrated
    # vector; it is stripped again when the new state is extracted.
    augmented = np.append(self._state, control)
    trajectory = odeint(self._dpds, augmented, [0, self._dt])
    self._state = trajectory[-1, :-1]

    state = self._state

    # Failure: past the lower position bound, or beyond the velocity limit.
    if state[0] < -self.max_pos or np.abs(state[1]) > self.max_velocity:
        return state, -1, True, {}

    # Success: past the upper position bound within the velocity limit.
    if state[0] > self.max_pos and np.abs(state[1]) <= self.max_velocity:
        return state, 1, True, {}

    # Still inside the admissible region: the episode continues.
    return state, 0, False, {}
评论列表
文章目录