def _step(self, action):
    """Advance the environment by one timestep.

    The two action components are combined into a forward speed (their
    mean, scaled) and a heading change (from their difference). The agent
    ``self.A`` is then moved and its sensor refreshed.
    NOTE(review): this looks like a two-wheel differential-drive model with
    ``action`` holding the per-wheel commands -- confirm against the agent
    class.

    Args:
        action: Indexable with numeric entries at positions 0 and 1. Must
            satisfy ``self.action_space.contains(action)``.

    Returns:
        A tuple ``(observation, reward, done, info)``:
            observation: ``np.array((1,))`` when ``self.A.EYE.obj == 1``
                after sensing, otherwise ``np.array((0,))``.
            reward: 1.0 while the episode is running and on the step that
                first reports ``done``; 0.0 on any later step.
            done: Whatever ``self.A.Move([self.BBox])`` returned.
            info: Dict with ``'AgentPos'`` (x, y) and ``'AgentDir'``
                (heading in radians, wrapped to [-pi, pi)). The misspelled
                ``'AegntDir'`` key carries the same heading and is kept
                for backward compatibility.

    Raises:
        AssertionError: If ``action`` is not contained in the action space.
    """
    assert self.action_space.contains(action), "%r (%s) invalid" % (action, type(action))

    # Scale factor between the mean action value and the agent's speed.
    speed_scale = 5.0

    # Action step: mean of the two components drives the speed, their
    # difference drives the change of heading.
    self.A.speed = (action[0] + action[1]) / 2.0 * speed_scale
    self.A.dir_Angle += math.atan(
        (action[0] - action[1]) * self.A.speed / 2.0 / speed_scale
    )
    # Wrap the heading back into [-pi, pi).
    self.A.dir_Angle = (self.A.dir_Angle + np.pi) % (2 * np.pi) - np.pi

    done = self.A.Move([self.BBox])
    self.A.Sens(self.Course)
    self.state = (1,) if self.A.EYE.obj == 1 else (0,)

    if not done:
        reward = 1.0
    elif self.steps_beyond_done is None:
        # Robot just went out over the boundary
        self.steps_beyond_done = 0
        reward = 1.0
    else:
        # Stepping a finished episode: warn once, then keep counting the
        # extra calls and stop handing out reward.
        if self.steps_beyond_done == 0:
            logger.warn("You are calling 'step()' even though this environment has already returned done = True. You should always call 'reset()' once you receive 'done = True' -- any further steps are undefined behavior.")
        self.steps_beyond_done += 1
        reward = 0.0

    info = {
        'AgentPos': (self.A.pos_x, self.A.pos_y),
        'AgentDir': self.A.dir_Angle,
        # Legacy misspelled key, retained so existing callers keep working.
        'AegntDir': self.A.dir_Angle,
    }
    return np.array(self.state), reward, done, info
评论列表
文章目录