def step(self, action, is_training=False):
    """Publish one action and return the resulting transition.

    The reward is shaped from the change in squared distance to the goal,
    with a large penalty when ``self.terminal == 1`` and a bonus when the
    goal is reached. The call blocks until ``self.clock`` differs from the
    value seen on the previous step.

    Args:
        action: Integer action index, published in an ``Int8`` message.
            Passing ``-1`` picks a random index in
            ``[0, self.action_size)``.
        is_training: Accepted for interface compatibility; not used here.

    Returns:
        tuple: ``(self.screen, reward, self.sendTerminal, info)`` where
        ``info`` is always an empty string.
    """
    # Local import: the file's import block is outside this view.
    import time

    self.prevPoseX = self.poseX
    self.prevPoseY = self.poseY

    if action == -1:
        # Step with random action.
        action = int(random.random() * self.action_size)

    msg = Int8()
    msg.data = action
    self.pub_action_.publish(msg)

    if self.display:
        # NOTE(review): no cv2.waitKey() follows, so the window is only
        # repainted if GUI events are pumped elsewhere -- confirm.
        cv2.imshow("Screen", self.screen)

    # `dist` is the *squared* Euclidean distance to the goal (no sqrt), so
    # the 0.9 threshold below is compared against a squared value.
    dist = (self.poseX - self.goalX) ** 2 + (self.poseY - self.goalY) ** 2
    reward = (self.prevDist - dist) / 10.0
    self.prevDist = dist

    if self.terminal == 1:
        # Penalty for the step on which the terminal flag is first seen.
        reward -= 900

    if dist < 0.9:
        # Goal reached: add the bonus and request a new goal.
        reward += 300
        self.pub_new_goal_.publish(EmptyMsg())

    # Add whatever info you want.
    info = ""

    # Wait for the clock to advance before reporting the transition.
    # Sleeping briefly instead of spinning on `pass` releases the GIL and
    # avoids pinning a CPU core while waiting.
    # NOTE(review): self.clock is presumably updated by a subscriber
    # callback on another thread -- confirm.
    while self.clock == self.lastClock:
        time.sleep(0.001)
    self.lastClock = self.clock

    # Terminal state is reported one step late: terminal goes 1 -> 2 on the
    # step that applies the penalty, and sendTerminal is raised on the next
    # call. The order of these two checks must be preserved.
    if self.terminal == 2:
        self.sendTerminal = 1

    if self.terminal == 1:
        self.terminal = 2

    return self.screen, reward, self.sendTerminal, info
# (Scraped web-page footer, not part of the program: "comment list" / "article table of contents".)