def step(self, action, is_training):
    if self.terminal == 0:
        if action == -1:
            # Step with a random action
            action = int(random.random() * self.action_size)
        self.actionToVel(action)
        self.readyForNewData = True
    if self.display:
        cv2.imshow("Screen", self.screen)
        cv2.waitKey(3)
    # Euclidean distance between the robot and the current goal
    dist = np.sqrt((self.robotPose.position.x - self.goalPose.position.x)**2 +
                   (self.robotPose.position.y - self.goalPose.position.y)**2)
    # Near-obstacle penalty factor: negative once the closest frontal
    # obstacle is nearer than 1.5 m
    nearObstPenalty = self.minFrontDist - 1.5
    # Reward shaping: progress toward the goal, scaled by the remaining
    # distance, plus the (non-positive) obstacle penalty; clipped at zero
    reward = max(0, ((self.prevDist - dist + 1.8) * 3 / dist) + min(0, nearObstPenalty))
    self.prevDist = dist
    if dist < 0.9:
        # Goal reached
        reward += 300
        # Select a new goal at least 3.9 m away from the robot,
        # inside a disc of radius 19.5 m around the origin
        d = -1
        while d < 3.9:
            theta = 2.0 * np.pi * random.random()
            r = random.random() * 19.5
            self.goalPose.position.x = r * np.cos(theta)
            self.goalPose.position.y = r * np.sin(theta)
            d = np.sqrt((self.robotPose.position.x - self.goalPose.position.x)**2 +
                        (self.robotPose.position.y - self.goalPose.position.y)**2)
        self.pub_goal_.publish(self.goalPose)
        self.prevDist = d
        # Publish a marker value on the reward topic to flag the reached goal
        rwd = Float64()
        rwd.data = 10101.963
        self.pub_rew_.publish(rwd)
        self.numWins += 1
        if self.numWins == 99:
            # End the episode with a large bonus after 99 reached goals
            reward += 9000
            self.terminal = 1
    # Add whatever info you want
    info = ""
    self.ep_reward += reward
    if self.terminal == 1:
        # Publish the cumulated episode reward and signal the terminal state
        rewd = Float64()
        rewd.data = self.ep_reward
        self.pub_rew_.publish(rewd)
        self.sendTerminal = 1
    # Busy-wait until the sensor callback clears the flag, i.e. a fresh
    # observation has been received
    while self.readyForNewData == True:
        pass
    if self.use_cumulated_reward:
        return self.screen, self.ep_reward, self.sendTerminal, info
    else:
        return self.screen, reward, self.sendTerminal, info
    # observation, reward, terminal, info = self.env.step(action)
    # return self.preprocess(observation), reward, terminal, info
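To make the reward shaping easier to inspect in isolation, here is a minimal standalone sketch of the same formula used above. The function name and keyword parameters (shaped_reward, progress_bonus, scale, obstacle_margin) are illustrative assumptions, not part of the original class; the constants default to the values hard-coded in step().

import numpy as np

def shaped_reward(prev_dist, dist, min_front_dist,
                  progress_bonus=1.8, scale=3.0, obstacle_margin=1.5):
    """Sketch of the shaping term in step(): reward progress toward the goal,
    scaled by the remaining distance, and subtract a penalty when the nearest
    frontal obstacle is closer than obstacle_margin metres."""
    near_obst_penalty = min_front_dist - obstacle_margin
    return max(0.0, (prev_dist - dist + progress_bonus) * scale / dist
                    + min(0.0, near_obst_penalty))

# Example: the robot closed from 6.0 m to 5.5 m while an obstacle sits 0.8 m ahead,
# so the progress term (~1.25) is reduced by the obstacle penalty (0.7).
print(shaped_reward(6.0, 5.5, 0.8))  # ~0.55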