def step(self, action, is_training):
    # Default reward; bound up front so it exists even when the episode is already terminal
    reward = 0
    if self.terminal == 0:
        if action == -1:
            # Step with a random action
            action = int(random.random() * self.action_size)
        self.actionToVel(action)
        self.readyForNewData = True
        if self.display:
            cv2.imshow("Screen", self.screen)
            cv2.waitKey(3)
        # Euclidean distance from the robot to the current goal
        dist = np.sqrt((self.robotPose.position.x - self.goalPose.position.x)**2 +
                       (self.robotPose.position.y - self.goalPose.position.y)**2)
        # Near-obstacle penalty factor (shaped reward, currently disabled):
        # nearObstPenalty = self.minFrontDist - 1.5
        # reward = max(0, ((self.prevDist - dist + 1.8)*3/dist) + min(0, nearObstPenalty))
        # self.prevDist = dist
        if dist < 0.3:
            # Goal reached
            reward += 1
            # Select a new goal on an arc of radius self.r spanning the angle self.ang
            theta = self.ang * random.random()
            self.goalPose.position.x = self.r * np.cos(theta)
            self.goalPose.position.y = self.r * np.sin(theta)
            self.pub_goal_.publish(self.goalPose)
            # Publish a distinctive marker value on the reward topic to flag the goal hit
            rwd = Float64()
            rwd.data = 10101.963
            self.pub_rew_.publish(rwd)
            self.numWins += 1
            if self.numWins == 99:
                # Curriculum: after 99 goal hits, push the goal farther out and widen the arc
                if self.ang < np.pi:
                    self.r += 1
                    self.ang += float(int(self.r / 20) / 10.0)
                    self.r %= 20
                self.numWins = 0
            self.resetStage()
    # Add whatever info you want
    info = ""
    self.ep_reward += reward
    if self.terminal == 1:
        # Episode ended (e.g. collision): penalize and publish the accumulated episode reward
        reward = -1
        rewd = Float64()
        rewd.data = self.ep_reward
        self.pub_rew_.publish(rewd)
        self.sendTerminal = 1
    # Busy-wait until the sensor callbacks have delivered a fresh observation
    while self.readyForNewData:
        pass
    if self.use_cumulated_reward:
        return self.screen, self.ep_reward, self.sendTerminal, info
    else:
        return self.screen, reward, self.sendTerminal, info
    # observation, reward, terminal, info = self.env.step(action)
    # return self.preprocess(observation), reward, terminal, info
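
# --- Illustrative sketch (not part of the class above) ----------------------
# The shaped reward that is commented out inside step() can be read as a pure
# function of the previous/current goal distances and the closest obstacle
# range in front of the robot. A minimal standalone sketch of that formula,
# with a hypothetical function name; assumes dist > 0:
def shaped_reward(prev_dist, dist, min_front_dist):
    # Penalty kicks in when the frontal clearance drops below 1.5 m
    near_obst_penalty = min_front_dist - 1.5
    # Progress toward the goal is scaled by the remaining distance,
    # and the total is clipped at zero from below
    return max(0.0, ((prev_dist - dist + 1.8) * 3.0 / dist) + min(0.0, near_obst_penalty))

# Example: closing in from 4.0 m to 3.5 m with 2.0 m of clearance gives a
# positive value, while drifting away from the goal next to a wall gives 0:
#   shaped_reward(4.0, 3.5, 2.0)  -> ~1.97
#   shaped_reward(3.5, 4.0, 0.3)  -> 0.0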
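
# The goal-sampling and curriculum update inside step() can be sketched in the
# same standalone way: goals are drawn on an arc of radius r spanning the angle
# ang, and every 99 goal hits enlarge that arc. The helper names below are
# hypothetical and not part of the original class:
import random
import numpy as np

def sample_goal(r, ang):
    # Draw a goal at radius r and a random angle in [0, ang)
    theta = ang * random.random()
    return r * np.cos(theta), r * np.sin(theta)

def advance_curriculum(r, ang, num_wins):
    # Mirrors the update applied in step() when numWins reaches 99 (and ang is
    # still below pi): the radius grows by 1, the angle grows by 0.1 rad only
    # once the radius reaches 20 (the radius then wraps back below 20), and the
    # win counter restarts
    if num_wins == 99:
        if ang < np.pi:
            r += 1
            ang += float(int(r / 20) / 10.0)
            r %= 20
        num_wins = 0
    return r, ang, num_wins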