def get_reward(self, next_state):
    """Compute the headway-based reward for one car and record it on that car.

    The headway is ``h = ((p_l1 - l_l1) - p) / v`` using values read from
    ``next_state``. Presumably ``p``/``v`` are the car's own position and
    speed and ``p_l1``/``l_l1`` the leader's position and length -- TODO
    confirm against the code that builds ``next_state``.

    The reward is piecewise linear and continuous in ``h``, peaking at
    ``h == 1`` (the desired headway):

        h <= 0.7      ->  -(0.7 - h)
        0.7 < h < 1   ->  4 * (h - 0.7)
        1 <= h < 1.3  ->  4 * (1.3 - h)
        h >= 1.3      ->  -2 * (h - 1.3)

    Args:
        next_state: Mapping providing the keys ``'p'``, ``'v'``, ``'ID'``,
            ``'p_l1'`` and ``'l_l1'``. ``'ID'`` is converted with ``int()``
            and used to index ``self.cars``.

    Returns:
        The reward value. It is also stored in ``self.cars[ID].reward``.
    """
    p, v, ID = next_state['p'], next_state['v'], int(next_state['ID'])
    p_f, l_f = next_state['p_l1'], next_state['l_l1']

    distance = (p_f - l_f) - p

    # A zero speed makes the headway undefined. Plain Python numbers would
    # raise ZeroDivisionError here and NumPy scalars would yield inf/NaN, so
    # handle it explicitly and fall back to the same cap used for inf.
    if v == 0:
        h = 10
    else:
        h = distance / v
        # Cap any remaining non-finite headway (inf or NaN inputs) so the
        # reward never becomes inf or NaN.
        if not np.isfinite(h):
            h = 10

    # desired headway = 1
    if 1 <= h < 1.3:
        reward = 4 * (1.3 - h)
    elif 0.7 < h < 1:
        reward = 4 * (h - 0.7)
    elif h >= 1.3:
        reward = -2 * (h - 1.3)
    else:
        # h <= 0.7
        reward = -1 * (0.7 - h)

    self.cars[ID].reward = reward
    return reward
评论列表
文章目录