def calc_reward(self, action=None, state=None, **kw):
    """Return the reward for an acrobot state.

    The state is first normalised, because states predicted by the BNN are
    not guaranteed to respect the environment's limits: entries 0 and 1 are
    passed through ``wrap`` with the range ``[-pi, pi]`` and entries 2 and 3
    are passed through ``bound`` with ``+/- self.MAX_VEL_1``.

    The reward is ``10`` when ``self.is_terminal`` accepts the normalised
    state. Otherwise it is ``-0.05 * (foot[0] - self.LINK_LENGTH_1) ** 2``,
    where ``foot`` is the second point returned by
    ``self.get_cartesian_points``.

    NOTE(review): an earlier description of this method mentioned a penalty
    for hinge movement and a normalisation by the combined link lengths;
    neither is implemented here. Confirm which behaviour is intended.

    Args:
        action: Unused; accepted so the signature matches its callers.
        state: Optional indexable state with at least four numeric entries.
            When ``None``, ``self.state`` is used. The input is not
            modified.
        **kw: Ignored.

    Returns:
        ``10`` for a terminal state, otherwise the non-positive quadratic
        reward described above.
    """
    source = self.state if state is None else state

    # Normalise a private copy: writing the wrapped/bounded values back into
    # the caller's array (or into self.state) would make a reward query
    # silently change the state it was asked about.
    s = np.array(source, dtype=float)
    s[0] = wrap(s[0], -np.pi, np.pi)
    s[1] = wrap(s[1], -np.pi, np.pi)
    s[2] = bound(s[2], -self.MAX_VEL_1, self.MAX_VEL_1)
    # NOTE(review): entry 3 is bounded with MAX_VEL_1, the same limit as
    # entry 2. If the class defines a separate limit for the second joint,
    # it was probably meant to be used here - confirm.
    s[3] = bound(s[3], -self.MAX_VEL_1, self.MAX_VEL_1)

    # Only the foot position contributes to the reward; the hinge is unused.
    _hinge, foot = self.get_cartesian_points(s)
    reward = -0.05 * (foot[0] - self.LINK_LENGTH_1) ** 2

    if self.is_terminal(s):
        return 10
    return reward
# Comment list
# Table of contents