acrobot.py 文件源码-python代码片段

acrobot.py 文件源码

python

阅读 40 收藏 0 点赞 0 评论 0

项目：hip-mdp-public 作者: dtak 项目源码文件源码

def calc_reward(self, action=None, state=None, **kw):
        '''Return the reward for a state of the acrobot.

        The reward is ``10`` when ``self.is_terminal`` reports the state as
        terminal. Otherwise it is the quadratic penalty
        ``-0.05 * (foot[0] - self.LINK_LENGTH_1) ** 2``, where ``foot`` is the
        second point returned by ``self.get_cartesian_points``. The joint
        velocities are not part of the reward expression.

        Args:
            action: Not used by this implementation.
            state: Optional state to evaluate; entries 0-3 are read and
                written. When ``None``, ``self.state`` is evaluated as is.
                When given, entries 0 and 1 are wrapped to ``[-pi, pi]`` and
                entries 2 and 3 are clipped to ``+/- self.MAX_VEL_1`` IN
                PLACE, so the caller's object is modified.
            **kw: Not used by this implementation.

        Returns:
            ``10`` for a terminal state, otherwise the non-positive penalty.
        '''
        if state is None:
            s = self.state
        else:
            s = state
            # Make sure that the input state is clipped/wrapped to the given
            # bounds (not guaranteed when coming from the BNN).
            s[0] = wrap(s[0], -np.pi, np.pi)
            s[1] = wrap(s[1], -np.pi, np.pi)
            s[2] = bound(s[2], -self.MAX_VEL_1, self.MAX_VEL_1)
            # NOTE(review): s[3] uses the same MAX_VEL_1 limit as s[2];
            # confirm whether a separate limit was intended for this entry.
            s[3] = bound(s[3], -self.MAX_VEL_1, self.MAX_VEL_1)

        # Only the foot position enters the penalty; the hinge is unused.
        _, foot = self.get_cartesian_points(s)
        reward = -0.05 * (foot[0] - self.LINK_LENGTH_1) ** 2

        # A terminal state overrides the shaped penalty with a fixed bonus.
        if self.is_terminal(s):
            return 10
        return reward