hopper_vrep_env.py 文件源码-python代码片段

hopper_vrep_env.py 文件源码

python

阅读 30 收藏 0 点赞 0 评论 0

def _step(self, action):
        """Advance the environment by one control step.

        The action is handed to ``_make_action`` exactly as received (no
        clipping, no action-space assertion and no velocity scaling are
        applied here), the simulation is stepped once, and
        ``self.observation`` is refreshed through ``_make_observation``.

        Args:
            action: Command forwarded unchanged to ``self._make_action``.

        Returns:
            A 4-tuple ``(observation, reward, done, info)``:

            * ``observation`` -- ``self.observation`` after the step.
            * ``reward`` -- ``16.0 * 1.0 + 8.0 * observation[4]``.
            * ``done`` -- ``observation[0] < 0.10``.
            * ``info`` -- always an empty dict.
        """
        # Actuate -> simulate -> observe; the order matters because the
        # reward below is computed from the freshly updated observation.
        self._make_action(action)
        self.step_simulation()
        self._make_observation()

        # Index 0 is treated as the torso height (up/down) and index 4 as the
        # torso linear velocity along x -- presumably the forward direction;
        # confirm against _make_observation.
        height, forward_speed = self.observation[0], self.observation[4]

        # Reward: constant bonus for every step taken plus a term
        # proportional to the x velocity.
        alive_bonus = 1.0
        alive_weight, speed_weight = 16.0, 8.0
        reward = alive_weight * alive_bonus + speed_weight * forward_speed

        # Early stop: the episode ends once the torso drops below this height.
        min_height = 0.10
        fallen = height < min_height

        return self.observation, reward, fallen, {}