def _step(self, action):
    """Advance the environment by one step and score the outcome.

    Forwards ``action`` to ``self._make_action``, runs
    ``self.step_simulation()``, refreshes ``self.observation`` through
    ``self._make_observation()``, and then derives the reward and the
    termination flag from that observation.

    The action is passed through exactly as given: it is neither clipped
    nor checked against the action space in this method.

    Returns:
        A ``(observation, reward, done, info)`` tuple, where ``info`` is
        always an empty dict.
    """
    # Actuate, simulate, then refresh the observation.
    self._make_action(action)
    self.step_simulation()
    self._make_observation()

    # NOTE(review): slots 0 and 4 are read as torso height (up/down) and
    # torso linear velocity along x, going by the original variable names;
    # the actual layout is set by _make_observation -- confirm there.
    torso_height = self.observation[0]
    forward_velocity = self.observation[4]

    # Reward: a constant per-step bonus plus a weighted velocity term.
    alive_bonus = 1.0
    reward = 16.0 * alive_bonus + 8.0 * forward_velocity

    # Early stop: the episode ends once slot 0 falls below the threshold.
    min_torso_height = 0.10
    done = torso_height < min_torso_height

    return self.observation, reward, done, {}
评论列表
文章目录