def step(self, action):
self.forward_dynamics(action)
comvel = self.get_body_comvel("torso")
forward_reward = comvel[0]
lb, ub = self.action_bounds
scaling = (ub - lb) * 0.5
ctrl_cost = 0.5 * 1e-2 * np.sum(np.square(action / scaling))
contact_cost = 0.5 * 1e-3 * np.sum(
np.square(np.clip(self.model.data.cfrc_ext, -1, 1))),
survive_reward = 0.05
reward = forward_reward - ctrl_cost - contact_cost + survive_reward
state = self._state
notdone = np.isfinite(state).all() \
and state[2] >= 0.2 and state[2] <= 1.0
done = not notdone
ob = self.get_current_obs()
return Step(ob, float(reward), done)
评论列表
文章目录