def step(self, action):
    """Apply ``action`` for one step and report the outcome.

    The dense reward is the norm of the torso's ``get_body_comvel`` value
    minus a quadratic control cost. The control cost is
    ``0.5 * ctrl_cost_coeff * sum((action / half_range) ** 2)``, where
    ``half_range`` is half the width of ``self.action_bounds``.

    When ``self.sparse_rew`` is truthy the dense reward is discarded: the
    step yields ``1.0`` and ends the episode once the absolute value of the
    first torso COM component exceeds ``100.0``, and ``0.`` otherwise.

    Args:
        action: Control input; it is divided element-wise by the half-range
            of the action bounds (presumably an array shaped like the
            bounds -- TODO confirm).

    Returns:
        The result of ``Step(observation, reward, done, com=..., ori=...)``,
        where ``com`` is the flattened torso COM and ``ori`` comes from
        ``self.get_ori()``.
    """
    self.forward_dynamics(action)
    observation = self.get_current_obs()

    # Express the action in units of half the allowed range before
    # penalising it.
    low, high = self.action_bounds
    half_range = (high - low) * 0.5
    normalized_action = action / half_range
    ctrl_cost = 0.5 * self.ctrl_cost_coeff * np.sum(
        np.square(normalized_action))

    # Original note: swimmer has no problem of jumping reward.
    speed = np.linalg.norm(self.get_body_comvel("torso"))
    dense_reward = speed - ctrl_cost

    if not self.sparse_rew:
        reward, done = dense_reward, False
    elif abs(self.get_body_com("torso")[0]) > 100.0:
        # Sparse mode: the torso has moved far enough along the first axis.
        reward, done = 1.0, True
    else:
        # NOTE(review): read as the "threshold not reached" case of sparse
        # mode -- confirm against the original indentation.
        reward, done = 0., False

    torso_com = self.get_body_com("torso")
    com = np.concatenate([torso_com.flat]).reshape(-1)
    return Step(observation, reward, done, com=com, ori=self.get_ori())
# Comment list
# Table of contents