def _step(self, action):
    """Step via the parent class and penalize out-of-bounds actions.

    The parent's ``_step`` result is returned with the reward reduced by
    ``self.alpha`` times the mean squared amount by which ``action`` lies
    outside the wrapped env's ``Box`` action bounds, after widening the
    bounds on each side by ``self.slack`` times the bound range. The
    excess is normalized by the bound range before squaring.

    The reward returned by the parent is recorded in the fourth returned
    element under the key ``'unwrapped_reward'`` unless that key is
    already present.

    NOTE(review): ``self.slack`` and ``self.alpha`` are set outside this
    method; presumably non-negative scalars -- confirm against the
    constructor. The 4-tuple is presumably (observation, reward, done,
    info) -- confirm against the parent class.

    NOTE(review): a degenerate action space with ``high == low`` in some
    dimension makes the normalization divide by zero; behavior in that
    case is left unchanged here.

    Args:
        action: Action forwarded to the parent's ``_step``; must support
            array arithmetic with the action-space bounds.

    Returns:
        The 4-tuple from the parent's ``_step`` with the penalized reward
        in the second position.
    """
    s, r, t, i = super()._step(action)

    # Internal invariants: the penalty is only defined for Box bounds.
    assert isinstance(self.env, Env)
    assert isinstance(self.env.action_space, Box)

    low = self.env.action_space.low
    high = self.env.action_space.high
    span = high - low
    margin = self.slack * span

    # Zero wherever the action is inside the widened bounds; otherwise the
    # signed distance by which it overshoots them.
    excess = action - np.clip(action, low - margin, high + margin)

    i.setdefault('unwrapped_reward', r)

    penalty = self.alpha * np.mean(np.square(excess / span))
    # Rebind instead of using ``-=``: if the reward is a NumPy array, an
    # in-place update would also change the object just stored under
    # 'unwrapped_reward' (and would fail for integer-typed arrays).
    r = r - penalty
    return s, r, t, i
评论列表
文章目录