def reset_model(self):
self._min_strike_dist = np.inf
self._striked = False
self._strike_pos = None
qpos = self.init_qpos
self.ball = np.array([0.5, -0.175])
while True:
self.goal = np.concatenate([
self.np_random.uniform(low=0.15, high=0.7, size=1),
self.np_random.uniform(low=0.1, high=1.0, size=1)])
if np.linalg.norm(self.ball - self.goal) > 0.17:
break
qpos[-9:-7] = [self.ball[1], self.ball[0]]
qpos[-7:-5] = self.goal
diff = self.ball - self.goal
angle = -np.arctan(diff[0] / (diff[1] + 1e-8))
qpos[-1] = angle / 3.14
qvel = self.init_qvel + self.np_random.uniform(low=-.1, high=.1,
size=self.model.nv)
qvel[7:] = 0
self.set_state(qpos, qvel)
return self._get_obs()
评论列表
文章目录