def _step(self, action):
# the first element of action is the actual current action
current_action = action[0]
observation, reward, done, info = self.cartpole._step(current_action)
if not done:
# We add the newly predicted observations to the list before checking predictions
# in order to give the agent a chance to predict the observations that they
# are going to get _this_ round.
self.predicted_observations.append(action[1:])
if self.iteration > TIME_BEFORE_BONUS_ALLOWED:
for i in xrange(min(NUM_PREDICTED_OBSERVATIONS, len(self.predicted_observations))):
l2dist = np.sqrt(np.sum(np.square(np.subtract(
self.predicted_observations[-(i + 1)][i],
observation
))))
bonus = CORRECT_PREDICTION_BONUS * (1 - math.erf(l2dist))
reward += bonus
self.iteration += 1
return observation, reward, done, info