gridworld_continuous_enum.py 文件源码-python代码片段

gridworld_continuous_enum.py 文件源码

python

阅读 29 收藏 0 点赞 0 评论 0

def preprocess_samples(
    self,
    states: List[Dict[str, float]],
    actions: List[Dict[str, float]],
    rewards: List[float],
    next_states: List[Dict[str, float]],
    next_actions: List[Dict[str, float]],
    is_terminals: List[bool],
    possible_next_actions: List[List[Dict[str, float]]],
    reward_timelines: List[Dict[int, float]],
) -> TrainingDataPage:
    """Build a training page, then collapse its state matrices to indices.

    All arguments are forwarded unchanged to
    ``GridworldContinuous.preprocess_samples``. On the page it returns,
    ``states`` and ``next_states`` are each replaced by a float32 column
    vector holding the second-axis index of every entry equal to 1.0.

    NOTE(review): this presumably expects each row of the parent's state
    matrices to be one-hot (exactly one 1.0 per row), so that the result
    has one enumerated state id per sample -- confirm against the parent
    implementation.

    Returns:
        The page produced by the parent class, with ``states`` and
        ``next_states`` overwritten as described above.
    """

    def column_of_ones(matrix):
        # np.where on a boolean mask yields one index array per axis;
        # element [1] is the second-axis (column) position of each hit.
        hit_columns = np.where(matrix == 1.0)[1]
        column_vector = hit_columns.reshape(-1, 1)
        return column_vector.astype(np.float32)

    # Explicit parent-class call (rather than super()) is kept as in the
    # original so method resolution is unchanged.
    page = GridworldContinuous.preprocess_samples(
        self,
        states,
        actions,
        rewards,
        next_states,
        next_actions,
        is_terminals,
        possible_next_actions,
        reward_timelines,
    )

    page.states = column_of_ones(page.states)
    page.next_states = column_of_ones(page.next_states)
    return page