gridworld_base.py 文件源码-python代码片段

gridworld_base.py 文件源码

python

阅读 28 收藏 0 点赞 0 评论 0

def true_values_for_sample(self, states, actions, assume_optimal_policy: bool):
        """Compute the target value for each sampled (state, action) pair.

        The table returned by ``self.true_q_values(DISCOUNT,
        assume_optimal_policy)`` is printed (reshaped to 5x5) and then used
        to build one value per sample:

        * terminal state: ``reward(state)``
        * otherwise: ``reward(state) + DISCOUNT * table[next_state]``

        where ``next_state`` comes from ``self.move_on_index_limit``.

        Args:
            states: Indexable collection whose items expose ``keys()``; the
                first key of each item is converted with ``int`` and used as
                the state index.
                NOTE(review): presumably a list of sparse one-hot dicts --
                confirm against the caller.
            actions: Collection indexed in parallel with ``states``; each
                entry is passed through to ``move_on_index_limit``.
            assume_optimal_policy: Forwarded unchanged to
                ``self.true_q_values``.

        Returns:
            A list with one value per entry of ``states``, in order.
        """
        value_table = self.true_q_values(DISCOUNT, assume_optimal_policy)
        print("TRUE Q")
        print(value_table.reshape([5, 5]))

        sample_values = []
        for position, sparse_state in enumerate(states):
            # The state index is the first key of the state's mapping.
            state_index = int(list(sparse_state.keys())[0])
            # The successor is resolved before the terminal check, exactly as
            # the call order has always been.
            successor = self.move_on_index_limit(state_index, actions[position])
            terminal = self.is_terminal(state_index)

            value = self.reward(state_index)
            if not terminal:
                # Bootstrap from the successor's entry in the value table.
                value = value + (DISCOUNT * value_table[successor])
            sample_values.append(value)
        return sample_values