sarsa_fa.py 文件源码-python代码片段

sarsa_fa.py 文件源码
python
阅读 21 收藏 0 点赞 0 评论 0
def __init__(self, env, monitor_path, video=True, **usercfg):
        """Set up a Sarsa learner that uses tile coding as function approximation.

        Args:
            env: Environment to learn in. Its observation space bounds are
                unpacked into two values each (x and y), and its action space
                must expose ``n``.
            monitor_path: Path handed to ``wrappers.Monitor`` for its output.
            video: When truthy, ``video_callable=None`` is passed to the
                monitor; otherwise ``video_callable=False`` is passed.
            **usercfg: Overrides for the default entries of ``self.config``.
        """
        super(SarsaFA, self).__init__()

        # Only the monitored environment is kept on the instance.
        if video:
            video_callable = None
        else:
            video_callable = False
        self.env = wrappers.Monitor(
            env,
            monitor_path,
            force=True,
            video_callable=video_callable,
        )

        # The number of tilings is needed up front because the default
        # learning rate is derived from it.
        n_tilings = usercfg.get("m", 10)
        self.config = {
            "m": n_tilings,
            "n_x_tiles": 9,
            "n_y_tiles": 9,
            "Lambda": 0.9,
            "epsilon": 0,  # fully greedy in this case
            "alpha": (0.05 * (0.5 / n_tilings)),
            "gamma": 1,
            "n_iter": 1000,
            # Maximum number of allowed steps per episode, as determined
            # (for this environment) by the gym library
            "steps_per_episode": env.spec.tags.get("wrapper_config.TimeLimit.max_episode_steps"),
        }
        # User-supplied settings take precedence over the defaults above.
        self.config.update(usercfg)

        # Lower and upper bounds of the two observation dimensions.
        obs_space = env.observation_space
        self.x_low, self.y_low = obs_space.low
        self.x_high, self.y_high = obs_space.high

        self.nA = env.action_space.n
        self.policy = EGreedy(self.config["epsilon"])
        self.function_approximation = TileCoding(
            self.x_low,
            self.x_high,
            self.y_low,
            self.y_high,
            n_tilings,
            self.config["n_x_tiles"],
            self.config["n_y_tiles"],
            self.nA,
        )