algorithmic_env.py 文件源码-python代码片段

def __init__(self, base=10, chars=False, starting_min_length=2):
        """
        base: Number of distinct characters. 
        chars: If True, use uppercase alphabet. Otherwise, digits. Only affects
               rendering.
        starting_min_length: Minimum input string length. Ramps up as episodes 
                             are consistently solved.
        """
        self.base = base
        # Keep track of this many past episodes
        self.last = 10
        # Cumulative reward earned this episode
        self.episode_total_reward = None
        # Running tally of reward shortfalls. e.g. if there were 10 points to earn and
        # we got 8, we'd append -2
        AlgorithmicEnv.reward_shortfalls = []
        if chars:
            self.charmap = [chr(ord('A')+i) for i in range(base)]
        else:
            self.charmap = [str(i) for i in range(base)]
        self.charmap.append(' ')
        # TODO: Not clear why this is a class variable rather than instance. 
        # Could lead to some spooky action at a distance if someone is working
        # with multiple algorithmic envs at once. Also makes testing tricky.
        AlgorithmicEnv.min_length = starting_min_length
        # Three sub-actions:
        #       1. Move read head left or write (or up/down)
        #       2. Write or not
        #       3. Which character to write. (Ignored if should_write=0)
        self.action_space = Tuple(
            [Discrete(len(self.MOVEMENTS)), Discrete(2), Discrete(self.base)]
        )
        # Can see just what is on the input tape (one of n characters, or nothing)
        self.observation_space = Discrete(self.base + 1)
        self._seed()
        self.reset()