python类Tuple()的实例源码

pong_env.py 文件源码 项目:deep-rl 作者: xinghai-sun 项目源码 文件源码 阅读 24 收藏 0 点赞 0 评论 0
def __init__(self, ball_speed=4, bat_speed=4, max_num_rounds=20):
        SCREEN_WIDTH, SCREEN_HEIGHT = 160, 210
        self.observation_space = spaces.Tuple([
            spaces.Box(
                low=0, high=255, shape=(SCREEN_HEIGHT, SCREEN_WIDTH, 3)),
            spaces.Box(
                low=0, high=255, shape=(SCREEN_HEIGHT, SCREEN_WIDTH, 3))
        ])
        self.action_space = spaces.Tuple(
            [spaces.Discrete(3), spaces.Discrete(3)])

        pygame.init()
        self._surface = pygame.Surface((SCREEN_WIDTH, SCREEN_HEIGHT))

        self._viewer = None
        self._game = PongGame(
            has_double_players=True,
            window_size=(SCREEN_WIDTH, SCREEN_HEIGHT),
            ball_speed=ball_speed,
            bat_speed=bat_speed,
            max_num_rounds=max_num_rounds)
train_deep_cnn.py 文件源码 项目:gym 作者: openai 项目源码 文件源码 阅读 21 收藏 0 点赞 0 评论 0
def __init__(self, natural=False):
        """
        Initialize environment
        """

        # I use array of len 1 to store constants (otherwise there were some errors)
        self.action_space = spaces.Tuple((
            spaces.Box(-5.0, 0.0, 1),  # learning rate
            spaces.Box(-7.0, -2.0, 1),  # decay
            spaces.Box(-5.0, 0.0, 1),  # momentum
            spaces.Box(2, 8, 1),  # batch size
            spaces.Box(-6.0, 1.0, 1),  # l1 reg
            spaces.Box(-6.0, 1.0, 1),  # l2 reg
            spaces.Box(0.0, 1.0, (5, 2)),  # convolutional layer parameters
            spaces.Box(0.0, 1.0, (2, 2)),  # fully connected layer parameters
        ))

        # observation features, in order: num of instances, num of labels,
        # validation accuracy after training with given parameters
        self.observation_space = spaces.Box(-1e5, 1e5, 2)  # validation accuracy

        # Start the first game
        self._reset()
convergence.py 文件源码 项目:gym 作者: openai 项目源码 文件源码 阅读 28 收藏 0 点赞 0 评论 0
def __init__(self, natural=False):
        """
        Initialize environment
        """

        # I use array of len 1 to store constants (otherwise there were some errors)
        self.action_space = spaces.Tuple((
                                          spaces.Box(-5.0,0.0, 1), # learning rate
                                          spaces.Box(-7.0,-2.0, 1), # decay
                                          spaces.Box(-5.0,0.0, 1), # momentum
                                          spaces.Box(2, 8, 1), # batch size
                                          spaces.Box(-6.0,1.0, 1), # l1 reg
                                          spaces.Box(-6.0,1.0, 1), # l2 reg
                                           ))

        # observation features, in order: num of instances, num of labels,
        # number of filter in part A / B of neural net, num of neurons in
        # output layer, validation accuracy after training with given
        # parameters
        self.observation_space = spaces.Box(-1e5,1e5, 6) # validation accuracy

        # Start the first game
        self._reset()
classify.py 文件源码 项目:space-wrappers 作者: ngc92 项目源码 文件源码 阅读 17 收藏 0 点赞 0 评论 0
def is_compound(space):
    """ Checks whether a space is a compound space. These are non-scalar
        `Box` spaces, `MultiDiscrete`, `MultiBinary` and `Tuple` spaces
        (A Tuple space with a single, non-compound subspace is still considered
        compound).
        :raises TypeError: If the space is no `gym.Space`.
    """
    assert_space(space)

    if isinstance(space, spaces.Discrete):
        return False
    elif isinstance(space, spaces.Box):
        return len(space.shape) != 1 or space.shape[0] != 1
    elif isinstance(space, (spaces.MultiDiscrete, spaces.MultiBinary)):
        return True
    elif isinstance(space, spaces.Tuple):
        return True

    raise NotImplementedError("Unknown space {} of type {} supplied".format(space, type(space)))
classify.py 文件源码 项目:space-wrappers 作者: ngc92 项目源码 文件源码 阅读 19 收藏 0 点赞 0 评论 0
def num_discrete_actions(space):
    """
    For a discrete space, gets the number of available actions as a tuple.
    :param gym.Space space: The discrete space which to inspect.
    :return tuple: Tuple of integers containing the number of discrete actions.
    :raises TypeError: If the space is no `gym.Space`.
    """
    assert_space(space)

    if not is_discrete(space):
        raise TypeError("Space {} is not discrete".format(space))

    if isinstance(space, spaces.Discrete):
        return tuple((space.n,))
    elif isinstance(space, spaces.MultiDiscrete):
        # add +1 here as space.high is an inclusive bound
        return tuple(space.high - space.low + 1)
    elif isinstance(space, spaces.MultiBinary):
        return (2,) * space.n

    raise NotImplementedError("Unknown space {} of type {} supplied".format(space, type(space)))  # pragma: no cover
train_deep_cnn.py 文件源码 项目:AI-Fight-the-Landlord 作者: YoungGer 项目源码 文件源码 阅读 20 收藏 0 点赞 0 评论 0
def __init__(self, natural=False):
        """
        Initialize environment
        """

        # I use array of len 1 to store constants (otherwise there were some errors)
        self.action_space = spaces.Tuple((
            spaces.Box(-5.0, 0.0, 1),  # learning rate
            spaces.Box(-7.0, -2.0, 1),  # decay
            spaces.Box(-5.0, 0.0, 1),  # momentum
            spaces.Box(2, 8, 1),  # batch size
            spaces.Box(-6.0, 1.0, 1),  # l1 reg
            spaces.Box(-6.0, 1.0, 1),  # l2 reg
            spaces.Box(0.0, 1.0, (5, 2)),  # convolutional layer parameters
            spaces.Box(0.0, 1.0, (2, 2)),  # fully connected layer parameters
        ))

        # observation features, in order: num of instances, num of labels,
        # validation accuracy after training with given parameters
        self.observation_space = spaces.Box(-1e5, 1e5, 2)  # validation accuracy

        # Start the first game
        self._reset()
convergence.py 文件源码 项目:AI-Fight-the-Landlord 作者: YoungGer 项目源码 文件源码 阅读 30 收藏 0 点赞 0 评论 0
def __init__(self, natural=False):
        """
        Initialize environment
        """

        # I use array of len 1 to store constants (otherwise there were some errors)
        self.action_space = spaces.Tuple((
                                          spaces.Box(-5.0,0.0, 1), # learning rate
                                          spaces.Box(-7.0,-2.0, 1), # decay
                                          spaces.Box(-5.0,0.0, 1), # momentum
                                          spaces.Box(2, 8, 1), # batch size
                                          spaces.Box(-6.0,1.0, 1), # l1 reg
                                          spaces.Box(-6.0,1.0, 1), # l2 reg
                                           ))

        # observation features, in order: num of instances, num of labels,
        # number of filter in part A / B of neural net, num of neurons in
        # output layer, validation accuracy after training with given
        # parameters
        self.observation_space = spaces.Box(-1e5,1e5, 6) # validation accuracy

        # Start the first game
        self._reset()
relay_env.py 文件源码 项目:relay-generator 作者: calclavia 项目源码 文件源码 阅读 18 收藏 0 点赞 0 评论 0
def __init__(self, dim=(14, 9)):
        self.dim = dim
        self.size = dim[0] * dim[1]
        self.max_blocks_per_turn = min(dim)
        self.target_difficulty = None
        self.target_pos = None

        # Observe the world
        self.observation_space = spaces.Tuple((
            spaces.Box(0, num_block_type, shape=dim),
            spaces.Box(np.array([0, 0]), np.array(dim)),
            spaces.Discrete(num_directions),
            spaces.Box(0, 1, shape=(1))
        ))

        # Actions allow the world to be populated.
        self.action_space = spaces.Discrete(num_actions)
train_deep_cnn.py 文件源码 项目:gym-adv 作者: lerrel 项目源码 文件源码 阅读 18 收藏 0 点赞 0 评论 0
def __init__(self, natural=False):
        """
        Initialize environment
        """

        # I use array of len 1 to store constants (otherwise there were some errors)
        self.action_space = spaces.Tuple((
            spaces.Box(-5.0, 0.0, 1),  # learning rate
            spaces.Box(-7.0, -2.0, 1),  # decay
            spaces.Box(-5.0, 0.0, 1),  # momentum
            spaces.Box(2, 8, 1),  # batch size
            spaces.Box(-6.0, 1.0, 1),  # l1 reg
            spaces.Box(-6.0, 1.0, 1),  # l2 reg
            spaces.Box(0.0, 1.0, (5, 2)),  # convolutional layer parameters
            spaces.Box(0.0, 1.0, (2, 2)),  # fully connected layer parameters
        ))

        # observation features, in order: num of instances, num of labels,
        # validation accuracy after training with given parameters
        self.observation_space = spaces.Box(-1e5, 1e5, 2)  # validation accuracy

        # Start the first game
        self._reset()
convergence.py 文件源码 项目:gym-adv 作者: lerrel 项目源码 文件源码 阅读 25 收藏 0 点赞 0 评论 0
def __init__(self, natural=False):
        """
        Initialize environment
        """

        # I use array of len 1 to store constants (otherwise there were some errors)
        self.action_space = spaces.Tuple((
                                          spaces.Box(-5.0,0.0, 1), # learning rate
                                          spaces.Box(-7.0,-2.0, 1), # decay
                                          spaces.Box(-5.0,0.0, 1), # momentum
                                          spaces.Box(2, 8, 1), # batch size
                                          spaces.Box(-6.0,1.0, 1), # l1 reg
                                          spaces.Box(-6.0,1.0, 1), # l2 reg
                                           ))

        # observation features, in order: num of instances, num of labels,
        # number of filter in part A / B of neural net, num of neurons in
        # output layer, validation accuracy after training with given
        # parameters
        self.observation_space = spaces.Box(-1e5,1e5, 6) # validation accuracy

        # Start the first game
        self._reset()
music_env.py 文件源码 项目:gym-music 作者: calclavia 项目源码 文件源码 阅读 18 收藏 0 点赞 0 评论 0
def __init__(self):
        self.observation_space = spaces.Discrete(NUM_CLASSES)
        self.action_space = spaces.Tuple(
            tuple(spaces.Discrete(2) for _ in range(NUM_CLASSES))
        )
        # Total number of notes
        self.num_notes = 32
        self.key = C_MAJOR_KEY
blackjack.py 文件源码 项目:rl_algorithms 作者: DanielTakeshi 项目源码 文件源码 阅读 18 收藏 0 点赞 0 评论 0
def __init__(self, natural=False):
        self.action_space = spaces.Discrete(2)
        self.observation_space = spaces.Tuple((
            spaces.Discrete(32),
            spaces.Discrete(11),
            spaces.Discrete(2)))
        self._seed()

        # Flag to payout 1.5 on a "natural" blackjack win, like casino rules
        # Ref: http://www.bicyclecards.com/how-to-play/blackjack/
        self.natural = natural
        # Start the first game
        self._reset()        # Number of 
        self.nA = 2
__init__.py 文件源码 项目:universe 作者: openai 项目源码 文件源码 阅读 18 收藏 0 点赞 0 评论 0
def repeat_space(space, n):
    return spaces.Tuple([space] * n)
grid_world.py 文件源码 项目:vic-tensorflow 作者: sygi 项目源码 文件源码 阅读 18 收藏 0 点赞 0 评论 0
def __init__(self, board_size=(5, 9), wind_proba=0.2, stay_wind=True):
        self.board_size = board_size
        self.wind_proba = wind_proba
        self.stay_wind = stay_wind
        self._seed()

        self._reset()
        self.action_space = spaces.Discrete(len(ACTION_MEANING))
        self.observation_space = spaces.Tuple(
            (spaces.Discrete(board_size[0]), spaces.Discrete(board_size[1])))
        self.window = None
algorithmic_env.py 文件源码 项目:gym 作者: openai 项目源码 文件源码 阅读 19 收藏 0 点赞 0 评论 0
def __init__(self, base=10, chars=False, starting_min_length=2):
        """
        base: Number of distinct characters. 
        chars: If True, use uppercase alphabet. Otherwise, digits. Only affects
               rendering.
        starting_min_length: Minimum input string length. Ramps up as episodes 
                             are consistently solved.
        """
        self.base = base
        # Keep track of this many past episodes
        self.last = 10
        # Cumulative reward earned this episode
        self.episode_total_reward = None
        # Running tally of reward shortfalls. e.g. if there were 10 points to earn and
        # we got 8, we'd append -2
        AlgorithmicEnv.reward_shortfalls = []
        if chars:
            self.charmap = [chr(ord('A')+i) for i in range(base)]
        else:
            self.charmap = [str(i) for i in range(base)]
        self.charmap.append(' ')
        # TODO: Not clear why this is a class variable rather than instance. 
        # Could lead to some spooky action at a distance if someone is working
        # with multiple algorithmic envs at once. Also makes testing tricky.
        AlgorithmicEnv.min_length = starting_min_length
        # Three sub-actions:
        #       1. Move read head left or write (or up/down)
        #       2. Write or not
        #       3. Which character to write. (Ignored if should_write=0)
        self.action_space = Tuple(
            [Discrete(len(self.MOVEMENTS)), Discrete(2), Discrete(self.base)]
        )
        # Can see just what is on the input tape (one of n characters, or nothing)
        self.observation_space = Discrete(self.base + 1)
        self._seed()
        self.reset()
offswitch_cartpole.py 文件源码 项目:gym 作者: openai 项目源码 文件源码 阅读 19 收藏 0 点赞 0 评论 0
def __init__(self):
        super(OffSwitchCartpoleEnv, self).__init__()
        self.observation_space = spaces.Tuple((spaces.Discrete(2), self.observation_space))
        self.left_threshold_crossed = False
        # number of episodes in which the cart crossed the left/right threshold (first).
        self.num_crosses = [0.,0.]
offswitch_cartpole_prob.py 文件源码 项目:gym 作者: openai 项目源码 文件源码 阅读 20 收藏 0 点赞 0 评论 0
def __init__(self):
        super(OffSwitchCartpoleProbEnv, self).__init__()
        self.observation_space = spaces.Tuple((spaces.Discrete(2), self.observation_space))
        self.threshold_crossed = False
        # number of episodes in which the cart crossed the left/right threshold (first).
        self.num_crosses = [0.,0.]
predict_obs_cartpole.py 文件源码 项目:gym 作者: openai 项目源码 文件源码 阅读 23 收藏 0 点赞 0 评论 0
def __init__(self):
        super(PredictObsCartpoleEnv, self).__init__()
        self.cartpole = CartPoleEnv()

        self.observation_space = self.cartpole.observation_space
        self.action_space = spaces.Tuple((self.cartpole.action_space,) + (self.cartpole.observation_space,) * (NUM_PREDICTED_OBSERVATIONS))
blackjack.py 文件源码 项目:gym 作者: openai 项目源码 文件源码 阅读 19 收藏 0 点赞 0 评论 0
def __init__(self, natural=False):
        self.action_space = spaces.Discrete(2)
        self.observation_space = spaces.Tuple((
            spaces.Discrete(32),
            spaces.Discrete(11),
            spaces.Discrete(2)))
        self._seed()

        # Flag to payout 1.5 on a "natural" blackjack win, like casino rules
        # Ref: http://www.bicyclecards.com/how-to-play/blackjack/
        self.natural = natural
        # Start the first game
        self._reset()
kellycoinflip.py 文件源码 项目:gym 作者: openai 项目源码 文件源码 阅读 23 收藏 0 点赞 0 评论 0
def __init__(self, initialWealth=25.0, edge=0.6, maxWealth=250.0, maxRounds=300):

        self.action_space = spaces.Discrete(int(maxWealth*100)) # betting in penny increments
        self.observation_space = spaces.Tuple((
            spaces.Box(0, maxWealth, [1]), # (w,b)
            spaces.Discrete(maxRounds+1)))
        self.reward_range = (0, maxWealth)
        self.edge = edge
        self.wealth = initialWealth
        self.initialWealth = initialWealth
        self.maxRounds = maxRounds
        self.maxWealth = maxWealth
        self._seed()
        self._reset()
kellycoinflip.py 文件源码 项目:gym 作者: openai 项目源码 文件源码 阅读 20 收藏 0 点赞 0 评论 0
def __init__(self, initialWealth=25.0, edgePriorAlpha=7, edgePriorBeta=3, maxWealthAlpha=5.0, maxWealthM=200.0, maxRoundsMean=300.0, maxRoundsSD=25.0, reseed=True):
        # store the hyperparameters for passing back into __init__() during resets so the same hyperparameters govern the next game's parameters, as the user expects: TODO: this is boilerplate, is there any more elegant way to do this?
        self.initialWealth=float(initialWealth)
        self.edgePriorAlpha=edgePriorAlpha
        self.edgePriorBeta=edgePriorBeta
        self.maxWealthAlpha=maxWealthAlpha
        self.maxWealthM=maxWealthM
        self.maxRoundsMean=maxRoundsMean
        self.maxRoundsSD=maxRoundsSD

        # draw this game's set of parameters:
        edge = prng.np_random.beta(edgePriorAlpha, edgePriorBeta)
        maxWealth = round(genpareto.rvs(maxWealthAlpha, maxWealthM, random_state=prng.np_random))
        maxRounds = int(round(prng.np_random.normal(maxRoundsMean, maxRoundsSD)))

        # add an additional global variable which is the sufficient statistic for the Pareto distribution on wealth cap;
        # alpha doesn't update, but x_m does, and simply is the highest wealth count we've seen to date:
        self.maxEverWealth = float(self.initialWealth)
        # for the coinflip edge, it is total wins/losses:
        self.wins = 0
        self.losses = 0
        # for the number of rounds, we need to remember how many rounds we've played:
        self.roundsElapsed = 0

        # the rest proceeds as before:
        self.action_space = spaces.Discrete(int(maxWealth*100))
        self.observation_space = spaces.Tuple((
            spaces.Box(0, maxWealth, shape=[1]), # current wealth
            spaces.Discrete(maxRounds+1), # rounds elapsed
            spaces.Discrete(maxRounds+1), # wins
            spaces.Discrete(maxRounds+1), # losses
            spaces.Box(0, maxWealth, [1]))) # maximum observed wealth
        self.reward_range = (0, maxWealth)
        self.edge = edge
        self.wealth = self.initialWealth
        self.maxRounds = maxRounds
        self.rounds = self.maxRounds
        self.maxWealth = maxWealth
        if reseed or not hasattr(self, 'np_random') : self._seed()
ssbm_env.py 文件源码 项目:gym-dolphin 作者: vladfi1 项目源码 文件源码 阅读 18 收藏 0 点赞 0 评论 0
def __init__(self, spec):
    self.spec = spec

    self.space = spaces.Tuple([conv.space for _, conv in spec])
ssbm_env.py 文件源码 项目:gym-dolphin 作者: vladfi1 项目源码 文件源码 阅读 22 收藏 0 点赞 0 评论 0
def __init__(self, conv, permutation):
    self.conv = conv
    self.permutation = permutation

    self.space = spaces.Tuple([conv.space for _ in permutation])
space.py 文件源码 项目:reinforceflow 作者: dbobrenko 项目源码 文件源码 阅读 22 收藏 0 点赞 0 评论 0
def reshape(self, new_shape):
        raise NotImplementedError("Use reshape separately for each space in Tuple.")
gym_wrapper.py 文件源码 项目:reinforceflow 作者: dbobrenko 项目源码 文件源码 阅读 24 收藏 0 点赞 0 评论 0
def __init__(self, env,  obs_stack):
        super(ObservationStackWrap, self).__init__(env=env)
        assert obs_stack > 1, "Observation stack length must be higher than 1."
        assert not isinstance(self.observation_space, Tuple),\
            "Observation stack is not compatible with Tuple spaces."
        self._obs_stack_len = obs_stack or 1
        self.observation_space = self.env.observation_space
        new_shape = list(self.observation_space.shape)
        new_shape[-1] = self.observation_space.shape[-1] * obs_stack
        self.observation_space.reshape(tuple(new_shape))
        self._obs_stack = None
gym_wrapper.py 文件源码 项目:reinforceflow 作者: dbobrenko 项目源码 文件源码 阅读 24 收藏 0 点赞 0 评论 0
def _make_rf2gym_converter(space):
    """Makes space converter function that maps space samples ReinforceFlow -> Gym."""
    # TODO: add spaces.MultiDiscrete support.
    if isinstance(space, spaces.Discrete):
        def converter(sample):
            return np.argmax(sample)
        return converter

    if isinstance(space, spaces.MultiBinary):
        def converter(sample):
            return tuple([np.argmax(s) for s in sample])
        return converter

    if isinstance(space, spaces.Box):
        return lambda sample: sample

    if isinstance(space, spaces.Tuple):
        sub_converters = []
        for sub_space in space.spaces:
            sub_converters.append(_make_rf2gym_converter(sub_space))

        def converter(sample):
            converted_tuple = []
            for sub_sample, sub_converter in zip(sample, sub_converters):
                converted_tuple.append(sub_converter(sub_sample))
            return tuple(converted_tuple)
        return converter
    raise ValueError("Unsupported space %s." % space)
process_frame.py 文件源码 项目:deep-rl 作者: xinghai-sun 项目源码 文件源码 阅读 18 收藏 0 点赞 0 评论 0
def __init__(self, env=None):
        super(AtariRescale42x42Wrapper, self).__init__(env)
        if isinstance(self.observation_space, spaces.Tuple):
            self.observation_space = spaces.Tuple([
                gym.spaces.Box(0.0, 1.0, [1, 42, 42])
                for space in self.env.observation_space.spaces
            ])
        else:
            self.observation_space = gym.spaces.Box(0.0, 1.0, [1, 42, 42])
minecraft_env.py 文件源码 项目:gym-minecraft 作者: tambetm 项目源码 文件源码 阅读 24 收藏 0 点赞 0 评论 0
def _take_action(self, actions):
        # if there is only one action space, it wasn't wrapped in Tuple
        if len(self.action_spaces) == 1:
            actions = [actions]

        # send appropriate command for different actions
        for spc, cmds, acts in zip(self.action_spaces, self.action_names, actions):
            if isinstance(spc, spaces.Discrete):
                logger.debug(cmds[acts])
                self.agent_host.sendCommand(cmds[acts])
            elif isinstance(spc, spaces.Box):
                for cmd, val in zip(cmds, acts):
                    logger.debug(cmd + " " + str(val))
                    self.agent_host.sendCommand(cmd + " " + str(val))
            elif isinstance(spc, spaces.MultiDiscrete):
                for cmd, val in zip(cmds, acts):
                    logger.debug(cmd + " " + str(val))
                    self.agent_host.sendCommand(cmd + " " + str(val))
            else:
                logger.warn("Unknown action space for %s, ignoring." % cmds)
algorithmic_env.py 文件源码 项目:AI-Fight-the-Landlord 作者: YoungGer 项目源码 文件源码 阅读 25 收藏 0 点赞 0 评论 0
def __init__(self, base=10, chars=False, starting_min_length=2):
        """
        base: Number of distinct characters. 
        chars: If True, use uppercase alphabet. Otherwise, digits. Only affects
               rendering.
        starting_min_length: Minimum input string length. Ramps up as episodes 
                             are consistently solved.
        """
        self.base = base
        # Keep track of this many past episodes
        self.last = 10
        # Cumulative reward earned this episode
        self.episode_total_reward = None
        # Running tally of reward shortfalls. e.g. if there were 10 points to earn and
        # we got 8, we'd append -2
        AlgorithmicEnv.reward_shortfalls = []
        if chars:
            self.charmap = [chr(ord('A')+i) for i in range(base)]
        else:
            self.charmap = [str(i) for i in range(base)]
        self.charmap.append(' ')
        # TODO: Not clear why this is a class variable rather than instance. 
        # Could lead to some spooky action at a distance if someone is working
        # with multiple algorithmic envs at once. Also makes testing tricky.
        AlgorithmicEnv.min_length = starting_min_length
        # Three sub-actions:
        #       1. Move read head left or write (or up/down)
        #       2. Write or not
        #       3. Which character to write. (Ignored if should_write=0)
        self.action_space = Tuple(
            [Discrete(len(self.MOVEMENTS)), Discrete(2), Discrete(self.base)]
        )
        # Can see just what is on the input tape (one of n characters, or nothing)
        self.observation_space = Discrete(self.base + 1)
        self._seed()
        self.reset()
offswitch_cartpole.py 文件源码 项目:AI-Fight-the-Landlord 作者: YoungGer 项目源码 文件源码 阅读 21 收藏 0 点赞 0 评论 0
def __init__(self):
        super(OffSwitchCartpoleEnv, self).__init__()
        self.observation_space = spaces.Tuple((spaces.Discrete(2), self.observation_space))
        self.left_threshold_crossed = False
        # number of episodes in which the cart crossed the left/right threshold (first).
        self.num_crosses = [0.,0.]


问题


面经


文章

微信
公众号

扫码关注公众号