kellycoinflip.py 文件源码-python代码片段

def __init__(self, initialWealth=25.0, edgePriorAlpha=7, edgePriorBeta=3, maxWealthAlpha=5.0, maxWealthM=200.0, maxRoundsMean=300.0, maxRoundsSD=25.0, reseed=True):
        # store the hyperparameters for passing back into __init__() during resets so the same hyperparameters govern the next game's parameters, as the user expects: TODO: this is boilerplate, is there any more elegant way to do this?
        self.initialWealth=float(initialWealth)
        self.edgePriorAlpha=edgePriorAlpha
        self.edgePriorBeta=edgePriorBeta
        self.maxWealthAlpha=maxWealthAlpha
        self.maxWealthM=maxWealthM
        self.maxRoundsMean=maxRoundsMean
        self.maxRoundsSD=maxRoundsSD

        # draw this game's set of parameters:
        edge = prng.np_random.beta(edgePriorAlpha, edgePriorBeta)
        maxWealth = round(genpareto.rvs(maxWealthAlpha, maxWealthM, random_state=prng.np_random))
        maxRounds = int(round(prng.np_random.normal(maxRoundsMean, maxRoundsSD)))

        # add an additional global variable which is the sufficient statistic for the Pareto distribution on wealth cap;
        # alpha doesn't update, but x_m does, and simply is the highest wealth count we've seen to date:
        self.maxEverWealth = float(self.initialWealth)
        # for the coinflip edge, it is total wins/losses:
        self.wins = 0
        self.losses = 0
        # for the number of rounds, we need to remember how many rounds we've played:
        self.roundsElapsed = 0

        # the rest proceeds as before:
        self.action_space = spaces.Discrete(int(maxWealth*100))
        self.observation_space = spaces.Tuple((
            spaces.Box(0, maxWealth, shape=[1]), # current wealth
            spaces.Discrete(maxRounds+1), # rounds elapsed
            spaces.Discrete(maxRounds+1), # wins
            spaces.Discrete(maxRounds+1), # losses
            spaces.Box(0, maxWealth, [1]))) # maximum observed wealth
        self.reward_range = (0, maxWealth)
        self.edge = edge
        self.wealth = self.initialWealth
        self.maxRounds = maxRounds
        self.rounds = self.maxRounds
        self.maxWealth = maxWealth
        if reseed or not hasattr(self, 'np_random') : self._seed()