def __init__(self, initialWealth=25.0, edgePriorAlpha=7, edgePriorBeta=3, maxWealthAlpha=5.0, maxWealthM=200.0, maxRoundsMean=300.0, maxRoundsSD=25.0, reseed=True):
    """Sample one game's parameters from the hyperparameters and set up state.

    Args:
        initialWealth: starting wealth; coerced to ``float``.
        edgePriorAlpha: first parameter of the Beta draw for the coin edge.
        edgePriorBeta: second parameter of the Beta draw for the coin edge.
        maxWealthAlpha: first positional argument to ``genpareto.rvs`` when
            drawing the wealth cap (the shape parameter in scipy's signature).
        maxWealthM: second positional argument to ``genpareto.rvs``
            (the location parameter in scipy's signature).
        maxRoundsMean: mean of the normal draw for the round limit.
        maxRoundsSD: standard deviation of the normal draw for the round limit.
        reseed: when true, or when the instance has no ``np_random``
            attribute yet, ``self._seed()`` is called at the end.
    """
    # Keep the hyperparameters on the instance so a reset can hand them back
    # to __init__() and the next game is governed by the same priors.
    # TODO: this is boilerplate; is there a more elegant way to do this?
    self.initialWealth = float(initialWealth)
    self.edgePriorAlpha = edgePriorAlpha
    self.edgePriorBeta = edgePriorBeta
    self.maxWealthAlpha = maxWealthAlpha
    self.maxWealthM = maxWealthM
    self.maxRoundsMean = maxRoundsMean
    self.maxRoundsSD = maxRoundsSD

    # Draw this game's parameters. All three draws use the same generator,
    # so their order (edge, wealth cap, round limit) must not change.
    rng = prng.np_random
    coin_edge = rng.beta(edgePriorAlpha, edgePriorBeta)
    wealth_cap = round(genpareto.rvs(maxWealthAlpha, maxWealthM, random_state=rng))
    round_limit = int(round(rng.normal(maxRoundsMean, maxRoundsSD)))

    # Running statistics exposed in the observation:
    #   - maxEverWealth: highest wealth seen so far, the sufficient statistic
    #     for the Pareto distribution on the wealth cap (alpha does not
    #     update, x_m does);
    #   - wins / losses: totals that summarise the coinflip edge;
    #   - roundsElapsed: how many rounds have been played.
    self.maxEverWealth = float(self.initialWealth)
    self.wins = 0
    self.losses = 0
    self.roundsElapsed = 0

    # Spaces are sized from the sampled cap and round limit.
    # NOTE(review): the factor of 100 presumably means bets are expressed in
    # hundredths of a wealth unit; confirm against the step logic.
    self.action_space = spaces.Discrete(int(wealth_cap * 100))

    current_wealth_space = spaces.Box(0, wealth_cap, shape=[1])
    rounds_elapsed_space = spaces.Discrete(round_limit + 1)
    wins_space = spaces.Discrete(round_limit + 1)
    losses_space = spaces.Discrete(round_limit + 1)
    max_seen_wealth_space = spaces.Box(0, wealth_cap, [1])
    self.observation_space = spaces.Tuple((
        current_wealth_space,
        rounds_elapsed_space,
        wins_space,
        losses_space,
        max_seen_wealth_space,
    ))
    self.reward_range = (0, wealth_cap)

    # Per-game state derived from the draws above.
    self.edge = coin_edge
    self.wealth = self.initialWealth
    self.maxRounds = round_limit
    self.rounds = self.maxRounds
    self.maxWealth = wealth_cap

    needs_seed = reseed or not hasattr(self, 'np_random')
    if needs_seed:
        self._seed()
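# [web-page residue, not part of the source] Comment list
# [web-page residue, not part of the source] Table of contents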