def __init__(self, num_pegs=3, num_discs=3, gamma=0.95):
    '''
    Set up a Hanoi MDP whose initial state has every disc on the first peg.

    Args:
        num_pegs (int): Number of pegs. One action is generated for each
            ordered pair of distinct peg indices.
        num_discs (int): Number of discs. Disc i is labelled chr(97 + i),
            i.e. "a", "b", "c", ...
        gamma (float): Passed through unchanged to MDP.__init__.
    '''
    self.num_pegs = num_pegs
    self.num_discs = num_discs

    # An action is the two peg indices concatenated as strings, e.g. "01".
    # `range` replaces the Python-2-only `xrange` so this runs on Python 3.
    # NOTE: this rebinds the class-level attribute, so the action list is
    # shared by every HanoiMDP instance, not just this one.
    HanoiMDP.ACTIONS = [
        str(src) + str(dst)
        for src, dst in itertools.product(range(num_pegs), repeat=2)
        if src != dst
    ]

    # Setup init state: every peg starts as " ", then the first peg is
    # overwritten with the concatenated disc labels ("abc" for 3 discs).
    pegs = [" "] * num_pegs
    pegs[0] = "".join(chr(97 + i) for i in range(num_discs))
    init_state = State(data=pegs)

    MDP.__init__(
        self,
        HanoiMDP.ACTIONS,
        self._transition_func,
        self._reward_func,
        init_state=init_state,
        gamma=gamma,
    )
评论列表
文章目录