def act(self, action):
    """Apply ``action`` to the environment and return the resulting reward.

    The successor state is sampled from a discrete distribution whose
    support is ``self.states`` and whose probabilities are the row
    ``self.T[action][self.state, :]``. ``self.state`` is overwritten with
    the sampled successor before the reward is looked up.

    Args:
        action: Key/index into ``self.T`` selecting the transition table
            to use (presumably a 2-D array indexed ``[state, successor]``
            -- confirm against where ``self.T`` is built).

    Returns:
        ``self.rewards[successor_state]`` for the sampled successor state.

    Raises:
        AssertionError: If ``self.terminal()`` is truthy, i.e. the episode
            has already ended. ``self.state`` is left untouched.
    """
    # Raise explicitly instead of using an ``assert`` statement: asserts are
    # stripped under ``python -O``, which would silently let a finished
    # episode keep stepping. The exception type is kept for existing callers.
    if self.terminal():
        raise AssertionError(
            'Further action not permitted: terminal state reached. '
            'Episode is over.'
        )

    # Row of transition probabilities out of the current state.
    probs = self.T[action][self.state, :]
    pmf = stats.rv_discrete(name='pmf', values=(self.states, probs))

    successor_state = pmf.rvs()
    self.state = successor_state
    return self.rewards[successor_state]
# Web-page navigation residue (not part of the code): "Comment list", "Article table of contents".