def __init__(self, mdp, discount=0.9, iterations=100):
    """
    Run value iteration on ``mdp`` and cache the resulting tables.

    The agent takes an mdp on construction, runs the indicated number of
    sweeps over ``mdp.getStates()``, and stores the outcome in
    ``self.values`` (state -> value) and ``self.actions`` (state -> chosen
    action, or ``None`` for a state with no possible actions).

    Each sweep fills fresh tables and installs them only after every state
    has been processed, so ``self.values`` is not modified mid-sweep.

    Args:
        mdp: The Markov decision process to solve.
        discount: Discount factor, stored on ``self.discount``.
        iterations: Number of sweeps to run. With 0, ``self.values`` stays
            an empty Counter and ``self.actions`` stays an empty dict.

    Some useful mdp methods:
        mdp.getStates()
        mdp.getPossibleActions(state)
        mdp.getTransitionStatesAndProbs(state, action)
        mdp.getReward(state, action, nextState)
        mdp.isTerminal(state)
    """
    self.mdp = mdp
    self.discount = discount
    self.iterations = iterations
    self.values = util.Counter()  # A Counter is a dict with default 0
    self.actions = {}

    for _ in range(iterations):
        # Build the new value table as a Counter too, so self.values keeps
        # its default-0 lookup after the swap instead of degrading to a
        # plain dict that raises KeyError on unseen states.
        new_state_values = util.Counter()
        new_action_value = {}

        for state in mdp.getStates():
            possible_actions = mdp.getPossibleActions(state)

            # No actions available: the state is worth 0 and has no policy.
            if not possible_actions:
                new_state_values[state] = 0
                new_action_value[state] = None
                continue

            # Tuples compare by Q-value first; on equal Q-values the
            # comparison falls through to the actions themselves, which is
            # the tie-break the stored policy has always used.
            # NOTE(review): presumably getQValue reads self.values, i.e. the
            # table from the previous sweep -- confirm against getQValue.
            best_q, best_action = max(
                (self.getQValue(state, action), action)
                for action in possible_actions
            )
            new_state_values[state] = best_q
            new_action_value[state] = best_action

        # Swap in both tables only once the whole sweep is complete.
        self.values = new_state_values
        self.actions = new_action_value
valueIterationAgents.py 文件源码
python
阅读 20
收藏 0
点赞 0
评论 0
评论列表
文章目录