def valueIteration(self, debugCallback=None, turbo=False):
    """Run value iteration on the grid world and return the iteration count.

    Implements the value iteration algorithm (see AI: A Modern Approach,
    Third ed., p. 652). Every cell utility is recomputed sweep after sweep
    until the largest change seen in a sweep (the max norm, p. 654) is at
    most ``eps * (1 - gamma) / gamma``, where ``eps`` is
    ``Policy.valueIterationEpsilon`` and ``gamma`` is
    ``self.world.discFactor``.

    The loop also stops once ``self.numOfIterations`` reaches
    ``Policy.maxNumberOfIterations`` or the elapsed CPU time exceeds
    ``Policy.timeToLive``. With a discount factor of 1 the convergence
    threshold is 0, so this cap is what guarantees termination there.

    Args:
        debugCallback: optional callable invoked as
            ``debugCallback(self, isEnded)``. It is called with
            ``isEnded=False`` after every sweep and once with
            ``isEnded=True`` after the loop has finished. During the loop
            it must return a truthy value to continue; a falsy return
            stops the algorithm. The return value of the final call is
            ignored.
        turbo: when False (classic mode) each sweep fills a fresh utility
            vector that replaces ``self.utilities`` only after the sweep.
            When True the new values are written directly into
            ``self.utilities`` during the sweep, so cells computed later in
            the same sweep presumably already see the updated values
            (this relies on the cell-utility helper reading
            ``self.utilities`` -- confirm against that helper).

    Returns:
        ``self.numOfIterations``. This method increments the counter but
        never resets it.

    Side effects:
        Updates ``self.utilities``, ``self.numOfIterations`` and
        ``self.elapsed``; prints a warning and shows a message box when the
        iteration or time limit is hit.
    """
    eps = Policy.valueIterationEpsilon
    dfact = self.world.discFactor
    cols, rows = self.world.size

    # The convergence bound does not change between sweeps, so compute it
    # once. A discount factor of 0 would divide by zero; the bound grows
    # without limit as the factor approaches 0, so treat it as infinite
    # (a single sweep is then enough).
    if dfact:
        threshold = eps * (1 - dfact) / dfact
    else:
        threshold = float("inf")

    # In turbo mode updates are made in place on the live utility vector.
    if turbo:
        newUv = self.utilities

    reiterate = True
    start = time.process_time()
    while reiterate:
        self.numOfIterations += 1
        maxNorm = 0  # max norm definition: AI: A Modern Approach (Third ed.) p. 654

        # Classic mode builds the next utility vector separately.
        if not turbo:
            newUv = self.__createEmptyUtilityVector()

        for x in range(cols):
            for y in range(rows):
                v = self.__cellUtility(x, y)
                # Cells for which the helper yields None do not contribute
                # to the max norm.
                if v is not None:
                    maxNorm = max(maxNorm, abs(self.utilities[y][x] - v))
                newUv[y][x] = v

        if not turbo:
            self.utilities = newUv

        # The callback may request an early stop by returning a falsy value.
        if debugCallback:
            reiterate = debugCallback(self, False)

        if maxNorm <= threshold:
            reiterate = False

        self.elapsed = time.process_time() - start
        if (self.numOfIterations >= Policy.maxNumberOfIterations
                or self.elapsed > Policy.timeToLive):
            reiterate = False
            print("warning: max number of iterations exceeded")
            messagebox.showwarning("Warning", "max number of iterations exceeded")

    # Final notification: the algorithm has ended.
    if debugCallback:
        debugCallback(self, True)

    return self.numOfIterations
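# Comment list (web-page residue, not part of the program)
# Table of contents (web-page residue, not part of the program)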