graphicsGridworldDisplay.py 文件源码

python
阅读 16 收藏 0 点赞 0 评论 0

项目:Reinforcement-Learning 作者: victorgrego 项目源码 文件源码
def drawQValues(gridworld, qValues, currentState = None, message = 'State-Action Q-Values'):
  grid = gridworld.grid
  blank()
  stateCrossActions = [[(state, action) for action in gridworld.getPossibleActions(state)] for state in gridworld.getStates()]
  qStates = reduce(lambda x,y: x+y, stateCrossActions, [])
  qValueList = [qValues[(state, action)] for state, action in qStates] + [0.0]
  minValue = min(qValueList)
  maxValue = max(qValueList)
  for x in range(grid.width):
    for y in range(grid.height):
      state = (x, y)
      gridType = grid[x][y]
      isExit = (str(gridType) != gridType)
      isCurrent = (currentState == state)
      actions = gridworld.getPossibleActions(state)
      if actions == None or len(actions) == 0:
        actions = [None]
      bestQ = max([qValues[(state, action)] for action in actions])
      bestActions = [action for action in actions if qValues[(state, action)] == bestQ]

      q = util.Counter()
      valStrings = {}
      for action in actions:
        v = qValues[(state, action)]
        q[action] += v
        valStrings[action] = '%.2f' % v
      if gridType == '#':
        drawSquare(x, y, 0, 0, 0, None, None, True, False, isCurrent)
      elif isExit:
        action = 'exit'
        value = q[action]
        valString = '%.2f' % value
        drawSquare(x, y, value, minValue, maxValue, valString, action, False, isExit, isCurrent)
      else:
        drawSquareQ(x, y, q, minValue, maxValue, valStrings, actions, isCurrent)
  pos = to_screen(((grid.width - 1.0) / 2.0, - 0.8))
  text( pos, TEXT_COLOR, message, "Courier", -32, "bold", "c")
评论列表
文章目录


问题


面经


文章

微信
公众号

扫码关注公众号