def getDistribution(self, state):
"Returns a Counter encoding a distribution over actions from the provided state."
util.raiseNotDefined()
Example source code for the Python Counter() class
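All of the snippets on this page rely on util.Counter from the Berkeley Pacman util module. As a rough sketch of the interface they assume, the simplified class below captures the behaviour actually used here (a dict whose missing keys read as zero, plus normalize and divideAll); the real util.py version has more methods, such as argMax and a dot-product overload.

class Counter(dict):
    def __getitem__(self, key):
        # Missing keys read as 0 instead of raising KeyError.
        self.setdefault(key, 0)
        return dict.__getitem__(self, key)

    def normalize(self):
        # Rescale values so they sum to 1; leave an all-zero counter unchanged.
        total = float(sum(self.values()))
        if total == 0:
            return
        for key in list(self.keys()):
            self[key] = self[key] / total

    def divideAll(self, divisor):
        # Divide every value by a constant (used by the feature extractors below).
        divisor = float(divisor)
        for key in self:
            self[key] /= divisor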
def getDistribution( self, state ):
dist = util.Counter()
for a in state.getLegalActions( self.index ): dist[a] = 1.0
dist.normalize()
return dist
def getDistribution( self, state ):
# Read variables from state
ghostState = state.getGhostState( self.index )
legalActions = state.getLegalActions( self.index )
pos = state.getGhostPosition( self.index )
isScared = ghostState.scaredTimer > 0
speed = 1
if isScared: speed = 0.5
actionVectors = [Actions.directionToVector( a, speed ) for a in legalActions]
newPositions = [( pos[0]+a[0], pos[1]+a[1] ) for a in actionVectors]
pacmanPosition = state.getPacmanPosition()
# Select best actions given the state
distancesToPacman = [manhattanDistance( pos, pacmanPosition ) for pos in newPositions]
if isScared:
bestScore = max( distancesToPacman )
bestProb = self.prob_scaredFlee
else:
bestScore = min( distancesToPacman )
bestProb = self.prob_attack
bestActions = [action for action, distance in zip( legalActions, distancesToPacman ) if distance == bestScore]
# Construct distribution
dist = util.Counter()
for a in bestActions: dist[a] = bestProb / len(bestActions)
for a in legalActions: dist[a] += ( 1-bestProb ) / len(legalActions)
dist.normalize()
return dist
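A ghost agent's getAction typically turns this normalized Counter into a concrete move by sampling from it (the original ghostAgents.py delegates this step to a util helper). The manual version below is only an illustrative sketch, and sampleFromDistribution is a hypothetical name.

import random

def sampleFromDistribution(dist):
    # dist: a normalized util.Counter mapping actions to probabilities.
    # Walk the cumulative distribution until a uniform draw falls into a bucket.
    r = random.random()
    cumulative = 0.0
    action = None
    for action, probability in dist.items():
        cumulative += probability
        if r <= cumulative:
            return action
    # Fall back to the last action in case of floating-point rounding.
    return action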
Source file: textGridworldDisplay.py (project: Reinforcement-Learning, author: victorgrego)
def displayValues(self, agent, currentState = None, message = None):
if message != None:
print message
values = util.Counter()
policy = {}
states = self.gridworld.getStates()
for state in states:
values[state] = agent.getValue(state)
policy[state] = agent.getPolicy(state)
prettyPrintValues(self.gridworld, values, policy, currentState)
def displayQValues(self, agent, currentState = None, message = None):
if message != None: print message
qValues = util.Counter()
states = self.gridworld.getStates()
for state in states:
for action in self.gridworld.getPossibleActions(state):
qValues[(state, action)] = agent.getQValue(state, action)
prettyPrintQValues(self.gridworld, qValues, currentState)
def getFeatures(self, state, action):
feats = util.Counter()
feats[(state,action)] = 1.0
return feats
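This identity extractor makes every (state, action) pair its own feature, so a linear approximate Q-learner built on it reduces to tabular Q-learning. As a hedged sketch of how such a feature Counter is usually combined with a weight Counter (linearQValue is a hypothetical helper; the Berkeley Counter also overloads * as a dot product, which the explicit sum below avoids relying on):

def linearQValue(weights, features):
    # Q(s, a) approximated as a weighted sum of feature values:
    #   Q(s, a) = sum_i w_i * f_i(s, a)
    return sum(weights[f] * value for f, value in features.items())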
def getFeatures(self, state, action):
# extract the grid of food and wall locations and get the ghost locations
food = state.getFood()
walls = state.getWalls()
ghosts = state.getGhostPositions()
features = util.Counter()
features["bias"] = 1.0
# compute the location of pacman after he takes the action
x, y = state.getPacmanPosition()
dx, dy = Actions.directionToVector(action)
next_x, next_y = int(x + dx), int(y + dy)
# count the number of ghosts 1-step away
features["#-of-ghosts-1-step-away"] = sum((next_x, next_y) in Actions.getLegalNeighbors(g, walls) for g in ghosts)
# if there is no danger of ghosts then add the food feature
if not features["#-of-ghosts-1-step-away"] and food[next_x][next_y]:
features["eats-food"] = 1.0
dist = closestFood((next_x, next_y), food, walls)
if dist is not None:
# make the distance a number less than one otherwise the update
# will diverge wildly
features["closest-food"] = float(dist) / (walls.width * walls.height)
features.divideAll(10.0)
return features
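The closestFood helper used above is defined elsewhere in featureExtractors.py. A hedged sketch of what it computes, a breadth-first search over non-wall cells until a food pellet is found, is shown below; the exact implementation may differ.

def closestFood(pos, food, walls):
    # BFS outward from pos; return the number of steps to the nearest food
    # pellet, or None if no food is reachable.
    fringe = [(pos[0], pos[1], 0)]
    expanded = set()
    while fringe:
        pos_x, pos_y, dist = fringe.pop(0)
        if (pos_x, pos_y) in expanded:
            continue
        expanded.add((pos_x, pos_y))
        if food[pos_x][pos_y]:
            return dist
        # Expand legal (non-wall) neighbours one step further out.
        for nbr_x, nbr_y in Actions.getLegalNeighbors((pos_x, pos_y), walls):
            fringe.append((nbr_x, nbr_y, dist + 1))
    return None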
Source file: graphicsGridworldDisplay.py (project: Reinforcement-Learning, author: victorgrego)
def displayValues(self, agent, currentState = None, message = 'Agent Values'):
values = util.Counter()
policy = {}
states = self.gridworld.getStates()
for state in states:
values[state] = agent.getValue(state)
policy[state] = agent.getPolicy(state)
drawValues(self.gridworld, values, policy, currentState, message)
sleep(0.05 / self.speed)
def displayNullValues(self, currentState = None, message = ''):
values = util.Counter()
#policy = {}
states = self.gridworld.getStates()
for state in states:
values[state] = 0.0
#policy[state] = agent.getPolicy(state)
drawNullValues(self.gridworld, currentState,'')
# drawValues(self.gridworld, values, policy, currentState, message)
sleep(0.05 / self.speed)
def displayQValues(self, agent, currentState = None, message = 'Agent Q-Values'):
qValues = util.Counter()
states = self.gridworld.getStates()
for state in states:
for action in self.gridworld.getPossibleActions(state):
qValues[(state, action)] = agent.getQValue(state, action)
drawQValues(self.gridworld, qValues, currentState, message)
sleep(0.05 / self.speed)
def drawQValues(gridworld, qValues, currentState = None, message = 'State-Action Q-Values'):
grid = gridworld.grid
blank()
stateCrossActions = [[(state, action) for action in gridworld.getPossibleActions(state)] for state in gridworld.getStates()]
qStates = reduce(lambda x,y: x+y, stateCrossActions, [])
qValueList = [qValues[(state, action)] for state, action in qStates] + [0.0]
minValue = min(qValueList)
maxValue = max(qValueList)
for x in range(grid.width):
for y in range(grid.height):
state = (x, y)
gridType = grid[x][y]
isExit = (str(gridType) != gridType)
isCurrent = (currentState == state)
actions = gridworld.getPossibleActions(state)
if actions == None or len(actions) == 0:
actions = [None]
bestQ = max([qValues[(state, action)] for action in actions])
bestActions = [action for action in actions if qValues[(state, action)] == bestQ]
q = util.Counter()
valStrings = {}
for action in actions:
v = qValues[(state, action)]
q[action] += v
valStrings[action] = '%.2f' % v
if gridType == '#':
drawSquare(x, y, 0, 0, 0, None, None, True, False, isCurrent)
elif isExit:
action = 'exit'
value = q[action]
valString = '%.2f' % value
drawSquare(x, y, value, minValue, maxValue, valString, action, False, isExit, isCurrent)
else:
drawSquareQ(x, y, q, minValue, maxValue, valStrings, actions, isCurrent)
pos = to_screen(((grid.width - 1.0) / 2.0, - 0.8))
text( pos, TEXT_COLOR, message, "Courier", -32, "bold", "c")
def __init__(self, **args):
"You can initialize Q-values here..."
ReinforcementAgent.__init__(self, **args)
self.Qvalues = util.Counter()
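With Q-values stored in a Counter keyed by (state, action), the lookup and update methods reduce to dictionary access plus the standard temporal-difference rule. The sketch below assumes the ReinforcementAgent base class exposes alpha, discount and getLegalActions, as in the standard framework; the actual assignment solution may be structured differently.

def getQValue(self, state, action):
    # Unseen (state, action) pairs default to 0.0 thanks to Counter.
    return self.Qvalues[(state, action)]

def update(self, state, action, nextState, reward):
    # Q(s,a) <- (1 - alpha) * Q(s,a) + alpha * (r + gamma * max_a' Q(s',a'))
    legalActions = self.getLegalActions(nextState)
    if legalActions:
        nextValue = max(self.getQValue(nextState, a) for a in legalActions)
    else:
        nextValue = 0.0
    sample = reward + self.discount * nextValue
    self.Qvalues[(state, action)] = (
        (1 - self.alpha) * self.Qvalues[(state, action)] + self.alpha * sample)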
def __init__(self, mdp, discount = 0.9, iterations = 100):
"""
Your value iteration agent should take an mdp on
construction, run the indicated number of iterations
and then act according to the resulting policy.
Some useful mdp methods you will use:
mdp.getStates()
mdp.getPossibleActions(state)
mdp.getTransitionStatesAndProbs(state, action)
mdp.getReward(state, action, nextState)
mdp.isTerminal(state)
"""
self.mdp = mdp
self.discount = discount
self.iterations = iterations
self.values = util.Counter() # A Counter is a dict with default 0
self.actions = {}
for _ in range(iterations):
new_stateValues = {}
new_actionValue = {}
for state in self.mdp.getStates():
qvalue_list = []
possible_actions = mdp.getPossibleActions(state)
if (len(possible_actions) == 0):
new_stateValues[state] = 0
new_actionValue[state] = None
else:
for action in possible_actions:
qvalue_list.append((self.getQValue(state,action),action))
vvalue = max(qvalue_list)
new_stateValues[state] = vvalue[0]
new_actionValue[state] = vvalue[1]
self.values = new_stateValues
self.actions = new_actionValue
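The loop above relies on self.getQValue, which for a value iteration agent is just a one-step Bellman backup against the current value estimates. A sketch using only the mdp methods listed in the docstring:

def getQValue(self, state, action):
    # Q(s, a) = sum over s' of T(s, a, s') * [R(s, a, s') + gamma * V(s')]
    qValue = 0.0
    for nextState, prob in self.mdp.getTransitionStatesAndProbs(state, action):
        reward = self.mdp.getReward(state, action, nextState)
        qValue += prob * (reward + self.discount * self.values.get(nextState, 0))
    return qValue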
def basicFeatureExtractorDigit(datum):
"""
Returns a set of pixel features indicating whether
each pixel in the provided datum is white (0) or gray/black (1)
"""
a = datum.getPixels()
features = util.Counter()
for x in range(DIGIT_DATUM_WIDTH):
for y in range(DIGIT_DATUM_HEIGHT):
if datum.getPixel(x, y) > 0:
features[(x,y)] = 1
else:
features[(x,y)] = 0
return features
def basicFeatureExtractorFace(datum):
"""
Returns a set of pixel features indicating whether
each pixel in the provided datum is an edge (1) or no edge (0)
"""
a = datum.getPixels()
features = util.Counter()
for x in range(FACE_DATUM_WIDTH):
for y in range(FACE_DATUM_HEIGHT):
if datum.getPixel(x, y) > 0:
features[(x,y)] = 1
else:
features[(x,y)] = 0
return features
def enhancedPacmanFeatures(state, action):
"""
For each state, this function is called with each legal action.
It should return a counter with { <feature name> : <feature value>, ... }
"""
features = util.Counter()
"*** YOUR CODE HERE ***"
util.raiseNotDefined()
return features
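The feature set here is deliberately left as an exercise. Purely to illustrate the expected shape of the returned Counter (and not the intended solution), such a counter might read a few quantities off the successor state; every name and feature choice below is a hypothetical example.

def examplePacmanFeatures(state, action):
    # Illustration only: feature choices and names are assumptions.
    features = util.Counter()
    successor = state.generateSuccessor(0, action)   # agent index 0 is Pacman
    pacmanPos = successor.getPacmanPosition()
    features['score'] = successor.getScore()
    features['num-food'] = successor.getNumFood()
    for i, ghostPos in enumerate(successor.getGhostPositions()):
        features[('ghost-distance', i)] = util.manhattanDistance(pacmanPos, ghostPos)
    return features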
def analysis(classifier, guesses, testLabels, testData, rawTestData, printImage):
"""
This function is called after learning.
Include any code that you want here to help you analyze your results.
Use the printImage(<list of pixels>) function to visualize features.
An example of use has been given to you.
- classifier is the trained classifier
- guesses is the list of labels predicted by your classifier on the test set
- testLabels is the list of true labels
- testData is the list of test datapoints (as util.Counter of features)
- rawTestData is the list of test datapoints (as samples.Datum)
- printImage is a method to visualize the features
(see its use in the odds ratio part in runClassifier method)
This code won't be evaluated. It is for your own optional use
(and you can modify the signature if you want).
"""
# Put any code here...
# Example of use:
# for i in range(len(guesses)):
# prediction = guesses[i]
# truth = testLabels[i]
# if (prediction != truth):
# print "==================================="
# print "Mistake on example %d" % i
# print "Predicted %d; truth is %d" % (prediction, truth)
# print "Image: "
# print rawTestData[i]
# break
## =====================
## You don't have to modify any code below.
## =====================
def __init__( self, legalLabels, max_iterations):
self.legalLabels = legalLabels
self.type = "perceptron"
self.max_iterations = max_iterations
self.weights = {}
for label in legalLabels:
self.weights[label] = util.Counter() # this is the data-structure you should use
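Each label's weight vector is itself a Counter over the same feature keys produced by the extractors above, so classification is an argmax over per-label scores. The sketch below uses hypothetical helper names (scoreLabels, classify) and an explicit sum rather than the Counter dot-product overload.

def scoreLabels(weights, features):
    # Return {label: score}, where score is the dot product of the label's
    # weight Counter with the feature Counter.
    scores = {}
    for label, labelWeights in weights.items():
        scores[label] = sum(labelWeights[f] * value for f, value in features.items())
    return scores

def classify(weights, features):
    # Predict the label whose weights give the highest score.
    scores = scoreLabels(weights, features)
    return max(scores, key=scores.get)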