def displayDistributionsOverPositions(self, distributions):
    """
    Overlays a distribution over positions onto the Pacman board, representing
    an agent's beliefs about where each agent is.

    The arg distributions is a tuple or list of util.Counter objects, where the
    i'th Counter has board positions (x, y) as keys and, as values, the
    probability that agent i is at (x, y).

    Elements that are None are ignored; any Counter that is passed in will be
    displayed. This is helpful for checking that your agent is doing inference
    correctly, and it does not affect gameplay.
    """
    dists = []
    for dist in distributions:
        if dist is not None:
            if not isinstance(dist, util.Counter):
                raise Exception("Wrong type of distribution")
            dists.append(dist)
        else:
            dists.append(util.Counter())
    if self.display is not None and 'updateDistributions' in dir(self.display):
        self.display.updateDistributions(dists)
    else:
        self._distributions = dists  # These can be read by pacclient.py
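A minimal usage sketch of the method above: build one normalized Counter per tracked agent and hand the list to the display helper. The names legalPositions, numAgents, and display below are illustrative assumptions, not part of the snippet.

# Hypothetical sketch: one uniform belief Counter per agent, then overlay them.
import util

legalPositions = [(1, 1), (1, 2), (2, 1)]   # assumed: walkable board positions
numAgents = 2                                # assumed: number of agents being tracked

beliefs = []
for agentIndex in range(numAgents):
    counter = util.Counter()
    for pos in legalPositions:
        counter[pos] = 1.0
    counter.normalize()                      # uniform distribution over the positions
    beliefs.append(counter)

# 'display' is assumed to be the graphics object that owns the method above.
display.displayDistributionsOverPositions(beliefs)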
Example source code for the Python Counter() class
def getFeatures(self, gameState, action):
    features = util.Counter()
    successor = self.getSuccessor(gameState, action)
    myState = successor.getAgentState(self.index)
    myPos = myState.getPosition()

    # Computes whether we're on defense (1) or offense (0)
    features['onDefense'] = 1
    if myState.isPacman: features['onDefense'] = 0

    # Computes distance to invaders we can see
    enemies = [successor.getAgentState(i) for i in self.getOpponents(successor)]
    invaders = [a for a in enemies if a.isPacman and a.getPosition() is not None]
    features['numInvaders'] = len(invaders)
    if len(invaders) > 0:
        dists = [self.getMazeDistance(myPos, a.getPosition()) for a in invaders]
        features['invaderDistance'] = min(dists)

    if action == Directions.STOP: features['stop'] = 1
    rev = Directions.REVERSE[gameState.getAgentState(self.index).configuration.direction]
    if action == rev: features['reverse'] = 1
    return features
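In the capture-the-flag agents, a feature Counter like the one above is typically combined with a weight Counter through util.Counter's dot product. A minimal sketch follows; the specific weight values and the getWeights/evaluate method names are illustrative assumptions rather than something taken from the snippet.

# Hypothetical companion methods on the same agent class.
def getWeights(self, gameState, action):
    # Large penalty per invader, reward for staying on defense, etc.
    return {'numInvaders': -1000, 'onDefense': 100,
            'invaderDistance': -10, 'stop': -100, 'reverse': -2}

def evaluate(self, gameState, action):
    features = self.getFeatures(gameState, action)
    weights = self.getWeights(gameState, action)
    return features * weights   # util.Counter's __mul__ is a dot product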
def getFeatures(self, state, action):
    # Feature extractor for a tile-game hand. The original non-ASCII feature
    # names were garbled in extraction, so descriptive English keys are used here.
    feats = util.Counter()
    handcard = list(state)
    handcard.remove(action)
    handSet = set(handcard)
    feats['handSize'] = len(handcard)
    feats['pairs'] = len([card for card in handSet if handcard.count(card) == 2])
    feats['triplets'] = len([card for card in handSet if handcard.count(card) == 3])
    feats['quads'] = len([card for card in handSet if handcard.count(card) == 4])
    for card in handSet:
        # Count runs of three consecutive tiles within a numbered suit.
        if int(card / 10) < 3 and card % 10 != 0:
            if card % 10 < 8 and (card + 1 in handSet) and (card + 2 in handSet):
                feats['runs'] += 1
    return feats
valueIterationAgents.py source (project: Reinforcement-Learning, author: victorgrego)
def __init__(self, mdp, discount = 0.9, iterations = 100):
    """
    Your value iteration agent should take an mdp on
    construction, run the indicated number of iterations
    and then act according to the resulting policy.

    Some useful mdp methods you will use:
        mdp.getStates()
        mdp.getPossibleActions(state)
        mdp.getTransitionStatesAndProbs(state, action)
        mdp.getReward(state, action, nextState)
    """
    self.mdp = mdp
    self.discount = discount
    self.iterations = iterations
    self.values = util.Counter()  # A Counter is a dict with default 0

    "*** YOUR CODE HERE ***"
def enhancedFeatureExtractorDigit(datum):
    """
    Your feature extraction playground.

    You should return a util.Counter() of features
    for this datum (datum is of type samples.Datum).

    ## DESCRIBE YOUR ENHANCED FEATURES HERE...
    ##
    """
    features = basicFeatureExtractorDigit(datum)
    for x in range(DIGIT_DATUM_WIDTH):
        for y in range(DIGIT_DATUM_HEIGHT):
            # Compare each pixel to the one below it; the y > 0 guard keeps the
            # negative index from wrapping around to the opposite edge at y == 0.
            if y > 0 and datum.getPixel(x, y) > datum.getPixel(x, y - 1):
                features[(x, y, 0)] = 1
            else:
                features[(x, y, 0)] = 0
    return features
def basicFeatureExtractorPacman(state):
    """
    A basic feature extraction function.

    You should return a util.Counter() of features
    for each (state, action) pair along with a list of the legal actions
    ##
    """
    features = util.Counter()
    for action in state.getLegalActions():
        successor = state.generateSuccessor(0, action)
        foodCount = successor.getFood().count()
        featureCounter = util.Counter()
        featureCounter['foodCount'] = foodCount
        features[action] = featureCounter
    return features, state.getLegalActions()
def findHighWeightFeatures(self, label):
    """
    Returns a list of the 100 features with the greatest weight for some label
    """
    # sortedKeys() returns the keys sorted by decreasing value, so the first
    # 100 entries are the highest-weight features for this label.
    return self.weights[label].sortedKeys()[:100]
def __init__(self, mdp, discount = 0.9, iterations = 100):
    """
    Your value iteration agent should take an mdp on
    construction, run the indicated number of iterations
    and then act according to the resulting policy.

    Some useful mdp methods you will use:
        mdp.getStates()
        mdp.getPossibleActions(state)
        mdp.getTransitionStatesAndProbs(state, action)
        mdp.getReward(state, action, nextState)
        mdp.isTerminal(state)
    """
    self.mdp = mdp
    self.discount = discount
    self.iterations = iterations
    self.values = util.Counter()  # A Counter is a dict with default 0

    # Write value iteration code here
    "*** YOUR CODE HERE ***"
def fit(self, trainingData, trainingLabels):
    """
    Trains the classifier by collecting counts over the training data, and
    stores the Laplace smoothed estimates so that they can be used to classify.

    trainingData is a list of feature dictionaries. The corresponding
    label lists contain the correct label for each instance.

    To get the list of all possible features or labels, use self.features and self.legalLabels.
    """
    self.features = trainingData[0].keys()  # the names of the features in the dataset

    self.prior = util.Counter()             # probability over labels
    self.conditionalProb = util.Counter()   # conditional probability of feature feat for a given
                                            # class having value v
                                            # HINT: could be indexed by (feat, label, value)

    # TODO:
    # construct (and store) the normalized smoothed priors and conditional probabilities
    "*** YOUR CODE HERE ***"
def calculateLogJointProbabilities(self, instance):
    """
    Returns the log-joint distribution over legal labels and the instance.
    Each log-probability should be stored in the log-joint counter, e.g.
    logJoint[3] = <Estimate of log( P(Label = 3, instance) )>

    To get the list of all possible features or labels, use self.features and
    self.legalLabels.
    """
    logJoint = util.Counter()
    for label in self.legalLabels:
        # calculate the log joint probabilities for each class
        "*** YOUR CODE HERE ***"
        pass
    return logJoint
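One possible way to fill in the loop above, written as a standalone helper so the sketch stays self-contained; logJointSketch is a hypothetical name, and it reuses the self.prior and self.conditionalProb estimates from the training sketch, whose (feat, label, value) indexing is an assumption.

import math
import util

def logJointSketch(classifier, instance):
    # Hypothetical helper mirroring calculateLogJointProbabilities: for each label,
    # take the log prior and add the log conditional probability of each feature value.
    # The smoothing applied during training keeps the conditionals strictly positive.
    logJoint = util.Counter()
    for label in classifier.legalLabels:
        logJoint[label] = math.log(classifier.prior[label])
        for feat in classifier.features:
            value = instance[feat]
            logJoint[label] += math.log(classifier.conditionalProb[(feat, label, value)])
    return logJoint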
def getFeatures(self, gameState, action):
    """
    Returns a counter of features for the state
    """
    features = util.Counter()
    successor = self.getSuccessor(gameState, action)
    features['successorScore'] = self.getScore(successor)
    return features
def getFeatures(self, gameState, action):
    features = util.Counter()
    successor = self.getSuccessor(gameState, action)
    foodList = self.getFood(successor).asList()
    features['successorScore'] = -len(foodList)  # self.getScore(successor)

    # Compute distance to the nearest food
    if len(foodList) > 0:  # This should always be True, but better safe than sorry
        myPos = successor.getAgentState(self.index).getPosition()
        minDistance = min([self.getMazeDistance(myPos, food) for food in foodList])
        features['distanceToFood'] = minDistance
    return features
def getDistribution(self, state):
    "Returns a Counter encoding a distribution over actions from the provided state."
    util.raiseNotDefined()

def getDistribution(self, state):
    dist = util.Counter()
    for a in state.getLegalActions(self.index):
        dist[a] = 1.0
    dist.normalize()
    return dist

def getDistribution(self, state):
    # Read variables from state
    ghostState = state.getGhostState(self.index)
    legalActions = state.getLegalActions(self.index)
    pos = state.getGhostPosition(self.index)
    isScared = ghostState.scaredTimer > 0

    speed = 1
    if isScared: speed = 0.5

    actionVectors = [Actions.directionToVector(a, speed) for a in legalActions]
    newPositions = [(pos[0] + a[0], pos[1] + a[1]) for a in actionVectors]
    pacmanPosition = state.getPacmanPosition()

    # Select best actions given the state
    distancesToPacman = [manhattanDistance(pos, pacmanPosition) for pos in newPositions]
    if isScared:
        bestScore = max(distancesToPacman)
        bestProb = self.prob_scaredFlee
    else:
        bestScore = min(distancesToPacman)
        bestProb = self.prob_attack
    bestActions = [action for action, distance in zip(legalActions, distancesToPacman) if distance == bestScore]

    # Construct distribution
    dist = util.Counter()
    for a in bestActions: dist[a] = bestProb / len(bestActions)
    for a in legalActions: dist[a] += (1 - bestProb) / len(legalActions)
    dist.normalize()
    return dist
def getFeatures(self, gameState, action):
    features = util.Counter()
    successor = self.getSuccessor(gameState, action)
    myState = successor.getAgentState(self.index)
    myPos = myState.getPosition()

    # Computes whether we're on defense (1) or offense (0)
    features['onDefense'] = 1
    # The line below prevents the ghost from becoming a Pacman, which can lose the tie.
    if myState.isPacman: features['onDefense'] = 0

    # Computes distance to invaders we can see
    enemies = [successor.getAgentState(i) for i in self.getOpponents(successor)]
    invaders = [a for a in enemies if a.isPacman and a.getPosition() is not None]
    features['numInvaders'] = len(invaders)
    if len(invaders) > 0:
        dists = [self.getMazeDistance(myPos, a.getPosition()) for a in invaders]
        # for a in invaders:
        #     print "Ghost position", a.getPosition(), "MyPosition: ", myPos
        # features['invaderDistance'] = min(dists)

    if action == Directions.STOP: features['stop'] = 1
    rev = Directions.REVERSE[gameState.getAgentState(self.index).configuration.direction]
    if action == rev: features['reverse'] = 1
    return features
def setLearningTarget(self):
    self.qValues = util.Counter()

def setLearningTarget(self):
    self.weights = util.Counter()