Fix minimax and implement alpha-beta pruning (Project 2, Questions 2 and 3).
This commit is contained in:
@@ -47,9 +47,10 @@ class ReflexAgent(Agent):
|
||||
bestIndices = [index for index in range(len(scores)) if scores[index] == bestScore]
|
||||
chosenIndex = random.choice(bestIndices) # Pick randomly among the best
|
||||
|
||||
print(gameState)
|
||||
print(list(zip(scores, legalMoves)))
|
||||
print("chosenAction", legalMoves[chosenIndex])
|
||||
# For debugging:
|
||||
# print(gameState)
|
||||
# print(list(zip(scores, legalMoves)))
|
||||
# print("chosenAction", legalMoves[chosenIndex])
|
||||
|
||||
return legalMoves[chosenIndex]
|
||||
|
||||
@@ -100,6 +101,7 @@ def scoreEvaluationFunction(currentGameState):
|
||||
"""
|
||||
return currentGameState.getScore()
|
||||
|
||||
|
||||
class MultiAgentSearchAgent(Agent):
|
||||
"""
|
||||
This class provides some common elements to all of your
|
||||
@@ -120,6 +122,7 @@ class MultiAgentSearchAgent(Agent):
|
||||
self.evaluationFunction = util.lookup(evalFn, globals())
|
||||
self.depth = int(depth)
|
||||
|
||||
|
||||
class MinimaxAgent(MultiAgentSearchAgent):
|
||||
"""
|
||||
Your minimax agent (question 2)
|
||||
@@ -127,23 +130,43 @@ class MinimaxAgent(MultiAgentSearchAgent):
|
||||
|
||||
def getAction(self, gameState):
|
||||
"""
|
||||
Returns the minimax action from the current gameState using self.depth
|
||||
and self.evaluationFunction.
|
||||
Returns the minimax action from the current gameState using
|
||||
self.depth and self.evaluationFunction.
|
||||
|
||||
Here are some method calls that might be useful when implementing minimax.
|
||||
Here are some method calls that might be useful when implementing
|
||||
minimax.
|
||||
|
||||
gameState.getLegalActions(agentIndex):
|
||||
Returns a list of legal actions for an agent
|
||||
agentIndex=0 means Pacman, ghosts are >= 1
|
||||
gameState.getLegalActions(agentIndex):
|
||||
Returns a list of legal actions for an agent
|
||||
agentIndex=0 means Pacman, ghosts are >= 1
|
||||
|
||||
gameState.generateSuccessor(agentIndex, action):
|
||||
Returns the successor game state after an agent takes an action
|
||||
gameState.generateSuccessor(agentIndex, action):
|
||||
Returns the successor game state after an agent takes an action
|
||||
|
||||
gameState.getNumAgents():
|
||||
Returns the total number of agents in the game
|
||||
gameState.getNumAgents():
|
||||
Returns the total number of agents in the game
|
||||
"""
|
||||
"*** YOUR CODE HERE ***"
|
||||
util.raiseNotDefined()
|
||||
|
||||
numAgents = gameState.getNumAgents()
|
||||
totalDepth = self.depth * numAgents
|
||||
|
||||
def value(depth, state):
|
||||
agentIndex = depth % numAgents
|
||||
actions = state.getLegalActions(agentIndex)
|
||||
if not actions or depth == totalDepth:
|
||||
return (self.evaluationFunction(state), "terminal")
|
||||
successorStates = [state.generateSuccessor(agentIndex, action) for action in actions]
|
||||
successorValueActionPairs = [(value(depth + 1, state)[0], action)
|
||||
for action, state in zip(actions, successorStates)]
|
||||
# Pacman (agentIndex=0) maximizes, ghosts minimize.
|
||||
if agentIndex == 0:
|
||||
return max(successorValueActionPairs)
|
||||
else:
|
||||
return min(successorValueActionPairs)
|
||||
|
||||
# [0] is the best value, [1] is the best action
|
||||
return value(0, gameState)[1]
|
||||
|
||||
|
||||
class AlphaBetaAgent(MultiAgentSearchAgent):
|
||||
"""
|
||||
@@ -154,8 +177,39 @@ class AlphaBetaAgent(MultiAgentSearchAgent):
|
||||
"""
|
||||
Returns the minimax action using self.depth and self.evaluationFunction
|
||||
"""
|
||||
"*** YOUR CODE HERE ***"
|
||||
util.raiseNotDefined()
|
||||
numAgents = gameState.getNumAgents()
|
||||
totalDepth = self.depth * numAgents
|
||||
|
||||
def value(depth, state, alpha, beta):
|
||||
agentIndex = depth % numAgents
|
||||
actions = state.getLegalActions(agentIndex)
|
||||
if not actions or depth == totalDepth:
|
||||
return (self.evaluationFunction(state), "terminal")
|
||||
|
||||
if agentIndex == 0:
|
||||
maxTuple = (-999999, "None")
|
||||
for action in actions:
|
||||
newState = state.generateSuccessor(agentIndex, action)
|
||||
newValue = value(depth + 1, newState, alpha, beta)[0]
|
||||
newTuple = (newValue, action)
|
||||
maxTuple = max((newValue, action), maxTuple)
|
||||
if maxTuple[0] > beta:
|
||||
return maxTuple
|
||||
alpha = max(alpha, maxTuple[0])
|
||||
return maxTuple
|
||||
else:
|
||||
minTuple = (999999, "None")
|
||||
for action in actions:
|
||||
newState = state.generateSuccessor(agentIndex, action)
|
||||
newValue = value(depth + 1, newState, alpha, beta)[0]
|
||||
minTuple = min((newValue, action), minTuple)
|
||||
if minTuple[0] < alpha:
|
||||
return minTuple
|
||||
beta = min(beta, minTuple[0])
|
||||
return minTuple
|
||||
|
||||
return value(0, gameState, alpha=-999999, beta=999999)[1]
|
||||
|
||||
|
||||
class ExpectimaxAgent(MultiAgentSearchAgent):
|
||||
"""
|
||||
@@ -172,6 +226,7 @@ class ExpectimaxAgent(MultiAgentSearchAgent):
|
||||
"*** YOUR CODE HERE ***"
|
||||
util.raiseNotDefined()
|
||||
|
||||
|
||||
def betterEvaluationFunction(currentGameState):
|
||||
"""
|
||||
Your extreme ghost-hunting, pellet-nabbing, food-gobbling, unstoppable
|
||||
@@ -182,6 +237,7 @@ def betterEvaluationFunction(currentGameState):
|
||||
"*** YOUR CODE HERE ***"
|
||||
util.raiseNotDefined()
|
||||
|
||||
|
||||
# Abbreviation
|
||||
better = betterEvaluationFunction
|
||||
|
||||
|
||||
Reference in New Issue
Block a user