Fix minimax and implement alpha-beta pruning (Project 2, Questions 2 and 3).

This commit is contained in:
2021-11-11 17:33:59 -05:00
parent 11dcc491a2
commit 43ad652269

View File

@@ -47,9 +47,10 @@ class ReflexAgent(Agent):
bestIndices = [index for index in range(len(scores)) if scores[index] == bestScore] bestIndices = [index for index in range(len(scores)) if scores[index] == bestScore]
chosenIndex = random.choice(bestIndices) # Pick randomly among the best chosenIndex = random.choice(bestIndices) # Pick randomly among the best
print(gameState) # For debugging:
print(list(zip(scores, legalMoves))) # print(gameState)
print("chosenAction", legalMoves[chosenIndex]) # print(list(zip(scores, legalMoves)))
# print("chosenAction", legalMoves[chosenIndex])
return legalMoves[chosenIndex] return legalMoves[chosenIndex]
@@ -100,6 +101,7 @@ def scoreEvaluationFunction(currentGameState):
""" """
return currentGameState.getScore() return currentGameState.getScore()
class MultiAgentSearchAgent(Agent): class MultiAgentSearchAgent(Agent):
""" """
This class provides some common elements to all of your This class provides some common elements to all of your
@@ -120,6 +122,7 @@ class MultiAgentSearchAgent(Agent):
self.evaluationFunction = util.lookup(evalFn, globals()) self.evaluationFunction = util.lookup(evalFn, globals())
self.depth = int(depth) self.depth = int(depth)
class MinimaxAgent(MultiAgentSearchAgent): class MinimaxAgent(MultiAgentSearchAgent):
""" """
Your minimax agent (question 2) Your minimax agent (question 2)
@@ -127,23 +130,43 @@ class MinimaxAgent(MultiAgentSearchAgent):
def getAction(self, gameState):
    """
    Return the minimax action from the current gameState, searching
    self.depth full rounds of moves and scoring leaves with
    self.evaluationFunction.

    Useful GameState methods:
        gameState.getLegalActions(agentIndex) -- legal actions for an
            agent; agentIndex=0 is Pacman, ghosts are >= 1.
        gameState.generateSuccessor(agentIndex, action) -- state after
            an agent takes an action.
        gameState.getNumAgents() -- total number of agents in the game.
    """
    numAgents = gameState.getNumAgents()
    # One ply per agent move: self.depth rounds of (Pacman + all ghosts).
    plyLimit = self.depth * numAgents

    def minimax(ply, node):
        # Whose turn it is at this ply: 0 = Pacman, >= 1 = a ghost.
        mover = ply % numAgents
        moves = node.getLegalActions(mover)
        # Leaf: the depth budget is spent, or no legal moves (win/lose).
        if ply == plyLimit or not moves:
            return (self.evaluationFunction(node), "terminal")
        scored = []
        for move in moves:
            child = node.generateSuccessor(mover, move)
            scored.append((minimax(ply + 1, child)[0], move))
        # Pacman maximizes, ghosts minimize; tuples compare by score
        # first, then (on ties) by action.
        return max(scored) if mover == 0 else min(scored)

    # minimax(...) yields (bestScore, bestAction); act on the action.
    return minimax(0, gameState)[1]
class AlphaBetaAgent(MultiAgentSearchAgent): class AlphaBetaAgent(MultiAgentSearchAgent):
""" """
@@ -154,8 +177,39 @@ class AlphaBetaAgent(MultiAgentSearchAgent):
""" """
Returns the minimax action using self.depth and self.evaluationFunction Returns the minimax action using self.depth and self.evaluationFunction
""" """
"*** YOUR CODE HERE ***" numAgents = gameState.getNumAgents()
util.raiseNotDefined() totalDepth = self.depth * numAgents
def value(depth, state, alpha, beta):
agentIndex = depth % numAgents
actions = state.getLegalActions(agentIndex)
if not actions or depth == totalDepth:
return (self.evaluationFunction(state), "terminal")
if agentIndex == 0:
maxTuple = (-999999, "None")
for action in actions:
newState = state.generateSuccessor(agentIndex, action)
newValue = value(depth + 1, newState, alpha, beta)[0]
newTuple = (newValue, action)
maxTuple = max((newValue, action), maxTuple)
if maxTuple[0] > beta:
return maxTuple
alpha = max(alpha, maxTuple[0])
return maxTuple
else:
minTuple = (999999, "None")
for action in actions:
newState = state.generateSuccessor(agentIndex, action)
newValue = value(depth + 1, newState, alpha, beta)[0]
minTuple = min((newValue, action), minTuple)
if minTuple[0] < alpha:
return minTuple
beta = min(beta, minTuple[0])
return minTuple
return value(0, gameState, alpha=-999999, beta=999999)[1]
class ExpectimaxAgent(MultiAgentSearchAgent): class ExpectimaxAgent(MultiAgentSearchAgent):
""" """
@@ -172,6 +226,7 @@ class ExpectimaxAgent(MultiAgentSearchAgent):
"*** YOUR CODE HERE ***" "*** YOUR CODE HERE ***"
util.raiseNotDefined() util.raiseNotDefined()
def betterEvaluationFunction(currentGameState): def betterEvaluationFunction(currentGameState):
""" """
Your extreme ghost-hunting, pellet-nabbing, food-gobbling, unstoppable Your extreme ghost-hunting, pellet-nabbing, food-gobbling, unstoppable
@@ -182,6 +237,7 @@ def betterEvaluationFunction(currentGameState):
"*** YOUR CODE HERE ***" "*** YOUR CODE HERE ***"
util.raiseNotDefined() util.raiseNotDefined()
# Abbreviation # Abbreviation
better = betterEvaluationFunction better = betterEvaluationFunction