Fix minimax and implement alpha-beta-pruning (p2 q2 and q3).

2021-11-11 17:33:59 -05:00
parent 11dcc491a2
commit 43ad652269
1 changed files with 73 additions and 17 deletions
--- a/p2_multiagent/multiAgents.py
+++ b/p2_multiagent/multiAgents.py
@@ -47,9 +47,10 @@ class ReflexAgent(Agent):
        bestIndices = [index for index in range(len(scores)) if scores[index] == bestScore]
        chosenIndex = random.choice(bestIndices) # Pick randomly among the best

-        print(gameState)
-        print(list(zip(scores, legalMoves)))
-        print("chosenAction", legalMoves[chosenIndex])
+        # For debugging:
+        # print(gameState)
+        # print(list(zip(scores, legalMoves)))
+        # print("chosenAction", legalMoves[chosenIndex])

        return legalMoves[chosenIndex]

@@ -100,6 +101,7 @@ def scoreEvaluationFunction(currentGameState):
    """
    return currentGameState.getScore()

+
 class MultiAgentSearchAgent(Agent):
    """
      This class provides some common elements to all of your
@@ -120,6 +122,7 @@ class MultiAgentSearchAgent(Agent):
        self.evaluationFunction = util.lookup(evalFn, globals())
        self.depth = int(depth)

+
 class MinimaxAgent(MultiAgentSearchAgent):
    """
      Your minimax agent (question 2)
@@ -127,23 +130,43 @@ class MinimaxAgent(MultiAgentSearchAgent):

    def getAction(self, gameState):
        """
-          Returns the minimax action from the current gameState using self.depth
-          and self.evaluationFunction.
+        Returns the minimax action from the current gameState using
+        self.depth and self.evaluationFunction.

-          Here are some method calls that might be useful when implementing minimax.
+        Here are some method calls that might be useful when implementing
+        minimax.

-          gameState.getLegalActions(agentIndex):
-            Returns a list of legal actions for an agent
-            agentIndex=0 means Pacman, ghosts are >= 1
+        gameState.getLegalActions(agentIndex):
+          Returns a list of legal actions for an agent
+          agentIndex=0 means Pacman, ghosts are >= 1

-          gameState.generateSuccessor(agentIndex, action):
-            Returns the successor game state after an agent takes an action
+        gameState.generateSuccessor(agentIndex, action):
+          Returns the successor game state after an agent takes an action

-          gameState.getNumAgents():
-            Returns the total number of agents in the game
+        gameState.getNumAgents():
+          Returns the total number of agents in the game
        """
-        "*** YOUR CODE HERE ***"
-        util.raiseNotDefined()
+
+        numAgents = gameState.getNumAgents()
+        totalDepth = self.depth * numAgents
+
+        def value(depth, state):
+            """Return the minimax (value, action) pair for `state` after `depth` moves."""
+            agentIndex = depth % numAgents
+            actions = state.getLegalActions(agentIndex)
+            if depth == totalDepth or not actions:
+                return (self.evaluationFunction(state), "terminal")
+            # Expand every successor first, then score each one recursively.
+            children = [state.generateSuccessor(agentIndex, move) for move in actions]
+            scored = [(value(depth + 1, child)[0], move)
+                      for move, child in zip(actions, children)]
+            # Pacman (agentIndex=0) maximizes, ghosts minimize.
+            pick = max if agentIndex == 0 else min
+            return pick(scored)
+
+        # [0] is the best value, [1] is the best action
+        return value(0, gameState)[1]
+

 class AlphaBetaAgent(MultiAgentSearchAgent):
    """
@@ -154,8 +177,39 @@ class AlphaBetaAgent(MultiAgentSearchAgent):
        """
          Returns the minimax action using self.depth and self.evaluationFunction
        """
-        "*** YOUR CODE HERE ***"
-        util.raiseNotDefined()
+        numAgents = gameState.getNumAgents()
+        totalDepth = self.depth * numAgents
+
+        def value(depth, state, alpha, beta):
+            """Alpha-beta search from `state`; returns the best (value, action) pair."""
+            agentIndex = depth % numAgents
+            actions = state.getLegalActions(agentIndex)
+            if not actions or depth == totalDepth:
+                return (self.evaluationFunction(state), "terminal")
+            # No sentinel tuple: `best` is always a real (value, action) pair.
+            best = None
+            if agentIndex == 0:  # Pacman maximizes.
+                for action in actions:
+                    newState = state.generateSuccessor(agentIndex, action)
+                    newValue = value(depth + 1, newState, alpha, beta)[0]
+                    newTuple = (newValue, action)
+                    best = newTuple if best is None else max(newTuple, best)
+                    if best[0] > beta:  # Strict: equal values are not pruned.
+                        return best
+                    alpha = max(alpha, best[0])
+            else:  # Ghosts minimize.
+                for action in actions:
+                    newState = state.generateSuccessor(agentIndex, action)
+                    newValue = value(depth + 1, newState, alpha, beta)[0]
+                    newTuple = (newValue, action)
+                    best = newTuple if best is None else min(newTuple, best)
+                    if best[0] < alpha:  # Strict: equal values are not pruned.
+                        return best
+                    beta = min(beta, best[0])
+            return best
+
+        return value(0, gameState, alpha=-999999, beta=999999)[1]
+

 class ExpectimaxAgent(MultiAgentSearchAgent):
    """
@@ -172,6 +226,7 @@ class ExpectimaxAgent(MultiAgentSearchAgent):
        "*** YOUR CODE HERE ***"
        util.raiseNotDefined()

+
 def betterEvaluationFunction(currentGameState):
    """
      Your extreme ghost-hunting, pellet-nabbing, food-gobbling, unstoppable
@@ -182,6 +237,7 @@ def betterEvaluationFunction(currentGameState):
    "*** YOUR CODE HERE ***"
    util.raiseNotDefined()

+
 # Abbreviation
 better = betterEvaluationFunction