Fix minimax and implement alpha-beta pruning (Project 2, Questions 2 and 3).

This commit is contained in:
2021-11-11 17:33:59 -05:00
parent 11dcc491a2
commit 43ad652269

View File

@@ -47,9 +47,10 @@ class ReflexAgent(Agent):
bestIndices = [index for index in range(len(scores)) if scores[index] == bestScore] bestIndices = [index for index in range(len(scores)) if scores[index] == bestScore]
chosenIndex = random.choice(bestIndices) # Pick randomly among the best chosenIndex = random.choice(bestIndices) # Pick randomly among the best
print(gameState) # For debugging:
print(list(zip(scores, legalMoves))) # print(gameState)
print("chosenAction", legalMoves[chosenIndex]) # print(list(zip(scores, legalMoves)))
# print("chosenAction", legalMoves[chosenIndex])
return legalMoves[chosenIndex] return legalMoves[chosenIndex]
@@ -100,6 +101,7 @@ def scoreEvaluationFunction(currentGameState):
""" """
return currentGameState.getScore() return currentGameState.getScore()
class MultiAgentSearchAgent(Agent): class MultiAgentSearchAgent(Agent):
""" """
This class provides some common elements to all of your This class provides some common elements to all of your
@@ -120,6 +122,7 @@ class MultiAgentSearchAgent(Agent):
self.evaluationFunction = util.lookup(evalFn, globals()) self.evaluationFunction = util.lookup(evalFn, globals())
self.depth = int(depth) self.depth = int(depth)
class MinimaxAgent(MultiAgentSearchAgent): class MinimaxAgent(MultiAgentSearchAgent):
""" """
Your minimax agent (question 2) Your minimax agent (question 2)
@@ -127,23 +130,43 @@ class MinimaxAgent(MultiAgentSearchAgent):
def getAction(self, gameState):
    """
    Return the minimax action from the current gameState, searching
    self.depth full rounds of moves and scoring leaves with
    self.evaluationFunction.

    Useful GameState methods:
        gameState.getLegalActions(agentIndex) -- legal actions for an
            agent; agentIndex=0 is Pacman, ghosts are >= 1.
        gameState.generateSuccessor(agentIndex, action) -- state after
            an agent takes an action.
        gameState.getNumAgents() -- total number of agents in the game.
    """
    numAgents = gameState.getNumAgents()
    # One ply per agent move: self.depth rounds of (Pacman + all ghosts).
    plyLimit = self.depth * numAgents

    def minimax(ply, node):
        # Whose turn it is at this ply: 0 = Pacman, >= 1 = a ghost.
        mover = ply % numAgents
        moves = node.getLegalActions(mover)
        # Leaf: the depth budget is spent, or no legal moves (win/lose).
        if ply == plyLimit or not moves:
            return (self.evaluationFunction(node), "terminal")
        scored = []
        for move in moves:
            child = node.generateSuccessor(mover, move)
            scored.append((minimax(ply + 1, child)[0], move))
        # Pacman maximizes, ghosts minimize; tuples compare by score
        # first, then (on ties) by action.
        return max(scored) if mover == 0 else min(scored)

    # minimax(...) yields (bestScore, bestAction); act on the action.
    return minimax(0, gameState)[1]
class AlphaBetaAgent(MultiAgentSearchAgent): class AlphaBetaAgent(MultiAgentSearchAgent):
""" """
@@ -154,8 +177,39 @@ class AlphaBetaAgent(MultiAgentSearchAgent):
""" """
Returns the minimax action using self.depth and self.evaluationFunction Returns the minimax action using self.depth and self.evaluationFunction
""" """
"*** YOUR CODE HERE ***" numAgents = gameState.getNumAgents()
util.raiseNotDefined() totalDepth = self.depth * numAgents
def value(depth, state, alpha, beta):
agentIndex = depth % numAgents
actions = state.getLegalActions(agentIndex)
if not actions or depth == totalDepth:
return (self.evaluationFunction(state), "terminal")
if agentIndex == 0:
maxTuple = (-999999, "None")
for action in actions:
newState = state.generateSuccessor(agentIndex, action)
newValue = value(depth + 1, newState, alpha, beta)[0]
newTuple = (newValue, action)
maxTuple = max((newValue, action), maxTuple)
if maxTuple[0] > beta:
return maxTuple
alpha = max(alpha, maxTuple[0])
return maxTuple
else:
minTuple = (999999, "None")
for action in actions:
newState = state.generateSuccessor(agentIndex, action)
newValue = value(depth + 1, newState, alpha, beta)[0]
minTuple = min((newValue, action), minTuple)
if minTuple[0] < alpha:
return minTuple
beta = min(beta, minTuple[0])
return minTuple
return value(0, gameState, alpha=-999999, beta=999999)[1]
class ExpectimaxAgent(MultiAgentSearchAgent): class ExpectimaxAgent(MultiAgentSearchAgent):
""" """
@@ -172,6 +226,7 @@ class ExpectimaxAgent(MultiAgentSearchAgent):
"*** YOUR CODE HERE ***" "*** YOUR CODE HERE ***"
util.raiseNotDefined() util.raiseNotDefined()
def betterEvaluationFunction(currentGameState): def betterEvaluationFunction(currentGameState):
""" """
Your extreme ghost-hunting, pellet-nabbing, food-gobbling, unstoppable Your extreme ghost-hunting, pellet-nabbing, food-gobbling, unstoppable
@@ -182,6 +237,7 @@ def betterEvaluationFunction(currentGameState):
"*** YOUR CODE HERE ***" "*** YOUR CODE HERE ***"
util.raiseNotDefined() util.raiseNotDefined()
# Abbreviation # Abbreviation
better = betterEvaluationFunction better = betterEvaluationFunction