Finish project 3.

2021-12-02 12:14:45 -05:00
parent 8ba1f91f9b
commit 730d3672cc
2 changed files with 37 additions and 21 deletions
@@ -63,8 +63,7 @@ def question3e():
 def question6():
    answerEpsilon = None
    answerLearningRate = None
-    return answerEpsilon, answerLearningRate
-    # If not possible, return 'NOT POSSIBLE'
+    return 'NOT POSSIBLE'

 if __name__ == '__main__':
    print 'Answers to analysis questions:'
@@ -41,8 +41,7 @@ class QLearningAgent(ReinforcementAgent):
    def __init__(self, **args):
        "You can initialize Q-values here..."
        ReinforcementAgent.__init__(self, **args)
-
-        "*** YOUR CODE HERE ***"
+        self.values = util.Counter()

    def getQValue(self, state, action):
        """
@@ -50,9 +49,7 @@ class QLearningAgent(ReinforcementAgent):
          Should return 0.0 if we have never seen a state
          or the Q node value otherwise
        """
-        "*** YOUR CODE HERE ***"
-        util.raiseNotDefined()
-
+        return self.values[(state, action)]

    def computeValueFromQValues(self, state):
        """
@@ -61,8 +58,10 @@ class QLearningAgent(ReinforcementAgent):
          there are no legal actions, which is the case at the
          terminal state, you should return a value of 0.0.
        """
-        "*** YOUR CODE HERE ***"
-        util.raiseNotDefined()
+        actions = self.getLegalActions(state)
+        if not actions:
+            return 0.0
+        return max([self.getQValue(state, action) for action in actions])

    def computeActionFromQValues(self, state):
        """
@@ -70,8 +69,13 @@ class QLearningAgent(ReinforcementAgent):
          are no legal actions, which is the case at the terminal state,
          you should return None.
        """
-        "*** YOUR CODE HERE ***"
-        util.raiseNotDefined()
+        actions = self.getLegalActions(state)
+        if not actions:
+            return None
+        valueActions = [(self.getQValue(state, action), action)
+                        for action in actions]
+        maxValue, maxAction = max(valueActions)
+        return maxAction

    def getAction(self, state):
        """
@@ -86,10 +90,12 @@ class QLearningAgent(ReinforcementAgent):
        """
        # Pick Action
        legalActions = self.getLegalActions(state)
-        action = None
-        "*** YOUR CODE HERE ***"
-        util.raiseNotDefined()
-
+        if not legalActions:
+            action = None
+        elif util.flipCoin(self.epsilon):
+            action = random.choice(legalActions)
+        else:
+            action = self.computeActionFromQValues(state)
        return action

    def update(self, state, action, nextState, reward):
@@ -101,8 +107,10 @@ class QLearningAgent(ReinforcementAgent):
          NOTE: You should never call this function,
          it will be called on your behalf
        """
-        "*** YOUR CODE HERE ***"
-        util.raiseNotDefined()
+        sample = reward + self.discount * self.computeValueFromQValues(nextState)
+        value = self.getQValue(state, action)
+        newValue = (1 - self.alpha) * value + self.alpha * sample
+        self.values[(state, action)] = newValue

    def getPolicy(self, state):
        return self.computeActionFromQValues(state)
@@ -164,15 +172,24 @@ class ApproximateQAgent(PacmanQAgent):
          Should return Q(state,action) = w * featureVector
          where * is the dotProduct operator
        """
-        "*** YOUR CODE HERE ***"
-        util.raiseNotDefined()
+        features = self.featExtractor.getFeatures(state, action)
+        value = 0
+        for feature, featureValue in features.items():
+            value += featureValue * self.weights[feature]
+        return value
+

    def update(self, state, action, nextState, reward):
        """
           Should update your weights based on transition
        """
-        "*** YOUR CODE HERE ***"
-        util.raiseNotDefined()
+        features = self.featExtractor.getFeatures(state, action)
+        qMax = self.computeValueFromQValues(nextState)
+        difference = (reward + self.discount * qMax) - self.getQValue(state, action)
+        for feature, featureValue in features.items():
+            weight = self.weights[feature]
+            newWeight = weight + self.alpha * difference * featureValue
+            self.weights[feature] = newWeight

    def final(self, state):
        "Called at the end of each game."