Finish project 3.

2021-12-02 12:14:45 -05:00 · 2021-12-02 12:14:45 -05:00 · 730d3672cc
parent 8ba1f91f9b
commit 730d3672cc
2 changed files with 37 additions and 21 deletions
--- a/p3_rl/analysis.py
+++ b/p3_rl/analysis.py
@@ -63,8 +63,7 @@ def question3e():
 def question6():
    answerEpsilon = None
    answerLearningRate = None
-    return answerEpsilon, answerLearningRate
+    return 'NOT POSSIBLE'
    # If not possible, return 'NOT POSSIBLE'
 if __name__ == '__main__':
    print 'Answers to analysis questions:'
--- a/p3_rl/qlearningAgents.py
+++ b/p3_rl/qlearningAgents.py
@@ -41,8 +41,7 @@ class QLearningAgent(ReinforcementAgent):
    def __init__(self, **args):
        "You can initialize Q-values here..."
        ReinforcementAgent.__init__(self, **args)
-
+        self.values = util.Counter()
        "*** YOUR CODE HERE ***"
    def getQValue(self, state, action):
        """
@@ -50,9 +49,7 @@ class QLearningAgent(ReinforcementAgent):
          Should return 0.0 if we have never seen a state
          or the Q node value otherwise
        """
-        "*** YOUR CODE HERE ***"
+        return self.values[(state, action)]
        util.raiseNotDefined()
    def computeValueFromQValues(self, state):
        """
@@ -61,8 +58,10 @@ class QLearningAgent(ReinforcementAgent):
          there are no legal actions, which is the case at the
          terminal state, you should return a value of 0.0.
        """
-        "*** YOUR CODE HERE ***"
+        actions = self.getLegalActions(state)
-        util.raiseNotDefined()
+        if not actions:
+            return 0.0
+        return max([self.getQValue(state, action) for action in actions])
    def computeActionFromQValues(self, state):
        """
@@ -70,8 +69,13 @@ class QLearningAgent(ReinforcementAgent):
          are no legal actions, which is the case at the terminal state,
          you should return None.
        """
-        "*** YOUR CODE HERE ***"
+        actions = self.getLegalActions(state)
-        util.raiseNotDefined()
+        if not actions:
+            return None
+        valueActions = [(self.getQValue(state, action), action)
+                        for action in actions]
+        maxValue, maxAction = max(valueActions)
+        return maxAction
    def getAction(self, state):
        """
@@ -86,10 +90,12 @@ class QLearningAgent(ReinforcementAgent):
        """
        # Pick Action
        legalActions = self.getLegalActions(state)
        if not legalActions:
            action = None
-        "*** YOUR CODE HERE ***"
+        elif util.flipCoin(self.epsilon):
-        util.raiseNotDefined()
+            action = random.choice(legalActions)
-
+        else:
+            action = self.computeActionFromQValues(state)
        return action
    def update(self, state, action, nextState, reward):
@@ -101,8 +107,10 @@ class QLearningAgent(ReinforcementAgent):
          NOTE: You should never call this function,
          it will be called on your behalf
        """
-        "*** YOUR CODE HERE ***"
+        sample = reward + self.discount * self.computeValueFromQValues(nextState)
-        util.raiseNotDefined()
+        value = self.getQValue(state, action)
+        newValue = (1 - self.alpha) * value + self.alpha * sample
+        self.values[(state, action)] = newValue
    def getPolicy(self, state):
        return self.computeActionFromQValues(state)
@@ -164,15 +172,24 @@ class ApproximateQAgent(PacmanQAgent):
          Should return Q(state,action) = w * featureVector
          where * is the dotProduct operator
        """
-        "*** YOUR CODE HERE ***"
+        features = self.featExtractor.getFeatures(state, action)
-        util.raiseNotDefined()
+        value = 0
+        for feature, featureValue in features.items():
+            value += featureValue * self.weights[feature]
+        return value
    def update(self, state, action, nextState, reward):
        """
           Should update your weights based on transition
        """
-        "*** YOUR CODE HERE ***"
+        features = self.featExtractor.getFeatures(state, action)
-        util.raiseNotDefined()
+        qMax = self.computeValueFromQValues(nextState)
+        difference = (reward + self.discount * qMax) - self.getQValue(state, action)
+        for feature, featureValue in features.items():
+            weight = self.weights[feature]
+            newWeight = weight + self.alpha * difference * featureValue
+            self.weights[feature] = newWeight
    def final(self, state):
        "Called at the end of each game."