Patch: fill in the Q-learning exercises of the p3_rl project.

p3_rl/analysis.py
    question6 returns 'NOT POSSIBLE'.

p3_rl/qlearningAgents.py
    QLearningAgent
        - Q-values are stored in a util.Counter keyed by (state, action).
        - computeValueFromQValues returns the largest Q-value over the legal
          actions, or 0.0 when there are none.
        - computeActionFromQValues returns None when there are no legal
          actions; otherwise it picks at random among the actions whose
          Q-value equals the maximum.
        - getAction takes a random legal action when
          util.flipCoin(self.epsilon) is true, otherwise the action from
          computeActionFromQValues.
        - update blends the stored value with
          reward + discount * value(nextState), weighted by self.alpha.
    ApproximateQAgent
        - getQValue sums featureValue * weight over the extracted features.
        - update adds alpha * difference * featureValue to each weight.

NOTE(review): the indentation of context lines was reconstructed from a copy
whose whitespace had been collapsed. If the patch does not apply cleanly, try
`git apply --ignore-whitespace`. The post-image blob id on the
qlearningAgents.py index line predates the random tie-breaking change, so
regenerate the patch with `git diff` if exact blob ids matter.

diff --git a/p3_rl/analysis.py b/p3_rl/analysis.py
index 74aa4cf..0c7c1ee 100644
--- a/p3_rl/analysis.py
+++ b/p3_rl/analysis.py
@@ -63,8 +63,7 @@ def question3e():
 def question6():
     answerEpsilon = None
     answerLearningRate = None
-    return answerEpsilon, answerLearningRate
-    # If not possible, return 'NOT POSSIBLE'
+    return 'NOT POSSIBLE'
 
 if __name__ == '__main__':
     print 'Answers to analysis questions:'
diff --git a/p3_rl/qlearningAgents.py b/p3_rl/qlearningAgents.py
index a417a5f..19d3226 100644
--- a/p3_rl/qlearningAgents.py
+++ b/p3_rl/qlearningAgents.py
@@ -41,8 +41,8 @@ class QLearningAgent(ReinforcementAgent):
     def __init__(self, **args):
         "You can initialize Q-values here..."
         ReinforcementAgent.__init__(self, **args)
-
-        "*** YOUR CODE HERE ***"
+        # Q-value table keyed by (state, action) pairs.
+        self.values = util.Counter()
 
     def getQValue(self, state, action):
         """
@@ -50,9 +50,7 @@ class QLearningAgent(ReinforcementAgent):
           Should return 0.0 if we have never seen a state
           or the Q node value otherwise
         """
-        "*** YOUR CODE HERE ***"
-        util.raiseNotDefined()
-
+        return self.values[(state, action)]
 
     def computeValueFromQValues(self, state):
         """
@@ -61,8 +59,10 @@ class QLearningAgent(ReinforcementAgent):
           there are no legal actions, which is the case at the
           terminal state, you should return a value of 0.0.
         """
-        "*** YOUR CODE HERE ***"
-        util.raiseNotDefined()
+        actions = self.getLegalActions(state)
+        if not actions:
+            return 0.0
+        return max([self.getQValue(state, action) for action in actions])
 
     def computeActionFromQValues(self, state):
         """
@@ -70,8 +70,17 @@ class QLearningAgent(ReinforcementAgent):
           are no legal actions, which is the case at the terminal state,
           you should return None.
         """
-        "*** YOUR CODE HERE ***"
-        util.raiseNotDefined()
+        actions = self.getLegalActions(state)
+        if not actions:
+            return None
+        # Break ties at random: taking max() over (value, action) tuples
+        # would resolve equal Q-values by comparing the actions themselves.
+        scored = [(self.getQValue(state, action), action)
+                  for action in actions]
+        bestValue = max([value for value, _ in scored])
+        bestActions = [action for value, action in scored
+                       if value == bestValue]
+        return random.choice(bestActions)
 
     def getAction(self, state):
         """
@@ -86,10 +95,12 @@ class QLearningAgent(ReinforcementAgent):
         """
         # Pick Action
         legalActions = self.getLegalActions(state)
-        action = None
-        "*** YOUR CODE HERE ***"
-        util.raiseNotDefined()
-
+        if not legalActions:
+            action = None
+        elif util.flipCoin(self.epsilon):
+            action = random.choice(legalActions)
+        else:
+            action = self.computeActionFromQValues(state)
         return action
 
     def update(self, state, action, nextState, reward):
@@ -101,8 +112,11 @@ class QLearningAgent(ReinforcementAgent):
           NOTE: You should never call this function,
           it will be called on your behalf
         """
-        "*** YOUR CODE HERE ***"
-        util.raiseNotDefined()
+        # Move the stored estimate towards the one-step sample by alpha.
+        sample = reward + self.discount * self.computeValueFromQValues(nextState)
+        value = self.getQValue(state, action)
+        newValue = (1 - self.alpha) * value + self.alpha * sample
+        self.values[(state, action)] = newValue
 
     def getPolicy(self, state):
         return self.computeActionFromQValues(state)
@@ -164,15 +178,24 @@ class ApproximateQAgent(PacmanQAgent):
           Should return Q(state,action) = w * featureVector
           where * is the dotProduct operator
         """
-        "*** YOUR CODE HERE ***"
-        util.raiseNotDefined()
+        features = self.featExtractor.getFeatures(state, action)
+        value = 0
+        for feature, featureValue in features.items():
+            value += featureValue * self.weights[feature]
+        return value
 
     def update(self, state, action, nextState, reward):
         """
            Should update your weights based on transition
         """
-        "*** YOUR CODE HERE ***"
-        util.raiseNotDefined()
+        features = self.featExtractor.getFeatures(state, action)
+        qMax = self.computeValueFromQValues(nextState)
+        # The TD error is computed once, before any weight is modified.
+        difference = (reward + self.discount * qMax) - self.getQValue(state, action)
+        for feature, featureValue in features.items():
+            weight = self.weights[feature]
+            newWeight = weight + self.alpha * difference * featureValue
+            self.weights[feature] = newWeight
 
     def final(self, state):
         "Called at the end of each game."