Finish project 3.

main
Felix Martin 2021-12-02 12:14:45 -05:00
parent 8ba1f91f9b
commit 730d3672cc
2 changed files with 37 additions and 21 deletions

View File

@@ -63,8 +63,7 @@ def question3e():
def question6(): def question6():
answerEpsilon = None answerEpsilon = None
answerLearningRate = None answerLearningRate = None
return answerEpsilon, answerLearningRate return 'NOT POSSIBLE'
# If not possible, return 'NOT POSSIBLE'
if __name__ == '__main__': if __name__ == '__main__':
print 'Answers to analysis questions:' print 'Answers to analysis questions:'

View File

@@ -41,8 +41,7 @@ class QLearningAgent(ReinforcementAgent):
def __init__(self, **args): def __init__(self, **args):
"You can initialize Q-values here..." "You can initialize Q-values here..."
ReinforcementAgent.__init__(self, **args) ReinforcementAgent.__init__(self, **args)
self.values = util.Counter()
"*** YOUR CODE HERE ***"
def getQValue(self, state, action): def getQValue(self, state, action):
""" """
@@ -50,9 +49,7 @@ class QLearningAgent(ReinforcementAgent):
Should return 0.0 if we have never seen a state Should return 0.0 if we have never seen a state
or the Q node value otherwise or the Q node value otherwise
""" """
"*** YOUR CODE HERE ***" return self.values[(state, action)]
util.raiseNotDefined()
def computeValueFromQValues(self, state): def computeValueFromQValues(self, state):
""" """
@@ -61,8 +58,10 @@ class QLearningAgent(ReinforcementAgent):
there are no legal actions, which is the case at the there are no legal actions, which is the case at the
terminal state, you should return a value of 0.0. terminal state, you should return a value of 0.0.
""" """
"*** YOUR CODE HERE ***" actions = self.getLegalActions(state)
util.raiseNotDefined() if not actions:
return 0.0
return max([self.getQValue(state, action) for action in actions])
def computeActionFromQValues(self, state): def computeActionFromQValues(self, state):
""" """
@@ -70,8 +69,13 @@ class QLearningAgent(ReinforcementAgent):
are no legal actions, which is the case at the terminal state, are no legal actions, which is the case at the terminal state,
you should return None. you should return None.
""" """
"*** YOUR CODE HERE ***" actions = self.getLegalActions(state)
util.raiseNotDefined() if not actions:
return None
valueActions = [(self.getQValue(state, action), action)
for action in actions]
maxValue, maxAction = max(valueActions)
return maxAction
def getAction(self, state): def getAction(self, state):
""" """
@@ -86,10 +90,12 @@ class QLearningAgent(ReinforcementAgent):
""" """
# Pick Action # Pick Action
legalActions = self.getLegalActions(state) legalActions = self.getLegalActions(state)
if not legalActions:
action = None action = None
"*** YOUR CODE HERE ***" elif util.flipCoin(self.epsilon):
util.raiseNotDefined() action = random.choice(legalActions)
else:
action = self.computeActionFromQValues(state)
return action return action
def update(self, state, action, nextState, reward): def update(self, state, action, nextState, reward):
@@ -101,8 +107,10 @@ class QLearningAgent(ReinforcementAgent):
NOTE: You should never call this function, NOTE: You should never call this function,
it will be called on your behalf it will be called on your behalf
""" """
"*** YOUR CODE HERE ***" sample = reward + self.discount * self.computeValueFromQValues(nextState)
util.raiseNotDefined() value = self.getQValue(state, action)
newValue = (1 - self.alpha) * value + self.alpha * sample
self.values[(state, action)] = newValue
def getPolicy(self, state): def getPolicy(self, state):
return self.computeActionFromQValues(state) return self.computeActionFromQValues(state)
@@ -164,15 +172,24 @@ class ApproximateQAgent(PacmanQAgent):
Should return Q(state,action) = w * featureVector Should return Q(state,action) = w * featureVector
where * is the dotProduct operator where * is the dotProduct operator
""" """
"*** YOUR CODE HERE ***" features = self.featExtractor.getFeatures(state, action)
util.raiseNotDefined() value = 0
for feature, featureValue in features.items():
value += featureValue * self.weights[feature]
return value
def update(self, state, action, nextState, reward): def update(self, state, action, nextState, reward):
""" """
Should update your weights based on transition Should update your weights based on transition
""" """
"*** YOUR CODE HERE ***" features = self.featExtractor.getFeatures(state, action)
util.raiseNotDefined() qMax = self.computeValueFromQValues(nextState)
difference = (reward + self.discount * qMax) - self.getQValue(state, action)
for feature, featureValue in features.items():
weight = self.weights[feature]
newWeight = weight + self.alpha * difference * featureValue
self.weights[feature] = newWeight
def final(self, state): def final(self, state):
"Called at the end of each game." "Called at the end of each game."