Finish project 3.

main
Felix Martin 2021-12-02 12:14:45 -05:00
parent 8ba1f91f9b
commit 730d3672cc
2 changed files with 37 additions and 21 deletions

View File

@ -63,8 +63,7 @@ def question3e():
def question6():
    """
    Answer to analysis question 6.

    Returns the string 'NOT POSSIBLE', i.e. no (epsilon, learning rate)
    pair is offered as an answer.  The scaffold's placeholder
    `return answerEpsilon, answerLearningRate` has been dropped: it sat in
    front of the real answer and made it unreachable.
    """
    # If not possible, return 'NOT POSSIBLE'
    return 'NOT POSSIBLE'
if __name__ == '__main__':
print 'Answers to analysis questions:'

View File

@ -41,8 +41,7 @@ class QLearningAgent(ReinforcementAgent):
def __init__(self, **args):
    """
    Initialise the agent and its Q-value table.

    All keyword arguments are forwarded unchanged to
    ReinforcementAgent.__init__.  Q-values are kept in `self.values`,
    a util.Counter keyed by (state, action) tuples.
    """
    ReinforcementAgent.__init__(self, **args)
    # NOTE(review): getQValue indexes this table without a membership
    # check, so util.Counter is assumed to yield 0 for unseen keys -- confirm.
    self.values = util.Counter()
def getQValue(self, state, action):
    """
    Return the stored Q-value for the (state, action) pair.

    Should return 0.0 if we have never seen a state
    or the Q node value otherwise.  The lookup is a plain index into
    `self.values`, so the "never seen" case relies on that table
    supplying a zero default for missing keys.
    """
    return self.values[(state, action)]
def computeValueFromQValues(self, state):
    """
    Return the largest Q-value over the legal actions of `state`.

    Legal actions come from self.getLegalActions(state) and each
    candidate is scored with self.getQValue(state, action).  If
    there are no legal actions, which is the case at the
    terminal state, the value returned is 0.0.
    """
    actions = self.getLegalActions(state)
    if not actions:
        return 0.0
    return max(self.getQValue(state, action) for action in actions)
def computeActionFromQValues(self, state):
    """
    Return the legal action with the highest Q-value in `state`.

    If there
    are no legal actions, which is the case at the terminal state,
    None is returned.

    Candidates are compared as (Q-value, action) tuples, so when two
    actions share the top Q-value the one that compares greatest wins.
    """
    actions = self.getLegalActions(state)
    if not actions:
        return None
    # max() over (value, action) pairs: value decides, action breaks ties.
    scored = [(self.getQValue(state, action), action) for action in actions]
    return max(scored)[1]
def getAction(self, state):
    """
    Choose the action to take in `state`.

    Returns None when self.getLegalActions(state) is empty.  Otherwise,
    when util.flipCoin(self.epsilon) comes up true a legal action is
    picked with random.choice; when it does not, the action reported by
    self.computeActionFromQValues(state) is used.
    """
    # Pick Action
    legalActions = self.getLegalActions(state)
    if not legalActions:
        return None
    if util.flipCoin(self.epsilon):
        # Exploration branch: ignore the Q-values entirely.
        return random.choice(legalActions)
    return self.computeActionFromQValues(state)
def update(self, state, action, nextState, reward):
    """
    Fold one observed transition into the Q-value table.

    The sample is reward + self.discount * value(nextState), where
    value() is self.computeValueFromQValues.  The stored Q-value for
    (state, action) is replaced by a blend of the old value and the
    sample, weighted (1 - self.alpha) and self.alpha respectively.

    NOTE: You should never call this function,
    it will be called on your behalf
    """
    sample = reward + self.discount * self.computeValueFromQValues(nextState)
    oldValue = self.getQValue(state, action)
    self.values[(state, action)] = (1 - self.alpha) * oldValue + self.alpha * sample
def getPolicy(self, state):
    """
    Return the policy action for `state`.

    Delegates to self.computeActionFromQValues(state) and returns its
    result unchanged.
    """
    return self.computeActionFromQValues(state)
@ -164,15 +172,24 @@ class ApproximateQAgent(PacmanQAgent):
Should return Q(state,action) = w * featureVector
where * is the dotProduct operator
"""
"*** YOUR CODE HERE ***"
util.raiseNotDefined()
features = self.featExtractor.getFeatures(state, action)
value = 0
for feature, featureValue in features.items():
value += featureValue * self.weights[feature]
return value
def update(self, state, action, nextState, reward):
    """
    Should update your weights based on transition

    The correction term is
        (reward + self.discount * value(nextState)) - Q(state, action)
    with value() = self.computeValueFromQValues and Q = self.getQValue.
    Every feature reported by self.featExtractor.getFeatures(state, action)
    then has its weight moved by self.alpha * correction * featureValue.
    """
    features = self.featExtractor.getFeatures(state, action)
    target = reward + self.discount * self.computeValueFromQValues(nextState)
    # Evaluate the correction once, before any weight changes, so every
    # feature is adjusted against the same pre-update Q estimate.
    difference = target - self.getQValue(state, action)
    for feature, featureValue in features.items():
        self.weights[feature] = (
            self.weights[feature] + self.alpha * difference * featureValue
        )
def final(self, state):
"Called at the end of each game."