Finish project 3.
This commit is contained in:
parent
8ba1f91f9b
commit
730d3672cc
@ -63,8 +63,7 @@ def question3e():
|
||||
def question6():
    """Return the answer to analysis question 6.

    The answer is either an (epsilon, learning rate) pair or the string
    'NOT POSSIBLE'. No working pair is claimed here, so the sentinel
    string is returned.
    """
    # The earlier placeholder body returned (None, None) before this line
    # could ever be reached; only the final answer is kept.
    # If not possible, return 'NOT POSSIBLE'
    return 'NOT POSSIBLE'
|
||||
|
||||
if __name__ == '__main__':
|
||||
print 'Answers to analysis questions:'
|
||||
|
@ -41,8 +41,7 @@ class QLearningAgent(ReinforcementAgent):
|
||||
def __init__(self, **args):
    """Initialize the base agent and start with an empty Q-value table.

    All keyword arguments are forwarded unchanged to
    ReinforcementAgent.__init__.
    """
    ReinforcementAgent.__init__(self, **args)

    # Q-values live in a single util.Counter; the other methods of this
    # agent index it with (state, action) tuples.
    # NOTE(review): unseen keys presumably read as 0 -- confirm against
    # util.Counter.
    self.values = util.Counter()
|
||||
|
||||
def getQValue(self, state, action):
|
||||
"""
|
||||
@ -50,9 +49,7 @@ class QLearningAgent(ReinforcementAgent):
|
||||
Should return 0.0 if we have never seen a state
|
||||
or the Q node value otherwise
|
||||
"""
|
||||
"*** YOUR CODE HERE ***"
|
||||
util.raiseNotDefined()
|
||||
|
||||
return self.values[(state, action)]
|
||||
|
||||
def computeValueFromQValues(self, state):
|
||||
"""
|
||||
@ -61,8 +58,10 @@ class QLearningAgent(ReinforcementAgent):
|
||||
there are no legal actions, which is the case at the
|
||||
terminal state, you should return a value of 0.0.
|
||||
"""
|
||||
"*** YOUR CODE HERE ***"
|
||||
util.raiseNotDefined()
|
||||
actions = self.getLegalActions(state)
|
||||
if not actions:
|
||||
return 0.0
|
||||
return max([self.getQValue(state, action) for action in actions])
|
||||
|
||||
def computeActionFromQValues(self, state):
|
||||
"""
|
||||
@ -70,8 +69,13 @@ class QLearningAgent(ReinforcementAgent):
|
||||
are no legal actions, which is the case at the terminal state,
|
||||
you should return None.
|
||||
"""
|
||||
"*** YOUR CODE HERE ***"
|
||||
util.raiseNotDefined()
|
||||
actions = self.getLegalActions(state)
|
||||
if not actions:
|
||||
return None
|
||||
valueActions = [(self.getQValue(state, action), action)
|
||||
for action in actions]
|
||||
maxValue, maxAction = max(valueActions)
|
||||
return maxAction
|
||||
|
||||
def getAction(self, state):
|
||||
"""
|
||||
@ -86,10 +90,12 @@ class QLearningAgent(ReinforcementAgent):
|
||||
"""
|
||||
# Pick Action
|
||||
legalActions = self.getLegalActions(state)
|
||||
action = None
|
||||
"*** YOUR CODE HERE ***"
|
||||
util.raiseNotDefined()
|
||||
|
||||
if not legalActions:
|
||||
action = None
|
||||
elif util.flipCoin(self.epsilon):
|
||||
action = random.choice(legalActions)
|
||||
else:
|
||||
action = self.computeActionFromQValues(state)
|
||||
return action
|
||||
|
||||
def update(self, state, action, nextState, reward):
|
||||
@ -101,8 +107,10 @@ class QLearningAgent(ReinforcementAgent):
|
||||
NOTE: You should never call this function,
|
||||
it will be called on your behalf
|
||||
"""
|
||||
"*** YOUR CODE HERE ***"
|
||||
util.raiseNotDefined()
|
||||
sample = reward + self.discount * self.computeValueFromQValues(nextState)
|
||||
value = self.getQValue(state, action)
|
||||
newValue = (1 - self.alpha) * value + self.alpha * sample
|
||||
self.values[(state, action)] = newValue
|
||||
|
||||
def getPolicy(self, state):
|
||||
return self.computeActionFromQValues(state)
|
||||
@ -164,15 +172,24 @@ class ApproximateQAgent(PacmanQAgent):
|
||||
Should return Q(state,action) = w * featureVector
|
||||
where * is the dotProduct operator
|
||||
"""
|
||||
"*** YOUR CODE HERE ***"
|
||||
util.raiseNotDefined()
|
||||
features = self.featExtractor.getFeatures(state, action)
|
||||
value = 0
|
||||
for feature, featureValue in features.items():
|
||||
value += featureValue * self.weights[feature]
|
||||
return value
|
||||
|
||||
|
||||
def update(self, state, action, nextState, reward):
    """
      Should update your weights based on transition

      For the observed transition (state, action, nextState, reward),
      every feature f returned by the feature extractor for
      (state, action) has its weight adjusted by

          weights[f] += alpha * difference * featureValue

      where

          difference = (reward + discount * V(nextState)) - Q(state, action)

      V comes from computeValueFromQValues and Q from getQValue.
      Returns nothing; self.weights is modified in place.
    """
    features = self.featExtractor.getFeatures(state, action)

    # The correction term is computed once, before any weight is touched,
    # so every feature is updated against the same pre-update estimate.
    target = reward + self.discount * self.computeValueFromQValues(nextState)
    difference = target - self.getQValue(state, action)

    for feature, featureValue in features.items():
        self.weights[feature] = (self.weights[feature]
                                 + self.alpha * difference * featureValue)
|
||||
|
||||
def final(self, state):
|
||||
"Called at the end of each game."
|
||||
|
Loading…
Reference in New Issue
Block a user