Finish project 3.
parent
8ba1f91f9b
commit
730d3672cc
|
@ -63,8 +63,7 @@ def question3e():
|
||||||
def question6():
|
def question6():
|
||||||
answerEpsilon = None
|
answerEpsilon = None
|
||||||
answerLearningRate = None
|
answerLearningRate = None
|
||||||
return answerEpsilon, answerLearningRate
|
return 'NOT POSSIBLE'
|
||||||
# If not possible, return 'NOT POSSIBLE'
|
|
||||||
|
|
||||||
if __name__ == '__main__':
|
if __name__ == '__main__':
|
||||||
print 'Answers to analysis questions:'
|
print 'Answers to analysis questions:'
|
||||||
|
|
|
@ -41,8 +41,7 @@ class QLearningAgent(ReinforcementAgent):
|
||||||
def __init__(self, **args):
|
def __init__(self, **args):
|
||||||
"You can initialize Q-values here..."
|
"You can initialize Q-values here..."
|
||||||
ReinforcementAgent.__init__(self, **args)
|
ReinforcementAgent.__init__(self, **args)
|
||||||
|
self.values = util.Counter()
|
||||||
"*** YOUR CODE HERE ***"
|
|
||||||
|
|
||||||
def getQValue(self, state, action):
|
def getQValue(self, state, action):
|
||||||
"""
|
"""
|
||||||
|
@ -50,9 +49,7 @@ class QLearningAgent(ReinforcementAgent):
|
||||||
Should return 0.0 if we have never seen a state
|
Should return 0.0 if we have never seen a state
|
||||||
or the Q node value otherwise
|
or the Q node value otherwise
|
||||||
"""
|
"""
|
||||||
"*** YOUR CODE HERE ***"
|
return self.values[(state, action)]
|
||||||
util.raiseNotDefined()
|
|
||||||
|
|
||||||
|
|
||||||
def computeValueFromQValues(self, state):
|
def computeValueFromQValues(self, state):
|
||||||
"""
|
"""
|
||||||
|
@ -61,8 +58,10 @@ class QLearningAgent(ReinforcementAgent):
|
||||||
there are no legal actions, which is the case at the
|
there are no legal actions, which is the case at the
|
||||||
terminal state, you should return a value of 0.0.
|
terminal state, you should return a value of 0.0.
|
||||||
"""
|
"""
|
||||||
"*** YOUR CODE HERE ***"
|
actions = self.getLegalActions(state)
|
||||||
util.raiseNotDefined()
|
if not actions:
|
||||||
|
return 0.0
|
||||||
|
return max([self.getQValue(state, action) for action in actions])
|
||||||
|
|
||||||
def computeActionFromQValues(self, state):
|
def computeActionFromQValues(self, state):
|
||||||
"""
|
"""
|
||||||
|
@ -70,8 +69,13 @@ class QLearningAgent(ReinforcementAgent):
|
||||||
are no legal actions, which is the case at the terminal state,
|
are no legal actions, which is the case at the terminal state,
|
||||||
you should return None.
|
you should return None.
|
||||||
"""
|
"""
|
||||||
"*** YOUR CODE HERE ***"
|
actions = self.getLegalActions(state)
|
||||||
util.raiseNotDefined()
|
if not actions:
|
||||||
|
return None
|
||||||
|
valueActions = [(self.getQValue(state, action), action)
|
||||||
|
for action in actions]
|
||||||
|
maxValue, maxAction = max(valueActions)
|
||||||
|
return maxAction
|
||||||
|
|
||||||
def getAction(self, state):
|
def getAction(self, state):
|
||||||
"""
|
"""
|
||||||
|
@ -86,10 +90,12 @@ class QLearningAgent(ReinforcementAgent):
|
||||||
"""
|
"""
|
||||||
# Pick Action
|
# Pick Action
|
||||||
legalActions = self.getLegalActions(state)
|
legalActions = self.getLegalActions(state)
|
||||||
|
if not legalActions:
|
||||||
action = None
|
action = None
|
||||||
"*** YOUR CODE HERE ***"
|
elif util.flipCoin(self.epsilon):
|
||||||
util.raiseNotDefined()
|
action = random.choice(legalActions)
|
||||||
|
else:
|
||||||
|
action = self.computeActionFromQValues(state)
|
||||||
return action
|
return action
|
||||||
|
|
||||||
def update(self, state, action, nextState, reward):
|
def update(self, state, action, nextState, reward):
|
||||||
|
@ -101,8 +107,10 @@ class QLearningAgent(ReinforcementAgent):
|
||||||
NOTE: You should never call this function,
|
NOTE: You should never call this function,
|
||||||
it will be called on your behalf
|
it will be called on your behalf
|
||||||
"""
|
"""
|
||||||
"*** YOUR CODE HERE ***"
|
sample = reward + self.discount * self.computeValueFromQValues(nextState)
|
||||||
util.raiseNotDefined()
|
value = self.getQValue(state, action)
|
||||||
|
newValue = (1 - self.alpha) * value + self.alpha * sample
|
||||||
|
self.values[(state, action)] = newValue
|
||||||
|
|
||||||
def getPolicy(self, state):
|
def getPolicy(self, state):
|
||||||
return self.computeActionFromQValues(state)
|
return self.computeActionFromQValues(state)
|
||||||
|
@ -164,15 +172,24 @@ class ApproximateQAgent(PacmanQAgent):
|
||||||
Should return Q(state,action) = w * featureVector
|
Should return Q(state,action) = w * featureVector
|
||||||
where * is the dotProduct operator
|
where * is the dotProduct operator
|
||||||
"""
|
"""
|
||||||
"*** YOUR CODE HERE ***"
|
features = self.featExtractor.getFeatures(state, action)
|
||||||
util.raiseNotDefined()
|
value = 0
|
||||||
|
for feature, featureValue in features.items():
|
||||||
|
value += featureValue * self.weights[feature]
|
||||||
|
return value
|
||||||
|
|
||||||
|
|
||||||
def update(self, state, action, nextState, reward):
|
def update(self, state, action, nextState, reward):
|
||||||
"""
|
"""
|
||||||
Should update your weights based on transition
|
Should update your weights based on transition
|
||||||
"""
|
"""
|
||||||
"*** YOUR CODE HERE ***"
|
features = self.featExtractor.getFeatures(state, action)
|
||||||
util.raiseNotDefined()
|
qMax = self.computeValueFromQValues(nextState)
|
||||||
|
difference = (reward + self.discount * qMax) - self.getQValue(state, action)
|
||||||
|
for feature, featureValue in features.items():
|
||||||
|
weight = self.weights[feature]
|
||||||
|
newWeight = weight + self.alpha * difference * featureValue
|
||||||
|
self.weights[feature] = newWeight
|
||||||
|
|
||||||
def final(self, state):
|
def final(self, state):
|
||||||
"Called at the end of each game."
|
"Called at the end of each game."
|
||||||
|
|
Loading…
Reference in New Issue