intro2ai/p3_rl/analysis.py

# analysis.py
# -----------
# Licensing Information:  You are free to use or extend these projects for
# educational purposes provided that (1) you do not distribute or publish
# solutions, (2) you retain this notice, and (3) you provide clear
# attribution to UC Berkeley, including a link to http://ai.berkeley.edu.
# 
# Attribution Information: The Pacman AI projects were developed at UC Berkeley.
# The core projects and autograders were primarily created by John DeNero
# (denero@cs.berkeley.edu) and Dan Klein (klein@cs.berkeley.edu).
# Student side autograding was added by Brad Miller, Nick Hay, and
# Pieter Abbeel (pabbeel@cs.berkeley.edu).


######################
# ANALYSIS QUESTIONS #
######################

# Set the given parameters to obtain the specified policies through
# value iteration.

def question2():
    """
    Return the (discount, noise) pair chosen for question 2.

    Author's rationale: changing the discount value alone does not change
    the agent's behavior, because falling down carries too high a penalty.
    Lowering the noise to a one percent failure rate (noise=0.01) makes it
    safe enough to cross the bridge, reportedly even with a discount factor
    of 0.8 (the discount actually returned here is 0.9).
    """
    discount, noise = 0.9, 0.01
    return discount, noise

def question3a():
    """Return the (discount, noise, living reward) triple chosen for question 3a."""
    discount, noise, living_reward = 0.2, 0, 0
    return discount, noise, living_reward

def question3b():
    """Return the (discount, noise, living reward) triple chosen for question 3b."""
    # The discount literal is approximately sqrt(0.1).
    discount, noise, living_reward = 0.31622776601683794, 0.2, 0
    return discount, noise, living_reward

def question3c():
    """Return the (discount, noise, living reward) triple chosen for question 3c."""
    discount, noise, living_reward = 0.9, 0, 0
    return discount, noise, living_reward

def question3d():
    """Return the (discount, noise, living reward) triple chosen for question 3d."""
    discount, noise, living_reward = 0.9, 0.2, 0
    return discount, noise, living_reward

def question3e():
    """Return the (discount, noise, living reward) triple chosen for question 3e."""
    discount, noise, living_reward = 0, 0, 1
    return discount, noise, living_reward

def question6():
    """
    Return the (epsilon, learning rate) pair for question 6.

    Both values are currently unset (None). If no setting is possible,
    the function should return 'NOT POSSIBLE' instead.
    """
    epsilon = learning_rate = None
    return epsilon, learning_rate

if __name__ == '__main__':
    # Script entry point: call every function in the `analysis` module whose
    # name starts with 'question' and print its answer.
    #
    # The parenthesized single-argument print form is used so this file parses
    # and prints identically under both Python 2 (print statement) and
    # Python 3 (print function).
    print('Answers to analysis questions:')
    # Import the module by name so its attributes can be enumerated with dir().
    import analysis
    # dir() returns names in alphabetical order, so answers print in that order.
    for q in dir(analysis):
        if not q.startswith('question'):
            continue
        response = getattr(analysis, q)()
        print('  Question %s:\t%s' % (q, str(response)))
Add project 3 RL template. 2021-11-27 16:16:51 +01:00			`# analysis.py`
			`# -----------`
			`# Licensing Information: You are free to use or extend these projects for`
			`# educational purposes provided that (1) you do not distribute or publish`
			`# solutions, (2) you retain this notice, and (3) you provide clear`
			`# attribution to UC Berkeley, including a link to http://ai.berkeley.edu.`
			`#`
			`# Attribution Information: The Pacman AI projects were developed at UC Berkeley.`
			`# The core projects and autograders were primarily created by John DeNero`
			`# (denero@cs.berkeley.edu) and Dan Klein (klein@cs.berkeley.edu).`
			`# Student side autograding was added by Brad Miller, Nick Hay, and`
			`# Pieter Abbeel (pabbeel@cs.berkeley.edu).`


			`######################`
			`# ANALYSIS QUESTIONS #`
			`######################`

			`# Set the given parameters to obtain the specified policies through`
			`# value iteration.`

			`def question2():`
Answer project 3 question 3. 2021-12-02 01:14:32 +01:00			`"""`
			`Changing the discount value doesn't change the behavior of the agent because`
			`falling down cause too high of a penalty. Changing the noise to only one percent`
			`failure (noise=0.01) makese it safe enough to cross the bridge even with 0.8`
			`discount factor.`
			`"""`
Add project 3 RL template. 2021-11-27 16:16:51 +01:00			`answerDiscount = 0.9`
Answer project 3 question 3. 2021-12-02 01:14:32 +01:00			`answerNoise = 0.01`
Add project 3 RL template. 2021-11-27 16:16:51 +01:00			`return answerDiscount, answerNoise`

			`def question3a():`
Answer project 3 question 3. 2021-12-02 01:14:32 +01:00			`answerDiscount = 0.2`
			`answerNoise = 0`
			`answerLivingReward = 0`
Add project 3 RL template. 2021-11-27 16:16:51 +01:00			`return answerDiscount, answerNoise, answerLivingReward`

			`def question3b():`
Answer project 3 question 3. 2021-12-02 01:14:32 +01:00			`answerDiscount = 0.31622776601683794`
			`answerNoise = 0.2`
			`answerLivingReward = 0`
Add project 3 RL template. 2021-11-27 16:16:51 +01:00			`return answerDiscount, answerNoise, answerLivingReward`

			`def question3c():`
Answer project 3 question 3. 2021-12-02 01:14:32 +01:00			`answerDiscount = 0.9`
			`answerNoise = 0`
			`answerLivingReward = 0`
Add project 3 RL template. 2021-11-27 16:16:51 +01:00			`return answerDiscount, answerNoise, answerLivingReward`

			`def question3d():`
Answer project 3 question 3. 2021-12-02 01:14:32 +01:00			`answerDiscount = 0.9`
			`answerNoise = 0.2`
			`answerLivingReward = 0`
Add project 3 RL template. 2021-11-27 16:16:51 +01:00			`return answerDiscount, answerNoise, answerLivingReward`

			`def question3e():`
Answer project 3 question 3. 2021-12-02 01:14:32 +01:00			`answerDiscount = 0`
			`answerNoise = 0`
			`answerLivingReward = 1`
Add project 3 RL template. 2021-11-27 16:16:51 +01:00			`return answerDiscount, answerNoise, answerLivingReward`

			`def question6():`
			`answerEpsilon = None`
			`answerLearningRate = None`
			`return answerEpsilon, answerLearningRate`
			`# If not possible, return 'NOT POSSIBLE'`

			`if __name__ == '__main__':`
			`print 'Answers to analysis questions:'`
			`import analysis`
			`for q in [q for q in dir(analysis) if q.startswith('question')]:`
			`response = getattr(analysis, q)()`
			`print ' Question %s:\t%s' % (q, str(response))`