Add project 3 RL template.
This commit is contained in:
1
p3_rl/VERSION
Normal file
1
p3_rl/VERSION
Normal file
@@ -0,0 +1 @@
|
|||||||
|
v1.001
|
||||||
73
p3_rl/analysis.py
Normal file
73
p3_rl/analysis.py
Normal file
@@ -0,0 +1,73 @@
|
|||||||
|
# analysis.py
|
||||||
|
# -----------
|
||||||
|
# Licensing Information: You are free to use or extend these projects for
|
||||||
|
# educational purposes provided that (1) you do not distribute or publish
|
||||||
|
# solutions, (2) you retain this notice, and (3) you provide clear
|
||||||
|
# attribution to UC Berkeley, including a link to http://ai.berkeley.edu.
|
||||||
|
#
|
||||||
|
# Attribution Information: The Pacman AI projects were developed at UC Berkeley.
|
||||||
|
# The core projects and autograders were primarily created by John DeNero
|
||||||
|
# (denero@cs.berkeley.edu) and Dan Klein (klein@cs.berkeley.edu).
|
||||||
|
# Student side autograding was added by Brad Miller, Nick Hay, and
|
||||||
|
# Pieter Abbeel (pabbeel@cs.berkeley.edu).
|
||||||
|
|
||||||
|
|
||||||
|
######################
|
||||||
|
# ANALYSIS QUESTIONS #
|
||||||
|
######################
|
||||||
|
|
||||||
|
# Set the given parameters to obtain the specified policies through
|
||||||
|
# value iteration.
|
||||||
|
|
||||||
|
def question2():
|
||||||
|
answerDiscount = 0.9
|
||||||
|
answerNoise = 0.2
|
||||||
|
return answerDiscount, answerNoise
|
||||||
|
|
||||||
|
def question3a():
|
||||||
|
answerDiscount = None
|
||||||
|
answerNoise = None
|
||||||
|
answerLivingReward = None
|
||||||
|
return answerDiscount, answerNoise, answerLivingReward
|
||||||
|
# If not possible, return 'NOT POSSIBLE'
|
||||||
|
|
||||||
|
def question3b():
|
||||||
|
answerDiscount = None
|
||||||
|
answerNoise = None
|
||||||
|
answerLivingReward = None
|
||||||
|
return answerDiscount, answerNoise, answerLivingReward
|
||||||
|
# If not possible, return 'NOT POSSIBLE'
|
||||||
|
|
||||||
|
def question3c():
|
||||||
|
answerDiscount = None
|
||||||
|
answerNoise = None
|
||||||
|
answerLivingReward = None
|
||||||
|
return answerDiscount, answerNoise, answerLivingReward
|
||||||
|
# If not possible, return 'NOT POSSIBLE'
|
||||||
|
|
||||||
|
def question3d():
|
||||||
|
answerDiscount = None
|
||||||
|
answerNoise = None
|
||||||
|
answerLivingReward = None
|
||||||
|
return answerDiscount, answerNoise, answerLivingReward
|
||||||
|
# If not possible, return 'NOT POSSIBLE'
|
||||||
|
|
||||||
|
def question3e():
|
||||||
|
answerDiscount = None
|
||||||
|
answerNoise = None
|
||||||
|
answerLivingReward = None
|
||||||
|
return answerDiscount, answerNoise, answerLivingReward
|
||||||
|
# If not possible, return 'NOT POSSIBLE'
|
||||||
|
|
||||||
|
def question6():
|
||||||
|
answerEpsilon = None
|
||||||
|
answerLearningRate = None
|
||||||
|
return answerEpsilon, answerLearningRate
|
||||||
|
# If not possible, return 'NOT POSSIBLE'
|
||||||
|
|
||||||
|
if __name__ == '__main__':
|
||||||
|
print 'Answers to analysis questions:'
|
||||||
|
import analysis
|
||||||
|
for q in [q for q in dir(analysis) if q.startswith('question')]:
|
||||||
|
response = getattr(analysis, q)()
|
||||||
|
print ' Question %s:\t%s' % (q, str(response))
|
||||||
351
p3_rl/autograder.py
Normal file
351
p3_rl/autograder.py
Normal file
@@ -0,0 +1,351 @@
|
|||||||
|
# autograder.py
|
||||||
|
# -------------
|
||||||
|
# Licensing Information: You are free to use or extend these projects for
|
||||||
|
# educational purposes provided that (1) you do not distribute or publish
|
||||||
|
# solutions, (2) you retain this notice, and (3) you provide clear
|
||||||
|
# attribution to UC Berkeley, including a link to http://ai.berkeley.edu.
|
||||||
|
#
|
||||||
|
# Attribution Information: The Pacman AI projects were developed at UC Berkeley.
|
||||||
|
# The core projects and autograders were primarily created by John DeNero
|
||||||
|
# (denero@cs.berkeley.edu) and Dan Klein (klein@cs.berkeley.edu).
|
||||||
|
# Student side autograding was added by Brad Miller, Nick Hay, and
|
||||||
|
# Pieter Abbeel (pabbeel@cs.berkeley.edu).
|
||||||
|
|
||||||
|
|
||||||
|
# imports from python standard library
|
||||||
|
import grading
|
||||||
|
import imp
|
||||||
|
import optparse
|
||||||
|
import os
|
||||||
|
import re
|
||||||
|
import sys
|
||||||
|
import projectParams
|
||||||
|
import random
|
||||||
|
random.seed(0)
|
||||||
|
try:
|
||||||
|
from pacman import GameState
|
||||||
|
except:
|
||||||
|
pass
|
||||||
|
|
||||||
|
# register arguments and set default values
|
||||||
|
def readCommand(argv):
|
||||||
|
parser = optparse.OptionParser(description = 'Run public tests on student code')
|
||||||
|
parser.set_defaults(generateSolutions=False, edxOutput=False, muteOutput=False, printTestCase=False, noGraphics=False)
|
||||||
|
parser.add_option('--test-directory',
|
||||||
|
dest = 'testRoot',
|
||||||
|
default = 'test_cases',
|
||||||
|
help = 'Root test directory which contains subdirectories corresponding to each question')
|
||||||
|
parser.add_option('--student-code',
|
||||||
|
dest = 'studentCode',
|
||||||
|
default = projectParams.STUDENT_CODE_DEFAULT,
|
||||||
|
help = 'comma separated list of student code files')
|
||||||
|
parser.add_option('--code-directory',
|
||||||
|
dest = 'codeRoot',
|
||||||
|
default = "",
|
||||||
|
help = 'Root directory containing the student and testClass code')
|
||||||
|
parser.add_option('--test-case-code',
|
||||||
|
dest = 'testCaseCode',
|
||||||
|
default = projectParams.PROJECT_TEST_CLASSES,
|
||||||
|
help = 'class containing testClass classes for this project')
|
||||||
|
parser.add_option('--generate-solutions',
|
||||||
|
dest = 'generateSolutions',
|
||||||
|
action = 'store_true',
|
||||||
|
help = 'Write solutions generated to .solution file')
|
||||||
|
parser.add_option('--edx-output',
|
||||||
|
dest = 'edxOutput',
|
||||||
|
action = 'store_true',
|
||||||
|
help = 'Generate edX output files')
|
||||||
|
parser.add_option('--mute',
|
||||||
|
dest = 'muteOutput',
|
||||||
|
action = 'store_true',
|
||||||
|
help = 'Mute output from executing tests')
|
||||||
|
parser.add_option('--print-tests', '-p',
|
||||||
|
dest = 'printTestCase',
|
||||||
|
action = 'store_true',
|
||||||
|
help = 'Print each test case before running them.')
|
||||||
|
parser.add_option('--test', '-t',
|
||||||
|
dest = 'runTest',
|
||||||
|
default = None,
|
||||||
|
help = 'Run one particular test. Relative to test root.')
|
||||||
|
parser.add_option('--question', '-q',
|
||||||
|
dest = 'gradeQuestion',
|
||||||
|
default = None,
|
||||||
|
help = 'Grade one particular question.')
|
||||||
|
parser.add_option('--no-graphics',
|
||||||
|
dest = 'noGraphics',
|
||||||
|
action = 'store_true',
|
||||||
|
help = 'No graphics display for pacman games.')
|
||||||
|
(options, args) = parser.parse_args(argv)
|
||||||
|
return options
|
||||||
|
|
||||||
|
|
||||||
|
# confirm we should author solution files
|
||||||
|
def confirmGenerate():
|
||||||
|
print 'WARNING: this action will overwrite any solution files.'
|
||||||
|
print 'Are you sure you want to proceed? (yes/no)'
|
||||||
|
while True:
|
||||||
|
ans = sys.stdin.readline().strip()
|
||||||
|
if ans == 'yes':
|
||||||
|
break
|
||||||
|
elif ans == 'no':
|
||||||
|
sys.exit(0)
|
||||||
|
else:
|
||||||
|
print 'please answer either "yes" or "no"'
|
||||||
|
|
||||||
|
|
||||||
|
# TODO: Fix this so that it tracebacks work correctly
|
||||||
|
# Looking at source of the traceback module, presuming it works
|
||||||
|
# the same as the intepreters, it uses co_filename. This is,
|
||||||
|
# however, a readonly attribute.
|
||||||
|
def setModuleName(module, filename):
|
||||||
|
functionType = type(confirmGenerate)
|
||||||
|
classType = type(optparse.Option)
|
||||||
|
|
||||||
|
for i in dir(module):
|
||||||
|
o = getattr(module, i)
|
||||||
|
if hasattr(o, '__file__'): continue
|
||||||
|
|
||||||
|
if type(o) == functionType:
|
||||||
|
setattr(o, '__file__', filename)
|
||||||
|
elif type(o) == classType:
|
||||||
|
setattr(o, '__file__', filename)
|
||||||
|
# TODO: assign member __file__'s?
|
||||||
|
#print i, type(o)
|
||||||
|
|
||||||
|
|
||||||
|
#from cStringIO import StringIO
|
||||||
|
|
||||||
|
def loadModuleString(moduleSource):
|
||||||
|
# Below broken, imp doesn't believe its being passed a file:
|
||||||
|
# ValueError: load_module arg#2 should be a file or None
|
||||||
|
#
|
||||||
|
#f = StringIO(moduleCodeDict[k])
|
||||||
|
#tmp = imp.load_module(k, f, k, (".py", "r", imp.PY_SOURCE))
|
||||||
|
tmp = imp.new_module(k)
|
||||||
|
exec moduleCodeDict[k] in tmp.__dict__
|
||||||
|
setModuleName(tmp, k)
|
||||||
|
return tmp
|
||||||
|
|
||||||
|
import py_compile
|
||||||
|
|
||||||
|
def loadModuleFile(moduleName, filePath):
|
||||||
|
with open(filePath, 'r') as f:
|
||||||
|
return imp.load_module(moduleName, f, "%s.py" % moduleName, (".py", "r", imp.PY_SOURCE))
|
||||||
|
|
||||||
|
|
||||||
|
def readFile(path, root=""):
|
||||||
|
"Read file from disk at specified path and return as string"
|
||||||
|
with open(os.path.join(root, path), 'r') as handle:
|
||||||
|
return handle.read()
|
||||||
|
|
||||||
|
|
||||||
|
#######################################################################
|
||||||
|
# Error Hint Map
|
||||||
|
#######################################################################
|
||||||
|
|
||||||
|
# TODO: use these
|
||||||
|
ERROR_HINT_MAP = {
|
||||||
|
'q1': {
|
||||||
|
"<type 'exceptions.IndexError'>": """
|
||||||
|
We noticed that your project threw an IndexError on q1.
|
||||||
|
While many things may cause this, it may have been from
|
||||||
|
assuming a certain number of successors from a state space
|
||||||
|
or assuming a certain number of actions available from a given
|
||||||
|
state. Try making your code more general (no hardcoded indices)
|
||||||
|
and submit again!
|
||||||
|
"""
|
||||||
|
},
|
||||||
|
'q3': {
|
||||||
|
"<type 'exceptions.AttributeError'>": """
|
||||||
|
We noticed that your project threw an AttributeError on q3.
|
||||||
|
While many things may cause this, it may have been from assuming
|
||||||
|
a certain size or structure to the state space. For example, if you have
|
||||||
|
a line of code assuming that the state is (x, y) and we run your code
|
||||||
|
on a state space with (x, y, z), this error could be thrown. Try
|
||||||
|
making your code more general and submit again!
|
||||||
|
|
||||||
|
"""
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
import pprint
|
||||||
|
|
||||||
|
def splitStrings(d):
|
||||||
|
d2 = dict(d)
|
||||||
|
for k in d:
|
||||||
|
if k[0:2] == "__":
|
||||||
|
del d2[k]
|
||||||
|
continue
|
||||||
|
if d2[k].find("\n") >= 0:
|
||||||
|
d2[k] = d2[k].split("\n")
|
||||||
|
return d2
|
||||||
|
|
||||||
|
|
||||||
|
def printTest(testDict, solutionDict):
|
||||||
|
pp = pprint.PrettyPrinter(indent=4)
|
||||||
|
print "Test case:"
|
||||||
|
for line in testDict["__raw_lines__"]:
|
||||||
|
print " |", line
|
||||||
|
print "Solution:"
|
||||||
|
for line in solutionDict["__raw_lines__"]:
|
||||||
|
print " |", line
|
||||||
|
|
||||||
|
|
||||||
|
def runTest(testName, moduleDict, printTestCase=False, display=None):
|
||||||
|
import testParser
|
||||||
|
import testClasses
|
||||||
|
for module in moduleDict:
|
||||||
|
setattr(sys.modules[__name__], module, moduleDict[module])
|
||||||
|
|
||||||
|
testDict = testParser.TestParser(testName + ".test").parse()
|
||||||
|
solutionDict = testParser.TestParser(testName + ".solution").parse()
|
||||||
|
test_out_file = os.path.join('%s.test_output' % testName)
|
||||||
|
testDict['test_out_file'] = test_out_file
|
||||||
|
testClass = getattr(projectTestClasses, testDict['class'])
|
||||||
|
|
||||||
|
questionClass = getattr(testClasses, 'Question')
|
||||||
|
question = questionClass({'max_points': 0}, display)
|
||||||
|
testCase = testClass(question, testDict)
|
||||||
|
|
||||||
|
if printTestCase:
|
||||||
|
printTest(testDict, solutionDict)
|
||||||
|
|
||||||
|
# This is a fragile hack to create a stub grades object
|
||||||
|
grades = grading.Grades(projectParams.PROJECT_NAME, [(None,0)])
|
||||||
|
testCase.execute(grades, moduleDict, solutionDict)
|
||||||
|
|
||||||
|
|
||||||
|
# returns all the tests you need to run in order to run question
|
||||||
|
def getDepends(testParser, testRoot, question):
|
||||||
|
allDeps = [question]
|
||||||
|
questionDict = testParser.TestParser(os.path.join(testRoot, question, 'CONFIG')).parse()
|
||||||
|
if 'depends' in questionDict:
|
||||||
|
depends = questionDict['depends'].split()
|
||||||
|
for d in depends:
|
||||||
|
# run dependencies first
|
||||||
|
allDeps = getDepends(testParser, testRoot, d) + allDeps
|
||||||
|
return allDeps
|
||||||
|
|
||||||
|
# get list of questions to grade
|
||||||
|
def getTestSubdirs(testParser, testRoot, questionToGrade):
|
||||||
|
problemDict = testParser.TestParser(os.path.join(testRoot, 'CONFIG')).parse()
|
||||||
|
if questionToGrade != None:
|
||||||
|
questions = getDepends(testParser, testRoot, questionToGrade)
|
||||||
|
if len(questions) > 1:
|
||||||
|
print 'Note: due to dependencies, the following tests will be run: %s' % ' '.join(questions)
|
||||||
|
return questions
|
||||||
|
if 'order' in problemDict:
|
||||||
|
return problemDict['order'].split()
|
||||||
|
return sorted(os.listdir(testRoot))
|
||||||
|
|
||||||
|
|
||||||
|
# evaluate student code
|
||||||
|
def evaluate(generateSolutions, testRoot, moduleDict, exceptionMap=ERROR_HINT_MAP, edxOutput=False, muteOutput=False,
|
||||||
|
printTestCase=False, questionToGrade=None, display=None):
|
||||||
|
# imports of testbench code. note that the testClasses import must follow
|
||||||
|
# the import of student code due to dependencies
|
||||||
|
import testParser
|
||||||
|
import testClasses
|
||||||
|
for module in moduleDict:
|
||||||
|
setattr(sys.modules[__name__], module, moduleDict[module])
|
||||||
|
|
||||||
|
questions = []
|
||||||
|
questionDicts = {}
|
||||||
|
test_subdirs = getTestSubdirs(testParser, testRoot, questionToGrade)
|
||||||
|
for q in test_subdirs:
|
||||||
|
subdir_path = os.path.join(testRoot, q)
|
||||||
|
if not os.path.isdir(subdir_path) or q[0] == '.':
|
||||||
|
continue
|
||||||
|
|
||||||
|
# create a question object
|
||||||
|
questionDict = testParser.TestParser(os.path.join(subdir_path, 'CONFIG')).parse()
|
||||||
|
questionClass = getattr(testClasses, questionDict['class'])
|
||||||
|
question = questionClass(questionDict, display)
|
||||||
|
questionDicts[q] = questionDict
|
||||||
|
|
||||||
|
# load test cases into question
|
||||||
|
tests = filter(lambda t: re.match('[^#~.].*\.test\Z', t), os.listdir(subdir_path))
|
||||||
|
tests = map(lambda t: re.match('(.*)\.test\Z', t).group(1), tests)
|
||||||
|
for t in sorted(tests):
|
||||||
|
test_file = os.path.join(subdir_path, '%s.test' % t)
|
||||||
|
solution_file = os.path.join(subdir_path, '%s.solution' % t)
|
||||||
|
test_out_file = os.path.join(subdir_path, '%s.test_output' % t)
|
||||||
|
testDict = testParser.TestParser(test_file).parse()
|
||||||
|
if testDict.get("disabled", "false").lower() == "true":
|
||||||
|
continue
|
||||||
|
testDict['test_out_file'] = test_out_file
|
||||||
|
testClass = getattr(projectTestClasses, testDict['class'])
|
||||||
|
testCase = testClass(question, testDict)
|
||||||
|
def makefun(testCase, solution_file):
|
||||||
|
if generateSolutions:
|
||||||
|
# write solution file to disk
|
||||||
|
return lambda grades: testCase.writeSolution(moduleDict, solution_file)
|
||||||
|
else:
|
||||||
|
# read in solution dictionary and pass as an argument
|
||||||
|
testDict = testParser.TestParser(test_file).parse()
|
||||||
|
solutionDict = testParser.TestParser(solution_file).parse()
|
||||||
|
if printTestCase:
|
||||||
|
return lambda grades: printTest(testDict, solutionDict) or testCase.execute(grades, moduleDict, solutionDict)
|
||||||
|
else:
|
||||||
|
return lambda grades: testCase.execute(grades, moduleDict, solutionDict)
|
||||||
|
question.addTestCase(testCase, makefun(testCase, solution_file))
|
||||||
|
|
||||||
|
# Note extra function is necessary for scoping reasons
|
||||||
|
def makefun(question):
|
||||||
|
return lambda grades: question.execute(grades)
|
||||||
|
setattr(sys.modules[__name__], q, makefun(question))
|
||||||
|
questions.append((q, question.getMaxPoints()))
|
||||||
|
|
||||||
|
grades = grading.Grades(projectParams.PROJECT_NAME, questions, edxOutput=edxOutput, muteOutput=muteOutput)
|
||||||
|
if questionToGrade == None:
|
||||||
|
for q in questionDicts:
|
||||||
|
for prereq in questionDicts[q].get('depends', '').split():
|
||||||
|
grades.addPrereq(q, prereq)
|
||||||
|
|
||||||
|
grades.grade(sys.modules[__name__], bonusPic = projectParams.BONUS_PIC)
|
||||||
|
return grades.points
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
|
def getDisplay(graphicsByDefault, options=None):
|
||||||
|
graphics = graphicsByDefault
|
||||||
|
if options is not None and options.noGraphics:
|
||||||
|
graphics = False
|
||||||
|
if graphics:
|
||||||
|
try:
|
||||||
|
import graphicsDisplay
|
||||||
|
return graphicsDisplay.PacmanGraphics(1, frameTime=.05)
|
||||||
|
except ImportError:
|
||||||
|
pass
|
||||||
|
import textDisplay
|
||||||
|
return textDisplay.NullGraphics()
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
|
if __name__ == '__main__':
|
||||||
|
options = readCommand(sys.argv)
|
||||||
|
if options.generateSolutions:
|
||||||
|
confirmGenerate()
|
||||||
|
codePaths = options.studentCode.split(',')
|
||||||
|
# moduleCodeDict = {}
|
||||||
|
# for cp in codePaths:
|
||||||
|
# moduleName = re.match('.*?([^/]*)\.py', cp).group(1)
|
||||||
|
# moduleCodeDict[moduleName] = readFile(cp, root=options.codeRoot)
|
||||||
|
# moduleCodeDict['projectTestClasses'] = readFile(options.testCaseCode, root=options.codeRoot)
|
||||||
|
# moduleDict = loadModuleDict(moduleCodeDict)
|
||||||
|
|
||||||
|
moduleDict = {}
|
||||||
|
for cp in codePaths:
|
||||||
|
moduleName = re.match('.*?([^/]*)\.py', cp).group(1)
|
||||||
|
moduleDict[moduleName] = loadModuleFile(moduleName, os.path.join(options.codeRoot, cp))
|
||||||
|
moduleName = re.match('.*?([^/]*)\.py', options.testCaseCode).group(1)
|
||||||
|
moduleDict['projectTestClasses'] = loadModuleFile(moduleName, os.path.join(options.codeRoot, options.testCaseCode))
|
||||||
|
|
||||||
|
|
||||||
|
if options.runTest != None:
|
||||||
|
runTest(options.runTest, moduleDict, printTestCase=options.printTestCase, display=getDisplay(True, options))
|
||||||
|
else:
|
||||||
|
evaluate(options.generateSolutions, options.testRoot, moduleDict,
|
||||||
|
edxOutput=options.edxOutput, muteOutput=options.muteOutput, printTestCase=options.printTestCase,
|
||||||
|
questionToGrade=options.gradeQuestion, display=getDisplay(options.gradeQuestion!=None, options))
|
||||||
384
p3_rl/crawler.py
Normal file
384
p3_rl/crawler.py
Normal file
@@ -0,0 +1,384 @@
|
|||||||
|
# crawler.py
|
||||||
|
# ----------
|
||||||
|
# Licensing Information: You are free to use or extend these projects for
|
||||||
|
# educational purposes provided that (1) you do not distribute or publish
|
||||||
|
# solutions, (2) you retain this notice, and (3) you provide clear
|
||||||
|
# attribution to UC Berkeley, including a link to http://ai.berkeley.edu.
|
||||||
|
#
|
||||||
|
# Attribution Information: The Pacman AI projects were developed at UC Berkeley.
|
||||||
|
# The core projects and autograders were primarily created by John DeNero
|
||||||
|
# (denero@cs.berkeley.edu) and Dan Klein (klein@cs.berkeley.edu).
|
||||||
|
# Student side autograding was added by Brad Miller, Nick Hay, and
|
||||||
|
# Pieter Abbeel (pabbeel@cs.berkeley.edu).
|
||||||
|
|
||||||
|
|
||||||
|
#!/usr/bin/python
|
||||||
|
import math
|
||||||
|
from math import pi as PI
|
||||||
|
import time
|
||||||
|
import environment
|
||||||
|
import random
|
||||||
|
|
||||||
|
class CrawlingRobotEnvironment(environment.Environment):
|
||||||
|
|
||||||
|
def __init__(self, crawlingRobot):
|
||||||
|
|
||||||
|
self.crawlingRobot = crawlingRobot
|
||||||
|
|
||||||
|
# The state is of the form (armAngle, handAngle)
|
||||||
|
# where the angles are bucket numbers, not actual
|
||||||
|
# degree measurements
|
||||||
|
self.state = None
|
||||||
|
|
||||||
|
self.nArmStates = 9
|
||||||
|
self.nHandStates = 13
|
||||||
|
|
||||||
|
# create a list of arm buckets and hand buckets to
|
||||||
|
# discretize the state space
|
||||||
|
minArmAngle,maxArmAngle = self.crawlingRobot.getMinAndMaxArmAngles()
|
||||||
|
minHandAngle,maxHandAngle = self.crawlingRobot.getMinAndMaxHandAngles()
|
||||||
|
armIncrement = (maxArmAngle - minArmAngle) / (self.nArmStates-1)
|
||||||
|
handIncrement = (maxHandAngle - minHandAngle) / (self.nHandStates-1)
|
||||||
|
self.armBuckets = [minArmAngle+(armIncrement*i) \
|
||||||
|
for i in range(self.nArmStates)]
|
||||||
|
self.handBuckets = [minHandAngle+(handIncrement*i) \
|
||||||
|
for i in range(self.nHandStates)]
|
||||||
|
|
||||||
|
# Reset
|
||||||
|
self.reset()
|
||||||
|
|
||||||
|
def getCurrentState(self):
|
||||||
|
"""
|
||||||
|
Return the current state
|
||||||
|
of the crawling robot
|
||||||
|
"""
|
||||||
|
return self.state
|
||||||
|
|
||||||
|
def getPossibleActions(self, state):
|
||||||
|
"""
|
||||||
|
Returns possible actions
|
||||||
|
for the states in the
|
||||||
|
current state
|
||||||
|
"""
|
||||||
|
|
||||||
|
actions = list()
|
||||||
|
|
||||||
|
currArmBucket,currHandBucket = state
|
||||||
|
if currArmBucket > 0: actions.append('arm-down')
|
||||||
|
if currArmBucket < self.nArmStates-1: actions.append('arm-up')
|
||||||
|
if currHandBucket > 0: actions.append('hand-down')
|
||||||
|
if currHandBucket < self.nHandStates-1: actions.append('hand-up')
|
||||||
|
|
||||||
|
return actions
|
||||||
|
|
||||||
|
def doAction(self, action):
|
||||||
|
"""
|
||||||
|
Perform the action and update
|
||||||
|
the current state of the Environment
|
||||||
|
and return the reward for the
|
||||||
|
current state, the next state
|
||||||
|
and the taken action.
|
||||||
|
|
||||||
|
Returns:
|
||||||
|
nextState, reward
|
||||||
|
"""
|
||||||
|
nextState, reward = None, None
|
||||||
|
|
||||||
|
oldX,oldY = self.crawlingRobot.getRobotPosition()
|
||||||
|
|
||||||
|
armBucket,handBucket = self.state
|
||||||
|
armAngle,handAngle = self.crawlingRobot.getAngles()
|
||||||
|
if action == 'arm-up':
|
||||||
|
newArmAngle = self.armBuckets[armBucket+1]
|
||||||
|
self.crawlingRobot.moveArm(newArmAngle)
|
||||||
|
nextState = (armBucket+1,handBucket)
|
||||||
|
if action == 'arm-down':
|
||||||
|
newArmAngle = self.armBuckets[armBucket-1]
|
||||||
|
self.crawlingRobot.moveArm(newArmAngle)
|
||||||
|
nextState = (armBucket-1,handBucket)
|
||||||
|
if action == 'hand-up':
|
||||||
|
newHandAngle = self.handBuckets[handBucket+1]
|
||||||
|
self.crawlingRobot.moveHand(newHandAngle)
|
||||||
|
nextState = (armBucket,handBucket+1)
|
||||||
|
if action == 'hand-down':
|
||||||
|
newHandAngle = self.handBuckets[handBucket-1]
|
||||||
|
self.crawlingRobot.moveHand(newHandAngle)
|
||||||
|
nextState = (armBucket,handBucket-1)
|
||||||
|
|
||||||
|
newX,newY = self.crawlingRobot.getRobotPosition()
|
||||||
|
|
||||||
|
# a simple reward function
|
||||||
|
reward = newX - oldX
|
||||||
|
|
||||||
|
self.state = nextState
|
||||||
|
return nextState, reward
|
||||||
|
|
||||||
|
|
||||||
|
def reset(self):
|
||||||
|
"""
|
||||||
|
Resets the Environment to the initial state
|
||||||
|
"""
|
||||||
|
## Initialize the state to be the middle
|
||||||
|
## value for each parameter e.g. if there are 13 and 19
|
||||||
|
## buckets for the arm and hand parameters, then the intial
|
||||||
|
## state should be (6,9)
|
||||||
|
##
|
||||||
|
## Also call self.crawlingRobot.setAngles()
|
||||||
|
## to the initial arm and hand angle
|
||||||
|
|
||||||
|
armState = self.nArmStates/2
|
||||||
|
handState = self.nHandStates/2
|
||||||
|
self.state = armState,handState
|
||||||
|
self.crawlingRobot.setAngles(self.armBuckets[armState],self.handBuckets[handState])
|
||||||
|
self.crawlingRobot.positions = [20,self.crawlingRobot.getRobotPosition()[0]]
|
||||||
|
|
||||||
|
|
||||||
|
class CrawlingRobot:
|
||||||
|
|
||||||
|
def setAngles(self, armAngle, handAngle):
|
||||||
|
"""
|
||||||
|
set the robot's arm and hand angles
|
||||||
|
to the passed in values
|
||||||
|
"""
|
||||||
|
self.armAngle = armAngle
|
||||||
|
self.handAngle = handAngle
|
||||||
|
|
||||||
|
def getAngles(self):
|
||||||
|
"""
|
||||||
|
returns the pair of (armAngle, handAngle)
|
||||||
|
"""
|
||||||
|
return self.armAngle, self.handAngle
|
||||||
|
|
||||||
|
def getRobotPosition(self):
|
||||||
|
"""
|
||||||
|
returns the (x,y) coordinates
|
||||||
|
of the lower-left point of the
|
||||||
|
robot
|
||||||
|
"""
|
||||||
|
return self.robotPos
|
||||||
|
|
||||||
|
def moveArm(self, newArmAngle):
|
||||||
|
"""
|
||||||
|
move the robot arm to 'newArmAngle'
|
||||||
|
"""
|
||||||
|
oldArmAngle = self.armAngle
|
||||||
|
if newArmAngle > self.maxArmAngle:
|
||||||
|
raise 'Crawling Robot: Arm Raised too high. Careful!'
|
||||||
|
if newArmAngle < self.minArmAngle:
|
||||||
|
raise 'Crawling Robot: Arm Raised too low. Careful!'
|
||||||
|
disp = self.displacement(self.armAngle, self.handAngle,
|
||||||
|
newArmAngle, self.handAngle)
|
||||||
|
curXPos = self.robotPos[0]
|
||||||
|
self.robotPos = (curXPos+disp, self.robotPos[1])
|
||||||
|
self.armAngle = newArmAngle
|
||||||
|
|
||||||
|
# Position and Velocity Sign Post
|
||||||
|
self.positions.append(self.getRobotPosition()[0])
|
||||||
|
# self.angleSums.append(abs(math.degrees(oldArmAngle)-math.degrees(newArmAngle)))
|
||||||
|
if len(self.positions) > 100:
|
||||||
|
self.positions.pop(0)
|
||||||
|
# self.angleSums.pop(0)
|
||||||
|
|
||||||
|
def moveHand(self, newHandAngle):
|
||||||
|
"""
|
||||||
|
move the robot hand to 'newArmAngle'
|
||||||
|
"""
|
||||||
|
oldHandAngle = self.handAngle
|
||||||
|
|
||||||
|
if newHandAngle > self.maxHandAngle:
|
||||||
|
raise 'Crawling Robot: Hand Raised too high. Careful!'
|
||||||
|
if newHandAngle < self.minHandAngle:
|
||||||
|
raise 'Crawling Robot: Hand Raised too low. Careful!'
|
||||||
|
disp = self.displacement(self.armAngle, self.handAngle, self.armAngle, newHandAngle)
|
||||||
|
curXPos = self.robotPos[0]
|
||||||
|
self.robotPos = (curXPos+disp, self.robotPos[1])
|
||||||
|
self.handAngle = newHandAngle
|
||||||
|
|
||||||
|
# Position and Velocity Sign Post
|
||||||
|
self.positions.append(self.getRobotPosition()[0])
|
||||||
|
# self.angleSums.append(abs(math.degrees(oldHandAngle)-math.degrees(newHandAngle)))
|
||||||
|
if len(self.positions) > 100:
|
||||||
|
self.positions.pop(0)
|
||||||
|
# self.angleSums.pop(0)
|
||||||
|
|
||||||
|
def getMinAndMaxArmAngles(self):
|
||||||
|
"""
|
||||||
|
get the lower- and upper- bound
|
||||||
|
for the arm angles returns (min,max) pair
|
||||||
|
"""
|
||||||
|
return self.minArmAngle, self.maxArmAngle
|
||||||
|
|
||||||
|
def getMinAndMaxHandAngles(self):
|
||||||
|
"""
|
||||||
|
get the lower- and upper- bound
|
||||||
|
for the hand angles returns (min,max) pair
|
||||||
|
"""
|
||||||
|
return self.minHandAngle, self.maxHandAngle
|
||||||
|
|
||||||
|
def getRotationAngle(self):
|
||||||
|
"""
|
||||||
|
get the current angle the
|
||||||
|
robot body is rotated off the ground
|
||||||
|
"""
|
||||||
|
armCos, armSin = self.__getCosAndSin(self.armAngle)
|
||||||
|
handCos, handSin = self.__getCosAndSin(self.handAngle)
|
||||||
|
x = self.armLength * armCos + self.handLength * handCos + self.robotWidth
|
||||||
|
y = self.armLength * armSin + self.handLength * handSin + self.robotHeight
|
||||||
|
if y < 0:
|
||||||
|
return math.atan(-y/x)
|
||||||
|
return 0.0
|
||||||
|
|
||||||
|
|
||||||
|
## You shouldn't need methods below here
|
||||||
|
|
||||||
|
|
||||||
|
def __getCosAndSin(self, angle):
|
||||||
|
return math.cos(angle), math.sin(angle)
|
||||||
|
|
||||||
|
def displacement(self, oldArmDegree, oldHandDegree, armDegree, handDegree):
|
||||||
|
|
||||||
|
oldArmCos, oldArmSin = self.__getCosAndSin(oldArmDegree)
|
||||||
|
armCos, armSin = self.__getCosAndSin(armDegree)
|
||||||
|
oldHandCos, oldHandSin = self.__getCosAndSin(oldHandDegree)
|
||||||
|
handCos, handSin = self.__getCosAndSin(handDegree)
|
||||||
|
|
||||||
|
xOld = self.armLength * oldArmCos + self.handLength * oldHandCos + self.robotWidth
|
||||||
|
yOld = self.armLength * oldArmSin + self.handLength * oldHandSin + self.robotHeight
|
||||||
|
|
||||||
|
x = self.armLength * armCos + self.handLength * handCos + self.robotWidth
|
||||||
|
y = self.armLength * armSin + self.handLength * handSin + self.robotHeight
|
||||||
|
|
||||||
|
if y < 0:
|
||||||
|
if yOld <= 0:
|
||||||
|
return math.sqrt(xOld*xOld + yOld*yOld) - math.sqrt(x*x + y*y)
|
||||||
|
return (xOld - yOld*(x-xOld) / (y - yOld)) - math.sqrt(x*x + y*y)
|
||||||
|
else:
|
||||||
|
if yOld >= 0:
|
||||||
|
return 0.0
|
||||||
|
return -(x - y * (xOld-x)/(yOld-y)) + math.sqrt(xOld*xOld + yOld*yOld)
|
||||||
|
|
||||||
|
raise 'Never Should See This!'
|
||||||
|
|
||||||
|
def draw(self, stepCount, stepDelay):
|
||||||
|
x1, y1 = self.getRobotPosition()
|
||||||
|
x1 = x1 % self.totWidth
|
||||||
|
|
||||||
|
## Check Lower Still on the ground
|
||||||
|
if y1 != self.groundY:
|
||||||
|
raise 'Flying Robot!!'
|
||||||
|
|
||||||
|
rotationAngle = self.getRotationAngle()
|
||||||
|
cosRot, sinRot = self.__getCosAndSin(rotationAngle)
|
||||||
|
|
||||||
|
x2 = x1 + self.robotWidth * cosRot
|
||||||
|
y2 = y1 - self.robotWidth * sinRot
|
||||||
|
|
||||||
|
x3 = x1 - self.robotHeight * sinRot
|
||||||
|
y3 = y1 - self.robotHeight * cosRot
|
||||||
|
|
||||||
|
x4 = x3 + cosRot*self.robotWidth
|
||||||
|
y4 = y3 - sinRot*self.robotWidth
|
||||||
|
|
||||||
|
self.canvas.coords(self.robotBody,x1,y1,x2,y2,x4,y4,x3,y3)
|
||||||
|
|
||||||
|
armCos, armSin = self.__getCosAndSin(rotationAngle+self.armAngle)
|
||||||
|
xArm = x4 + self.armLength * armCos
|
||||||
|
yArm = y4 - self.armLength * armSin
|
||||||
|
|
||||||
|
self.canvas.coords(self.robotArm,x4,y4,xArm,yArm)
|
||||||
|
|
||||||
|
handCos, handSin = self.__getCosAndSin(self.handAngle+rotationAngle)
|
||||||
|
xHand = xArm + self.handLength * handCos
|
||||||
|
yHand = yArm - self.handLength * handSin
|
||||||
|
|
||||||
|
self.canvas.coords(self.robotHand,xArm,yArm,xHand,yHand)
|
||||||
|
|
||||||
|
|
||||||
|
# Position and Velocity Sign Post
|
||||||
|
# time = len(self.positions) + 0.5 * sum(self.angleSums)
|
||||||
|
# velocity = (self.positions[-1]-self.positions[0]) / time
|
||||||
|
# if len(self.positions) == 1: return
|
||||||
|
steps = (stepCount - self.lastStep)
|
||||||
|
if steps==0:return
|
||||||
|
# pos = self.positions[-1]
|
||||||
|
# velocity = (pos - self.lastPos) / steps
|
||||||
|
# g = .9 ** (10 * stepDelay)
|
||||||
|
# g = .99 ** steps
|
||||||
|
# self.velAvg = g * self.velAvg + (1 - g) * velocity
|
||||||
|
# g = .999 ** steps
|
||||||
|
# self.velAvg2 = g * self.velAvg2 + (1 - g) * velocity
|
||||||
|
pos = self.positions[-1]
|
||||||
|
velocity = pos - self.positions[-2]
|
||||||
|
vel2 = (pos - self.positions[0]) / len(self.positions)
|
||||||
|
self.velAvg = .9 * self.velAvg + .1 * vel2
|
||||||
|
velMsg = '100-step Avg Velocity: %.2f' % self.velAvg
|
||||||
|
# velMsg2 = '1000-step Avg Velocity: %.2f' % self.velAvg2
|
||||||
|
velocityMsg = 'Velocity: %.2f' % velocity
|
||||||
|
positionMsg = 'Position: %2.f' % pos
|
||||||
|
stepMsg = 'Step: %d' % stepCount
|
||||||
|
if 'vel_msg' in dir(self):
|
||||||
|
self.canvas.delete(self.vel_msg)
|
||||||
|
self.canvas.delete(self.pos_msg)
|
||||||
|
self.canvas.delete(self.step_msg)
|
||||||
|
self.canvas.delete(self.velavg_msg)
|
||||||
|
# self.canvas.delete(self.velavg2_msg)
|
||||||
|
# self.velavg2_msg = self.canvas.create_text(850,190,text=velMsg2)
|
||||||
|
self.velavg_msg = self.canvas.create_text(650,190,text=velMsg)
|
||||||
|
self.vel_msg = self.canvas.create_text(450,190,text=velocityMsg)
|
||||||
|
self.pos_msg = self.canvas.create_text(250,190,text=positionMsg)
|
||||||
|
self.step_msg = self.canvas.create_text(50,190,text=stepMsg)
|
||||||
|
# self.lastPos = pos
|
||||||
|
self.lastStep = stepCount
|
||||||
|
# self.lastVel = velocity
|
||||||
|
|
||||||
|
    def __init__(self, canvas):
        """
        Build the crawler-robot display on the given Tkinter *canvas*:
        velocity/step bookkeeping, arm/hand angle limits, and the canvas
        items (ground, body, arm, hand) that later updates reposition.
        """

        ## Canvas ##
        self.canvas = canvas
        self.velAvg = 0          # running 100-step average velocity shown in the HUD
        # self.velAvg2 = 0
        # self.lastPos = 0
        self.lastStep = 0        # step count at the last HUD refresh
        # self.lastVel = 0

        ## Arm and Hand Degrees ##
        # current and previous joint angles, in radians
        self.armAngle = self.oldArmDegree = 0.0
        self.handAngle = self.oldHandDegree = -PI/6

        self.maxArmAngle = PI/6
        self.minArmAngle = -PI/6

        self.maxHandAngle = 0
        self.minHandAngle = -(5.0/6.0) * PI

        ## Draw Ground ##
        self.totWidth = canvas.winfo_reqwidth()
        self.totHeight = canvas.winfo_reqheight()
        self.groundHeight = 40
        self.groundY = self.totHeight - self.groundHeight

        self.ground = canvas.create_rectangle(0,
            self.groundY,self.totWidth,self.totHeight, fill='blue')

        ## Robot Body ##
        self.robotWidth = 80
        self.robotHeight = 40
        self.robotPos = (20, self.groundY)
        # placeholder coordinates; the real polygon is positioned on redraw
        self.robotBody = canvas.create_polygon(0,0,0,0,0,0,0,0, fill='green')

        ## Robot Arm ##
        self.armLength = 60
        self.robotArm = canvas.create_line(0,0,0,0,fill='orange',width=5)

        ## Robot Hand ##
        self.handLength = 40
        self.robotHand = canvas.create_line(0,0,0,0,fill='red',width=3)

        # last two robot x-positions; seeded with zeros so velocity is defined
        self.positions = [0,0]
        # self.angleSums = [0,0]
|
|
||||||
|
|
||||||
|
|
||||||
|
if __name__ == '__main__':
    # Launched directly: start the interactive crawler GUI.
    from graphicsCrawlerDisplay import *
    run()
|
||||||
56
p3_rl/environment.py
Normal file
56
p3_rl/environment.py
Normal file
@@ -0,0 +1,56 @@
|
|||||||
|
# environment.py
|
||||||
|
# --------------
|
||||||
|
# Licensing Information: You are free to use or extend these projects for
|
||||||
|
# educational purposes provided that (1) you do not distribute or publish
|
||||||
|
# solutions, (2) you retain this notice, and (3) you provide clear
|
||||||
|
# attribution to UC Berkeley, including a link to http://ai.berkeley.edu.
|
||||||
|
#
|
||||||
|
# Attribution Information: The Pacman AI projects were developed at UC Berkeley.
|
||||||
|
# The core projects and autograders were primarily created by John DeNero
|
||||||
|
# (denero@cs.berkeley.edu) and Dan Klein (klein@cs.berkeley.edu).
|
||||||
|
# Student side autograding was added by Brad Miller, Nick Hay, and
|
||||||
|
# Pieter Abbeel (pabbeel@cs.berkeley.edu).
|
||||||
|
|
||||||
|
|
||||||
|
#!/usr/bin/python
|
||||||
|
|
||||||
|
class Environment:
    """
    Abstract base class for reinforcement-learning environments.

    Subclasses supply the state space, action space, and transition
    dynamics. Unimplemented methods evaluate the undefined name
    ``abstract``, so calling them raises NameError.
    """

    def getCurrentState(self):
        """Return the current state of the environment."""
        abstract

    def getPossibleActions(self, state):
        """
        Return the actions the agent can take in *state*.  May be the
        empty list when *state* is terminal.
        """
        abstract

    def doAction(self, action):
        """
        Perform *action* in the current state and update the
        environment.  Returns a (reward, nextState) pair.
        """
        abstract

    def reset(self):
        """Reset the current state to the start state."""
        abstract

    def isTerminal(self):
        """
        True when the environment is in a terminal state, i.e. the
        current state has no possible actions (no successors).
        """
        return len(self.getPossibleActions(self.getCurrentState())) == 0
|
||||||
103
p3_rl/featureExtractors.py
Normal file
103
p3_rl/featureExtractors.py
Normal file
@@ -0,0 +1,103 @@
|
|||||||
|
# featureExtractors.py
|
||||||
|
# --------------------
|
||||||
|
# Licensing Information: You are free to use or extend these projects for
|
||||||
|
# educational purposes provided that (1) you do not distribute or publish
|
||||||
|
# solutions, (2) you retain this notice, and (3) you provide clear
|
||||||
|
# attribution to UC Berkeley, including a link to http://ai.berkeley.edu.
|
||||||
|
#
|
||||||
|
# Attribution Information: The Pacman AI projects were developed at UC Berkeley.
|
||||||
|
# The core projects and autograders were primarily created by John DeNero
|
||||||
|
# (denero@cs.berkeley.edu) and Dan Klein (klein@cs.berkeley.edu).
|
||||||
|
# Student side autograding was added by Brad Miller, Nick Hay, and
|
||||||
|
# Pieter Abbeel (pabbeel@cs.berkeley.edu).
|
||||||
|
|
||||||
|
|
||||||
|
"Feature extractors for Pacman game states"
|
||||||
|
|
||||||
|
from game import Directions, Actions
|
||||||
|
import util
|
||||||
|
|
||||||
|
class FeatureExtractor:
    """Interface for mapping (state, action) pairs to feature vectors."""

    def getFeatures(self, state, action):
        """
        Return a dict-like util.Counter from feature names to values;
        indicator features usually carry the value 1.0.
        """
        util.raiseNotDefined()
|
||||||
|
|
||||||
|
class IdentityExtractor(FeatureExtractor):
    """A single indicator feature per (state, action) pair."""

    def getFeatures(self, state, action):
        features = util.Counter()
        features[(state, action)] = 1.0
        return features
|
||||||
|
|
||||||
|
class CoordinateExtractor(FeatureExtractor):
    """
    Indicator features for the state itself, its x and y coordinates,
    and the chosen action.  Assumes *state* is an (x, y)-indexable pair.
    """

    def getFeatures(self, state, action):
        feats = util.Counter()
        feats[state] = 1.0
        feats['x=%d' % state[0]] = 1.0
        # Bug fix: the y-coordinate feature previously read state[0],
        # which duplicated the x feature and dropped y entirely.
        feats['y=%d' % state[1]] = 1.0
        feats['action=%s' % action] = 1.0
        return feats
|
||||||
|
|
||||||
|
def closestFood(pos, food, walls):
    """
    Return the maze distance from *pos* to the nearest food dot, or
    None when no food is reachable.

    Breadth-first search over legal (non-wall) grid positions, so the
    first food cell dequeued is guaranteed to be the closest one.
    """
    # deque gives O(1) popleft; the original list.pop(0) was O(n) per pop,
    # making the search quadratic on large open boards.
    from collections import deque
    fringe = deque([(pos[0], pos[1], 0)])
    expanded = set()
    while fringe:
        pos_x, pos_y, dist = fringe.popleft()
        if (pos_x, pos_y) in expanded:
            continue
        expanded.add((pos_x, pos_y))
        # if we find a food at this location then exit
        if food[pos_x][pos_y]:
            return dist
        # otherwise spread out from the location to its neighbours
        for nbr_x, nbr_y in Actions.getLegalNeighbors((pos_x, pos_y), walls):
            fringe.append((nbr_x, nbr_y, dist + 1))
    # no food found
    return None
|
||||||
|
|
||||||
|
class SimpleExtractor(FeatureExtractor):
    """
    Simple reflex features for Pacman:
    - whether food will be eaten
    - how far away the next food is
    - whether a ghost collision is imminent
    - whether a ghost is one step away
    """

    def getFeatures(self, state, action):
        # grid of food, wall layout, and current ghost locations
        food = state.getFood()
        walls = state.getWalls()
        ghosts = state.getGhostPositions()

        features = util.Counter()
        features["bias"] = 1.0

        # where Pacman ends up after taking the action
        x, y = state.getPacmanPosition()
        dx, dy = Actions.directionToVector(action)
        next_x, next_y = int(x + dx), int(y + dy)

        # how many ghosts could reach the new position in a single step
        adjacent_ghosts = 0
        for g in ghosts:
            if (next_x, next_y) in Actions.getLegalNeighbors(g, walls):
                adjacent_ghosts += 1
        features["#-of-ghosts-1-step-away"] = adjacent_ghosts

        # only credit eating when no ghost threatens the square
        if not features["#-of-ghosts-1-step-away"] and food[next_x][next_y]:
            features["eats-food"] = 1.0

        dist = closestFood((next_x, next_y), food, walls)
        if dist is not None:
            # scale below one so the weight update doesn't diverge
            features["closest-food"] = float(dist) / (walls.width * walls.height)
        features.divideAll(10.0)
        return features
|
||||||
729
p3_rl/game.py
Normal file
729
p3_rl/game.py
Normal file
@@ -0,0 +1,729 @@
|
|||||||
|
# game.py
|
||||||
|
# -------
|
||||||
|
# Licensing Information: You are free to use or extend these projects for
|
||||||
|
# educational purposes provided that (1) you do not distribute or publish
|
||||||
|
# solutions, (2) you retain this notice, and (3) you provide clear
|
||||||
|
# attribution to UC Berkeley, including a link to http://ai.berkeley.edu.
|
||||||
|
#
|
||||||
|
# Attribution Information: The Pacman AI projects were developed at UC Berkeley.
|
||||||
|
# The core projects and autograders were primarily created by John DeNero
|
||||||
|
# (denero@cs.berkeley.edu) and Dan Klein (klein@cs.berkeley.edu).
|
||||||
|
# Student side autograding was added by Brad Miller, Nick Hay, and
|
||||||
|
# Pieter Abbeel (pabbeel@cs.berkeley.edu).
|
||||||
|
|
||||||
|
|
||||||
|
# game.py
|
||||||
|
# -------
|
||||||
|
# Licensing Information: Please do not distribute or publish solutions to this
|
||||||
|
# project. You are free to use and extend these projects for educational
|
||||||
|
# purposes. The Pacman AI projects were developed at UC Berkeley, primarily by
|
||||||
|
# John DeNero (denero@cs.berkeley.edu) and Dan Klein (klein@cs.berkeley.edu).
|
||||||
|
# For more info, see http://inst.eecs.berkeley.edu/~cs188/sp09/pacman.html
|
||||||
|
|
||||||
|
from util import *
|
||||||
|
import time, os
|
||||||
|
import traceback
|
||||||
|
import sys
|
||||||
|
|
||||||
|
#######################
|
||||||
|
# Parts worth reading #
|
||||||
|
#######################
|
||||||
|
|
||||||
|
class Agent:
    """
    Base class for game agents.  Subclasses must implement getAction;
    the game also invokes optional hooks when they exist, e.g.

        def registerInitialState(self, state):  # inspects the starting state
    """

    def __init__(self, index=0):
        # this agent's slot in the game's agent list
        self.index = index

    def getAction(self, state):
        """
        Receive a GameState (from pacman, capture, or sonar) and return
        an action from Directions.{North, South, East, West, Stop}.
        """
        raiseNotDefined()
|
||||||
|
|
||||||
|
class Directions:
    """Canonical direction names plus left/right/reverse rotation maps."""
    NORTH = 'North'
    SOUTH = 'South'
    EAST = 'East'
    WEST = 'West'
    STOP = 'Stop'

    # direction reached by turning 90 degrees left
    LEFT = {NORTH: WEST,
            SOUTH: EAST,
            EAST: NORTH,
            WEST: SOUTH,
            STOP: STOP}

    # turning right is exactly the inverse of turning left
    RIGHT = dict((v, k) for k, v in LEFT.items())

    # 180-degree turn
    REVERSE = {NORTH: SOUTH,
               SOUTH: NORTH,
               EAST: WEST,
               WEST: EAST,
               STOP: STOP}
|
||||||
|
|
||||||
|
class Configuration:
    """
    Holds the (x, y) coordinate of a character along with its traveling
    direction.

    Positions follow graph convention: (0, 0) is the lower-left corner,
    x increases to the right and y increases upward, so North is the
    direction of increasing y, i.e. (0, 1).
    """

    def __init__(self, pos, direction):
        self.pos = pos
        self.direction = direction

    def getPosition(self):
        return self.pos

    def getDirection(self):
        return self.direction

    def isInteger(self):
        # true exactly when the position lies on a grid point
        x, y = self.pos
        return x == int(x) and y == int(y)

    def __eq__(self, other):
        if other is None:
            return False
        return self.pos == other.pos and self.direction == other.direction

    def __hash__(self):
        return hash(hash(self.pos) + 13 * hash(self.direction))

    def __str__(self):
        return "(x,y)=" + str(self.pos) + ", " + str(self.direction)

    def generateSuccessor(self, vector):
        """
        Return the configuration reached by translating this one by the
        movement *vector*.  Low-level call: it does not check whether
        the move is legal.
        """
        x, y = self.pos
        dx, dy = vector
        direction = Actions.vectorToDirection(vector)
        if direction == Directions.STOP:
            direction = self.direction  # There is no stop direction
        return Configuration((x + dx, y + dy), direction)
|
||||||
|
|
||||||
|
class AgentState:
    """
    Holds the state of one agent: configuration, scared timer, and the
    food-carrying counters used by capture-style games.
    """

    def __init__(self, startConfiguration, isPacman):
        self.start = startConfiguration
        self.configuration = startConfiguration
        self.isPacman = isPacman
        self.scaredTimer = 0
        self.numCarrying = 0
        self.numReturned = 0

    def __str__(self):
        label = "Pacman: " if self.isPacman else "Ghost: "
        return label + str(self.configuration)

    def __eq__(self, other):
        # equality compares position/direction and scared timer only
        if other is None:
            return False
        return (self.configuration == other.configuration
                and self.scaredTimer == other.scaredTimer)

    def __hash__(self):
        return hash(hash(self.configuration) + 13 * hash(self.scaredTimer))

    def copy(self):
        clone = AgentState(self.start, self.isPacman)
        clone.configuration = self.configuration
        clone.scaredTimer = self.scaredTimer
        clone.numCarrying = self.numCarrying
        clone.numReturned = self.numReturned
        return clone

    def getPosition(self):
        if self.configuration is None:
            return None
        return self.configuration.getPosition()

    def getDirection(self):
        return self.configuration.getDirection()
|
||||||
|
|
||||||
|
class Grid:
    """
    A 2-dimensional array of objects backed by a list of lists.  Data is accessed
    via grid[x][y] where (x,y) are positions on a Pacman map with x horizontal,
    y vertical and the origin (0,0) in the bottom left corner.

    The __str__ method constructs an output that is oriented like a pacman board.
    """
    def __init__(self, width, height, initialValue=False, bitRepresentation=None):
        if initialValue not in [False, True]: raise Exception('Grids can only contain booleans')
        # how many cells are packed into a single integer by packBits/_unpackBits
        self.CELLS_PER_INT = 30

        self.width = width
        self.height = height
        # column-major storage: data[x][y]
        self.data = [[initialValue for y in range(height)] for x in range(width)]
        if bitRepresentation:
            self._unpackBits(bitRepresentation)

    def __getitem__(self, i):
        # grid[x] yields a column list, so grid[x][y] addresses one cell
        return self.data[i]

    def __setitem__(self, key, item):
        self.data[key] = item

    def __str__(self):
        # transpose then flip vertically so the printout matches board orientation
        out = [[str(self.data[x][y])[0] for x in range(self.width)] for y in range(self.height)]
        out.reverse()
        return '\n'.join([''.join(x) for x in out])

    def __eq__(self, other):
        if other == None: return False
        return self.data == other.data

    def __hash__(self):
        # return hash(str(self))
        # hash the truthiness of every cell as one big bit pattern
        base = 1
        h = 0
        for l in self.data:
            for i in l:
                if i:
                    h += base
                base *= 2
        return hash(h)

    def copy(self):
        # copies each column list, so the new grid's cells are independent
        g = Grid(self.width, self.height)
        g.data = [x[:] for x in self.data]
        return g

    def deepCopy(self):
        return self.copy()

    def shallowCopy(self):
        # shares the underlying data with the original grid
        g = Grid(self.width, self.height)
        g.data = self.data
        return g

    def count(self, item =True ):
        # number of cells equal to *item*
        return sum([x.count(item) for x in self.data])

    def asList(self, key = True):
        # list of (x, y) positions whose cell equals *key*
        list = []
        for x in range(self.width):
            for y in range(self.height):
                if self[x][y] == key: list.append( (x,y) )
        return list

    def packBits(self):
        """
        Returns an efficient int list representation

        (width, height, bitPackedInts...)
        """
        bits = [self.width, self.height]
        currentInt = 0
        for i in range(self.height * self.width):
            # most-significant bit of the current packed integer comes first
            bit = self.CELLS_PER_INT - (i % self.CELLS_PER_INT) - 1
            x, y = self._cellIndexToPosition(i)
            if self[x][y]:
                currentInt += 2 ** bit
            if (i + 1) % self.CELLS_PER_INT == 0:
                bits.append(currentInt)
                currentInt = 0
        bits.append(currentInt)
        return tuple(bits)

    def _cellIndexToPosition(self, index):
        # NOTE: relies on Python 2 '/' performing integer division here
        x = index / self.height
        y = index % self.height
        return x, y

    def _unpackBits(self, bits):
        """
        Fills in data from a bit-level representation
        """
        cell = 0
        for packed in bits:
            for bit in self._unpackInt(packed, self.CELLS_PER_INT):
                if cell == self.width * self.height: break
                x, y = self._cellIndexToPosition(cell)
                self[x][y] = bit
                cell += 1

    def _unpackInt(self, packed, size):
        # expand one packed integer back into CELLS_PER_INT booleans, MSB first
        # (Python 2 raise syntax, kept as-is for this Python 2 codebase)
        bools = []
        if packed < 0: raise ValueError, "must be a positive integer"
        for i in range(size):
            n = 2 ** (self.CELLS_PER_INT - i - 1)
            if packed >= n:
                bools.append(True)
                packed -= n
            else:
                bools.append(False)
        return bools
|
||||||
|
|
||||||
|
def reconstituteGrid(bitRep):
    """
    Rebuild a Grid from a packed-bits tuple (width, height, ints...);
    anything that is not exactly a tuple is returned unchanged.
    """
    # strict type check on purpose: only a genuine tuple is a bit representation
    if type(bitRep) is not tuple:
        return bitRep
    width, height = bitRep[:2]
    return Grid(width, height, bitRepresentation=bitRep[2:])
|
||||||
|
|
||||||
|
####################################
|
||||||
|
# Parts you shouldn't have to read #
|
||||||
|
####################################
|
||||||
|
|
||||||
|
class Actions:
    """
    A collection of static methods for manipulating move actions.
    """
    # unit movement vector for each direction
    _directions = {Directions.NORTH: (0, 1),
                   Directions.SOUTH: (0, -1),
                   Directions.EAST: (1, 0),
                   Directions.WEST: (-1, 0),
                   Directions.STOP: (0, 0)}

    _directionsAsList = _directions.items()

    # how far off a grid point an agent may be while still "on" it
    TOLERANCE = .001

    def reverseDirection(action):
        """Return the 180-degree opposite of *action*; unknown actions pass through."""
        return Directions.REVERSE.get(action, action)
    reverseDirection = staticmethod(reverseDirection)

    def vectorToDirection(vector):
        """Map a movement vector to a direction name; y takes priority over x."""
        dx, dy = vector
        if dy > 0:
            return Directions.NORTH
        if dy < 0:
            return Directions.SOUTH
        if dx < 0:
            return Directions.WEST
        if dx > 0:
            return Directions.EAST
        return Directions.STOP
    vectorToDirection = staticmethod(vectorToDirection)

    def directionToVector(direction, speed = 1.0):
        """Return the movement vector for *direction*, scaled by *speed*."""
        dx, dy = Actions._directions[direction]
        return (dx * speed, dy * speed)
    directionToVector = staticmethod(directionToVector)

    def getPossibleActions(config, walls):
        """Return the legal directions from *config* given the wall grid."""
        x, y = config.pos
        col, row = int(x + 0.5), int(y + 0.5)

        # In between grid points, all agents must continue straight
        if (abs(x - col) + abs(y - row) > Actions.TOLERANCE):
            return [config.getDirection()]

        legal = []
        for direction, (dx, dy) in Actions._directionsAsList:
            if not walls[col + dx][row + dy]:
                legal.append(direction)
        return legal
    getPossibleActions = staticmethod(getPossibleActions)

    def getLegalNeighbors(position, walls):
        """Return in-bounds, non-wall grid positions one step from *position*."""
        x, y = position
        col, row = int(x + 0.5), int(y + 0.5)
        neighbors = []
        for direction, (dx, dy) in Actions._directionsAsList:
            nx = col + dx
            if nx < 0 or nx == walls.width: continue
            ny = row + dy
            if ny < 0 or ny == walls.height: continue
            if not walls[nx][ny]: neighbors.append((nx, ny))
        return neighbors
    getLegalNeighbors = staticmethod(getLegalNeighbors)

    def getSuccessor(position, action):
        """Return the position reached by applying *action* at *position*."""
        dx, dy = Actions.directionToVector(action)
        x, y = position
        return (x + dx, y + dy)
    getSuccessor = staticmethod(getSuccessor)
|
||||||
|
|
||||||
|
class GameStateData:
    """
    Mutable data behind a GameState: the food grid, capsule list, agent
    states, layout, score, and per-move change flags that the display
    and rules layers consume.
    """
    def __init__( self, prevState = None ):
        """
        Generates a new data packet by copying information from its predecessor.
        """
        if prevState != None:
            # food is shared (shallow) with the predecessor; capsules are copied
            self.food = prevState.food.shallowCopy()
            self.capsules = prevState.capsules[:]
            self.agentStates = self.copyAgentStates( prevState.agentStates )
            self.layout = prevState.layout
            self._eaten = prevState._eaten
            self.score = prevState.score

        # per-move bookkeeping consumed by the graphics/rules layers
        self._foodEaten = None
        self._foodAdded = None
        self._capsuleEaten = None
        self._agentMoved = None
        self._lose = False
        self._win = False
        self.scoreChange = 0

    def deepCopy( self ):
        # NOTE(review): agentStates and capsules come from the predecessor
        # copy in __init__; only food and layout get an extra deep copy here
        state = GameStateData( self )
        state.food = self.food.deepCopy()
        state.layout = self.layout.deepCopy()
        state._agentMoved = self._agentMoved
        state._foodEaten = self._foodEaten
        state._foodAdded = self._foodAdded
        state._capsuleEaten = self._capsuleEaten
        return state

    def copyAgentStates( self, agentStates ):
        # element-wise copy so the new list's states are independent
        copiedStates = []
        for agentState in agentStates:
            copiedStates.append( agentState.copy() )
        return copiedStates

    def __eq__( self, other ):
        """
        Allows two states to be compared.
        """
        if other == None: return False
        # TODO Check for type of other
        if not self.agentStates == other.agentStates: return False
        if not self.food == other.food: return False
        if not self.capsules == other.capsules: return False
        if not self.score == other.score: return False
        return True

    def __hash__( self ):
        """
        Allows states to be keys of dictionaries.
        """
        # debug aid: surface any unhashable agent state before combining hashes
        for i, state in enumerate( self.agentStates ):
            try:
                int(hash(state))
            except TypeError, e:
                print e
                #hash(state)
        return int((hash(tuple(self.agentStates)) + 13*hash(self.food) + 113* hash(tuple(self.capsules)) + 7 * hash(self.score)) % 1048575 )

    def __str__( self ):
        # render the board as text: food/walls first, then agents, then capsules
        width, height = self.layout.width, self.layout.height
        map = Grid(width, height)
        # food may still be in packed-bits tuple form; rebuild it lazily
        if type(self.food) == type((1,2)):
            self.food = reconstituteGrid(self.food)
        for x in range(width):
            for y in range(height):
                food, walls = self.food, self.layout.walls
                map[x][y] = self._foodWallStr(food[x][y], walls[x][y])

        for agentState in self.agentStates:
            if agentState == None: continue
            if agentState.configuration == None: continue
            x,y = [int( i ) for i in nearestPoint( agentState.configuration.pos )]
            agent_dir = agentState.configuration.direction
            if agentState.isPacman:
                map[x][y] = self._pacStr( agent_dir )
            else:
                map[x][y] = self._ghostStr( agent_dir )

        for x, y in self.capsules:
            map[x][y] = 'o'

        return str(map) + ("\nScore: %d\n" % self.score)

    def _foodWallStr( self, hasFood, hasWall ):
        # one-character cell glyph: food beats wall beats empty
        if hasFood:
            return '.'
        elif hasWall:
            return '%'
        else:
            return ' '

    def _pacStr( self, dir ):
        # the glyph's "mouth" opens opposite to the travel direction
        if dir == Directions.NORTH:
            return 'v'
        if dir == Directions.SOUTH:
            return '^'
        if dir == Directions.WEST:
            return '>'
        return '<'

    def _ghostStr( self, dir ):
        # NOTE(review): the early return makes the direction-specific
        # glyphs below unreachable, so ghosts always render as 'G'
        return 'G'
        if dir == Directions.NORTH:
            return 'M'
        if dir == Directions.SOUTH:
            return 'W'
        if dir == Directions.WEST:
            return '3'
        return 'E'

    def initialize( self, layout, numGhostAgents ):
        """
        Creates an initial game state from a layout array (see layout.py).
        """
        self.food = layout.food.copy()
        #self.capsules = []
        self.capsules = layout.capsules[:]
        self.layout = layout
        self.score = 0
        self.scoreChange = 0

        self.agentStates = []
        numGhosts = 0
        for isPacman, pos in layout.agentPositions:
            if not isPacman:
                if numGhosts == numGhostAgents: continue # Max ghosts reached already
                else: numGhosts += 1
            self.agentStates.append( AgentState( Configuration( pos, Directions.STOP), isPacman) )
        self._eaten = [False for a in self.agentStates]
|
||||||
|
|
||||||
|
# Optional BOINC distributed-computing support: enabled only when the
# boinc module is importable; any import failure silently disables it.
try:
    import boinc
    _BOINC_ENABLED = True
except:
    _BOINC_ENABLED = False
|
||||||
|
|
||||||
|
class Game:
|
||||||
|
"""
|
||||||
|
The Game manages the control flow, soliciting actions from agents.
|
||||||
|
"""
|
||||||
|
|
||||||
|
    def __init__( self, agents, display, rules, startingIndex=0, muteAgents=False, catchExceptions=False ):
        """
        agents: list of Agent objects, indexed by turn order
        display: graphics/text display with initialize/update hooks
        rules: game-specific rules object (timeouts, crash handling, progress)
        muteAgents: capture each agent's stdout/stderr into a buffer
        catchExceptions: run agents under timeouts and crash handling
        """
        self.agentCrashed = False
        self.agents = agents
        self.display = display
        self.rules = rules
        self.startingIndex = startingIndex
        self.gameOver = False
        self.muteAgents = muteAgents
        self.catchExceptions = catchExceptions
        self.moveHistory = []
        # per-agent cumulative move time and warning counts, used for timeouts
        self.totalAgentTimes = [0 for agent in agents]
        self.totalAgentTimeWarnings = [0 for agent in agents]
        self.agentTimeout = False
        # one capture buffer per agent for muted output (Python 2 module)
        import cStringIO
        self.agentOutput = [cStringIO.StringIO() for agent in agents]
|
||||||
|
|
||||||
|
    def getProgress(self):
        # Fraction of the game completed, in [0, 1]; delegated to the
        # rules object until the game is over.
        if self.gameOver:
            return 1.0
        else:
            return self.rules.getProgress(self)
|
||||||
|
|
||||||
|
    def _agentCrash( self, agentIndex, quiet=False):
        "Helper method for handling agent crashes"
        # quiet=True suppresses the traceback (used for timeouts)
        if not quiet: traceback.print_exc()
        self.gameOver = True
        self.agentCrashed = True
        # let the rules decide the consequences (e.g. which side wins)
        self.rules.agentCrash(self, agentIndex)
|
||||||
|
|
||||||
|
    # NOTE(review): mute()/unmute() declare OLD_STDOUT/OLD_STDERR as module
    # globals via 'global', so these class attributes are never actually
    # read or written; the saved stream handles live at module level.
    OLD_STDOUT = None
    OLD_STDERR = None
|
||||||
|
|
||||||
|
    def mute(self, agentIndex):
        # Redirect stdout/stderr into the given agent's capture buffer,
        # saving the real streams in module globals for unmute().
        if not self.muteAgents: return
        global OLD_STDOUT, OLD_STDERR
        import cStringIO  # NOTE(review): unused here; buffers are created in __init__
        OLD_STDOUT = sys.stdout
        OLD_STDERR = sys.stderr
        sys.stdout = self.agentOutput[agentIndex]
        sys.stderr = self.agentOutput[agentIndex]
|
||||||
|
|
||||||
|
    def unmute(self):
        # Restore the real stdout/stderr saved by mute().
        if not self.muteAgents: return
        global OLD_STDOUT, OLD_STDERR
        # Revert stdout/stderr to originals
        sys.stdout = OLD_STDOUT
        sys.stderr = OLD_STDERR
|
||||||
|
|
||||||
|
|
||||||
|
def run( self ):
|
||||||
|
"""
|
||||||
|
Main control loop for game play.
|
||||||
|
"""
|
||||||
|
self.display.initialize(self.state.data)
|
||||||
|
self.numMoves = 0
|
||||||
|
|
||||||
|
###self.display.initialize(self.state.makeObservation(1).data)
|
||||||
|
# inform learning agents of the game start
|
||||||
|
for i in range(len(self.agents)):
|
||||||
|
agent = self.agents[i]
|
||||||
|
if not agent:
|
||||||
|
self.mute(i)
|
||||||
|
# this is a null agent, meaning it failed to load
|
||||||
|
# the other team wins
|
||||||
|
print >>sys.stderr, "Agent %d failed to load" % i
|
||||||
|
self.unmute()
|
||||||
|
self._agentCrash(i, quiet=True)
|
||||||
|
return
|
||||||
|
if ("registerInitialState" in dir(agent)):
|
||||||
|
self.mute(i)
|
||||||
|
if self.catchExceptions:
|
||||||
|
try:
|
||||||
|
timed_func = TimeoutFunction(agent.registerInitialState, int(self.rules.getMaxStartupTime(i)))
|
||||||
|
try:
|
||||||
|
start_time = time.time()
|
||||||
|
timed_func(self.state.deepCopy())
|
||||||
|
time_taken = time.time() - start_time
|
||||||
|
self.totalAgentTimes[i] += time_taken
|
||||||
|
except TimeoutFunctionException:
|
||||||
|
print >>sys.stderr, "Agent %d ran out of time on startup!" % i
|
||||||
|
self.unmute()
|
||||||
|
self.agentTimeout = True
|
||||||
|
self._agentCrash(i, quiet=True)
|
||||||
|
return
|
||||||
|
except Exception,data:
|
||||||
|
self._agentCrash(i, quiet=False)
|
||||||
|
self.unmute()
|
||||||
|
return
|
||||||
|
else:
|
||||||
|
agent.registerInitialState(self.state.deepCopy())
|
||||||
|
## TODO: could this exceed the total time
|
||||||
|
self.unmute()
|
||||||
|
|
||||||
|
agentIndex = self.startingIndex
|
||||||
|
numAgents = len( self.agents )
|
||||||
|
|
||||||
|
while not self.gameOver:
|
||||||
|
# Fetch the next agent
|
||||||
|
agent = self.agents[agentIndex]
|
||||||
|
move_time = 0
|
||||||
|
skip_action = False
|
||||||
|
# Generate an observation of the state
|
||||||
|
if 'observationFunction' in dir( agent ):
|
||||||
|
self.mute(agentIndex)
|
||||||
|
if self.catchExceptions:
|
||||||
|
try:
|
||||||
|
timed_func = TimeoutFunction(agent.observationFunction, int(self.rules.getMoveTimeout(agentIndex)))
|
||||||
|
try:
|
||||||
|
start_time = time.time()
|
||||||
|
observation = timed_func(self.state.deepCopy())
|
||||||
|
except TimeoutFunctionException:
|
||||||
|
skip_action = True
|
||||||
|
move_time += time.time() - start_time
|
||||||
|
self.unmute()
|
||||||
|
except Exception,data:
|
||||||
|
self._agentCrash(agentIndex, quiet=False)
|
||||||
|
self.unmute()
|
||||||
|
return
|
||||||
|
else:
|
||||||
|
observation = agent.observationFunction(self.state.deepCopy())
|
||||||
|
self.unmute()
|
||||||
|
else:
|
||||||
|
observation = self.state.deepCopy()
|
||||||
|
|
||||||
|
# Solicit an action
|
||||||
|
action = None
|
||||||
|
self.mute(agentIndex)
|
||||||
|
if self.catchExceptions:
|
||||||
|
try:
|
||||||
|
timed_func = TimeoutFunction(agent.getAction, int(self.rules.getMoveTimeout(agentIndex)) - int(move_time))
|
||||||
|
try:
|
||||||
|
start_time = time.time()
|
||||||
|
if skip_action:
|
||||||
|
raise TimeoutFunctionException()
|
||||||
|
action = timed_func( observation )
|
||||||
|
except TimeoutFunctionException:
|
||||||
|
print >>sys.stderr, "Agent %d timed out on a single move!" % agentIndex
|
||||||
|
self.agentTimeout = True
|
||||||
|
self._agentCrash(agentIndex, quiet=True)
|
||||||
|
self.unmute()
|
||||||
|
return
|
||||||
|
|
||||||
|
move_time += time.time() - start_time
|
||||||
|
|
||||||
|
if move_time > self.rules.getMoveWarningTime(agentIndex):
|
||||||
|
self.totalAgentTimeWarnings[agentIndex] += 1
|
||||||
|
print >>sys.stderr, "Agent %d took too long to make a move! This is warning %d" % (agentIndex, self.totalAgentTimeWarnings[agentIndex])
|
||||||
|
if self.totalAgentTimeWarnings[agentIndex] > self.rules.getMaxTimeWarnings(agentIndex):
|
||||||
|
print >>sys.stderr, "Agent %d exceeded the maximum number of warnings: %d" % (agentIndex, self.totalAgentTimeWarnings[agentIndex])
|
||||||
|
self.agentTimeout = True
|
||||||
|
self._agentCrash(agentIndex, quiet=True)
|
||||||
|
self.unmute()
|
||||||
|
return
|
||||||
|
|
||||||
|
self.totalAgentTimes[agentIndex] += move_time
|
||||||
|
#print "Agent: %d, time: %f, total: %f" % (agentIndex, move_time, self.totalAgentTimes[agentIndex])
|
||||||
|
if self.totalAgentTimes[agentIndex] > self.rules.getMaxTotalTime(agentIndex):
|
||||||
|
print >>sys.stderr, "Agent %d ran out of time! (time: %1.2f)" % (agentIndex, self.totalAgentTimes[agentIndex])
|
||||||
|
self.agentTimeout = True
|
||||||
|
self._agentCrash(agentIndex, quiet=True)
|
||||||
|
self.unmute()
|
||||||
|
return
|
||||||
|
self.unmute()
|
||||||
|
except Exception,data:
|
||||||
|
self._agentCrash(agentIndex)
|
||||||
|
self.unmute()
|
||||||
|
return
|
||||||
|
else:
|
||||||
|
action = agent.getAction(observation)
|
||||||
|
self.unmute()
|
||||||
|
|
||||||
|
# Execute the action
|
||||||
|
self.moveHistory.append( (agentIndex, action) )
|
||||||
|
if self.catchExceptions:
|
||||||
|
try:
|
||||||
|
self.state = self.state.generateSuccessor( agentIndex, action )
|
||||||
|
except Exception,data:
|
||||||
|
self.mute(agentIndex)
|
||||||
|
self._agentCrash(agentIndex)
|
||||||
|
self.unmute()
|
||||||
|
return
|
||||||
|
else:
|
||||||
|
self.state = self.state.generateSuccessor( agentIndex, action )
|
||||||
|
|
||||||
|
# Change the display
|
||||||
|
self.display.update( self.state.data )
|
||||||
|
###idx = agentIndex - agentIndex % 2 + 1
|
||||||
|
###self.display.update( self.state.makeObservation(idx).data )
|
||||||
|
|
||||||
|
# Allow for game specific conditions (winning, losing, etc.)
|
||||||
|
self.rules.process(self.state, self)
|
||||||
|
# Track progress
|
||||||
|
if agentIndex == numAgents + 1: self.numMoves += 1
|
||||||
|
# Next agent
|
||||||
|
agentIndex = ( agentIndex + 1 ) % numAgents
|
||||||
|
|
||||||
|
if _BOINC_ENABLED:
|
||||||
|
boinc.set_fraction_done(self.getProgress())
|
||||||
|
|
||||||
|
# inform a learning agent of the game result
|
||||||
|
for agentIndex, agent in enumerate(self.agents):
|
||||||
|
if "final" in dir( agent ) :
|
||||||
|
try:
|
||||||
|
self.mute(agentIndex)
|
||||||
|
agent.final( self.state )
|
||||||
|
self.unmute()
|
||||||
|
except Exception,data:
|
||||||
|
if not self.catchExceptions: raise
|
||||||
|
self._agentCrash(agentIndex)
|
||||||
|
self.unmute()
|
||||||
|
return
|
||||||
|
self.display.finish()
|
||||||
81
p3_rl/ghostAgents.py
Normal file
81
p3_rl/ghostAgents.py
Normal file
@@ -0,0 +1,81 @@
|
|||||||
|
# ghostAgents.py
|
||||||
|
# --------------
|
||||||
|
# Licensing Information: You are free to use or extend these projects for
|
||||||
|
# educational purposes provided that (1) you do not distribute or publish
|
||||||
|
# solutions, (2) you retain this notice, and (3) you provide clear
|
||||||
|
# attribution to UC Berkeley, including a link to http://ai.berkeley.edu.
|
||||||
|
#
|
||||||
|
# Attribution Information: The Pacman AI projects were developed at UC Berkeley.
|
||||||
|
# The core projects and autograders were primarily created by John DeNero
|
||||||
|
# (denero@cs.berkeley.edu) and Dan Klein (klein@cs.berkeley.edu).
|
||||||
|
# Student side autograding was added by Brad Miller, Nick Hay, and
|
||||||
|
# Pieter Abbeel (pabbeel@cs.berkeley.edu).
|
||||||
|
|
||||||
|
|
||||||
|
from game import Agent
|
||||||
|
from game import Actions
|
||||||
|
from game import Directions
|
||||||
|
import random
|
||||||
|
from util import manhattanDistance
|
||||||
|
import util
|
||||||
|
|
||||||
|
class GhostAgent( Agent ):
    """Base class for ghosts: picks a move by sampling the subclass's action distribution."""

    def __init__(self, index):
        self.index = index  # which agent slot this ghost occupies in the game state

    def getAction(self, state):
        # Sample from the distribution supplied by the subclass; if no action
        # carries any weight, stay put.
        actionDist = self.getDistribution(state)
        if actionDist:
            return util.chooseFromDistribution(actionDist)
        return Directions.STOP

    def getDistribution(self, state):
        "Returns a Counter encoding a distribution over actions from the provided state."
        util.raiseNotDefined()
|
|
||||||
|
class RandomGhost( GhostAgent ):
    """A ghost that chooses among its legal actions uniformly at random."""

    def getDistribution(self, state):
        # Give every legal action equal weight, then normalize to a probability
        # distribution (sums to 1).
        legal = state.getLegalActions(self.index)
        dist = util.Counter()
        for action in legal:
            dist[action] = 1.0
        dist.normalize()
        return dist
|
||||||
|
class DirectionalGhost( GhostAgent ):
    """A ghost that prefers to rush Pacman, or flee when scared."""

    def __init__(self, index, prob_attack=0.8, prob_scaredFlee=0.8):
        self.index = index
        self.prob_attack = prob_attack          # mass placed on closing in when brave
        self.prob_scaredFlee = prob_scaredFlee  # mass placed on retreating when scared

    def getDistribution(self, state):
        # Pull the pieces of game state this policy depends on.
        ghostState = state.getGhostState(self.index)
        legalActions = state.getLegalActions(self.index)
        ghostPos = state.getGhostPosition(self.index)
        isScared = ghostState.scaredTimer > 0

        # Scared ghosts move at half speed.
        speed = 0.5 if isScared else 1

        # Position the ghost would occupy after each legal action.
        moveVectors = [Actions.directionToVector(a, speed) for a in legalActions]
        successorPositions = [(ghostPos[0] + dx, ghostPos[1] + dy) for dx, dy in moveVectors]
        pacmanPosition = state.getPacmanPosition()

        # Score successors by Manhattan distance to Pacman: maximize it while
        # scared (flee), minimize it otherwise (attack).
        distances = [manhattanDistance(p, pacmanPosition) for p in successorPositions]
        if isScared:
            bestScore, bestProb = max(distances), self.prob_scaredFlee
        else:
            bestScore, bestProb = min(distances), self.prob_attack
        bestActions = [a for a, d in zip(legalActions, distances) if d == bestScore]

        # Mixture: bestProb spread evenly over the best actions, plus a uniform
        # (1 - bestProb) component over all legal actions.
        dist = util.Counter()
        for a in bestActions:
            dist[a] = bestProb / len(bestActions)
        for a in legalActions:
            dist[a] += (1 - bestProb) / len(legalActions)
        dist.normalize()
        return dist
||||||
282
p3_rl/grading.py
Normal file
282
p3_rl/grading.py
Normal file
@@ -0,0 +1,282 @@
|
|||||||
|
# grading.py
|
||||||
|
# ----------
|
||||||
|
# Licensing Information: You are free to use or extend these projects for
|
||||||
|
# educational purposes provided that (1) you do not distribute or publish
|
||||||
|
# solutions, (2) you retain this notice, and (3) you provide clear
|
||||||
|
# attribution to UC Berkeley, including a link to http://ai.berkeley.edu.
|
||||||
|
#
|
||||||
|
# Attribution Information: The Pacman AI projects were developed at UC Berkeley.
|
||||||
|
# The core projects and autograders were primarily created by John DeNero
|
||||||
|
# (denero@cs.berkeley.edu) and Dan Klein (klein@cs.berkeley.edu).
|
||||||
|
# Student side autograding was added by Brad Miller, Nick Hay, and
|
||||||
|
# Pieter Abbeel (pabbeel@cs.berkeley.edu).
|
||||||
|
|
||||||
|
|
||||||
|
"Common code for autograders"
|
||||||
|
|
||||||
|
import cgi
|
||||||
|
import time
|
||||||
|
import sys
|
||||||
|
import traceback
|
||||||
|
import pdb
|
||||||
|
from collections import defaultdict
|
||||||
|
import util
|
||||||
|
|
||||||
|
class Grades:
    "A data structure for project grades, along with formatting code to display them"

    def __init__(self, projectName, questionsAndMaxesList, edxOutput=False, muteOutput=False):
        """
        Defines the grading scheme for a project
          projectName: project name
          questionsAndMaxesDict: a list of (question name, max points per question)
        """
        # Preserve the caller's question order; maxes maps question name -> max points.
        self.questions = [el[0] for el in questionsAndMaxesList]
        self.maxes = dict(questionsAndMaxesList)
        self.points = Counter()  # question name -> points earned (missing keys read as 0)
        self.messages = dict([(q, []) for q in self.questions])  # per-question transcript lines
        self.project = projectName
        self.start = time.localtime()[1:6]  # (month, day, hour, minute, second)
        self.sane = True  # Sanity checks
        self.currentQuestion = None  # Which question we're grading
        self.edxOutput = edxOutput
        self.mute = muteOutput
        self.prereqs = defaultdict(set)  # question -> set of prerequisite question names

        #print 'Autograder transcript for %s' % self.project
        print 'Starting on %d-%d at %d:%02d:%02d' % self.start

    def addPrereq(self, question, prereq):
        # Record that `prereq` must earn full credit before `question` is graded.
        self.prereqs[question].add(prereq)

    def grade(self, gradingModule, exceptionMap = {}, bonusPic = False):
        """
        Grades each question
          gradingModule: the module with all the grading functions (pass in with sys.modules[__name__])
        """

        completedQuestions = set([])
        for q in self.questions:
            print '\nQuestion %s' % q
            print '=' * (9 + len(q))
            print
            self.currentQuestion = q

            # Skip this question if any prerequisite has not been fully solved yet.
            incompleted = self.prereqs[q].difference(completedQuestions)
            if len(incompleted) > 0:
                prereq = incompleted.pop()
                print \
"""*** NOTE: Make sure to complete Question %s before working on Question %s,
*** because Question %s builds upon your answer for Question %s.
""" % (prereq, q, q, prereq)
                continue

            if self.mute: util.mutePrint()
            try:
                # Run the question's grading function with a 300-second timeout.
                util.TimeoutFunction(getattr(gradingModule, q),300)(self) # Call the question's function
                #TimeoutFunction(getattr(gradingModule, q),1200)(self) # Call the question's function
            except Exception, inst:
                # q is a name like 'q1'; q[1] passes just the digit to the hint lookup.
                self.addExceptionMessage(q, inst, traceback)
                self.addErrorHints(exceptionMap, inst, q[1])
            except:
                # Python 2 permits raising non-Exception objects (e.g. strings).
                self.fail('FAIL: Terminated with a string exception.')
            finally:
                if self.mute: util.unmutePrint()

            # Only a full-credit question unlocks its dependents.
            if self.points[q] >= self.maxes[q]:
                completedQuestions.add(q)

            print '\n### Question %s: %d/%d ###\n' % (q, self.points[q], self.maxes[q])

        print '\nFinished at %d:%02d:%02d' % time.localtime()[3:6]
        print "\nProvisional grades\n=================="

        for q in self.questions:
            print 'Question %s: %d/%d' % (q, self.points[q], self.maxes[q])
        print '------------------'
        print 'Total: %d/%d' % (self.points.totalCount(), sum(self.maxes.values()))
        if bonusPic and self.points.totalCount() == 25:
            # NOTE(review): the leading whitespace of this ASCII art was lost in
            # extraction; the alignment below is an approximate reconstruction.
            print """

                     ALL HAIL GRANDPAC.
              LONG LIVE THE GHOSTBUSTING KING.

                  ---      ----      ---
                  |  \    /  + \    /  |
                  | + \--/      \--/ + |
                  |   +     +          |
                  | +     +        +   |
                  @@@@@@@@@@@@@@@@@@@@@@@@@@
                @@@@@@@@@@@@@@@@@@@@@@@@@@@@@@
              @@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@
             @@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@
          \   @@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@
           \ /  @@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@
            V     @@@@@@@@@@@@@@@@@@@@@@@@@@@@
              \ /  @@@@@@@@@@@@@@@@@@@@@@@@@@
               V     @@@@@@@@@@@@@@@@@@@@@@@@
                      @@@@@@@@@@@@@@@@@@@@@@
              /\      @@@@@@@@@@@@@@@@@@@@@@
             /  \   @@@@@@@@@@@@@@@@@@@@@@@@@
            /\  /   @@@@@@@@@@@@@@@@@@@@@@@@@@@
           /  \    @@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@
          /    @@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@
              @@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@
               @@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@
                 @@@@@@@@@@@@@@@@@@@@@@@@@@@@@@
                    @@@@@@@@@@@@@@@@@@@@@@@@@@
                         @@@@@@@@@@@@@@@@@@

"""
        print """
Your grades are NOT yet registered. To register your grades, make sure
to follow your instructor's guidelines to receive credit on your project.
"""

        if self.edxOutput:
            self.produceOutput()

    def addExceptionMessage(self, q, inst, traceback):
        """
        Method to format the exception message, this is more complicated because
        we need to cgi.escape the traceback but wrap the exception in a <pre> tag
        """
        self.fail('FAIL: Exception raised: %s' % inst)
        self.addMessage('')
        # Record the full traceback line-by-line in the question's transcript.
        for line in traceback.format_exc().split('\n'):
            self.addMessage(line)

    def addErrorHints(self, exceptionMap, errorInstance, questionNum):
        # Look up a human-readable hint for this exception type, preferring a
        # question-specific entry over the general one.
        typeOf = str(type(errorInstance))
        questionName = 'q' + questionNum
        errorHint = ''

        # question specific error hints
        if exceptionMap.get(questionName):
            questionMap = exceptionMap.get(questionName)
            if (questionMap.get(typeOf)):
                errorHint = questionMap.get(typeOf)
        # fall back to general error messages if a question specific
        # one does not exist
        if (exceptionMap.get(typeOf)):
            errorHint = exceptionMap.get(typeOf)

        # dont include the HTML if we have no error hint
        if not errorHint:
            return ''

        for line in errorHint.split('\n'):
            self.addMessage(line)

    def produceOutput(self):
        # Write the edX-formatted HTML transcript plus a one-number grade file.
        # NOTE(review): leading whitespace inside the HTML template strings was
        # lost in extraction; indentation below is approximate (cosmetic only).
        edxOutput = open('edx_response.html', 'w')
        edxOutput.write("<div>")

        # first sum
        total_possible = sum(self.maxes.values())
        total_score = sum(self.points.values())
        checkOrX = '<span class="incorrect"/>'
        if (total_score >= total_possible):
            checkOrX = '<span class="correct"/>'
        header = """
        <h3>
            Total score ({total_score} / {total_possible})
        </h3>
    """.format(total_score = total_score,
      total_possible = total_possible,
      checkOrX = checkOrX
    )
        edxOutput.write(header)

        for q in self.questions:
            # A 2-character question id like 'q1' is displayed as just its number.
            if len(q) == 2:
                name = q[1]
            else:
                name = q
            checkOrX = '<span class="incorrect"/>'
            if (self.points[q] == self.maxes[q]):
                checkOrX = '<span class="correct"/>'
            #messages = '\n<br/>\n'.join(self.messages[q])
            messages = "<pre>%s</pre>" % '\n'.join(self.messages[q])
            output = """
        <div class="test">
          <section>
          <div class="shortform">
            Question {q} ({points}/{max}) {checkOrX}
          </div>
        <div class="longform">
          {messages}
        </div>
        </section>
      </div>
      """.format(q = name,
        max = self.maxes[q],
        messages = messages,
        checkOrX = checkOrX,
        points = self.points[q]
      )
            # print "*** output for Question %s " % q[1]
            # print output
            edxOutput.write(output)
        edxOutput.write("</div>")
        edxOutput.close()
        edxOutput = open('edx_grade', 'w')
        edxOutput.write(str(self.points.totalCount()))
        edxOutput.close()

    def fail(self, message, raw=False):
        "Sets sanity check bit to false and outputs a message"
        self.sane = False
        self.assignZeroCredit()
        self.addMessage(message, raw)

    def assignZeroCredit(self):
        # Zero out the current question's score.
        self.points[self.currentQuestion] = 0

    def addPoints(self, amt):
        self.points[self.currentQuestion] += amt

    def deductPoints(self, amt):
        self.points[self.currentQuestion] -= amt

    def assignFullCredit(self, message="", raw=False):
        # Award the maximum for the current question, optionally logging a message.
        self.points[self.currentQuestion] = self.maxes[self.currentQuestion]
        if message != "":
            self.addMessage(message, raw)

    def addMessage(self, message, raw=False):
        # Append a line to the current question's transcript; non-raw messages
        # are echoed to stdout and HTML-escaped for the edX output.
        if not raw:
            # We assume raw messages, formatted for HTML, are printed separately
            if self.mute: util.unmutePrint()
            print '*** ' + message
            if self.mute: util.mutePrint()
            message = cgi.escape(message)
        self.messages[self.currentQuestion].append(message)

    def addMessageToEmail(self, message):
        # Deprecated: kept only so old callers do not break; body is a no-op loop.
        print "WARNING**** addMessageToEmail is deprecated %s" % message
        for line in message.split('\n'):
            pass
            #print '%%% ' + line + ' %%%'
            #self.messages[self.currentQuestion].append(line)
|
|
||||||
|
class Counter(dict):
    """
    Dict with default 0
    """

    def __getitem__(self, idx):
        # Missing keys read as 0 rather than raising KeyError.
        return dict.get(self, idx, 0)

    def totalCount(self):
        """
        Returns the sum of counts for all keys.
        """
        return sum(self.values())
|
|
||||||
333
p3_rl/graphicsCrawlerDisplay.py
Normal file
333
p3_rl/graphicsCrawlerDisplay.py
Normal file
@@ -0,0 +1,333 @@
|
|||||||
|
# graphicsCrawlerDisplay.py
|
||||||
|
# -------------------------
|
||||||
|
# Licensing Information: You are free to use or extend these projects for
|
||||||
|
# educational purposes provided that (1) you do not distribute or publish
|
||||||
|
# solutions, (2) you retain this notice, and (3) you provide clear
|
||||||
|
# attribution to UC Berkeley, including a link to http://ai.berkeley.edu.
|
||||||
|
#
|
||||||
|
# Attribution Information: The Pacman AI projects were developed at UC Berkeley.
|
||||||
|
# The core projects and autograders were primarily created by John DeNero
|
||||||
|
# (denero@cs.berkeley.edu) and Dan Klein (klein@cs.berkeley.edu).
|
||||||
|
# Student side autograding was added by Brad Miller, Nick Hay, and
|
||||||
|
# Pieter Abbeel (pabbeel@cs.berkeley.edu).
|
||||||
|
|
||||||
|
|
||||||
|
# graphicsCrawlerDisplay.py
|
||||||
|
# -------------------------
|
||||||
|
# Licensing Information: Please do not distribute or publish solutions to this
|
||||||
|
# project. You are free to use and extend these projects for educational
|
||||||
|
# purposes. The Pacman AI projects were developed at UC Berkeley, primarily by
|
||||||
|
# John DeNero (denero@cs.berkeley.edu) and Dan Klein (klein@cs.berkeley.edu).
|
||||||
|
# Student side autograding was added by Brad Miller, Nick Hay, and Pieter
|
||||||
|
# Abbeel in Spring 2013.
|
||||||
|
# For more info, see http://inst.eecs.berkeley.edu/~cs188/pacman/pacman.html
|
||||||
|
|
||||||
|
import Tkinter
|
||||||
|
import qlearningAgents
|
||||||
|
import time
|
||||||
|
import threading
|
||||||
|
import sys
|
||||||
|
import crawler
|
||||||
|
#import pendulum
|
||||||
|
import math
|
||||||
|
from math import pi as PI
|
||||||
|
|
||||||
|
robotType = 'crawler'
|
||||||
|
|
||||||
|
class Application:
    # GUI application driving a Q-learning crawler robot: Tkinter controls for
    # epsilon / discount / learning rate / step delay, plus a background thread
    # that steps the learner. NOTE(review): original leading indentation was
    # lost in extraction; structure below follows the visible control flow.

    def sigmoid(self, x):
        # Squash an unbounded slider counter into (0, 1); base-2 variant of the
        # logistic function.
        return 1.0 / (1.0 + 2.0 ** (-x))

    def incrementSpeed(self, inc):
        # Multiplicative speed control: inc < 1 speeds up, inc > 1 slows down.
        self.tickTime *= inc
        # self.epsilon = min(1.0, self.epsilon)
        # self.epsilon = max(0.0,self.epsilon)
        # self.learner.setSpeed(self.epsilon)
        self.speed_label['text'] = 'Step Delay: %.5f' % (self.tickTime)

    def incrementEpsilon(self, inc):
        # Adjust exploration rate via the sigmoid-mapped counter self.ep.
        self.ep += inc
        self.epsilon = self.sigmoid(self.ep)
        self.learner.setEpsilon(self.epsilon)
        self.epsilon_label['text'] = 'Epsilon: %.3f' % (self.epsilon)

    def incrementGamma(self, inc):
        # Adjust the discount factor via the sigmoid-mapped counter self.ga.
        self.ga += inc
        self.gamma = self.sigmoid(self.ga)
        self.learner.setDiscount(self.gamma)
        self.gamma_label['text'] = 'Discount: %.3f' % (self.gamma)

    def incrementAlpha(self, inc):
        # Adjust the learning rate via the sigmoid-mapped counter self.al.
        self.al += inc
        self.alpha = self.sigmoid(self.al)
        self.learner.setLearningRate(self.alpha)
        self.alpha_label['text'] = 'Learning Rate: %.3f' % (self.alpha)

    def __initGUI(self, win):
        ## Window ##
        self.win = win

        ## Initialize Frame ##
        win.grid()
        self.dec = -.5  # decrement applied by "-" buttons
        self.inc = .5   # increment applied by "+" buttons
        self.tickTime = 0.1  # seconds between simulation steps

        ## Epsilon Button + Label ##
        self.setupSpeedButtonAndLabel(win)

        self.setupEpsilonButtonAndLabel(win)

        ## Gamma Button + Label ##
        self.setUpGammaButtonAndLabel(win)

        ## Alpha Button + Label ##
        self.setupAlphaButtonAndLabel(win)

        ## Exit Button ##
        #self.exit_button = Tkinter.Button(win,text='Quit', command=self.exit)
        #self.exit_button.grid(row=0, column=9)

        ## Simulation Buttons ##
        # self.setupSimulationButtons(win)

        ## Canvas ##
        # NOTE(review): uses the module-level global `root`, not `win`.
        self.canvas = Tkinter.Canvas(root, height=200, width=1000)
        self.canvas.grid(row=2,columnspan=10)

    def setupAlphaButtonAndLabel(self, win):
        # "-" / label / "+" trio controlling the learning rate (grid row 1).
        self.alpha_minus = Tkinter.Button(win,
        text="-",command=(lambda: self.incrementAlpha(self.dec)))
        self.alpha_minus.grid(row=1, column=3, padx=10)

        self.alpha = self.sigmoid(self.al)
        self.alpha_label = Tkinter.Label(win, text='Learning Rate: %.3f' % (self.alpha))
        self.alpha_label.grid(row=1, column=4)

        self.alpha_plus = Tkinter.Button(win,
        text="+",command=(lambda: self.incrementAlpha(self.inc)))
        self.alpha_plus.grid(row=1, column=5, padx=10)

    def setUpGammaButtonAndLabel(self, win):
        # "-" / label / "+" trio controlling the discount factor (grid row 1).
        self.gamma_minus = Tkinter.Button(win,
        text="-",command=(lambda: self.incrementGamma(self.dec)))
        self.gamma_minus.grid(row=1, column=0, padx=10)

        self.gamma = self.sigmoid(self.ga)
        self.gamma_label = Tkinter.Label(win, text='Discount: %.3f' % (self.gamma))
        self.gamma_label.grid(row=1, column=1)

        self.gamma_plus = Tkinter.Button(win,
        text="+",command=(lambda: self.incrementGamma(self.inc)))
        self.gamma_plus.grid(row=1, column=2, padx=10)

    def setupEpsilonButtonAndLabel(self, win):
        # "-" / label / "+" trio controlling exploration epsilon (grid row 0).
        self.epsilon_minus = Tkinter.Button(win,
        text="-",command=(lambda: self.incrementEpsilon(self.dec)))
        self.epsilon_minus.grid(row=0, column=3)

        self.epsilon = self.sigmoid(self.ep)
        self.epsilon_label = Tkinter.Label(win, text='Epsilon: %.3f' % (self.epsilon))
        self.epsilon_label.grid(row=0, column=4)

        self.epsilon_plus = Tkinter.Button(win,
        text="+",command=(lambda: self.incrementEpsilon(self.inc)))
        self.epsilon_plus.grid(row=0, column=5)

    def setupSpeedButtonAndLabel(self, win):
        # "-" / label / "+" trio controlling step delay; note the multiplicative
        # factors (.5 halves the delay, 2 doubles it).
        self.speed_minus = Tkinter.Button(win,
        text="-",command=(lambda: self.incrementSpeed(.5)))
        self.speed_minus.grid(row=0, column=0)

        self.speed_label = Tkinter.Label(win, text='Step Delay: %.5f' % (self.tickTime))
        self.speed_label.grid(row=0, column=1)

        self.speed_plus = Tkinter.Button(win,
        text="+",command=(lambda: self.incrementSpeed(2)))
        self.speed_plus.grid(row=0, column=2)

    def skip5kSteps(self):
        # Fast-forward: the run loop will execute 5000 steps without drawing delay.
        self.stepsToSkip = 5000

    def __init__(self, win):

        # Slider counters feeding sigmoid(): ep -> epsilon, ga -> gamma, al -> alpha.
        self.ep = 0
        self.ga = 2
        self.al = 2
        self.stepCount = 0
        ## Init Gui

        self.__initGUI(win)

        # Init environment
        if robotType == 'crawler':
            self.robot = crawler.CrawlingRobot(self.canvas)
            self.robotEnvironment = crawler.CrawlingRobotEnvironment(self.robot)
        elif robotType == 'pendulum':
            # NOTE(review): the `pendulum` import is commented out at module
            # level; this branch would raise NameError if selected.
            self.robot = pendulum.PendulumRobot(self.canvas)
            self.robotEnvironment = \
                pendulum.PendulumRobotEnvironment(self.robot)
        else:
            # Python 2 string exception (illegal in Python 3).
            raise "Unknown RobotType"

        # Init Agent
        simulationFn = lambda agent: \
          simulation.SimulationEnvironment(self.robotEnvironment,agent)
        actionFn = lambda state: \
          self.robotEnvironment.getPossibleActions(state)
        self.learner = qlearningAgents.QLearningAgent(actionFn=actionFn)

        self.learner.setEpsilon(self.epsilon)
        self.learner.setLearningRate(self.alpha)
        self.learner.setDiscount(self.gamma)

        # Start GUI
        self.running = True
        self.stopped = False
        self.stepsToSkip = 0
        # Learning runs on a background thread so the Tk mainloop stays responsive.
        self.thread = threading.Thread(target=self.run)
        self.thread.start()

    def exit(self):
        # Ask the run loop to stop, wait up to ~0.5s for it, then tear down Tk.
        self.running = False
        for i in range(5):
            if not self.stopped:
                time.sleep(0.1)
        try:
            self.win.destroy()
        except:
            pass
        sys.exit(0)

    def step(self):
        # One Q-learning transition: observe state, pick an action, apply it,
        # and feed the (s, a, s', r) tuple back to the learner.

        self.stepCount += 1

        state = self.robotEnvironment.getCurrentState()
        actions = self.robotEnvironment.getPossibleActions(state)
        # Terminal state (no actions): reset the environment and continue.
        if len(actions) == 0.0:
            self.robotEnvironment.reset()
            state = self.robotEnvironment.getCurrentState()
            actions = self.robotEnvironment.getPossibleActions(state)
            print 'Reset!'
        action = self.learner.getAction(state)
        if action == None:
            # Python 2 string exception; signals an unimplemented student agent.
            raise 'None action returned: Code Not Complete'
        nextState, reward = self.robotEnvironment.doAction(action)
        self.learner.observeTransition(state, action, nextState, reward)

    def animatePolicy(self):
        # Paint a 100x100 (angle x velocity) map of the greedy policy onto the
        # canvas; pendulum-only.
        if robotType != 'pendulum':
            raise 'Only pendulum can animatePolicy'

        totWidth = self.canvas.winfo_reqwidth()
        totHeight = self.canvas.winfo_reqheight()

        length = 0.48 * min(totWidth, totHeight)
        x,y = totWidth-length-30, length+10

        angleMin, angleMax = self.robot.getMinAndMaxAngle()
        velMin, velMax = self.robot.getMinAndMaxAngleVelocity()

        # Draw the bounding box and legend only once.
        if not 'animatePolicyBox' in dir(self):
            self.canvas.create_line(x,y,x+length,y)
            self.canvas.create_line(x+length,y,x+length,y-length)
            self.canvas.create_line(x+length,y-length,x,y-length)
            self.canvas.create_line(x,y-length,x,y)
            self.animatePolicyBox = 1
            self.canvas.create_text(x+length/2,y+10,text='angle')
            self.canvas.create_text(x-30,y-length/2,text='velocity')
            self.canvas.create_text(x-60,y-length/4,text='Blue = kickLeft')
            self.canvas.create_text(x-60,y-length/4+20,text='Red = kickRight')
            self.canvas.create_text(x-60,y-length/4+40,text='White = doNothing')

        angleDelta = (angleMax-angleMin) / 100
        velDelta = (velMax-velMin) / 100
        for i in range(100):
            angle = angleMin + i * angleDelta

            for j in range(100):
                vel = velMin + j * velDelta
                state = self.robotEnvironment.getState(angle,vel)
                # NOTE(review): `max` shadows the builtin within this loop.
                max, argMax = None, None
                if not self.learner.seenState(state):
                    argMax = 'unseen'
                else:
                    for action in ('kickLeft','kickRight','doNothing'):
                        qVal = self.learner.getQValue(state, action)
                        if max == None or qVal > max:
                            max, argMax = qVal, action
                if argMax != 'unseen':
                    if argMax == 'kickLeft':
                        color = 'blue'
                    elif argMax == 'kickRight':
                        color = 'red'
                    elif argMax == 'doNothing':
                        color = 'white'
                    dx = length / 100.0
                    dy = length / 100.0
                    x0, y0 = x+i*dx, y-j*dy
                    self.canvas.create_rectangle(x0,y0,x0+dx,y0+dy,fill=color)

    def run(self):
        # Background learning loop: sleep tickTime between steps, batching extra
        # steps when the requested delay is below the minimum sleep resolution.
        self.stepCount = 0
        self.learner.startEpisode()
        while True:
            minSleep = .01
            tm = max(minSleep, self.tickTime)
            time.sleep(tm)
            self.stepsToSkip = int(tm / self.tickTime) - 1

            if not self.running:
                self.stopped = True
                return
            for i in range(self.stepsToSkip):
                self.step()
            self.stepsToSkip = 0
            self.step()
            # self.robot.draw()
        # NOTE(review): unreachable — the while loop only exits via `return`.
        self.learner.stopEpisode()

    def start(self):
        # Enter the Tk event loop; blocks until the window closes.
        self.win.mainloop()
||||||
|
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
|
def run():
    # Entry point for the crawler GUI: builds the Tk root window, starts the
    # Application (which spawns the learning thread), and schedules a 10 ms
    # redraw timer on the Tk event loop.
    global root
    root = Tkinter.Tk()
    root.title( 'Crawler GUI' )
    root.resizable( 0, 0 )

    # root.mainloop()

    app = Application(root)
    def update_gui():
        # Periodic redraw on the Tk thread; reschedules itself every 10 ms.
        app.robot.draw(app.stepCount, app.tickTime)
        root.after(10, update_gui)
    update_gui()

    # Route the window-close button through the app's clean shutdown path.
    root.protocol( 'WM_DELETE_WINDOW', app.exit)
    try:
        app.start()
    except:
        # Any mainloop failure (including KeyboardInterrupt) triggers shutdown.
        app.exit()
||||||
679
p3_rl/graphicsDisplay.py
Normal file
679
p3_rl/graphicsDisplay.py
Normal file
@@ -0,0 +1,679 @@
|
|||||||
|
# graphicsDisplay.py
|
||||||
|
# ------------------
|
||||||
|
# Licensing Information: You are free to use or extend these projects for
|
||||||
|
# educational purposes provided that (1) you do not distribute or publish
|
||||||
|
# solutions, (2) you retain this notice, and (3) you provide clear
|
||||||
|
# attribution to UC Berkeley, including a link to http://ai.berkeley.edu.
|
||||||
|
#
|
||||||
|
# Attribution Information: The Pacman AI projects were developed at UC Berkeley.
|
||||||
|
# The core projects and autograders were primarily created by John DeNero
|
||||||
|
# (denero@cs.berkeley.edu) and Dan Klein (klein@cs.berkeley.edu).
|
||||||
|
# Student side autograding was added by Brad Miller, Nick Hay, and
|
||||||
|
# Pieter Abbeel (pabbeel@cs.berkeley.edu).
|
||||||
|
|
||||||
|
|
||||||
|
from graphicsUtils import *
|
||||||
|
import math, time
|
||||||
|
from game import Directions
|
||||||
|
|
||||||
|
###########################
|
||||||
|
# GRAPHICS DISPLAY CODE #
|
||||||
|
###########################
|
||||||
|
|
||||||
|
# Most code by Dan Klein and John Denero written or rewritten for cs188, UC Berkeley.
|
||||||
|
# Some code from a Pacman implementation by LiveWires, and used / modified with permission.
|
||||||
|
|
||||||
|
# Display geometry and palette for the Pacman board renderer.
DEFAULT_GRID_SIZE = 30.0  # pixels per maze cell
INFO_PANE_HEIGHT = 35     # pixels reserved below the maze for the score pane
BACKGROUND_COLOR = formatColor(0,0,0)
WALL_COLOR = formatColor(0.0/255.0, 51.0/255.0, 255.0/255.0)
INFO_PANE_COLOR = formatColor(.4,.4,0)
SCORE_COLOR = formatColor(.9, .9, .9)
PACMAN_OUTLINE_WIDTH = 2
PACMAN_CAPTURE_OUTLINE_WIDTH = 4

# Ghost body colors, indexed by ghost number.
GHOST_COLORS = []
GHOST_COLORS.append(formatColor(.9,0,0)) # Red
GHOST_COLORS.append(formatColor(0,.3,.9)) # Blue
GHOST_COLORS.append(formatColor(.98,.41,.07)) # Orange
GHOST_COLORS.append(formatColor(.1,.75,.7)) # Green
GHOST_COLORS.append(formatColor(1.0,0.6,0.0)) # Yellow
GHOST_COLORS.append(formatColor(.4,0.13,0.91)) # Purple

TEAM_COLORS = GHOST_COLORS[:2]  # capture mode: red team, blue team

# Ghost outline as (x, y) vertices in cell-relative coordinates.
GHOST_SHAPE = [
    ( 0, 0.3 ),
    ( 0.25, 0.75 ),
    ( 0.5, 0.3 ),
    ( 0.75, 0.75 ),
    ( 0.75, -0.5 ),
    ( 0.5, -0.75 ),
    (-0.5, -0.75 ),
    (-0.75, -0.5 ),
    (-0.75, 0.75 ),
    (-0.5, 0.3 ),
    (-0.25, 0.75 )
  ]
GHOST_SIZE = 0.65
SCARED_COLOR = formatColor(1,1,1)

# Python 2 `map` returns a list here; under Python 3 this would be a lazy iterator.
GHOST_VEC_COLORS = map(colorToVector, GHOST_COLORS)

PACMAN_COLOR = formatColor(255.0/255.0,255.0/255.0,61.0/255)
PACMAN_SCALE = 0.5
#pacman_speed = 0.25

# Food
FOOD_COLOR = formatColor(1,1,1)
FOOD_SIZE = 0.1

# Laser
LASER_COLOR = formatColor(1,0,0)
LASER_SIZE = 0.02

# Capsule graphics
CAPSULE_COLOR = formatColor(1,1,1)
CAPSULE_SIZE = 0.25

# Drawing walls
WALL_RADIUS = 0.15
|
|
||||||
|
class InfoPane:
    """The score/status strip rendered beneath the maze.

    Coordinates handed to the graphics calls are expressed relative to the
    bottom-left corner of the pane (see toScreen).
    """

    def __init__(self, layout, gridSize):
        self.gridSize = gridSize
        self.width = (layout.width) * gridSize
        self.base = (layout.height + 1) * gridSize  # pane sits just below the maze
        self.height = INFO_PANE_HEIGHT
        self.fontSize = 24
        self.textColor = PACMAN_COLOR
        self.drawPane()

    def toScreen(self, pos, y = None):
        """
        Translates a point relative from the bottom left of the info pane.
        Accepts either toScreen((x, y)) or toScreen(x, y).
        """
        if y is None:
            x, y = pos
        else:
            x = pos

        x = self.gridSize + x # Margin
        y = self.base + y
        return x, y

    def drawPane(self):
        """Create the score label."""
        self.scoreText = text( self.toScreen(0, 0 ), self.textColor, "SCORE: 0", "Times", self.fontSize, "bold")

    def initializeGhostDistances(self, distances):
        """Create one ghost-distance label per ghost, font scaled to pane width."""
        self.ghostDistanceText = []

        size = 20
        if self.width < 240:
            size = 12
        if self.width < 160:
            size = 10

        for i, d in enumerate(distances):
            t = text( self.toScreen(self.width/2 + self.width/8 * i, 0), GHOST_COLORS[i+1], d, "Times", size, "bold")
            self.ghostDistanceText.append(t)

    def updateScore(self, score):
        """Redraw the score label with the current score."""
        changeText(self.scoreText, "SCORE: % 4d" % score)

    def setTeam(self, isBlue):
        """Display which team this display belongs to (capture mode)."""
        # BUG FIX: the original stored the label in a local named `text`,
        # which shadowed the graphics `text()` function called on the next
        # line and raised "TypeError: 'str' object is not callable".
        label = "RED TEAM"
        if isBlue: label = "BLUE TEAM"
        self.teamText = text( self.toScreen(300, 0 ), self.textColor, label, "Times", self.fontSize, "bold")

    def updateGhostDistances(self, distances):
        """Refresh ghost-distance labels, creating them lazily on first use."""
        if len(distances) == 0: return
        if 'ghostDistanceText' not in dir(self): self.initializeGhostDistances(distances)
        else:
            for i, d in enumerate(distances):
                changeText(self.ghostDistanceText[i], d)

    # The remaining hooks are part of the display interface but are
    # intentionally no-ops for this pane.
    def drawGhost(self):
        pass

    def drawPacman(self):
        pass

    def drawWarning(self):
        pass

    def clearIcon(self):
        pass

    def updateMessage(self, message):
        pass

    def clearMessage(self):
        pass
||||||
|
|
||||||
|
|
||||||
|
class PacmanGraphics:
|
||||||
|
def __init__(self, zoom=1.0, frameTime=0.0, capture=False):
|
||||||
|
self.have_window = 0
|
||||||
|
self.currentGhostImages = {}
|
||||||
|
self.pacmanImage = None
|
||||||
|
self.zoom = zoom
|
||||||
|
self.gridSize = DEFAULT_GRID_SIZE * zoom
|
||||||
|
self.capture = capture
|
||||||
|
self.frameTime = frameTime
|
||||||
|
|
||||||
|
    def checkNullDisplay(self):
        # This is a real graphical display (cf. the text/null displays),
        # so always report False.
        return False
|
||||||
|
|
||||||
|
def initialize(self, state, isBlue = False):
|
||||||
|
self.isBlue = isBlue
|
||||||
|
self.startGraphics(state)
|
||||||
|
|
||||||
|
# self.drawDistributions(state)
|
||||||
|
self.distributionImages = None # Initialized lazily
|
||||||
|
self.drawStaticObjects(state)
|
||||||
|
self.drawAgentObjects(state)
|
||||||
|
|
||||||
|
# Information
|
||||||
|
self.previousState = state
|
||||||
|
|
||||||
|
def startGraphics(self, state):
|
||||||
|
self.layout = state.layout
|
||||||
|
layout = self.layout
|
||||||
|
self.width = layout.width
|
||||||
|
self.height = layout.height
|
||||||
|
self.make_window(self.width, self.height)
|
||||||
|
self.infoPane = InfoPane(layout, self.gridSize)
|
||||||
|
self.currentState = layout
|
||||||
|
|
||||||
|
def drawDistributions(self, state):
|
||||||
|
walls = state.layout.walls
|
||||||
|
dist = []
|
||||||
|
for x in range(walls.width):
|
||||||
|
distx = []
|
||||||
|
dist.append(distx)
|
||||||
|
for y in range(walls.height):
|
||||||
|
( screen_x, screen_y ) = self.to_screen( (x, y) )
|
||||||
|
block = square( (screen_x, screen_y),
|
||||||
|
0.5 * self.gridSize,
|
||||||
|
color = BACKGROUND_COLOR,
|
||||||
|
filled = 1, behind=2)
|
||||||
|
distx.append(block)
|
||||||
|
self.distributionImages = dist
|
||||||
|
|
||||||
|
def drawStaticObjects(self, state):
|
||||||
|
layout = self.layout
|
||||||
|
self.drawWalls(layout.walls)
|
||||||
|
self.food = self.drawFood(layout.food)
|
||||||
|
self.capsules = self.drawCapsules(layout.capsules)
|
||||||
|
refresh()
|
||||||
|
|
||||||
|
def drawAgentObjects(self, state):
|
||||||
|
self.agentImages = [] # (agentState, image)
|
||||||
|
for index, agent in enumerate(state.agentStates):
|
||||||
|
if agent.isPacman:
|
||||||
|
image = self.drawPacman(agent, index)
|
||||||
|
self.agentImages.append( (agent, image) )
|
||||||
|
else:
|
||||||
|
image = self.drawGhost(agent, index)
|
||||||
|
self.agentImages.append( (agent, image) )
|
||||||
|
refresh()
|
||||||
|
|
||||||
|
def swapImages(self, agentIndex, newState):
|
||||||
|
"""
|
||||||
|
Changes an image from a ghost to a pacman or vis versa (for capture)
|
||||||
|
"""
|
||||||
|
prevState, prevImage = self.agentImages[agentIndex]
|
||||||
|
for item in prevImage: remove_from_screen(item)
|
||||||
|
if newState.isPacman:
|
||||||
|
image = self.drawPacman(newState, agentIndex)
|
||||||
|
self.agentImages[agentIndex] = (newState, image )
|
||||||
|
else:
|
||||||
|
image = self.drawGhost(newState, agentIndex)
|
||||||
|
self.agentImages[agentIndex] = (newState, image )
|
||||||
|
refresh()
|
||||||
|
|
||||||
|
def update(self, newState):
|
||||||
|
agentIndex = newState._agentMoved
|
||||||
|
agentState = newState.agentStates[agentIndex]
|
||||||
|
|
||||||
|
if self.agentImages[agentIndex][0].isPacman != agentState.isPacman: self.swapImages(agentIndex, agentState)
|
||||||
|
prevState, prevImage = self.agentImages[agentIndex]
|
||||||
|
if agentState.isPacman:
|
||||||
|
self.animatePacman(agentState, prevState, prevImage)
|
||||||
|
else:
|
||||||
|
self.moveGhost(agentState, agentIndex, prevState, prevImage)
|
||||||
|
self.agentImages[agentIndex] = (agentState, prevImage)
|
||||||
|
|
||||||
|
if newState._foodEaten != None:
|
||||||
|
self.removeFood(newState._foodEaten, self.food)
|
||||||
|
if newState._capsuleEaten != None:
|
||||||
|
self.removeCapsule(newState._capsuleEaten, self.capsules)
|
||||||
|
self.infoPane.updateScore(newState.score)
|
||||||
|
if 'ghostDistances' in dir(newState):
|
||||||
|
self.infoPane.updateGhostDistances(newState.ghostDistances)
|
||||||
|
|
||||||
|
def make_window(self, width, height):
|
||||||
|
grid_width = (width-1) * self.gridSize
|
||||||
|
grid_height = (height-1) * self.gridSize
|
||||||
|
screen_width = 2*self.gridSize + grid_width
|
||||||
|
screen_height = 2*self.gridSize + grid_height + INFO_PANE_HEIGHT
|
||||||
|
|
||||||
|
begin_graphics(screen_width,
|
||||||
|
screen_height,
|
||||||
|
BACKGROUND_COLOR,
|
||||||
|
"CS188 Pacman")
|
||||||
|
|
||||||
|
def drawPacman(self, pacman, index):
|
||||||
|
position = self.getPosition(pacman)
|
||||||
|
screen_point = self.to_screen(position)
|
||||||
|
endpoints = self.getEndpoints(self.getDirection(pacman))
|
||||||
|
|
||||||
|
width = PACMAN_OUTLINE_WIDTH
|
||||||
|
outlineColor = PACMAN_COLOR
|
||||||
|
fillColor = PACMAN_COLOR
|
||||||
|
|
||||||
|
if self.capture:
|
||||||
|
outlineColor = TEAM_COLORS[index % 2]
|
||||||
|
fillColor = GHOST_COLORS[index]
|
||||||
|
width = PACMAN_CAPTURE_OUTLINE_WIDTH
|
||||||
|
|
||||||
|
return [circle(screen_point, PACMAN_SCALE * self.gridSize,
|
||||||
|
fillColor = fillColor, outlineColor = outlineColor,
|
||||||
|
endpoints = endpoints,
|
||||||
|
width = width)]
|
||||||
|
|
||||||
|
def getEndpoints(self, direction, position=(0,0)):
|
||||||
|
x, y = position
|
||||||
|
pos = x - int(x) + y - int(y)
|
||||||
|
width = 30 + 80 * math.sin(math.pi* pos)
|
||||||
|
|
||||||
|
delta = width / 2
|
||||||
|
if (direction == 'West'):
|
||||||
|
endpoints = (180+delta, 180-delta)
|
||||||
|
elif (direction == 'North'):
|
||||||
|
endpoints = (90+delta, 90-delta)
|
||||||
|
elif (direction == 'South'):
|
||||||
|
endpoints = (270+delta, 270-delta)
|
||||||
|
else:
|
||||||
|
endpoints = (0+delta, 0-delta)
|
||||||
|
return endpoints
|
||||||
|
|
||||||
|
def movePacman(self, position, direction, image):
|
||||||
|
screenPosition = self.to_screen(position)
|
||||||
|
endpoints = self.getEndpoints( direction, position )
|
||||||
|
r = PACMAN_SCALE * self.gridSize
|
||||||
|
moveCircle(image[0], screenPosition, r, endpoints)
|
||||||
|
refresh()
|
||||||
|
|
||||||
|
def animatePacman(self, pacman, prevPacman, image):
|
||||||
|
if self.frameTime < 0:
|
||||||
|
print 'Press any key to step forward, "q" to play'
|
||||||
|
keys = wait_for_keys()
|
||||||
|
if 'q' in keys:
|
||||||
|
self.frameTime = 0.1
|
||||||
|
if self.frameTime > 0.01 or self.frameTime < 0:
|
||||||
|
start = time.time()
|
||||||
|
fx, fy = self.getPosition(prevPacman)
|
||||||
|
px, py = self.getPosition(pacman)
|
||||||
|
frames = 4.0
|
||||||
|
for i in range(1,int(frames) + 1):
|
||||||
|
pos = px*i/frames + fx*(frames-i)/frames, py*i/frames + fy*(frames-i)/frames
|
||||||
|
self.movePacman(pos, self.getDirection(pacman), image)
|
||||||
|
refresh()
|
||||||
|
sleep(abs(self.frameTime) / frames)
|
||||||
|
else:
|
||||||
|
self.movePacman(self.getPosition(pacman), self.getDirection(pacman), image)
|
||||||
|
refresh()
|
||||||
|
|
||||||
|
def getGhostColor(self, ghost, ghostIndex):
|
||||||
|
if ghost.scaredTimer > 0:
|
||||||
|
return SCARED_COLOR
|
||||||
|
else:
|
||||||
|
return GHOST_COLORS[ghostIndex]
|
||||||
|
|
||||||
|
def drawGhost(self, ghost, agentIndex):
|
||||||
|
pos = self.getPosition(ghost)
|
||||||
|
dir = self.getDirection(ghost)
|
||||||
|
(screen_x, screen_y) = (self.to_screen(pos) )
|
||||||
|
coords = []
|
||||||
|
for (x, y) in GHOST_SHAPE:
|
||||||
|
coords.append((x*self.gridSize*GHOST_SIZE + screen_x, y*self.gridSize*GHOST_SIZE + screen_y))
|
||||||
|
|
||||||
|
colour = self.getGhostColor(ghost, agentIndex)
|
||||||
|
body = polygon(coords, colour, filled = 1)
|
||||||
|
WHITE = formatColor(1.0, 1.0, 1.0)
|
||||||
|
BLACK = formatColor(0.0, 0.0, 0.0)
|
||||||
|
|
||||||
|
dx = 0
|
||||||
|
dy = 0
|
||||||
|
if dir == 'North':
|
||||||
|
dy = -0.2
|
||||||
|
if dir == 'South':
|
||||||
|
dy = 0.2
|
||||||
|
if dir == 'East':
|
||||||
|
dx = 0.2
|
||||||
|
if dir == 'West':
|
||||||
|
dx = -0.2
|
||||||
|
leftEye = circle((screen_x+self.gridSize*GHOST_SIZE*(-0.3+dx/1.5), screen_y-self.gridSize*GHOST_SIZE*(0.3-dy/1.5)), self.gridSize*GHOST_SIZE*0.2, WHITE, WHITE)
|
||||||
|
rightEye = circle((screen_x+self.gridSize*GHOST_SIZE*(0.3+dx/1.5), screen_y-self.gridSize*GHOST_SIZE*(0.3-dy/1.5)), self.gridSize*GHOST_SIZE*0.2, WHITE, WHITE)
|
||||||
|
leftPupil = circle((screen_x+self.gridSize*GHOST_SIZE*(-0.3+dx), screen_y-self.gridSize*GHOST_SIZE*(0.3-dy)), self.gridSize*GHOST_SIZE*0.08, BLACK, BLACK)
|
||||||
|
rightPupil = circle((screen_x+self.gridSize*GHOST_SIZE*(0.3+dx), screen_y-self.gridSize*GHOST_SIZE*(0.3-dy)), self.gridSize*GHOST_SIZE*0.08, BLACK, BLACK)
|
||||||
|
ghostImageParts = []
|
||||||
|
ghostImageParts.append(body)
|
||||||
|
ghostImageParts.append(leftEye)
|
||||||
|
ghostImageParts.append(rightEye)
|
||||||
|
ghostImageParts.append(leftPupil)
|
||||||
|
ghostImageParts.append(rightPupil)
|
||||||
|
|
||||||
|
return ghostImageParts
|
||||||
|
|
||||||
|
def moveEyes(self, pos, dir, eyes):
|
||||||
|
(screen_x, screen_y) = (self.to_screen(pos) )
|
||||||
|
dx = 0
|
||||||
|
dy = 0
|
||||||
|
if dir == 'North':
|
||||||
|
dy = -0.2
|
||||||
|
if dir == 'South':
|
||||||
|
dy = 0.2
|
||||||
|
if dir == 'East':
|
||||||
|
dx = 0.2
|
||||||
|
if dir == 'West':
|
||||||
|
dx = -0.2
|
||||||
|
moveCircle(eyes[0],(screen_x+self.gridSize*GHOST_SIZE*(-0.3+dx/1.5), screen_y-self.gridSize*GHOST_SIZE*(0.3-dy/1.5)), self.gridSize*GHOST_SIZE*0.2)
|
||||||
|
moveCircle(eyes[1],(screen_x+self.gridSize*GHOST_SIZE*(0.3+dx/1.5), screen_y-self.gridSize*GHOST_SIZE*(0.3-dy/1.5)), self.gridSize*GHOST_SIZE*0.2)
|
||||||
|
moveCircle(eyes[2],(screen_x+self.gridSize*GHOST_SIZE*(-0.3+dx), screen_y-self.gridSize*GHOST_SIZE*(0.3-dy)), self.gridSize*GHOST_SIZE*0.08)
|
||||||
|
moveCircle(eyes[3],(screen_x+self.gridSize*GHOST_SIZE*(0.3+dx), screen_y-self.gridSize*GHOST_SIZE*(0.3-dy)), self.gridSize*GHOST_SIZE*0.08)
|
||||||
|
|
||||||
|
def moveGhost(self, ghost, ghostIndex, prevGhost, ghostImageParts):
|
||||||
|
old_x, old_y = self.to_screen(self.getPosition(prevGhost))
|
||||||
|
new_x, new_y = self.to_screen(self.getPosition(ghost))
|
||||||
|
delta = new_x - old_x, new_y - old_y
|
||||||
|
|
||||||
|
for ghostImagePart in ghostImageParts:
|
||||||
|
move_by(ghostImagePart, delta)
|
||||||
|
refresh()
|
||||||
|
|
||||||
|
if ghost.scaredTimer > 0:
|
||||||
|
color = SCARED_COLOR
|
||||||
|
else:
|
||||||
|
color = GHOST_COLORS[ghostIndex]
|
||||||
|
edit(ghostImageParts[0], ('fill', color), ('outline', color))
|
||||||
|
self.moveEyes(self.getPosition(ghost), self.getDirection(ghost), ghostImageParts[-4:])
|
||||||
|
refresh()
|
||||||
|
|
||||||
|
def getPosition(self, agentState):
|
||||||
|
if agentState.configuration == None: return (-1000, -1000)
|
||||||
|
return agentState.getPosition()
|
||||||
|
|
||||||
|
def getDirection(self, agentState):
|
||||||
|
if agentState.configuration == None: return Directions.STOP
|
||||||
|
return agentState.configuration.getDirection()
|
||||||
|
|
||||||
|
    def finish(self):
        """Close the graphics window."""
        end_graphics()
|
||||||
|
|
||||||
|
def to_screen(self, point):
|
||||||
|
( x, y ) = point
|
||||||
|
#y = self.height - y
|
||||||
|
x = (x + 1)*self.gridSize
|
||||||
|
y = (self.height - y)*self.gridSize
|
||||||
|
return ( x, y )
|
||||||
|
|
||||||
|
# Fixes some TK issue with off-center circles
|
||||||
|
def to_screen2(self, point):
|
||||||
|
( x, y ) = point
|
||||||
|
#y = self.height - y
|
||||||
|
x = (x + 1)*self.gridSize
|
||||||
|
y = (self.height - y)*self.gridSize
|
||||||
|
return ( x, y )
|
||||||
|
|
||||||
|
def drawWalls(self, wallMatrix):
|
||||||
|
wallColor = WALL_COLOR
|
||||||
|
for xNum, x in enumerate(wallMatrix):
|
||||||
|
if self.capture and (xNum * 2) < wallMatrix.width: wallColor = TEAM_COLORS[0]
|
||||||
|
if self.capture and (xNum * 2) >= wallMatrix.width: wallColor = TEAM_COLORS[1]
|
||||||
|
|
||||||
|
for yNum, cell in enumerate(x):
|
||||||
|
if cell: # There's a wall here
|
||||||
|
pos = (xNum, yNum)
|
||||||
|
screen = self.to_screen(pos)
|
||||||
|
screen2 = self.to_screen2(pos)
|
||||||
|
|
||||||
|
# draw each quadrant of the square based on adjacent walls
|
||||||
|
wIsWall = self.isWall(xNum-1, yNum, wallMatrix)
|
||||||
|
eIsWall = self.isWall(xNum+1, yNum, wallMatrix)
|
||||||
|
nIsWall = self.isWall(xNum, yNum+1, wallMatrix)
|
||||||
|
sIsWall = self.isWall(xNum, yNum-1, wallMatrix)
|
||||||
|
nwIsWall = self.isWall(xNum-1, yNum+1, wallMatrix)
|
||||||
|
swIsWall = self.isWall(xNum-1, yNum-1, wallMatrix)
|
||||||
|
neIsWall = self.isWall(xNum+1, yNum+1, wallMatrix)
|
||||||
|
seIsWall = self.isWall(xNum+1, yNum-1, wallMatrix)
|
||||||
|
|
||||||
|
# NE quadrant
|
||||||
|
if (not nIsWall) and (not eIsWall):
|
||||||
|
# inner circle
|
||||||
|
circle(screen2, WALL_RADIUS * self.gridSize, wallColor, wallColor, (0,91), 'arc')
|
||||||
|
if (nIsWall) and (not eIsWall):
|
||||||
|
# vertical line
|
||||||
|
line(add(screen, (self.gridSize*WALL_RADIUS, 0)), add(screen, (self.gridSize*WALL_RADIUS, self.gridSize*(-0.5)-1)), wallColor)
|
||||||
|
if (not nIsWall) and (eIsWall):
|
||||||
|
# horizontal line
|
||||||
|
line(add(screen, (0, self.gridSize*(-1)*WALL_RADIUS)), add(screen, (self.gridSize*0.5+1, self.gridSize*(-1)*WALL_RADIUS)), wallColor)
|
||||||
|
if (nIsWall) and (eIsWall) and (not neIsWall):
|
||||||
|
# outer circle
|
||||||
|
circle(add(screen2, (self.gridSize*2*WALL_RADIUS, self.gridSize*(-2)*WALL_RADIUS)), WALL_RADIUS * self.gridSize-1, wallColor, wallColor, (180,271), 'arc')
|
||||||
|
line(add(screen, (self.gridSize*2*WALL_RADIUS-1, self.gridSize*(-1)*WALL_RADIUS)), add(screen, (self.gridSize*0.5+1, self.gridSize*(-1)*WALL_RADIUS)), wallColor)
|
||||||
|
line(add(screen, (self.gridSize*WALL_RADIUS, self.gridSize*(-2)*WALL_RADIUS+1)), add(screen, (self.gridSize*WALL_RADIUS, self.gridSize*(-0.5))), wallColor)
|
||||||
|
|
||||||
|
# NW quadrant
|
||||||
|
if (not nIsWall) and (not wIsWall):
|
||||||
|
# inner circle
|
||||||
|
circle(screen2, WALL_RADIUS * self.gridSize, wallColor, wallColor, (90,181), 'arc')
|
||||||
|
if (nIsWall) and (not wIsWall):
|
||||||
|
# vertical line
|
||||||
|
line(add(screen, (self.gridSize*(-1)*WALL_RADIUS, 0)), add(screen, (self.gridSize*(-1)*WALL_RADIUS, self.gridSize*(-0.5)-1)), wallColor)
|
||||||
|
if (not nIsWall) and (wIsWall):
|
||||||
|
# horizontal line
|
||||||
|
line(add(screen, (0, self.gridSize*(-1)*WALL_RADIUS)), add(screen, (self.gridSize*(-0.5)-1, self.gridSize*(-1)*WALL_RADIUS)), wallColor)
|
||||||
|
if (nIsWall) and (wIsWall) and (not nwIsWall):
|
||||||
|
# outer circle
|
||||||
|
circle(add(screen2, (self.gridSize*(-2)*WALL_RADIUS, self.gridSize*(-2)*WALL_RADIUS)), WALL_RADIUS * self.gridSize-1, wallColor, wallColor, (270,361), 'arc')
|
||||||
|
line(add(screen, (self.gridSize*(-2)*WALL_RADIUS+1, self.gridSize*(-1)*WALL_RADIUS)), add(screen, (self.gridSize*(-0.5), self.gridSize*(-1)*WALL_RADIUS)), wallColor)
|
||||||
|
line(add(screen, (self.gridSize*(-1)*WALL_RADIUS, self.gridSize*(-2)*WALL_RADIUS+1)), add(screen, (self.gridSize*(-1)*WALL_RADIUS, self.gridSize*(-0.5))), wallColor)
|
||||||
|
|
||||||
|
# SE quadrant
|
||||||
|
if (not sIsWall) and (not eIsWall):
|
||||||
|
# inner circle
|
||||||
|
circle(screen2, WALL_RADIUS * self.gridSize, wallColor, wallColor, (270,361), 'arc')
|
||||||
|
if (sIsWall) and (not eIsWall):
|
||||||
|
# vertical line
|
||||||
|
line(add(screen, (self.gridSize*WALL_RADIUS, 0)), add(screen, (self.gridSize*WALL_RADIUS, self.gridSize*(0.5)+1)), wallColor)
|
||||||
|
if (not sIsWall) and (eIsWall):
|
||||||
|
# horizontal line
|
||||||
|
line(add(screen, (0, self.gridSize*(1)*WALL_RADIUS)), add(screen, (self.gridSize*0.5+1, self.gridSize*(1)*WALL_RADIUS)), wallColor)
|
||||||
|
if (sIsWall) and (eIsWall) and (not seIsWall):
|
||||||
|
# outer circle
|
||||||
|
circle(add(screen2, (self.gridSize*2*WALL_RADIUS, self.gridSize*(2)*WALL_RADIUS)), WALL_RADIUS * self.gridSize-1, wallColor, wallColor, (90,181), 'arc')
|
||||||
|
line(add(screen, (self.gridSize*2*WALL_RADIUS-1, self.gridSize*(1)*WALL_RADIUS)), add(screen, (self.gridSize*0.5, self.gridSize*(1)*WALL_RADIUS)), wallColor)
|
||||||
|
line(add(screen, (self.gridSize*WALL_RADIUS, self.gridSize*(2)*WALL_RADIUS-1)), add(screen, (self.gridSize*WALL_RADIUS, self.gridSize*(0.5))), wallColor)
|
||||||
|
|
||||||
|
# SW quadrant
|
||||||
|
if (not sIsWall) and (not wIsWall):
|
||||||
|
# inner circle
|
||||||
|
circle(screen2, WALL_RADIUS * self.gridSize, wallColor, wallColor, (180,271), 'arc')
|
||||||
|
if (sIsWall) and (not wIsWall):
|
||||||
|
# vertical line
|
||||||
|
line(add(screen, (self.gridSize*(-1)*WALL_RADIUS, 0)), add(screen, (self.gridSize*(-1)*WALL_RADIUS, self.gridSize*(0.5)+1)), wallColor)
|
||||||
|
if (not sIsWall) and (wIsWall):
|
||||||
|
# horizontal line
|
||||||
|
line(add(screen, (0, self.gridSize*(1)*WALL_RADIUS)), add(screen, (self.gridSize*(-0.5)-1, self.gridSize*(1)*WALL_RADIUS)), wallColor)
|
||||||
|
if (sIsWall) and (wIsWall) and (not swIsWall):
|
||||||
|
# outer circle
|
||||||
|
circle(add(screen2, (self.gridSize*(-2)*WALL_RADIUS, self.gridSize*(2)*WALL_RADIUS)), WALL_RADIUS * self.gridSize-1, wallColor, wallColor, (0,91), 'arc')
|
||||||
|
line(add(screen, (self.gridSize*(-2)*WALL_RADIUS+1, self.gridSize*(1)*WALL_RADIUS)), add(screen, (self.gridSize*(-0.5), self.gridSize*(1)*WALL_RADIUS)), wallColor)
|
||||||
|
line(add(screen, (self.gridSize*(-1)*WALL_RADIUS, self.gridSize*(2)*WALL_RADIUS-1)), add(screen, (self.gridSize*(-1)*WALL_RADIUS, self.gridSize*(0.5))), wallColor)
|
||||||
|
|
||||||
|
def isWall(self, x, y, walls):
|
||||||
|
if x < 0 or y < 0:
|
||||||
|
return False
|
||||||
|
if x >= walls.width or y >= walls.height:
|
||||||
|
return False
|
||||||
|
return walls[x][y]
|
||||||
|
|
||||||
|
def drawFood(self, foodMatrix ):
|
||||||
|
foodImages = []
|
||||||
|
color = FOOD_COLOR
|
||||||
|
for xNum, x in enumerate(foodMatrix):
|
||||||
|
if self.capture and (xNum * 2) <= foodMatrix.width: color = TEAM_COLORS[0]
|
||||||
|
if self.capture and (xNum * 2) > foodMatrix.width: color = TEAM_COLORS[1]
|
||||||
|
imageRow = []
|
||||||
|
foodImages.append(imageRow)
|
||||||
|
for yNum, cell in enumerate(x):
|
||||||
|
if cell: # There's food here
|
||||||
|
screen = self.to_screen((xNum, yNum ))
|
||||||
|
dot = circle( screen,
|
||||||
|
FOOD_SIZE * self.gridSize,
|
||||||
|
outlineColor = color, fillColor = color,
|
||||||
|
width = 1)
|
||||||
|
imageRow.append(dot)
|
||||||
|
else:
|
||||||
|
imageRow.append(None)
|
||||||
|
return foodImages
|
||||||
|
|
||||||
|
def drawCapsules(self, capsules ):
|
||||||
|
capsuleImages = {}
|
||||||
|
for capsule in capsules:
|
||||||
|
( screen_x, screen_y ) = self.to_screen(capsule)
|
||||||
|
dot = circle( (screen_x, screen_y),
|
||||||
|
CAPSULE_SIZE * self.gridSize,
|
||||||
|
outlineColor = CAPSULE_COLOR,
|
||||||
|
fillColor = CAPSULE_COLOR,
|
||||||
|
width = 1)
|
||||||
|
capsuleImages[capsule] = dot
|
||||||
|
return capsuleImages
|
||||||
|
|
||||||
|
    def removeFood(self, cell, foodImages ):
        """Erase the food dot drawn at grid position `cell`.

        `foodImages` is the nested list returned by drawFood, indexed [x][y].
        """
        x, y = cell
        remove_from_screen(foodImages[x][y])
|
||||||
|
|
||||||
|
    def removeCapsule(self, cell, capsuleImages ):
        """Erase the capsule drawn at grid position `cell`.

        Unlike foodImages, `capsuleImages` is a dict keyed by (x, y) tuples
        (see drawCapsules).
        """
        x, y = cell
        remove_from_screen(capsuleImages[(x, y)])
|
||||||
|
|
||||||
|
def drawExpandedCells(self, cells):
|
||||||
|
"""
|
||||||
|
Draws an overlay of expanded grid positions for search agents
|
||||||
|
"""
|
||||||
|
n = float(len(cells))
|
||||||
|
baseColor = [1.0, 0.0, 0.0]
|
||||||
|
self.clearExpandedCells()
|
||||||
|
self.expandedCells = []
|
||||||
|
for k, cell in enumerate(cells):
|
||||||
|
screenPos = self.to_screen( cell)
|
||||||
|
cellColor = formatColor(*[(n-k) * c * .5 / n + .25 for c in baseColor])
|
||||||
|
block = square(screenPos,
|
||||||
|
0.5 * self.gridSize,
|
||||||
|
color = cellColor,
|
||||||
|
filled = 1, behind=2)
|
||||||
|
self.expandedCells.append(block)
|
||||||
|
if self.frameTime < 0:
|
||||||
|
refresh()
|
||||||
|
|
||||||
|
    def clearExpandedCells(self):
        """Remove any previously drawn search-expansion overlay.

        Safe to call before any overlay exists: the attribute is created
        lazily by drawExpandedCells, hence the dir(self) guard.
        """
        if 'expandedCells' in dir(self) and len(self.expandedCells) > 0:
            for cell in self.expandedCells:
                remove_from_screen(cell)
|
||||||
|
|
||||||
|
|
||||||
|
def updateDistributions(self, distributions):
|
||||||
|
"Draws an agent's belief distributions"
|
||||||
|
# copy all distributions so we don't change their state
|
||||||
|
distributions = map(lambda x: x.copy(), distributions)
|
||||||
|
if self.distributionImages == None:
|
||||||
|
self.drawDistributions(self.previousState)
|
||||||
|
for x in range(len(self.distributionImages)):
|
||||||
|
for y in range(len(self.distributionImages[0])):
|
||||||
|
image = self.distributionImages[x][y]
|
||||||
|
weights = [dist[ (x,y) ] for dist in distributions]
|
||||||
|
|
||||||
|
if sum(weights) != 0:
|
||||||
|
pass
|
||||||
|
# Fog of war
|
||||||
|
color = [0.0,0.0,0.0]
|
||||||
|
colors = GHOST_VEC_COLORS[1:] # With Pacman
|
||||||
|
if self.capture: colors = GHOST_VEC_COLORS
|
||||||
|
for weight, gcolor in zip(weights, colors):
|
||||||
|
color = [min(1.0, c + 0.95 * g * weight ** .3) for c,g in zip(color, gcolor)]
|
||||||
|
changeColor(image, formatColor(*color))
|
||||||
|
refresh()
|
||||||
|
|
||||||
|
class FirstPersonPacmanGraphics(PacmanGraphics):
    """Display variant that can hide ghosts (for partially observable games)
    and never applies the scared-ghost color."""

    def __init__(self, zoom = 1.0, showGhosts = True, capture = False, frameTime=0):
        PacmanGraphics.__init__(self, zoom, frameTime=frameTime)
        self.showGhosts = showGhosts
        self.capture = capture

    def initialize(self, state, isBlue = False):
        """Set up the window and draw the initial board."""
        self.isBlue = isBlue
        PacmanGraphics.startGraphics(self, state)
        # Initialize distribution images
        # NOTE(review): `walls` and `dist` are assigned but never used in
        # this method — confirm whether they are leftovers.
        walls = state.layout.walls
        dist = []
        self.layout = state.layout

        # Draw the rest
        self.distributionImages = None  # Initialized lazily
        self.drawStaticObjects(state)
        self.drawAgentObjects(state)

        # Information
        self.previousState = state

    def lookAhead(self, config, state):
        """Draw only the ghosts currently visible from Pacman's viewpoint."""
        if config.getDirection() == 'Stop':
            return
        else:
            pass
            # Draw relevant ghosts
            allGhosts = state.getGhostStates()
            visibleGhosts = state.getVisibleGhosts()
            for i, ghost in enumerate(allGhosts):
                if ghost in visibleGhosts:
                    self.drawGhost(ghost, i)
                else:
                    # Invisible ghosts get no image this frame.
                    self.currentGhostImages[i] = None

    def getGhostColor(self, ghost, ghostIndex):
        # Override: never use the scared (white) color in first-person mode.
        return GHOST_COLORS[ghostIndex]

    def getPosition(self, ghostState):
        # Hide non-Pacman agents above the bottom rows when showGhosts is
        # off by reporting a far off-screen position.
        if not self.showGhosts and not ghostState.isPacman and ghostState.getPosition()[1] > 1:
            return (-1000, -1000)
        else:
            return PacmanGraphics.getPosition(self, ghostState)
|
||||||
|
|
||||||
|
def add(x, y):
    """Component-wise sum of two 2D points/vectors."""
    (x0, y0), (x1, y1) = x, y
    return (x0 + x1, y0 + y1)
|
||||||
|
|
||||||
|
|
||||||
|
# Saving graphical output
|
||||||
|
# -----------------------
|
||||||
|
# Note: to make an animated gif from this postscript output, try the command:
|
||||||
|
# convert -delay 7 -loop 1 -compress lzw -layers optimize frame* out.gif
|
||||||
|
# convert is part of imagemagick (freeware)
|
||||||
|
|
||||||
|
SAVE_POSTSCRIPT = False
|
||||||
|
POSTSCRIPT_OUTPUT_DIR = 'frames'
|
||||||
|
FRAME_NUMBER = 0
|
||||||
|
import os
|
||||||
|
|
||||||
|
def saveFrame():
    "Saves the current graphical output as a postscript file"
    global SAVE_POSTSCRIPT, FRAME_NUMBER, POSTSCRIPT_OUTPUT_DIR
    # No-op unless postscript capture has been enabled globally.
    if not SAVE_POSTSCRIPT: return
    # Lazily create the output directory on first use.
    if not os.path.exists(POSTSCRIPT_OUTPUT_DIR): os.mkdir(POSTSCRIPT_OUTPUT_DIR)
    # Zero-padded frame counter keeps files sortable for gif assembly.
    name = os.path.join(POSTSCRIPT_OUTPUT_DIR, 'frame_%08d.ps' % FRAME_NUMBER)
    FRAME_NUMBER += 1
    writePostscript(name) # writes the current canvas
|
||||||
348
p3_rl/graphicsGridworldDisplay.py
Normal file
348
p3_rl/graphicsGridworldDisplay.py
Normal file
@@ -0,0 +1,348 @@
|
|||||||
|
# graphicsGridworldDisplay.py
|
||||||
|
# ---------------------------
|
||||||
|
# Licensing Information: You are free to use or extend these projects for
|
||||||
|
# educational purposes provided that (1) you do not distribute or publish
|
||||||
|
# solutions, (2) you retain this notice, and (3) you provide clear
|
||||||
|
# attribution to UC Berkeley, including a link to http://ai.berkeley.edu.
|
||||||
|
#
|
||||||
|
# Attribution Information: The Pacman AI projects were developed at UC Berkeley.
|
||||||
|
# The core projects and autograders were primarily created by John DeNero
|
||||||
|
# (denero@cs.berkeley.edu) and Dan Klein (klein@cs.berkeley.edu).
|
||||||
|
# Student side autograding was added by Brad Miller, Nick Hay, and
|
||||||
|
# Pieter Abbeel (pabbeel@cs.berkeley.edu).
|
||||||
|
|
||||||
|
|
||||||
|
import util
|
||||||
|
from graphicsUtils import *
|
||||||
|
|
||||||
|
class GraphicsGridworldDisplay:
    """Graphical front-end for a Gridworld MDP: renders state values,
    Q-values, and the current agent location."""

    def __init__(self, gridworld, size=120, speed=1.0):
        # size: pixel size of one grid cell; speed > 1 shortens redraw pauses.
        self.gridworld = gridworld
        self.size = size
        self.speed = speed

    def start(self):
        # Open the graphics window sized to this gridworld.
        setup(self.gridworld, size=self.size)

    def pause(self):
        # Block until the user presses a key.
        wait_for_keys()

    def displayValues(self, agent, currentState = None, message = 'Agent Values'):
        """Query the agent for V(s) and pi(s) at every state and draw them."""
        values = util.Counter()
        policy = {}
        states = self.gridworld.getStates()
        for state in states:
            values[state] = agent.getValue(state)
            policy[state] = agent.getPolicy(state)
        drawValues(self.gridworld, values, policy, currentState, message)
        sleep(0.05 / self.speed)

    def displayNullValues(self, currentState = None, message = ''):
        """Draw the grid with no value information (all zeros)."""
        values = util.Counter()
        #policy = {}
        states = self.gridworld.getStates()
        for state in states:
            values[state] = 0.0
            #policy[state] = agent.getPolicy(state)
        drawNullValues(self.gridworld, currentState,'')
        # drawValues(self.gridworld, values, policy, currentState, message)
        sleep(0.05 / self.speed)

    def displayQValues(self, agent, currentState = None, message = 'Agent Q-Values'):
        """Query the agent for Q(s, a) at every state-action pair and draw them."""
        qValues = util.Counter()
        states = self.gridworld.getStates()
        for state in states:
            for action in self.gridworld.getPossibleActions(state):
                qValues[(state, action)] = agent.getQValue(state, action)
        drawQValues(self.gridworld, qValues, currentState, message)
        sleep(0.05 / self.speed)
|
||||||
|
|
||||||
|
BACKGROUND_COLOR = formatColor(0,0,0)
|
||||||
|
EDGE_COLOR = formatColor(1,1,1)
|
||||||
|
OBSTACLE_COLOR = formatColor(0.5,0.5,0.5)
|
||||||
|
TEXT_COLOR = formatColor(1,1,1)
|
||||||
|
MUTED_TEXT_COLOR = formatColor(0.7,0.7,0.7)
|
||||||
|
LOCATION_COLOR = formatColor(0,0,1)
|
||||||
|
|
||||||
|
WINDOW_SIZE = -1
|
||||||
|
GRID_SIZE = -1
|
||||||
|
GRID_HEIGHT = -1
|
||||||
|
MARGIN = -1
|
||||||
|
|
||||||
|
def setup(gridworld, title = "Gridworld Display", size = 120):
    # Open the graphics window sized to fit the gridworld grid.
    # NOTE(review): WINDOW_SIZE is assigned below but is not in the
    # `global` statement, so it only binds a dead local — the module-level
    # WINDOW_SIZE stays -1. Likewise SCREEN_WIDTH/SCREEN_HEIGHT are
    # declared global but only lowercase locals are assigned. Confirm
    # whether anything relies on those module globals before changing.
    global GRID_SIZE, MARGIN, SCREEN_WIDTH, SCREEN_HEIGHT, GRID_HEIGHT
    grid = gridworld.grid
    WINDOW_SIZE = size
    GRID_SIZE = size
    GRID_HEIGHT = grid.height
    MARGIN = GRID_SIZE * 0.75
    screen_width = (grid.width - 1) * GRID_SIZE + MARGIN * 2
    screen_height = (grid.height - 0.5) * GRID_SIZE + MARGIN * 2

    begin_graphics(screen_width,
                   screen_height,
                   BACKGROUND_COLOR, title=title)
|
||||||
|
|
||||||
|
def drawNullValues(gridworld, currentState = None, message = ''):
|
||||||
|
grid = gridworld.grid
|
||||||
|
blank()
|
||||||
|
for x in range(grid.width):
|
||||||
|
for y in range(grid.height):
|
||||||
|
state = (x, y)
|
||||||
|
gridType = grid[x][y]
|
||||||
|
isExit = (str(gridType) != gridType)
|
||||||
|
isCurrent = (currentState == state)
|
||||||
|
if gridType == '#':
|
||||||
|
drawSquare(x, y, 0, 0, 0, None, None, True, False, isCurrent)
|
||||||
|
else:
|
||||||
|
drawNullSquare(gridworld.grid, x, y, False, isExit, isCurrent)
|
||||||
|
pos = to_screen(((grid.width - 1.0) / 2.0, - 0.8))
|
||||||
|
text( pos, TEXT_COLOR, message, "Courier", -32, "bold", "c")
|
||||||
|
|
||||||
|
|
||||||
|
def drawValues(gridworld, values, policy, currentState = None, message = 'State Values'):
|
||||||
|
grid = gridworld.grid
|
||||||
|
blank()
|
||||||
|
valueList = [values[state] for state in gridworld.getStates()] + [0.0]
|
||||||
|
minValue = min(valueList)
|
||||||
|
maxValue = max(valueList)
|
||||||
|
for x in range(grid.width):
|
||||||
|
for y in range(grid.height):
|
||||||
|
state = (x, y)
|
||||||
|
gridType = grid[x][y]
|
||||||
|
isExit = (str(gridType) != gridType)
|
||||||
|
isCurrent = (currentState == state)
|
||||||
|
if gridType == '#':
|
||||||
|
drawSquare(x, y, 0, 0, 0, None, None, True, False, isCurrent)
|
||||||
|
else:
|
||||||
|
value = values[state]
|
||||||
|
action = None
|
||||||
|
if policy != None and state in policy:
|
||||||
|
action = policy[state]
|
||||||
|
actions = gridworld.getPossibleActions(state)
|
||||||
|
if action not in actions and 'exit' in actions:
|
||||||
|
action = 'exit'
|
||||||
|
valString = '%.2f' % value
|
||||||
|
drawSquare(x, y, value, minValue, maxValue, valString, action, False, isExit, isCurrent)
|
||||||
|
pos = to_screen(((grid.width - 1.0) / 2.0, - 0.8))
|
||||||
|
text( pos, TEXT_COLOR, message, "Courier", -32, "bold", "c")
|
||||||
|
|
||||||
|
def drawQValues(gridworld, qValues, currentState = None, message = 'State-Action Q-Values'):
    """Render per-action Q-values as colored wedges in each cell, plus a caption."""
    grid = gridworld.grid
    blank()
    # Flatten all (state, action) pairs to find the global Q-value range.
    stateCrossActions = [[(s, a) for a in gridworld.getPossibleActions(s)]
                         for s in gridworld.getStates()]
    qStates = sum(stateCrossActions, [])
    allQ = [qValues[pair] for pair in qStates] + [0.0]
    lo, hi = min(allQ), max(allQ)
    for x in range(grid.width):
        for y in range(grid.height):
            state = (x, y)
            cell = grid[x][y]
            isExit = (str(cell) != cell)
            isCurrent = (currentState == state)
            actions = gridworld.getPossibleActions(state)
            if actions == None or len(actions) == 0:
                actions = [None]
            bestQ = max([qValues[(state, a)] for a in actions])
            bestActions = [a for a in actions if qValues[(state, a)] == bestQ]

            # Collect each action's value and its display string.
            q = util.Counter()
            valStrings = {}
            for a in actions:
                v = qValues[(state, a)]
                q[a] += v
                valStrings[a] = '%.2f' % v
            if cell == '#':
                drawSquare(x, y, 0, 0, 0, None, None, True, False, isCurrent)
            elif isExit:
                # Exit cells have a single 'exit' action; show it as a value square.
                action = 'exit'
                value = q[action]
                drawSquare(x, y, value, lo, hi, '%.2f' % value, action, False, isExit, isCurrent)
            else:
                drawSquareQ(x, y, q, lo, hi, valStrings, actions, isCurrent)
    pos = to_screen(((grid.width - 1.0) / 2.0, -0.8))
    text(pos, TEXT_COLOR, message, "Courier", -32, "bold", "c")
def blank():
    """Wipe the canvas back to the background color."""
    clear_screen()
def drawNullSquare(grid, x, y, isObstacle, isTerminal, isCurrent):
    """Draw one cell with no value shading (used before values exist)."""
    if isObstacle:
        square_color = OBSTACLE_COLOR
    else:
        square_color = getColor(0, -1, 1)

    (screen_x, screen_y) = to_screen((x, y))
    # Filled interior, then a heavier border on top.
    square((screen_x, screen_y), 0.5 * GRID_SIZE, color=square_color, filled=1, width=1)
    square((screen_x, screen_y), 0.5 * GRID_SIZE, color=EDGE_COLOR, filled=0, width=3)

    if isTerminal and not isObstacle:
        # Inner box plus the cell's label marks an exit/terminal cell.
        square((screen_x, screen_y), 0.4 * GRID_SIZE, color=EDGE_COLOR, filled=0, width=2)
        text((screen_x, screen_y), TEXT_COLOR, str(grid[x][y]), "Courier", -24, "bold", "c")

    if not isObstacle and isCurrent:
        # Dot marking the agent's current location.
        circle((screen_x, screen_y), 0.1 * GRID_SIZE, LOCATION_COLOR, fillColor=LOCATION_COLOR)

    # if not isObstacle:
    #   text( (screen_x, screen_y), text_color, valStr, "Courier", 24, "bold", "c")
def drawSquare(x, y, val, minVal, maxVal, valStr, action, isObstacle, isTerminal, isCurrent):
    """Draw one value cell: shaded interior, border, optional policy arrow, label.

    Fix: the 'min'/'max' parameters shadowed the builtins; renamed to
    minVal/maxVal (all in-file callers pass these positionally).
    """
    square_color = getColor(val, minVal, maxVal)
    if isObstacle:
        square_color = OBSTACLE_COLOR

    (screen_x, screen_y) = to_screen((x, y))
    # Filled interior, then a heavier border on top.
    square((screen_x, screen_y), 0.5 * GRID_SIZE, color=square_color, filled=1, width=1)
    square((screen_x, screen_y), 0.5 * GRID_SIZE, color=EDGE_COLOR, filled=0, width=3)
    if isTerminal and not isObstacle:
        # Inner box marks an exit/terminal cell.
        square((screen_x, screen_y), 0.4 * GRID_SIZE, color=EDGE_COLOR, filled=0, width=2)

    # Policy arrow: a small triangle on the edge the action points toward.
    if action == 'north':
        polygon([(screen_x, screen_y - 0.45 * GRID_SIZE), (screen_x + 0.05 * GRID_SIZE, screen_y - 0.40 * GRID_SIZE), (screen_x - 0.05 * GRID_SIZE, screen_y - 0.40 * GRID_SIZE)], EDGE_COLOR, filled=1, smoothed=False)
    if action == 'south':
        polygon([(screen_x, screen_y + 0.45 * GRID_SIZE), (screen_x + 0.05 * GRID_SIZE, screen_y + 0.40 * GRID_SIZE), (screen_x - 0.05 * GRID_SIZE, screen_y + 0.40 * GRID_SIZE)], EDGE_COLOR, filled=1, smoothed=False)
    if action == 'west':
        polygon([(screen_x - 0.45 * GRID_SIZE, screen_y), (screen_x - 0.4 * GRID_SIZE, screen_y + 0.05 * GRID_SIZE), (screen_x - 0.4 * GRID_SIZE, screen_y - 0.05 * GRID_SIZE)], EDGE_COLOR, filled=1, smoothed=False)
    if action == 'east':
        polygon([(screen_x + 0.45 * GRID_SIZE, screen_y), (screen_x + 0.4 * GRID_SIZE, screen_y + 0.05 * GRID_SIZE), (screen_x + 0.4 * GRID_SIZE, screen_y - 0.05 * GRID_SIZE)], EDGE_COLOR, filled=1, smoothed=False)

    text_color = TEXT_COLOR

    if not isObstacle and isCurrent:
        # Dot marking the agent's current location.
        circle((screen_x, screen_y), 0.1 * GRID_SIZE, outlineColor=LOCATION_COLOR, fillColor=LOCATION_COLOR)

    if not isObstacle:
        text((screen_x, screen_y), text_color, valStr, "Courier", -30, "bold", "c")
def drawSquareQ(x, y, qVals, minVal, maxVal, valStrs, bestActions, isCurrent):
    """Draw one cell as four triangular wedges, one per action's Q-value."""

    (screen_x, screen_y) = to_screen((x, y))

    # Corner and edge anchor points of the cell, in screen coordinates.
    center = (screen_x, screen_y)
    nw = (screen_x-0.5*GRID_SIZE, screen_y-0.5*GRID_SIZE)
    ne = (screen_x+0.5*GRID_SIZE, screen_y-0.5*GRID_SIZE)
    se = (screen_x+0.5*GRID_SIZE, screen_y+0.5*GRID_SIZE)
    sw = (screen_x-0.5*GRID_SIZE, screen_y+0.5*GRID_SIZE)
    n = (screen_x, screen_y-0.5*GRID_SIZE+5)
    s = (screen_x, screen_y+0.5*GRID_SIZE-5)
    w = (screen_x-0.5*GRID_SIZE+5, screen_y)
    e = (screen_x+0.5*GRID_SIZE-5, screen_y)

    # First pass: fill one wedge per action, shaded by its Q-value.
    actions = qVals.keys()
    for action in actions:

        wedge_color = getColor(qVals[action], minVal, maxVal)

        if action == 'north':
            polygon( (center, nw, ne), wedge_color, filled = 1, smoothed = False)
            #text(n, text_color, valStr, "Courier", 8, "bold", "n")
        if action == 'south':
            polygon( (center, sw, se), wedge_color, filled = 1, smoothed = False)
            #text(s, text_color, valStr, "Courier", 8, "bold", "s")
        if action == 'east':
            polygon( (center, ne, se), wedge_color, filled = 1, smoothed = False)
            #text(e, text_color, valStr, "Courier", 8, "bold", "e")
        if action == 'west':
            polygon( (center, nw, sw), wedge_color, filled = 1, smoothed = False)
            #text(w, text_color, valStr, "Courier", 8, "bold", "w")

    # Border and the two diagonals separating the wedges.
    square( (screen_x, screen_y),
                   0.5* GRID_SIZE,
                   color = EDGE_COLOR,
                   filled = 0,
                   width = 3)
    line(ne, sw, color = EDGE_COLOR)
    line(nw, se, color = EDGE_COLOR)

    if isCurrent:
        # Dot marking the agent's current location.
        circle( (screen_x, screen_y), 0.1*GRID_SIZE, LOCATION_COLOR, fillColor=LOCATION_COLOR )

    # Second pass: numeric labels; non-best actions use the muted color.
    # NOTE(review): 'bestActions' is accepted but unused — best-ness is
    # recomputed here from qVals; confirm this is intentional.
    for action in actions:
        text_color = TEXT_COLOR
        if qVals[action] < max(qVals.values()): text_color = MUTED_TEXT_COLOR
        valStr = ""
        if action in valStrs:
            valStr = valStrs[action]
        h = -20
        if action == 'north':
            #polygon( (center, nw, ne), wedge_color, filled = 1, smooth = 0)
            text(n, text_color, valStr, "Courier", h, "bold", "n")
        if action == 'south':
            #polygon( (center, sw, se), wedge_color, filled = 1, smooth = 0)
            text(s, text_color, valStr, "Courier", h, "bold", "s")
        if action == 'east':
            #polygon( (center, ne, se), wedge_color, filled = 1, smooth = 0)
            text(e, text_color, valStr, "Courier", h, "bold", "e")
        if action == 'west':
            #polygon( (center, nw, sw), wedge_color, filled = 1, smooth = 0)
            text(w, text_color, valStr, "Courier", h, "bold", "w")
def getColor(val, minVal, maxVal):
    """Map a value in [minVal, maxVal] to a red (negative) / green (positive) shade.

    Fix: the third parameter was named 'max', shadowing the builtin; renamed
    to maxVal (all in-file callers pass it positionally).
    """
    r, g = 0.0, 0.0
    if val < 0 and minVal < 0:
        # Fraction of the most-negative value, scaled to at most 0.65 red.
        r = val * 0.65 / minVal
    if val > 0 and maxVal > 0:
        # Fraction of the most-positive value, scaled to at most 0.65 green.
        g = val * 0.65 / maxVal
    return formatColor(r, g, 0.0)
def square(pos, size, color, filled, width):
    """Axis-aligned square of half-width `size` centered at `pos`."""
    (cx, cy) = pos
    corners = [(cx - size, cy - size), (cx - size, cy + size),
               (cx + size, cy + size), (cx + size, cy - size)]
    return polygon(corners, outlineColor=color, fillColor=color,
                   filled=filled, width=width, smoothed=False)
def to_screen(point):
    """Convert grid coordinates (origin bottom-left, y up) to pixel coordinates."""
    (gamex, gamey) = point
    screen_x = gamex * GRID_SIZE + MARGIN
    # Screen y grows downward, so flip the row index.
    screen_y = (GRID_HEIGHT - gamey - 1) * GRID_SIZE + MARGIN
    return (screen_x, screen_y)
def to_grid(point):
|
||||||
|
(x, y) = point
|
||||||
|
x = int ((y - MARGIN + GRID_SIZE * 0.5) / GRID_SIZE)
|
||||||
|
y = int ((x - MARGIN + GRID_SIZE * 0.5) / GRID_SIZE)
|
||||||
|
print point, "-->", (x, y)
|
||||||
|
return (x, y)
|
||||||
398
p3_rl/graphicsUtils.py
Normal file
398
p3_rl/graphicsUtils.py
Normal file
@@ -0,0 +1,398 @@
|
|||||||
|
# graphicsUtils.py
|
||||||
|
# ----------------
|
||||||
|
# Licensing Information: You are free to use or extend these projects for
|
||||||
|
# educational purposes provided that (1) you do not distribute or publish
|
||||||
|
# solutions, (2) you retain this notice, and (3) you provide clear
|
||||||
|
# attribution to UC Berkeley, including a link to http://ai.berkeley.edu.
|
||||||
|
#
|
||||||
|
# Attribution Information: The Pacman AI projects were developed at UC Berkeley.
|
||||||
|
# The core projects and autograders were primarily created by John DeNero
|
||||||
|
# (denero@cs.berkeley.edu) and Dan Klein (klein@cs.berkeley.edu).
|
||||||
|
# Student side autograding was added by Brad Miller, Nick Hay, and
|
||||||
|
# Pieter Abbeel (pabbeel@cs.berkeley.edu).
|
||||||
|
|
||||||
|
|
||||||
|
import sys
|
||||||
|
import math
|
||||||
|
import random
|
||||||
|
import string
|
||||||
|
import time
|
||||||
|
import types
|
||||||
|
import Tkinter
|
||||||
|
|
||||||
|
_Windows = sys.platform == 'win32'  # True if on Win95/98/NT

# Module-level graphics state, populated by begin_graphics().
_root_window = None      # The root window for graphics output
_canvas = None      # The canvas which holds graphics
_canvas_xs = None      # Size of canvas object
_canvas_ys = None
_canvas_x = None      # Current position on canvas
_canvas_y = None
_canvas_col = None      # Current colour (set to black below)
_canvas_tsize = 12       # Default text size
_canvas_tserifs = 0      # Default serif flag
def formatColor(r, g, b):
    """Pack RGB floats in [0, 1] into a Tk '#rrggbb' hex color string."""
    scaled = [int(channel * 255) for channel in (r, g, b)]
    return '#%02x%02x%02x' % tuple(scaled)
def colorToVector(color):
    """Inverse of formatColor: '#rrggbb' -> per-channel floats divided by 256."""
    channels = [color[1:3], color[3:5], color[5:7]]
    return map(lambda hexpair: int(hexpair, 16) / 256.0, channels)
# Platform-dependent default text fonts.
if _Windows:
    _canvas_tfonts = ['times new roman', 'lucida console']
else:
    _canvas_tfonts = ['times', 'lucidasans-24']
    pass # XXX need defaults here
def sleep(secs):
    """Pause for secs: plain time.sleep when no window exists, otherwise
    spin the Tk event loop so the GUI stays responsive."""
    global _root_window
    if _root_window is None:
        time.sleep(secs)
        return
    _root_window.update_idletasks()
    # Schedule quit() so mainloop() returns after the delay.
    _root_window.after(int(1000 * secs), _root_window.quit)
    _root_window.mainloop()
def begin_graphics(width=640, height=480, color=formatColor(0, 0, 0), title=None):
    """Create (or recreate) the Tk root window and drawing canvas."""

    global _root_window, _canvas, _canvas_x, _canvas_y, _canvas_xs, _canvas_ys, _bg_color

    # Check for duplicate call
    if _root_window is not None:
        # Lose the window.
        _root_window.destroy()

    # Save the canvas size parameters
    _canvas_xs, _canvas_ys = width - 1, height - 1
    _canvas_x, _canvas_y = 0, _canvas_ys
    _bg_color = color

    # Create the root window
    _root_window = Tkinter.Tk()
    _root_window.protocol('WM_DELETE_WINDOW', _destroy_window)
    _root_window.title(title or 'Graphics Window')
    _root_window.resizable(0, 0)

    # Create the canvas object
    try:
        _canvas = Tkinter.Canvas(_root_window, width=width, height=height)
        _canvas.pack()
        draw_background()
        _canvas.update()
    except:
        # Creation failed: reset module state so a retry is possible, then re-raise.
        _root_window = None
        raise

    # Bind to key-down and key-up events
    _root_window.bind( "<KeyPress>", _keypress )
    _root_window.bind( "<KeyRelease>", _keyrelease )
    _root_window.bind( "<FocusIn>", _clear_keys )
    _root_window.bind( "<FocusOut>", _clear_keys )
    _root_window.bind( "<Button-1>", _leftclick )
    _root_window.bind( "<Button-2>", _rightclick )
    _root_window.bind( "<Button-3>", _rightclick )
    _root_window.bind( "<Control-Button-1>", _ctrl_leftclick)
    _clear_keys()
# Most recent click location of each kind, polled by wait_for_click();
# None until such a click occurs, reset to None once consumed.
_leftclick_loc = None
_rightclick_loc = None
_ctrl_leftclick_loc = None

def _leftclick(event):
    # Record the pixel position of the latest left click.
    global _leftclick_loc
    _leftclick_loc = (event.x, event.y)

def _rightclick(event):
    # Record the pixel position of the latest right (or middle) click.
    global _rightclick_loc
    _rightclick_loc = (event.x, event.y)

def _ctrl_leftclick(event):
    # Record the pixel position of the latest ctrl+left click.
    global _ctrl_leftclick_loc
    _ctrl_leftclick_loc = (event.x, event.y)
def wait_for_click():
    """Block until any click arrives; return ((x, y), kind) and consume it."""
    global _leftclick_loc, _rightclick_loc, _ctrl_leftclick_loc
    while True:
        if _leftclick_loc is not None:
            loc, _leftclick_loc = _leftclick_loc, None
            return loc, 'left'
        if _rightclick_loc is not None:
            loc, _rightclick_loc = _rightclick_loc, None
            return loc, 'right'
        if _ctrl_leftclick_loc is not None:
            loc, _ctrl_leftclick_loc = _ctrl_leftclick_loc, None
            return loc, 'ctrl_left'
        # Nothing yet: let the event loop run, then poll again.
        sleep(0.05)
def draw_background():
    # Paint a background-colored polygon covering the whole canvas.
    corners = [(0,0), (0, _canvas_ys), (_canvas_xs, _canvas_ys), (_canvas_xs, 0)]
    polygon(corners, _bg_color, fillColor=_bg_color, filled=True, smoothed=False)

def _destroy_window(event=None):
    # Window-manager close handler: terminate the whole process.
    sys.exit(0)
    # global _root_window
    # _root_window.destroy()
    # _root_window = None
    #print "DESTROY"
def end_graphics():
|
||||||
|
global _root_window, _canvas, _mouse_enabled
|
||||||
|
try:
|
||||||
|
try:
|
||||||
|
sleep(1)
|
||||||
|
if _root_window != None:
|
||||||
|
_root_window.destroy()
|
||||||
|
except SystemExit, e:
|
||||||
|
print 'Ending graphics raised an exception:', e
|
||||||
|
finally:
|
||||||
|
_root_window = None
|
||||||
|
_canvas = None
|
||||||
|
_mouse_enabled = 0
|
||||||
|
_clear_keys()
|
||||||
|
|
||||||
|
def clear_screen(background=None):
    # Wipe all canvas items, repaint the background, and reset the pen position.
    # NOTE(review): 'background' is accepted but never used — verify callers.
    global _canvas_x, _canvas_y
    _canvas.delete('all')
    draw_background()
    _canvas_x, _canvas_y = 0, _canvas_ys
def polygon(coords, outlineColor, fillColor=None, filled=1, smoothed=1, behind=0, width=1):
    """Create a canvas polygon from (x, y) pairs; returns the canvas item id."""
    # Tk wants a flat [x0, y0, x1, y1, ...] coordinate list.
    flat = []
    for coord in coords:
        flat.extend([coord[0], coord[1]])
    if fillColor is None:
        fillColor = outlineColor
    if filled == 0:
        fillColor = ""  # Tk convention: empty fill string means unfilled
    poly = _canvas.create_polygon(flat, outline=outlineColor, fill=fillColor,
                                  smooth=smoothed, width=width)
    if behind > 0:
        _canvas.tag_lower(poly, behind)  # Higher should be more visible
    return poly
def square(pos, r, color, filled=1, behind=0):
    """Square of half-width r centered at pos, drawn via polygon()."""
    (cx, cy) = pos
    corners = [(cx - r, cy - r), (cx + r, cy - r), (cx + r, cy + r), (cx - r, cy + r)]
    return polygon(corners, color, color, filled, 0, behind=behind)
def circle(pos, r, outlineColor, fillColor, endpoints=None, style='pieslice', width=2):
    """Draw an arc/circle centered at pos; endpoints are start/stop angles in degrees."""
    (cx, cy) = pos
    x0, x1 = cx - r - 1, cx + r
    y0, y1 = cy - r - 1, cy + r
    if endpoints is None:
        e = [0, 359]
    else:
        e = list(endpoints)
    # Normalize so the sweep from e[0] to e[1] is positive.
    while e[0] > e[1]:
        e[1] = e[1] + 360

    return _canvas.create_arc(x0, y0, x1, y1, outline=outlineColor, fill=fillColor,
                              extent=e[1] - e[0], start=e[0], style=style, width=width)
def image(pos, file="../../blueghost.gif"):
    # Place a GIF image anchored at its top-left (NW) corner.
    x, y = pos
    # img = PhotoImage(file=file)
    return _canvas.create_image(x, y, image = Tkinter.PhotoImage(file=file), anchor = Tkinter.NW)


def refresh():
    # Flush pending canvas redraws without processing user events.
    _canvas.update_idletasks()

def moveCircle(id, pos, r, endpoints=None):
    # Reposition an existing arc item and update its sweep angles.
    global _canvas_x, _canvas_y

    x, y = pos
    #  x0, x1 = x - r, x + r + 1
    #  y0, y1 = y - r, y + r + 1
    x0, x1 = x - r - 1, x + r
    y0, y1 = y - r - 1, y + r
    if endpoints == None:
        e = [0, 359]
    else:
        e = list(endpoints)
    # Normalize so the sweep from e[0] to e[1] is positive.
    while e[0] > e[1]: e[1] = e[1] + 360

    edit(id, ('start', e[0]), ('extent', e[1] - e[0]))
    move_to(id, x0, y0)

def edit(id, *args):
    # Apply (option, value) pairs to an existing canvas item.
    _canvas.itemconfigure(id, **dict(args))
def text(pos, color, contents, font='Helvetica', size=12, style='normal', anchor="nw"):
    """Draw a text item; Tk treats negative sizes as pixel heights."""
    global _canvas_x, _canvas_y
    (x, y) = pos
    tk_font = (font, str(size), style)
    return _canvas.create_text(x, y, fill=color, text=contents, font=tk_font, anchor=anchor)
def changeText(id, newText, font=None, size=12, style='normal'):
    # Replace a text item's contents; optionally restyle its font.
    _canvas.itemconfigure(id, text=newText)
    if font != None:
        _canvas.itemconfigure(id, font=(font, '-%d' % size, style))

def changeColor(id, newColor):
    # Recolor an existing canvas item.
    _canvas.itemconfigure(id, fill=newColor)
def line(here, there, color=formatColor(0, 0, 0), width=2):
    # Straight segment between two (x, y) points; returns the canvas item id.
    x0, y0 = here[0], here[1]
    x1, y1 = there[0], there[1]
    return _canvas.create_line(x0, y0, x1, y1, fill=color, width=width)
##############################################################################
### Keypress handling ########################################################
##############################################################################

# We bind to key-down and key-up events.

# Keys currently held down, and keys seen since the last keys_waiting() call.
_keysdown = {}
_keyswaiting = {}
# This holds an unprocessed key release.  We delay key releases by up to
# one call to keys_pressed() to get round a problem with auto repeat.
_got_release = None

def _keypress(event):
    # KeyPress handler: mark the key as held and as newly seen.
    global _got_release
    #remap_arrows(event)
    _keysdown[event.keysym] = 1
    _keyswaiting[event.keysym] = 1
    # print event.char, event.keycode
    _got_release = None

def _keyrelease(event):
    # KeyRelease handler: forget the key; remember a release happened so
    # keys_pressed() can process it on its next call.
    global _got_release
    #remap_arrows(event)
    try:
        del _keysdown[event.keysym]
    except:
        pass
    _got_release = 1

def remap_arrows(event):
    # TURN ARROW PRESSES INTO LETTERS (SHOULD BE IN KEYBOARD AGENT)
    if event.char in ['a', 's', 'd', 'w']:
        return
    if event.keycode in [37, 101]: # LEFT ARROW (win / x)
        event.char = 'a'
    if event.keycode in [38, 99]: # UP ARROW
        event.char = 'w'
    if event.keycode in [39, 102]: # RIGHT ARROW
        event.char = 'd'
    if event.keycode in [40, 104]: # DOWN ARROW
        event.char = 's'

def _clear_keys(event=None):
    # Reset all keyboard state (also bound to window focus changes).
    global _keysdown, _got_release, _keyswaiting
    _keysdown = {}
    _keyswaiting = {}
    _got_release = None
def keys_pressed(d_o_e=Tkinter.tkinter.dooneevent,
                 d_w=Tkinter.tkinter.DONT_WAIT):
    # Pump pending Tk events, then report which keys are currently held.
    d_o_e(d_w)
    if _got_release:
        # Process one more event so a delayed key release is observed.
        d_o_e(d_w)
    return _keysdown.keys()
def keys_waiting():
    """Return keys pressed since the last call, clearing the buffer."""
    global _keyswaiting
    pending = _keyswaiting.keys()
    _keyswaiting = {}
    return pending
# Block for a list of keys...

def wait_for_keys():
    """Poll until at least one key is down; return the held keys."""
    pressed = []
    while not pressed:
        pressed = keys_pressed()
        sleep(0.05)  # note: also sleeps once after keys are detected
    return pressed
def remove_from_screen(x,
                       d_o_e=Tkinter.tkinter.dooneevent,
                       d_w=Tkinter.tkinter.DONT_WAIT):
    # Delete a canvas item, then pump one Tk event so the change shows.
    _canvas.delete(x)
    d_o_e(d_w)

def _adjust_coords(coord_list, x, y):
    # Shift a flat [x0, y0, x1, y1, ...] coordinate list in place by (x, y).
    for i in range(0, len(coord_list), 2):
        coord_list[i] = coord_list[i] + x
        coord_list[i + 1] = coord_list[i + 1] + y
    return coord_list
def move_to(object, x, y=None,
            d_o_e=Tkinter.tkinter.dooneevent,
            d_w=Tkinter.tkinter.DONT_WAIT):
    """Move a canvas item so its first coordinate pair lands at (x, y).

    Accepts either move_to(obj, x, y) or move_to(obj, (x, y)).
    Bug fix: the original raised a plain string ('incomprehensible
    coordinates'), which is a TypeError at runtime in any modern Python;
    it now raises Exception, matching move_by below.
    """
    if y is None:
        try:
            x, y = x
        except:
            raise Exception('incomprehensible coordinates')

    horiz = True
    newCoords = []
    current_x, current_y = _canvas.coords(object)[0:2]  # first point
    for coord in _canvas.coords(object):
        # Coordinates alternate x, y; shift every point by the same delta.
        if horiz:
            inc = x - current_x
        else:
            inc = y - current_y
        horiz = not horiz
        newCoords.append(coord + inc)

    _canvas.coords(object, *newCoords)
    d_o_e(d_w)
def move_by(object, x, y=None,
            d_o_e=Tkinter.tkinter.dooneevent,
            d_w=Tkinter.tkinter.DONT_WAIT, lift=False):
    """Shift a canvas item by (x, y); optionally raise it above other items.

    Accepts either move_by(obj, x, y) or move_by(obj, (x, y)).
    """
    if y is None:
        try:
            x, y = x
        except:
            raise Exception('incomprehensible coordinates')

    # Coordinates alternate x, y; add the matching delta to each.
    shifted = []
    horiz = True
    for coord in _canvas.coords(object):
        if horiz:
            delta = x
        else:
            delta = y
        horiz = not horiz
        shifted.append(coord + delta)

    _canvas.coords(object, *shifted)
    d_o_e(d_w)
    if lift:
        _canvas.tag_raise(object)
def writePostscript(filename):
    """Writes the current canvas to a postscript file."""
    # Fix: the 'file()' builtin is long-deprecated (removed in Python 3) and
    # the handle was not closed on error; use open() with a context manager.
    with open(filename, 'w') as psfile:
        psfile.write(_canvas.postscript(pageanchor='sw',
                                        y='0.c',
                                        x='0.c'))
# Pacman-style ghost outline in unit coordinates, used by the demo below.
ghost_shape = [
    (0, - 0.5),
    (0.25, - 0.75),
    (0.5, - 0.5),
    (0.75, - 0.75),
    (0.75, 0.5),
    (0.5, 0.75),
    (- 0.5, 0.75),
    (- 0.75, 0.5),
    (- 0.75, - 0.75),
    (- 0.5, - 0.5),
    (- 0.25, - 0.75)
    ]

if __name__ == '__main__':
    # Smoke test: draw a ghost and an arc, then linger briefly.
    begin_graphics()
    clear_screen()
    ghost_shape = [(x * 10 + 20, y * 10 + 20) for x, y in ghost_shape]
    g = polygon(ghost_shape, formatColor(1, 1, 1))
    move_to(g, (50, 50))
    # NOTE(review): circle() declares a required fillColor parameter that this
    # call omits — looks like it would raise TypeError if run; verify.
    circle((150, 150), 20, formatColor(0.7, 0.3, 0.0), endpoints=[15, - 15])
    sleep(2)
585
p3_rl/gridworld.py
Normal file
585
p3_rl/gridworld.py
Normal file
@@ -0,0 +1,585 @@
|
|||||||
|
# gridworld.py
|
||||||
|
# ------------
|
||||||
|
# Licensing Information: You are free to use or extend these projects for
|
||||||
|
# educational purposes provided that (1) you do not distribute or publish
|
||||||
|
# solutions, (2) you retain this notice, and (3) you provide clear
|
||||||
|
# attribution to UC Berkeley, including a link to http://ai.berkeley.edu.
|
||||||
|
#
|
||||||
|
# Attribution Information: The Pacman AI projects were developed at UC Berkeley.
|
||||||
|
# The core projects and autograders were primarily created by John DeNero
|
||||||
|
# (denero@cs.berkeley.edu) and Dan Klein (klein@cs.berkeley.edu).
|
||||||
|
# Student side autograding was added by Brad Miller, Nick Hay, and
|
||||||
|
# Pieter Abbeel (pabbeel@cs.berkeley.edu).
|
||||||
|
|
||||||
|
|
||||||
|
import random
|
||||||
|
import sys
|
||||||
|
import mdp
|
||||||
|
import environment
|
||||||
|
import util
|
||||||
|
import optparse
|
||||||
|
|
||||||
|
class Gridworld(mdp.MarkovDecisionProcess):
    """
    Gridworld MDP: states are (x, y) cells plus one true terminal state.

    Bug fixes: getStartState and getTransitionStatesAndProbs raised plain
    strings; string exceptions are a TypeError at runtime in any modern
    Python, so both now raise Exception instances.
    """
    def __init__(self, grid):
        # layout: accept either a Grid instance or a list of strings.
        if type(grid) == type([]): grid = makeGrid(grid)
        self.grid = grid

        # parameters
        self.livingReward = 0.0
        self.noise = 0.2

    def setLivingReward(self, reward):
        """
        The (negative) reward for exiting "normal" states.

        Note that in the R+N text, this reward is on entering
        a state and therefore is not clearly part of the state's
        future rewards.
        """
        self.livingReward = reward

    def setNoise(self, noise):
        """
        The probability of moving in an unintended direction.
        """
        self.noise = noise


    def getPossibleActions(self, state):
        """
        Returns list of valid actions for 'state'.

        Note that you can request moves into walls and
        that "exit" states transition to the terminal
        state under the special action "done".
        """
        if state == self.grid.terminalState:
            return ()
        x,y = state
        # Numeric cells are exit states with a single 'exit' action.
        if type(self.grid[x][y]) == int:
            return ('exit',)
        return ('north','west','south','east')

    def getStates(self):
        """
        Return list of all states.
        """
        # The true terminal state.
        states = [self.grid.terminalState]
        for x in range(self.grid.width):
            for y in range(self.grid.height):
                if self.grid[x][y] != '#':
                    state = (x,y)
                    states.append(state)
        return states

    def getReward(self, state, action, nextState):
        """
        Get reward for state, action, nextState transition.

        Note that the reward depends only on the state being
        departed (as in the R+N book examples, which more or
        less use this convention).
        """
        if state == self.grid.terminalState:
            return 0.0
        x, y = state
        cell = self.grid[x][y]
        # Numeric cells carry their own exit reward.
        if type(cell) == int or type(cell) == float:
            return cell
        return self.livingReward

    def getStartState(self):
        # The start cell is marked 'S' in the layout.
        for x in range(self.grid.width):
            for y in range(self.grid.height):
                if self.grid[x][y] == 'S':
                    return (x, y)
        raise Exception('Grid has no start state')

    def isTerminal(self, state):
        """
        Only the TERMINAL_STATE state is *actually* a terminal state.
        The other "exit" states are technically non-terminals with
        a single action "exit" which leads to the true terminal state.
        This convention is to make the grids line up with the examples
        in the R+N textbook.
        """
        return state == self.grid.terminalState


    def getTransitionStatesAndProbs(self, state, action):
        """
        Returns list of (nextState, prob) pairs
        representing the states reachable
        from 'state' by taking 'action' along
        with their transition probabilities.
        """

        if action not in self.getPossibleActions(state):
            raise Exception("Illegal action!")

        if self.isTerminal(state):
            return []

        x, y = state

        # Exit cells (numeric rewards) go straight to the terminal state.
        if type(self.grid[x][y]) == int or type(self.grid[x][y]) == float:
            termState = self.grid.terminalState
            return [(termState, 1.0)]

        successors = []

        # Where each compass move would land (bumps into walls/edges stay put).
        northState = (self.__isAllowed(y+1,x) and (x,y+1)) or state
        westState = (self.__isAllowed(y,x-1) and (x-1,y)) or state
        southState = (self.__isAllowed(y-1,x) and (x,y-1)) or state
        eastState = (self.__isAllowed(y,x+1) and (x+1,y)) or state

        # Intended direction gets 1 - noise; the two perpendicular
        # directions split the remaining probability mass equally.
        if action == 'north' or action == 'south':
            if action == 'north':
                successors.append((northState,1-self.noise))
            else:
                successors.append((southState,1-self.noise))

            massLeft = self.noise
            successors.append((westState,massLeft/2.0))
            successors.append((eastState,massLeft/2.0))

        if action == 'west' or action == 'east':
            if action == 'west':
                successors.append((westState,1-self.noise))
            else:
                successors.append((eastState,1-self.noise))

            massLeft = self.noise
            successors.append((northState,massLeft/2.0))
            successors.append((southState,massLeft/2.0))

        # Merge duplicate next states (e.g. several moves blocked by walls).
        successors = self.__aggregate(successors)

        return successors

    def __aggregate(self, statesAndProbs):
        # Sum probabilities of repeated states into a single entry each.
        counter = util.Counter()
        for state, prob in statesAndProbs:
            counter[state] += prob
        newStatesAndProbs = []
        for state, prob in counter.items():
            newStatesAndProbs.append((state, prob))
        return newStatesAndProbs

    def __isAllowed(self, y, x):
        # True if (x, y) is inside the grid and not a wall.  NOTE: takes y first.
        if y < 0 or y >= self.grid.height: return False
        if x < 0 or x >= self.grid.width: return False
        return self.grid[x][y] != '#'
class GridworldEnvironment(environment.Environment):
    """Stateful simulation wrapper around a Gridworld MDP.

    Bug fix: the two probability sanity checks in getRandomNextState raised
    plain strings, which is a TypeError at runtime in any modern Python;
    both now raise Exception instances.
    """

    def __init__(self, gridWorld):
        self.gridWorld = gridWorld
        self.reset()

    def getCurrentState(self):
        return self.state

    def getPossibleActions(self, state):
        return self.gridWorld.getPossibleActions(state)

    def doAction(self, action):
        """Sample a transition from the current state; returns (nextState, reward)."""
        state = self.getCurrentState()
        (nextState, reward) = self.getRandomNextState(state, action)
        self.state = nextState
        return (nextState, reward)

    def getRandomNextState(self, state, action, randObj=None):
        """Sample (nextState, reward) from the MDP's transition distribution.

        randObj: optional random.Random instance for reproducible sampling.
        """
        rand = -1.0
        if randObj is None:
            rand = random.random()
        else:
            rand = randObj.random()
        # Walk the cumulative distribution until the sample falls inside it.
        sum = 0.0
        successors = self.gridWorld.getTransitionStatesAndProbs(state, action)
        for nextState, prob in successors:
            sum += prob
            if sum > 1.0:
                raise Exception('Total transition probability more than one; sample failure.')
            if rand < sum:
                reward = self.gridWorld.getReward(state, action, nextState)
                return (nextState, reward)
        raise Exception('Total transition probability less than one; sample failure.')

    def reset(self):
        # Return the agent to the MDP's start state.
        self.state = self.gridWorld.getStartState()
class Grid:
    """
    A 2-dimensional array of immutables backed by a list of lists.  Data is accessed
    via grid[x][y] where (x,y) are cartesian coordinates with x horizontal,
    y vertical and the origin (0,0) in the bottom left corner.

    The __str__ method constructs an output that is oriented appropriately.
    """
    def __init__(self, width, height, initialValue=' '):
        self.width = width
        self.height = height
        # Column-major storage: data[x][y] matches the grid[x][y] indexing.
        self.data = [[initialValue for y in range(height)] for x in range(width)]
        self.terminalState = 'TERMINAL_STATE'

    def __getitem__(self, i):
        # Returns column i (a list), so grid[x][y] reads data[x][y].
        return self.data[i]

    def __setitem__(self, key, item):
        self.data[key] = item

    def __eq__(self, other):
        if other is None: return False
        return self.data == other.data

    def __hash__(self):
        # Bug fix: the original hashed self.data directly, but lists are
        # unhashable, so hash(grid) always raised TypeError.  Hash an
        # immutable tuple-of-tuples snapshot of the contents instead, which
        # is consistent with __eq__ (equal data => equal hash).
        return hash(tuple(tuple(column) for column in self.data))

    def copy(self):
        # Copies the columns, so cell writes to the copy don't affect self.
        g = Grid(self.width, self.height)
        g.data = [x[:] for x in self.data]
        return g

    def deepCopy(self):
        return self.copy()

    def shallowCopy(self):
        # Shares the underlying data lists with self (writes are visible to both).
        g = Grid(self.width, self.height)
        g.data = self.data
        return g

    def _getLegacyText(self):
        # Row-major rows with the top row first -- the orientation used for printing.
        t = [[self.data[x][y] for x in range(self.width)] for y in range(self.height)]
        t.reverse()
        return t

    def __str__(self):
        return str(self._getLegacyText())
|
||||||
|
|
||||||
|
def makeGrid(gridString):
    """Build a Grid from a list of rows given top row first, flipping the
    vertical axis so that (0,0) ends up in the bottom-left corner."""
    height = len(gridString)
    width = len(gridString[0])
    grid = Grid(width, height)
    for rowIndex, row in enumerate(gridString):
        targetY = height - rowIndex - 1
        for x, cell in enumerate(row):
            grid[x][targetY] = cell
    return grid
|
||||||
|
|
||||||
|
def getCliffGrid():
    # Rows are listed top-to-bottom; makeGrid flips them so (0,0) is the
    # bottom-left cell.  'S' is the start, integers are terminal exit
    # rewards, and the bottom row of -100s is the cliff.
    grid = [[' ',' ',' ',' ',' '],
            ['S',' ',' ',' ',10],
            [-100,-100, -100, -100, -100]]
    return Gridworld(makeGrid(grid))
|
||||||
|
|
||||||
|
def getCliffGrid2():
    # Cliff variant with an extra +8 exit immediately left of the start.
    # NOTE(review): unlike getCliffGrid this passes the raw row list without
    # makeGrid -- presumably Gridworld's constructor converts lists itself;
    # confirm against Gridworld.__init__.
    grid = [[' ',' ',' ',' ',' '],
            [8,'S',' ',' ',10],
            [-100,-100, -100, -100, -100]]
    return Gridworld(grid)
|
||||||
|
|
||||||
|
def getDiscountGrid():
    # '#' cells are walls.  The agent must weigh the nearby +1 exit against
    # the distant +10 exit while avoiding the -10 row at the bottom.
    grid = [[' ',' ',' ',' ',' '],
            [' ','#',' ',' ',' '],
            [' ','#', 1,'#', 10],
            ['S',' ',' ',' ',' '],
            [-10,-10, -10, -10, -10]]
    return Gridworld(grid)
|
||||||
|
|
||||||
|
def getBridgeGrid():
    # A narrow bridge between a small +1 exit and a large +10 exit, with
    # -100 chasms on both sides.
    grid = [[ '#',-100, -100, -100, -100, -100, '#'],
            [ 1, 'S', ' ', ' ', ' ', ' ', 10],
            [ '#',-100, -100, -100, -100, -100, '#']]
    return Gridworld(grid)
|
||||||
|
|
||||||
|
def getBookGrid():
    # The classic 4x3 gridworld from Russell & Norvig: +1 and -1 exits in
    # the top-right region, one wall, start in the bottom-left.
    grid = [[' ',' ',' ',+1],
            [' ','#',' ',-1],
            ['S',' ',' ',' ']]
    return Gridworld(grid)
|
||||||
|
|
||||||
|
def getMazeGrid():
    # A small maze with a single +1 exit at the top-right and the start in
    # the bottom-left corner; '#' cells are walls.
    grid = [[' ',' ',' ',+1],
            ['#','#',' ','#'],
            [' ','#',' ',' '],
            [' ','#','#',' '],
            ['S',' ',' ',' ']]
    return Gridworld(grid)
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
|
def getUserAction(state, actionFunction):
    """
    Get an action from the user (rather than the agent).

    Used for debugging and lecture demos.

    state          -- current state, passed to actionFunction
    actionFunction -- callable returning the legal actions for a state
    Blocks on keyboard input; 'q' exits the program.
    """
    import graphicsUtils
    action = None
    while True:
        keys = graphicsUtils.wait_for_keys()
        if 'Up' in keys: action = 'north'
        if 'Down' in keys: action = 'south'
        if 'Left' in keys: action = 'west'
        if 'Right' in keys: action = 'east'
        if 'q' in keys: sys.exit(0)
        # Idiom fix: identity comparison for None (was '== None').
        if action is None: continue
        break
    actions = actionFunction(state)
    if action not in actions:
        # Requested direction is illegal here: fall back to the first legal action.
        action = actions[0]
    return action
|
||||||
|
|
||||||
|
def printString(x): print x  # message callback used by runEpisode (Python 2 print statement)
|
||||||
|
|
||||||
|
def runEpisode(agent, environment, discount, decision, display, message, pause, episode):
    """Run one episode in the environment and return its discounted return.

    agent       -- learner; startEpisode/observeTransition/stopEpisode hooks
                   are invoked only if the agent defines them
    environment -- must support reset/getCurrentState/getPossibleActions/doAction
    discount    -- per-step discount factor applied to rewards
    decision    -- callable mapping state -> action (agent- or user-controlled)
    display, message, pause -- UI callbacks invoked each step
    episode     -- episode number, used only in status messages
    """
    returns = 0
    totalDiscount = 1.0
    environment.reset()
    if 'startEpisode' in dir(agent): agent.startEpisode()
    message("BEGINNING EPISODE: "+str(episode)+"\n")
    while True:

        # DISPLAY CURRENT STATE
        state = environment.getCurrentState()
        display(state)
        pause()

        # END IF IN A TERMINAL STATE
        actions = environment.getPossibleActions(state)
        if len(actions) == 0:
            message("EPISODE "+str(episode)+" COMPLETE: RETURN WAS "+str(returns)+"\n")
            return returns

        # GET ACTION (USUALLY FROM AGENT)
        action = decision(state)
        if action is None:
            # Bug fix: raising a bare string is illegal in modern Python
            # (TypeError); raise a real exception instead.
            raise Exception('Error: Agent returned None action')

        # EXECUTE ACTION
        nextState, reward = environment.doAction(action)
        message("Started in state: "+str(state)+
                "\nTook action: "+str(action)+
                "\nEnded in state: "+str(nextState)+
                "\nGot reward: "+str(reward)+"\n")
        # UPDATE LEARNER
        if 'observeTransition' in dir(agent):
            agent.observeTransition(state, action, nextState, reward)

        returns += reward * totalDiscount
        totalDiscount *= discount

    # NOTE(review): unreachable -- the loop above only exits via 'return',
    # so stopEpisode is never called from here (preserved from the original).
    if 'stopEpisode' in dir(agent):
        agent.stopEpisode()
|
||||||
|
|
||||||
|
def parseOptions():
    """Parse the gridworld command-line options and return the opts object.

    Also resolves conflicts between options: manual mode disables non-q
    agents and forces pausing, while text-only or quiet display disables
    pausing.
    """
    optParser = optparse.OptionParser()
    optParser.add_option('-d', '--discount',action='store',
                         type='float',dest='discount',default=0.9,
                         help='Discount on future (default %default)')
    optParser.add_option('-r', '--livingReward',action='store',
                         type='float',dest='livingReward',default=0.0,
                         metavar="R", help='Reward for living for a time step (default %default)')
    optParser.add_option('-n', '--noise',action='store',
                         type='float',dest='noise',default=0.2,
                         metavar="P", help='How often action results in ' +
                         'unintended direction (default %default)' )
    optParser.add_option('-e', '--epsilon',action='store',
                         type='float',dest='epsilon',default=0.3,
                         metavar="E", help='Chance of taking a random action in q-learning (default %default)')
    optParser.add_option('-l', '--learningRate',action='store',
                         type='float',dest='learningRate',default=0.5,
                         metavar="P", help='TD learning rate (default %default)' )
    optParser.add_option('-i', '--iterations',action='store',
                         type='int',dest='iters',default=10,
                         metavar="K", help='Number of rounds of value iteration (default %default)')
    optParser.add_option('-k', '--episodes',action='store',
                         type='int',dest='episodes',default=1,
                         metavar="K", help='Number of epsiodes of the MDP to run (default %default)')
    optParser.add_option('-g', '--grid',action='store',
                         metavar="G", type='string',dest='grid',default="BookGrid",
                         help='Grid to use (case sensitive; options are BookGrid, BridgeGrid, CliffGrid, MazeGrid, default %default)' )
    optParser.add_option('-w', '--windowSize', metavar="X", type='int',dest='gridSize',default=150,
                         help='Request a window width of X pixels *per grid cell* (default %default)')
    optParser.add_option('-a', '--agent',action='store', metavar="A",
                         type='string',dest='agent',default="random",
                         help='Agent type (options are \'random\', \'value\' and \'q\', default %default)')
    optParser.add_option('-t', '--text',action='store_true',
                         dest='textDisplay',default=False,
                         help='Use text-only ASCII display')
    optParser.add_option('-p', '--pause',action='store_true',
                         dest='pause',default=False,
                         help='Pause GUI after each time step when running the MDP')
    optParser.add_option('-q', '--quiet',action='store_true',
                         dest='quiet',default=False,
                         help='Skip display of any learning episodes')
    # NOTE(review): '-s' passes the builtin float (not the string 'float');
    # optparse converts builtin type objects to their names, so this works.
    optParser.add_option('-s', '--speed',action='store', metavar="S", type=float,
                         dest='speed',default=1.0,
                         help='Speed of animation, S > 1.0 is faster, 0.0 < S < 1.0 is slower (default %default)')
    optParser.add_option('-m', '--manual',action='store_true',
                         dest='manual',default=False,
                         help='Manually control agent')
    optParser.add_option('-v', '--valueSteps',action='store_true' ,default=False,
                         help='Display each step of value iteration')

    opts, args = optParser.parse_args()

    # Manual control only makes sense for the q-learning agent.
    if opts.manual and opts.agent != 'q':
        print '## Disabling Agents in Manual Mode (-m) ##'
        opts.agent = None

    # MANAGE CONFLICTS
    if opts.textDisplay or opts.quiet:
        # if opts.quiet:
        opts.pause = False
        # opts.manual = False

    if opts.manual:
        opts.pause = True

    return opts
|
||||||
|
|
||||||
|
|
||||||
|
if __name__ == '__main__':
|
||||||
|
|
||||||
|
opts = parseOptions()
|
||||||
|
|
||||||
|
###########################
|
||||||
|
# GET THE GRIDWORLD
|
||||||
|
###########################
|
||||||
|
|
||||||
|
import gridworld
|
||||||
|
mdpFunction = getattr(gridworld, "get"+opts.grid)
|
||||||
|
mdp = mdpFunction()
|
||||||
|
mdp.setLivingReward(opts.livingReward)
|
||||||
|
mdp.setNoise(opts.noise)
|
||||||
|
env = gridworld.GridworldEnvironment(mdp)
|
||||||
|
|
||||||
|
|
||||||
|
###########################
|
||||||
|
# GET THE DISPLAY ADAPTER
|
||||||
|
###########################
|
||||||
|
|
||||||
|
import textGridworldDisplay
|
||||||
|
display = textGridworldDisplay.TextGridworldDisplay(mdp)
|
||||||
|
if not opts.textDisplay:
|
||||||
|
import graphicsGridworldDisplay
|
||||||
|
display = graphicsGridworldDisplay.GraphicsGridworldDisplay(mdp, opts.gridSize, opts.speed)
|
||||||
|
try:
|
||||||
|
display.start()
|
||||||
|
except KeyboardInterrupt:
|
||||||
|
sys.exit(0)
|
||||||
|
|
||||||
|
###########################
|
||||||
|
# GET THE AGENT
|
||||||
|
###########################
|
||||||
|
|
||||||
|
import valueIterationAgents, qlearningAgents
|
||||||
|
a = None
|
||||||
|
if opts.agent == 'value':
|
||||||
|
a = valueIterationAgents.ValueIterationAgent(mdp, opts.discount, opts.iters)
|
||||||
|
elif opts.agent == 'q':
|
||||||
|
#env.getPossibleActions, opts.discount, opts.learningRate, opts.epsilon
|
||||||
|
#simulationFn = lambda agent, state: simulation.GridworldSimulation(agent,state,mdp)
|
||||||
|
gridWorldEnv = GridworldEnvironment(mdp)
|
||||||
|
actionFn = lambda state: mdp.getPossibleActions(state)
|
||||||
|
qLearnOpts = {'gamma': opts.discount,
|
||||||
|
'alpha': opts.learningRate,
|
||||||
|
'epsilon': opts.epsilon,
|
||||||
|
'actionFn': actionFn}
|
||||||
|
a = qlearningAgents.QLearningAgent(**qLearnOpts)
|
||||||
|
elif opts.agent == 'random':
|
||||||
|
# # No reason to use the random agent without episodes
|
||||||
|
if opts.episodes == 0:
|
||||||
|
opts.episodes = 10
|
||||||
|
class RandomAgent:
|
||||||
|
def getAction(self, state):
|
||||||
|
return random.choice(mdp.getPossibleActions(state))
|
||||||
|
def getValue(self, state):
|
||||||
|
return 0.0
|
||||||
|
def getQValue(self, state, action):
|
||||||
|
return 0.0
|
||||||
|
def getPolicy(self, state):
|
||||||
|
"NOTE: 'random' is a special policy value; don't use it in your code."
|
||||||
|
return 'random'
|
||||||
|
def update(self, state, action, nextState, reward):
|
||||||
|
pass
|
||||||
|
a = RandomAgent()
|
||||||
|
else:
|
||||||
|
if not opts.manual: raise 'Unknown agent type: '+opts.agent
|
||||||
|
|
||||||
|
|
||||||
|
###########################
|
||||||
|
# RUN EPISODES
|
||||||
|
###########################
|
||||||
|
# DISPLAY Q/V VALUES BEFORE SIMULATION OF EPISODES
|
||||||
|
try:
|
||||||
|
if not opts.manual and opts.agent == 'value':
|
||||||
|
if opts.valueSteps:
|
||||||
|
for i in range(opts.iters):
|
||||||
|
tempAgent = valueIterationAgents.ValueIterationAgent(mdp, opts.discount, i)
|
||||||
|
display.displayValues(tempAgent, message = "VALUES AFTER "+str(i)+" ITERATIONS")
|
||||||
|
display.pause()
|
||||||
|
|
||||||
|
display.displayValues(a, message = "VALUES AFTER "+str(opts.iters)+" ITERATIONS")
|
||||||
|
display.pause()
|
||||||
|
display.displayQValues(a, message = "Q-VALUES AFTER "+str(opts.iters)+" ITERATIONS")
|
||||||
|
display.pause()
|
||||||
|
except KeyboardInterrupt:
|
||||||
|
sys.exit(0)
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
|
# FIGURE OUT WHAT TO DISPLAY EACH TIME STEP (IF ANYTHING)
|
||||||
|
displayCallback = lambda x: None
|
||||||
|
if not opts.quiet:
|
||||||
|
if opts.manual and opts.agent == None:
|
||||||
|
displayCallback = lambda state: display.displayNullValues(state)
|
||||||
|
else:
|
||||||
|
if opts.agent == 'random': displayCallback = lambda state: display.displayValues(a, state, "CURRENT VALUES")
|
||||||
|
if opts.agent == 'value': displayCallback = lambda state: display.displayValues(a, state, "CURRENT VALUES")
|
||||||
|
if opts.agent == 'q': displayCallback = lambda state: display.displayQValues(a, state, "CURRENT Q-VALUES")
|
||||||
|
|
||||||
|
messageCallback = lambda x: printString(x)
|
||||||
|
if opts.quiet:
|
||||||
|
messageCallback = lambda x: None
|
||||||
|
|
||||||
|
# FIGURE OUT WHETHER TO WAIT FOR A KEY PRESS AFTER EACH TIME STEP
|
||||||
|
pauseCallback = lambda : None
|
||||||
|
if opts.pause:
|
||||||
|
pauseCallback = lambda : display.pause()
|
||||||
|
|
||||||
|
# FIGURE OUT WHETHER THE USER WANTS MANUAL CONTROL (FOR DEBUGGING AND DEMOS)
|
||||||
|
if opts.manual:
|
||||||
|
decisionCallback = lambda state : getUserAction(state, mdp.getPossibleActions)
|
||||||
|
else:
|
||||||
|
decisionCallback = a.getAction
|
||||||
|
|
||||||
|
# RUN EPISODES
|
||||||
|
if opts.episodes > 0:
|
||||||
|
print
|
||||||
|
print "RUNNING", opts.episodes, "EPISODES"
|
||||||
|
print
|
||||||
|
returns = 0
|
||||||
|
for episode in range(1, opts.episodes+1):
|
||||||
|
returns += runEpisode(a, env, opts.discount, decisionCallback, displayCallback, messageCallback, pauseCallback, episode)
|
||||||
|
if opts.episodes > 0:
|
||||||
|
print
|
||||||
|
print "AVERAGE RETURNS FROM START STATE: "+str((returns+0.0) / opts.episodes)
|
||||||
|
print
|
||||||
|
print
|
||||||
|
|
||||||
|
# DISPLAY POST-LEARNING VALUES / Q-VALUES
|
||||||
|
if opts.agent == 'q' and not opts.manual:
|
||||||
|
try:
|
||||||
|
display.displayQValues(a, message = "Q-VALUES AFTER "+str(opts.episodes)+" EPISODES")
|
||||||
|
display.pause()
|
||||||
|
display.displayValues(a, message = "VALUES AFTER "+str(opts.episodes)+" EPISODES")
|
||||||
|
display.pause()
|
||||||
|
except KeyboardInterrupt:
|
||||||
|
sys.exit(0)
|
||||||
84
p3_rl/keyboardAgents.py
Normal file
84
p3_rl/keyboardAgents.py
Normal file
@@ -0,0 +1,84 @@
|
|||||||
|
# keyboardAgents.py
|
||||||
|
# -----------------
|
||||||
|
# Licensing Information: You are free to use or extend these projects for
|
||||||
|
# educational purposes provided that (1) you do not distribute or publish
|
||||||
|
# solutions, (2) you retain this notice, and (3) you provide clear
|
||||||
|
# attribution to UC Berkeley, including a link to http://ai.berkeley.edu.
|
||||||
|
#
|
||||||
|
# Attribution Information: The Pacman AI projects were developed at UC Berkeley.
|
||||||
|
# The core projects and autograders were primarily created by John DeNero
|
||||||
|
# (denero@cs.berkeley.edu) and Dan Klein (klein@cs.berkeley.edu).
|
||||||
|
# Student side autograding was added by Brad Miller, Nick Hay, and
|
||||||
|
# Pieter Abbeel (pabbeel@cs.berkeley.edu).
|
||||||
|
|
||||||
|
|
||||||
|
from game import Agent
|
||||||
|
from game import Directions
|
||||||
|
import random
|
||||||
|
|
||||||
|
class KeyboardAgent(Agent):
    """
    An agent controlled by the keyboard (WASD or the arrow keys).
    """
    # NOTE: Arrow keys also work.
    WEST_KEY = 'a'
    EAST_KEY = 'd'
    NORTH_KEY = 'w'
    SOUTH_KEY = 's'
    STOP_KEY = 'q'

    def __init__( self, index = 0 ):
        # Direction of the previous move; reused when no key maps to a move.
        self.lastMove = Directions.STOP
        self.index = index
        # Most recent non-empty set of pressed/queued keys.
        self.keys = []

    def getAction( self, state):
        """Return a legal action based on the current keyboard state."""
        from graphicsUtils import keys_waiting
        from graphicsUtils import keys_pressed
        keys = keys_waiting() + keys_pressed()
        # Idiom fix: truthiness test instead of 'keys != []'.
        if keys:
            self.keys = keys

        legal = state.getLegalActions(self.index)
        move = self.getMove(legal)

        if move == Directions.STOP:
            # Try to move in the same direction as before
            if self.lastMove in legal:
                move = self.lastMove

        if (self.STOP_KEY in self.keys) and Directions.STOP in legal: move = Directions.STOP

        if move not in legal:
            # No usable keypress: pick a random legal action so the game keeps moving.
            move = random.choice(legal)

        self.lastMove = move
        return move

    def getMove(self, legal):
        """Map pressed keys (letters or arrows) to a legal direction, or STOP."""
        move = Directions.STOP
        if   (self.WEST_KEY in self.keys or 'Left' in self.keys) and Directions.WEST in legal:  move = Directions.WEST
        if   (self.EAST_KEY in self.keys or 'Right' in self.keys) and Directions.EAST in legal: move = Directions.EAST
        if   (self.NORTH_KEY in self.keys or 'Up' in self.keys) and Directions.NORTH in legal:   move = Directions.NORTH
        if   (self.SOUTH_KEY in self.keys or 'Down' in self.keys) and Directions.SOUTH in legal: move = Directions.SOUTH
        return move
|
||||||
|
|
||||||
|
class KeyboardAgent2(KeyboardAgent):
    """
    A second agent controlled by the keyboard (IJKL keys; no arrow aliases).
    """
    # NOTE: Arrow keys also work.
    WEST_KEY = 'j'
    EAST_KEY = "l"
    NORTH_KEY = 'i'
    SOUTH_KEY = 'k'
    STOP_KEY = 'u'

    def getMove(self, legal):
        """Map this agent's keys to a legal direction, or STOP.

        Later bindings win when several keys are held, matching the
        base-class precedence (west, east, north, south).
        """
        bindings = ((self.WEST_KEY, Directions.WEST),
                    (self.EAST_KEY, Directions.EAST),
                    (self.NORTH_KEY, Directions.NORTH),
                    (self.SOUTH_KEY, Directions.SOUTH))
        move = Directions.STOP
        for key, direction in bindings:
            if key in self.keys and direction in legal:
                move = direction
        return move
|
||||||
149
p3_rl/layout.py
Normal file
149
p3_rl/layout.py
Normal file
@@ -0,0 +1,149 @@
|
|||||||
|
# layout.py
|
||||||
|
# ---------
|
||||||
|
# Licensing Information: You are free to use or extend these projects for
|
||||||
|
# educational purposes provided that (1) you do not distribute or publish
|
||||||
|
# solutions, (2) you retain this notice, and (3) you provide clear
|
||||||
|
# attribution to UC Berkeley, including a link to http://ai.berkeley.edu.
|
||||||
|
#
|
||||||
|
# Attribution Information: The Pacman AI projects were developed at UC Berkeley.
|
||||||
|
# The core projects and autograders were primarily created by John DeNero
|
||||||
|
# (denero@cs.berkeley.edu) and Dan Klein (klein@cs.berkeley.edu).
|
||||||
|
# Student side autograding was added by Brad Miller, Nick Hay, and
|
||||||
|
# Pieter Abbeel (pabbeel@cs.berkeley.edu).
|
||||||
|
|
||||||
|
|
||||||
|
from util import manhattanDistance
|
||||||
|
from game import Grid
|
||||||
|
import os
|
||||||
|
import random
|
||||||
|
|
||||||
|
VISIBILITY_MATRIX_CACHE = {}
|
||||||
|
|
||||||
|
class Layout:
    """
    A Layout manages the static information about the game board:
    walls, food, capsules, and initial agent positions.
    """

    def __init__(self, layoutText):
        # layoutText is a list of equal-length strings, top row first.
        self.width = len(layoutText[0])
        self.height= len(layoutText)
        self.walls = Grid(self.width, self.height, False)
        self.food = Grid(self.width, self.height, False)
        self.capsules = []
        # List of (agentIndex, (x, y)); converted to (isPacman, pos) below.
        self.agentPositions = []
        self.numGhosts = 0
        self.processLayoutText(layoutText)
        self.layoutText = layoutText
        self.totalFood = len(self.food.asList())
        # self.initializeVisibilityMatrix()

    def getNumGhosts(self):
        return self.numGhosts

    def initializeVisibilityMatrix(self):
        # Precompute, for each open cell and direction, the set of half-step
        # positions visible along that ray until a wall is hit.  Cached per
        # layout text since this is expensive.
        global VISIBILITY_MATRIX_CACHE
        if reduce(str.__add__, self.layoutText) not in VISIBILITY_MATRIX_CACHE:
            from game import Directions
            vecs = [(-0.5,0), (0.5,0),(0,-0.5),(0,0.5)]
            dirs = [Directions.NORTH, Directions.SOUTH, Directions.WEST, Directions.EAST]
            vis = Grid(self.width, self.height, {Directions.NORTH:set(), Directions.SOUTH:set(), Directions.EAST:set(), Directions.WEST:set(), Directions.STOP:set()})
            for x in range(self.width):
                for y in range(self.height):
                    if self.walls[x][y] == False:
                        for vec, direction in zip(vecs, dirs):
                            dx, dy = vec
                            nextx, nexty = x + dx, y + dy
                            while (nextx + nexty) != int(nextx) + int(nexty) or not self.walls[int(nextx)][int(nexty)] :
                                vis[x][y][direction].add((nextx, nexty))
                                # Bug fix: advance the ray from its current
                                # position.  The original re-assigned
                                # 'x + dx, y + dy' (the first step) here,
                                # so the loop never terminated once entered.
                                nextx, nexty = nextx + dx, nexty + dy
            self.visibility = vis
            VISIBILITY_MATRIX_CACHE[reduce(str.__add__, self.layoutText)] = vis
        else:
            self.visibility = VISIBILITY_MATRIX_CACHE[reduce(str.__add__, self.layoutText)]

    def isWall(self, pos):
        x, col = pos
        return self.walls[x][col]

    def getRandomLegalPosition(self):
        # Rejection-sample until a non-wall cell is found.
        x = random.choice(range(self.width))
        y = random.choice(range(self.height))
        while self.isWall( (x, y) ):
            x = random.choice(range(self.width))
            y = random.choice(range(self.height))
        return (x,y)

    def getRandomCorner(self):
        poses = [(1,1), (1, self.height - 2), (self.width - 2, 1), (self.width - 2, self.height - 2)]
        return random.choice(poses)

    def getFurthestCorner(self, pacPos):
        # Corner maximizing Manhattan distance from pacPos.
        poses = [(1,1), (1, self.height - 2), (self.width - 2, 1), (self.width - 2, self.height - 2)]
        dist, pos = max([(manhattanDistance(p, pacPos), p) for p in poses])
        return pos

    def isVisibleFrom(self, ghostPos, pacPos, pacDirection):
        row, col = [int(x) for x in pacPos]
        return ghostPos in self.visibility[row][col][pacDirection]

    def __str__(self):
        return "\n".join(self.layoutText)

    def deepCopy(self):
        return Layout(self.layoutText[:])

    def processLayoutText(self, layoutText):
        """
        Coordinates are flipped from the input format to the (x,y) convention here

        The shape of the maze.  Each character
        represents a different type of object.
         % - Wall
         . - Food
         o - Capsule
         G - Ghost
         P - Pacman
        Other characters are ignored.
        """
        maxY = self.height - 1
        for y in range(self.height):
            for x in range(self.width):
                layoutChar = layoutText[maxY - y][x]
                self.processLayoutChar(x, y, layoutChar)
        self.agentPositions.sort()
        # First agent (index 0) is Pacman; the rest are ghosts.
        self.agentPositions = [ ( i == 0, pos) for i, pos in self.agentPositions]

    def processLayoutChar(self, x, y, layoutChar):
        # Record a single layout character at grid position (x, y).
        if layoutChar == '%':
            self.walls[x][y] = True
        elif layoutChar == '.':
            self.food[x][y] = True
        elif layoutChar == 'o':
            self.capsules.append((x, y))
        elif layoutChar == 'P':
            self.agentPositions.append( (0, (x, y) ) )
        elif layoutChar in ['G']:
            self.agentPositions.append( (1, (x, y) ) )
            self.numGhosts += 1
        elif layoutChar in ['1', '2', '3', '4']:
            # Numbered ghosts keep their explicit agent index.
            self.agentPositions.append( (int(layoutChar), (x,y)))
            self.numGhosts += 1
|
||||||
|
def getLayout(name, back = 2):
    """Load a layout by name, searching ./layouts then the current directory,
    then retrying from up to `back` parent directories.  Returns the Layout,
    or None if the file cannot be found anywhere.
    """
    # Idiom fix throughout: identity comparison for None (was '== None').
    if name.endswith('.lay'):
        layout = tryToLoad('layouts/' + name)
        if layout is None: layout = tryToLoad(name)
    else:
        layout = tryToLoad('layouts/' + name + '.lay')
        if layout is None: layout = tryToLoad(name + '.lay')
    if layout is None and back >= 0:
        # Retry from the parent directory, restoring the cwd afterwards.
        curdir = os.path.abspath('.')
        os.chdir('..')
        layout = getLayout(name, back -1)
        os.chdir(curdir)
    return layout
|
||||||
|
|
||||||
|
def tryToLoad(fullname):
    """Return a Layout parsed from the file at fullname, or None if absent."""
    if not os.path.exists(fullname):
        return None
    # 'with' closes the file on all paths, like the original try/finally.
    with open(fullname) as layoutFile:
        return Layout([line.strip() for line in layoutFile])
|
||||||
7
p3_rl/layouts/capsuleClassic.lay
Normal file
7
p3_rl/layouts/capsuleClassic.lay
Normal file
@@ -0,0 +1,7 @@
|
|||||||
|
%%%%%%%%%%%%%%%%%%%
|
||||||
|
%G. G ....%
|
||||||
|
%.% % %%%%%% %.%%.%
|
||||||
|
%.%o% % o% %.o%.%
|
||||||
|
%.%%%.% %%% %..%.%
|
||||||
|
%..... P %..%G%
|
||||||
|
%%%%%%%%%%%%%%%%%%%%
|
||||||
9
p3_rl/layouts/contestClassic.lay
Normal file
9
p3_rl/layouts/contestClassic.lay
Normal file
@@ -0,0 +1,9 @@
|
|||||||
|
%%%%%%%%%%%%%%%%%%%%
|
||||||
|
%o...%........%...o%
|
||||||
|
%.%%.%.%%..%%.%.%%.%
|
||||||
|
%...... G GG%......%
|
||||||
|
%.%.%%.%% %%%.%%.%.%
|
||||||
|
%.%....% ooo%.%..%.%
|
||||||
|
%.%.%%.% %% %.%.%%.%
|
||||||
|
%o%......P....%....%
|
||||||
|
%%%%%%%%%%%%%%%%%%%%
|
||||||
11
p3_rl/layouts/mediumClassic.lay
Normal file
11
p3_rl/layouts/mediumClassic.lay
Normal file
@@ -0,0 +1,11 @@
|
|||||||
|
%%%%%%%%%%%%%%%%%%%%
|
||||||
|
%o...%........%....%
|
||||||
|
%.%%.%.%%%%%%.%.%%.%
|
||||||
|
%.%..............%.%
|
||||||
|
%.%.%%.%% %%.%%.%.%
|
||||||
|
%......%G G%......%
|
||||||
|
%.%.%%.%%%%%%.%%.%.%
|
||||||
|
%.%..............%.%
|
||||||
|
%.%%.%.%%%%%%.%.%%.%
|
||||||
|
%....%...P....%...o%
|
||||||
|
%%%%%%%%%%%%%%%%%%%%
|
||||||
7
p3_rl/layouts/mediumGrid.lay
Normal file
7
p3_rl/layouts/mediumGrid.lay
Normal file
@@ -0,0 +1,7 @@
|
|||||||
|
%%%%%%%%
|
||||||
|
%P %
|
||||||
|
% .% . %
|
||||||
|
% % %
|
||||||
|
% .% . %
|
||||||
|
% G%
|
||||||
|
%%%%%%%%
|
||||||
5
p3_rl/layouts/minimaxClassic.lay
Normal file
5
p3_rl/layouts/minimaxClassic.lay
Normal file
@@ -0,0 +1,5 @@
|
|||||||
|
%%%%%%%%%
|
||||||
|
%.P G%
|
||||||
|
% %.%G%%%
|
||||||
|
%G %%%
|
||||||
|
%%%%%%%%%
|
||||||
9
p3_rl/layouts/openClassic.lay
Normal file
9
p3_rl/layouts/openClassic.lay
Normal file
@@ -0,0 +1,9 @@
|
|||||||
|
%%%%%%%%%%%%%%%%%%%%%%%%%
|
||||||
|
%.. P .... .... %
|
||||||
|
%.. ... ... ... ... %
|
||||||
|
%.. ... ... ... ... %
|
||||||
|
%.. .... .... G %
|
||||||
|
%.. ... ... ... ... %
|
||||||
|
%.. ... ... ... ... %
|
||||||
|
%.. .... .... o%
|
||||||
|
%%%%%%%%%%%%%%%%%%%%%%%%%
|
||||||
27
p3_rl/layouts/originalClassic.lay
Normal file
27
p3_rl/layouts/originalClassic.lay
Normal file
@@ -0,0 +1,27 @@
|
|||||||
|
%%%%%%%%%%%%%%%%%%%%%%%%%%%%
|
||||||
|
%............%%............%
|
||||||
|
%.%%%%.%%%%%.%%.%%%%%.%%%%.%
|
||||||
|
%o%%%%.%%%%%.%%.%%%%%.%%%%o%
|
||||||
|
%.%%%%.%%%%%.%%.%%%%%.%%%%.%
|
||||||
|
%..........................%
|
||||||
|
%.%%%%.%%.%%%%%%%%.%%.%%%%.%
|
||||||
|
%.%%%%.%%.%%%%%%%%.%%.%%%%.%
|
||||||
|
%......%%....%%....%%......%
|
||||||
|
%%%%%%.%%%%% %% %%%%%.%%%%%%
|
||||||
|
%%%%%%.%%%%% %% %%%%%.%%%%%%
|
||||||
|
%%%%%%.% %.%%%%%%
|
||||||
|
%%%%%%.% %%%% %%%% %.%%%%%%
|
||||||
|
% . %G GG G% . %
|
||||||
|
%%%%%%.% %%%%%%%%%% %.%%%%%%
|
||||||
|
%%%%%%.% %.%%%%%%
|
||||||
|
%%%%%%.% %%%%%%%%%% %.%%%%%%
|
||||||
|
%............%%............%
|
||||||
|
%.%%%%.%%%%%.%%.%%%%%.%%%%.%
|
||||||
|
%.%%%%.%%%%%.%%.%%%%%.%%%%.%
|
||||||
|
%o..%%....... .......%%..o%
|
||||||
|
%%%.%%.%%.%%%%%%%%.%%.%%.%%%
|
||||||
|
%%%.%%.%%.%%%%%%%%.%%.%%.%%%
|
||||||
|
%......%%....%%....%%......%
|
||||||
|
%.%%%%%%%%%%.%%.%%%%%%%%%%.%
|
||||||
|
%.............P............%
|
||||||
|
%%%%%%%%%%%%%%%%%%%%%%%%%%%%
|
||||||
7
p3_rl/layouts/smallClassic.lay
Normal file
7
p3_rl/layouts/smallClassic.lay
Normal file
@@ -0,0 +1,7 @@
|
|||||||
|
%%%%%%%%%%%%%%%%%%%%
|
||||||
|
%......%G G%......%
|
||||||
|
%.%%...%% %%...%%.%
|
||||||
|
%.%o.%........%.o%.%
|
||||||
|
%.%%.%.%%%%%%.%.%%.%
|
||||||
|
%........P.........%
|
||||||
|
%%%%%%%%%%%%%%%%%%%%
|
||||||
7
p3_rl/layouts/smallGrid.lay
Normal file
7
p3_rl/layouts/smallGrid.lay
Normal file
@@ -0,0 +1,7 @@
|
|||||||
|
%%%%%%%
|
||||||
|
% P %
|
||||||
|
% %%% %
|
||||||
|
% %. %
|
||||||
|
% %%% %
|
||||||
|
%. G %
|
||||||
|
%%%%%%%
|
||||||
10
p3_rl/layouts/testClassic.lay
Normal file
10
p3_rl/layouts/testClassic.lay
Normal file
@@ -0,0 +1,10 @@
|
|||||||
|
%%%%%
|
||||||
|
% . %
|
||||||
|
%.G.%
|
||||||
|
% . %
|
||||||
|
%. .%
|
||||||
|
% %
|
||||||
|
% .%
|
||||||
|
% %
|
||||||
|
%P .%
|
||||||
|
%%%%%
|
||||||
5
p3_rl/layouts/trappedClassic.lay
Normal file
5
p3_rl/layouts/trappedClassic.lay
Normal file
@@ -0,0 +1,5 @@
|
|||||||
|
%%%%%%%%
|
||||||
|
% P G%
|
||||||
|
%G%%%%%%
|
||||||
|
%.... %
|
||||||
|
%%%%%%%%
|
||||||
13
p3_rl/layouts/trickyClassic.lay
Normal file
13
p3_rl/layouts/trickyClassic.lay
Normal file
@@ -0,0 +1,13 @@
|
|||||||
|
%%%%%%%%%%%%%%%%%%%%
|
||||||
|
%o...%........%...o%
|
||||||
|
%.%%.%.%%..%%.%.%%.%
|
||||||
|
%.%.....%..%.....%.%
|
||||||
|
%.%.%%.%% %%.%%.%.%
|
||||||
|
%...... GGGG%.%....%
|
||||||
|
%.%....%%%%%%.%..%.%
|
||||||
|
%.%....% oo%.%..%.%
|
||||||
|
%.%....% %%%%.%..%.%
|
||||||
|
%.%...........%..%.%
|
||||||
|
%.%%.%.%%%%%%.%.%%.%
|
||||||
|
%o...%...P....%...o%
|
||||||
|
%%%%%%%%%%%%%%%%%%%%
|
||||||
258
p3_rl/learningAgents.py
Normal file
258
p3_rl/learningAgents.py
Normal file
@@ -0,0 +1,258 @@
|
|||||||
|
# learningAgents.py
|
||||||
|
# -----------------
|
||||||
|
# Licensing Information: You are free to use or extend these projects for
|
||||||
|
# educational purposes provided that (1) you do not distribute or publish
|
||||||
|
# solutions, (2) you retain this notice, and (3) you provide clear
|
||||||
|
# attribution to UC Berkeley, including a link to http://ai.berkeley.edu.
|
||||||
|
#
|
||||||
|
# Attribution Information: The Pacman AI projects were developed at UC Berkeley.
|
||||||
|
# The core projects and autograders were primarily created by John DeNero
|
||||||
|
# (denero@cs.berkeley.edu) and Dan Klein (klein@cs.berkeley.edu).
|
||||||
|
# Student side autograding was added by Brad Miller, Nick Hay, and
|
||||||
|
# Pieter Abbeel (pabbeel@cs.berkeley.edu).
|
||||||
|
|
||||||
|
|
||||||
|
from game import Directions, Agent, Actions
|
||||||
|
|
||||||
|
import random,util,time
|
||||||
|
|
||||||
|
class ValueEstimationAgent(Agent):
    """
    Abstract agent which assigns values to (state,action)
    Q-Values for an environment. As well as a value to a
    state and a policy given respectively by,

    V(s) = max_{a in actions} Q(s,a)
    policy(s) = arg_max_{a in actions} Q(s,a)

    Both ValueIterationAgent and QLearningAgent inherit
    from this agent. While a ValueIterationAgent has
    a model of the environment via a MarkovDecisionProcess
    (see mdp.py) that is used to estimate Q-Values before
    ever actually acting, the QLearningAgent estimates
    Q-Values while acting in the environment.
    """

    def __init__(self, alpha=1.0, epsilon=0.05, gamma=0.8, numTraining = 10):
        """
        Sets options, which can be passed in via the Pacman command line using -a alpha=0.5,...
        alpha    - learning rate
        epsilon  - exploration rate
        gamma    - discount factor
        numTraining - number of training episodes, i.e. no learning after these many episodes
        """
        # Coerce to canonical types so string values from the command line work too.
        self.alpha = float(alpha)
        self.epsilon = float(epsilon)
        self.discount = float(gamma)
        self.numTraining = int(numTraining)

    ####################################
    #    Override These Functions      #
    ####################################
    def getQValue(self, state, action):
        """
        Should return Q(state,action)
        """
        util.raiseNotDefined()

    def getValue(self, state):
        """
        What is the value of this state under the best action?
        Concretely, this is given by

        V(s) = max_{a in actions} Q(s,a)
        """
        util.raiseNotDefined()

    def getPolicy(self, state):
        """
        What is the best action to take in the state. Note that because
        we might want to explore, this might not coincide with getAction
        Concretely, this is given by

        policy(s) = arg_max_{a in actions} Q(s,a)

        If many actions achieve the maximal Q-value,
        it doesn't matter which is selected.
        """
        util.raiseNotDefined()

    def getAction(self, state):
        """
        state: can call state.getLegalActions()
        Choose an action and return it.
        """
        util.raiseNotDefined()
|
||||||
|
|
||||||
|
class ReinforcementAgent(ValueEstimationAgent):
    """
    Abstract Reinforcemnt Agent: A ValueEstimationAgent
    which estimates Q-Values (as well as policies) from experience
    rather than a model

    What you need to know:
        - The environment will call
          observeTransition(state,action,nextState,deltaReward),
          which will call update(state, action, nextState, deltaReward)
          which you should override.
        - Use self.getLegalActions(state) to know which actions
          are available in a state
    """
    ####################################
    #    Override These Functions      #
    ####################################

    def update(self, state, action, nextState, reward):
        """
        This class will call this function, which you write, after
        observing a transition and reward
        """
        util.raiseNotDefined()

    ####################################
    #    Read These Functions          #
    ####################################

    def getLegalActions(self,state):
        """
        Get the actions available for a given
        state. This is what you should use to
        obtain legal actions for a state
        """
        return self.actionFn(state)

    def observeTransition(self, state,action,nextState,deltaReward):
        """
        Called by environment to inform agent that a transition has
        been observed. This will result in a call to self.update
        on the same arguments

        NOTE: Do *not* override or call this function
        """
        # Accumulate the episode's total reward, then delegate learning
        # to the subclass's update().
        self.episodeRewards += deltaReward
        self.update(state,action,nextState,deltaReward)

    def startEpisode(self):
        """
        Called by environment when new episode is starting
        """
        self.lastState = None
        self.lastAction = None
        self.episodeRewards = 0.0

    def stopEpisode(self):
        """
        Called by environment when episode is done
        """
        # Rewards are tallied separately for training and testing episodes.
        if self.episodesSoFar < self.numTraining:
            self.accumTrainRewards += self.episodeRewards
        else:
            self.accumTestRewards += self.episodeRewards
        self.episodesSoFar += 1
        if self.episodesSoFar >= self.numTraining:
            # Take off the training wheels
            self.epsilon = 0.0    # no exploration
            self.alpha = 0.0      # no learning

    def isInTraining(self):
        # True while fewer than numTraining episodes have completed.
        return self.episodesSoFar < self.numTraining

    def isInTesting(self):
        return not self.isInTraining()

    def __init__(self, actionFn = None, numTraining=100, epsilon=0.5, alpha=0.5, gamma=1):
        """
        actionFn: Function which takes a state and returns the list of legal actions

        alpha    - learning rate
        epsilon  - exploration rate
        gamma    - discount factor
        numTraining - number of training episodes, i.e. no learning after these many episodes
        """
        # Default: ask the state itself for its legal actions.
        if actionFn == None:
            actionFn = lambda state: state.getLegalActions()
        self.actionFn = actionFn
        self.episodesSoFar = 0        # episodes completed so far
        self.accumTrainRewards = 0.0  # total reward over training episodes
        self.accumTestRewards = 0.0   # total reward over testing episodes
        self.numTraining = int(numTraining)
        self.epsilon = float(epsilon)
        self.alpha = float(alpha)
        self.discount = float(gamma)

    ################################
    # Controls needed for Crawler  #
    ################################
    def setEpsilon(self, epsilon):
        self.epsilon = epsilon

    def setLearningRate(self, alpha):
        self.alpha = alpha

    def setDiscount(self, discount):
        self.discount = discount

    def doAction(self,state,action):
        """
        Called by inherited class when
        an action is taken in a state
        """
        # Remember the (state, action) pair so the next observation can
        # form a complete transition.
        self.lastState = state
        self.lastAction = action

    ###################
    # Pacman Specific #
    ###################
    def observationFunction(self, state):
        """
        This is where we ended up after our last action.
        The simulation should somehow ensure this is called
        """
        if not self.lastState is None:
            # Reward is the change in game score since the last state.
            reward = state.getScore() - self.lastState.getScore()
            self.observeTransition(self.lastState, self.lastAction, state, reward)
        return state

    def registerInitialState(self, state):
        self.startEpisode()
        if self.episodesSoFar == 0:
            print 'Beginning %d episodes of Training' % (self.numTraining)

    def final(self, state):
        """
        Called by Pacman game at the terminal state
        """
        # Record the final transition of the episode, then close it out.
        deltaReward = state.getScore() - self.lastState.getScore()
        self.observeTransition(self.lastState, self.lastAction, state, deltaReward)
        self.stopEpisode()

        # Make sure we have this var (first call may precede initialization)
        if not 'episodeStartTime' in self.__dict__:
            self.episodeStartTime = time.time()
        if not 'lastWindowAccumRewards' in self.__dict__:
            self.lastWindowAccumRewards = 0.0
        self.lastWindowAccumRewards += state.getScore()

        # Print a progress report every NUM_EPS_UPDATE episodes.
        NUM_EPS_UPDATE = 100
        if self.episodesSoFar % NUM_EPS_UPDATE == 0:
            print 'Reinforcement Learning Status:'
            windowAvg = self.lastWindowAccumRewards / float(NUM_EPS_UPDATE)
            if self.episodesSoFar <= self.numTraining:
                trainAvg = self.accumTrainRewards / float(self.episodesSoFar)
                print '\tCompleted %d out of %d training episodes' % (
                       self.episodesSoFar,self.numTraining)
                print '\tAverage Rewards over all training: %.2f' % (
                        trainAvg)
            else:
                testAvg = float(self.accumTestRewards) / (self.episodesSoFar - self.numTraining)
                print '\tCompleted %d test episodes' % (self.episodesSoFar - self.numTraining)
                print '\tAverage Rewards over testing: %.2f' % testAvg
            print '\tAverage Rewards for last %d episodes: %.2f' % (
                    NUM_EPS_UPDATE,windowAvg)
            print '\tEpisode took %.2f seconds' % (time.time() - self.episodeStartTime)
            self.lastWindowAccumRewards = 0.0
            self.episodeStartTime = time.time()

        if self.episodesSoFar == self.numTraining:
            msg = 'Training Done (turning off epsilon and alpha)'
            print '%s\n%s' % (msg,'-' * len(msg))
||||||
67
p3_rl/mdp.py
Normal file
67
p3_rl/mdp.py
Normal file
@@ -0,0 +1,67 @@
|
|||||||
|
# mdp.py
|
||||||
|
# ------
|
||||||
|
# Licensing Information: You are free to use or extend these projects for
|
||||||
|
# educational purposes provided that (1) you do not distribute or publish
|
||||||
|
# solutions, (2) you retain this notice, and (3) you provide clear
|
||||||
|
# attribution to UC Berkeley, including a link to http://ai.berkeley.edu.
|
||||||
|
#
|
||||||
|
# Attribution Information: The Pacman AI projects were developed at UC Berkeley.
|
||||||
|
# The core projects and autograders were primarily created by John DeNero
|
||||||
|
# (denero@cs.berkeley.edu) and Dan Klein (klein@cs.berkeley.edu).
|
||||||
|
# Student side autograding was added by Brad Miller, Nick Hay, and
|
||||||
|
# Pieter Abbeel (pabbeel@cs.berkeley.edu).
|
||||||
|
|
||||||
|
|
||||||
|
import random
|
||||||
|
|
||||||
|
class MarkovDecisionProcess:
    # Abstract interface for an MDP: states, actions, transition model and
    # rewards.  Each method body is the bare name 'abstract', which raises
    # a NameError if a subclass forgets to override it.

    def getStates(self):
        """
        Return a list of all states in the MDP.
        Not generally possible for large MDPs.
        """
        abstract

    def getStartState(self):
        """
        Return the start state of the MDP.
        """
        abstract

    def getPossibleActions(self, state):
        """
        Return list of possible actions from 'state'.
        """
        abstract

    def getTransitionStatesAndProbs(self, state, action):
        """
        Returns list of (nextState, prob) pairs
        representing the states reachable
        from 'state' by taking 'action' along
        with their transition probabilities.

        Note that in Q-Learning and reinforcment
        learning in general, we do not know these
        probabilities nor do we directly model them.
        """
        abstract

    def getReward(self, state, action, nextState):
        """
        Get the reward for the state, action, nextState transition.

        Not available in reinforcement learning.
        """
        abstract

    def isTerminal(self, state):
        """
        Returns true if the current state is a terminal state. By convention,
        a terminal state has zero future rewards. Sometimes the terminal state(s)
        may have no possible actions. It is also common to think of the terminal
        state as having a self-loop action 'pass' with zero reward; the formulations
        are equivalent.
        """
        abstract
||||||
684
p3_rl/pacman.py
Normal file
684
p3_rl/pacman.py
Normal file
@@ -0,0 +1,684 @@
|
|||||||
|
# pacman.py
|
||||||
|
# ---------
|
||||||
|
# Licensing Information: You are free to use or extend these projects for
|
||||||
|
# educational purposes provided that (1) you do not distribute or publish
|
||||||
|
# solutions, (2) you retain this notice, and (3) you provide clear
|
||||||
|
# attribution to UC Berkeley, including a link to http://ai.berkeley.edu.
|
||||||
|
#
|
||||||
|
# Attribution Information: The Pacman AI projects were developed at UC Berkeley.
|
||||||
|
# The core projects and autograders were primarily created by John DeNero
|
||||||
|
# (denero@cs.berkeley.edu) and Dan Klein (klein@cs.berkeley.edu).
|
||||||
|
# Student side autograding was added by Brad Miller, Nick Hay, and
|
||||||
|
# Pieter Abbeel (pabbeel@cs.berkeley.edu).
|
||||||
|
|
||||||
|
|
||||||
|
"""
|
||||||
|
Pacman.py holds the logic for the classic pacman game along with the main
|
||||||
|
code to run a game. This file is divided into three sections:
|
||||||
|
|
||||||
|
(i) Your interface to the pacman world:
|
||||||
|
Pacman is a complex environment. You probably don't want to
|
||||||
|
read through all of the code we wrote to make the game runs
|
||||||
|
correctly. This section contains the parts of the code
|
||||||
|
that you will need to understand in order to complete the
|
||||||
|
project. There is also some code in game.py that you should
|
||||||
|
understand.
|
||||||
|
|
||||||
|
(ii) The hidden secrets of pacman:
|
||||||
|
This section contains all of the logic code that the pacman
|
||||||
|
environment uses to decide who can move where, who dies when
|
||||||
|
things collide, etc. You shouldn't need to read this section
|
||||||
|
of code, but you can if you want.
|
||||||
|
|
||||||
|
(iii) Framework to start a game:
|
||||||
|
The final section contains the code for reading the command
|
||||||
|
you use to set up the game, then starting up a new game, along with
|
||||||
|
linking in all the external parts (agent functions, graphics).
|
||||||
|
Check this section out to see all the options available to you.
|
||||||
|
|
||||||
|
To play your first game, type 'python pacman.py' from the command line.
|
||||||
|
The keys are 'a', 's', 'd', and 'w' to move (or arrow keys). Have fun!
|
||||||
|
"""
|
||||||
|
from game import GameStateData
|
||||||
|
from game import Game
|
||||||
|
from game import Directions
|
||||||
|
from game import Actions
|
||||||
|
from util import nearestPoint
|
||||||
|
from util import manhattanDistance
|
||||||
|
import util, layout
|
||||||
|
import sys, types, time, random, os
|
||||||
|
|
||||||
|
###################################################
|
||||||
|
# YOUR INTERFACE TO THE PACMAN WORLD: A GameState #
|
||||||
|
###################################################
|
||||||
|
|
||||||
|
class GameState:
    """
    A GameState specifies the full game state, including the food, capsules,
    agent configurations and score changes.

    GameStates are used by the Game object to capture the actual state of the game and
    can be used by agents to reason about the game.

    Much of the information in a GameState is stored in a GameStateData object. We
    strongly suggest that you access that data via the accessor methods below rather
    than referring to the GameStateData object directly.

    Note that in classic Pacman, Pacman is always agent 0.
    """

    ####################################################
    # Accessor methods: use these to access state data #
    ####################################################

    # static variable keeps track of which states have had getLegalActions called
    explored = set()
    def getAndResetExplored():
        # Return a copy of the explored set and clear the original.
        tmp = GameState.explored.copy()
        GameState.explored = set()
        return tmp
    getAndResetExplored = staticmethod(getAndResetExplored)

    def getLegalActions( self, agentIndex=0 ):
        """
        Returns the legal actions for the agent specified.
        """
#        GameState.explored.add(self)
        # Terminal states have no legal actions.
        if self.isWin() or self.isLose(): return []

        if agentIndex == 0:  # Pacman is moving
            return PacmanRules.getLegalActions( self )
        else:
            return GhostRules.getLegalActions( self, agentIndex )

    def generateSuccessor( self, agentIndex, action):
        """
        Returns the successor state after the specified agent takes the action.
        """
        # Check that successors exist
        if self.isWin() or self.isLose(): raise Exception('Can\'t generate a successor of a terminal state.')

        # Copy current state
        state = GameState(self)

        # Let agent's logic deal with its action's effects on the board
        if agentIndex == 0:  # Pacman is moving
            state.data._eaten = [False for i in range(state.getNumAgents())]
            PacmanRules.applyAction( state, action )
        else:                # A ghost is moving
            GhostRules.applyAction( state, action, agentIndex )

        # Time passes
        if agentIndex == 0:
            state.data.scoreChange += -TIME_PENALTY # Penalty for waiting around
        else:
            GhostRules.decrementTimer( state.data.agentStates[agentIndex] )

        # Resolve multi-agent effects (Pacman/ghost collisions)
        GhostRules.checkDeath( state, agentIndex )

        # Book keeping
        state.data._agentMoved = agentIndex
        state.data.score += state.data.scoreChange
        GameState.explored.add(self)
        GameState.explored.add(state)
        return state

    def getLegalPacmanActions( self ):
        return self.getLegalActions( 0 )

    def generatePacmanSuccessor( self, action ):
        """
        Generates the successor state after the specified pacman move
        """
        return self.generateSuccessor( 0, action )

    def getPacmanState( self ):
        """
        Returns an AgentState object for pacman (in game.py)

        state.pos gives the current position
        state.direction gives the travel vector
        """
        # Return a copy so callers cannot mutate the stored agent state.
        return self.data.agentStates[0].copy()

    def getPacmanPosition( self ):
        return self.data.agentStates[0].getPosition()

    def getGhostStates( self ):
        return self.data.agentStates[1:]

    def getGhostState( self, agentIndex ):
        # Agent 0 is Pacman; ghosts occupy indices 1..numAgents-1.
        if agentIndex == 0 or agentIndex >= self.getNumAgents():
            raise Exception("Invalid index passed to getGhostState")
        return self.data.agentStates[agentIndex]

    def getGhostPosition( self, agentIndex ):
        if agentIndex == 0:
            raise Exception("Pacman's index passed to getGhostPosition")
        return self.data.agentStates[agentIndex].getPosition()

    def getGhostPositions(self):
        return [s.getPosition() for s in self.getGhostStates()]

    def getNumAgents( self ):
        return len( self.data.agentStates )

    def getScore( self ):
        return float(self.data.score)

    def getCapsules(self):
        """
        Returns a list of positions (x,y) of the remaining capsules.
        """
        return self.data.capsules

    def getNumFood( self ):
        return self.data.food.count()

    def getFood(self):
        """
        Returns a Grid of boolean food indicator variables.

        Grids can be accessed via list notation, so to check
        if there is food at (x,y), just call

        currentFood = state.getFood()
        if currentFood[x][y] == True: ...
        """
        return self.data.food

    def getWalls(self):
        """
        Returns a Grid of boolean wall indicator variables.

        Grids can be accessed via list notation, so to check
        if there is a wall at (x,y), just call

        walls = state.getWalls()
        if walls[x][y] == True: ...
        """
        return self.data.layout.walls

    def hasFood(self, x, y):
        return self.data.food[x][y]

    def hasWall(self, x, y):
        return self.data.layout.walls[x][y]

    def isLose( self ):
        return self.data._lose

    def isWin( self ):
        return self.data._win

    #############################################
    #             Helper methods:               #
    # You shouldn't need to call these directly #
    #############################################

    def __init__( self, prevState = None ):
        """
        Generates a new state by copying information from its predecessor.
        """
        if prevState != None: # Copy the predecessor's data
            self.data = GameStateData(prevState.data)
        else:                 # Initial (empty) state
            self.data = GameStateData()

    def deepCopy( self ):
        state = GameState( self )
        state.data = self.data.deepCopy()
        return state

    def __eq__( self, other ):
        """
        Allows two states to be compared.
        """
        return hasattr(other, 'data') and self.data == other.data

    def __hash__( self ):
        """
        Allows states to be keys of dictionaries.
        """
        return hash( self.data )

    def __str__( self ):

        return str(self.data)

    def initialize( self, layout, numGhostAgents=1000 ):
        """
        Creates an initial game state from a layout array (see layout.py).
        """
        self.data.initialize(layout, numGhostAgents)
||||||
|
|
||||||
|
############################################################################
|
||||||
|
# THE HIDDEN SECRETS OF PACMAN #
|
||||||
|
# #
|
||||||
|
# You shouldn't need to look through the code in this section of the file. #
|
||||||
|
############################################################################
|
||||||
|
|
||||||
|
SCARED_TIME = 40    # Moves ghosts stay scared after Pacman eats a capsule
COLLISION_TOLERANCE = 0.7 # How close (Manhattan distance) ghosts must be to Pacman to kill
TIME_PENALTY = 1 # Number of points lost each round Pacman moves
||||||
|
|
||||||
|
class ClassicGameRules:
    """
    These game rules manage the control flow of a game, deciding when
    and how the game starts and ends.
    """
    def __init__(self, timeout=30):
        # timeout: time budget (reported by the getMax*/getMove* accessors
        # below) handed to the game framework for agent moves and startup.
        self.timeout = timeout

    def newGame( self, layout, pacmanAgent, ghostAgents, display, quiet = False, catchExceptions=False):
        # Use only as many ghosts as the layout provides starting slots for.
        agents = [pacmanAgent] + ghostAgents[:layout.getNumGhosts()]
        initState = GameState()
        initState.initialize( layout, len(ghostAgents) )
        game = Game(agents, display, self, catchExceptions=catchExceptions)
        game.state = initState
        # Keep a pristine copy of the start state for progress reporting.
        self.initialState = initState.deepCopy()
        self.quiet = quiet
        return game

    def process(self, state, game):
        """
        Checks to see whether it is time to end the game.
        """
        if state.isWin(): self.win(state, game)
        if state.isLose(): self.lose(state, game)

    def win( self, state, game ):
        if not self.quiet: print "Pacman emerges victorious! Score: %d" % state.data.score
        game.gameOver = True

    def lose( self, state, game ):
        if not self.quiet: print "Pacman died! Score: %d" % state.data.score
        game.gameOver = True

    def getProgress(self, game):
        # Fraction of the initial food still remaining (1.0 at the start).
        return float(game.state.getNumFood()) / self.initialState.getNumFood()

    def agentCrash(self, game, agentIndex):
        # Agent 0 is always Pacman; all other indices are ghosts.
        if agentIndex == 0:
            print "Pacman crashed"
        else:
            print "A ghost crashed"

    def getMaxTotalTime(self, agentIndex):
        return self.timeout

    def getMaxStartupTime(self, agentIndex):
        return self.timeout

    def getMoveWarningTime(self, agentIndex):
        return self.timeout

    def getMoveTimeout(self, agentIndex):
        return self.timeout

    def getMaxTimeWarnings(self, agentIndex):
        return 0
|
||||||
|
class PacmanRules:
    """
    These functions govern how pacman interacts with his environment under
    the classic game rules.
    """
    # Distance Pacman travels per move (grid units).
    PACMAN_SPEED=1

    def getLegalActions( state ):
        """
        Returns a list of possible actions.
        """
        return Actions.getPossibleActions( state.getPacmanState().configuration, state.data.layout.walls )
    getLegalActions = staticmethod( getLegalActions )

    def applyAction( state, action ):
        """
        Edits the state to reflect the results of the action.
        """
        legal = PacmanRules.getLegalActions( state )
        if action not in legal:
            raise Exception("Illegal action " + str(action))

        pacmanState = state.data.agentStates[0]

        # Update Configuration
        vector = Actions.directionToVector( action, PacmanRules.PACMAN_SPEED )
        pacmanState.configuration = pacmanState.configuration.generateSuccessor( vector )

        # Eat: consume whatever sits at the nearest grid point if Pacman is
        # within half a cell of it.
        # NOTE(review): 'next' shadows the builtin of the same name.
        next = pacmanState.configuration.getPosition()
        nearest = nearestPoint( next )
        if manhattanDistance( nearest, next ) <= 0.5 :
            # Remove food
            PacmanRules.consume( nearest, state )
    applyAction = staticmethod( applyAction )

    def consume( position, state ):
        # Apply the effects of Pacman occupying 'position': eat a food
        # pellet (+10; +500 and win on the last one) or a capsule (scares
        # every ghost for SCARED_TIME moves).
        x,y = position
        # Eat food
        if state.data.food[x][y]:
            state.data.scoreChange += 10
            # Copy the grid before mutating — presumably so predecessor
            # states keep their own food grid unchanged.
            state.data.food = state.data.food.copy()
            state.data.food[x][y] = False
            state.data._foodEaten = position
            # TODO: cache numFood?
            numFood = state.getNumFood()
            if numFood == 0 and not state.data._lose:
                state.data.scoreChange += 500
                state.data._win = True
        # Eat capsule
        if( position in state.getCapsules() ):
            state.data.capsules.remove( position )
            state.data._capsuleEaten = position
            # Reset all ghosts' scared timers
            for index in range( 1, len( state.data.agentStates ) ):
                state.data.agentStates[index].scaredTimer = SCARED_TIME
    consume = staticmethod( consume )
|
||||||
|
class GhostRules:
    """
    These functions dictate how ghosts interact with their environment.
    """
    # Base ghost speed; halved while a ghost is scared (see applyAction).
    GHOST_SPEED=1.0
    def getLegalActions( state, ghostIndex ):
        """
        Ghosts cannot stop, and cannot turn around unless they
        reach a dead end, but can turn 90 degrees at intersections.
        """
        conf = state.getGhostState( ghostIndex ).configuration
        possibleActions = Actions.getPossibleActions( conf, state.data.layout.walls )
        reverse = Actions.reverseDirection( conf.direction )
        if Directions.STOP in possibleActions:
            possibleActions.remove( Directions.STOP )
        # Forbid reversing unless it is the only remaining option (dead end).
        if reverse in possibleActions and len( possibleActions ) > 1:
            possibleActions.remove( reverse )
        return possibleActions
    getLegalActions = staticmethod( getLegalActions )

    def applyAction( state, action, ghostIndex):
        # Move the ghost; scared ghosts travel at half speed.
        legal = GhostRules.getLegalActions( state, ghostIndex )
        if action not in legal:
            raise Exception("Illegal ghost action " + str(action))

        ghostState = state.data.agentStates[ghostIndex]
        speed = GhostRules.GHOST_SPEED
        if ghostState.scaredTimer > 0: speed /= 2.0
        vector = Actions.directionToVector( action, speed )
        ghostState.configuration = ghostState.configuration.generateSuccessor( vector )
    applyAction = staticmethod( applyAction )

    def decrementTimer( ghostState):
        # Count down the scared timer; on the final scared move, snap the
        # ghost back onto a grid point (half-speed motion can leave it
        # between cells).
        timer = ghostState.scaredTimer
        if timer == 1:
            ghostState.configuration.pos = nearestPoint( ghostState.configuration.pos )
        ghostState.scaredTimer = max( 0, timer - 1 )
    decrementTimer = staticmethod( decrementTimer )

    def checkDeath( state, agentIndex):
        # Resolve Pacman/ghost collisions after agent 'agentIndex' moved.
        pacmanPosition = state.getPacmanPosition()
        if agentIndex == 0: # Pacman just moved; Anyone can kill him
            for index in range( 1, len( state.data.agentStates ) ):
                ghostState = state.data.agentStates[index]
                ghostPosition = ghostState.configuration.getPosition()
                if GhostRules.canKill( pacmanPosition, ghostPosition ):
                    GhostRules.collide( state, ghostState, index )
        else:
            # A ghost just moved; only that ghost can collide with Pacman.
            ghostState = state.data.agentStates[agentIndex]
            ghostPosition = ghostState.configuration.getPosition()
            if GhostRules.canKill( pacmanPosition, ghostPosition ):
                GhostRules.collide( state, ghostState, agentIndex )
    checkDeath = staticmethod( checkDeath )

    def collide( state, ghostState, agentIndex):
        # Scared ghost: Pacman eats it (+200) and it respawns at its start.
        # Otherwise Pacman dies (-500) unless the game is already won.
        if ghostState.scaredTimer > 0:
            state.data.scoreChange += 200
            GhostRules.placeGhost(state, ghostState)
            ghostState.scaredTimer = 0
            # Added for first-person
            state.data._eaten[agentIndex] = True
        else:
            if not state.data._win:
                state.data.scoreChange -= 500
                state.data._lose = True
    collide = staticmethod( collide )

    def canKill( pacmanPosition, ghostPosition ):
        # A collision occurs within COLLISION_TOLERANCE (Manhattan distance).
        return manhattanDistance( ghostPosition, pacmanPosition ) <= COLLISION_TOLERANCE
    canKill = staticmethod( canKill )

    def placeGhost(state, ghostState):
        # Send the ghost back to its starting configuration.
        ghostState.configuration = ghostState.start
    placeGhost = staticmethod( placeGhost )
|
|
||||||
|
#############################
|
||||||
|
# FRAMEWORK TO START A GAME #
|
||||||
|
#############################
|
||||||
|
|
||||||
|
def default(text):
    """
    Append the optparse default-value marker to a help string.

    text: the base help text for an option.
    Returns the text followed by ' [Default: %default]', which optparse
    expands to the option's actual default value when printing help.

    (Parameter renamed from 'str', which shadowed the builtin; every
    caller in this file passes it positionally.)
    """
    return text + ' [Default: %default]'
|
||||||
|
def parseAgentArgs(text):
    """
    Parse a comma-separated key=value option string into a dict.

    text: e.g. "epsilon=0.1,alpha=0.5" -> {'epsilon': '0.1', 'alpha': '0.5'}.
    A piece with no '=' becomes a flag mapped to 1, e.g. "quiet" ->
    {'quiet': 1}.  Values are kept as strings; agents convert them.
    Returns {} when text is None.

    (Parameter renamed from 'str', which shadowed the builtin.)
    """
    if text is None: return {}
    opts = {}
    for piece in text.split(','):
        if '=' in piece:
            # Split on the first '=' only, so values may themselves contain
            # '=' (the original split raised ValueError in that case).
            key, val = piece.split('=', 1)
        else:
            key, val = piece, 1
        opts[key] = val
    return opts
|
|
||||||
|
def readCommand( argv ):
    """
    Processes the command used to run pacman from the command line.

    Returns a dict of keyword arguments for runGames(): layout, pacman,
    ghosts, display, numGames, record, catchExceptions, timeout and
    (when -x is given) numTraining.  With --replay, replays the recorded
    game and calls sys.exit(0) instead of returning.
    """
    from optparse import OptionParser
    usageStr = """
    USAGE:      python pacman.py <options>
    EXAMPLES:   (1) python pacman.py
                    - starts an interactive game
                (2) python pacman.py --layout smallClassic --zoom 2
                OR  python pacman.py -l smallClassic -z 2
                    - starts an interactive game on a smaller board, zoomed in
    """
    parser = OptionParser(usageStr)

    parser.add_option('-n', '--numGames', dest='numGames', type='int',
                      help=default('the number of GAMES to play'), metavar='GAMES', default=1)
    parser.add_option('-l', '--layout', dest='layout',
                      help=default('the LAYOUT_FILE from which to load the map layout'),
                      metavar='LAYOUT_FILE', default='mediumClassic')
    parser.add_option('-p', '--pacman', dest='pacman',
                      help=default('the agent TYPE in the pacmanAgents module to use'),
                      metavar='TYPE', default='KeyboardAgent')
    parser.add_option('-t', '--textGraphics', action='store_true', dest='textGraphics',
                      help='Display output as text only', default=False)
    parser.add_option('-q', '--quietTextGraphics', action='store_true', dest='quietGraphics',
                      help='Generate minimal output and no graphics', default=False)
    parser.add_option('-g', '--ghosts', dest='ghost',
                      help=default('the ghost agent TYPE in the ghostAgents module to use'),
                      metavar = 'TYPE', default='RandomGhost')
    parser.add_option('-k', '--numghosts', type='int', dest='numGhosts',
                      help=default('The maximum number of ghosts to use'), default=4)
    parser.add_option('-z', '--zoom', type='float', dest='zoom',
                      help=default('Zoom the size of the graphics window'), default=1.0)
    parser.add_option('-f', '--fixRandomSeed', action='store_true', dest='fixRandomSeed',
                      help='Fixes the random seed to always play the same game', default=False)
    parser.add_option('-r', '--recordActions', action='store_true', dest='record',
                      help='Writes game histories to a file (named by the time they were played)', default=False)
    parser.add_option('--replay', dest='gameToReplay',
                      help='A recorded game file (pickle) to replay', default=None)
    parser.add_option('-a','--agentArgs',dest='agentArgs',
                      help='Comma separated values sent to agent. e.g. "opt1=val1,opt2,opt3=val3"')
    parser.add_option('-x', '--numTraining', dest='numTraining', type='int',
                      help=default('How many episodes are training (suppresses output)'), default=0)
    parser.add_option('--frameTime', dest='frameTime', type='float',
                      help=default('Time to delay between frames; <0 means keyboard'), default=0.1)
    parser.add_option('-c', '--catchExceptions', action='store_true', dest='catchExceptions',
                      help='Turns on exception handling and timeouts during games', default=False)
    parser.add_option('--timeout', dest='timeout', type='int',
                      help=default('Maximum length of time an agent can spend computing in a single game'), default=30)

    options, otherjunk = parser.parse_args(argv)
    if len(otherjunk) != 0:
        raise Exception('Command line input not understood: ' + str(otherjunk))
    args = dict()

    # Fix the random seed
    if options.fixRandomSeed: random.seed('cs188')

    # Choose a layout
    args['layout'] = layout.getLayout( options.layout )
    if args['layout'] == None: raise Exception("The layout " + options.layout + " cannot be found")

    # Choose a Pacman agent.  The keyboard agent is only allowed when a
    # graphical display will actually be shown.
    noKeyboard = options.gameToReplay == None and (options.textGraphics or options.quietGraphics)
    pacmanType = loadAgent(options.pacman, noKeyboard)
    agentOpts = parseAgentArgs(options.agentArgs)
    if options.numTraining > 0:
        args['numTraining'] = options.numTraining
        if 'numTraining' not in agentOpts: agentOpts['numTraining'] = options.numTraining
    pacman = pacmanType(**agentOpts) # Instantiate Pacman with agentArgs
    args['pacman'] = pacman

    # Don't display training games
    # NOTE(review): this branch checks the key 'numTrain' while -x above
    # stores 'numTraining' -- confirm which key agents actually pass.
    if 'numTrain' in agentOpts:
        options.numQuiet = int(agentOpts['numTrain'])
        options.numIgnore = int(agentOpts['numTrain'])

    # Choose a ghost agent (one instance per ghost, indexed from 1)
    ghostType = loadAgent(options.ghost, noKeyboard)
    args['ghosts'] = [ghostType( i+1 ) for i in range( options.numGhosts )]

    # Choose a display format
    if options.quietGraphics:
        import textDisplay
        args['display'] = textDisplay.NullGraphics()
    elif options.textGraphics:
        import textDisplay
        textDisplay.SLEEP_TIME = options.frameTime
        args['display'] = textDisplay.PacmanGraphics()
    else:
        import graphicsDisplay
        args['display'] = graphicsDisplay.PacmanGraphics(options.zoom, frameTime = options.frameTime)
    args['numGames'] = options.numGames
    args['record'] = options.record
    args['catchExceptions'] = options.catchExceptions
    args['timeout'] = options.timeout

    # Special case: recorded games don't use the runGames method or args structure
    if options.gameToReplay != None:
        print 'Replaying recorded game %s.' % options.gameToReplay
        import cPickle
        f = open(options.gameToReplay)
        try: recorded = cPickle.load(f)
        finally: f.close()
        recorded['display'] = args['display']
        replayGame(**recorded)
        sys.exit(0)

    return args
|
||||||
|
|
||||||
|
def loadAgent(pacman, nographics):
    """
    Search every directory on $PYTHONPATH (plus '.') for a module whose
    filename ends in 'gents.py' and which defines an agent class named
    `pacman`; return that class.

    Raises an Exception when no such agent exists, or when the agent lives
    in keyboardAgents.py but graphics are disabled (the keyboard agent
    needs a graphical display).
    """
    pythonPathStr = os.path.expandvars("$PYTHONPATH")
    # Windows joins path entries with ';', Unix with ':'.
    separator = ';' if ';' in pythonPathStr else ':'
    pythonPathDirs = pythonPathStr.split(separator)
    pythonPathDirs.append('.')

    for moduleDir in pythonPathDirs:
        if not os.path.isdir(moduleDir):
            continue
        candidates = [f for f in os.listdir(moduleDir) if f.endswith('gents.py')]
        for modulename in candidates:
            try:
                module = __import__(modulename[:-3])
            except ImportError:
                continue
            if pacman not in dir(module):
                continue
            if nographics and modulename == 'keyboardAgents.py':
                raise Exception('Using the keyboard requires graphics (not text display)')
            return getattr(module, pacman)
    raise Exception('The agent ' + pacman + ' is not specified in any *Agents.py.')
|
||||||
|
|
||||||
|
def replayGame( layout, actions, display ):
    """Re-run a recorded game by replaying its saved action sequence."""
    import pacmanAgents, ghostAgents
    rules = ClassicGameRules()
    # Agent choice is irrelevant during replay -- every move comes from
    # the recorded action list -- so use a greedy Pacman plus one random
    # ghost per ghost slot in the layout.
    agents = [pacmanAgents.GreedyAgent()]
    agents += [ghostAgents.RandomGhost(i + 1) for i in range(layout.getNumGhosts())]
    game = rules.newGame( layout, agents[0], agents[1:], display )
    state = game.state
    display.initialize(state.data)

    for action in actions:
        # Execute the recorded action
        state = state.generateSuccessor( *action )
        # Refresh the display
        display.update( state.data )
        # Allow for game specific conditions (winning, losing, etc.)
        rules.process(state, game)

    display.finish()
|
||||||
|
|
||||||
|
def runGames( layout, pacman, ghosts, display, numGames, record, numTraining = 0, catchExceptions=False, timeout=30 ):
    """
    Play numGames games of Pacman and return the list of non-training Game
    objects.

    The first numTraining games are run quietly (null graphics, suppressed
    output) and excluded from the returned list and statistics.  If record
    is true, each game's move history is pickled to a timestamped file.
    Aggregate score / win statistics are printed for non-training games.
    """
    import __main__
    # The graphics code reads the active display out of __main__'s globals.
    __main__.__dict__['_display'] = display

    rules = ClassicGameRules(timeout)
    games = []

    for i in range( numGames ):
        # Training episodes run before the displayed games.
        beQuiet = i < numTraining
        if beQuiet:
            # Suppress output and graphics
            import textDisplay
            gameDisplay = textDisplay.NullGraphics()
            rules.quiet = True
        else:
            gameDisplay = display
            rules.quiet = False
        game = rules.newGame( layout, pacman, ghosts, gameDisplay, beQuiet, catchExceptions)
        game.run()
        if not beQuiet: games.append(game)

        if record:
            import time, cPickle
            # File name combines the game index with month/day/hour/min/sec.
            fname = ('recorded-game-%d' % (i + 1)) + '-'.join([str(t) for t in time.localtime()[1:6]])
            f = file(fname, 'w')
            components = {'layout': layout, 'actions': game.moveHistory}
            cPickle.dump(components, f)
            f.close()

    # Only print statistics when at least one non-training game was played.
    if (numGames-numTraining) > 0:
        scores = [game.state.getScore() for game in games]
        wins = [game.state.isWin() for game in games]
        winRate = wins.count(True)/ float(len(wins))
        print 'Average Score:', sum(scores) / float(len(scores))
        print 'Scores:       ', ', '.join([str(score) for score in scores])
        print 'Win Rate:      %d/%d (%.2f)' % (wins.count(True), len(wins), winRate)
        print 'Record:       ', ', '.join([ ['Loss', 'Win'][int(w)] for w in wins])

    return games
|
||||||
|
|
||||||
|
if __name__ == '__main__':
    """
    The main function called when pacman.py is run
    from the command line:

    > python pacman.py

    See the usage string for more details.

    > python pacman.py --help
    """
    # Parse the command line into runGames() keyword arguments, then play.
    args = readCommand( sys.argv[1:] ) # Get game components based on input
    runGames( **args )

    # import cProfile
    # cProfile.run("runGames( **args )")
    pass
|
||||||
52
p3_rl/pacmanAgents.py
Normal file
52
p3_rl/pacmanAgents.py
Normal file
@@ -0,0 +1,52 @@
|
|||||||
|
# pacmanAgents.py
|
||||||
|
# ---------------
|
||||||
|
# Licensing Information: You are free to use or extend these projects for
|
||||||
|
# educational purposes provided that (1) you do not distribute or publish
|
||||||
|
# solutions, (2) you retain this notice, and (3) you provide clear
|
||||||
|
# attribution to UC Berkeley, including a link to http://ai.berkeley.edu.
|
||||||
|
#
|
||||||
|
# Attribution Information: The Pacman AI projects were developed at UC Berkeley.
|
||||||
|
# The core projects and autograders were primarily created by John DeNero
|
||||||
|
# (denero@cs.berkeley.edu) and Dan Klein (klein@cs.berkeley.edu).
|
||||||
|
# Student side autograding was added by Brad Miller, Nick Hay, and
|
||||||
|
# Pieter Abbeel (pabbeel@cs.berkeley.edu).
|
||||||
|
|
||||||
|
|
||||||
|
from pacman import Directions
|
||||||
|
from game import Agent
|
||||||
|
import random
|
||||||
|
import game
|
||||||
|
import util
|
||||||
|
|
||||||
|
class LeftTurnAgent(game.Agent):
    "An agent that turns left at every opportunity"

    def getAction(self, state):
        """Prefer turning left; fall back to straight, right, then a U-turn."""
        legal = state.getLegalPacmanActions()
        heading = state.getPacmanState().configuration.direction
        if heading == Directions.STOP:
            heading = Directions.NORTH
        left = Directions.LEFT[heading]
        # Preference order: left turn, straight ahead, right turn, U-turn.
        for choice in (left, heading, Directions.RIGHT[heading], Directions.LEFT[left]):
            if choice in legal:
                return choice
        return Directions.STOP
|
||||||
|
|
||||||
|
class GreedyAgent(Agent):
    """Chooses uniformly at random among the moves whose successor state
    scores highest under the configured evaluation function."""

    def __init__(self, evalFn="scoreEvaluation"):
        # Resolve the evaluation function by name from this module.
        self.evaluationFunction = util.lookup(evalFn, globals())
        assert self.evaluationFunction != None

    def getAction(self, state):
        "Pick a best-scoring successor move (never Stop, ties broken randomly)."
        # Candidate actions: everything legal except stopping in place.
        legal = state.getLegalPacmanActions()
        if Directions.STOP in legal: legal.remove(Directions.STOP)

        # Score each candidate by evaluating the state it leads to.
        scored = []
        for action in legal:
            successor = state.generateSuccessor(0, action)
            scored.append((self.evaluationFunction(successor), action))
        bestScore = max(scored)[0]
        bestActions = [action for score, action in scored if score == bestScore]
        return random.choice(bestActions)
|
||||||
|
|
||||||
|
def scoreEvaluation(state):
    """Evaluate a game state as its current score."""
    score = state.getScore()
    return score
|
||||||
18
p3_rl/projectParams.py
Normal file
18
p3_rl/projectParams.py
Normal file
@@ -0,0 +1,18 @@
|
|||||||
|
# projectParams.py
|
||||||
|
# ----------------
|
||||||
|
# Licensing Information: You are free to use or extend these projects for
|
||||||
|
# educational purposes provided that (1) you do not distribute or publish
|
||||||
|
# solutions, (2) you retain this notice, and (3) you provide clear
|
||||||
|
# attribution to UC Berkeley, including a link to http://ai.berkeley.edu.
|
||||||
|
#
|
||||||
|
# Attribution Information: The Pacman AI projects were developed at UC Berkeley.
|
||||||
|
# The core projects and autograders were primarily created by John DeNero
|
||||||
|
# (denero@cs.berkeley.edu) and Dan Klein (klein@cs.berkeley.edu).
|
||||||
|
# Student side autograding was added by Brad Miller, Nick Hay, and
|
||||||
|
# Pieter Abbeel (pabbeel@cs.berkeley.edu).
|
||||||
|
|
||||||
|
|
||||||
|
STUDENT_CODE_DEFAULT = 'analysis.py,qlearningAgents.py,valueIterationAgents.py'
|
||||||
|
PROJECT_TEST_CLASSES = 'reinforcementTestClasses.py'
|
||||||
|
PROJECT_NAME = 'Project 3: Reinforcement learning'
|
||||||
|
BONUS_PIC = False
|
||||||
186
p3_rl/qlearningAgents.py
Normal file
186
p3_rl/qlearningAgents.py
Normal file
@@ -0,0 +1,186 @@
|
|||||||
|
# qlearningAgents.py
|
||||||
|
# ------------------
|
||||||
|
# Licensing Information: You are free to use or extend these projects for
|
||||||
|
# educational purposes provided that (1) you do not distribute or publish
|
||||||
|
# solutions, (2) you retain this notice, and (3) you provide clear
|
||||||
|
# attribution to UC Berkeley, including a link to http://ai.berkeley.edu.
|
||||||
|
#
|
||||||
|
# Attribution Information: The Pacman AI projects were developed at UC Berkeley.
|
||||||
|
# The core projects and autograders were primarily created by John DeNero
|
||||||
|
# (denero@cs.berkeley.edu) and Dan Klein (klein@cs.berkeley.edu).
|
||||||
|
# Student side autograding was added by Brad Miller, Nick Hay, and
|
||||||
|
# Pieter Abbeel (pabbeel@cs.berkeley.edu).
|
||||||
|
|
||||||
|
|
||||||
|
from game import *
|
||||||
|
from learningAgents import ReinforcementAgent
|
||||||
|
from featureExtractors import *
|
||||||
|
|
||||||
|
import random,util,math
|
||||||
|
|
||||||
|
class QLearningAgent(ReinforcementAgent):
    """
      Q-Learning Agent

      Functions you should fill in:
        - computeValueFromQValues
        - computeActionFromQValues
        - getQValue
        - getAction
        - update

      Instance variables you have access to
        - self.epsilon (exploration prob)
        - self.alpha (learning rate)
        - self.discount (discount rate)

      Functions you should use
        - self.getLegalActions(state)
          which returns legal actions for a state
    """
    def __init__(self, **args):
        "You can initialize Q-values here..."
        ReinforcementAgent.__init__(self, **args)

        "*** YOUR CODE HERE ***"

    def getQValue(self, state, action):
        """
          Returns Q(state,action)
          Should return 0.0 if we have never seen a state
          or the Q node value otherwise
        """
        "*** YOUR CODE HERE ***"
        util.raiseNotDefined()


    def computeValueFromQValues(self, state):
        """
          Returns max_action Q(state,action)
          where the max is over legal actions.  Note that if
          there are no legal actions, which is the case at the
          terminal state, you should return a value of 0.0.
        """
        "*** YOUR CODE HERE ***"
        util.raiseNotDefined()

    def computeActionFromQValues(self, state):
        """
          Compute the best action to take in a state.  Note that if there
          are no legal actions, which is the case at the terminal state,
          you should return None.
        """
        "*** YOUR CODE HERE ***"
        util.raiseNotDefined()

    def getAction(self, state):
        """
          Compute the action to take in the current state.  With
          probability self.epsilon, we should take a random action and
          take the best policy action otherwise.  Note that if there are
          no legal actions, which is the case at the terminal state, you
          should choose None as the action.

          HINT: You might want to use util.flipCoin(prob)
          HINT: To pick randomly from a list, use random.choice(list)
        """
        # Pick Action
        legalActions = self.getLegalActions(state)
        action = None
        "*** YOUR CODE HERE ***"
        util.raiseNotDefined()

        return action

    def update(self, state, action, nextState, reward):
        """
          The parent class calls this to observe a
          state = action => nextState and reward transition.
          You should do your Q-Value update here

          NOTE: You should never call this function,
          it will be called on your behalf
        """
        "*** YOUR CODE HERE ***"
        util.raiseNotDefined()

    # Convenience aliases kept for the autograder's expected interface.
    def getPolicy(self, state):
        return self.computeActionFromQValues(state)

    def getValue(self, state):
        return self.computeValueFromQValues(state)
|
||||||
|
|
||||||
|
|
||||||
|
class PacmanQAgent(QLearningAgent):
    "Exactly the same as QLearningAgent, but with different default parameters"

    def __init__(self, epsilon=0.05,gamma=0.8,alpha=0.2, numTraining=0, **args):
        """
        These default parameters can be changed from the pacman.py command line.
        For example, to change the exploration rate, try:
            python pacman.py -p PacmanQLearningAgent -a epsilon=0.1

        alpha    - learning rate
        epsilon  - exploration rate
        gamma    - discount factor
        numTraining - number of training episodes, i.e. no learning after these many episodes
        """
        # Forward the Pacman-tuned defaults to the generic Q-learning agent.
        args['epsilon'] = epsilon
        args['gamma'] = gamma
        args['alpha'] = alpha
        args['numTraining'] = numTraining
        self.index = 0  # This is always Pacman
        QLearningAgent.__init__(self, **args)

    def getAction(self, state):
        """
        Simply calls the getAction method of QLearningAgent and then
        informs parent of action for Pacman.  Do not change or remove this
        method.
        """
        action = QLearningAgent.getAction(self,state)
        self.doAction(state,action)
        return action
|
||||||
|
|
||||||
|
|
||||||
|
class ApproximateQAgent(PacmanQAgent):
    """
       ApproximateQLearningAgent

       You should only have to overwrite getQValue
       and update.  All other QLearningAgent functions
       should work as is.
    """
    def __init__(self, extractor='IdentityExtractor', **args):
        # Resolve the feature extractor class by name and instantiate it.
        self.featExtractor = util.lookup(extractor, globals())()
        PacmanQAgent.__init__(self, **args)
        # Feature weights, shared across all states/actions.
        self.weights = util.Counter()

    def getWeights(self):
        return self.weights

    def getQValue(self, state, action):
        """
          Should return Q(state,action) = w * featureVector
          where * is the dotProduct operator
        """
        "*** YOUR CODE HERE ***"
        util.raiseNotDefined()

    def update(self, state, action, nextState, reward):
        """
           Should update your weights based on transition
        """
        "*** YOUR CODE HERE ***"
        util.raiseNotDefined()

    def final(self, state):
        "Called at the end of each game."
        # call the super-class final method
        PacmanQAgent.final(self, state)

        # did we finish training?
        if self.episodesSoFar == self.numTraining:
            # you might want to print your weights here for debugging
            "*** YOUR CODE HERE ***"
            pass
|
||||||
924
p3_rl/reinforcementTestClasses.py
Normal file
924
p3_rl/reinforcementTestClasses.py
Normal file
@@ -0,0 +1,924 @@
|
|||||||
|
# reinforcementTestClasses.py
|
||||||
|
# ---------------------------
|
||||||
|
# Licensing Information: You are free to use or extend these projects for
|
||||||
|
# educational purposes provided that (1) you do not distribute or publish
|
||||||
|
# solutions, (2) you retain this notice, and (3) you provide clear
|
||||||
|
# attribution to UC Berkeley, including a link to http://ai.berkeley.edu.
|
||||||
|
#
|
||||||
|
# Attribution Information: The Pacman AI projects were developed at UC Berkeley.
|
||||||
|
# The core projects and autograders were primarily created by John DeNero
|
||||||
|
# (denero@cs.berkeley.edu) and Dan Klein (klein@cs.berkeley.edu).
|
||||||
|
# Student side autograding was added by Brad Miller, Nick Hay, and
|
||||||
|
# Pieter Abbeel (pabbeel@cs.berkeley.edu).
|
||||||
|
|
||||||
|
|
||||||
|
import testClasses
|
||||||
|
import random, math, traceback, sys, os
|
||||||
|
import layout, textDisplay, pacman, gridworld
|
||||||
|
import time
|
||||||
|
from util import Counter, TimeoutFunction, FixedRandom
|
||||||
|
from collections import defaultdict
|
||||||
|
from pprint import PrettyPrinter
|
||||||
|
from hashlib import sha1
|
||||||
|
pp = PrettyPrinter()
|
||||||
|
VERBOSE = False
|
||||||
|
|
||||||
|
import gridworld
|
||||||
|
|
||||||
|
LIVINGREWARD = -0.1
|
||||||
|
NOISE = 0.2
|
||||||
|
|
||||||
|
class ValueIterationTest(testClasses.TestCase):
    """
    Autograder test for the student's ValueIterationAgent.

    Runs the agent on a Gridworld for several iteration counts and compares
    its values, Q-values and (at the final count) its policy against the
    solution file, within a small numeric tolerance.  Detailed failure
    output is written to a side file to keep console output short.
    """

    def __init__(self, question, testDict):
        super(ValueIterationTest, self).__init__(question, testDict)
        self.discount = float(testDict['discount'])
        self.grid = gridworld.Gridworld(parseGrid(testDict['grid']))
        iterations = int(testDict['valueIterations'])
        if 'noise' in testDict: self.grid.setNoise(float(testDict['noise']))
        if 'livingReward' in testDict: self.grid.setLivingReward(float(testDict['livingReward']))
        maxPreIterations = 10
        # Check each of the first few iterations individually, plus (when the
        # test asks for more) the final iteration count.
        self.numsIterationsForDisplay = range(min(iterations, maxPreIterations))
        self.testOutFile = testDict['test_out_file']
        if maxPreIterations < iterations:
            self.numsIterationsForDisplay.append(iterations)

    def writeFailureFile(self, string):
        # Full failure details go to the test output file.
        with open(self.testOutFile, 'w') as handle:
            handle.write(string)

    def removeFailureFileIfExists(self):
        # A stale failure file from an earlier run would be misleading.
        if os.path.exists(self.testOutFile):
            os.remove(self.testOutFile)

    def execute(self, grades, moduleDict, solutionDict):
        # Run the per-iteration checks; stop and fail on the first mismatch.
        failureOutputFileString = ''
        failureOutputStdString = ''
        for n in self.numsIterationsForDisplay:
            # The policy is only compared at the final iteration count.
            checkPolicy = (n == self.numsIterationsForDisplay[-1])
            testPass, stdOutString, fileOutString = self.executeNIterations(grades, moduleDict, solutionDict, n, checkPolicy)
            failureOutputStdString += stdOutString
            failureOutputFileString += fileOutString
            if not testPass:
                self.addMessage(failureOutputStdString)
                self.addMessage('For more details to help you debug, see test output file %s\n\n' % self.testOutFile)
                self.writeFailureFile(failureOutputFileString)
                return self.testFail(grades)
        self.removeFailureFileIfExists()
        return self.testPass(grades)

    def executeNIterations(self, grades, moduleDict, solutionDict, n, checkPolicy):
        # Compare values, per-action Q-values and (optionally) the policy
        # after n iterations against the solution file.
        testPass = True
        valuesPretty, qValuesPretty, actions, policyPretty = self.runAgent(moduleDict, n)
        stdOutString = ''
        fileOutString = ''
        valuesKey = "values_k_%d" % n
        if self.comparePrettyValues(valuesPretty, solutionDict[valuesKey]):
            fileOutString += "Values at iteration %d are correct.\n" % n
            fileOutString += "   Student/correct solution:\n %s\n" % self.prettyValueSolutionString(valuesKey, valuesPretty)
        else:
            testPass = False
            outString = "Values at iteration %d are NOT correct.\n" % n
            outString += "   Student solution:\n %s\n" % self.prettyValueSolutionString(valuesKey, valuesPretty)
            outString += "   Correct solution:\n %s\n" % self.prettyValueSolutionString(valuesKey, solutionDict[valuesKey])
            stdOutString += outString
            fileOutString += outString
        for action in actions:
            qValuesKey = 'q_values_k_%d_action_%s' % (n, action)
            qValues = qValuesPretty[action]
            if self.comparePrettyValues(qValues, solutionDict[qValuesKey]):
                fileOutString += "Q-Values at iteration %d for action %s are correct.\n" % (n, action)
                fileOutString += "   Student/correct solution:\n %s\n" % self.prettyValueSolutionString(qValuesKey, qValues)
            else:
                testPass = False
                outString = "Q-Values at iteration %d for action %s are NOT correct.\n" % (n, action)
                outString += "   Student solution:\n %s\n" % self.prettyValueSolutionString(qValuesKey, qValues)
                outString += "   Correct solution:\n %s\n" % self.prettyValueSolutionString(qValuesKey, solutionDict[qValuesKey])
                stdOutString += outString
                fileOutString += outString
        if checkPolicy:
            if not self.comparePrettyValues(policyPretty, solutionDict['policy']):
                testPass = False
                outString = "Policy is NOT correct.\n"
                outString += "   Student solution:\n %s\n" % self.prettyValueSolutionString('policy', policyPretty)
                outString += "   Correct solution:\n %s\n" % self.prettyValueSolutionString('policy', solutionDict['policy'])
                stdOutString += outString
                fileOutString += outString
        return testPass, stdOutString, fileOutString

    def writeSolution(self, moduleDict, filePath):
        # Generate the solution file from a reference implementation.
        with open(filePath, 'w') as handle:
            policyPretty = ''
            actions = []
            for n in self.numsIterationsForDisplay:
                valuesPretty, qValuesPretty, actions, policyPretty = self.runAgent(moduleDict, n)
                handle.write(self.prettyValueSolutionString('values_k_%d' % n, valuesPretty))
                for action in actions:
                    handle.write(self.prettyValueSolutionString('q_values_k_%d_action_%s' % (n, action), qValuesPretty[action]))
            handle.write(self.prettyValueSolutionString('policy', policyPretty))
            handle.write(self.prettyValueSolutionString('actions', '\n'.join(actions) + '\n'))
        return True

    def runAgent(self, moduleDict, numIterations):
        # Run the student's agent and collect pretty-printed values,
        # Q-values and policy for every state of the grid.
        agent = moduleDict['valueIterationAgents'].ValueIterationAgent(self.grid, discount=self.discount, iterations=numIterations)
        states = self.grid.getStates()
        # Union of the legal actions over all states.
        actions = list(reduce(lambda a, b: set(a).union(b), [self.grid.getPossibleActions(state) for state in states]))
        values = {}
        qValues = {}
        policy = {}
        for state in states:
            values[state] = agent.getValue(state)
            policy[state] = agent.computeActionFromValues(state)
            possibleActions = self.grid.getPossibleActions(state)
            for action in actions:
                if not qValues.has_key(action):
                    qValues[action] = {}
                if action in possibleActions:
                    qValues[action][state] = agent.computeQValueFromValues(state, action)
                else:
                    # Action not legal in this state; rendered as 'illegal'.
                    qValues[action][state] = None
        valuesPretty = self.prettyValues(values)
        policyPretty = self.prettyPolicy(policy)
        qValuesPretty = {}
        for action in actions:
            qValuesPretty[action] = self.prettyValues(qValues[action])
        return (valuesPretty, qValuesPretty, actions, policyPretty)

    def prettyPrint(self, elements, formatString):
        # Render a {state: value} map as a grid, top row first.
        pretty = ''
        states = self.grid.getStates()
        for ybar in range(self.grid.grid.height):
            y = self.grid.grid.height-1-ybar
            row = []
            for x in range(self.grid.grid.width):
                if (x, y) in states:
                    value = elements[(x, y)]
                    if value is None:
                        row.append('   illegal')
                    else:
                        row.append(formatString.format(elements[(x,y)]))
                else:
                    # Walls / non-states are rendered as underscores.
                    row.append('_' * 10)
            pretty += '        %s\n' % ("   ".join(row), )
        pretty += '\n'
        return pretty

    def prettyValues(self, values):
        return self.prettyPrint(values, '{0:10.4f}')

    def prettyPolicy(self, policy):
        return self.prettyPrint(policy, '{0:10s}')

    def prettyValueSolutionString(self, name, pretty):
        return '%s: """\n%s\n"""\n\n' % (name, pretty.rstrip())

    def comparePrettyValues(self, aPretty, bPretty, tolerance=0.01):
        # Token-wise comparison: numbers within tolerance, text exactly.
        aList = self.parsePrettyValues(aPretty)
        bList = self.parsePrettyValues(bPretty)
        if len(aList) != len(bList):
            return False
        for a, b in zip(aList, bList):
            try:
                aNum = float(a)
                bNum = float(b)
                # error = abs((aNum - bNum) / ((aNum + bNum) / 2.0))
                error = abs(aNum - bNum)
                if error > tolerance:
                    return False
            except ValueError:
                if a.strip() != b.strip():
                    return False
        return True

    def parsePrettyValues(self, pretty):
        values = pretty.split()
        return values
|
||||||
|
|
||||||
|
|
||||||
|
class ApproximateQLearningTest(testClasses.TestCase):
    """Grades an ApproximateQAgent: replays a fixed, seeded sequence of
    experiences and compares the learned weights and Q-values against a
    solution file at several checkpoints."""

    def __init__(self, question, testDict):
        super(ApproximateQLearningTest, self).__init__(question, testDict)
        self.discount = float(testDict['discount'])
        self.grid = gridworld.Gridworld(parseGrid(testDict['grid']))
        if 'noise' in testDict: self.grid.setNoise(float(testDict['noise']))
        if 'livingReward' in testDict: self.grid.setLivingReward(float(testDict['livingReward']))
        # NOTE(review): rebuilding the grid here discards the noise/livingReward
        # applied just above. Solution files were generated with this behavior,
        # so it is preserved -- confirm before "fixing".
        self.grid = gridworld.Gridworld(parseGrid(testDict['grid']))
        self.env = gridworld.GridworldEnvironment(self.grid)
        self.epsilon = float(testDict['epsilon'])
        self.learningRate = float(testDict['learningRate'])
        self.extractor = 'IdentityExtractor'
        if 'extractor' in testDict:
            self.extractor = testDict['extractor']
        self.opts = {'actionFn': self.env.getPossibleActions, 'epsilon': self.epsilon, 'gamma': self.discount, 'alpha': self.learningRate}
        numExperiences = int(testDict['numExperiences'])
        maxPreExperiences = 10
        # Fix: materialize the range so append() below also works on Python 3,
        # where range() is an immutable sequence, not a list.
        self.numsExperiencesForDisplay = list(range(min(numExperiences, maxPreExperiences)))
        self.testOutFile = testDict['test_out_file']
        if maxPreExperiences < numExperiences:
            self.numsExperiencesForDisplay.append(numExperiences)

    def writeFailureFile(self, string):
        """Dump the detailed failure transcript to the test output file."""
        with open(self.testOutFile, 'w') as handle:
            handle.write(string)

    def removeFailureFileIfExists(self):
        """Delete a stale failure transcript from a previous run, if any."""
        if os.path.exists(self.testOutFile):
            os.remove(self.testOutFile)

    def execute(self, grades, moduleDict, solutionDict):
        """Run the checks at every display point; fail fast on the first bad one."""
        failureOutputFileString = ''
        failureOutputStdString = ''
        for n in self.numsExperiencesForDisplay:
            testPass, stdOutString, fileOutString = self.executeNExperiences(grades, moduleDict, solutionDict, n)
            failureOutputStdString += stdOutString
            failureOutputFileString += fileOutString
            if not testPass:
                self.addMessage(failureOutputStdString)
                self.addMessage('For more details to help you debug, see test output file %s\n\n' % self.testOutFile)
                self.writeFailureFile(failureOutputFileString)
                return self.testFail(grades)
        self.removeFailureFileIfExists()
        return self.testPass(grades)

    def executeNExperiences(self, grades, moduleDict, solutionDict, n):
        """Compare weights and per-action Q-values after n experiences.

        Returns (testPass, stdOutString, fileOutString)."""
        testPass = True
        qValuesPretty, weights, actions, lastExperience = self.runAgent(moduleDict, n)
        stdOutString = ''
        fileOutString = "==================== Iteration %d ====================\n" % n
        if lastExperience is not None:
            fileOutString += "Agent observed the transition (startState = %s, action = %s, endState = %s, reward = %f)\n\n" % lastExperience
        weightsKey = 'weights_k_%d' % n
        # eval() is acceptable only because the solution file is instructor-
        # generated, never untrusted student input.
        if weights == eval(solutionDict[weightsKey]):
            fileOutString += "Weights at iteration %d are correct." % n
            fileOutString += " Student/correct solution:\n\n%s\n\n" % pp.pformat(weights)
        # NOTE(review): a weight mismatch alone does not set testPass = False;
        # the Q-value comparison below is what fails the test -- confirm intended.
        for action in actions:
            qValuesKey = 'q_values_k_%d_action_%s' % (n, action)
            qValues = qValuesPretty[action]
            if self.comparePrettyValues(qValues, solutionDict[qValuesKey]):
                fileOutString += "Q-Values at iteration %d for action '%s' are correct." % (n, action)
                fileOutString += " Student/correct solution:\n\t%s" % self.prettyValueSolutionString(qValuesKey, qValues)
            else:
                testPass = False
                outString = "Q-Values at iteration %d for action '%s' are NOT correct." % (n, action)
                outString += " Student solution:\n\t%s" % self.prettyValueSolutionString(qValuesKey, qValues)
                outString += " Correct solution:\n\t%s" % self.prettyValueSolutionString(qValuesKey, solutionDict[qValuesKey])
                stdOutString += outString
                fileOutString += outString
        return testPass, stdOutString, fileOutString

    def writeSolution(self, moduleDict, filePath):
        """Record weights and Q-values at each display point as the solution."""
        with open(filePath, 'w') as handle:
            for n in self.numsExperiencesForDisplay:
                qValuesPretty, weights, actions, _ = self.runAgent(moduleDict, n)
                handle.write(self.prettyValueSolutionString('weights_k_%d' % n, pp.pformat(weights)))
                for action in actions:
                    handle.write(self.prettyValueSolutionString('q_values_k_%d_action_%s' % (n, action), qValuesPretty[action]))
        return True

    def runAgent(self, moduleDict, numExperiences):
        """Feed numExperiences seeded experiences to a fresh ApproximateQAgent.

        Returns (qValuesPretty, weights, actions, lastExperience)."""
        agent = moduleDict['qlearningAgents'].ApproximateQAgent(extractor=self.extractor, **self.opts)
        # Fix: sorted() materializes a list, so this works whether filter()
        # yields a list (Python 2) or an iterator (Python 3).
        states = sorted(filter(lambda state: len(self.grid.getPossibleActions(state)) > 0, self.grid.getStates()))
        randObj = FixedRandom().random
        # choose a random start state and a random possible action from that state
        # get the next state and reward from the transition function
        lastExperience = None
        for i in range(numExperiences):
            startState = randObj.choice(states)
            action = randObj.choice(self.grid.getPossibleActions(startState))
            (endState, reward) = self.env.getRandomNextState(startState, action, randObj=randObj)
            lastExperience = (startState, action, endState, reward)
            agent.update(*lastExperience)
        actions = list(reduce(lambda a, b: set(a).union(b), [self.grid.getPossibleActions(state) for state in states]))
        qValues = {}
        weights = agent.getWeights()
        for state in states:
            possibleActions = self.grid.getPossibleActions(state)
            for action in actions:
                # Fix: dict.has_key() was removed in Python 3; `in` is the
                # equivalent membership test on both versions.
                if action not in qValues:
                    qValues[action] = {}
                if action in possibleActions:
                    qValues[action][state] = agent.getQValue(state, action)
                else:
                    qValues[action][state] = None
        qValuesPretty = {}
        for action in actions:
            qValuesPretty[action] = self.prettyValues(qValues[action])
        return (qValuesPretty, weights, actions, lastExperience)

    def prettyPrint(self, elements, formatString):
        """Render a {state: value} map as a grid-shaped text table.

        Wall cells print as underscores, None values as 'illegal'."""
        pretty = ''
        states = self.grid.getStates()
        for ybar in range(self.grid.grid.height):
            y = self.grid.grid.height - 1 - ybar  # print the top row first
            row = []
            for x in range(self.grid.grid.width):
                if (x, y) in states:
                    value = elements[(x, y)]
                    if value is None:
                        row.append(' illegal')
                    else:
                        row.append(formatString.format(elements[(x, y)]))
                else:
                    row.append('_' * 10)
            pretty += ' %s\n' % (" ".join(row),)
        pretty += '\n'
        return pretty

    def prettyValues(self, values):
        """Format numeric cell values to 4 decimal places in 10-char cells."""
        return self.prettyPrint(values, '{0:10.4f}')

    def prettyPolicy(self, policy):
        """Format action strings in 10-char cells."""
        return self.prettyPrint(policy, '{0:10s}')

    def prettyValueSolutionString(self, name, pretty):
        """Wrap a pretty table as a `name: triple-quoted` solution-file entry."""
        return '%s: """\n%s\n"""\n\n' % (name, pretty.rstrip())

    def comparePrettyValues(self, aPretty, bPretty, tolerance=0.01):
        """Cell-wise comparison of two pretty tables: numeric cells must agree
        within `tolerance` (absolute error); other cells must match exactly."""
        aList = self.parsePrettyValues(aPretty)
        bList = self.parsePrettyValues(bPretty)
        if len(aList) != len(bList):
            return False
        for a, b in zip(aList, bList):
            try:
                aNum = float(a)
                bNum = float(b)
                # error = abs((aNum - bNum) / ((aNum + bNum) / 2.0))
                error = abs(aNum - bNum)
                if error > tolerance:
                    return False
            except ValueError:
                # Non-numeric cells (e.g. 'illegal', '___') compare as text.
                if a.strip() != b.strip():
                    return False
        return True

    def parsePrettyValues(self, pretty):
        """Tokenize a pretty table into whitespace-separated cells."""
        values = pretty.split()
        return values
|
||||||
|
|
||||||
|
|
||||||
|
class QLearningTest(testClasses.TestCase):
    """Grades a QLearningAgent: replays a fixed, seeded experience sequence
    and compares Q-values (and, at the final checkpoint, values and policy)
    against a solution file."""

    def __init__(self, question, testDict):
        super(QLearningTest, self).__init__(question, testDict)
        self.discount = float(testDict['discount'])
        self.grid = gridworld.Gridworld(parseGrid(testDict['grid']))
        if 'noise' in testDict: self.grid.setNoise(float(testDict['noise']))
        if 'livingReward' in testDict: self.grid.setLivingReward(float(testDict['livingReward']))
        # NOTE(review): rebuilding the grid here discards the noise/livingReward
        # applied just above; solution files were generated with this behavior.
        self.grid = gridworld.Gridworld(parseGrid(testDict['grid']))
        self.env = gridworld.GridworldEnvironment(self.grid)
        self.epsilon = float(testDict['epsilon'])
        self.learningRate = float(testDict['learningRate'])
        self.opts = {'actionFn': self.env.getPossibleActions, 'epsilon': self.epsilon, 'gamma': self.discount, 'alpha': self.learningRate}
        numExperiences = int(testDict['numExperiences'])
        maxPreExperiences = 10
        # Fix: materialize the range so append() below also works on Python 3,
        # where range() is an immutable sequence, not a list.
        self.numsExperiencesForDisplay = list(range(min(numExperiences, maxPreExperiences)))
        self.testOutFile = testDict['test_out_file']
        if maxPreExperiences < numExperiences:
            self.numsExperiencesForDisplay.append(numExperiences)

    def writeFailureFile(self, string):
        """Dump the detailed failure transcript to the test output file."""
        with open(self.testOutFile, 'w') as handle:
            handle.write(string)

    def removeFailureFileIfExists(self):
        """Delete a stale failure transcript from a previous run, if any."""
        if os.path.exists(self.testOutFile):
            os.remove(self.testOutFile)

    def execute(self, grades, moduleDict, solutionDict):
        """Run checks at each display point; values/policy only at the last."""
        failureOutputFileString = ''
        failureOutputStdString = ''
        for n in self.numsExperiencesForDisplay:
            # Values and policy are only recorded for the final display point.
            checkValuesAndPolicy = (n == self.numsExperiencesForDisplay[-1])
            testPass, stdOutString, fileOutString = self.executeNExperiences(grades, moduleDict, solutionDict, n, checkValuesAndPolicy)
            failureOutputStdString += stdOutString
            failureOutputFileString += fileOutString
            if not testPass:
                self.addMessage(failureOutputStdString)
                self.addMessage('For more details to help you debug, see test output file %s\n\n' % self.testOutFile)
                self.writeFailureFile(failureOutputFileString)
                return self.testFail(grades)
        self.removeFailureFileIfExists()
        return self.testPass(grades)

    def executeNExperiences(self, grades, moduleDict, solutionDict, n, checkValuesAndPolicy):
        """Compare Q-values (and optionally values/policy) after n experiences.

        Returns (testPass, stdOutString, fileOutString)."""
        testPass = True
        valuesPretty, qValuesPretty, actions, policyPretty, lastExperience = self.runAgent(moduleDict, n)
        stdOutString = ''
        fileOutString = "==================== Iteration %d ====================\n" % n
        if lastExperience is not None:
            fileOutString += "Agent observed the transition (startState = %s, action = %s, endState = %s, reward = %f)\n\n\n" % lastExperience
        for action in actions:
            qValuesKey = 'q_values_k_%d_action_%s' % (n, action)
            qValues = qValuesPretty[action]
            if self.comparePrettyValues(qValues, solutionDict[qValuesKey]):
                fileOutString += "Q-Values at iteration %d for action '%s' are correct." % (n, action)
                fileOutString += " Student/correct solution:\n\t%s" % self.prettyValueSolutionString(qValuesKey, qValues)
            else:
                testPass = False
                outString = "Q-Values at iteration %d for action '%s' are NOT correct." % (n, action)
                outString += " Student solution:\n\t%s" % self.prettyValueSolutionString(qValuesKey, qValues)
                outString += " Correct solution:\n\t%s" % self.prettyValueSolutionString(qValuesKey, solutionDict[qValuesKey])
                stdOutString += outString
                fileOutString += outString
        if checkValuesAndPolicy:
            if not self.comparePrettyValues(valuesPretty, solutionDict['values']):
                testPass = False
                outString = "Values are NOT correct."
                outString += " Student solution:\n\t%s" % self.prettyValueSolutionString('values', valuesPretty)
                outString += " Correct solution:\n\t%s" % self.prettyValueSolutionString('values', solutionDict['values'])
                stdOutString += outString
                fileOutString += outString
            if not self.comparePrettyValues(policyPretty, solutionDict['policy']):
                testPass = False
                outString = "Policy is NOT correct."
                outString += " Student solution:\n\t%s" % self.prettyValueSolutionString('policy', policyPretty)
                outString += " Correct solution:\n\t%s" % self.prettyValueSolutionString('policy', solutionDict['policy'])
                stdOutString += outString
                fileOutString += outString
        return testPass, stdOutString, fileOutString

    def writeSolution(self, moduleDict, filePath):
        """Record Q-values at each display point, plus the final values/policy."""
        with open(filePath, 'w') as handle:
            valuesPretty = ''
            policyPretty = ''
            for n in self.numsExperiencesForDisplay:
                valuesPretty, qValuesPretty, actions, policyPretty, _ = self.runAgent(moduleDict, n)
                for action in actions:
                    handle.write(self.prettyValueSolutionString('q_values_k_%d_action_%s' % (n, action), qValuesPretty[action]))
            handle.write(self.prettyValueSolutionString('values', valuesPretty))
            handle.write(self.prettyValueSolutionString('policy', policyPretty))
        return True

    def runAgent(self, moduleDict, numExperiences):
        """Feed numExperiences seeded experiences to a fresh QLearningAgent.

        Returns (valuesPretty, qValuesPretty, actions, policyPretty, lastExperience)."""
        agent = moduleDict['qlearningAgents'].QLearningAgent(**self.opts)
        # Fix: sorted() materializes a list, so this works whether filter()
        # yields a list (Python 2) or an iterator (Python 3).
        states = sorted(filter(lambda state: len(self.grid.getPossibleActions(state)) > 0, self.grid.getStates()))
        randObj = FixedRandom().random
        # choose a random start state and a random possible action from that state
        # get the next state and reward from the transition function
        lastExperience = None
        for i in range(numExperiences):
            startState = randObj.choice(states)
            action = randObj.choice(self.grid.getPossibleActions(startState))
            (endState, reward) = self.env.getRandomNextState(startState, action, randObj=randObj)
            lastExperience = (startState, action, endState, reward)
            agent.update(*lastExperience)
        actions = list(reduce(lambda a, b: set(a).union(b), [self.grid.getPossibleActions(state) for state in states]))
        values = {}
        qValues = {}
        policy = {}
        for state in states:
            values[state] = agent.computeValueFromQValues(state)
            policy[state] = agent.computeActionFromQValues(state)
            possibleActions = self.grid.getPossibleActions(state)
            for action in actions:
                # Fix: dict.has_key() was removed in Python 3; use `in`.
                if action not in qValues:
                    qValues[action] = {}
                if action in possibleActions:
                    qValues[action][state] = agent.getQValue(state, action)
                else:
                    qValues[action][state] = None
        valuesPretty = self.prettyValues(values)
        policyPretty = self.prettyPolicy(policy)
        qValuesPretty = {}
        for action in actions:
            qValuesPretty[action] = self.prettyValues(qValues[action])
        return (valuesPretty, qValuesPretty, actions, policyPretty, lastExperience)

    def prettyPrint(self, elements, formatString):
        """Render a {state: value} map as a grid-shaped text table.

        Wall cells print as underscores, None values as 'illegal'."""
        pretty = ''
        states = self.grid.getStates()
        for ybar in range(self.grid.grid.height):
            y = self.grid.grid.height - 1 - ybar  # print the top row first
            row = []
            for x in range(self.grid.grid.width):
                if (x, y) in states:
                    value = elements[(x, y)]
                    if value is None:
                        row.append(' illegal')
                    else:
                        row.append(formatString.format(elements[(x, y)]))
                else:
                    row.append('_' * 10)
            pretty += ' %s\n' % (" ".join(row),)
        pretty += '\n'
        return pretty

    def prettyValues(self, values):
        """Format numeric cell values to 4 decimal places in 10-char cells."""
        return self.prettyPrint(values, '{0:10.4f}')

    def prettyPolicy(self, policy):
        """Format action strings in 10-char cells."""
        return self.prettyPrint(policy, '{0:10s}')

    def prettyValueSolutionString(self, name, pretty):
        """Wrap a pretty table as a `name: triple-quoted` solution-file entry."""
        return '%s: """\n%s\n"""\n\n' % (name, pretty.rstrip())

    def comparePrettyValues(self, aPretty, bPretty, tolerance=0.01):
        """Cell-wise comparison of two pretty tables: numeric cells must agree
        within `tolerance` (absolute error); other cells must match exactly."""
        aList = self.parsePrettyValues(aPretty)
        bList = self.parsePrettyValues(bPretty)
        if len(aList) != len(bList):
            return False
        for a, b in zip(aList, bList):
            try:
                aNum = float(a)
                bNum = float(b)
                # error = abs((aNum - bNum) / ((aNum + bNum) / 2.0))
                error = abs(aNum - bNum)
                if error > tolerance:
                    return False
            except ValueError:
                # Non-numeric cells (e.g. 'illegal', '___') compare as text.
                if a.strip() != b.strip():
                    return False
        return True

    def parsePrettyValues(self, pretty):
        """Tokenize a pretty table into whitespace-separated cells."""
        values = pretty.split()
        return values
|
||||||
|
|
||||||
|
|
||||||
|
class EpsilonGreedyTest(testClasses.TestCase):
    """Empirically checks that getAction is epsilon-greedy by sampling it many
    times per state and estimating the exploration rate."""

    def __init__(self, question, testDict):
        super(EpsilonGreedyTest, self).__init__(question, testDict)
        self.discount = float(testDict['discount'])
        self.grid = gridworld.Gridworld(parseGrid(testDict['grid']))
        if 'noise' in testDict: self.grid.setNoise(float(testDict['noise']))
        if 'livingReward' in testDict: self.grid.setLivingReward(float(testDict['livingReward']))

        # NOTE(review): rebuilding the grid here discards the noise/livingReward
        # applied just above; preserved because graded behavior depends on it.
        self.grid = gridworld.Gridworld(parseGrid(testDict['grid']))
        self.env = gridworld.GridworldEnvironment(self.grid)
        self.epsilon = float(testDict['epsilon'])
        self.learningRate = float(testDict['learningRate'])
        self.numExperiences = int(testDict['numExperiences'])
        self.numIterations = int(testDict['iterations'])
        self.opts = {'actionFn': self.env.getPossibleActions, 'epsilon': self.epsilon, 'gamma': self.discount, 'alpha': self.learningRate}

    def execute(self, grades, moduleDict, solutionDict):
        """Pass/fail based solely on the empirical epsilon check."""
        if self.testEpsilonGreedy(moduleDict):
            return self.testPass(grades)
        else:
            return self.testFail(grades)

    def writeSolution(self, moduleDict, filePath):
        """This test needs no solution data; write a placeholder file."""
        with open(filePath, 'w') as handle:
            handle.write('# This is the solution file for %s.\n' % self.path)
            handle.write('# File intentionally blank.\n')
        return True

    def runAgent(self, moduleDict):
        """Train a fresh QLearningAgent on the fixed, seeded experience stream."""
        agent = moduleDict['qlearningAgents'].QLearningAgent(**self.opts)
        # Fix: sorted() materializes a list, so this works whether filter()
        # yields a list (Python 2) or an iterator (Python 3).
        states = sorted(filter(lambda state: len(self.grid.getPossibleActions(state)) > 0, self.grid.getStates()))
        randObj = FixedRandom().random
        # choose a random start state and a random possible action from that state
        # get the next state and reward from the transition function
        for i in range(self.numExperiences):
            startState = randObj.choice(states)
            action = randObj.choice(self.grid.getPossibleActions(startState))
            (endState, reward) = self.env.getRandomNextState(startState, action, randObj=randObj)
            agent.update(startState, action, endState, reward)
        return agent

    def testEpsilonGreedy(self, moduleDict, tolerance=0.025):
        """Sample getAction numIterations times per state and check that the
        implied exploration rate matches self.epsilon within tolerance."""
        agent = self.runAgent(moduleDict)
        for state in self.grid.getStates():
            numLegalActions = len(agent.getLegalActions(state))
            if numLegalActions <= 1:
                # With <= 1 legal action, greedy and random coincide and the
                # estimator below would divide by zero.
                continue
            numGreedyChoices = 0
            optimalAction = agent.computeActionFromQValues(state)
            for iteration in range(self.numIterations):
                # assume that their computeActionFromQValues implementation is correct (q4 tests this)
                if agent.getAction(state) == optimalAction:
                    numGreedyChoices += 1
            # e = epsilon, g = # greedy actions, n = numIterations, k = numLegalActions
            # g = n * [(1-e) + e/k] -> e = (n - g) / (n - n/k)
            empiricalEpsilonNumerator = self.numIterations - numGreedyChoices
            empiricalEpsilonDenominator = self.numIterations - self.numIterations / float(numLegalActions)
            empiricalEpsilon = empiricalEpsilonNumerator / empiricalEpsilonDenominator
            error = abs(empiricalEpsilon - self.epsilon)
            if error > tolerance:
                self.addMessage("Epsilon-greedy action selection is not correct.")
                self.addMessage("Actual epsilon = %f; student empirical epsilon = %f; error = %f > tolerance = %f" % (self.epsilon, empiricalEpsilon, error, tolerance))
                return False
        return True
|
||||||
|
|
||||||
|
|
||||||
|
### q6
|
||||||
|
class Question6Test(testClasses.TestCase):
    """Checks analysis.question6() against a hashed reference answer, so the
    expected answer is not revealed in the test file."""

    def __init__(self, question, testDict):
        super(Question6Test, self).__init__(question, testDict)

    def execute(self, grades, moduleDict, solutionDict):
        """Normalize the student's answer, hash it, and compare."""
        studentSolution = moduleDict['analysis'].question6()
        studentSolution = str(studentSolution).strip().lower()
        # Fix: encode before hashing -- hashlib.sha1 requires bytes on
        # Python 3; .encode('utf-8') is a harmless round-trip on Python 2 str.
        hashedSolution = sha1(studentSolution.encode('utf-8')).hexdigest()
        if hashedSolution == '46729c96bb1e4081fdc81a8ff74b3e5db8fba415':
            return self.testPass(grades)
        else:
            self.addMessage("Solution is not correct.")
            self.addMessage(" Student solution: %s" % (studentSolution,))
            return self.testFail(grades)

    def writeSolution(self, moduleDict, filePath):
        """No solution data needed; write a placeholder file.

        Fix: use a context manager so the handle is closed even on error,
        matching the other writeSolution implementations in this file."""
        with open(filePath, 'w') as handle:
            handle.write('# This is the solution file for %s.\n' % self.path)
            handle.write('# File intentionally blank.\n')
        return True
|
||||||
|
|
||||||
|
|
||||||
|
### q7/q8
|
||||||
|
### =====
|
||||||
|
## Average wins of a pacman agent
|
||||||
|
|
||||||
|
class EvalAgentTest(testClasses.TestCase):
    """Runs a pacman agent and awards partial credit based on average score,
    games-not-timed-out, and win counts against configured thresholds."""

    def __init__(self, question, testDict):
        super(EvalAgentTest, self).__init__(question, testDict)
        self.pacmanParams = testDict['pacmanParams']

        # Hard minimums: falling below any of these zeroes the whole test.
        self.scoreMinimum = int(testDict['scoreMinimum']) if 'scoreMinimum' in testDict else None
        self.nonTimeoutMinimum = int(testDict['nonTimeoutMinimum']) if 'nonTimeoutMinimum' in testDict else None
        self.winsMinimum = int(testDict['winsMinimum']) if 'winsMinimum' in testDict else None

        # Each threshold reached earns one point for that metric.
        self.scoreThresholds = [int(s) for s in testDict.get('scoreThresholds', '').split()]
        self.nonTimeoutThresholds = [int(s) for s in testDict.get('nonTimeoutThresholds', '').split()]
        self.winsThresholds = [int(s) for s in testDict.get('winsThresholds', '').split()]

        self.maxPoints = sum([len(t) for t in [self.scoreThresholds, self.nonTimeoutThresholds, self.winsThresholds]])

    def execute(self, grades, moduleDict, solutionDict):
        """Play the configured games, aggregate stats, and award points."""
        self.addMessage('Grading agent using command: python pacman.py %s' % (self.pacmanParams,))

        startTime = time.time()
        games = pacman.runGames(**pacman.readCommand(self.pacmanParams.split(' ')))
        totalTime = time.time() - startTime
        numGames = len(games)

        stats = {'time': totalTime, 'wins': [g.state.isWin() for g in games].count(True),
                 'games': games, 'scores': [g.state.getScore() for g in games],
                 'timeouts': [g.agentTimeout for g in games].count(True), 'crashes': [g.agentCrashed for g in games].count(True)}

        averageScore = sum(stats['scores']) / float(len(stats['scores']))
        nonTimeouts = numGames - stats['timeouts']
        wins = stats['wins']

        def gradeThreshold(value, minimum, thresholds, name):
            # One point per threshold reached, but only when the hard minimum
            # (if any) is satisfied.
            # Fix: identity comparison with None (PEP 8), not ==.
            points = 0
            passed = (minimum is None) or (value >= minimum)
            if passed:
                for t in thresholds:
                    if value >= t:
                        points += 1
            return (passed, points, value, minimum, thresholds, name)

        results = [gradeThreshold(averageScore, self.scoreMinimum, self.scoreThresholds, "average score"),
                   gradeThreshold(nonTimeouts, self.nonTimeoutMinimum, self.nonTimeoutThresholds, "games not timed out"),
                   gradeThreshold(wins, self.winsMinimum, self.winsThresholds, "wins")]

        totalPoints = 0
        for passed, points, value, minimum, thresholds, name in results:
            if minimum is None and len(thresholds) == 0:
                continue  # metric not graded for this test

            # print passed, points, value, minimum, thresholds, name
            totalPoints += points
            if not passed:
                assert points == 0
                self.addMessage("%s %s (fail: below minimum value %s)" % (value, name, minimum))
            else:
                self.addMessage("%s %s (%s of %s points)" % (value, name, points, len(thresholds)))

            if minimum is not None:
                self.addMessage(" Grading scheme:")
                self.addMessage(" < %s: fail" % (minimum,))
                if len(thresholds) == 0 or minimum != thresholds[0]:
                    self.addMessage(" >= %s: 0 points" % (minimum,))
                for idx, threshold in enumerate(thresholds):
                    self.addMessage(" >= %s: %s points" % (threshold, idx + 1))
            elif len(thresholds) > 0:
                self.addMessage(" Grading scheme:")
                self.addMessage(" < %s: 0 points" % (thresholds[0],))
                for idx, threshold in enumerate(thresholds):
                    self.addMessage(" >= %s: %s points" % (threshold, idx + 1))

        # Failing any hard minimum zeroes the whole test.
        if any([not passed for passed, _, _, _, _, _ in results]):
            totalPoints = 0

        return self.testPartial(grades, totalPoints, self.maxPoints)

    def writeSolution(self, moduleDict, filePath):
        """No solution data needed; write a placeholder file."""
        with open(filePath, 'w') as handle:
            handle.write('# This is the solution file for %s.\n' % self.path)
            handle.write('# File intentionally blank.\n')
        return True
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
|
### q2/q3
|
||||||
|
### =====
|
||||||
|
## For each parameter setting, compute the optimal policy, see if it satisfies some properties
|
||||||
|
|
||||||
|
def followPath(policy, start, numSteps=100):
    """Follow `policy` deterministically (ignoring noise) from `start`.

    Returns the visited states as "(x,y)" strings, appending 'TERMINAL_STATE'
    when an exit (or undefined) action is taken. Stops after `numSteps` steps
    or when a state has no policy entry.
    """
    state = start
    path = []
    for i in range(numSteps):
        if state not in policy:
            break
        action = policy[state]
        path.append("(%s,%s)" % state)
        if action == 'north': nextState = state[0], state[1] + 1
        if action == 'south': nextState = state[0], state[1] - 1
        if action == 'east': nextState = state[0] + 1, state[1]
        if action == 'west': nextState = state[0] - 1, state[1]
        # Fix: identity comparison with None (PEP 8); == None relies on __eq__.
        if action == 'exit' or action is None:
            path.append('TERMINAL_STATE')
            break
        state = nextState

    return path
|
||||||
|
|
||||||
|
def parseGrid(string):
    """Parse a whitespace-delimited grid description into a gridworld grid.

    Numeric tokens become ints, '_' becomes a blank cell, and everything else
    ('#' walls, 'S' start, action letters) is kept as its string token.
    """
    grid = [[entry.strip() for entry in line.split()] for line in string.split('\n')]
    for row in grid:
        for x, col in enumerate(row):
            try:
                col = int(col)
            # Fix: catch only the conversion failure instead of a bare except,
            # which would also swallow KeyboardInterrupt/SystemExit.
            except ValueError:
                pass
            if col == "_":
                col = ' '
            row[x] = col
    return gridworld.makeGrid(grid)
|
||||||
|
|
||||||
|
|
||||||
|
def computePolicy(moduleDict, grid, discount):
    """Run the student's ValueIterationAgent on `grid` with the given discount
    and return its greedy policy as a {state: action} dict."""
    valueIterator = moduleDict['valueIterationAgents'].ValueIterationAgent(grid, discount=discount)
    return {state: valueIterator.computeActionFromValues(state) for state in grid.getStates()}
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
|
class GridPolicyTest(testClasses.TestCase):
    """Checks the parameters returned by an analysis question: runs value
    iteration with them on a specified grid and verifies both the resulting
    policy and the path it follows from the start state."""

    def __init__(self, question, testDict):
        super(GridPolicyTest, self).__init__(question, testDict)

        # Function in module in analysis that returns (discount, noise)
        self.parameterFn = testDict['parameterFn']
        self.question2 = testDict.get('question2', 'false').lower() == 'true'

        # GridWorld specification
        #   _ is empty space
        #   numbers are terminal states with that value
        #   # is a wall
        #   S is a start state
        #
        self.gridText = testDict['grid']
        self.grid = gridworld.Gridworld(parseGrid(testDict['grid']))
        self.gridName = testDict['gridName']

        # Policy specification
        #   _ policy choice not checked
        #   N, E, S, W policy action must be north, east, south, west
        #
        self.policy = parseGrid(testDict['policy'])

        # State the most probable path must visit
        #   (x,y) for a particular location; (0,0) is bottom left
        #   terminal for the terminal state
        self.pathVisits = testDict.get('pathVisits', None)

        # State the most probable path must not visit
        #   (x,y) for a particular location; (0,0) is bottom left
        #   terminal for the terminal state
        self.pathNotVisits = testDict.get('pathNotVisits', None)

    def execute(self, grades, moduleDict, solutionDict):
        """Grade the student's parameters for this question's grid."""
        if not hasattr(moduleDict['analysis'], self.parameterFn):
            self.addMessage('Method not implemented: analysis.%s' % (self.parameterFn,))
            return self.testFail(grades)

        result = getattr(moduleDict['analysis'], self.parameterFn)()

        # A 'NOT POSSIBLE' answer is only accepted where it is actually true.
        # Fix: isinstance() instead of comparing type() for the type check.
        if isinstance(result, str) and result.lower()[0:3] == "not":
            self.addMessage('Actually, it is possible!')
            return self.testFail(grades)

        if self.question2:
            livingReward = None
            try:
                discount, noise = result
                discount = float(discount)
                noise = float(noise)
            # Fix: narrow the bare except so KeyboardInterrupt/SystemExit
            # still propagate.
            except Exception:
                self.addMessage('Did not return a (discount, noise) pair; instead analysis.%s returned: %s' % (self.parameterFn, result))
                return self.testFail(grades)
            if discount != 0.9 and noise != 0.2:
                self.addMessage('Must change either the discount or the noise, not both. Returned (discount, noise) = %s' % (result,))
                return self.testFail(grades)
        else:
            try:
                discount, noise, livingReward = result
                discount = float(discount)
                noise = float(noise)
                livingReward = float(livingReward)
            except Exception:
                self.addMessage('Did not return a (discount, noise, living reward) triple; instead analysis.%s returned: %s' % (self.parameterFn, result))
                return self.testFail(grades)

        self.grid.setNoise(noise)
        # Fix: identity comparison with None (PEP 8).
        if livingReward is not None:
            self.grid.setLivingReward(livingReward)

        start = self.grid.getStartState()
        policy = computePolicy(moduleDict, self.grid, discount)

        ## check policy
        actionMap = {'N': 'north', 'E': 'east', 'S': 'south', 'W': 'west', 'X': 'exit'}
        width, height = self.policy.width, self.policy.height
        policyPassed = True
        for x in range(width):
            for y in range(height):
                if self.policy[x][y] in actionMap and policy[(x, y)] != actionMap[self.policy[x][y]]:
                    differPoint = (x, y)
                    policyPassed = False

        if not policyPassed:
            self.addMessage('Policy not correct.')
            self.addMessage(' Student policy at %s: %s' % (differPoint, policy[differPoint]))
            self.addMessage(' Correct policy at %s: %s' % (differPoint, actionMap[self.policy[differPoint[0]][differPoint[1]]]))
            self.addMessage(' Student policy:')
            self.printPolicy(policy, False)
            self.addMessage(" Legend: N,S,E,W at states which move north etc, X at states which exit,")
            self.addMessage(" . at states where the policy is not defined (e.g. walls)")
            self.addMessage(' Correct policy specification:')
            self.printPolicy(self.policy, True)
            self.addMessage(" Legend: N,S,E,W for states in which the student policy must move north etc,")
            self.addMessage(" _ for states where it doesn't matter what the student policy does.")
            self.printGridworld()
            return self.testFail(grades)

        ## check path
        path = followPath(policy, self.grid.getStartState())

        if self.pathVisits is not None and self.pathVisits not in path:
            self.addMessage('Policy does not visit state %s when moving without noise.' % (self.pathVisits,))
            self.addMessage(' States visited: %s' % (path,))
            self.addMessage(' Student policy:')
            self.printPolicy(policy, False)
            self.addMessage(" Legend: N,S,E,W at states which move north etc, X at states which exit,")
            self.addMessage(" . at states where policy not defined")
            self.printGridworld()
            return self.testFail(grades)

        if self.pathNotVisits is not None and self.pathNotVisits in path:
            self.addMessage('Policy visits state %s when moving without noise.' % (self.pathNotVisits,))
            self.addMessage(' States visited: %s' % (path,))
            self.addMessage(' Student policy:')
            self.printPolicy(policy, False)
            self.addMessage(" Legend: N,S,E,W at states which move north etc, X at states which exit,")
            self.addMessage(" . at states where policy not defined")
            self.printGridworld()
            return self.testFail(grades)

        return self.testPass(grades)

    def printGridworld(self):
        """Echo the raw grid text into the grading messages."""
        self.addMessage(' Gridworld:')
        for line in self.gridText.split('\n'):
            self.addMessage(' ' + line)
        self.addMessage(' Legend: # wall, _ empty, S start, numbers terminal states with that reward.')

    def printPolicy(self, policy, policyTypeIsGrid):
        """Print a policy: either the parsed specification grid (True) or a
        {state: action} dict from value iteration (False)."""
        if policyTypeIsGrid:
            legend = {'N': 'N', 'E': 'E', 'S': 'S', 'W': 'W', ' ': '_'}
        else:
            legend = {'north': 'N', 'east': 'E', 'south': 'S', 'west': 'W', 'exit': 'X', '.': '.', ' ': '_'}

        for ybar in range(self.grid.grid.height):
            y = self.grid.grid.height - 1 - ybar  # print the top row first
            if policyTypeIsGrid:
                self.addMessage(" %s" % (" ".join([legend[policy[x][y]] for x in range(self.grid.grid.width)]),))
            else:
                self.addMessage(" %s" % (" ".join([legend[policy.get((x, y), '.')] for x in range(self.grid.grid.width)]),))
        # for state in sorted(self.grid.getStates()):
        #     if state != 'TERMINAL_STATE':
        #         self.addMessage('   (%s,%s) %s' % (state[0], state[1], policy[state]))

    def writeSolution(self, moduleDict, filePath):
        """No solution data needed; write a placeholder file."""
        with open(filePath, 'w') as handle:
            handle.write('# This is the solution file for %s.\n' % self.path)
            handle.write('# File intentionally blank.\n')
        return True
|
||||||
|
|
||||||
189
p3_rl/testClasses.py
Normal file
189
p3_rl/testClasses.py
Normal file
@@ -0,0 +1,189 @@
|
|||||||
|
# testClasses.py
|
||||||
|
# --------------
|
||||||
|
# Licensing Information: You are free to use or extend these projects for
|
||||||
|
# educational purposes provided that (1) you do not distribute or publish
|
||||||
|
# solutions, (2) you retain this notice, and (3) you provide clear
|
||||||
|
# attribution to UC Berkeley, including a link to http://ai.berkeley.edu.
|
||||||
|
#
|
||||||
|
# Attribution Information: The Pacman AI projects were developed at UC Berkeley.
|
||||||
|
# The core projects and autograders were primarily created by John DeNero
|
||||||
|
# (denero@cs.berkeley.edu) and Dan Klein (klein@cs.berkeley.edu).
|
||||||
|
# Student side autograding was added by Brad Miller, Nick Hay, and
|
||||||
|
# Pieter Abbeel (pabbeel@cs.berkeley.edu).
|
||||||
|
|
||||||
|
|
||||||
|
# import modules from python standard library
|
||||||
|
import inspect
|
||||||
|
import re
|
||||||
|
import sys
|
||||||
|
|
||||||
|
|
||||||
|
# Class which models a question in a project. Note that questions have a
|
||||||
|
# maximum number of points they are worth, and are composed of a series of
|
||||||
|
# test cases
|
||||||
|
class Question(object):
|
||||||
|
|
||||||
|
def raiseNotDefined(self):
|
||||||
|
print 'Method not implemented: %s' % inspect.stack()[1][3]
|
||||||
|
sys.exit(1)
|
||||||
|
|
||||||
|
def __init__(self, questionDict, display):
|
||||||
|
self.maxPoints = int(questionDict['max_points'])
|
||||||
|
self.testCases = []
|
||||||
|
self.display = display
|
||||||
|
|
||||||
|
def getDisplay(self):
|
||||||
|
return self.display
|
||||||
|
|
||||||
|
def getMaxPoints(self):
|
||||||
|
return self.maxPoints
|
||||||
|
|
||||||
|
# Note that 'thunk' must be a function which accepts a single argument,
|
||||||
|
# namely a 'grading' object
|
||||||
|
def addTestCase(self, testCase, thunk):
|
||||||
|
self.testCases.append((testCase, thunk))
|
||||||
|
|
||||||
|
def execute(self, grades):
|
||||||
|
self.raiseNotDefined()
|
||||||
|
|
||||||
|
# Question in which all test cases must be passed in order to receive credit
|
||||||
|
class PassAllTestsQuestion(Question):
|
||||||
|
|
||||||
|
def execute(self, grades):
|
||||||
|
# TODO: is this the right way to use grades? The autograder doesn't seem to use it.
|
||||||
|
testsFailed = False
|
||||||
|
grades.assignZeroCredit()
|
||||||
|
for _, f in self.testCases:
|
||||||
|
if not f(grades):
|
||||||
|
testsFailed = True
|
||||||
|
if testsFailed:
|
||||||
|
grades.fail("Tests failed.")
|
||||||
|
else:
|
||||||
|
grades.assignFullCredit()
|
||||||
|
|
||||||
|
|
||||||
|
# Question in which predict credit is given for test cases with a ``points'' property.
|
||||||
|
# All other tests are mandatory and must be passed.
|
||||||
|
class HackedPartialCreditQuestion(Question):
|
||||||
|
|
||||||
|
def execute(self, grades):
|
||||||
|
# TODO: is this the right way to use grades? The autograder doesn't seem to use it.
|
||||||
|
grades.assignZeroCredit()
|
||||||
|
|
||||||
|
points = 0
|
||||||
|
passed = True
|
||||||
|
for testCase, f in self.testCases:
|
||||||
|
testResult = f(grades)
|
||||||
|
if "points" in testCase.testDict:
|
||||||
|
if testResult: points += float(testCase.testDict["points"])
|
||||||
|
else:
|
||||||
|
passed = passed and testResult
|
||||||
|
|
||||||
|
## FIXME: Below terrible hack to match q3's logic
|
||||||
|
if int(points) == self.maxPoints and not passed:
|
||||||
|
grades.assignZeroCredit()
|
||||||
|
else:
|
||||||
|
grades.addPoints(int(points))
|
||||||
|
|
||||||
|
|
||||||
|
class Q6PartialCreditQuestion(Question):
|
||||||
|
"""Fails any test which returns False, otherwise doesn't effect the grades object.
|
||||||
|
Partial credit tests will add the required points."""
|
||||||
|
|
||||||
|
def execute(self, grades):
|
||||||
|
grades.assignZeroCredit()
|
||||||
|
|
||||||
|
results = []
|
||||||
|
for _, f in self.testCases:
|
||||||
|
results.append(f(grades))
|
||||||
|
if False in results:
|
||||||
|
grades.assignZeroCredit()
|
||||||
|
|
||||||
|
class PartialCreditQuestion(Question):
|
||||||
|
"""Fails any test which returns False, otherwise doesn't effect the grades object.
|
||||||
|
Partial credit tests will add the required points."""
|
||||||
|
|
||||||
|
def execute(self, grades):
|
||||||
|
grades.assignZeroCredit()
|
||||||
|
|
||||||
|
for _, f in self.testCases:
|
||||||
|
if not f(grades):
|
||||||
|
grades.assignZeroCredit()
|
||||||
|
grades.fail("Tests failed.")
|
||||||
|
return False
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
|
class NumberPassedQuestion(Question):
|
||||||
|
"""Grade is the number of test cases passed."""
|
||||||
|
|
||||||
|
def execute(self, grades):
|
||||||
|
grades.addPoints([f(grades) for _, f in self.testCases].count(True))
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
|
# Template modeling a generic test case
|
||||||
|
class TestCase(object):
|
||||||
|
|
||||||
|
def raiseNotDefined(self):
|
||||||
|
print 'Method not implemented: %s' % inspect.stack()[1][3]
|
||||||
|
sys.exit(1)
|
||||||
|
|
||||||
|
def getPath(self):
|
||||||
|
return self.path
|
||||||
|
|
||||||
|
def __init__(self, question, testDict):
|
||||||
|
self.question = question
|
||||||
|
self.testDict = testDict
|
||||||
|
self.path = testDict['path']
|
||||||
|
self.messages = []
|
||||||
|
|
||||||
|
def __str__(self):
|
||||||
|
self.raiseNotDefined()
|
||||||
|
|
||||||
|
def execute(self, grades, moduleDict, solutionDict):
|
||||||
|
self.raiseNotDefined()
|
||||||
|
|
||||||
|
def writeSolution(self, moduleDict, filePath):
|
||||||
|
self.raiseNotDefined()
|
||||||
|
return True
|
||||||
|
|
||||||
|
# Tests should call the following messages for grading
|
||||||
|
# to ensure a uniform format for test output.
|
||||||
|
#
|
||||||
|
# TODO: this is hairy, but we need to fix grading.py's interface
|
||||||
|
# to get a nice hierarchical project - question - test structure,
|
||||||
|
# then these should be moved into Question proper.
|
||||||
|
def testPass(self, grades):
|
||||||
|
grades.addMessage('PASS: %s' % (self.path,))
|
||||||
|
for line in self.messages:
|
||||||
|
grades.addMessage(' %s' % (line,))
|
||||||
|
return True
|
||||||
|
|
||||||
|
def testFail(self, grades):
|
||||||
|
grades.addMessage('FAIL: %s' % (self.path,))
|
||||||
|
for line in self.messages:
|
||||||
|
grades.addMessage(' %s' % (line,))
|
||||||
|
return False
|
||||||
|
|
||||||
|
# This should really be question level?
|
||||||
|
#
|
||||||
|
def testPartial(self, grades, points, maxPoints):
|
||||||
|
grades.addPoints(points)
|
||||||
|
extraCredit = max(0, points - maxPoints)
|
||||||
|
regularCredit = points - extraCredit
|
||||||
|
|
||||||
|
grades.addMessage('%s: %s (%s of %s points)' % ("PASS" if points >= maxPoints else "FAIL", self.path, regularCredit, maxPoints))
|
||||||
|
if extraCredit > 0:
|
||||||
|
grades.addMessage('EXTRA CREDIT: %s points' % (extraCredit,))
|
||||||
|
|
||||||
|
for line in self.messages:
|
||||||
|
grades.addMessage(' %s' % (line,))
|
||||||
|
|
||||||
|
return True
|
||||||
|
|
||||||
|
def addMessage(self, message):
|
||||||
|
self.messages.extend(message.split('\n'))
|
||||||
|
|
||||||
85
p3_rl/testParser.py
Normal file
85
p3_rl/testParser.py
Normal file
@@ -0,0 +1,85 @@
|
|||||||
|
# testParser.py
|
||||||
|
# -------------
|
||||||
|
# Licensing Information: You are free to use or extend these projects for
|
||||||
|
# educational purposes provided that (1) you do not distribute or publish
|
||||||
|
# solutions, (2) you retain this notice, and (3) you provide clear
|
||||||
|
# attribution to UC Berkeley, including a link to http://ai.berkeley.edu.
|
||||||
|
#
|
||||||
|
# Attribution Information: The Pacman AI projects were developed at UC Berkeley.
|
||||||
|
# The core projects and autograders were primarily created by John DeNero
|
||||||
|
# (denero@cs.berkeley.edu) and Dan Klein (klein@cs.berkeley.edu).
|
||||||
|
# Student side autograding was added by Brad Miller, Nick Hay, and
|
||||||
|
# Pieter Abbeel (pabbeel@cs.berkeley.edu).
|
||||||
|
|
||||||
|
|
||||||
|
import re
|
||||||
|
import sys
|
||||||
|
|
||||||
|
class TestParser(object):
|
||||||
|
|
||||||
|
def __init__(self, path):
|
||||||
|
# save the path to the test file
|
||||||
|
self.path = path
|
||||||
|
|
||||||
|
def removeComments(self, rawlines):
|
||||||
|
# remove any portion of a line following a '#' symbol
|
||||||
|
fixed_lines = []
|
||||||
|
for l in rawlines:
|
||||||
|
idx = l.find('#')
|
||||||
|
if idx == -1:
|
||||||
|
fixed_lines.append(l)
|
||||||
|
else:
|
||||||
|
fixed_lines.append(l[0:idx])
|
||||||
|
return '\n'.join(fixed_lines)
|
||||||
|
|
||||||
|
def parse(self):
|
||||||
|
# read in the test case and remove comments
|
||||||
|
test = {}
|
||||||
|
with open(self.path) as handle:
|
||||||
|
raw_lines = handle.read().split('\n')
|
||||||
|
|
||||||
|
test_text = self.removeComments(raw_lines)
|
||||||
|
test['__raw_lines__'] = raw_lines
|
||||||
|
test['path'] = self.path
|
||||||
|
test['__emit__'] = []
|
||||||
|
lines = test_text.split('\n')
|
||||||
|
i = 0
|
||||||
|
# read a property in each loop cycle
|
||||||
|
while(i < len(lines)):
|
||||||
|
# skip blank lines
|
||||||
|
if re.match('\A\s*\Z', lines[i]):
|
||||||
|
test['__emit__'].append(("raw", raw_lines[i]))
|
||||||
|
i += 1
|
||||||
|
continue
|
||||||
|
m = re.match('\A([^"]*?):\s*"([^"]*)"\s*\Z', lines[i])
|
||||||
|
if m:
|
||||||
|
test[m.group(1)] = m.group(2)
|
||||||
|
test['__emit__'].append(("oneline", m.group(1)))
|
||||||
|
i += 1
|
||||||
|
continue
|
||||||
|
m = re.match('\A([^"]*?):\s*"""\s*\Z', lines[i])
|
||||||
|
if m:
|
||||||
|
msg = []
|
||||||
|
i += 1
|
||||||
|
while(not re.match('\A\s*"""\s*\Z', lines[i])):
|
||||||
|
msg.append(raw_lines[i])
|
||||||
|
i += 1
|
||||||
|
test[m.group(1)] = '\n'.join(msg)
|
||||||
|
test['__emit__'].append(("multiline", m.group(1)))
|
||||||
|
i += 1
|
||||||
|
continue
|
||||||
|
print 'error parsing test file: %s' % self.path
|
||||||
|
sys.exit(1)
|
||||||
|
return test
|
||||||
|
|
||||||
|
|
||||||
|
def emitTestDict(testDict, handle):
|
||||||
|
for kind, data in testDict['__emit__']:
|
||||||
|
if kind == "raw":
|
||||||
|
handle.write(data + "\n")
|
||||||
|
elif kind == "oneline":
|
||||||
|
handle.write('%s: "%s"\n' % (data, testDict[data]))
|
||||||
|
elif kind == "multiline":
|
||||||
|
handle.write('%s: """\n%s\n"""\n' % (data, testDict[data]))
|
||||||
|
else:
|
||||||
|
raise Exception("Bad __emit__")
|
||||||
0
p3_rl/test_cases/CONFIG
Normal file
0
p3_rl/test_cases/CONFIG
Normal file
410
p3_rl/test_cases/q1/1-tinygrid.solution
Normal file
410
p3_rl/test_cases/q1/1-tinygrid.solution
Normal file
@@ -0,0 +1,410 @@
|
|||||||
|
values_k_0: """
|
||||||
|
0.0000
|
||||||
|
0.0000
|
||||||
|
0.0000
|
||||||
|
"""
|
||||||
|
|
||||||
|
q_values_k_0_action_north: """
|
||||||
|
illegal
|
||||||
|
0.0000
|
||||||
|
illegal
|
||||||
|
"""
|
||||||
|
|
||||||
|
q_values_k_0_action_east: """
|
||||||
|
illegal
|
||||||
|
0.0000
|
||||||
|
illegal
|
||||||
|
"""
|
||||||
|
|
||||||
|
q_values_k_0_action_exit: """
|
||||||
|
-10.0000
|
||||||
|
illegal
|
||||||
|
10.0000
|
||||||
|
"""
|
||||||
|
|
||||||
|
q_values_k_0_action_south: """
|
||||||
|
illegal
|
||||||
|
0.0000
|
||||||
|
illegal
|
||||||
|
"""
|
||||||
|
|
||||||
|
q_values_k_0_action_west: """
|
||||||
|
illegal
|
||||||
|
0.0000
|
||||||
|
illegal
|
||||||
|
"""
|
||||||
|
|
||||||
|
values_k_1: """
|
||||||
|
-10.0000
|
||||||
|
0.0000
|
||||||
|
10.0000
|
||||||
|
"""
|
||||||
|
|
||||||
|
q_values_k_1_action_north: """
|
||||||
|
illegal
|
||||||
|
-5.0000
|
||||||
|
illegal
|
||||||
|
"""
|
||||||
|
|
||||||
|
q_values_k_1_action_east: """
|
||||||
|
illegal
|
||||||
|
0.0000
|
||||||
|
illegal
|
||||||
|
"""
|
||||||
|
|
||||||
|
q_values_k_1_action_exit: """
|
||||||
|
-10.0000
|
||||||
|
illegal
|
||||||
|
10.0000
|
||||||
|
"""
|
||||||
|
|
||||||
|
q_values_k_1_action_south: """
|
||||||
|
illegal
|
||||||
|
5.0000
|
||||||
|
illegal
|
||||||
|
"""
|
||||||
|
|
||||||
|
q_values_k_1_action_west: """
|
||||||
|
illegal
|
||||||
|
0.0000
|
||||||
|
illegal
|
||||||
|
"""
|
||||||
|
|
||||||
|
values_k_2: """
|
||||||
|
-10.0000
|
||||||
|
5.0000
|
||||||
|
10.0000
|
||||||
|
"""
|
||||||
|
|
||||||
|
q_values_k_2_action_north: """
|
||||||
|
illegal
|
||||||
|
-5.0000
|
||||||
|
illegal
|
||||||
|
"""
|
||||||
|
|
||||||
|
q_values_k_2_action_east: """
|
||||||
|
illegal
|
||||||
|
2.5000
|
||||||
|
illegal
|
||||||
|
"""
|
||||||
|
|
||||||
|
q_values_k_2_action_exit: """
|
||||||
|
-10.0000
|
||||||
|
illegal
|
||||||
|
10.0000
|
||||||
|
"""
|
||||||
|
|
||||||
|
q_values_k_2_action_south: """
|
||||||
|
illegal
|
||||||
|
5.0000
|
||||||
|
illegal
|
||||||
|
"""
|
||||||
|
|
||||||
|
q_values_k_2_action_west: """
|
||||||
|
illegal
|
||||||
|
2.5000
|
||||||
|
illegal
|
||||||
|
"""
|
||||||
|
|
||||||
|
values_k_3: """
|
||||||
|
-10.0000
|
||||||
|
5.0000
|
||||||
|
10.0000
|
||||||
|
"""
|
||||||
|
|
||||||
|
q_values_k_3_action_north: """
|
||||||
|
illegal
|
||||||
|
-5.0000
|
||||||
|
illegal
|
||||||
|
"""
|
||||||
|
|
||||||
|
q_values_k_3_action_east: """
|
||||||
|
illegal
|
||||||
|
2.5000
|
||||||
|
illegal
|
||||||
|
"""
|
||||||
|
|
||||||
|
q_values_k_3_action_exit: """
|
||||||
|
-10.0000
|
||||||
|
illegal
|
||||||
|
10.0000
|
||||||
|
"""
|
||||||
|
|
||||||
|
q_values_k_3_action_south: """
|
||||||
|
illegal
|
||||||
|
5.0000
|
||||||
|
illegal
|
||||||
|
"""
|
||||||
|
|
||||||
|
q_values_k_3_action_west: """
|
||||||
|
illegal
|
||||||
|
2.5000
|
||||||
|
illegal
|
||||||
|
"""
|
||||||
|
|
||||||
|
values_k_4: """
|
||||||
|
-10.0000
|
||||||
|
5.0000
|
||||||
|
10.0000
|
||||||
|
"""
|
||||||
|
|
||||||
|
q_values_k_4_action_north: """
|
||||||
|
illegal
|
||||||
|
-5.0000
|
||||||
|
illegal
|
||||||
|
"""
|
||||||
|
|
||||||
|
q_values_k_4_action_east: """
|
||||||
|
illegal
|
||||||
|
2.5000
|
||||||
|
illegal
|
||||||
|
"""
|
||||||
|
|
||||||
|
q_values_k_4_action_exit: """
|
||||||
|
-10.0000
|
||||||
|
illegal
|
||||||
|
10.0000
|
||||||
|
"""
|
||||||
|
|
||||||
|
q_values_k_4_action_south: """
|
||||||
|
illegal
|
||||||
|
5.0000
|
||||||
|
illegal
|
||||||
|
"""
|
||||||
|
|
||||||
|
q_values_k_4_action_west: """
|
||||||
|
illegal
|
||||||
|
2.5000
|
||||||
|
illegal
|
||||||
|
"""
|
||||||
|
|
||||||
|
values_k_5: """
|
||||||
|
-10.0000
|
||||||
|
5.0000
|
||||||
|
10.0000
|
||||||
|
"""
|
||||||
|
|
||||||
|
q_values_k_5_action_north: """
|
||||||
|
illegal
|
||||||
|
-5.0000
|
||||||
|
illegal
|
||||||
|
"""
|
||||||
|
|
||||||
|
q_values_k_5_action_east: """
|
||||||
|
illegal
|
||||||
|
2.5000
|
||||||
|
illegal
|
||||||
|
"""
|
||||||
|
|
||||||
|
q_values_k_5_action_exit: """
|
||||||
|
-10.0000
|
||||||
|
illegal
|
||||||
|
10.0000
|
||||||
|
"""
|
||||||
|
|
||||||
|
q_values_k_5_action_south: """
|
||||||
|
illegal
|
||||||
|
5.0000
|
||||||
|
illegal
|
||||||
|
"""
|
||||||
|
|
||||||
|
q_values_k_5_action_west: """
|
||||||
|
illegal
|
||||||
|
2.5000
|
||||||
|
illegal
|
||||||
|
"""
|
||||||
|
|
||||||
|
values_k_6: """
|
||||||
|
-10.0000
|
||||||
|
5.0000
|
||||||
|
10.0000
|
||||||
|
"""
|
||||||
|
|
||||||
|
q_values_k_6_action_north: """
|
||||||
|
illegal
|
||||||
|
-5.0000
|
||||||
|
illegal
|
||||||
|
"""
|
||||||
|
|
||||||
|
q_values_k_6_action_east: """
|
||||||
|
illegal
|
||||||
|
2.5000
|
||||||
|
illegal
|
||||||
|
"""
|
||||||
|
|
||||||
|
q_values_k_6_action_exit: """
|
||||||
|
-10.0000
|
||||||
|
illegal
|
||||||
|
10.0000
|
||||||
|
"""
|
||||||
|
|
||||||
|
q_values_k_6_action_south: """
|
||||||
|
illegal
|
||||||
|
5.0000
|
||||||
|
illegal
|
||||||
|
"""
|
||||||
|
|
||||||
|
q_values_k_6_action_west: """
|
||||||
|
illegal
|
||||||
|
2.5000
|
||||||
|
illegal
|
||||||
|
"""
|
||||||
|
|
||||||
|
values_k_7: """
|
||||||
|
-10.0000
|
||||||
|
5.0000
|
||||||
|
10.0000
|
||||||
|
"""
|
||||||
|
|
||||||
|
q_values_k_7_action_north: """
|
||||||
|
illegal
|
||||||
|
-5.0000
|
||||||
|
illegal
|
||||||
|
"""
|
||||||
|
|
||||||
|
q_values_k_7_action_east: """
|
||||||
|
illegal
|
||||||
|
2.5000
|
||||||
|
illegal
|
||||||
|
"""
|
||||||
|
|
||||||
|
q_values_k_7_action_exit: """
|
||||||
|
-10.0000
|
||||||
|
illegal
|
||||||
|
10.0000
|
||||||
|
"""
|
||||||
|
|
||||||
|
q_values_k_7_action_south: """
|
||||||
|
illegal
|
||||||
|
5.0000
|
||||||
|
illegal
|
||||||
|
"""
|
||||||
|
|
||||||
|
q_values_k_7_action_west: """
|
||||||
|
illegal
|
||||||
|
2.5000
|
||||||
|
illegal
|
||||||
|
"""
|
||||||
|
|
||||||
|
values_k_8: """
|
||||||
|
-10.0000
|
||||||
|
5.0000
|
||||||
|
10.0000
|
||||||
|
"""
|
||||||
|
|
||||||
|
q_values_k_8_action_north: """
|
||||||
|
illegal
|
||||||
|
-5.0000
|
||||||
|
illegal
|
||||||
|
"""
|
||||||
|
|
||||||
|
q_values_k_8_action_east: """
|
||||||
|
illegal
|
||||||
|
2.5000
|
||||||
|
illegal
|
||||||
|
"""
|
||||||
|
|
||||||
|
q_values_k_8_action_exit: """
|
||||||
|
-10.0000
|
||||||
|
illegal
|
||||||
|
10.0000
|
||||||
|
"""
|
||||||
|
|
||||||
|
q_values_k_8_action_south: """
|
||||||
|
illegal
|
||||||
|
5.0000
|
||||||
|
illegal
|
||||||
|
"""
|
||||||
|
|
||||||
|
q_values_k_8_action_west: """
|
||||||
|
illegal
|
||||||
|
2.5000
|
||||||
|
illegal
|
||||||
|
"""
|
||||||
|
|
||||||
|
values_k_9: """
|
||||||
|
-10.0000
|
||||||
|
5.0000
|
||||||
|
10.0000
|
||||||
|
"""
|
||||||
|
|
||||||
|
q_values_k_9_action_north: """
|
||||||
|
illegal
|
||||||
|
-5.0000
|
||||||
|
illegal
|
||||||
|
"""
|
||||||
|
|
||||||
|
q_values_k_9_action_east: """
|
||||||
|
illegal
|
||||||
|
2.5000
|
||||||
|
illegal
|
||||||
|
"""
|
||||||
|
|
||||||
|
q_values_k_9_action_exit: """
|
||||||
|
-10.0000
|
||||||
|
illegal
|
||||||
|
10.0000
|
||||||
|
"""
|
||||||
|
|
||||||
|
q_values_k_9_action_south: """
|
||||||
|
illegal
|
||||||
|
5.0000
|
||||||
|
illegal
|
||||||
|
"""
|
||||||
|
|
||||||
|
q_values_k_9_action_west: """
|
||||||
|
illegal
|
||||||
|
2.5000
|
||||||
|
illegal
|
||||||
|
"""
|
||||||
|
|
||||||
|
values_k_100: """
|
||||||
|
-10.0000
|
||||||
|
5.0000
|
||||||
|
10.0000
|
||||||
|
"""
|
||||||
|
|
||||||
|
q_values_k_100_action_north: """
|
||||||
|
illegal
|
||||||
|
-5.0000
|
||||||
|
illegal
|
||||||
|
"""
|
||||||
|
|
||||||
|
q_values_k_100_action_east: """
|
||||||
|
illegal
|
||||||
|
2.5000
|
||||||
|
illegal
|
||||||
|
"""
|
||||||
|
|
||||||
|
q_values_k_100_action_exit: """
|
||||||
|
-10.0000
|
||||||
|
illegal
|
||||||
|
10.0000
|
||||||
|
"""
|
||||||
|
|
||||||
|
q_values_k_100_action_south: """
|
||||||
|
illegal
|
||||||
|
5.0000
|
||||||
|
illegal
|
||||||
|
"""
|
||||||
|
|
||||||
|
q_values_k_100_action_west: """
|
||||||
|
illegal
|
||||||
|
2.5000
|
||||||
|
illegal
|
||||||
|
"""
|
||||||
|
|
||||||
|
policy: """
|
||||||
|
exit
|
||||||
|
south
|
||||||
|
exit
|
||||||
|
"""
|
||||||
|
|
||||||
|
actions: """
|
||||||
|
north
|
||||||
|
east
|
||||||
|
exit
|
||||||
|
south
|
||||||
|
west
|
||||||
|
"""
|
||||||
|
|
||||||
22
p3_rl/test_cases/q1/1-tinygrid.test
Normal file
22
p3_rl/test_cases/q1/1-tinygrid.test
Normal file
@@ -0,0 +1,22 @@
|
|||||||
|
class: "ValueIterationTest"
|
||||||
|
|
||||||
|
# GridWorld specification
|
||||||
|
# _ is empty space
|
||||||
|
# numbers are terminal states with that value
|
||||||
|
# # is a wall
|
||||||
|
# S is a start state
|
||||||
|
#
|
||||||
|
grid: """
|
||||||
|
-10
|
||||||
|
S
|
||||||
|
10
|
||||||
|
"""
|
||||||
|
discount: "0.5"
|
||||||
|
noise: "0.0"
|
||||||
|
livingReward: "0.0"
|
||||||
|
epsilon: "0.5"
|
||||||
|
learningRate: "0.1"
|
||||||
|
numExperiences: "100"
|
||||||
|
valueIterations: "100"
|
||||||
|
iterations: "10000"
|
||||||
|
|
||||||
410
p3_rl/test_cases/q1/2-tinygrid-noisy.solution
Normal file
410
p3_rl/test_cases/q1/2-tinygrid-noisy.solution
Normal file
@@ -0,0 +1,410 @@
|
|||||||
|
values_k_0: """
|
||||||
|
0.0000
|
||||||
|
0.0000
|
||||||
|
0.0000
|
||||||
|
"""
|
||||||
|
|
||||||
|
q_values_k_0_action_north: """
|
||||||
|
illegal
|
||||||
|
0.0000
|
||||||
|
illegal
|
||||||
|
"""
|
||||||
|
|
||||||
|
q_values_k_0_action_east: """
|
||||||
|
illegal
|
||||||
|
0.0000
|
||||||
|
illegal
|
||||||
|
"""
|
||||||
|
|
||||||
|
q_values_k_0_action_exit: """
|
||||||
|
-10.0000
|
||||||
|
illegal
|
||||||
|
10.0000
|
||||||
|
"""
|
||||||
|
|
||||||
|
q_values_k_0_action_south: """
|
||||||
|
illegal
|
||||||
|
0.0000
|
||||||
|
illegal
|
||||||
|
"""
|
||||||
|
|
||||||
|
q_values_k_0_action_west: """
|
||||||
|
illegal
|
||||||
|
0.0000
|
||||||
|
illegal
|
||||||
|
"""
|
||||||
|
|
||||||
|
values_k_1: """
|
||||||
|
-10.0000
|
||||||
|
0.0000
|
||||||
|
10.0000
|
||||||
|
"""
|
||||||
|
|
||||||
|
q_values_k_1_action_north: """
|
||||||
|
illegal
|
||||||
|
-5.6250
|
||||||
|
illegal
|
||||||
|
"""
|
||||||
|
|
||||||
|
q_values_k_1_action_east: """
|
||||||
|
illegal
|
||||||
|
0.0000
|
||||||
|
illegal
|
||||||
|
"""
|
||||||
|
|
||||||
|
q_values_k_1_action_exit: """
|
||||||
|
-10.0000
|
||||||
|
illegal
|
||||||
|
10.0000
|
||||||
|
"""
|
||||||
|
|
||||||
|
q_values_k_1_action_south: """
|
||||||
|
illegal
|
||||||
|
5.6250
|
||||||
|
illegal
|
||||||
|
"""
|
||||||
|
|
||||||
|
q_values_k_1_action_west: """
|
||||||
|
illegal
|
||||||
|
0.0000
|
||||||
|
illegal
|
||||||
|
"""
|
||||||
|
|
||||||
|
values_k_2: """
|
||||||
|
-10.0000
|
||||||
|
5.6250
|
||||||
|
10.0000
|
||||||
|
"""
|
||||||
|
|
||||||
|
q_values_k_2_action_north: """
|
||||||
|
illegal
|
||||||
|
-4.5703
|
||||||
|
illegal
|
||||||
|
"""
|
||||||
|
|
||||||
|
q_values_k_2_action_east: """
|
||||||
|
illegal
|
||||||
|
3.1641
|
||||||
|
illegal
|
||||||
|
"""
|
||||||
|
|
||||||
|
q_values_k_2_action_exit: """
|
||||||
|
-10.0000
|
||||||
|
illegal
|
||||||
|
10.0000
|
||||||
|
"""
|
||||||
|
|
||||||
|
q_values_k_2_action_south: """
|
||||||
|
illegal
|
||||||
|
6.6797
|
||||||
|
illegal
|
||||||
|
"""
|
||||||
|
|
||||||
|
q_values_k_2_action_west: """
|
||||||
|
illegal
|
||||||
|
3.1641
|
||||||
|
illegal
|
||||||
|
"""
|
||||||
|
|
||||||
|
values_k_3: """
|
||||||
|
-10.0000
|
||||||
|
6.6797
|
||||||
|
10.0000
|
||||||
|
"""
|
||||||
|
|
||||||
|
q_values_k_3_action_north: """
|
||||||
|
illegal
|
||||||
|
-4.3726
|
||||||
|
illegal
|
||||||
|
"""
|
||||||
|
|
||||||
|
q_values_k_3_action_east: """
|
||||||
|
illegal
|
||||||
|
3.7573
|
||||||
|
illegal
|
||||||
|
"""
|
||||||
|
|
||||||
|
q_values_k_3_action_exit: """
|
||||||
|
-10.0000
|
||||||
|
illegal
|
||||||
|
10.0000
|
||||||
|
"""
|
||||||
|
|
||||||
|
q_values_k_3_action_south: """
|
||||||
|
illegal
|
||||||
|
6.8774
|
||||||
|
illegal
|
||||||
|
"""
|
||||||
|
|
||||||
|
q_values_k_3_action_west: """
|
||||||
|
illegal
|
||||||
|
3.7573
|
||||||
|
illegal
|
||||||
|
"""
|
||||||
|
|
||||||
|
values_k_4: """
|
||||||
|
-10.0000
|
||||||
|
6.8774
|
||||||
|
10.0000
|
||||||
|
"""
|
||||||
|
|
||||||
|
q_values_k_4_action_north: """
|
||||||
|
illegal
|
||||||
|
-4.3355
|
||||||
|
illegal
|
||||||
|
"""
|
||||||
|
|
||||||
|
q_values_k_4_action_east: """
|
||||||
|
illegal
|
||||||
|
3.8686
|
||||||
|
illegal
|
||||||
|
"""
|
||||||
|
|
||||||
|
q_values_k_4_action_exit: """
|
||||||
|
-10.0000
|
||||||
|
illegal
|
||||||
|
10.0000
|
||||||
|
"""
|
||||||
|
|
||||||
|
q_values_k_4_action_south: """
|
||||||
|
illegal
|
||||||
|
6.9145
|
||||||
|
illegal
|
||||||
|
"""
|
||||||
|
|
||||||
|
q_values_k_4_action_west: """
|
||||||
|
illegal
|
||||||
|
3.8686
|
||||||
|
illegal
|
||||||
|
"""
|
||||||
|
|
||||||
|
values_k_5: """
|
||||||
|
-10.0000
|
||||||
|
6.9145
|
||||||
|
10.0000
|
||||||
|
"""
|
||||||
|
|
||||||
|
q_values_k_5_action_north: """
|
||||||
|
illegal
|
||||||
|
-4.3285
|
||||||
|
illegal
|
||||||
|
"""
|
||||||
|
|
||||||
|
q_values_k_5_action_east: """
|
||||||
|
illegal
|
||||||
|
3.8894
|
||||||
|
illegal
|
||||||
|
"""
|
||||||
|
|
||||||
|
q_values_k_5_action_exit: """
|
||||||
|
-10.0000
|
||||||
|
illegal
|
||||||
|
10.0000
|
||||||
|
"""
|
||||||
|
|
||||||
|
q_values_k_5_action_south: """
|
||||||
|
illegal
|
||||||
|
6.9215
|
||||||
|
illegal
|
||||||
|
"""
|
||||||
|
|
||||||
|
q_values_k_5_action_west: """
|
||||||
|
illegal
|
||||||
|
3.8894
|
||||||
|
illegal
|
||||||
|
"""
|
||||||
|
|
||||||
|
values_k_6: """
|
||||||
|
-10.0000
|
||||||
|
6.9215
|
||||||
|
10.0000
|
||||||
|
"""
|
||||||
|
|
||||||
|
q_values_k_6_action_north: """
|
||||||
|
illegal
|
||||||
|
-4.3272
|
||||||
|
illegal
|
||||||
|
"""
|
||||||
|
|
||||||
|
q_values_k_6_action_east: """
|
||||||
|
illegal
|
||||||
|
3.8933
|
||||||
|
illegal
|
||||||
|
"""
|
||||||
|
|
||||||
|
q_values_k_6_action_exit: """
|
||||||
|
-10.0000
|
||||||
|
illegal
|
||||||
|
10.0000
|
||||||
|
"""
|
||||||
|
|
||||||
|
q_values_k_6_action_south: """
|
||||||
|
illegal
|
||||||
|
6.9228
|
||||||
|
illegal
|
||||||
|
"""
|
||||||
|
|
||||||
|
q_values_k_6_action_west: """
|
||||||
|
illegal
|
||||||
|
3.8933
|
||||||
|
illegal
|
||||||
|
"""
|
||||||
|
|
||||||
|
values_k_7: """
|
||||||
|
-10.0000
|
||||||
|
6.9228
|
||||||
|
10.0000
|
||||||
|
"""
|
||||||
|
|
||||||
|
q_values_k_7_action_north: """
|
||||||
|
illegal
|
||||||
|
-4.3270
|
||||||
|
illegal
|
||||||
|
"""
|
||||||
|
|
||||||
|
q_values_k_7_action_east: """
|
||||||
|
illegal
|
||||||
|
3.8941
|
||||||
|
illegal
|
||||||
|
"""
|
||||||
|
|
||||||
|
q_values_k_7_action_exit: """
|
||||||
|
-10.0000
|
||||||
|
illegal
|
||||||
|
10.0000
|
||||||
|
"""
|
||||||
|
|
||||||
|
q_values_k_7_action_south: """
|
||||||
|
illegal
|
||||||
|
6.9230
|
||||||
|
illegal
|
||||||
|
"""
|
||||||
|
|
||||||
|
q_values_k_7_action_west: """
|
||||||
|
illegal
|
||||||
|
3.8941
|
||||||
|
illegal
|
||||||
|
"""
|
||||||
|
|
||||||
|
values_k_8: """
|
||||||
|
-10.0000
|
||||||
|
6.9230
|
||||||
|
10.0000
|
||||||
|
"""
|
||||||
|
|
||||||
|
q_values_k_8_action_north: """
|
||||||
|
illegal
|
||||||
|
-4.3269
|
||||||
|
illegal
|
||||||
|
"""
|
||||||
|
|
||||||
|
q_values_k_8_action_east: """
|
||||||
|
illegal
|
||||||
|
3.8942
|
||||||
|
illegal
|
||||||
|
"""
|
||||||
|
|
||||||
|
q_values_k_8_action_exit: """
|
||||||
|
-10.0000
|
||||||
|
illegal
|
||||||
|
10.0000
|
||||||
|
"""
|
||||||
|
|
||||||
|
q_values_k_8_action_south: """
|
||||||
|
illegal
|
||||||
|
6.9231
|
||||||
|
illegal
|
||||||
|
"""
|
||||||
|
|
||||||
|
q_values_k_8_action_west: """
|
||||||
|
illegal
|
||||||
|
3.8942
|
||||||
|
illegal
|
||||||
|
"""
|
||||||
|
|
||||||
|
values_k_9: """
|
||||||
|
-10.0000
|
||||||
|
6.9231
|
||||||
|
10.0000
|
||||||
|
"""
|
||||||
|
|
||||||
|
q_values_k_9_action_north: """
|
||||||
|
illegal
|
||||||
|
-4.3269
|
||||||
|
illegal
|
||||||
|
"""
|
||||||
|
|
||||||
|
q_values_k_9_action_east: """
|
||||||
|
illegal
|
||||||
|
3.8942
|
||||||
|
illegal
|
||||||
|
"""
|
||||||
|
|
||||||
|
q_values_k_9_action_exit: """
|
||||||
|
-10.0000
|
||||||
|
illegal
|
||||||
|
10.0000
|
||||||
|
"""
|
||||||
|
|
||||||
|
q_values_k_9_action_south: """
|
||||||
|
illegal
|
||||||
|
6.9231
|
||||||
|
illegal
|
||||||
|
"""
|
||||||
|
|
||||||
|
q_values_k_9_action_west: """
|
||||||
|
illegal
|
||||||
|
3.8942
|
||||||
|
illegal
|
||||||
|
"""
|
||||||
|
|
||||||
|
values_k_100: """
|
||||||
|
-10.0000
|
||||||
|
6.9231
|
||||||
|
10.0000
|
||||||
|
"""
|
||||||
|
|
||||||
|
q_values_k_100_action_north: """
|
||||||
|
illegal
|
||||||
|
-4.3269
|
||||||
|
illegal
|
||||||
|
"""
|
||||||
|
|
||||||
|
q_values_k_100_action_east: """
|
||||||
|
illegal
|
||||||
|
3.8942
|
||||||
|
illegal
|
||||||
|
"""
|
||||||
|
|
||||||
|
q_values_k_100_action_exit: """
|
||||||
|
-10.0000
|
||||||
|
illegal
|
||||||
|
10.0000
|
||||||
|
"""
|
||||||
|
|
||||||
|
q_values_k_100_action_south: """
|
||||||
|
illegal
|
||||||
|
6.9231
|
||||||
|
illegal
|
||||||
|
"""
|
||||||
|
|
||||||
|
q_values_k_100_action_west: """
|
||||||
|
illegal
|
||||||
|
3.8942
|
||||||
|
illegal
|
||||||
|
"""
|
||||||
|
|
||||||
|
policy: """
|
||||||
|
exit
|
||||||
|
south
|
||||||
|
exit
|
||||||
|
"""
|
||||||
|
|
||||||
|
actions: """
|
||||||
|
north
|
||||||
|
east
|
||||||
|
exit
|
||||||
|
south
|
||||||
|
west
|
||||||
|
"""
|
||||||
|
|
||||||
22
p3_rl/test_cases/q1/2-tinygrid-noisy.test
Normal file
22
p3_rl/test_cases/q1/2-tinygrid-noisy.test
Normal file
@@ -0,0 +1,22 @@
|
|||||||
|
class: "ValueIterationTest"
|
||||||
|
|
||||||
|
# GridWorld specification
|
||||||
|
# _ is empty space
|
||||||
|
# numbers are terminal states with that value
|
||||||
|
# # is a wall
|
||||||
|
# S is a start state
|
||||||
|
#
|
||||||
|
grid: """
|
||||||
|
-10
|
||||||
|
S
|
||||||
|
10
|
||||||
|
"""
|
||||||
|
discount: "0.75"
|
||||||
|
noise: "0.25"
|
||||||
|
livingReward: "0.0"
|
||||||
|
epsilon: "0.5"
|
||||||
|
learningRate: "0.1"
|
||||||
|
numExperiences: "100"
|
||||||
|
valueIterations: "100"
|
||||||
|
iterations: "10000"
|
||||||
|
|
||||||
678
p3_rl/test_cases/q1/3-bridge.solution
Normal file
678
p3_rl/test_cases/q1/3-bridge.solution
Normal file
@@ -0,0 +1,678 @@
|
|||||||
|
values_k_0: """
|
||||||
|
__________ 0.0000 __________
|
||||||
|
0.0000 0.0000 0.0000
|
||||||
|
0.0000 0.0000 0.0000
|
||||||
|
0.0000 0.0000 0.0000
|
||||||
|
0.0000 0.0000 0.0000
|
||||||
|
0.0000 0.0000 0.0000
|
||||||
|
__________ 0.0000 __________
|
||||||
|
"""
|
||||||
|
|
||||||
|
q_values_k_0_action_north: """
|
||||||
|
__________ illegal __________
|
||||||
|
illegal 0.0000 illegal
|
||||||
|
illegal 0.0000 illegal
|
||||||
|
illegal 0.0000 illegal
|
||||||
|
illegal 0.0000 illegal
|
||||||
|
illegal 0.0000 illegal
|
||||||
|
__________ illegal __________
|
||||||
|
"""
|
||||||
|
|
||||||
|
q_values_k_0_action_east: """
|
||||||
|
__________ illegal __________
|
||||||
|
illegal 0.0000 illegal
|
||||||
|
illegal 0.0000 illegal
|
||||||
|
illegal 0.0000 illegal
|
||||||
|
illegal 0.0000 illegal
|
||||||
|
illegal 0.0000 illegal
|
||||||
|
__________ illegal __________
|
||||||
|
"""
|
||||||
|
|
||||||
|
q_values_k_0_action_exit: """
|
||||||
|
__________ 10.0000 __________
|
||||||
|
-100.0000 illegal -100.0000
|
||||||
|
-100.0000 illegal -100.0000
|
||||||
|
-100.0000 illegal -100.0000
|
||||||
|
-100.0000 illegal -100.0000
|
||||||
|
-100.0000 illegal -100.0000
|
||||||
|
__________ 1.0000 __________
|
||||||
|
"""
|
||||||
|
|
||||||
|
q_values_k_0_action_south: """
|
||||||
|
__________ illegal __________
|
||||||
|
illegal 0.0000 illegal
|
||||||
|
illegal 0.0000 illegal
|
||||||
|
illegal 0.0000 illegal
|
||||||
|
illegal 0.0000 illegal
|
||||||
|
illegal 0.0000 illegal
|
||||||
|
__________ illegal __________
|
||||||
|
"""
|
||||||
|
|
||||||
|
q_values_k_0_action_west: """
|
||||||
|
__________ illegal __________
|
||||||
|
illegal 0.0000 illegal
|
||||||
|
illegal 0.0000 illegal
|
||||||
|
illegal 0.0000 illegal
|
||||||
|
illegal 0.0000 illegal
|
||||||
|
illegal 0.0000 illegal
|
||||||
|
__________ illegal __________
|
||||||
|
"""
|
||||||
|
|
||||||
|
values_k_1: """
|
||||||
|
__________ 10.0000 __________
|
||||||
|
-100.0000 0.0000 -100.0000
|
||||||
|
-100.0000 0.0000 -100.0000
|
||||||
|
-100.0000 0.0000 -100.0000
|
||||||
|
-100.0000 0.0000 -100.0000
|
||||||
|
-100.0000 0.0000 -100.0000
|
||||||
|
__________ 1.0000 __________
|
||||||
|
"""
|
||||||
|
|
||||||
|
q_values_k_1_action_north: """
|
||||||
|
__________ illegal __________
|
||||||
|
illegal -0.8500 illegal
|
||||||
|
illegal -8.5000 illegal
|
||||||
|
illegal -8.5000 illegal
|
||||||
|
illegal -8.5000 illegal
|
||||||
|
illegal -8.5000 illegal
|
||||||
|
__________ illegal __________
|
||||||
|
"""
|
||||||
|
|
||||||
|
q_values_k_1_action_east: """
|
||||||
|
__________ illegal __________
|
||||||
|
illegal -76.0750 illegal
|
||||||
|
illegal -76.5000 illegal
|
||||||
|
illegal -76.5000 illegal
|
||||||
|
illegal -76.5000 illegal
|
||||||
|
illegal -76.4575 illegal
|
||||||
|
__________ illegal __________
|
||||||
|
"""
|
||||||
|
|
||||||
|
q_values_k_1_action_exit: """
|
||||||
|
__________ 10.0000 __________
|
||||||
|
-100.0000 illegal -100.0000
|
||||||
|
-100.0000 illegal -100.0000
|
||||||
|
-100.0000 illegal -100.0000
|
||||||
|
-100.0000 illegal -100.0000
|
||||||
|
-100.0000 illegal -100.0000
|
||||||
|
__________ 1.0000 __________
|
||||||
|
"""
|
||||||
|
|
||||||
|
q_values_k_1_action_south: """
|
||||||
|
__________ illegal __________
|
||||||
|
illegal -8.5000 illegal
|
||||||
|
illegal -8.5000 illegal
|
||||||
|
illegal -8.5000 illegal
|
||||||
|
illegal -8.5000 illegal
|
||||||
|
illegal -7.7350 illegal
|
||||||
|
__________ illegal __________
|
||||||
|
"""
|
||||||
|
|
||||||
|
q_values_k_1_action_west: """
|
||||||
|
__________ illegal __________
|
||||||
|
illegal -76.0750 illegal
|
||||||
|
illegal -76.5000 illegal
|
||||||
|
illegal -76.5000 illegal
|
||||||
|
illegal -76.5000 illegal
|
||||||
|
illegal -76.4575 illegal
|
||||||
|
__________ illegal __________
|
||||||
|
"""
|
||||||
|
|
||||||
|
values_k_2: """
|
||||||
|
__________ 10.0000 __________
|
||||||
|
-100.0000 -0.8500 -100.0000
|
||||||
|
-100.0000 -8.5000 -100.0000
|
||||||
|
-100.0000 -8.5000 -100.0000
|
||||||
|
-100.0000 -8.5000 -100.0000
|
||||||
|
-100.0000 -7.7350 -100.0000
|
||||||
|
__________ 1.0000 __________
|
||||||
|
"""
|
||||||
|
|
||||||
|
q_values_k_2_action_north: """
|
||||||
|
__________ illegal __________
|
||||||
|
illegal -0.8500 illegal
|
||||||
|
illegal -9.1502 illegal
|
||||||
|
illegal -15.0025 illegal
|
||||||
|
illegal -15.0025 illegal
|
||||||
|
illegal -15.0025 illegal
|
||||||
|
__________ illegal __________
|
||||||
|
"""
|
||||||
|
|
||||||
|
q_values_k_2_action_east: """
|
||||||
|
__________ illegal __________
|
||||||
|
illegal -76.4363 illegal
|
||||||
|
illegal -76.8974 illegal
|
||||||
|
illegal -77.2225 illegal
|
||||||
|
illegal -77.1900 illegal
|
||||||
|
illegal -76.8187 illegal
|
||||||
|
__________ illegal __________
|
||||||
|
"""
|
||||||
|
|
||||||
|
q_values_k_2_action_exit: """
|
||||||
|
__________ 10.0000 __________
|
||||||
|
-100.0000 illegal -100.0000
|
||||||
|
-100.0000 illegal -100.0000
|
||||||
|
-100.0000 illegal -100.0000
|
||||||
|
-100.0000 illegal -100.0000
|
||||||
|
-100.0000 illegal -100.0000
|
||||||
|
__________ 1.0000 __________
|
||||||
|
"""
|
||||||
|
|
||||||
|
q_values_k_2_action_south: """
|
||||||
|
__________ illegal __________
|
||||||
|
illegal -15.0025 illegal
|
||||||
|
illegal -15.0025 illegal
|
||||||
|
illegal -15.0025 illegal
|
||||||
|
illegal -14.4173 illegal
|
||||||
|
illegal -7.7350 illegal
|
||||||
|
__________ illegal __________
|
||||||
|
"""
|
||||||
|
|
||||||
|
q_values_k_2_action_west: """
|
||||||
|
__________ illegal __________
|
||||||
|
illegal -76.4363 illegal
|
||||||
|
illegal -76.8974 illegal
|
||||||
|
illegal -77.2225 illegal
|
||||||
|
illegal -77.1900 illegal
|
||||||
|
illegal -76.8187 illegal
|
||||||
|
__________ illegal __________
|
||||||
|
"""
|
||||||
|
|
||||||
|
values_k_3: """
|
||||||
|
__________ 10.0000 __________
|
||||||
|
-100.0000 -0.8500 -100.0000
|
||||||
|
-100.0000 -9.1502 -100.0000
|
||||||
|
-100.0000 -15.0025 -100.0000
|
||||||
|
-100.0000 -14.4173 -100.0000
|
||||||
|
-100.0000 -7.7350 -100.0000
|
||||||
|
__________ 1.0000 __________
|
||||||
|
"""
|
||||||
|
|
||||||
|
q_values_k_3_action_north: """
|
||||||
|
__________ illegal __________
|
||||||
|
illegal -0.8500 illegal
|
||||||
|
illegal -9.1502 illegal
|
||||||
|
illegal -15.4999 illegal
|
||||||
|
illegal -19.9769 illegal
|
||||||
|
illegal -19.5292 illegal
|
||||||
|
__________ illegal __________
|
||||||
|
"""
|
||||||
|
|
||||||
|
q_values_k_3_action_east: """
|
||||||
|
__________ illegal __________
|
||||||
|
illegal -76.4639 illegal
|
||||||
|
illegal -77.1737 illegal
|
||||||
|
illegal -77.5016 illegal
|
||||||
|
illegal -77.4663 illegal
|
||||||
|
illegal -77.0702 illegal
|
||||||
|
__________ illegal __________
|
||||||
|
"""
|
||||||
|
|
||||||
|
q_values_k_3_action_exit: """
|
||||||
|
__________ 10.0000 __________
|
||||||
|
-100.0000 illegal -100.0000
|
||||||
|
-100.0000 illegal -100.0000
|
||||||
|
-100.0000 illegal -100.0000
|
||||||
|
-100.0000 illegal -100.0000
|
||||||
|
-100.0000 illegal -100.0000
|
||||||
|
__________ 1.0000 __________
|
||||||
|
"""
|
||||||
|
|
||||||
|
q_values_k_3_action_south: """
|
||||||
|
__________ illegal __________
|
||||||
|
illegal -15.4999 illegal
|
||||||
|
illegal -19.9769 illegal
|
||||||
|
illegal -19.5292 illegal
|
||||||
|
illegal -14.4173 illegal
|
||||||
|
illegal -7.7350 illegal
|
||||||
|
__________ illegal __________
|
||||||
|
"""
|
||||||
|
|
||||||
|
q_values_k_3_action_west: """
|
||||||
|
__________ illegal __________
|
||||||
|
illegal -76.4639 illegal
|
||||||
|
illegal -77.1737 illegal
|
||||||
|
illegal -77.5016 illegal
|
||||||
|
illegal -77.4663 illegal
|
||||||
|
illegal -77.0702 illegal
|
||||||
|
__________ illegal __________
|
||||||
|
"""
|
||||||
|
|
||||||
|
values_k_4: """
|
||||||
|
__________ 10.0000 __________
|
||||||
|
-100.0000 -0.8500 -100.0000
|
||||||
|
-100.0000 -9.1502 -100.0000
|
||||||
|
-100.0000 -15.4999 -100.0000
|
||||||
|
-100.0000 -14.4173 -100.0000
|
||||||
|
-100.0000 -7.7350 -100.0000
|
||||||
|
__________ 1.0000 __________
|
||||||
|
"""
|
||||||
|
|
||||||
|
q_values_k_4_action_north: """
|
||||||
|
__________ illegal __________
|
||||||
|
illegal -0.8500 illegal
|
||||||
|
illegal -9.1502 illegal
|
||||||
|
illegal -15.4999 illegal
|
||||||
|
illegal -20.3575 illegal
|
||||||
|
illegal -19.5292 illegal
|
||||||
|
__________ illegal __________
|
||||||
|
"""
|
||||||
|
|
||||||
|
q_values_k_4_action_east: """
|
||||||
|
__________ illegal __________
|
||||||
|
illegal -76.4639 illegal
|
||||||
|
illegal -77.1949 illegal
|
||||||
|
illegal -77.5016 illegal
|
||||||
|
illegal -77.4875 illegal
|
||||||
|
illegal -77.0702 illegal
|
||||||
|
__________ illegal __________
|
||||||
|
"""
|
||||||
|
|
||||||
|
q_values_k_4_action_exit: """
|
||||||
|
__________ 10.0000 __________
|
||||||
|
-100.0000 illegal -100.0000
|
||||||
|
-100.0000 illegal -100.0000
|
||||||
|
-100.0000 illegal -100.0000
|
||||||
|
-100.0000 illegal -100.0000
|
||||||
|
-100.0000 illegal -100.0000
|
||||||
|
__________ 1.0000 __________
|
||||||
|
"""
|
||||||
|
|
||||||
|
q_values_k_4_action_south: """
|
||||||
|
__________ illegal __________
|
||||||
|
illegal -15.4999 illegal
|
||||||
|
illegal -20.3575 illegal
|
||||||
|
illegal -19.5292 illegal
|
||||||
|
illegal -14.4173 illegal
|
||||||
|
illegal -7.7350 illegal
|
||||||
|
__________ illegal __________
|
||||||
|
"""
|
||||||
|
|
||||||
|
q_values_k_4_action_west: """
|
||||||
|
__________ illegal __________
|
||||||
|
illegal -76.4639 illegal
|
||||||
|
illegal -77.1949 illegal
|
||||||
|
illegal -77.5016 illegal
|
||||||
|
illegal -77.4875 illegal
|
||||||
|
illegal -77.0702 illegal
|
||||||
|
__________ illegal __________
|
||||||
|
"""
|
||||||
|
|
||||||
|
values_k_5: """
|
||||||
|
__________ 10.0000 __________
|
||||||
|
-100.0000 -0.8500 -100.0000
|
||||||
|
-100.0000 -9.1502 -100.0000
|
||||||
|
-100.0000 -15.4999 -100.0000
|
||||||
|
-100.0000 -14.4173 -100.0000
|
||||||
|
-100.0000 -7.7350 -100.0000
|
||||||
|
__________ 1.0000 __________
|
||||||
|
"""
|
||||||
|
|
||||||
|
q_values_k_5_action_north: """
|
||||||
|
__________ illegal __________
|
||||||
|
illegal -0.8500 illegal
|
||||||
|
illegal -9.1502 illegal
|
||||||
|
illegal -15.4999 illegal
|
||||||
|
illegal -20.3575 illegal
|
||||||
|
illegal -19.5292 illegal
|
||||||
|
__________ illegal __________
|
||||||
|
"""
|
||||||
|
|
||||||
|
q_values_k_5_action_east: """
|
||||||
|
__________ illegal __________
|
||||||
|
illegal -76.4639 illegal
|
||||||
|
illegal -77.1949 illegal
|
||||||
|
illegal -77.5016 illegal
|
||||||
|
illegal -77.4875 illegal
|
||||||
|
illegal -77.0702 illegal
|
||||||
|
__________ illegal __________
|
||||||
|
"""
|
||||||
|
|
||||||
|
q_values_k_5_action_exit: """
|
||||||
|
__________ 10.0000 __________
|
||||||
|
-100.0000 illegal -100.0000
|
||||||
|
-100.0000 illegal -100.0000
|
||||||
|
-100.0000 illegal -100.0000
|
||||||
|
-100.0000 illegal -100.0000
|
||||||
|
-100.0000 illegal -100.0000
|
||||||
|
__________ 1.0000 __________
|
||||||
|
"""
|
||||||
|
|
||||||
|
q_values_k_5_action_south: """
|
||||||
|
__________ illegal __________
|
||||||
|
illegal -15.4999 illegal
|
||||||
|
illegal -20.3575 illegal
|
||||||
|
illegal -19.5292 illegal
|
||||||
|
illegal -14.4173 illegal
|
||||||
|
illegal -7.7350 illegal
|
||||||
|
__________ illegal __________
|
||||||
|
"""
|
||||||
|
|
||||||
|
q_values_k_5_action_west: """
|
||||||
|
__________ illegal __________
|
||||||
|
illegal -76.4639 illegal
|
||||||
|
illegal -77.1949 illegal
|
||||||
|
illegal -77.5016 illegal
|
||||||
|
illegal -77.4875 illegal
|
||||||
|
illegal -77.0702 illegal
|
||||||
|
__________ illegal __________
|
||||||
|
"""
|
||||||
|
|
||||||
|
values_k_6: """
|
||||||
|
__________ 10.0000 __________
|
||||||
|
-100.0000 -0.8500 -100.0000
|
||||||
|
-100.0000 -9.1502 -100.0000
|
||||||
|
-100.0000 -15.4999 -100.0000
|
||||||
|
-100.0000 -14.4173 -100.0000
|
||||||
|
-100.0000 -7.7350 -100.0000
|
||||||
|
__________ 1.0000 __________
|
||||||
|
"""
|
||||||
|
|
||||||
|
q_values_k_6_action_north: """
|
||||||
|
__________ illegal __________
|
||||||
|
illegal -0.8500 illegal
|
||||||
|
illegal -9.1502 illegal
|
||||||
|
illegal -15.4999 illegal
|
||||||
|
illegal -20.3575 illegal
|
||||||
|
illegal -19.5292 illegal
|
||||||
|
__________ illegal __________
|
||||||
|
"""
|
||||||
|
|
||||||
|
q_values_k_6_action_east: """
|
||||||
|
__________ illegal __________
|
||||||
|
illegal -76.4639 illegal
|
||||||
|
illegal -77.1949 illegal
|
||||||
|
illegal -77.5016 illegal
|
||||||
|
illegal -77.4875 illegal
|
||||||
|
illegal -77.0702 illegal
|
||||||
|
__________ illegal __________
|
||||||
|
"""
|
||||||
|
|
||||||
|
q_values_k_6_action_exit: """
|
||||||
|
__________ 10.0000 __________
|
||||||
|
-100.0000 illegal -100.0000
|
||||||
|
-100.0000 illegal -100.0000
|
||||||
|
-100.0000 illegal -100.0000
|
||||||
|
-100.0000 illegal -100.0000
|
||||||
|
-100.0000 illegal -100.0000
|
||||||
|
__________ 1.0000 __________
|
||||||
|
"""
|
||||||
|
|
||||||
|
q_values_k_6_action_south: """
|
||||||
|
__________ illegal __________
|
||||||
|
illegal -15.4999 illegal
|
||||||
|
illegal -20.3575 illegal
|
||||||
|
illegal -19.5292 illegal
|
||||||
|
illegal -14.4173 illegal
|
||||||
|
illegal -7.7350 illegal
|
||||||
|
__________ illegal __________
|
||||||
|
"""
|
||||||
|
|
||||||
|
q_values_k_6_action_west: """
|
||||||
|
__________ illegal __________
|
||||||
|
illegal -76.4639 illegal
|
||||||
|
illegal -77.1949 illegal
|
||||||
|
illegal -77.5016 illegal
|
||||||
|
illegal -77.4875 illegal
|
||||||
|
illegal -77.0702 illegal
|
||||||
|
__________ illegal __________
|
||||||
|
"""
|
||||||
|
|
||||||
|
values_k_7: """
|
||||||
|
__________ 10.0000 __________
|
||||||
|
-100.0000 -0.8500 -100.0000
|
||||||
|
-100.0000 -9.1502 -100.0000
|
||||||
|
-100.0000 -15.4999 -100.0000
|
||||||
|
-100.0000 -14.4173 -100.0000
|
||||||
|
-100.0000 -7.7350 -100.0000
|
||||||
|
__________ 1.0000 __________
|
||||||
|
"""
|
||||||
|
|
||||||
|
q_values_k_7_action_north: """
|
||||||
|
__________ illegal __________
|
||||||
|
illegal -0.8500 illegal
|
||||||
|
illegal -9.1502 illegal
|
||||||
|
illegal -15.4999 illegal
|
||||||
|
illegal -20.3575 illegal
|
||||||
|
illegal -19.5292 illegal
|
||||||
|
__________ illegal __________
|
||||||
|
"""
|
||||||
|
|
||||||
|
q_values_k_7_action_east: """
|
||||||
|
__________ illegal __________
|
||||||
|
illegal -76.4639 illegal
|
||||||
|
illegal -77.1949 illegal
|
||||||
|
illegal -77.5016 illegal
|
||||||
|
illegal -77.4875 illegal
|
||||||
|
illegal -77.0702 illegal
|
||||||
|
__________ illegal __________
|
||||||
|
"""
|
||||||
|
|
||||||
|
q_values_k_7_action_exit: """
|
||||||
|
__________ 10.0000 __________
|
||||||
|
-100.0000 illegal -100.0000
|
||||||
|
-100.0000 illegal -100.0000
|
||||||
|
-100.0000 illegal -100.0000
|
||||||
|
-100.0000 illegal -100.0000
|
||||||
|
-100.0000 illegal -100.0000
|
||||||
|
__________ 1.0000 __________
|
||||||
|
"""
|
||||||
|
|
||||||
|
q_values_k_7_action_south: """
|
||||||
|
__________ illegal __________
|
||||||
|
illegal -15.4999 illegal
|
||||||
|
illegal -20.3575 illegal
|
||||||
|
illegal -19.5292 illegal
|
||||||
|
illegal -14.4173 illegal
|
||||||
|
illegal -7.7350 illegal
|
||||||
|
__________ illegal __________
|
||||||
|
"""
|
||||||
|
|
||||||
|
q_values_k_7_action_west: """
|
||||||
|
__________ illegal __________
|
||||||
|
illegal -76.4639 illegal
|
||||||
|
illegal -77.1949 illegal
|
||||||
|
illegal -77.5016 illegal
|
||||||
|
illegal -77.4875 illegal
|
||||||
|
illegal -77.0702 illegal
|
||||||
|
__________ illegal __________
|
||||||
|
"""
|
||||||
|
|
||||||
|
values_k_8: """
|
||||||
|
__________ 10.0000 __________
|
||||||
|
-100.0000 -0.8500 -100.0000
|
||||||
|
-100.0000 -9.1502 -100.0000
|
||||||
|
-100.0000 -15.4999 -100.0000
|
||||||
|
-100.0000 -14.4173 -100.0000
|
||||||
|
-100.0000 -7.7350 -100.0000
|
||||||
|
__________ 1.0000 __________
|
||||||
|
"""
|
||||||
|
|
||||||
|
q_values_k_8_action_north: """
|
||||||
|
__________ illegal __________
|
||||||
|
illegal -0.8500 illegal
|
||||||
|
illegal -9.1502 illegal
|
||||||
|
illegal -15.4999 illegal
|
||||||
|
illegal -20.3575 illegal
|
||||||
|
illegal -19.5292 illegal
|
||||||
|
__________ illegal __________
|
||||||
|
"""
|
||||||
|
|
||||||
|
q_values_k_8_action_east: """
|
||||||
|
__________ illegal __________
|
||||||
|
illegal -76.4639 illegal
|
||||||
|
illegal -77.1949 illegal
|
||||||
|
illegal -77.5016 illegal
|
||||||
|
illegal -77.4875 illegal
|
||||||
|
illegal -77.0702 illegal
|
||||||
|
__________ illegal __________
|
||||||
|
"""
|
||||||
|
|
||||||
|
q_values_k_8_action_exit: """
|
||||||
|
__________ 10.0000 __________
|
||||||
|
-100.0000 illegal -100.0000
|
||||||
|
-100.0000 illegal -100.0000
|
||||||
|
-100.0000 illegal -100.0000
|
||||||
|
-100.0000 illegal -100.0000
|
||||||
|
-100.0000 illegal -100.0000
|
||||||
|
__________ 1.0000 __________
|
||||||
|
"""
|
||||||
|
|
||||||
|
q_values_k_8_action_south: """
|
||||||
|
__________ illegal __________
|
||||||
|
illegal -15.4999 illegal
|
||||||
|
illegal -20.3575 illegal
|
||||||
|
illegal -19.5292 illegal
|
||||||
|
illegal -14.4173 illegal
|
||||||
|
illegal -7.7350 illegal
|
||||||
|
__________ illegal __________
|
||||||
|
"""
|
||||||
|
|
||||||
|
q_values_k_8_action_west: """
|
||||||
|
__________ illegal __________
|
||||||
|
illegal -76.4639 illegal
|
||||||
|
illegal -77.1949 illegal
|
||||||
|
illegal -77.5016 illegal
|
||||||
|
illegal -77.4875 illegal
|
||||||
|
illegal -77.0702 illegal
|
||||||
|
__________ illegal __________
|
||||||
|
"""
|
||||||
|
|
||||||
|
values_k_9: """
|
||||||
|
__________ 10.0000 __________
|
||||||
|
-100.0000 -0.8500 -100.0000
|
||||||
|
-100.0000 -9.1502 -100.0000
|
||||||
|
-100.0000 -15.4999 -100.0000
|
||||||
|
-100.0000 -14.4173 -100.0000
|
||||||
|
-100.0000 -7.7350 -100.0000
|
||||||
|
__________ 1.0000 __________
|
||||||
|
"""
|
||||||
|
|
||||||
|
q_values_k_9_action_north: """
|
||||||
|
__________ illegal __________
|
||||||
|
illegal -0.8500 illegal
|
||||||
|
illegal -9.1502 illegal
|
||||||
|
illegal -15.4999 illegal
|
||||||
|
illegal -20.3575 illegal
|
||||||
|
illegal -19.5292 illegal
|
||||||
|
__________ illegal __________
|
||||||
|
"""
|
||||||
|
|
||||||
|
q_values_k_9_action_east: """
|
||||||
|
__________ illegal __________
|
||||||
|
illegal -76.4639 illegal
|
||||||
|
illegal -77.1949 illegal
|
||||||
|
illegal -77.5016 illegal
|
||||||
|
illegal -77.4875 illegal
|
||||||
|
illegal -77.0702 illegal
|
||||||
|
__________ illegal __________
|
||||||
|
"""
|
||||||
|
|
||||||
|
q_values_k_9_action_exit: """
|
||||||
|
__________ 10.0000 __________
|
||||||
|
-100.0000 illegal -100.0000
|
||||||
|
-100.0000 illegal -100.0000
|
||||||
|
-100.0000 illegal -100.0000
|
||||||
|
-100.0000 illegal -100.0000
|
||||||
|
-100.0000 illegal -100.0000
|
||||||
|
__________ 1.0000 __________
|
||||||
|
"""
|
||||||
|
|
||||||
|
q_values_k_9_action_south: """
|
||||||
|
__________ illegal __________
|
||||||
|
illegal -15.4999 illegal
|
||||||
|
illegal -20.3575 illegal
|
||||||
|
illegal -19.5292 illegal
|
||||||
|
illegal -14.4173 illegal
|
||||||
|
illegal -7.7350 illegal
|
||||||
|
__________ illegal __________
|
||||||
|
"""
|
||||||
|
|
||||||
|
q_values_k_9_action_west: """
|
||||||
|
__________ illegal __________
|
||||||
|
illegal -76.4639 illegal
|
||||||
|
illegal -77.1949 illegal
|
||||||
|
illegal -77.5016 illegal
|
||||||
|
illegal -77.4875 illegal
|
||||||
|
illegal -77.0702 illegal
|
||||||
|
__________ illegal __________
|
||||||
|
"""
|
||||||
|
|
||||||
|
values_k_100: """
|
||||||
|
__________ 10.0000 __________
|
||||||
|
-100.0000 -0.8500 -100.0000
|
||||||
|
-100.0000 -9.1502 -100.0000
|
||||||
|
-100.0000 -15.4999 -100.0000
|
||||||
|
-100.0000 -14.4173 -100.0000
|
||||||
|
-100.0000 -7.7350 -100.0000
|
||||||
|
__________ 1.0000 __________
|
||||||
|
"""
|
||||||
|
|
||||||
|
q_values_k_100_action_north: """
|
||||||
|
__________ illegal __________
|
||||||
|
illegal -0.8500 illegal
|
||||||
|
illegal -9.1502 illegal
|
||||||
|
illegal -15.4999 illegal
|
||||||
|
illegal -20.3575 illegal
|
||||||
|
illegal -19.5292 illegal
|
||||||
|
__________ illegal __________
|
||||||
|
"""
|
||||||
|
|
||||||
|
q_values_k_100_action_east: """
|
||||||
|
__________ illegal __________
|
||||||
|
illegal -76.4639 illegal
|
||||||
|
illegal -77.1949 illegal
|
||||||
|
illegal -77.5016 illegal
|
||||||
|
illegal -77.4875 illegal
|
||||||
|
illegal -77.0702 illegal
|
||||||
|
__________ illegal __________
|
||||||
|
"""
|
||||||
|
|
||||||
|
q_values_k_100_action_exit: """
|
||||||
|
__________ 10.0000 __________
|
||||||
|
-100.0000 illegal -100.0000
|
||||||
|
-100.0000 illegal -100.0000
|
||||||
|
-100.0000 illegal -100.0000
|
||||||
|
-100.0000 illegal -100.0000
|
||||||
|
-100.0000 illegal -100.0000
|
||||||
|
__________ 1.0000 __________
|
||||||
|
"""
|
||||||
|
|
||||||
|
q_values_k_100_action_south: """
|
||||||
|
__________ illegal __________
|
||||||
|
illegal -15.4999 illegal
|
||||||
|
illegal -20.3575 illegal
|
||||||
|
illegal -19.5292 illegal
|
||||||
|
illegal -14.4173 illegal
|
||||||
|
illegal -7.7350 illegal
|
||||||
|
__________ illegal __________
|
||||||
|
"""
|
||||||
|
|
||||||
|
q_values_k_100_action_west: """
|
||||||
|
__________ illegal __________
|
||||||
|
illegal -76.4639 illegal
|
||||||
|
illegal -77.1949 illegal
|
||||||
|
illegal -77.5016 illegal
|
||||||
|
illegal -77.4875 illegal
|
||||||
|
illegal -77.0702 illegal
|
||||||
|
__________ illegal __________
|
||||||
|
"""
|
||||||
|
|
||||||
|
policy: """
|
||||||
|
__________ exit __________
|
||||||
|
exit north exit
|
||||||
|
exit north exit
|
||||||
|
exit north exit
|
||||||
|
exit south exit
|
||||||
|
exit south exit
|
||||||
|
__________ exit __________
|
||||||
|
"""
|
||||||
|
|
||||||
|
actions: """
|
||||||
|
north
|
||||||
|
east
|
||||||
|
exit
|
||||||
|
south
|
||||||
|
west
|
||||||
|
"""
|
||||||
|
|
||||||
27
p3_rl/test_cases/q1/3-bridge.test
Normal file
27
p3_rl/test_cases/q1/3-bridge.test
Normal file
@@ -0,0 +1,27 @@
|
|||||||
|
class: "ValueIterationTest"
|
||||||
|
|
||||||
|
# GridWorld specification
|
||||||
|
# _ is empty space
|
||||||
|
# numbers are terminal states with that value
|
||||||
|
# # is a wall
|
||||||
|
# S is a start state
|
||||||
|
#
|
||||||
|
grid: """
|
||||||
|
# 10 #
|
||||||
|
-100 _ -100
|
||||||
|
-100 _ -100
|
||||||
|
-100 _ -100
|
||||||
|
-100 _ -100
|
||||||
|
-100 S -100
|
||||||
|
# 1 #
|
||||||
|
"""
|
||||||
|
gridName: "bridgeGrid"
|
||||||
|
discount: "0.85"
|
||||||
|
noise: "0.1"
|
||||||
|
livingReward: "0.0"
|
||||||
|
epsilon: "0.5"
|
||||||
|
learningRate: "0.1"
|
||||||
|
numExperiences: "500"
|
||||||
|
valueIterations: "100"
|
||||||
|
iterations: "10000"
|
||||||
|
|
||||||
544
p3_rl/test_cases/q1/4-discountgrid.solution
Normal file
544
p3_rl/test_cases/q1/4-discountgrid.solution
Normal file
@@ -0,0 +1,544 @@
|
|||||||
|
values_k_0: """
|
||||||
|
0.0000 0.0000 0.0000 0.0000 0.0000
|
||||||
|
0.0000 0.0000 __________ 0.0000 0.0000
|
||||||
|
0.0000 0.0000 0.0000 0.0000 0.0000
|
||||||
|
0.0000 0.0000 __________ __________ 0.0000
|
||||||
|
0.0000 0.0000 0.0000 0.0000 0.0000
|
||||||
|
"""
|
||||||
|
|
||||||
|
q_values_k_0_action_north: """
|
||||||
|
illegal 0.0000 illegal 0.0000 0.0000
|
||||||
|
illegal 0.0000 __________ 0.0000 0.0000
|
||||||
|
illegal 0.0000 illegal 0.0000 0.0000
|
||||||
|
illegal 0.0000 __________ __________ 0.0000
|
||||||
|
illegal 0.0000 0.0000 0.0000 0.0000
|
||||||
|
"""
|
||||||
|
|
||||||
|
q_values_k_0_action_east: """
|
||||||
|
illegal 0.0000 illegal 0.0000 0.0000
|
||||||
|
illegal 0.0000 __________ 0.0000 0.0000
|
||||||
|
illegal 0.0000 illegal 0.0000 0.0000
|
||||||
|
illegal 0.0000 __________ __________ 0.0000
|
||||||
|
illegal 0.0000 0.0000 0.0000 0.0000
|
||||||
|
"""
|
||||||
|
|
||||||
|
q_values_k_0_action_exit: """
|
||||||
|
-10.0000 illegal 10.0000 illegal illegal
|
||||||
|
-10.0000 illegal __________ illegal illegal
|
||||||
|
-10.0000 illegal 1.0000 illegal illegal
|
||||||
|
-10.0000 illegal __________ __________ illegal
|
||||||
|
-10.0000 illegal illegal illegal illegal
|
||||||
|
"""
|
||||||
|
|
||||||
|
q_values_k_0_action_south: """
|
||||||
|
illegal 0.0000 illegal 0.0000 0.0000
|
||||||
|
illegal 0.0000 __________ 0.0000 0.0000
|
||||||
|
illegal 0.0000 illegal 0.0000 0.0000
|
||||||
|
illegal 0.0000 __________ __________ 0.0000
|
||||||
|
illegal 0.0000 0.0000 0.0000 0.0000
|
||||||
|
"""
|
||||||
|
|
||||||
|
q_values_k_0_action_west: """
|
||||||
|
illegal 0.0000 illegal 0.0000 0.0000
|
||||||
|
illegal 0.0000 __________ 0.0000 0.0000
|
||||||
|
illegal 0.0000 illegal 0.0000 0.0000
|
||||||
|
illegal 0.0000 __________ __________ 0.0000
|
||||||
|
illegal 0.0000 0.0000 0.0000 0.0000
|
||||||
|
"""
|
||||||
|
|
||||||
|
values_k_1: """
|
||||||
|
-10.0000 0.0000 10.0000 0.0000 0.0000
|
||||||
|
-10.0000 0.0000 __________ 0.0000 0.0000
|
||||||
|
-10.0000 0.0000 1.0000 0.0000 0.0000
|
||||||
|
-10.0000 0.0000 __________ __________ 0.0000
|
||||||
|
-10.0000 0.0000 0.0000 0.0000 0.0000
|
||||||
|
"""
|
||||||
|
|
||||||
|
q_values_k_1_action_north: """
|
||||||
|
illegal 0.0000 illegal 0.9000 0.0000
|
||||||
|
illegal -0.9000 __________ 0.0000 0.0000
|
||||||
|
illegal -0.8100 illegal 0.0900 0.0000
|
||||||
|
illegal -0.9000 __________ __________ 0.0000
|
||||||
|
illegal -0.9000 0.0000 0.0000 0.0000
|
||||||
|
"""
|
||||||
|
|
||||||
|
q_values_k_1_action_east: """
|
||||||
|
illegal 7.2000 illegal 0.0000 0.0000
|
||||||
|
illegal 0.0000 __________ 0.0000 0.0000
|
||||||
|
illegal 0.7200 illegal 0.0000 0.0000
|
||||||
|
illegal 0.0000 __________ __________ 0.0000
|
||||||
|
illegal 0.0000 0.0000 0.0000 0.0000
|
||||||
|
"""
|
||||||
|
|
||||||
|
q_values_k_1_action_exit: """
|
||||||
|
-10.0000 illegal 10.0000 illegal illegal
|
||||||
|
-10.0000 illegal __________ illegal illegal
|
||||||
|
-10.0000 illegal 1.0000 illegal illegal
|
||||||
|
-10.0000 illegal __________ __________ illegal
|
||||||
|
-10.0000 illegal illegal illegal illegal
|
||||||
|
"""
|
||||||
|
|
||||||
|
q_values_k_1_action_south: """
|
||||||
|
illegal 0.0000 illegal 0.9000 0.0000
|
||||||
|
illegal -0.9000 __________ 0.0000 0.0000
|
||||||
|
illegal -0.8100 illegal 0.0900 0.0000
|
||||||
|
illegal -0.9000 __________ __________ 0.0000
|
||||||
|
illegal -0.9000 0.0000 0.0000 0.0000
|
||||||
|
"""
|
||||||
|
|
||||||
|
q_values_k_1_action_west: """
|
||||||
|
illegal -7.2000 illegal 7.2000 0.0000
|
||||||
|
illegal -7.2000 __________ 0.0000 0.0000
|
||||||
|
illegal -7.2000 illegal 0.7200 0.0000
|
||||||
|
illegal -7.2000 __________ __________ 0.0000
|
||||||
|
illegal -7.2000 0.0000 0.0000 0.0000
|
||||||
|
"""
|
||||||
|
|
||||||
|
values_k_2: """
|
||||||
|
-10.0000 7.2000 10.0000 7.2000 0.0000
|
||||||
|
-10.0000 0.0000 __________ 0.0000 0.0000
|
||||||
|
-10.0000 0.7200 1.0000 0.7200 0.0000
|
||||||
|
-10.0000 0.0000 __________ __________ 0.0000
|
||||||
|
-10.0000 0.0000 0.0000 0.0000 0.0000
|
||||||
|
"""
|
||||||
|
|
||||||
|
q_values_k_2_action_north: """
|
||||||
|
illegal 5.1840 illegal 6.0840 0.6480
|
||||||
|
illegal 4.2840 __________ 5.1840 0.0000
|
||||||
|
illegal -0.8100 illegal 0.0900 0.0648
|
||||||
|
illegal -0.3816 __________ __________ 0.0000
|
||||||
|
illegal -0.9000 0.0000 0.0000 0.0000
|
||||||
|
"""
|
||||||
|
|
||||||
|
q_values_k_2_action_east: """
|
||||||
|
illegal 7.8480 illegal 0.6480 0.0000
|
||||||
|
illegal 0.7128 __________ 0.7128 0.0000
|
||||||
|
illegal 0.7200 illegal 0.0648 0.0000
|
||||||
|
illegal 0.0648 __________ __________ 0.0000
|
||||||
|
illegal 0.0000 0.0000 0.0000 0.0000
|
||||||
|
"""
|
||||||
|
|
||||||
|
q_values_k_2_action_exit: """
|
||||||
|
-10.0000 illegal 10.0000 illegal illegal
|
||||||
|
-10.0000 illegal __________ illegal illegal
|
||||||
|
-10.0000 illegal 1.0000 illegal illegal
|
||||||
|
-10.0000 illegal __________ __________ illegal
|
||||||
|
-10.0000 illegal illegal illegal illegal
|
||||||
|
"""
|
||||||
|
|
||||||
|
q_values_k_2_action_south: """
|
||||||
|
illegal 0.0000 illegal 0.9000 0.6480
|
||||||
|
illegal -0.3816 __________ 0.5184 0.0000
|
||||||
|
illegal -0.8100 illegal 0.6084 0.0648
|
||||||
|
illegal -0.9000 __________ __________ 0.0000
|
||||||
|
illegal -0.9000 0.0000 0.0000 0.0000
|
||||||
|
"""
|
||||||
|
|
||||||
|
q_values_k_2_action_west: """
|
||||||
|
illegal -6.5520 illegal 7.8480 5.1840
|
||||||
|
illegal -6.4872 __________ 0.7128 0.0000
|
||||||
|
illegal -7.2000 illegal 0.7848 0.5184
|
||||||
|
illegal -7.1352 __________ __________ 0.0000
|
||||||
|
illegal -7.2000 0.0000 0.0000 0.0000
|
||||||
|
"""
|
||||||
|
|
||||||
|
values_k_3: """
|
||||||
|
-10.0000 7.8480 10.0000 7.8480 5.1840
|
||||||
|
-10.0000 4.2840 __________ 5.1840 0.0000
|
||||||
|
-10.0000 0.7200 1.0000 0.7848 0.5184
|
||||||
|
-10.0000 0.0648 __________ __________ 0.0000
|
||||||
|
-10.0000 0.0000 0.0000 0.0000 0.0000
|
||||||
|
"""
|
||||||
|
|
||||||
|
q_values_k_3_action_north: """
|
||||||
|
illegal 5.6506 illegal 7.0171 4.9054
|
||||||
|
illegal 5.1361 __________ 6.1171 4.1990
|
||||||
|
illegal 2.2745 illegal 3.8691 0.1173
|
||||||
|
illegal -0.3758 __________ __________ 0.3732
|
||||||
|
illegal -0.8533 0.0000 0.0000 0.0000
|
||||||
|
"""
|
||||||
|
|
||||||
|
q_values_k_3_action_east: """
|
||||||
|
illegal 8.2919 illegal 4.9054 4.1990
|
||||||
|
illegal 3.8556 __________ 0.7770 0.5132
|
||||||
|
illegal 1.1114 illegal 0.9104 0.3732
|
||||||
|
illegal 0.1115 __________ __________ 0.0467
|
||||||
|
illegal 0.0058 0.0000 0.0000 0.0000
|
||||||
|
"""
|
||||||
|
|
||||||
|
q_values_k_3_action_exit: """
|
||||||
|
-10.0000 illegal 10.0000 illegal illegal
|
||||||
|
-10.0000 illegal __________ illegal illegal
|
||||||
|
-10.0000 illegal 1.0000 illegal illegal
|
||||||
|
-10.0000 illegal __________ __________ illegal
|
||||||
|
-10.0000 illegal illegal illegal illegal
|
||||||
|
"""
|
||||||
|
|
||||||
|
q_values_k_3_action_south: """
|
||||||
|
illegal 3.0845 illegal 5.0990 1.1729
|
||||||
|
illegal 0.0040 __________ 1.0316 0.8398
|
||||||
|
illegal -0.7633 illegal 0.7017 0.1173
|
||||||
|
illegal -0.8942 __________ __________ 0.0000
|
||||||
|
illegal -0.9000 0.0000 0.0000 0.0000
|
||||||
|
"""
|
||||||
|
|
||||||
|
q_values_k_3_action_west: """
|
||||||
|
illegal -6.1081 illegal 8.3729 6.1171
|
||||||
|
illegal -6.4289 __________ 4.5094 4.2457
|
||||||
|
illegal -6.8086 illegal 1.2572 0.5651
|
||||||
|
illegal -7.1352 __________ __________ 0.0467
|
||||||
|
illegal -7.1942 0.0000 0.0000 0.0000
|
||||||
|
"""
|
||||||
|
|
||||||
|
values_k_4: """
|
||||||
|
-10.0000 8.2919 10.0000 8.3729 6.1171
|
||||||
|
-10.0000 5.1361 __________ 6.1171 4.2457
|
||||||
|
-10.0000 2.2745 1.0000 3.8691 0.5651
|
||||||
|
-10.0000 0.1115 __________ __________ 0.3732
|
||||||
|
-10.0000 0.0058 0.0000 0.0000 0.0000
|
||||||
|
"""
|
||||||
|
|
||||||
|
q_values_k_4_action_north: """
|
||||||
|
illegal 5.9702 illegal 7.4790 5.7084
|
||||||
|
illegal 5.5324 __________ 6.9611 5.3370
|
||||||
|
illegal 2.8880 illegal 4.5452 3.4560
|
||||||
|
illegal 0.7477 __________ __________ 0.4740
|
||||||
|
illegal -0.8198 0.0005 0.0000 0.2687
|
||||||
|
"""
|
||||||
|
|
||||||
|
q_values_k_4_action_east: """
|
||||||
|
illegal 8.4085 illegal 5.7084 5.3370
|
||||||
|
illegal 4.6490 __________ 4.1587 3.6583
|
||||||
|
illegal 1.1923 illegal 1.3056 0.8225
|
||||||
|
illegal 0.2855 __________ __________ 0.3196
|
||||||
|
illegal 0.0106 0.0000 0.0000 0.0336
|
||||||
|
"""
|
||||||
|
|
||||||
|
q_values_k_4_action_exit: """
|
||||||
|
-10.0000 illegal 10.0000 illegal illegal
|
||||||
|
-10.0000 illegal __________ illegal illegal
|
||||||
|
-10.0000 illegal 1.0000 illegal illegal
|
||||||
|
-10.0000 illegal __________ __________ illegal
|
||||||
|
-10.0000 illegal illegal illegal illegal
|
||||||
|
"""
|
||||||
|
|
||||||
|
q_values_k_4_action_south: """
|
||||||
|
illegal 3.6980 illegal 5.8549 4.3610
|
||||||
|
illegal 1.1999 __________ 3.7184 1.3395
|
||||||
|
illegal -0.7298 illegal 2.9266 0.6678
|
||||||
|
illegal -0.8858 __________ __________ 0.0672
|
||||||
|
illegal -0.8958 0.0005 0.0000 0.0000
|
||||||
|
"""
|
||||||
|
|
||||||
|
q_values_k_4_action_west: """
|
||||||
|
illegal -5.9915 illegal 8.5041 6.9611
|
||||||
|
illegal -6.2490 __________ 5.5061 5.0057
|
||||||
|
illegal -6.7277 illegal 1.6188 3.2015
|
||||||
|
illegal -6.9948 __________ __________ 0.3196
|
||||||
|
illegal -7.1894 0.0042 0.0000 0.0336
|
||||||
|
"""
|
||||||
|
|
||||||
|
values_k_5: """
|
||||||
|
-10.0000 8.4085 10.0000 8.5041 6.9611
|
||||||
|
-10.0000 5.5324 __________ 6.9611 5.3370
|
||||||
|
-10.0000 2.8880 1.0000 4.5452 3.4560
|
||||||
|
-10.0000 0.7477 __________ __________ 0.4740
|
||||||
|
-10.0000 0.0106 0.0042 0.0000 0.2687
|
||||||
|
"""
|
||||||
|
|
||||||
|
q_values_k_5_action_north: """
|
||||||
|
illegal 6.0541 illegal 7.6495 6.4039
|
||||||
|
illegal 5.6521 __________ 7.2298 6.1188
|
||||||
|
illegal 3.1733 illegal 5.4130 4.5627
|
||||||
|
illegal 1.2467 __________ __________ 2.5736
|
||||||
|
illegal -0.3613 0.0040 0.0246 0.3655
|
||||||
|
"""
|
||||||
|
|
||||||
|
q_values_k_5_action_east: """
|
||||||
|
illegal 8.4547 illegal 6.4039 6.1188
|
||||||
|
illegal 5.0000 __________ 5.0171 4.7802
|
||||||
|
illegal 1.2852 illegal 3.5239 3.0113
|
||||||
|
illegal 0.7992 __________ __________ 0.6765
|
||||||
|
illegal 0.0713 0.0008 0.1935 0.2603
|
||||||
|
"""
|
||||||
|
|
||||||
|
q_values_k_5_action_exit: """
|
||||||
|
-10.0000 illegal 10.0000 illegal illegal
|
||||||
|
-10.0000 illegal __________ illegal illegal
|
||||||
|
-10.0000 illegal 1.0000 illegal illegal
|
||||||
|
-10.0000 illegal __________ __________ illegal
|
||||||
|
-10.0000 illegal illegal illegal illegal
|
||||||
|
"""
|
||||||
|
|
||||||
|
q_values_k_5_action_south: """
|
||||||
|
illegal 3.9833 illegal 6.5385 5.2345
|
||||||
|
illegal 1.6773 __________ 4.3794 3.5951
|
||||||
|
illegal -0.2717 illegal 3.6736 1.0614
|
||||||
|
illegal -0.8251 __________ __________ 0.2788
|
||||||
|
illegal -0.8920 0.0040 0.0246 0.2177
|
||||||
|
"""
|
||||||
|
|
||||||
|
q_values_k_5_action_west: """
|
||||||
|
illegal -5.9453 illegal 8.5919 7.2298
|
||||||
|
illegal -6.1833 __________ 6.1864 5.9496
|
||||||
|
illegal -6.6348 illegal 1.7556 3.7955
|
||||||
|
illegal -6.9391 __________ __________ 0.6765
|
||||||
|
illegal -7.1318 0.0084 0.0030 0.0668
|
||||||
|
"""
|
||||||
|
|
||||||
|
values_k_6: """
|
||||||
|
-10.0000 8.4547 10.0000 8.5919 7.2298
|
||||||
|
-10.0000 5.6521 __________ 7.2298 6.1188
|
||||||
|
-10.0000 3.1733 1.0000 5.4130 4.5627
|
||||||
|
-10.0000 1.2467 __________ __________ 2.5736
|
||||||
|
-10.0000 0.0713 0.0084 0.1935 0.3655
|
||||||
|
"""
|
||||||
|
|
||||||
|
q_values_k_6_action_north: """
|
||||||
|
illegal 6.0874 illegal 7.7368 6.6294
|
||||||
|
illegal 5.6961 __________ 7.3875 6.4068
|
||||||
|
illegal 3.2595 illegal 5.7061 5.3034
|
||||||
|
illegal 1.4970 __________ __________ 3.7484
|
||||||
|
illegal -0.0017 0.0298 0.1730 1.9033
|
||||||
|
"""
|
||||||
|
|
||||||
|
q_values_k_6_action_east: """
|
||||||
|
illegal 8.4696 illegal 6.6294 6.4068
|
||||||
|
illegal 5.1160 __________ 5.6660 5.4669
|
||||||
|
illegal 1.3409 illegal 4.4230 4.0675
|
||||||
|
illegal 1.1896 __________ __________ 2.2966
|
||||||
|
illegal 0.1246 0.1408 0.2980 0.5277
|
||||||
|
"""
|
||||||
|
|
||||||
|
q_values_k_6_action_exit: """
|
||||||
|
-10.0000 illegal 10.0000 illegal illegal
|
||||||
|
-10.0000 illegal __________ illegal illegal
|
||||||
|
-10.0000 illegal 1.0000 illegal illegal
|
||||||
|
-10.0000 illegal __________ __________ illegal
|
||||||
|
-10.0000 illegal illegal illegal illegal
|
||||||
|
"""
|
||||||
|
|
||||||
|
q_values_k_6_action_south: """
|
||||||
|
illegal 4.0695 illegal 6.7561 5.8295
|
||||||
|
illegal 1.8935 __________ 5.0988 4.4865
|
||||||
|
illegal 0.0876 illegal 4.3980 2.7508
|
||||||
|
illegal -0.7365 __________ __________ 0.7264
|
||||||
|
illegal -0.8479 0.0298 0.1730 0.3135
|
||||||
|
"""
|
||||||
|
|
||||||
|
q_values_k_6_action_west: """
|
||||||
|
illegal -5.9304 illegal 8.6239 7.3875
|
||||||
|
illegal -6.1535 __________ 6.4659 6.2668
|
||||||
|
illegal -6.5791 illegal 1.8579 4.6797
|
||||||
|
illegal -6.9080 __________ __________ 2.2966
|
||||||
|
illegal -7.0814 0.0528 0.0408 0.4038
|
||||||
|
"""
|
||||||
|
|
||||||
|
values_k_7: """
|
||||||
|
-10.0000 8.4696 10.0000 8.6239 7.3875
|
||||||
|
-10.0000 5.6961 __________ 7.3875 6.4068
|
||||||
|
-10.0000 3.2595 1.0000 5.7061 5.3034
|
||||||
|
-10.0000 1.4970 __________ __________ 3.7484
|
||||||
|
-10.0000 0.1246 0.1408 0.2980 1.9033
|
||||||
|
"""
|
||||||
|
|
||||||
|
q_values_k_7_action_north: """
|
||||||
|
illegal 6.0981 illegal 7.7741 6.7600
|
||||||
|
illegal 5.7108 __________ 7.4507 6.5605
|
||||||
|
illegal 3.2912 illegal 5.8863 5.6038
|
||||||
|
illegal 1.5816 __________ __________ 4.4932
|
||||||
|
illegal 0.1905 0.1394 0.3985 2.8970
|
||||||
|
"""
|
||||||
|
|
||||||
|
q_values_k_7_action_east: """
|
||||||
|
illegal 8.4749 illegal 6.7600 6.5605
|
||||||
|
illegal 5.1568 __________ 5.9026 5.7551
|
||||||
|
illegal 1.3674 illegal 4.9969 4.7324
|
||||||
|
illegal 1.3824 __________ __________ 3.3475
|
||||||
|
illegal 0.2473 0.2399 1.4240 1.8790
|
||||||
|
"""
|
||||||
|
|
||||||
|
q_values_k_7_action_exit: """
|
||||||
|
-10.0000 illegal 10.0000 illegal illegal
|
||||||
|
-10.0000 illegal __________ illegal illegal
|
||||||
|
-10.0000 illegal 1.0000 illegal illegal
|
||||||
|
-10.0000 illegal __________ __________ illegal
|
||||||
|
-10.0000 illegal illegal illegal illegal
|
||||||
|
"""
|
||||||
|
|
||||||
|
q_values_k_7_action_south: """
|
||||||
|
illegal 4.1012 illegal 6.8839 6.0539
|
||||||
|
illegal 1.9595 __________ 5.3499 5.0599
|
||||||
|
illegal 0.2678 illegal 4.6757 3.6897
|
||||||
|
illegal -0.6755 __________ __________ 2.0451
|
||||||
|
illegal -0.7976 0.1394 0.3985 1.5685
|
||||||
|
"""
|
||||||
|
|
||||||
|
q_values_k_7_action_west: """
|
||||||
|
illegal -5.9251 illegal 8.6410 7.4507
|
||||||
|
illegal -6.1444 __________ 6.6087 6.4612
|
||||||
|
illegal -6.5526 illegal 1.8984 5.0224
|
||||||
|
illegal -6.8954 __________ __________ 3.3475
|
||||||
|
illegal -7.0541 0.1151 0.1550 0.7232
|
||||||
|
"""
|
||||||
|
|
||||||
|
values_k_8: """
|
||||||
|
-10.0000 8.4749 10.0000 8.6410 7.4507
|
||||||
|
-10.0000 5.7108 __________ 7.4507 6.5605
|
||||||
|
-10.0000 3.2912 1.0000 5.8863 5.6038
|
||||||
|
-10.0000 1.5816 __________ __________ 4.4932
|
||||||
|
-10.0000 0.2473 0.2399 1.4240 2.8970
|
||||||
|
"""
|
||||||
|
|
||||||
|
q_values_k_8_action_north: """
|
||||||
|
illegal 6.1019 illegal 7.7921 6.8128
|
||||||
|
illegal 5.7159 __________ 7.4826 6.6255
|
||||||
|
illegal 3.3017 illegal 5.9589 5.7577
|
||||||
|
illegal 1.6120 __________ __________ 4.8435
|
||||||
|
illegal 0.2603 0.3231 1.3076 3.6240
|
||||||
|
"""
|
||||||
|
|
||||||
|
q_values_k_8_action_east: """
|
||||||
|
illegal 8.4767 illegal 6.8128 6.6255
|
||||||
|
illegal 5.1707 __________ 6.0310 5.8985
|
||||||
|
illegal 1.3763 illegal 5.2350 5.0295
|
||||||
|
illegal 1.4572 __________ __________ 4.0001
|
||||||
|
illegal 0.3373 1.0685 2.3421 2.7509
|
||||||
|
"""
|
||||||
|
|
||||||
|
q_values_k_8_action_exit: """
|
||||||
|
-10.0000 illegal 10.0000 illegal illegal
|
||||||
|
-10.0000 illegal __________ illegal illegal
|
||||||
|
-10.0000 illegal 1.0000 illegal illegal
|
||||||
|
-10.0000 illegal __________ __________ illegal
|
||||||
|
-10.0000 illegal illegal illegal illegal
|
||||||
|
"""
|
||||||
|
|
||||||
|
q_values_k_8_action_south: """
|
||||||
|
illegal 4.1117 illegal 6.9351 6.1718
|
||||||
|
illegal 1.9836 __________ 5.4992 5.2957
|
||||||
|
illegal 0.3287 illegal 4.8325 4.2692
|
||||||
|
illegal -0.5796 __________ __________ 2.8946
|
||||||
|
illegal -0.7003 0.3231 1.3076 2.4747
|
||||||
|
"""
|
||||||
|
|
||||||
|
q_values_k_8_action_west: """
|
||||||
|
illegal -5.9233 illegal 8.6483 7.4826
|
||||||
|
illegal -6.1411 __________ 6.6720 6.5394
|
||||||
|
illegal -6.5437 illegal 1.9203 5.2330
|
||||||
|
illegal -6.8815 __________ __________ 4.0001
|
||||||
|
illegal -7.0354 0.2213 0.4290 1.6904
|
||||||
|
"""
|
||||||
|
|
||||||
|
values_k_9: """
|
||||||
|
-10.0000 8.4767 10.0000 8.6483 7.4826
|
||||||
|
-10.0000 5.7159 __________ 7.4826 6.6255
|
||||||
|
-10.0000 3.3017 1.0000 5.9589 5.7577
|
||||||
|
-10.0000 1.6120 __________ __________ 4.8435
|
||||||
|
-10.0000 0.3373 1.0685 2.3421 3.6240
|
||||||
|
"""
|
||||||
|
|
||||||
|
q_values_k_9_action_north: """
|
||||||
|
illegal 6.1032 illegal 7.8002 6.8392
|
||||||
|
illegal 5.7177 __________ 7.4965 6.6572
|
||||||
|
illegal 3.3055 illegal 5.9956 5.8249
|
||||||
|
illegal 1.6223 __________ __________ 5.0174
|
||||||
|
illegal 0.3568 1.0105 2.1087 4.0243
|
||||||
|
"""
|
||||||
|
|
||||||
|
q_values_k_9_action_east: """
|
||||||
|
illegal 8.4773 illegal 6.8392 6.6572
|
||||||
|
illegal 5.1755 __________ 6.0850 5.9620
|
||||||
|
illegal 1.3795 illegal 5.3553 5.1777
|
||||||
|
illegal 1.4881 __________ __________ 4.3316
|
||||||
|
illegal 0.9447 1.8787 3.0308 3.3713
|
||||||
|
"""
|
||||||
|
|
||||||
|
q_values_k_9_action_exit: """
|
||||||
|
-10.0000 illegal 10.0000 illegal illegal
|
||||||
|
-10.0000 illegal __________ illegal illegal
|
||||||
|
-10.0000 illegal 1.0000 illegal illegal
|
||||||
|
-10.0000 illegal __________ __________ illegal
|
||||||
|
-10.0000 illegal illegal illegal illegal
|
||||||
|
"""
|
||||||
|
|
||||||
|
q_values_k_9_action_south: """
|
||||||
|
illegal 4.1155 illegal 6.9609 6.2222
|
||||||
|
illegal 1.9917 __________ 5.5601 5.4153
|
||||||
|
illegal 0.3506 illegal 4.8986 4.5418
|
||||||
|
illegal -0.5121 __________ __________ 3.4811
|
||||||
|
illegal -0.5610 1.0105 2.1087 3.1462
|
||||||
|
"""
|
||||||
|
|
||||||
|
q_values_k_9_action_west: """
|
||||||
|
illegal -5.9227 illegal 8.6518 7.4965
|
||||||
|
illegal -6.1399 __________ 6.7021 6.5791
|
||||||
|
illegal -6.5405 illegal 1.9297 5.3226
|
||||||
|
illegal -6.8725 __________ __________ 4.3316
|
||||||
|
illegal -7.0246 0.4352 1.1909 2.4484
|
||||||
|
"""
|
||||||
|
|
||||||
|
values_k_100: """
|
||||||
|
-10.0000 8.4777 10.0000 8.6547 7.5087
|
||||||
|
-10.0000 5.7186 __________ 7.5087 6.6836
|
||||||
|
-10.0000 3.3074 1.0000 6.0258 5.8841
|
||||||
|
-10.0000 2.0045 __________ __________ 5.1665
|
||||||
|
-10.0000 2.9289 3.4513 3.9306 4.4765
|
||||||
|
"""
|
||||||
|
|
||||||
|
q_values_k_100_action_north: """
|
||||||
|
illegal 6.1039 illegal 7.8072 6.8610
|
||||||
|
illegal 5.7186 __________ 7.5087 6.6836
|
||||||
|
illegal 3.3074 illegal 6.0258 5.8841
|
||||||
|
illegal 1.6617 __________ __________ 5.1665
|
||||||
|
illegal 0.8539 3.1023 3.5435 4.4765
|
||||||
|
"""
|
||||||
|
|
||||||
|
q_values_k_100_action_east: """
|
||||||
|
illegal 8.4777 illegal 6.8610 6.6836
|
||||||
|
illegal 5.1780 __________ 6.1334 6.0175
|
||||||
|
illegal 1.4151 illegal 5.4546 5.3030
|
||||||
|
illegal 2.0045 __________ __________ 4.6523
|
||||||
|
illegal 2.9289 3.4513 3.9306 4.0910
|
||||||
|
"""
|
||||||
|
|
||||||
|
q_values_k_100_action_exit: """
|
||||||
|
-10.0000 illegal 10.0000 illegal illegal
|
||||||
|
-10.0000 illegal __________ illegal illegal
|
||||||
|
-10.0000 illegal 1.0000 illegal illegal
|
||||||
|
-10.0000 illegal __________ __________ illegal
|
||||||
|
-10.0000 illegal illegal illegal illegal
|
||||||
|
"""
|
||||||
|
|
||||||
|
q_values_k_100_action_south: """
|
||||||
|
illegal 4.1174 illegal 6.9820 6.2669
|
||||||
|
illegal 1.9960 __________ 5.6159 5.5138
|
||||||
|
illegal 0.6333 illegal 4.9582 4.7918
|
||||||
|
illegal 1.3892 __________ __________ 4.1531
|
||||||
|
illegal 1.5194 3.1023 3.5435 3.9797
|
||||||
|
"""
|
||||||
|
|
||||||
|
q_values_k_100_action_west: """
|
||||||
|
illegal -5.9223 illegal 8.6547 7.5087
|
||||||
|
illegal -6.1393 __________ 6.7275 6.6116
|
||||||
|
illegal -6.5049 illegal 1.9381 5.4051
|
||||||
|
illegal -6.6387 __________ __________ 4.6523
|
||||||
|
illegal -6.7560 2.7300 3.1924 3.6979
|
||||||
|
"""
|
||||||
|
|
||||||
|
policy: """
|
||||||
|
exit east exit west west
|
||||||
|
exit north __________ north north
|
||||||
|
exit north exit north north
|
||||||
|
exit east __________ __________ north
|
||||||
|
exit east east east north
|
||||||
|
"""
|
||||||
|
|
||||||
|
actions: """
|
||||||
|
north
|
||||||
|
east
|
||||||
|
exit
|
||||||
|
south
|
||||||
|
west
|
||||||
|
"""
|
||||||
|
|
||||||
24
p3_rl/test_cases/q1/4-discountgrid.test
Normal file
24
p3_rl/test_cases/q1/4-discountgrid.test
Normal file
@@ -0,0 +1,24 @@
|
|||||||
|
class: "ValueIterationTest"
|
||||||
|
|
||||||
|
# GridWorld specification
|
||||||
|
# _ is empty space
|
||||||
|
# numbers are terminal states with that value
|
||||||
|
# # is a wall
|
||||||
|
# S is a start state
|
||||||
|
#
|
||||||
|
grid: """
|
||||||
|
-10 _ 10 _ _
|
||||||
|
-10 _ # _ _
|
||||||
|
-10 _ 1 _ _
|
||||||
|
-10 _ # # _
|
||||||
|
-10 S _ _ _
|
||||||
|
"""
|
||||||
|
discount: "0.9"
|
||||||
|
noise: "0.2"
|
||||||
|
livingReward: "0.0"
|
||||||
|
epsilon: "0.2"
|
||||||
|
learningRate: "0.1"
|
||||||
|
numExperiences: "3000"
|
||||||
|
valueIterations: "100"
|
||||||
|
iterations: "10000"
|
||||||
|
|
||||||
2
p3_rl/test_cases/q1/CONFIG
Normal file
2
p3_rl/test_cases/q1/CONFIG
Normal file
@@ -0,0 +1,2 @@
|
|||||||
|
max_points: "6"
|
||||||
|
class: "PassAllTestsQuestion"
|
||||||
2
p3_rl/test_cases/q2/1-bridge-grid.solution
Normal file
2
p3_rl/test_cases/q2/1-bridge-grid.solution
Normal file
@@ -0,0 +1,2 @@
|
|||||||
|
# This is the solution file for test_cases/q2/1-bridge-grid.test.
|
||||||
|
# File intentionally blank.
|
||||||
29
p3_rl/test_cases/q2/1-bridge-grid.test
Normal file
29
p3_rl/test_cases/q2/1-bridge-grid.test
Normal file
@@ -0,0 +1,29 @@
|
|||||||
|
class: "GridPolicyTest"
|
||||||
|
|
||||||
|
# Function in module in analysis that returns (discount, noise)
|
||||||
|
parameterFn: "question2"
|
||||||
|
question2: "true"
|
||||||
|
|
||||||
|
# GridWorld specification
|
||||||
|
# _ is empty space
|
||||||
|
# numbers are terminal states with that value
|
||||||
|
# # is a wall
|
||||||
|
# S is a start state
|
||||||
|
#
|
||||||
|
grid: """
|
||||||
|
# -100 -100 -100 -100 -100 #
|
||||||
|
1 S _ _ _ _ 10
|
||||||
|
# -100 -100 -100 -100 -100 #
|
||||||
|
"""
|
||||||
|
gridName: "bridgeGrid"
|
||||||
|
|
||||||
|
# Policy specification
|
||||||
|
# _ policy choice not checked
|
||||||
|
# N, E, S, W policy action must be north, east, south, west
|
||||||
|
#
|
||||||
|
policy: """
|
||||||
|
_ _ _ _ _ _ _
|
||||||
|
_ E _ _ _ _ _
|
||||||
|
_ _ _ _ _ _ _
|
||||||
|
"""
|
||||||
|
|
||||||
2
p3_rl/test_cases/q2/CONFIG
Normal file
2
p3_rl/test_cases/q2/CONFIG
Normal file
@@ -0,0 +1,2 @@
|
|||||||
|
max_points: "1"
|
||||||
|
class: "PassAllTestsQuestion"
|
||||||
2
p3_rl/test_cases/q3/1-question-3.1.solution
Normal file
2
p3_rl/test_cases/q3/1-question-3.1.solution
Normal file
@@ -0,0 +1,2 @@
|
|||||||
|
# This is the solution file for test_cases/q3/1-question-3.1.test.
|
||||||
|
# File intentionally blank.
|
||||||
31
p3_rl/test_cases/q3/1-question-3.1.test
Normal file
31
p3_rl/test_cases/q3/1-question-3.1.test
Normal file
@@ -0,0 +1,31 @@
|
|||||||
|
class: "GridPolicyTest"
|
||||||
|
|
||||||
|
# Function in module in analysis that returns (discount, noise)
|
||||||
|
parameterFn: "question3a"
|
||||||
|
|
||||||
|
# GridWorld specification
|
||||||
|
# _ is empty space
|
||||||
|
# numbers are terminal states with that value
|
||||||
|
# # is a wall
|
||||||
|
# S is a start state
|
||||||
|
#
|
||||||
|
grid: """
|
||||||
|
_ _ _ _ _
|
||||||
|
_ # _ _ _
|
||||||
|
_ # 1 # 10
|
||||||
|
S _ _ _ _
|
||||||
|
-10 -10 -10 -10 -10
|
||||||
|
"""
|
||||||
|
gridName: "discountGrid"
|
||||||
|
|
||||||
|
# Policy specification
|
||||||
|
# _ policy choice not checked
|
||||||
|
# N, E, S, W policy action must be north, east, south, west
|
||||||
|
#
|
||||||
|
policy: """
|
||||||
|
_ _ _ _ _
|
||||||
|
_ _ _ _ _
|
||||||
|
_ _ _ _ _
|
||||||
|
E E N _ _
|
||||||
|
_ _ _ _ _
|
||||||
|
"""
|
||||||
2
p3_rl/test_cases/q3/2-question-3.2.solution
Normal file
2
p3_rl/test_cases/q3/2-question-3.2.solution
Normal file
@@ -0,0 +1,2 @@
|
|||||||
|
# This is the solution file for test_cases/q3/2-question-3.2.test.
|
||||||
|
# File intentionally blank.
|
||||||
31
p3_rl/test_cases/q3/2-question-3.2.test
Normal file
31
p3_rl/test_cases/q3/2-question-3.2.test
Normal file
@@ -0,0 +1,31 @@
|
|||||||
|
class: "GridPolicyTest"
|
||||||
|
|
||||||
|
# Function in module in analysis that returns (discount, noise)
|
||||||
|
parameterFn: "question3b"
|
||||||
|
|
||||||
|
# GridWorld specification
|
||||||
|
# _ is empty space
|
||||||
|
# numbers are terminal states with that value
|
||||||
|
# # is a wall
|
||||||
|
# S is a start state
|
||||||
|
#
|
||||||
|
grid: """
|
||||||
|
_ _ _ _ _
|
||||||
|
_ # _ _ _
|
||||||
|
_ # 1 # 10
|
||||||
|
S _ _ _ _
|
||||||
|
-10 -10 -10 -10 -10
|
||||||
|
"""
|
||||||
|
gridName: "discountGrid"
|
||||||
|
|
||||||
|
# Policy specification
|
||||||
|
# _ policy choice not checked
|
||||||
|
# N, E, S, W policy action must be north, east, south, west
|
||||||
|
#
|
||||||
|
policy: """
|
||||||
|
E E S _ _
|
||||||
|
N _ S _ _
|
||||||
|
N _ _ _ _
|
||||||
|
N _ _ _ _
|
||||||
|
_ _ _ _ _
|
||||||
|
"""
|
||||||
2
p3_rl/test_cases/q3/3-question-3.3.solution
Normal file
2
p3_rl/test_cases/q3/3-question-3.3.solution
Normal file
@@ -0,0 +1,2 @@
|
|||||||
|
# This is the solution file for test_cases/q3/3-question-3.3.test.
|
||||||
|
# File intentionally blank.
|
||||||
31
p3_rl/test_cases/q3/3-question-3.3.test
Normal file
31
p3_rl/test_cases/q3/3-question-3.3.test
Normal file
@@ -0,0 +1,31 @@
|
|||||||
|
class: "GridPolicyTest"
|
||||||
|
|
||||||
|
# Function in module in analysis that returns (discount, noise)
|
||||||
|
parameterFn: "question3c"
|
||||||
|
|
||||||
|
# GridWorld specification
|
||||||
|
# _ is empty space
|
||||||
|
# numbers are terminal states with that value
|
||||||
|
# # is a wall
|
||||||
|
# S is a start state
|
||||||
|
#
|
||||||
|
grid: """
|
||||||
|
_ _ _ _ _
|
||||||
|
_ # _ _ _
|
||||||
|
_ # 1 # 10
|
||||||
|
S _ _ _ _
|
||||||
|
-10 -10 -10 -10 -10
|
||||||
|
"""
|
||||||
|
gridName: "discountGrid"
|
||||||
|
|
||||||
|
# Policy specification
|
||||||
|
# _ policy choice not checked
|
||||||
|
# N, E, S, W policy action must be north, east, south, west
|
||||||
|
#
|
||||||
|
policy: """
|
||||||
|
_ _ _ _ _
|
||||||
|
_ _ _ _ _
|
||||||
|
_ _ _ _ _
|
||||||
|
E E E E N
|
||||||
|
_ _ _ _ _
|
||||||
|
"""
|
||||||
2
p3_rl/test_cases/q3/4-question-3.4.solution
Normal file
2
p3_rl/test_cases/q3/4-question-3.4.solution
Normal file
@@ -0,0 +1,2 @@
|
|||||||
|
# This is the solution file for test_cases/q3/4-question-3.4.test.
|
||||||
|
# File intentionally blank.
|
||||||
36
p3_rl/test_cases/q3/4-question-3.4.test
Normal file
36
p3_rl/test_cases/q3/4-question-3.4.test
Normal file
@@ -0,0 +1,36 @@
|
|||||||
|
class: "GridPolicyTest"
|
||||||
|
|
||||||
|
# Function in module in analysis that returns (discount, noise)
|
||||||
|
parameterFn: "question3d"
|
||||||
|
|
||||||
|
# GridWorld specification
|
||||||
|
# _ is empty space
|
||||||
|
# numbers are terminal states with that value
|
||||||
|
# # is a wall
|
||||||
|
# S is a start state
|
||||||
|
#
|
||||||
|
grid: """
|
||||||
|
_ _ _ _ _
|
||||||
|
_ # _ _ _
|
||||||
|
_ # 1 # 10
|
||||||
|
S _ _ _ _
|
||||||
|
-10 -10 -10 -10 -10
|
||||||
|
"""
|
||||||
|
gridName: "discountGrid"
|
||||||
|
|
||||||
|
# Policy specification
|
||||||
|
# _ policy choice not checked
|
||||||
|
# N, E, S, W policy action must be north, east, south, west
|
||||||
|
#
|
||||||
|
policy: """
|
||||||
|
_ _ _ _ _
|
||||||
|
_ _ _ _ _
|
||||||
|
_ _ _ _ _
|
||||||
|
N _ _ _ _
|
||||||
|
_ _ _ _ _
|
||||||
|
"""
|
||||||
|
|
||||||
|
# State the most probable path must visit
|
||||||
|
# (x,y) for a particular location; (0,0) is bottom left
|
||||||
|
# TERMINAL_STATE for the terminal state
|
||||||
|
pathVisits: "(4,2)"
|
||||||
2
p3_rl/test_cases/q3/5-question-3.5.solution
Normal file
2
p3_rl/test_cases/q3/5-question-3.5.solution
Normal file
@@ -0,0 +1,2 @@
|
|||||||
|
# This is the solution file for test_cases/q3/5-question-3.5.test.
|
||||||
|
# File intentionally blank.
|
||||||
36
p3_rl/test_cases/q3/5-question-3.5.test
Normal file
36
p3_rl/test_cases/q3/5-question-3.5.test
Normal file
@@ -0,0 +1,36 @@
|
|||||||
|
class: "GridPolicyTest"
|
||||||
|
|
||||||
|
# Function in module in analysis that returns (discount, noise)
|
||||||
|
parameterFn: "question3e"
|
||||||
|
|
||||||
|
# GridWorld specification
|
||||||
|
# _ is empty space
|
||||||
|
# numbers are terminal states with that value
|
||||||
|
# # is a wall
|
||||||
|
# S is a start state
|
||||||
|
#
|
||||||
|
grid: """
|
||||||
|
_ _ _ _ _
|
||||||
|
_ # _ _ _
|
||||||
|
_ # 1 # 10
|
||||||
|
S _ _ _ _
|
||||||
|
-10 -10 -10 -10 -10
|
||||||
|
"""
|
||||||
|
gridName: "discountGrid"
|
||||||
|
|
||||||
|
# Policy specification
|
||||||
|
# _ policy choice not checked
|
||||||
|
# N, E, S, W policy action must be north, east, south, west
|
||||||
|
#
|
||||||
|
policy: """
|
||||||
|
_ _ _ _ _
|
||||||
|
_ _ _ _ _
|
||||||
|
_ _ _ _ _
|
||||||
|
_ _ _ _ _
|
||||||
|
_ _ _ _ _
|
||||||
|
"""
|
||||||
|
|
||||||
|
# State the most probable path must not visit
|
||||||
|
# (x,y) for a particular location; (0,0) is bottom left
|
||||||
|
# TERMINAL_STATE for the terminal state
|
||||||
|
pathNotVisits: "TERMINAL_STATE"
|
||||||
2
p3_rl/test_cases/q3/CONFIG
Normal file
2
p3_rl/test_cases/q3/CONFIG
Normal file
@@ -0,0 +1,2 @@
|
|||||||
|
max_points: "5"
|
||||||
|
class: "NumberPassedQuestion"
|
||||||
342
p3_rl/test_cases/q4/1-tinygrid.solution
Normal file
342
p3_rl/test_cases/q4/1-tinygrid.solution
Normal file
@@ -0,0 +1,342 @@
|
|||||||
|
q_values_k_0_action_north: """
|
||||||
|
illegal
|
||||||
|
0.0000
|
||||||
|
illegal
|
||||||
|
"""
|
||||||
|
|
||||||
|
q_values_k_0_action_east: """
|
||||||
|
illegal
|
||||||
|
0.0000
|
||||||
|
illegal
|
||||||
|
"""
|
||||||
|
|
||||||
|
q_values_k_0_action_exit: """
|
||||||
|
0.0000
|
||||||
|
illegal
|
||||||
|
0.0000
|
||||||
|
"""
|
||||||
|
|
||||||
|
q_values_k_0_action_south: """
|
||||||
|
illegal
|
||||||
|
0.0000
|
||||||
|
illegal
|
||||||
|
"""
|
||||||
|
|
||||||
|
q_values_k_0_action_west: """
|
||||||
|
illegal
|
||||||
|
0.0000
|
||||||
|
illegal
|
||||||
|
"""
|
||||||
|
|
||||||
|
q_values_k_1_action_north: """
|
||||||
|
illegal
|
||||||
|
0.0000
|
||||||
|
illegal
|
||||||
|
"""
|
||||||
|
|
||||||
|
q_values_k_1_action_east: """
|
||||||
|
illegal
|
||||||
|
0.0000
|
||||||
|
illegal
|
||||||
|
"""
|
||||||
|
|
||||||
|
q_values_k_1_action_exit: """
|
||||||
|
0.0000
|
||||||
|
illegal
|
||||||
|
1.0000
|
||||||
|
"""
|
||||||
|
|
||||||
|
q_values_k_1_action_south: """
|
||||||
|
illegal
|
||||||
|
0.0000
|
||||||
|
illegal
|
||||||
|
"""
|
||||||
|
|
||||||
|
q_values_k_1_action_west: """
|
||||||
|
illegal
|
||||||
|
0.0000
|
||||||
|
illegal
|
||||||
|
"""
|
||||||
|
|
||||||
|
q_values_k_2_action_north: """
|
||||||
|
illegal
|
||||||
|
0.0000
|
||||||
|
illegal
|
||||||
|
"""
|
||||||
|
|
||||||
|
q_values_k_2_action_east: """
|
||||||
|
illegal
|
||||||
|
0.0000
|
||||||
|
illegal
|
||||||
|
"""
|
||||||
|
|
||||||
|
q_values_k_2_action_exit: """
|
||||||
|
0.0000
|
||||||
|
illegal
|
||||||
|
1.0000
|
||||||
|
"""
|
||||||
|
|
||||||
|
q_values_k_2_action_south: """
|
||||||
|
illegal
|
||||||
|
0.0000
|
||||||
|
illegal
|
||||||
|
"""
|
||||||
|
|
||||||
|
q_values_k_2_action_west: """
|
||||||
|
illegal
|
||||||
|
0.0000
|
||||||
|
illegal
|
||||||
|
"""
|
||||||
|
|
||||||
|
q_values_k_3_action_north: """
|
||||||
|
illegal
|
||||||
|
0.0000
|
||||||
|
illegal
|
||||||
|
"""
|
||||||
|
|
||||||
|
q_values_k_3_action_east: """
|
||||||
|
illegal
|
||||||
|
0.0000
|
||||||
|
illegal
|
||||||
|
"""
|
||||||
|
|
||||||
|
q_values_k_3_action_exit: """
|
||||||
|
0.0000
|
||||||
|
illegal
|
||||||
|
1.9000
|
||||||
|
"""
|
||||||
|
|
||||||
|
q_values_k_3_action_south: """
|
||||||
|
illegal
|
||||||
|
0.0000
|
||||||
|
illegal
|
||||||
|
"""
|
||||||
|
|
||||||
|
q_values_k_3_action_west: """
|
||||||
|
illegal
|
||||||
|
0.0000
|
||||||
|
illegal
|
||||||
|
"""
|
||||||
|
|
||||||
|
q_values_k_4_action_north: """
|
||||||
|
illegal
|
||||||
|
0.0000
|
||||||
|
illegal
|
||||||
|
"""
|
||||||
|
|
||||||
|
q_values_k_4_action_east: """
|
||||||
|
illegal
|
||||||
|
0.0000
|
||||||
|
illegal
|
||||||
|
"""
|
||||||
|
|
||||||
|
q_values_k_4_action_exit: """
|
||||||
|
0.0000
|
||||||
|
illegal
|
||||||
|
2.7100
|
||||||
|
"""
|
||||||
|
|
||||||
|
q_values_k_4_action_south: """
|
||||||
|
illegal
|
||||||
|
0.0000
|
||||||
|
illegal
|
||||||
|
"""
|
||||||
|
|
||||||
|
q_values_k_4_action_west: """
|
||||||
|
illegal
|
||||||
|
0.0000
|
||||||
|
illegal
|
||||||
|
"""
|
||||||
|
|
||||||
|
q_values_k_5_action_north: """
|
||||||
|
illegal
|
||||||
|
0.0000
|
||||||
|
illegal
|
||||||
|
"""
|
||||||
|
|
||||||
|
q_values_k_5_action_east: """
|
||||||
|
illegal
|
||||||
|
0.0000
|
||||||
|
illegal
|
||||||
|
"""
|
||||||
|
|
||||||
|
q_values_k_5_action_exit: """
|
||||||
|
-1.0000
|
||||||
|
illegal
|
||||||
|
2.7100
|
||||||
|
"""
|
||||||
|
|
||||||
|
q_values_k_5_action_south: """
|
||||||
|
illegal
|
||||||
|
0.0000
|
||||||
|
illegal
|
||||||
|
"""
|
||||||
|
|
||||||
|
q_values_k_5_action_west: """
|
||||||
|
illegal
|
||||||
|
0.0000
|
||||||
|
illegal
|
||||||
|
"""
|
||||||
|
|
||||||
|
q_values_k_6_action_north: """
|
||||||
|
illegal
|
||||||
|
0.0000
|
||||||
|
illegal
|
||||||
|
"""
|
||||||
|
|
||||||
|
q_values_k_6_action_east: """
|
||||||
|
illegal
|
||||||
|
0.0000
|
||||||
|
illegal
|
||||||
|
"""
|
||||||
|
|
||||||
|
q_values_k_6_action_exit: """
|
||||||
|
-1.0000
|
||||||
|
illegal
|
||||||
|
3.4390
|
||||||
|
"""
|
||||||
|
|
||||||
|
q_values_k_6_action_south: """
|
||||||
|
illegal
|
||||||
|
0.0000
|
||||||
|
illegal
|
||||||
|
"""
|
||||||
|
|
||||||
|
q_values_k_6_action_west: """
|
||||||
|
illegal
|
||||||
|
0.0000
|
||||||
|
illegal
|
||||||
|
"""
|
||||||
|
|
||||||
|
q_values_k_7_action_north: """
|
||||||
|
illegal
|
||||||
|
0.0000
|
||||||
|
illegal
|
||||||
|
"""
|
||||||
|
|
||||||
|
q_values_k_7_action_east: """
|
||||||
|
illegal
|
||||||
|
0.0000
|
||||||
|
illegal
|
||||||
|
"""
|
||||||
|
|
||||||
|
q_values_k_7_action_exit: """
|
||||||
|
-1.0000
|
||||||
|
illegal
|
||||||
|
3.4390
|
||||||
|
"""
|
||||||
|
|
||||||
|
q_values_k_7_action_south: """
|
||||||
|
illegal
|
||||||
|
0.1720
|
||||||
|
illegal
|
||||||
|
"""
|
||||||
|
|
||||||
|
q_values_k_7_action_west: """
|
||||||
|
illegal
|
||||||
|
0.0000
|
||||||
|
illegal
|
||||||
|
"""
|
||||||
|
|
||||||
|
q_values_k_8_action_north: """
|
||||||
|
illegal
|
||||||
|
0.0000
|
||||||
|
illegal
|
||||||
|
"""
|
||||||
|
|
||||||
|
q_values_k_8_action_east: """
|
||||||
|
illegal
|
||||||
|
0.0000
|
||||||
|
illegal
|
||||||
|
"""
|
||||||
|
|
||||||
|
q_values_k_8_action_exit: """
|
||||||
|
-1.0000
|
||||||
|
illegal
|
||||||
|
4.0951
|
||||||
|
"""
|
||||||
|
|
||||||
|
q_values_k_8_action_south: """
|
||||||
|
illegal
|
||||||
|
0.1720
|
||||||
|
illegal
|
||||||
|
"""
|
||||||
|
|
||||||
|
q_values_k_8_action_west: """
|
||||||
|
illegal
|
||||||
|
0.0000
|
||||||
|
illegal
|
||||||
|
"""
|
||||||
|
|
||||||
|
q_values_k_9_action_north: """
|
||||||
|
illegal
|
||||||
|
0.0000
|
||||||
|
illegal
|
||||||
|
"""
|
||||||
|
|
||||||
|
q_values_k_9_action_east: """
|
||||||
|
illegal
|
||||||
|
0.0000
|
||||||
|
illegal
|
||||||
|
"""
|
||||||
|
|
||||||
|
q_values_k_9_action_exit: """
|
||||||
|
-1.0000
|
||||||
|
illegal
|
||||||
|
4.6856
|
||||||
|
"""
|
||||||
|
|
||||||
|
q_values_k_9_action_south: """
|
||||||
|
illegal
|
||||||
|
0.1720
|
||||||
|
illegal
|
||||||
|
"""
|
||||||
|
|
||||||
|
q_values_k_9_action_west: """
|
||||||
|
illegal
|
||||||
|
0.0000
|
||||||
|
illegal
|
||||||
|
"""
|
||||||
|
|
||||||
|
q_values_k_100_action_north: """
|
||||||
|
illegal
|
||||||
|
-0.4534
|
||||||
|
illegal
|
||||||
|
"""
|
||||||
|
|
||||||
|
q_values_k_100_action_east: """
|
||||||
|
illegal
|
||||||
|
0.4063
|
||||||
|
illegal
|
||||||
|
"""
|
||||||
|
|
||||||
|
q_values_k_100_action_exit: """
|
||||||
|
-9.4767
|
||||||
|
illegal
|
||||||
|
9.8175
|
||||||
|
"""
|
||||||
|
|
||||||
|
q_values_k_100_action_south: """
|
||||||
|
illegal
|
||||||
|
2.1267
|
||||||
|
illegal
|
||||||
|
"""
|
||||||
|
|
||||||
|
q_values_k_100_action_west: """
|
||||||
|
illegal
|
||||||
|
0.3919
|
||||||
|
illegal
|
||||||
|
"""
|
||||||
|
|
||||||
|
values: """
|
||||||
|
-9.4767
|
||||||
|
2.1267
|
||||||
|
9.8175
|
||||||
|
"""
|
||||||
|
|
||||||
|
policy: """
|
||||||
|
exit
|
||||||
|
south
|
||||||
|
exit
|
||||||
|
"""
|
||||||
|
|
||||||
22
p3_rl/test_cases/q4/1-tinygrid.test
Normal file
22
p3_rl/test_cases/q4/1-tinygrid.test
Normal file
@@ -0,0 +1,22 @@
|
|||||||
|
class: "QLearningTest"
|
||||||
|
|
||||||
|
# GridWorld specification
|
||||||
|
# _ is empty space
|
||||||
|
# numbers are terminal states with that value
|
||||||
|
# # is a wall
|
||||||
|
# S is a start state
|
||||||
|
#
|
||||||
|
grid: """
|
||||||
|
-10
|
||||||
|
S
|
||||||
|
10
|
||||||
|
"""
|
||||||
|
discount: "0.5"
|
||||||
|
noise: "0.0"
|
||||||
|
livingReward: "0.0"
|
||||||
|
epsilon: "0.5"
|
||||||
|
learningRate: "0.1"
|
||||||
|
numExperiences: "100"
|
||||||
|
valueIterations: "100"
|
||||||
|
iterations: "10000"
|
||||||
|
|
||||||
342
p3_rl/test_cases/q4/2-tinygrid-noisy.solution
Normal file
342
p3_rl/test_cases/q4/2-tinygrid-noisy.solution
Normal file
@@ -0,0 +1,342 @@
|
|||||||
|
q_values_k_0_action_north: """
|
||||||
|
illegal
|
||||||
|
0.0000
|
||||||
|
illegal
|
||||||
|
"""
|
||||||
|
|
||||||
|
q_values_k_0_action_east: """
|
||||||
|
illegal
|
||||||
|
0.0000
|
||||||
|
illegal
|
||||||
|
"""
|
||||||
|
|
||||||
|
q_values_k_0_action_exit: """
|
||||||
|
0.0000
|
||||||
|
illegal
|
||||||
|
0.0000
|
||||||
|
"""
|
||||||
|
|
||||||
|
q_values_k_0_action_south: """
|
||||||
|
illegal
|
||||||
|
0.0000
|
||||||
|
illegal
|
||||||
|
"""
|
||||||
|
|
||||||
|
q_values_k_0_action_west: """
|
||||||
|
illegal
|
||||||
|
0.0000
|
||||||
|
illegal
|
||||||
|
"""
|
||||||
|
|
||||||
|
q_values_k_1_action_north: """
|
||||||
|
illegal
|
||||||
|
0.0000
|
||||||
|
illegal
|
||||||
|
"""
|
||||||
|
|
||||||
|
q_values_k_1_action_east: """
|
||||||
|
illegal
|
||||||
|
0.0000
|
||||||
|
illegal
|
||||||
|
"""
|
||||||
|
|
||||||
|
q_values_k_1_action_exit: """
|
||||||
|
0.0000
|
||||||
|
illegal
|
||||||
|
1.0000
|
||||||
|
"""
|
||||||
|
|
||||||
|
q_values_k_1_action_south: """
|
||||||
|
illegal
|
||||||
|
0.0000
|
||||||
|
illegal
|
||||||
|
"""
|
||||||
|
|
||||||
|
q_values_k_1_action_west: """
|
||||||
|
illegal
|
||||||
|
0.0000
|
||||||
|
illegal
|
||||||
|
"""
|
||||||
|
|
||||||
|
q_values_k_2_action_north: """
|
||||||
|
illegal
|
||||||
|
0.0000
|
||||||
|
illegal
|
||||||
|
"""
|
||||||
|
|
||||||
|
q_values_k_2_action_east: """
|
||||||
|
illegal
|
||||||
|
0.0000
|
||||||
|
illegal
|
||||||
|
"""
|
||||||
|
|
||||||
|
q_values_k_2_action_exit: """
|
||||||
|
0.0000
|
||||||
|
illegal
|
||||||
|
1.0000
|
||||||
|
"""
|
||||||
|
|
||||||
|
q_values_k_2_action_south: """
|
||||||
|
illegal
|
||||||
|
0.0000
|
||||||
|
illegal
|
||||||
|
"""
|
||||||
|
|
||||||
|
q_values_k_2_action_west: """
|
||||||
|
illegal
|
||||||
|
0.0000
|
||||||
|
illegal
|
||||||
|
"""
|
||||||
|
|
||||||
|
q_values_k_3_action_north: """
|
||||||
|
illegal
|
||||||
|
0.0000
|
||||||
|
illegal
|
||||||
|
"""
|
||||||
|
|
||||||
|
q_values_k_3_action_east: """
|
||||||
|
illegal
|
||||||
|
0.0000
|
||||||
|
illegal
|
||||||
|
"""
|
||||||
|
|
||||||
|
q_values_k_3_action_exit: """
|
||||||
|
0.0000
|
||||||
|
illegal
|
||||||
|
1.9000
|
||||||
|
"""
|
||||||
|
|
||||||
|
q_values_k_3_action_south: """
|
||||||
|
illegal
|
||||||
|
0.0000
|
||||||
|
illegal
|
||||||
|
"""
|
||||||
|
|
||||||
|
q_values_k_3_action_west: """
|
||||||
|
illegal
|
||||||
|
0.0000
|
||||||
|
illegal
|
||||||
|
"""
|
||||||
|
|
||||||
|
q_values_k_4_action_north: """
|
||||||
|
illegal
|
||||||
|
0.0000
|
||||||
|
illegal
|
||||||
|
"""
|
||||||
|
|
||||||
|
q_values_k_4_action_east: """
|
||||||
|
illegal
|
||||||
|
0.0000
|
||||||
|
illegal
|
||||||
|
"""
|
||||||
|
|
||||||
|
q_values_k_4_action_exit: """
|
||||||
|
0.0000
|
||||||
|
illegal
|
||||||
|
2.7100
|
||||||
|
"""
|
||||||
|
|
||||||
|
q_values_k_4_action_south: """
|
||||||
|
illegal
|
||||||
|
0.0000
|
||||||
|
illegal
|
||||||
|
"""
|
||||||
|
|
||||||
|
q_values_k_4_action_west: """
|
||||||
|
illegal
|
||||||
|
0.0000
|
||||||
|
illegal
|
||||||
|
"""
|
||||||
|
|
||||||
|
q_values_k_5_action_north: """
|
||||||
|
illegal
|
||||||
|
0.0000
|
||||||
|
illegal
|
||||||
|
"""
|
||||||
|
|
||||||
|
q_values_k_5_action_east: """
|
||||||
|
illegal
|
||||||
|
0.0000
|
||||||
|
illegal
|
||||||
|
"""
|
||||||
|
|
||||||
|
q_values_k_5_action_exit: """
|
||||||
|
-1.0000
|
||||||
|
illegal
|
||||||
|
2.7100
|
||||||
|
"""
|
||||||
|
|
||||||
|
q_values_k_5_action_south: """
|
||||||
|
illegal
|
||||||
|
0.0000
|
||||||
|
illegal
|
||||||
|
"""
|
||||||
|
|
||||||
|
q_values_k_5_action_west: """
|
||||||
|
illegal
|
||||||
|
0.0000
|
||||||
|
illegal
|
||||||
|
"""
|
||||||
|
|
||||||
|
q_values_k_6_action_north: """
|
||||||
|
illegal
|
||||||
|
0.0000
|
||||||
|
illegal
|
||||||
|
"""
|
||||||
|
|
||||||
|
q_values_k_6_action_east: """
|
||||||
|
illegal
|
||||||
|
0.0000
|
||||||
|
illegal
|
||||||
|
"""
|
||||||
|
|
||||||
|
q_values_k_6_action_exit: """
|
||||||
|
-1.0000
|
||||||
|
illegal
|
||||||
|
3.4390
|
||||||
|
"""
|
||||||
|
|
||||||
|
q_values_k_6_action_south: """
|
||||||
|
illegal
|
||||||
|
0.0000
|
||||||
|
illegal
|
||||||
|
"""
|
||||||
|
|
||||||
|
q_values_k_6_action_west: """
|
||||||
|
illegal
|
||||||
|
0.0000
|
||||||
|
illegal
|
||||||
|
"""
|
||||||
|
|
||||||
|
q_values_k_7_action_north: """
|
||||||
|
illegal
|
||||||
|
0.0000
|
||||||
|
illegal
|
||||||
|
"""
|
||||||
|
|
||||||
|
q_values_k_7_action_east: """
|
||||||
|
illegal
|
||||||
|
0.0000
|
||||||
|
illegal
|
||||||
|
"""
|
||||||
|
|
||||||
|
q_values_k_7_action_exit: """
|
||||||
|
-1.0000
|
||||||
|
illegal
|
||||||
|
3.4390
|
||||||
|
"""
|
||||||
|
|
||||||
|
q_values_k_7_action_south: """
|
||||||
|
illegal
|
||||||
|
0.2579
|
||||||
|
illegal
|
||||||
|
"""
|
||||||
|
|
||||||
|
q_values_k_7_action_west: """
|
||||||
|
illegal
|
||||||
|
0.0000
|
||||||
|
illegal
|
||||||
|
"""
|
||||||
|
|
||||||
|
q_values_k_8_action_north: """
|
||||||
|
illegal
|
||||||
|
0.0000
|
||||||
|
illegal
|
||||||
|
"""
|
||||||
|
|
||||||
|
q_values_k_8_action_east: """
|
||||||
|
illegal
|
||||||
|
0.0000
|
||||||
|
illegal
|
||||||
|
"""
|
||||||
|
|
||||||
|
q_values_k_8_action_exit: """
|
||||||
|
-1.0000
|
||||||
|
illegal
|
||||||
|
4.0951
|
||||||
|
"""
|
||||||
|
|
||||||
|
q_values_k_8_action_south: """
|
||||||
|
illegal
|
||||||
|
0.2579
|
||||||
|
illegal
|
||||||
|
"""
|
||||||
|
|
||||||
|
q_values_k_8_action_west: """
|
||||||
|
illegal
|
||||||
|
0.0000
|
||||||
|
illegal
|
||||||
|
"""
|
||||||
|
|
||||||
|
q_values_k_9_action_north: """
|
||||||
|
illegal
|
||||||
|
0.0000
|
||||||
|
illegal
|
||||||
|
"""
|
||||||
|
|
||||||
|
q_values_k_9_action_east: """
|
||||||
|
illegal
|
||||||
|
0.0000
|
||||||
|
illegal
|
||||||
|
"""
|
||||||
|
|
||||||
|
q_values_k_9_action_exit: """
|
||||||
|
-1.0000
|
||||||
|
illegal
|
||||||
|
4.6856
|
||||||
|
"""
|
||||||
|
|
||||||
|
q_values_k_9_action_south: """
|
||||||
|
illegal
|
||||||
|
0.2579
|
||||||
|
illegal
|
||||||
|
"""
|
||||||
|
|
||||||
|
q_values_k_9_action_west: """
|
||||||
|
illegal
|
||||||
|
0.0000
|
||||||
|
illegal
|
||||||
|
"""
|
||||||
|
|
||||||
|
q_values_k_100_action_north: """
|
||||||
|
illegal
|
||||||
|
-0.6670
|
||||||
|
illegal
|
||||||
|
"""
|
||||||
|
|
||||||
|
q_values_k_100_action_east: """
|
||||||
|
illegal
|
||||||
|
0.9499
|
||||||
|
illegal
|
||||||
|
"""
|
||||||
|
|
||||||
|
q_values_k_100_action_exit: """
|
||||||
|
-9.4767
|
||||||
|
illegal
|
||||||
|
9.8175
|
||||||
|
"""
|
||||||
|
|
||||||
|
q_values_k_100_action_south: """
|
||||||
|
illegal
|
||||||
|
3.2562
|
||||||
|
illegal
|
||||||
|
"""
|
||||||
|
|
||||||
|
q_values_k_100_action_west: """
|
||||||
|
illegal
|
||||||
|
0.8236
|
||||||
|
illegal
|
||||||
|
"""
|
||||||
|
|
||||||
|
values: """
|
||||||
|
-9.4767
|
||||||
|
3.2562
|
||||||
|
9.8175
|
||||||
|
"""
|
||||||
|
|
||||||
|
policy: """
|
||||||
|
exit
|
||||||
|
south
|
||||||
|
exit
|
||||||
|
"""
|
||||||
|
|
||||||
22
p3_rl/test_cases/q4/2-tinygrid-noisy.test
Normal file
22
p3_rl/test_cases/q4/2-tinygrid-noisy.test
Normal file
@@ -0,0 +1,22 @@
|
|||||||
|
class: "QLearningTest"
|
||||||
|
|
||||||
|
# GridWorld specification
|
||||||
|
# _ is empty space
|
||||||
|
# numbers are terminal states with that value
|
||||||
|
# # is a wall
|
||||||
|
# S is a start state
|
||||||
|
#
|
||||||
|
grid: """
|
||||||
|
-10
|
||||||
|
S
|
||||||
|
10
|
||||||
|
"""
|
||||||
|
discount: "0.75"
|
||||||
|
noise: "0.25"
|
||||||
|
livingReward: "0.0"
|
||||||
|
epsilon: "0.5"
|
||||||
|
learningRate: "0.1"
|
||||||
|
numExperiences: "100"
|
||||||
|
valueIterations: "100"
|
||||||
|
iterations: "10000"
|
||||||
|
|
||||||
570
p3_rl/test_cases/q4/3-bridge.solution
Normal file
570
p3_rl/test_cases/q4/3-bridge.solution
Normal file
@@ -0,0 +1,570 @@
|
|||||||
|
q_values_k_0_action_north: """
|
||||||
|
__________ illegal __________
|
||||||
|
illegal 0.0000 illegal
|
||||||
|
illegal 0.0000 illegal
|
||||||
|
illegal 0.0000 illegal
|
||||||
|
illegal 0.0000 illegal
|
||||||
|
illegal 0.0000 illegal
|
||||||
|
__________ illegal __________
|
||||||
|
"""
|
||||||
|
|
||||||
|
q_values_k_0_action_east: """
|
||||||
|
__________ illegal __________
|
||||||
|
illegal 0.0000 illegal
|
||||||
|
illegal 0.0000 illegal
|
||||||
|
illegal 0.0000 illegal
|
||||||
|
illegal 0.0000 illegal
|
||||||
|
illegal 0.0000 illegal
|
||||||
|
__________ illegal __________
|
||||||
|
"""
|
||||||
|
|
||||||
|
q_values_k_0_action_exit: """
|
||||||
|
__________ 0.0000 __________
|
||||||
|
0.0000 illegal 0.0000
|
||||||
|
0.0000 illegal 0.0000
|
||||||
|
0.0000 illegal 0.0000
|
||||||
|
0.0000 illegal 0.0000
|
||||||
|
0.0000 illegal 0.0000
|
||||||
|
__________ 0.0000 __________
|
||||||
|
"""
|
||||||
|
|
||||||
|
q_values_k_0_action_south: """
|
||||||
|
__________ illegal __________
|
||||||
|
illegal 0.0000 illegal
|
||||||
|
illegal 0.0000 illegal
|
||||||
|
illegal 0.0000 illegal
|
||||||
|
illegal 0.0000 illegal
|
||||||
|
illegal 0.0000 illegal
|
||||||
|
__________ illegal __________
|
||||||
|
"""
|
||||||
|
|
||||||
|
q_values_k_0_action_west: """
|
||||||
|
__________ illegal __________
|
||||||
|
illegal 0.0000 illegal
|
||||||
|
illegal 0.0000 illegal
|
||||||
|
illegal 0.0000 illegal
|
||||||
|
illegal 0.0000 illegal
|
||||||
|
illegal 0.0000 illegal
|
||||||
|
__________ illegal __________
|
||||||
|
"""
|
||||||
|
|
||||||
|
q_values_k_1_action_north: """
|
||||||
|
__________ illegal __________
|
||||||
|
illegal 0.0000 illegal
|
||||||
|
illegal 0.0000 illegal
|
||||||
|
illegal 0.0000 illegal
|
||||||
|
illegal 0.0000 illegal
|
||||||
|
illegal 0.0000 illegal
|
||||||
|
__________ illegal __________
|
||||||
|
"""
|
||||||
|
|
||||||
|
q_values_k_1_action_east: """
|
||||||
|
__________ illegal __________
|
||||||
|
illegal 0.0000 illegal
|
||||||
|
illegal 0.0000 illegal
|
||||||
|
illegal 0.0000 illegal
|
||||||
|
illegal 0.0000 illegal
|
||||||
|
illegal 0.0000 illegal
|
||||||
|
__________ illegal __________
|
||||||
|
"""
|
||||||
|
|
||||||
|
q_values_k_1_action_exit: """
|
||||||
|
__________ 0.0000 __________
|
||||||
|
-10.0000 illegal 0.0000
|
||||||
|
0.0000 illegal 0.0000
|
||||||
|
0.0000 illegal 0.0000
|
||||||
|
0.0000 illegal 0.0000
|
||||||
|
0.0000 illegal 0.0000
|
||||||
|
__________ 0.0000 __________
|
||||||
|
"""
|
||||||
|
|
||||||
|
q_values_k_1_action_south: """
|
||||||
|
__________ illegal __________
|
||||||
|
illegal 0.0000 illegal
|
||||||
|
illegal 0.0000 illegal
|
||||||
|
illegal 0.0000 illegal
|
||||||
|
illegal 0.0000 illegal
|
||||||
|
illegal 0.0000 illegal
|
||||||
|
__________ illegal __________
|
||||||
|
"""
|
||||||
|
|
||||||
|
q_values_k_1_action_west: """
|
||||||
|
__________ illegal __________
|
||||||
|
illegal 0.0000 illegal
|
||||||
|
illegal 0.0000 illegal
|
||||||
|
illegal 0.0000 illegal
|
||||||
|
illegal 0.0000 illegal
|
||||||
|
illegal 0.0000 illegal
|
||||||
|
__________ illegal __________
|
||||||
|
"""
|
||||||
|
|
||||||
|
q_values_k_2_action_north: """
|
||||||
|
__________ illegal __________
|
||||||
|
illegal 0.0000 illegal
|
||||||
|
illegal 0.0000 illegal
|
||||||
|
illegal 0.0000 illegal
|
||||||
|
illegal 0.0000 illegal
|
||||||
|
illegal 0.0000 illegal
|
||||||
|
__________ illegal __________
|
||||||
|
"""
|
||||||
|
|
||||||
|
q_values_k_2_action_east: """
|
||||||
|
__________ illegal __________
|
||||||
|
illegal 0.0000 illegal
|
||||||
|
illegal 0.0000 illegal
|
||||||
|
illegal 0.0000 illegal
|
||||||
|
illegal 0.0000 illegal
|
||||||
|
illegal 0.0000 illegal
|
||||||
|
__________ illegal __________
|
||||||
|
"""
|
||||||
|
|
||||||
|
q_values_k_2_action_exit: """
|
||||||
|
__________ 0.0000 __________
|
||||||
|
-10.0000 illegal 0.0000
|
||||||
|
0.0000 illegal 0.0000
|
||||||
|
0.0000 illegal 0.0000
|
||||||
|
0.0000 illegal 0.0000
|
||||||
|
0.0000 illegal 0.0000
|
||||||
|
__________ 0.0000 __________
|
||||||
|
"""
|
||||||
|
|
||||||
|
q_values_k_2_action_south: """
|
||||||
|
__________ illegal __________
|
||||||
|
illegal 0.0000 illegal
|
||||||
|
illegal 0.0000 illegal
|
||||||
|
illegal 0.0000 illegal
|
||||||
|
illegal 0.0000 illegal
|
||||||
|
illegal 0.0000 illegal
|
||||||
|
__________ illegal __________
|
||||||
|
"""
|
||||||
|
|
||||||
|
q_values_k_2_action_west: """
|
||||||
|
__________ illegal __________
|
||||||
|
illegal 0.0000 illegal
|
||||||
|
illegal 0.0000 illegal
|
||||||
|
illegal 0.0000 illegal
|
||||||
|
illegal 0.0000 illegal
|
||||||
|
illegal 0.0000 illegal
|
||||||
|
__________ illegal __________
|
||||||
|
"""
|
||||||
|
|
||||||
|
q_values_k_3_action_north: """
|
||||||
|
__________ illegal __________
|
||||||
|
illegal 0.0000 illegal
|
||||||
|
illegal 0.0000 illegal
|
||||||
|
illegal 0.0000 illegal
|
||||||
|
illegal 0.0000 illegal
|
||||||
|
illegal 0.0000 illegal
|
||||||
|
__________ illegal __________
|
||||||
|
"""
|
||||||
|
|
||||||
|
q_values_k_3_action_east: """
|
||||||
|
__________ illegal __________
|
||||||
|
illegal 0.0000 illegal
|
||||||
|
illegal 0.0000 illegal
|
||||||
|
illegal 0.0000 illegal
|
||||||
|
illegal 0.0000 illegal
|
||||||
|
illegal 0.0000 illegal
|
||||||
|
__________ illegal __________
|
||||||
|
"""
|
||||||
|
|
||||||
|
q_values_k_3_action_exit: """
|
||||||
|
__________ 0.0000 __________
|
||||||
|
-10.0000 illegal 0.0000
|
||||||
|
0.0000 illegal 0.0000
|
||||||
|
0.0000 illegal 0.0000
|
||||||
|
0.0000 illegal 0.0000
|
||||||
|
-10.0000 illegal 0.0000
|
||||||
|
__________ 0.0000 __________
|
||||||
|
"""
|
||||||
|
|
||||||
|
q_values_k_3_action_south: """
|
||||||
|
__________ illegal __________
|
||||||
|
illegal 0.0000 illegal
|
||||||
|
illegal 0.0000 illegal
|
||||||
|
illegal 0.0000 illegal
|
||||||
|
illegal 0.0000 illegal
|
||||||
|
illegal 0.0000 illegal
|
||||||
|
__________ illegal __________
|
||||||
|
"""
|
||||||
|
|
||||||
|
q_values_k_3_action_west: """
|
||||||
|
__________ illegal __________
|
||||||
|
illegal 0.0000 illegal
|
||||||
|
illegal 0.0000 illegal
|
||||||
|
illegal 0.0000 illegal
|
||||||
|
illegal 0.0000 illegal
|
||||||
|
illegal 0.0000 illegal
|
||||||
|
__________ illegal __________
|
||||||
|
"""
|
||||||
|
|
||||||
|
q_values_k_4_action_north: """
|
||||||
|
__________ illegal __________
|
||||||
|
illegal 0.0000 illegal
|
||||||
|
illegal 0.0000 illegal
|
||||||
|
illegal 0.0000 illegal
|
||||||
|
illegal 0.0000 illegal
|
||||||
|
illegal 0.0000 illegal
|
||||||
|
__________ illegal __________
|
||||||
|
"""
|
||||||
|
|
||||||
|
q_values_k_4_action_east: """
|
||||||
|
__________ illegal __________
|
||||||
|
illegal 0.0000 illegal
|
||||||
|
illegal 0.0000 illegal
|
||||||
|
illegal 0.0000 illegal
|
||||||
|
illegal 0.0000 illegal
|
||||||
|
illegal 0.0000 illegal
|
||||||
|
__________ illegal __________
|
||||||
|
"""
|
||||||
|
|
||||||
|
q_values_k_4_action_exit: """
|
||||||
|
__________ 0.0000 __________
|
||||||
|
-10.0000 illegal 0.0000
|
||||||
|
-10.0000 illegal 0.0000
|
||||||
|
0.0000 illegal 0.0000
|
||||||
|
0.0000 illegal 0.0000
|
||||||
|
-10.0000 illegal 0.0000
|
||||||
|
__________ 0.0000 __________
|
||||||
|
"""
|
||||||
|
|
||||||
|
q_values_k_4_action_south: """
|
||||||
|
__________ illegal __________
|
||||||
|
illegal 0.0000 illegal
|
||||||
|
illegal 0.0000 illegal
|
||||||
|
illegal 0.0000 illegal
|
||||||
|
illegal 0.0000 illegal
|
||||||
|
illegal 0.0000 illegal
|
||||||
|
__________ illegal __________
|
||||||
|
"""
|
||||||
|
|
||||||
|
q_values_k_4_action_west: """
|
||||||
|
__________ illegal __________
|
||||||
|
illegal 0.0000 illegal
|
||||||
|
illegal 0.0000 illegal
|
||||||
|
illegal 0.0000 illegal
|
||||||
|
illegal 0.0000 illegal
|
||||||
|
illegal 0.0000 illegal
|
||||||
|
__________ illegal __________
|
||||||
|
"""
|
||||||
|
|
||||||
|
q_values_k_5_action_north: """
|
||||||
|
__________ illegal __________
|
||||||
|
illegal 0.0000 illegal
|
||||||
|
illegal 0.0000 illegal
|
||||||
|
illegal 0.0000 illegal
|
||||||
|
illegal 0.0000 illegal
|
||||||
|
illegal 0.0000 illegal
|
||||||
|
__________ illegal __________
|
||||||
|
"""
|
||||||
|
|
||||||
|
q_values_k_5_action_east: """
|
||||||
|
__________ illegal __________
|
||||||
|
illegal 0.0000 illegal
|
||||||
|
illegal 0.0000 illegal
|
||||||
|
illegal 0.0000 illegal
|
||||||
|
illegal 0.0000 illegal
|
||||||
|
illegal 0.0000 illegal
|
||||||
|
__________ illegal __________
|
||||||
|
"""
|
||||||
|
|
||||||
|
q_values_k_5_action_exit: """
|
||||||
|
__________ 0.0000 __________
|
||||||
|
-10.0000 illegal 0.0000
|
||||||
|
-10.0000 illegal 0.0000
|
||||||
|
0.0000 illegal -10.0000
|
||||||
|
0.0000 illegal 0.0000
|
||||||
|
-10.0000 illegal 0.0000
|
||||||
|
__________ 0.0000 __________
|
||||||
|
"""
|
||||||
|
|
||||||
|
q_values_k_5_action_south: """
|
||||||
|
__________ illegal __________
|
||||||
|
illegal 0.0000 illegal
|
||||||
|
illegal 0.0000 illegal
|
||||||
|
illegal 0.0000 illegal
|
||||||
|
illegal 0.0000 illegal
|
||||||
|
illegal 0.0000 illegal
|
||||||
|
__________ illegal __________
|
||||||
|
"""
|
||||||
|
|
||||||
|
q_values_k_5_action_west: """
|
||||||
|
__________ illegal __________
|
||||||
|
illegal 0.0000 illegal
|
||||||
|
illegal 0.0000 illegal
|
||||||
|
illegal 0.0000 illegal
|
||||||
|
illegal 0.0000 illegal
|
||||||
|
illegal 0.0000 illegal
|
||||||
|
__________ illegal __________
|
||||||
|
"""
|
||||||
|
|
||||||
|
q_values_k_6_action_north: """
|
||||||
|
__________ illegal __________
|
||||||
|
illegal 0.0000 illegal
|
||||||
|
illegal 0.0000 illegal
|
||||||
|
illegal 0.0000 illegal
|
||||||
|
illegal 0.0000 illegal
|
||||||
|
illegal 0.0000 illegal
|
||||||
|
__________ illegal __________
|
||||||
|
"""
|
||||||
|
|
||||||
|
q_values_k_6_action_east: """
|
||||||
|
__________ illegal __________
|
||||||
|
illegal 0.0000 illegal
|
||||||
|
illegal 0.0000 illegal
|
||||||
|
illegal 0.0000 illegal
|
||||||
|
illegal 0.0000 illegal
|
||||||
|
illegal 0.0000 illegal
|
||||||
|
__________ illegal __________
|
||||||
|
"""
|
||||||
|
|
||||||
|
q_values_k_6_action_exit: """
|
||||||
|
__________ 0.0000 __________
|
||||||
|
-10.0000 illegal 0.0000
|
||||||
|
-10.0000 illegal 0.0000
|
||||||
|
-10.0000 illegal -10.0000
|
||||||
|
0.0000 illegal 0.0000
|
||||||
|
-10.0000 illegal 0.0000
|
||||||
|
__________ 0.0000 __________
|
||||||
|
"""
|
||||||
|
|
||||||
|
q_values_k_6_action_south: """
|
||||||
|
__________ illegal __________
|
||||||
|
illegal 0.0000 illegal
|
||||||
|
illegal 0.0000 illegal
|
||||||
|
illegal 0.0000 illegal
|
||||||
|
illegal 0.0000 illegal
|
||||||
|
illegal 0.0000 illegal
|
||||||
|
__________ illegal __________
|
||||||
|
"""
|
||||||
|
|
||||||
|
q_values_k_6_action_west: """
|
||||||
|
__________ illegal __________
|
||||||
|
illegal 0.0000 illegal
|
||||||
|
illegal 0.0000 illegal
|
||||||
|
illegal 0.0000 illegal
|
||||||
|
illegal 0.0000 illegal
|
||||||
|
illegal 0.0000 illegal
|
||||||
|
__________ illegal __________
|
||||||
|
"""
|
||||||
|
|
||||||
|
q_values_k_7_action_north: """
|
||||||
|
__________ illegal __________
|
||||||
|
illegal 0.0000 illegal
|
||||||
|
illegal 0.0000 illegal
|
||||||
|
illegal 0.0000 illegal
|
||||||
|
illegal 0.0000 illegal
|
||||||
|
illegal 0.0000 illegal
|
||||||
|
__________ illegal __________
|
||||||
|
"""
|
||||||
|
|
||||||
|
q_values_k_7_action_east: """
|
||||||
|
__________ illegal __________
|
||||||
|
illegal 0.0000 illegal
|
||||||
|
illegal 0.0000 illegal
|
||||||
|
illegal 0.0000 illegal
|
||||||
|
illegal 0.0000 illegal
|
||||||
|
illegal 0.0000 illegal
|
||||||
|
__________ illegal __________
|
||||||
|
"""
|
||||||
|
|
||||||
|
q_values_k_7_action_exit: """
|
||||||
|
__________ 0.0000 __________
|
||||||
|
-10.0000 illegal 0.0000
|
||||||
|
-10.0000 illegal 0.0000
|
||||||
|
-10.0000 illegal -10.0000
|
||||||
|
0.0000 illegal 0.0000
|
||||||
|
-10.0000 illegal 0.0000
|
||||||
|
__________ 0.0000 __________
|
||||||
|
"""
|
||||||
|
|
||||||
|
q_values_k_7_action_south: """
|
||||||
|
__________ illegal __________
|
||||||
|
illegal 0.0000 illegal
|
||||||
|
illegal 0.0000 illegal
|
||||||
|
illegal 0.0000 illegal
|
||||||
|
illegal 0.0000 illegal
|
||||||
|
illegal 0.0000 illegal
|
||||||
|
__________ illegal __________
|
||||||
|
"""
|
||||||
|
|
||||||
|
q_values_k_7_action_west: """
|
||||||
|
__________ illegal __________
|
||||||
|
illegal 0.0000 illegal
|
||||||
|
illegal 0.0000 illegal
|
||||||
|
illegal 0.0000 illegal
|
||||||
|
illegal 0.0000 illegal
|
||||||
|
illegal 0.0000 illegal
|
||||||
|
__________ illegal __________
|
||||||
|
"""
|
||||||
|
|
||||||
|
q_values_k_8_action_north: """
|
||||||
|
__________ illegal __________
|
||||||
|
illegal 0.0000 illegal
|
||||||
|
illegal 0.0000 illegal
|
||||||
|
illegal 0.0000 illegal
|
||||||
|
illegal 0.0000 illegal
|
||||||
|
illegal 0.0000 illegal
|
||||||
|
__________ illegal __________
|
||||||
|
"""
|
||||||
|
|
||||||
|
q_values_k_8_action_east: """
|
||||||
|
__________ illegal __________
|
||||||
|
illegal 0.0000 illegal
|
||||||
|
illegal 0.0000 illegal
|
||||||
|
illegal 0.0000 illegal
|
||||||
|
illegal 0.0000 illegal
|
||||||
|
illegal 0.0000 illegal
|
||||||
|
__________ illegal __________
|
||||||
|
"""
|
||||||
|
|
||||||
|
q_values_k_8_action_exit: """
|
||||||
|
__________ 0.0000 __________
|
||||||
|
-10.0000 illegal 0.0000
|
||||||
|
-10.0000 illegal 0.0000
|
||||||
|
-10.0000 illegal -10.0000
|
||||||
|
-10.0000 illegal 0.0000
|
||||||
|
-10.0000 illegal 0.0000
|
||||||
|
__________ 0.0000 __________
|
||||||
|
"""
|
||||||
|
|
||||||
|
q_values_k_8_action_south: """
|
||||||
|
__________ illegal __________
|
||||||
|
illegal 0.0000 illegal
|
||||||
|
illegal 0.0000 illegal
|
||||||
|
illegal 0.0000 illegal
|
||||||
|
illegal 0.0000 illegal
|
||||||
|
illegal 0.0000 illegal
|
||||||
|
__________ illegal __________
|
||||||
|
"""
|
||||||
|
|
||||||
|
q_values_k_8_action_west: """
|
||||||
|
__________ illegal __________
|
||||||
|
illegal 0.0000 illegal
|
||||||
|
illegal 0.0000 illegal
|
||||||
|
illegal 0.0000 illegal
|
||||||
|
illegal 0.0000 illegal
|
||||||
|
illegal 0.0000 illegal
|
||||||
|
__________ illegal __________
|
||||||
|
"""
|
||||||
|
|
||||||
|
q_values_k_9_action_north: """
|
||||||
|
__________ illegal __________
|
||||||
|
illegal 0.0000 illegal
|
||||||
|
illegal 0.0000 illegal
|
||||||
|
illegal 0.0000 illegal
|
||||||
|
illegal 0.0000 illegal
|
||||||
|
illegal 0.0000 illegal
|
||||||
|
__________ illegal __________
|
||||||
|
"""
|
||||||
|
|
||||||
|
q_values_k_9_action_east: """
|
||||||
|
__________ illegal __________
|
||||||
|
illegal 0.0000 illegal
|
||||||
|
illegal 0.0000 illegal
|
||||||
|
illegal 0.0000 illegal
|
||||||
|
illegal 0.0000 illegal
|
||||||
|
illegal 0.0000 illegal
|
||||||
|
__________ illegal __________
|
||||||
|
"""
|
||||||
|
|
||||||
|
q_values_k_9_action_exit: """
|
||||||
|
__________ 0.0000 __________
|
||||||
|
-10.0000 illegal 0.0000
|
||||||
|
-10.0000 illegal 0.0000
|
||||||
|
-10.0000 illegal -10.0000
|
||||||
|
-10.0000 illegal 0.0000
|
||||||
|
-10.0000 illegal 0.0000
|
||||||
|
__________ 0.1000 __________
|
||||||
|
"""
|
||||||
|
|
||||||
|
q_values_k_9_action_south: """
|
||||||
|
__________ illegal __________
|
||||||
|
illegal 0.0000 illegal
|
||||||
|
illegal 0.0000 illegal
|
||||||
|
illegal 0.0000 illegal
|
||||||
|
illegal 0.0000 illegal
|
||||||
|
illegal 0.0000 illegal
|
||||||
|
__________ illegal __________
|
||||||
|
"""
|
||||||
|
|
||||||
|
q_values_k_9_action_west: """
|
||||||
|
__________ illegal __________
|
||||||
|
illegal 0.0000 illegal
|
||||||
|
illegal 0.0000 illegal
|
||||||
|
illegal 0.0000 illegal
|
||||||
|
illegal 0.0000 illegal
|
||||||
|
illegal 0.0000 illegal
|
||||||
|
__________ illegal __________
|
||||||
|
"""
|
||||||
|
|
||||||
|
q_values_k_500_action_north: """
|
||||||
|
__________ illegal __________
|
||||||
|
illegal -5.8648 illegal
|
||||||
|
illegal -0.7995 illegal
|
||||||
|
illegal -0.1671 illegal
|
||||||
|
illegal -1.2642 illegal
|
||||||
|
illegal -0.5871 illegal
|
||||||
|
__________ illegal __________
|
||||||
|
"""
|
||||||
|
|
||||||
|
q_values_k_500_action_east: """
|
||||||
|
__________ illegal __________
|
||||||
|
illegal -17.0676 illegal
|
||||||
|
illegal -26.5534 illegal
|
||||||
|
illegal -3.6957 illegal
|
||||||
|
illegal -43.5952 illegal
|
||||||
|
illegal -31.6884 illegal
|
||||||
|
__________ illegal __________
|
||||||
|
"""
|
||||||
|
|
||||||
|
q_values_k_500_action_exit: """
|
||||||
|
__________ 9.3539 __________
|
||||||
|
-96.5663 illegal -96.9097
|
||||||
|
-97.7472 illegal -94.1850
|
||||||
|
-89.0581 illegal -96.9097
|
||||||
|
-97.2187 illegal -87.8423
|
||||||
|
-92.8210 illegal -97.2187
|
||||||
|
__________ 0.9576 __________
|
||||||
|
"""
|
||||||
|
|
||||||
|
q_values_k_500_action_south: """
|
||||||
|
__________ illegal __________
|
||||||
|
illegal -6.8377 illegal
|
||||||
|
illegal -6.7277 illegal
|
||||||
|
illegal -3.4723 illegal
|
||||||
|
illegal -8.4015 illegal
|
||||||
|
illegal -5.5718 illegal
|
||||||
|
__________ illegal __________
|
||||||
|
"""
|
||||||
|
|
||||||
|
q_values_k_500_action_west: """
|
||||||
|
__________ illegal __________
|
||||||
|
illegal -27.0626 illegal
|
||||||
|
illegal -39.0610 illegal
|
||||||
|
illegal -40.5887 illegal
|
||||||
|
illegal -16.2839 illegal
|
||||||
|
illegal -20.7770 illegal
|
||||||
|
__________ illegal __________
|
||||||
|
"""
|
||||||
|
|
||||||
|
values: """
|
||||||
|
__________ 9.3539 __________
|
||||||
|
-96.5663 -5.8648 -96.9097
|
||||||
|
-97.7472 -0.7995 -94.1850
|
||||||
|
-89.0581 -0.1671 -96.9097
|
||||||
|
-97.2187 -1.2642 -87.8423
|
||||||
|
-92.8210 -0.5871 -97.2187
|
||||||
|
__________ 0.9576 __________
|
||||||
|
"""
|
||||||
|
|
||||||
|
policy: """
|
||||||
|
__________ exit __________
|
||||||
|
exit north exit
|
||||||
|
exit north exit
|
||||||
|
exit north exit
|
||||||
|
exit north exit
|
||||||
|
exit north exit
|
||||||
|
__________ exit __________
|
||||||
|
"""
|
||||||
|
|
||||||
27
p3_rl/test_cases/q4/3-bridge.test
Normal file
27
p3_rl/test_cases/q4/3-bridge.test
Normal file
@@ -0,0 +1,27 @@
|
|||||||
|
class: "QLearningTest"
|
||||||
|
|
||||||
|
# GridWorld specification
|
||||||
|
# _ is empty space
|
||||||
|
# numbers are terminal states with that value
|
||||||
|
# # is a wall
|
||||||
|
# S is a start state
|
||||||
|
#
|
||||||
|
grid: """
|
||||||
|
# 10 #
|
||||||
|
-100 _ -100
|
||||||
|
-100 _ -100
|
||||||
|
-100 _ -100
|
||||||
|
-100 _ -100
|
||||||
|
-100 S -100
|
||||||
|
# 1 #
|
||||||
|
"""
|
||||||
|
gridName: "bridgeGrid"
|
||||||
|
discount: "0.85"
|
||||||
|
noise: "0.1"
|
||||||
|
livingReward: "0.0"
|
||||||
|
epsilon: "0.5"
|
||||||
|
learningRate: "0.1"
|
||||||
|
numExperiences: "500"
|
||||||
|
valueIterations: "100"
|
||||||
|
iterations: "10000"
|
||||||
|
|
||||||
456
p3_rl/test_cases/q4/4-discountgrid.solution
Normal file
456
p3_rl/test_cases/q4/4-discountgrid.solution
Normal file
@@ -0,0 +1,456 @@
|
|||||||
|
q_values_k_0_action_north: """
|
||||||
|
illegal 0.0000 illegal 0.0000 0.0000
|
||||||
|
illegal 0.0000 __________ 0.0000 0.0000
|
||||||
|
illegal 0.0000 illegal 0.0000 0.0000
|
||||||
|
illegal 0.0000 __________ __________ 0.0000
|
||||||
|
illegal 0.0000 0.0000 0.0000 0.0000
|
||||||
|
"""
|
||||||
|
|
||||||
|
q_values_k_0_action_east: """
|
||||||
|
illegal 0.0000 illegal 0.0000 0.0000
|
||||||
|
illegal 0.0000 __________ 0.0000 0.0000
|
||||||
|
illegal 0.0000 illegal 0.0000 0.0000
|
||||||
|
illegal 0.0000 __________ __________ 0.0000
|
||||||
|
illegal 0.0000 0.0000 0.0000 0.0000
|
||||||
|
"""
|
||||||
|
|
||||||
|
q_values_k_0_action_exit: """
|
||||||
|
0.0000 illegal 0.0000 illegal illegal
|
||||||
|
0.0000 illegal __________ illegal illegal
|
||||||
|
0.0000 illegal 0.0000 illegal illegal
|
||||||
|
0.0000 illegal __________ __________ illegal
|
||||||
|
0.0000 illegal illegal illegal illegal
|
||||||
|
"""
|
||||||
|
|
||||||
|
q_values_k_0_action_south: """
|
||||||
|
illegal 0.0000 illegal 0.0000 0.0000
|
||||||
|
illegal 0.0000 __________ 0.0000 0.0000
|
||||||
|
illegal 0.0000 illegal 0.0000 0.0000
|
||||||
|
illegal 0.0000 __________ __________ 0.0000
|
||||||
|
illegal 0.0000 0.0000 0.0000 0.0000
|
||||||
|
"""
|
||||||
|
|
||||||
|
q_values_k_0_action_west: """
|
||||||
|
illegal 0.0000 illegal 0.0000 0.0000
|
||||||
|
illegal 0.0000 __________ 0.0000 0.0000
|
||||||
|
illegal 0.0000 illegal 0.0000 0.0000
|
||||||
|
illegal 0.0000 __________ __________ 0.0000
|
||||||
|
illegal 0.0000 0.0000 0.0000 0.0000
|
||||||
|
"""
|
||||||
|
|
||||||
|
q_values_k_1_action_north: """
|
||||||
|
illegal 0.0000 illegal 0.0000 0.0000
|
||||||
|
illegal 0.0000 __________ 0.0000 0.0000
|
||||||
|
illegal 0.0000 illegal 0.0000 0.0000
|
||||||
|
illegal 0.0000 __________ __________ 0.0000
|
||||||
|
illegal 0.0000 0.0000 0.0000 0.0000
|
||||||
|
"""
|
||||||
|
|
||||||
|
q_values_k_1_action_east: """
|
||||||
|
illegal 0.0000 illegal 0.0000 0.0000
|
||||||
|
illegal 0.0000 __________ 0.0000 0.0000
|
||||||
|
illegal 0.0000 illegal 0.0000 0.0000
|
||||||
|
illegal 0.0000 __________ __________ 0.0000
|
||||||
|
illegal 0.0000 0.0000 0.0000 0.0000
|
||||||
|
"""
|
||||||
|
|
||||||
|
q_values_k_1_action_exit: """
|
||||||
|
0.0000 illegal 0.0000 illegal illegal
|
||||||
|
0.0000 illegal __________ illegal illegal
|
||||||
|
0.0000 illegal 0.0000 illegal illegal
|
||||||
|
0.0000 illegal __________ __________ illegal
|
||||||
|
0.0000 illegal illegal illegal illegal
|
||||||
|
"""
|
||||||
|
|
||||||
|
q_values_k_1_action_south: """
|
||||||
|
illegal 0.0000 illegal 0.0000 0.0000
|
||||||
|
illegal 0.0000 __________ 0.0000 0.0000
|
||||||
|
illegal 0.0000 illegal 0.0000 0.0000
|
||||||
|
illegal 0.0000 __________ __________ 0.0000
|
||||||
|
illegal 0.0000 0.0000 0.0000 0.0000
|
||||||
|
"""
|
||||||
|
|
||||||
|
q_values_k_1_action_west: """
|
||||||
|
illegal 0.0000 illegal 0.0000 0.0000
|
||||||
|
illegal 0.0000 __________ 0.0000 0.0000
|
||||||
|
illegal 0.0000 illegal 0.0000 0.0000
|
||||||
|
illegal 0.0000 __________ __________ 0.0000
|
||||||
|
illegal 0.0000 0.0000 0.0000 0.0000
|
||||||
|
"""
|
||||||
|
|
||||||
|
q_values_k_2_action_north: """
|
||||||
|
illegal 0.0000 illegal 0.0000 0.0000
|
||||||
|
illegal 0.0000 __________ 0.0000 0.0000
|
||||||
|
illegal 0.0000 illegal 0.0000 0.0000
|
||||||
|
illegal 0.0000 __________ __________ 0.0000
|
||||||
|
illegal 0.0000 0.0000 0.0000 0.0000
|
||||||
|
"""
|
||||||
|
|
||||||
|
q_values_k_2_action_east: """
|
||||||
|
illegal 0.0000 illegal 0.0000 0.0000
|
||||||
|
illegal 0.0000 __________ 0.0000 0.0000
|
||||||
|
illegal 0.0000 illegal 0.0000 0.0000
|
||||||
|
illegal 0.0000 __________ __________ 0.0000
|
||||||
|
illegal 0.0000 0.0000 0.0000 0.0000
|
||||||
|
"""
|
||||||
|
|
||||||
|
q_values_k_2_action_exit: """
|
||||||
|
0.0000 illegal 0.0000 illegal illegal
|
||||||
|
0.0000 illegal __________ illegal illegal
|
||||||
|
0.0000 illegal 0.0000 illegal illegal
|
||||||
|
0.0000 illegal __________ __________ illegal
|
||||||
|
0.0000 illegal illegal illegal illegal
|
||||||
|
"""
|
||||||
|
|
||||||
|
q_values_k_2_action_south: """
|
||||||
|
illegal 0.0000 illegal 0.0000 0.0000
|
||||||
|
illegal 0.0000 __________ 0.0000 0.0000
|
||||||
|
illegal 0.0000 illegal 0.0000 0.0000
|
||||||
|
illegal 0.0000 __________ __________ 0.0000
|
||||||
|
illegal 0.0000 0.0000 0.0000 0.0000
|
||||||
|
"""
|
||||||
|
|
||||||
|
q_values_k_2_action_west: """
|
||||||
|
illegal 0.0000 illegal 0.0000 0.0000
|
||||||
|
illegal 0.0000 __________ 0.0000 0.0000
|
||||||
|
illegal 0.0000 illegal 0.0000 0.0000
|
||||||
|
illegal 0.0000 __________ __________ 0.0000
|
||||||
|
illegal 0.0000 0.0000 0.0000 0.0000
|
||||||
|
"""
|
||||||
|
|
||||||
|
q_values_k_3_action_north: """
|
||||||
|
illegal 0.0000 illegal 0.0000 0.0000
|
||||||
|
illegal 0.0000 __________ 0.0000 0.0000
|
||||||
|
illegal 0.0000 illegal 0.0000 0.0000
|
||||||
|
illegal 0.0000 __________ __________ 0.0000
|
||||||
|
illegal 0.0000 0.0000 0.0000 0.0000
|
||||||
|
"""
|
||||||
|
|
||||||
|
q_values_k_3_action_east: """
|
||||||
|
illegal 0.0000 illegal 0.0000 0.0000
|
||||||
|
illegal 0.0000 __________ 0.0000 0.0000
|
||||||
|
illegal 0.0000 illegal 0.0000 0.0000
|
||||||
|
illegal 0.0000 __________ __________ 0.0000
|
||||||
|
illegal 0.0000 0.0000 0.0000 0.0000
|
||||||
|
"""
|
||||||
|
|
||||||
|
q_values_k_3_action_exit: """
|
||||||
|
0.0000 illegal 0.0000 illegal illegal
|
||||||
|
0.0000 illegal __________ illegal illegal
|
||||||
|
0.0000 illegal 0.0000 illegal illegal
|
||||||
|
0.0000 illegal __________ __________ illegal
|
||||||
|
-1.0000 illegal illegal illegal illegal
|
||||||
|
"""
|
||||||
|
|
||||||
|
q_values_k_3_action_south: """
|
||||||
|
illegal 0.0000 illegal 0.0000 0.0000
|
||||||
|
illegal 0.0000 __________ 0.0000 0.0000
|
||||||
|
illegal 0.0000 illegal 0.0000 0.0000
|
||||||
|
illegal 0.0000 __________ __________ 0.0000
|
||||||
|
illegal 0.0000 0.0000 0.0000 0.0000
|
||||||
|
"""
|
||||||
|
|
||||||
|
q_values_k_3_action_west: """
|
||||||
|
illegal 0.0000 illegal 0.0000 0.0000
|
||||||
|
illegal 0.0000 __________ 0.0000 0.0000
|
||||||
|
illegal 0.0000 illegal 0.0000 0.0000
|
||||||
|
illegal 0.0000 __________ __________ 0.0000
|
||||||
|
illegal 0.0000 0.0000 0.0000 0.0000
|
||||||
|
"""
|
||||||
|
|
||||||
|
q_values_k_4_action_north: """
|
||||||
|
illegal 0.0000 illegal 0.0000 0.0000
|
||||||
|
illegal 0.0000 __________ 0.0000 0.0000
|
||||||
|
illegal 0.0000 illegal 0.0000 0.0000
|
||||||
|
illegal 0.0000 __________ __________ 0.0000
|
||||||
|
illegal 0.0000 0.0000 0.0000 0.0000
|
||||||
|
"""
|
||||||
|
|
||||||
|
q_values_k_4_action_east: """
|
||||||
|
illegal 0.0000 illegal 0.0000 0.0000
|
||||||
|
illegal 0.0000 __________ 0.0000 0.0000
|
||||||
|
illegal 0.0000 illegal 0.0000 0.0000
|
||||||
|
illegal 0.0000 __________ __________ 0.0000
|
||||||
|
illegal 0.0000 0.0000 0.0000 0.0000
|
||||||
|
"""
|
||||||
|
|
||||||
|
q_values_k_4_action_exit: """
|
||||||
|
0.0000 illegal 0.0000 illegal illegal
|
||||||
|
0.0000 illegal __________ illegal illegal
|
||||||
|
0.0000 illegal 0.0000 illegal illegal
|
||||||
|
0.0000 illegal __________ __________ illegal
|
||||||
|
-1.0000 illegal illegal illegal illegal
|
||||||
|
"""
|
||||||
|
|
||||||
|
q_values_k_4_action_south: """
|
||||||
|
illegal 0.0000 illegal 0.0000 0.0000
|
||||||
|
illegal 0.0000 __________ 0.0000 0.0000
|
||||||
|
illegal 0.0000 illegal 0.0000 0.0000
|
||||||
|
illegal 0.0000 __________ __________ 0.0000
|
||||||
|
illegal 0.0000 0.0000 0.0000 0.0000
|
||||||
|
"""
|
||||||
|
|
||||||
|
q_values_k_4_action_west: """
|
||||||
|
illegal 0.0000 illegal 0.0000 0.0000
|
||||||
|
illegal 0.0000 __________ 0.0000 0.0000
|
||||||
|
illegal 0.0000 illegal 0.0000 0.0000
|
||||||
|
illegal 0.0000 __________ __________ 0.0000
|
||||||
|
illegal 0.0000 0.0000 0.0000 0.0000
|
||||||
|
"""
|
||||||
|
|
||||||
|
q_values_k_5_action_north: """
|
||||||
|
illegal 0.0000 illegal 0.0000 0.0000
|
||||||
|
illegal 0.0000 __________ 0.0000 0.0000
|
||||||
|
illegal 0.0000 illegal 0.0000 0.0000
|
||||||
|
illegal 0.0000 __________ __________ 0.0000
|
||||||
|
illegal 0.0000 0.0000 0.0000 0.0000
|
||||||
|
"""
|
||||||
|
|
||||||
|
q_values_k_5_action_east: """
|
||||||
|
illegal 0.0000 illegal 0.0000 0.0000
|
||||||
|
illegal 0.0000 __________ 0.0000 0.0000
|
||||||
|
illegal 0.0000 illegal 0.0000 0.0000
|
||||||
|
illegal 0.0000 __________ __________ 0.0000
|
||||||
|
illegal 0.0000 0.0000 0.0000 0.0000
|
||||||
|
"""
|
||||||
|
|
||||||
|
q_values_k_5_action_exit: """
|
||||||
|
0.0000 illegal 0.0000 illegal illegal
|
||||||
|
0.0000 illegal __________ illegal illegal
|
||||||
|
0.0000 illegal 0.0000 illegal illegal
|
||||||
|
0.0000 illegal __________ __________ illegal
|
||||||
|
-1.0000 illegal illegal illegal illegal
|
||||||
|
"""
|
||||||
|
|
||||||
|
q_values_k_5_action_south: """
|
||||||
|
illegal 0.0000 illegal 0.0000 0.0000
|
||||||
|
illegal 0.0000 __________ 0.0000 0.0000
|
||||||
|
illegal 0.0000 illegal 0.0000 0.0000
|
||||||
|
illegal 0.0000 __________ __________ 0.0000
|
||||||
|
illegal 0.0000 0.0000 0.0000 0.0000
|
||||||
|
"""
|
||||||
|
|
||||||
|
q_values_k_5_action_west: """
|
||||||
|
illegal 0.0000 illegal 0.0000 0.0000
|
||||||
|
illegal 0.0000 __________ 0.0000 0.0000
|
||||||
|
illegal 0.0000 illegal 0.0000 0.0000
|
||||||
|
illegal 0.0000 __________ __________ 0.0000
|
||||||
|
illegal 0.0000 0.0000 0.0000 0.0000
|
||||||
|
"""
|
||||||
|
|
||||||
|
q_values_k_6_action_north: """
|
||||||
|
illegal 0.0000 illegal 0.0000 0.0000
|
||||||
|
illegal 0.0000 __________ 0.0000 0.0000
|
||||||
|
illegal 0.0000 illegal 0.0000 0.0000
|
||||||
|
illegal 0.0000 __________ __________ 0.0000
|
||||||
|
illegal 0.0000 0.0000 0.0000 0.0000
|
||||||
|
"""
|
||||||
|
|
||||||
|
q_values_k_6_action_east: """
|
||||||
|
illegal 0.0000 illegal 0.0000 0.0000
|
||||||
|
illegal 0.0000 __________ 0.0000 0.0000
|
||||||
|
illegal 0.0000 illegal 0.0000 0.0000
|
||||||
|
illegal 0.0000 __________ __________ 0.0000
|
||||||
|
illegal 0.0000 0.0000 0.0000 0.0000
|
||||||
|
"""
|
||||||
|
|
||||||
|
q_values_k_6_action_exit: """
|
||||||
|
0.0000 illegal 0.0000 illegal illegal
|
||||||
|
-1.0000 illegal __________ illegal illegal
|
||||||
|
0.0000 illegal 0.0000 illegal illegal
|
||||||
|
0.0000 illegal __________ __________ illegal
|
||||||
|
-1.0000 illegal illegal illegal illegal
|
||||||
|
"""
|
||||||
|
|
||||||
|
q_values_k_6_action_south: """
|
||||||
|
illegal 0.0000 illegal 0.0000 0.0000
|
||||||
|
illegal 0.0000 __________ 0.0000 0.0000
|
||||||
|
illegal 0.0000 illegal 0.0000 0.0000
|
||||||
|
illegal 0.0000 __________ __________ 0.0000
|
||||||
|
illegal 0.0000 0.0000 0.0000 0.0000
|
||||||
|
"""
|
||||||
|
|
||||||
|
q_values_k_6_action_west: """
|
||||||
|
illegal 0.0000 illegal 0.0000 0.0000
|
||||||
|
illegal 0.0000 __________ 0.0000 0.0000
|
||||||
|
illegal 0.0000 illegal 0.0000 0.0000
|
||||||
|
illegal 0.0000 __________ __________ 0.0000
|
||||||
|
illegal 0.0000 0.0000 0.0000 0.0000
|
||||||
|
"""
|
||||||
|
|
||||||
|
q_values_k_7_action_north: """
|
||||||
|
illegal 0.0000 illegal 0.0000 0.0000
|
||||||
|
illegal 0.0000 __________ 0.0000 0.0000
|
||||||
|
illegal 0.0000 illegal 0.0000 0.0000
|
||||||
|
illegal 0.0000 __________ __________ 0.0000
|
||||||
|
illegal 0.0000 0.0000 0.0000 0.0000
|
||||||
|
"""
|
||||||
|
|
||||||
|
q_values_k_7_action_east: """
|
||||||
|
illegal 0.0000 illegal 0.0000 0.0000
|
||||||
|
illegal 0.0000 __________ 0.0000 0.0000
|
||||||
|
illegal 0.0000 illegal 0.0000 0.0000
|
||||||
|
illegal 0.0000 __________ __________ 0.0000
|
||||||
|
illegal 0.0000 0.0000 0.0000 0.0000
|
||||||
|
"""
|
||||||
|
|
||||||
|
q_values_k_7_action_exit: """
|
||||||
|
0.0000 illegal 0.0000 illegal illegal
|
||||||
|
-1.0000 illegal __________ illegal illegal
|
||||||
|
0.0000 illegal 0.1000 illegal illegal
|
||||||
|
0.0000 illegal __________ __________ illegal
|
||||||
|
-1.0000 illegal illegal illegal illegal
|
||||||
|
"""
|
||||||
|
|
||||||
|
q_values_k_7_action_south: """
|
||||||
|
illegal 0.0000 illegal 0.0000 0.0000
|
||||||
|
illegal 0.0000 __________ 0.0000 0.0000
|
||||||
|
illegal 0.0000 illegal 0.0000 0.0000
|
||||||
|
illegal 0.0000 __________ __________ 0.0000
|
||||||
|
illegal 0.0000 0.0000 0.0000 0.0000
|
||||||
|
"""
|
||||||
|
|
||||||
|
q_values_k_7_action_west: """
|
||||||
|
illegal 0.0000 illegal 0.0000 0.0000
|
||||||
|
illegal 0.0000 __________ 0.0000 0.0000
|
||||||
|
illegal 0.0000 illegal 0.0000 0.0000
|
||||||
|
illegal 0.0000 __________ __________ 0.0000
|
||||||
|
illegal 0.0000 0.0000 0.0000 0.0000
|
||||||
|
"""
|
||||||
|
|
||||||
|
q_values_k_8_action_north: """
|
||||||
|
illegal 0.0000 illegal 0.0000 0.0000
|
||||||
|
illegal 0.0000 __________ 0.0000 0.0000
|
||||||
|
illegal 0.0000 illegal 0.0000 0.0000
|
||||||
|
illegal 0.0000 __________ __________ 0.0000
|
||||||
|
illegal 0.0000 0.0000 0.0000 0.0000
|
||||||
|
"""
|
||||||
|
|
||||||
|
q_values_k_8_action_east: """
|
||||||
|
illegal 0.0000 illegal 0.0000 0.0000
|
||||||
|
illegal 0.0000 __________ 0.0000 0.0000
|
||||||
|
illegal 0.0000 illegal 0.0000 0.0000
|
||||||
|
illegal 0.0000 __________ __________ 0.0000
|
||||||
|
illegal 0.0000 0.0000 0.0000 0.0000
|
||||||
|
"""
|
||||||
|
|
||||||
|
q_values_k_8_action_exit: """
|
||||||
|
0.0000 illegal 0.0000 illegal illegal
|
||||||
|
-1.0000 illegal __________ illegal illegal
|
||||||
|
0.0000 illegal 0.1000 illegal illegal
|
||||||
|
-1.0000 illegal __________ __________ illegal
|
||||||
|
-1.0000 illegal illegal illegal illegal
|
||||||
|
"""
|
||||||
|
|
||||||
|
q_values_k_8_action_south: """
|
||||||
|
illegal 0.0000 illegal 0.0000 0.0000
|
||||||
|
illegal 0.0000 __________ 0.0000 0.0000
|
||||||
|
illegal 0.0000 illegal 0.0000 0.0000
|
||||||
|
illegal 0.0000 __________ __________ 0.0000
|
||||||
|
illegal 0.0000 0.0000 0.0000 0.0000
|
||||||
|
"""
|
||||||
|
|
||||||
|
q_values_k_8_action_west: """
|
||||||
|
illegal 0.0000 illegal 0.0000 0.0000
|
||||||
|
illegal 0.0000 __________ 0.0000 0.0000
|
||||||
|
illegal 0.0000 illegal 0.0000 0.0000
|
||||||
|
illegal 0.0000 __________ __________ 0.0000
|
||||||
|
illegal 0.0000 0.0000 0.0000 0.0000
|
||||||
|
"""
|
||||||
|
|
||||||
|
q_values_k_9_action_north: """
|
||||||
|
illegal 0.0000 illegal 0.0000 0.0000
|
||||||
|
illegal 0.0000 __________ 0.0000 0.0000
|
||||||
|
illegal 0.0000 illegal 0.0000 0.0000
|
||||||
|
illegal -0.0900 __________ __________ 0.0000
|
||||||
|
illegal 0.0000 0.0000 0.0000 0.0000
|
||||||
|
"""
|
||||||
|
|
||||||
|
q_values_k_9_action_east: """
|
||||||
|
illegal 0.0000 illegal 0.0000 0.0000
|
||||||
|
illegal 0.0000 __________ 0.0000 0.0000
|
||||||
|
illegal 0.0000 illegal 0.0000 0.0000
|
||||||
|
illegal 0.0000 __________ __________ 0.0000
|
||||||
|
illegal 0.0000 0.0000 0.0000 0.0000
|
||||||
|
"""
|
||||||
|
|
||||||
|
q_values_k_9_action_exit: """
|
||||||
|
0.0000 illegal 0.0000 illegal illegal
|
||||||
|
-1.0000 illegal __________ illegal illegal
|
||||||
|
0.0000 illegal 0.1000 illegal illegal
|
||||||
|
-1.0000 illegal __________ __________ illegal
|
||||||
|
-1.0000 illegal illegal illegal illegal
|
||||||
|
"""
|
||||||
|
|
||||||
|
q_values_k_9_action_south: """
|
||||||
|
illegal 0.0000 illegal 0.0000 0.0000
|
||||||
|
illegal 0.0000 __________ 0.0000 0.0000
|
||||||
|
illegal 0.0000 illegal 0.0000 0.0000
|
||||||
|
illegal 0.0000 __________ __________ 0.0000
|
||||||
|
illegal 0.0000 0.0000 0.0000 0.0000
|
||||||
|
"""
|
||||||
|
|
||||||
|
q_values_k_9_action_west: """
|
||||||
|
illegal 0.0000 illegal 0.0000 0.0000
|
||||||
|
illegal 0.0000 __________ 0.0000 0.0000
|
||||||
|
illegal 0.0000 illegal 0.0000 0.0000
|
||||||
|
illegal 0.0000 __________ __________ 0.0000
|
||||||
|
illegal 0.0000 0.0000 0.0000 0.0000
|
||||||
|
"""
|
||||||
|
|
||||||
|
q_values_k_3000_action_north: """
|
||||||
|
illegal 4.3205 illegal 6.1517 3.8095
|
||||||
|
illegal 4.4238 __________ 5.2284 3.5129
|
||||||
|
illegal 1.0694 illegal 3.6867 2.0418
|
||||||
|
illegal 0.3423 __________ __________ 1.0655
|
||||||
|
illegal 0.0073 0.0079 0.0484 0.3768
|
||||||
|
"""
|
||||||
|
|
||||||
|
q_values_k_3000_action_east: """
|
||||||
|
illegal 8.0584 illegal 3.7245 3.3947
|
||||||
|
illegal 2.0499 __________ 3.2373 2.1742
|
||||||
|
illegal 0.8687 illegal 1.7398 1.2671
|
||||||
|
illegal 0.2927 __________ __________ 0.6669
|
||||||
|
illegal 0.0239 0.0097 0.1611 0.2051
|
||||||
|
"""
|
||||||
|
|
||||||
|
q_values_k_3000_action_exit: """
|
||||||
|
-10.0000 illegal 10.0000 illegal illegal
|
||||||
|
-10.0000 illegal __________ illegal illegal
|
||||||
|
-10.0000 illegal 1.0000 illegal illegal
|
||||||
|
-10.0000 illegal __________ __________ illegal
|
||||||
|
-9.9999 illegal illegal illegal illegal
|
||||||
|
"""
|
||||||
|
|
||||||
|
q_values_k_3000_action_south: """
|
||||||
|
illegal -0.3521 illegal 3.6948 2.9139
|
||||||
|
illegal -0.5605 __________ 2.1346 1.5674
|
||||||
|
illegal 0.2093 illegal 1.5389 0.5521
|
||||||
|
illegal -0.5505 __________ __________ 0.1006
|
||||||
|
illegal -1.8501 0.0060 0.0514 0.1223
|
||||||
|
"""
|
||||||
|
|
||||||
|
q_values_k_3000_action_west: """
|
||||||
|
illegal -6.2001 illegal 7.5146 4.9014
|
||||||
|
illegal -5.4013 __________ 4.0484 3.4126
|
||||||
|
illegal -8.0399 illegal 0.9653 1.6081
|
||||||
|
illegal -7.4767 __________ __________ 0.3934
|
||||||
|
illegal -6.3432 0.0179 0.0188 0.1028
|
||||||
|
"""
|
||||||
|
|
||||||
|
values: """
|
||||||
|
-10.0000 8.0584 10.0000 7.5146 4.9014
|
||||||
|
-10.0000 4.4238 __________ 5.2284 3.5129
|
||||||
|
-10.0000 1.0694 1.0000 3.6867 2.0418
|
||||||
|
-10.0000 0.3423 __________ __________ 1.0655
|
||||||
|
-9.9999 0.0239 0.0179 0.1611 0.3768
|
||||||
|
"""
|
||||||
|
|
||||||
|
policy: """
|
||||||
|
exit east exit west west
|
||||||
|
exit north __________ north north
|
||||||
|
exit north exit north north
|
||||||
|
exit north __________ __________ north
|
||||||
|
exit east west east north
|
||||||
|
"""
|
||||||
|
|
||||||
24
p3_rl/test_cases/q4/4-discountgrid.test
Normal file
24
p3_rl/test_cases/q4/4-discountgrid.test
Normal file
@@ -0,0 +1,24 @@
|
|||||||
|
class: "QLearningTest"
|
||||||
|
|
||||||
|
# GridWorld specification
|
||||||
|
# _ is empty space
|
||||||
|
# numbers are terminal states with that value
|
||||||
|
# # is a wall
|
||||||
|
# S is a start state
|
||||||
|
#
|
||||||
|
grid: """
|
||||||
|
-10 _ 10 _ _
|
||||||
|
-10 _ # _ _
|
||||||
|
-10 _ 1 _ _
|
||||||
|
-10 _ # # _
|
||||||
|
-10 S _ _ _
|
||||||
|
"""
|
||||||
|
discount: "0.9"
|
||||||
|
noise: "0.2"
|
||||||
|
livingReward: "0.0"
|
||||||
|
epsilon: "0.2"
|
||||||
|
learningRate: "0.1"
|
||||||
|
numExperiences: "3000"
|
||||||
|
valueIterations: "100"
|
||||||
|
iterations: "10000"
|
||||||
|
|
||||||
2
p3_rl/test_cases/q4/CONFIG
Normal file
2
p3_rl/test_cases/q4/CONFIG
Normal file
@@ -0,0 +1,2 @@
|
|||||||
|
max_points: "5"
|
||||||
|
class: "PassAllTestsQuestion"
|
||||||
2
p3_rl/test_cases/q5/1-tinygrid.solution
Normal file
2
p3_rl/test_cases/q5/1-tinygrid.solution
Normal file
@@ -0,0 +1,2 @@
|
|||||||
|
# This is the solution file for test_cases/q5/1-tinygrid.test.
|
||||||
|
# File intentionally blank.
|
||||||
22
p3_rl/test_cases/q5/1-tinygrid.test
Normal file
22
p3_rl/test_cases/q5/1-tinygrid.test
Normal file
@@ -0,0 +1,22 @@
|
|||||||
|
class: "EpsilonGreedyTest"
|
||||||
|
|
||||||
|
# GridWorld specification
|
||||||
|
# _ is empty space
|
||||||
|
# numbers are terminal states with that value
|
||||||
|
# # is a wall
|
||||||
|
# S is a start state
|
||||||
|
#
|
||||||
|
grid: """
|
||||||
|
-10
|
||||||
|
S
|
||||||
|
10
|
||||||
|
"""
|
||||||
|
discount: "0.5"
|
||||||
|
noise: "0.0"
|
||||||
|
livingReward: "0.0"
|
||||||
|
epsilon: "0.5"
|
||||||
|
learningRate: "0.1"
|
||||||
|
numExperiences: "100"
|
||||||
|
valueIterations: "100"
|
||||||
|
iterations: "10000"
|
||||||
|
|
||||||
2
p3_rl/test_cases/q5/2-tinygrid-noisy.solution
Normal file
2
p3_rl/test_cases/q5/2-tinygrid-noisy.solution
Normal file
@@ -0,0 +1,2 @@
|
|||||||
|
# This is the solution file for test_cases/q5/2-tinygrid-noisy.test.
|
||||||
|
# File intentionally blank.
|
||||||
22
p3_rl/test_cases/q5/2-tinygrid-noisy.test
Normal file
22
p3_rl/test_cases/q5/2-tinygrid-noisy.test
Normal file
@@ -0,0 +1,22 @@
|
|||||||
|
class: "EpsilonGreedyTest"
|
||||||
|
|
||||||
|
# GridWorld specification
|
||||||
|
# _ is empty space
|
||||||
|
# numbers are terminal states with that value
|
||||||
|
# # is a wall
|
||||||
|
# S is a start state
|
||||||
|
#
|
||||||
|
grid: """
|
||||||
|
-10
|
||||||
|
S
|
||||||
|
10
|
||||||
|
"""
|
||||||
|
discount: "0.75"
|
||||||
|
noise: "0.25"
|
||||||
|
livingReward: "0.0"
|
||||||
|
epsilon: "0.5"
|
||||||
|
learningRate: "0.1"
|
||||||
|
numExperiences: "100"
|
||||||
|
valueIterations: "100"
|
||||||
|
iterations: "10000"
|
||||||
|
|
||||||
2
p3_rl/test_cases/q5/3-bridge.solution
Normal file
2
p3_rl/test_cases/q5/3-bridge.solution
Normal file
@@ -0,0 +1,2 @@
|
|||||||
|
# This is the solution file for test_cases/q5/3-bridge.test.
|
||||||
|
# File intentionally blank.
|
||||||
27
p3_rl/test_cases/q5/3-bridge.test
Normal file
27
p3_rl/test_cases/q5/3-bridge.test
Normal file
@@ -0,0 +1,27 @@
|
|||||||
|
class: "EpsilonGreedyTest"
|
||||||
|
|
||||||
|
# GridWorld specification
|
||||||
|
# _ is empty space
|
||||||
|
# numbers are terminal states with that value
|
||||||
|
# # is a wall
|
||||||
|
# S is a start state
|
||||||
|
#
|
||||||
|
grid: """
|
||||||
|
# 10 #
|
||||||
|
-100 _ -100
|
||||||
|
-100 _ -100
|
||||||
|
-100 _ -100
|
||||||
|
-100 _ -100
|
||||||
|
-100 S -100
|
||||||
|
# 1 #
|
||||||
|
"""
|
||||||
|
gridName: "bridgeGrid"
|
||||||
|
discount: "0.85"
|
||||||
|
noise: "0.1"
|
||||||
|
livingReward: "0.0"
|
||||||
|
epsilon: "0.5"
|
||||||
|
learningRate: "0.1"
|
||||||
|
numExperiences: "500"
|
||||||
|
valueIterations: "100"
|
||||||
|
iterations: "10000"
|
||||||
|
|
||||||
2
p3_rl/test_cases/q5/4-discountgrid.solution
Normal file
2
p3_rl/test_cases/q5/4-discountgrid.solution
Normal file
@@ -0,0 +1,2 @@
|
|||||||
|
# This is the solution file for test_cases/q5/4-discountgrid.test.
|
||||||
|
# File intentionally blank.
|
||||||
24
p3_rl/test_cases/q5/4-discountgrid.test
Normal file
24
p3_rl/test_cases/q5/4-discountgrid.test
Normal file
@@ -0,0 +1,24 @@
|
|||||||
|
class: "EpsilonGreedyTest"
|
||||||
|
|
||||||
|
# GridWorld specification
|
||||||
|
# _ is empty space
|
||||||
|
# numbers are terminal states with that value
|
||||||
|
# # is a wall
|
||||||
|
# S is a start state
|
||||||
|
#
|
||||||
|
grid: """
|
||||||
|
-10 _ 10 _ _
|
||||||
|
-10 _ # _ _
|
||||||
|
-10 _ 1 _ _
|
||||||
|
-10 _ # # _
|
||||||
|
-10 S _ _ _
|
||||||
|
"""
|
||||||
|
discount: "0.9"
|
||||||
|
noise: "0.2"
|
||||||
|
livingReward: "0.0"
|
||||||
|
epsilon: "0.2"
|
||||||
|
learningRate: "0.1"
|
||||||
|
numExperiences: "3000"
|
||||||
|
valueIterations: "100"
|
||||||
|
iterations: "10000"
|
||||||
|
|
||||||
2
p3_rl/test_cases/q5/CONFIG
Normal file
2
p3_rl/test_cases/q5/CONFIG
Normal file
@@ -0,0 +1,2 @@
|
|||||||
|
max_points: "3"
|
||||||
|
class: "PassAllTestsQuestion"
|
||||||
2
p3_rl/test_cases/q6/CONFIG
Normal file
2
p3_rl/test_cases/q6/CONFIG
Normal file
@@ -0,0 +1,2 @@
|
|||||||
|
max_points: "1"
|
||||||
|
class: "PassAllTestsQuestion"
|
||||||
2
p3_rl/test_cases/q6/grade-agent.solution
Normal file
2
p3_rl/test_cases/q6/grade-agent.solution
Normal file
@@ -0,0 +1,2 @@
|
|||||||
|
# This is the solution file for test_cases/q6/grade-agent.test.
|
||||||
|
# File intentionally blank.
|
||||||
2
p3_rl/test_cases/q6/grade-agent.test
Normal file
2
p3_rl/test_cases/q6/grade-agent.test
Normal file
@@ -0,0 +1,2 @@
|
|||||||
|
class: "Question6Test"
|
||||||
|
|
||||||
2
p3_rl/test_cases/q7/CONFIG
Normal file
2
p3_rl/test_cases/q7/CONFIG
Normal file
@@ -0,0 +1,2 @@
|
|||||||
|
max_points: "1"
|
||||||
|
class: "PartialCreditQuestion"
|
||||||
2
p3_rl/test_cases/q7/grade-agent.solution
Normal file
2
p3_rl/test_cases/q7/grade-agent.solution
Normal file
@@ -0,0 +1,2 @@
|
|||||||
|
# This is the solution file for test_cases/q7/grade-agent.test.
|
||||||
|
# File intentionally blank.
|
||||||
6
p3_rl/test_cases/q7/grade-agent.test
Normal file
6
p3_rl/test_cases/q7/grade-agent.test
Normal file
@@ -0,0 +1,6 @@
|
|||||||
|
class: "EvalAgentTest"
|
||||||
|
|
||||||
|
# 100 test games after 2000 training games
|
||||||
|
pacmanParams: "-p PacmanQAgent -x 2000 -n 2100 -l smallGrid -q -f --fixRandomSeed"
|
||||||
|
|
||||||
|
winsThresholds: "70"
|
||||||
429
p3_rl/test_cases/q8/1-tinygrid.solution
Normal file
429
p3_rl/test_cases/q8/1-tinygrid.solution
Normal file
@@ -0,0 +1,429 @@
|
|||||||
|
weights_k_0: """
|
||||||
|
{((0, 0), 'exit'): 0,
|
||||||
|
((0, 1), 'east'): 0,
|
||||||
|
((0, 1), 'north'): 0,
|
||||||
|
((0, 1), 'south'): 0,
|
||||||
|
((0, 1), 'west'): 0,
|
||||||
|
((0, 2), 'exit'): 0}
|
||||||
|
"""
|
||||||
|
|
||||||
|
q_values_k_0_action_north: """
|
||||||
|
illegal
|
||||||
|
0.0000
|
||||||
|
illegal
|
||||||
|
"""
|
||||||
|
|
||||||
|
q_values_k_0_action_east: """
|
||||||
|
illegal
|
||||||
|
0.0000
|
||||||
|
illegal
|
||||||
|
"""
|
||||||
|
|
||||||
|
q_values_k_0_action_exit: """
|
||||||
|
0.0000
|
||||||
|
illegal
|
||||||
|
0.0000
|
||||||
|
"""
|
||||||
|
|
||||||
|
q_values_k_0_action_south: """
|
||||||
|
illegal
|
||||||
|
0.0000
|
||||||
|
illegal
|
||||||
|
"""
|
||||||
|
|
||||||
|
q_values_k_0_action_west: """
|
||||||
|
illegal
|
||||||
|
0.0000
|
||||||
|
illegal
|
||||||
|
"""
|
||||||
|
|
||||||
|
weights_k_1: """
|
||||||
|
{((0, 0), 'exit'): 1.0,
|
||||||
|
((0, 1), 'east'): 0,
|
||||||
|
((0, 1), 'north'): 0,
|
||||||
|
((0, 1), 'south'): 0,
|
||||||
|
((0, 1), 'west'): 0,
|
||||||
|
((0, 2), 'exit'): 0}
|
||||||
|
"""
|
||||||
|
|
||||||
|
q_values_k_1_action_north: """
|
||||||
|
illegal
|
||||||
|
0.0000
|
||||||
|
illegal
|
||||||
|
"""
|
||||||
|
|
||||||
|
q_values_k_1_action_east: """
|
||||||
|
illegal
|
||||||
|
0.0000
|
||||||
|
illegal
|
||||||
|
"""
|
||||||
|
|
||||||
|
q_values_k_1_action_exit: """
|
||||||
|
0.0000
|
||||||
|
illegal
|
||||||
|
1.0000
|
||||||
|
"""
|
||||||
|
|
||||||
|
q_values_k_1_action_south: """
|
||||||
|
illegal
|
||||||
|
0.0000
|
||||||
|
illegal
|
||||||
|
"""
|
||||||
|
|
||||||
|
q_values_k_1_action_west: """
|
||||||
|
illegal
|
||||||
|
0.0000
|
||||||
|
illegal
|
||||||
|
"""
|
||||||
|
|
||||||
|
weights_k_2: """
|
||||||
|
{((0, 0), 'exit'): 1.0,
|
||||||
|
((0, 1), 'east'): 0,
|
||||||
|
((0, 1), 'north'): 0,
|
||||||
|
((0, 1), 'south'): 0.0,
|
||||||
|
((0, 1), 'west'): 0,
|
||||||
|
((0, 2), 'exit'): 0}
|
||||||
|
"""
|
||||||
|
|
||||||
|
q_values_k_2_action_north: """
|
||||||
|
illegal
|
||||||
|
0.0000
|
||||||
|
illegal
|
||||||
|
"""
|
||||||
|
|
||||||
|
q_values_k_2_action_east: """
|
||||||
|
illegal
|
||||||
|
0.0000
|
||||||
|
illegal
|
||||||
|
"""
|
||||||
|
|
||||||
|
q_values_k_2_action_exit: """
|
||||||
|
0.0000
|
||||||
|
illegal
|
||||||
|
1.0000
|
||||||
|
"""
|
||||||
|
|
||||||
|
q_values_k_2_action_south: """
|
||||||
|
illegal
|
||||||
|
0.0000
|
||||||
|
illegal
|
||||||
|
"""
|
||||||
|
|
||||||
|
q_values_k_2_action_west: """
|
||||||
|
illegal
|
||||||
|
0.0000
|
||||||
|
illegal
|
||||||
|
"""
|
||||||
|
|
||||||
|
weights_k_3: """
|
||||||
|
{((0, 0), 'exit'): 1.9,
|
||||||
|
((0, 1), 'east'): 0,
|
||||||
|
((0, 1), 'north'): 0,
|
||||||
|
((0, 1), 'south'): 0.0,
|
||||||
|
((0, 1), 'west'): 0,
|
||||||
|
((0, 2), 'exit'): 0}
|
||||||
|
"""
|
||||||
|
|
||||||
|
q_values_k_3_action_north: """
|
||||||
|
illegal
|
||||||
|
0.0000
|
||||||
|
illegal
|
||||||
|
"""
|
||||||
|
|
||||||
|
q_values_k_3_action_east: """
|
||||||
|
illegal
|
||||||
|
0.0000
|
||||||
|
illegal
|
||||||
|
"""
|
||||||
|
|
||||||
|
q_values_k_3_action_exit: """
|
||||||
|
0.0000
|
||||||
|
illegal
|
||||||
|
1.9000
|
||||||
|
"""
|
||||||
|
|
||||||
|
q_values_k_3_action_south: """
|
||||||
|
illegal
|
||||||
|
0.0000
|
||||||
|
illegal
|
||||||
|
"""
|
||||||
|
|
||||||
|
q_values_k_3_action_west: """
|
||||||
|
illegal
|
||||||
|
0.0000
|
||||||
|
illegal
|
||||||
|
"""
|
||||||
|
|
||||||
|
weights_k_4: """
|
||||||
|
{((0, 0), 'exit'): 2.71,
|
||||||
|
((0, 1), 'east'): 0,
|
||||||
|
((0, 1), 'north'): 0,
|
||||||
|
((0, 1), 'south'): 0.0,
|
||||||
|
((0, 1), 'west'): 0,
|
||||||
|
((0, 2), 'exit'): 0}
|
||||||
|
"""
|
||||||
|
|
||||||
|
q_values_k_4_action_north: """
|
||||||
|
illegal
|
||||||
|
0.0000
|
||||||
|
illegal
|
||||||
|
"""
|
||||||
|
|
||||||
|
q_values_k_4_action_east: """
|
||||||
|
illegal
|
||||||
|
0.0000
|
||||||
|
illegal
|
||||||
|
"""
|
||||||
|
|
||||||
|
q_values_k_4_action_exit: """
|
||||||
|
0.0000
|
||||||
|
illegal
|
||||||
|
2.7100
|
||||||
|
"""
|
||||||
|
|
||||||
|
q_values_k_4_action_south: """
|
||||||
|
illegal
|
||||||
|
0.0000
|
||||||
|
illegal
|
||||||
|
"""
|
||||||
|
|
||||||
|
q_values_k_4_action_west: """
|
||||||
|
illegal
|
||||||
|
0.0000
|
||||||
|
illegal
|
||||||
|
"""
|
||||||
|
|
||||||
|
weights_k_5: """
|
||||||
|
{((0, 0), 'exit'): 2.71,
|
||||||
|
((0, 1), 'east'): 0,
|
||||||
|
((0, 1), 'north'): 0,
|
||||||
|
((0, 1), 'south'): 0.0,
|
||||||
|
((0, 1), 'west'): 0,
|
||||||
|
((0, 2), 'exit'): -1.0}
|
||||||
|
"""
|
||||||
|
|
||||||
|
q_values_k_5_action_north: """
|
||||||
|
illegal
|
||||||
|
0.0000
|
||||||
|
illegal
|
||||||
|
"""
|
||||||
|
|
||||||
|
q_values_k_5_action_east: """
|
||||||
|
illegal
|
||||||
|
0.0000
|
||||||
|
illegal
|
||||||
|
"""
|
||||||
|
|
||||||
|
q_values_k_5_action_exit: """
|
||||||
|
-1.0000
|
||||||
|
illegal
|
||||||
|
2.7100
|
||||||
|
"""
|
||||||
|
|
||||||
|
q_values_k_5_action_south: """
|
||||||
|
illegal
|
||||||
|
0.0000
|
||||||
|
illegal
|
||||||
|
"""
|
||||||
|
|
||||||
|
q_values_k_5_action_west: """
|
||||||
|
illegal
|
||||||
|
0.0000
|
||||||
|
illegal
|
||||||
|
"""
|
||||||
|
|
||||||
|
weights_k_6: """
|
||||||
|
{((0, 0), 'exit'): 3.439,
|
||||||
|
((0, 1), 'east'): 0,
|
||||||
|
((0, 1), 'north'): 0,
|
||||||
|
((0, 1), 'south'): 0.0,
|
||||||
|
((0, 1), 'west'): 0,
|
||||||
|
((0, 2), 'exit'): -1.0}
|
||||||
|
"""
|
||||||
|
|
||||||
|
q_values_k_6_action_north: """
|
||||||
|
illegal
|
||||||
|
0.0000
|
||||||
|
illegal
|
||||||
|
"""
|
||||||
|
|
||||||
|
q_values_k_6_action_east: """
|
||||||
|
illegal
|
||||||
|
0.0000
|
||||||
|
illegal
|
||||||
|
"""
|
||||||
|
|
||||||
|
q_values_k_6_action_exit: """
|
||||||
|
-1.0000
|
||||||
|
illegal
|
||||||
|
3.4390
|
||||||
|
"""
|
||||||
|
|
||||||
|
q_values_k_6_action_south: """
|
||||||
|
illegal
|
||||||
|
0.0000
|
||||||
|
illegal
|
||||||
|
"""
|
||||||
|
|
||||||
|
q_values_k_6_action_west: """
|
||||||
|
illegal
|
||||||
|
0.0000
|
||||||
|
illegal
|
||||||
|
"""
|
||||||
|
|
||||||
|
weights_k_7: """
|
||||||
|
{((0, 0), 'exit'): 3.439,
|
||||||
|
((0, 1), 'east'): 0,
|
||||||
|
((0, 1), 'north'): 0,
|
||||||
|
((0, 1), 'south'): 0.17195000000000002,
|
||||||
|
((0, 1), 'west'): 0,
|
||||||
|
((0, 2), 'exit'): -1.0}
|
||||||
|
"""
|
||||||
|
|
||||||
|
q_values_k_7_action_north: """
|
||||||
|
illegal
|
||||||
|
0.0000
|
||||||
|
illegal
|
||||||
|
"""
|
||||||
|
|
||||||
|
q_values_k_7_action_east: """
|
||||||
|
illegal
|
||||||
|
0.0000
|
||||||
|
illegal
|
||||||
|
"""
|
||||||
|
|
||||||
|
q_values_k_7_action_exit: """
|
||||||
|
-1.0000
|
||||||
|
illegal
|
||||||
|
3.4390
|
||||||
|
"""
|
||||||
|
|
||||||
|
q_values_k_7_action_south: """
|
||||||
|
illegal
|
||||||
|
0.1720
|
||||||
|
illegal
|
||||||
|
"""
|
||||||
|
|
||||||
|
q_values_k_7_action_west: """
|
||||||
|
illegal
|
||||||
|
0.0000
|
||||||
|
illegal
|
||||||
|
"""
|
||||||
|
|
||||||
|
weights_k_8: """
|
||||||
|
{((0, 0), 'exit'): 4.0951,
|
||||||
|
((0, 1), 'east'): 0,
|
||||||
|
((0, 1), 'north'): 0,
|
||||||
|
((0, 1), 'south'): 0.17195000000000002,
|
||||||
|
((0, 1), 'west'): 0,
|
||||||
|
((0, 2), 'exit'): -1.0}
|
||||||
|
"""
|
||||||
|
|
||||||
|
q_values_k_8_action_north: """
|
||||||
|
illegal
|
||||||
|
0.0000
|
||||||
|
illegal
|
||||||
|
"""
|
||||||
|
|
||||||
|
q_values_k_8_action_east: """
|
||||||
|
illegal
|
||||||
|
0.0000
|
||||||
|
illegal
|
||||||
|
"""
|
||||||
|
|
||||||
|
q_values_k_8_action_exit: """
|
||||||
|
-1.0000
|
||||||
|
illegal
|
||||||
|
4.0951
|
||||||
|
"""
|
||||||
|
|
||||||
|
q_values_k_8_action_south: """
|
||||||
|
illegal
|
||||||
|
0.1720
|
||||||
|
illegal
|
||||||
|
"""
|
||||||
|
|
||||||
|
q_values_k_8_action_west: """
|
||||||
|
illegal
|
||||||
|
0.0000
|
||||||
|
illegal
|
||||||
|
"""
|
||||||
|
|
||||||
|
weights_k_9: """
|
||||||
|
{((0, 0), 'exit'): 4.68559,
|
||||||
|
((0, 1), 'east'): 0,
|
||||||
|
((0, 1), 'north'): 0,
|
||||||
|
((0, 1), 'south'): 0.17195000000000002,
|
||||||
|
((0, 1), 'west'): 0,
|
||||||
|
((0, 2), 'exit'): -1.0}
|
||||||
|
"""
|
||||||
|
|
||||||
|
q_values_k_9_action_north: """
|
||||||
|
illegal
|
||||||
|
0.0000
|
||||||
|
illegal
|
||||||
|
"""
|
||||||
|
|
||||||
|
q_values_k_9_action_east: """
|
||||||
|
illegal
|
||||||
|
0.0000
|
||||||
|
illegal
|
||||||
|
"""
|
||||||
|
|
||||||
|
q_values_k_9_action_exit: """
|
||||||
|
-1.0000
|
||||||
|
illegal
|
||||||
|
4.6856
|
||||||
|
"""
|
||||||
|
|
||||||
|
q_values_k_9_action_south: """
|
||||||
|
illegal
|
||||||
|
0.1720
|
||||||
|
illegal
|
||||||
|
"""
|
||||||
|
|
||||||
|
q_values_k_9_action_west: """
|
||||||
|
illegal
|
||||||
|
0.0000
|
||||||
|
illegal
|
||||||
|
"""
|
||||||
|
|
||||||
|
weights_k_100: """
|
||||||
|
{((0, 0), 'exit'): 9.817519963685992,
|
||||||
|
((0, 1), 'east'): 0.40629236674335106,
|
||||||
|
((0, 1), 'north'): -0.4534185789984799,
|
||||||
|
((0, 1), 'south'): 2.126721095524319,
|
||||||
|
((0, 1), 'west'): 0.39193283364906867,
|
||||||
|
((0, 2), 'exit'): -9.476652366972639}
|
||||||
|
"""
|
||||||
|
|
||||||
|
q_values_k_100_action_north: """
|
||||||
|
illegal
|
||||||
|
-0.4534
|
||||||
|
illegal
|
||||||
|
"""
|
||||||
|
|
||||||
|
q_values_k_100_action_east: """
|
||||||
|
illegal
|
||||||
|
0.4063
|
||||||
|
illegal
|
||||||
|
"""
|
||||||
|
|
||||||
|
q_values_k_100_action_exit: """
|
||||||
|
-9.4767
|
||||||
|
illegal
|
||||||
|
9.8175
|
||||||
|
"""
|
||||||
|
|
||||||
|
q_values_k_100_action_south: """
|
||||||
|
illegal
|
||||||
|
2.1267
|
||||||
|
illegal
|
||||||
|
"""
|
||||||
|
|
||||||
|
q_values_k_100_action_west: """
|
||||||
|
illegal
|
||||||
|
0.3919
|
||||||
|
illegal
|
||||||
|
"""
|
||||||
|
|
||||||
22
p3_rl/test_cases/q8/1-tinygrid.test
Normal file
22
p3_rl/test_cases/q8/1-tinygrid.test
Normal file
@@ -0,0 +1,22 @@
|
|||||||
|
class: "ApproximateQLearningTest"
|
||||||
|
|
||||||
|
# GridWorld specification
|
||||||
|
# _ is empty space
|
||||||
|
# numbers are terminal states with that value
|
||||||
|
# # is a wall
|
||||||
|
# S is a start state
|
||||||
|
#
|
||||||
|
grid: """
|
||||||
|
-10
|
||||||
|
S
|
||||||
|
10
|
||||||
|
"""
|
||||||
|
discount: "0.5"
|
||||||
|
noise: "0.0"
|
||||||
|
livingReward: "0.0"
|
||||||
|
epsilon: "0.5"
|
||||||
|
learningRate: "0.1"
|
||||||
|
numExperiences: "100"
|
||||||
|
valueIterations: "100"
|
||||||
|
iterations: "10000"
|
||||||
|
|
||||||
429
p3_rl/test_cases/q8/2-tinygrid-noisy.solution
Normal file
429
p3_rl/test_cases/q8/2-tinygrid-noisy.solution
Normal file
@@ -0,0 +1,429 @@
|
|||||||
|
weights_k_0: """
|
||||||
|
{((0, 0), 'exit'): 0,
|
||||||
|
((0, 1), 'east'): 0,
|
||||||
|
((0, 1), 'north'): 0,
|
||||||
|
((0, 1), 'south'): 0,
|
||||||
|
((0, 1), 'west'): 0,
|
||||||
|
((0, 2), 'exit'): 0}
|
||||||
|
"""
|
||||||
|
|
||||||
|
q_values_k_0_action_north: """
|
||||||
|
illegal
|
||||||
|
0.0000
|
||||||
|
illegal
|
||||||
|
"""
|
||||||
|
|
||||||
|
q_values_k_0_action_east: """
|
||||||
|
illegal
|
||||||
|
0.0000
|
||||||
|
illegal
|
||||||
|
"""
|
||||||
|
|
||||||
|
q_values_k_0_action_exit: """
|
||||||
|
0.0000
|
||||||
|
illegal
|
||||||
|
0.0000
|
||||||
|
"""
|
||||||
|
|
||||||
|
q_values_k_0_action_south: """
|
||||||
|
illegal
|
||||||
|
0.0000
|
||||||
|
illegal
|
||||||
|
"""
|
||||||
|
|
||||||
|
q_values_k_0_action_west: """
|
||||||
|
illegal
|
||||||
|
0.0000
|
||||||
|
illegal
|
||||||
|
"""
|
||||||
|
|
||||||
|
weights_k_1: """
|
||||||
|
{((0, 0), 'exit'): 1.0,
|
||||||
|
((0, 1), 'east'): 0,
|
||||||
|
((0, 1), 'north'): 0,
|
||||||
|
((0, 1), 'south'): 0,
|
||||||
|
((0, 1), 'west'): 0,
|
||||||
|
((0, 2), 'exit'): 0}
|
||||||
|
"""
|
||||||
|
|
||||||
|
q_values_k_1_action_north: """
|
||||||
|
illegal
|
||||||
|
0.0000
|
||||||
|
illegal
|
||||||
|
"""
|
||||||
|
|
||||||
|
q_values_k_1_action_east: """
|
||||||
|
illegal
|
||||||
|
0.0000
|
||||||
|
illegal
|
||||||
|
"""
|
||||||
|
|
||||||
|
q_values_k_1_action_exit: """
|
||||||
|
0.0000
|
||||||
|
illegal
|
||||||
|
1.0000
|
||||||
|
"""
|
||||||
|
|
||||||
|
q_values_k_1_action_south: """
|
||||||
|
illegal
|
||||||
|
0.0000
|
||||||
|
illegal
|
||||||
|
"""
|
||||||
|
|
||||||
|
q_values_k_1_action_west: """
|
||||||
|
illegal
|
||||||
|
0.0000
|
||||||
|
illegal
|
||||||
|
"""
|
||||||
|
|
||||||
|
weights_k_2: """
|
||||||
|
{((0, 0), 'exit'): 1.0,
|
||||||
|
((0, 1), 'east'): 0,
|
||||||
|
((0, 1), 'north'): 0,
|
||||||
|
((0, 1), 'south'): 0.0,
|
||||||
|
((0, 1), 'west'): 0,
|
||||||
|
((0, 2), 'exit'): 0}
|
||||||
|
"""
|
||||||
|
|
||||||
|
q_values_k_2_action_north: """
|
||||||
|
illegal
|
||||||
|
0.0000
|
||||||
|
illegal
|
||||||
|
"""
|
||||||
|
|
||||||
|
q_values_k_2_action_east: """
|
||||||
|
illegal
|
||||||
|
0.0000
|
||||||
|
illegal
|
||||||
|
"""
|
||||||
|
|
||||||
|
q_values_k_2_action_exit: """
|
||||||
|
0.0000
|
||||||
|
illegal
|
||||||
|
1.0000
|
||||||
|
"""
|
||||||
|
|
||||||
|
q_values_k_2_action_south: """
|
||||||
|
illegal
|
||||||
|
0.0000
|
||||||
|
illegal
|
||||||
|
"""
|
||||||
|
|
||||||
|
q_values_k_2_action_west: """
|
||||||
|
illegal
|
||||||
|
0.0000
|
||||||
|
illegal
|
||||||
|
"""
|
||||||
|
|
||||||
|
weights_k_3: """
|
||||||
|
{((0, 0), 'exit'): 1.9,
|
||||||
|
((0, 1), 'east'): 0,
|
||||||
|
((0, 1), 'north'): 0,
|
||||||
|
((0, 1), 'south'): 0.0,
|
||||||
|
((0, 1), 'west'): 0,
|
||||||
|
((0, 2), 'exit'): 0}
|
||||||
|
"""
|
||||||
|
|
||||||
|
q_values_k_3_action_north: """
|
||||||
|
illegal
|
||||||
|
0.0000
|
||||||
|
illegal
|
||||||
|
"""
|
||||||
|
|
||||||
|
q_values_k_3_action_east: """
|
||||||
|
illegal
|
||||||
|
0.0000
|
||||||
|
illegal
|
||||||
|
"""
|
||||||
|
|
||||||
|
q_values_k_3_action_exit: """
|
||||||
|
0.0000
|
||||||
|
illegal
|
||||||
|
1.9000
|
||||||
|
"""
|
||||||
|
|
||||||
|
q_values_k_3_action_south: """
|
||||||
|
illegal
|
||||||
|
0.0000
|
||||||
|
illegal
|
||||||
|
"""
|
||||||
|
|
||||||
|
q_values_k_3_action_west: """
|
||||||
|
illegal
|
||||||
|
0.0000
|
||||||
|
illegal
|
||||||
|
"""
|
||||||
|
|
||||||
|
weights_k_4: """
|
||||||
|
{((0, 0), 'exit'): 2.71,
|
||||||
|
((0, 1), 'east'): 0,
|
||||||
|
((0, 1), 'north'): 0,
|
||||||
|
((0, 1), 'south'): 0.0,
|
||||||
|
((0, 1), 'west'): 0,
|
||||||
|
((0, 2), 'exit'): 0}
|
||||||
|
"""
|
||||||
|
|
||||||
|
q_values_k_4_action_north: """
|
||||||
|
illegal
|
||||||
|
0.0000
|
||||||
|
illegal
|
||||||
|
"""
|
||||||
|
|
||||||
|
q_values_k_4_action_east: """
|
||||||
|
illegal
|
||||||
|
0.0000
|
||||||
|
illegal
|
||||||
|
"""
|
||||||
|
|
||||||
|
q_values_k_4_action_exit: """
|
||||||
|
0.0000
|
||||||
|
illegal
|
||||||
|
2.7100
|
||||||
|
"""
|
||||||
|
|
||||||
|
q_values_k_4_action_south: """
|
||||||
|
illegal
|
||||||
|
0.0000
|
||||||
|
illegal
|
||||||
|
"""
|
||||||
|
|
||||||
|
q_values_k_4_action_west: """
|
||||||
|
illegal
|
||||||
|
0.0000
|
||||||
|
illegal
|
||||||
|
"""
|
||||||
|
|
||||||
|
weights_k_5: """
|
||||||
|
{((0, 0), 'exit'): 2.71,
|
||||||
|
((0, 1), 'east'): 0,
|
||||||
|
((0, 1), 'north'): 0,
|
||||||
|
((0, 1), 'south'): 0.0,
|
||||||
|
((0, 1), 'west'): 0,
|
||||||
|
((0, 2), 'exit'): -1.0}
|
||||||
|
"""
|
||||||
|
|
||||||
|
q_values_k_5_action_north: """
|
||||||
|
illegal
|
||||||
|
0.0000
|
||||||
|
illegal
|
||||||
|
"""
|
||||||
|
|
||||||
|
q_values_k_5_action_east: """
|
||||||
|
illegal
|
||||||
|
0.0000
|
||||||
|
illegal
|
||||||
|
"""
|
||||||
|
|
||||||
|
q_values_k_5_action_exit: """
|
||||||
|
-1.0000
|
||||||
|
illegal
|
||||||
|
2.7100
|
||||||
|
"""
|
||||||
|
|
||||||
|
q_values_k_5_action_south: """
|
||||||
|
illegal
|
||||||
|
0.0000
|
||||||
|
illegal
|
||||||
|
"""
|
||||||
|
|
||||||
|
q_values_k_5_action_west: """
|
||||||
|
illegal
|
||||||
|
0.0000
|
||||||
|
illegal
|
||||||
|
"""
|
||||||
|
|
||||||
|
weights_k_6: """
|
||||||
|
{((0, 0), 'exit'): 3.439,
|
||||||
|
((0, 1), 'east'): 0,
|
||||||
|
((0, 1), 'north'): 0,
|
||||||
|
((0, 1), 'south'): 0.0,
|
||||||
|
((0, 1), 'west'): 0,
|
||||||
|
((0, 2), 'exit'): -1.0}
|
||||||
|
"""
|
||||||
|
|
||||||
|
q_values_k_6_action_north: """
|
||||||
|
illegal
|
||||||
|
0.0000
|
||||||
|
illegal
|
||||||
|
"""
|
||||||
|
|
||||||
|
q_values_k_6_action_east: """
|
||||||
|
illegal
|
||||||
|
0.0000
|
||||||
|
illegal
|
||||||
|
"""
|
||||||
|
|
||||||
|
q_values_k_6_action_exit: """
|
||||||
|
-1.0000
|
||||||
|
illegal
|
||||||
|
3.4390
|
||||||
|
"""
|
||||||
|
|
||||||
|
q_values_k_6_action_south: """
|
||||||
|
illegal
|
||||||
|
0.0000
|
||||||
|
illegal
|
||||||
|
"""
|
||||||
|
|
||||||
|
q_values_k_6_action_west: """
|
||||||
|
illegal
|
||||||
|
0.0000
|
||||||
|
illegal
|
||||||
|
"""
|
||||||
|
|
||||||
|
weights_k_7: """
|
||||||
|
{((0, 0), 'exit'): 3.439,
|
||||||
|
((0, 1), 'east'): 0,
|
||||||
|
((0, 1), 'north'): 0,
|
||||||
|
((0, 1), 'south'): 0.257925,
|
||||||
|
((0, 1), 'west'): 0,
|
||||||
|
((0, 2), 'exit'): -1.0}
|
||||||
|
"""
|
||||||
|
|
||||||
|
q_values_k_7_action_north: """
|
||||||
|
illegal
|
||||||
|
0.0000
|
||||||
|
illegal
|
||||||
|
"""
|
||||||
|
|
||||||
|
q_values_k_7_action_east: """
|
||||||
|
illegal
|
||||||
|
0.0000
|
||||||
|
illegal
|
||||||
|
"""
|
||||||
|
|
||||||
|
q_values_k_7_action_exit: """
|
||||||
|
-1.0000
|
||||||
|
illegal
|
||||||
|
3.4390
|
||||||
|
"""
|
||||||
|
|
||||||
|
q_values_k_7_action_south: """
|
||||||
|
illegal
|
||||||
|
0.2579
|
||||||
|
illegal
|
||||||
|
"""
|
||||||
|
|
||||||
|
q_values_k_7_action_west: """
|
||||||
|
illegal
|
||||||
|
0.0000
|
||||||
|
illegal
|
||||||
|
"""
|
||||||
|
|
||||||
|
weights_k_8: """
|
||||||
|
{((0, 0), 'exit'): 4.0951,
|
||||||
|
((0, 1), 'east'): 0,
|
||||||
|
((0, 1), 'north'): 0,
|
||||||
|
((0, 1), 'south'): 0.257925,
|
||||||
|
((0, 1), 'west'): 0,
|
||||||
|
((0, 2), 'exit'): -1.0}
|
||||||
|
"""
|
||||||
|
|
||||||
|
q_values_k_8_action_north: """
|
||||||
|
illegal
|
||||||
|
0.0000
|
||||||
|
illegal
|
||||||
|
"""
|
||||||
|
|
||||||
|
q_values_k_8_action_east: """
|
||||||
|
illegal
|
||||||
|
0.0000
|
||||||
|
illegal
|
||||||
|
"""
|
||||||
|
|
||||||
|
q_values_k_8_action_exit: """
|
||||||
|
-1.0000
|
||||||
|
illegal
|
||||||
|
4.0951
|
||||||
|
"""
|
||||||
|
|
||||||
|
q_values_k_8_action_south: """
|
||||||
|
illegal
|
||||||
|
0.2579
|
||||||
|
illegal
|
||||||
|
"""
|
||||||
|
|
||||||
|
q_values_k_8_action_west: """
|
||||||
|
illegal
|
||||||
|
0.0000
|
||||||
|
illegal
|
||||||
|
"""
|
||||||
|
|
||||||
|
weights_k_9: """
|
||||||
|
{((0, 0), 'exit'): 4.68559,
|
||||||
|
((0, 1), 'east'): 0,
|
||||||
|
((0, 1), 'north'): 0,
|
||||||
|
((0, 1), 'south'): 0.257925,
|
||||||
|
((0, 1), 'west'): 0,
|
||||||
|
((0, 2), 'exit'): -1.0}
|
||||||
|
"""
|
||||||
|
|
||||||
|
q_values_k_9_action_north: """
|
||||||
|
illegal
|
||||||
|
0.0000
|
||||||
|
illegal
|
||||||
|
"""
|
||||||
|
|
||||||
|
q_values_k_9_action_east: """
|
||||||
|
illegal
|
||||||
|
0.0000
|
||||||
|
illegal
|
||||||
|
"""
|
||||||
|
|
||||||
|
q_values_k_9_action_exit: """
|
||||||
|
-1.0000
|
||||||
|
illegal
|
||||||
|
4.6856
|
||||||
|
"""
|
||||||
|
|
||||||
|
q_values_k_9_action_south: """
|
||||||
|
illegal
|
||||||
|
0.2579
|
||||||
|
illegal
|
||||||
|
"""
|
||||||
|
|
||||||
|
q_values_k_9_action_west: """
|
||||||
|
illegal
|
||||||
|
0.0000
|
||||||
|
illegal
|
||||||
|
"""
|
||||||
|
|
||||||
|
weights_k_100: """
|
||||||
|
{((0, 0), 'exit'): 9.817519963685992,
|
||||||
|
((0, 1), 'east'): 0.9498968104823575,
|
||||||
|
((0, 1), 'north'): -0.66699795412272,
|
||||||
|
((0, 1), 'south'): 3.256207905310105,
|
||||||
|
((0, 1), 'west'): 0.8236280735014627,
|
||||||
|
((0, 2), 'exit'): -9.476652366972639}
|
||||||
|
"""
|
||||||
|
|
||||||
|
q_values_k_100_action_north: """
|
||||||
|
illegal
|
||||||
|
-0.6670
|
||||||
|
illegal
|
||||||
|
"""
|
||||||
|
|
||||||
|
q_values_k_100_action_east: """
|
||||||
|
illegal
|
||||||
|
0.9499
|
||||||
|
illegal
|
||||||
|
"""
|
||||||
|
|
||||||
|
q_values_k_100_action_exit: """
|
||||||
|
-9.4767
|
||||||
|
illegal
|
||||||
|
9.8175
|
||||||
|
"""
|
||||||
|
|
||||||
|
q_values_k_100_action_south: """
|
||||||
|
illegal
|
||||||
|
3.2562
|
||||||
|
illegal
|
||||||
|
"""
|
||||||
|
|
||||||
|
q_values_k_100_action_west: """
|
||||||
|
illegal
|
||||||
|
0.8236
|
||||||
|
illegal
|
||||||
|
"""
|
||||||
|
|
||||||
22
p3_rl/test_cases/q8/2-tinygrid-noisy.test
Normal file
22
p3_rl/test_cases/q8/2-tinygrid-noisy.test
Normal file
@@ -0,0 +1,22 @@
|
|||||||
|
class: "ApproximateQLearningTest"
|
||||||
|
|
||||||
|
# GridWorld specification
|
||||||
|
# _ is empty space
|
||||||
|
# numbers are terminal states with that value
|
||||||
|
# # is a wall
|
||||||
|
# S is a start state
|
||||||
|
#
|
||||||
|
grid: """
|
||||||
|
-10
|
||||||
|
S
|
||||||
|
10
|
||||||
|
"""
|
||||||
|
discount: "0.75"
|
||||||
|
noise: "0.25"
|
||||||
|
livingReward: "0.0"
|
||||||
|
epsilon: "0.5"
|
||||||
|
learningRate: "0.1"
|
||||||
|
numExperiences: "100"
|
||||||
|
valueIterations: "100"
|
||||||
|
iterations: "10000"
|
||||||
|
|
||||||
935
p3_rl/test_cases/q8/3-bridge.solution
Normal file
935
p3_rl/test_cases/q8/3-bridge.solution
Normal file
@@ -0,0 +1,935 @@
|
|||||||
|
weights_k_0: """
|
||||||
|
{((0, 1), 'exit'): 0,
|
||||||
|
((0, 2), 'exit'): 0,
|
||||||
|
((0, 3), 'exit'): 0,
|
||||||
|
((0, 4), 'exit'): 0,
|
||||||
|
((0, 5), 'exit'): 0,
|
||||||
|
((1, 0), 'exit'): 0,
|
||||||
|
((1, 1), 'east'): 0,
|
||||||
|
((1, 1), 'north'): 0,
|
||||||
|
((1, 1), 'south'): 0,
|
||||||
|
((1, 1), 'west'): 0,
|
||||||
|
((1, 2), 'east'): 0,
|
||||||
|
((1, 2), 'north'): 0,
|
||||||
|
((1, 2), 'south'): 0,
|
||||||
|
((1, 2), 'west'): 0,
|
||||||
|
((1, 3), 'east'): 0,
|
||||||
|
((1, 3), 'north'): 0,
|
||||||
|
((1, 3), 'south'): 0,
|
||||||
|
((1, 3), 'west'): 0,
|
||||||
|
((1, 4), 'east'): 0,
|
||||||
|
((1, 4), 'north'): 0,
|
||||||
|
((1, 4), 'south'): 0,
|
||||||
|
((1, 4), 'west'): 0,
|
||||||
|
((1, 5), 'east'): 0,
|
||||||
|
((1, 5), 'north'): 0,
|
||||||
|
((1, 5), 'south'): 0,
|
||||||
|
((1, 5), 'west'): 0,
|
||||||
|
((1, 6), 'exit'): 0,
|
||||||
|
((2, 1), 'exit'): 0,
|
||||||
|
((2, 2), 'exit'): 0,
|
||||||
|
((2, 3), 'exit'): 0,
|
||||||
|
((2, 4), 'exit'): 0,
|
||||||
|
((2, 5), 'exit'): 0}
|
||||||
|
"""
|
||||||
|
|
||||||
|
q_values_k_0_action_north: """
|
||||||
|
__________ illegal __________
|
||||||
|
illegal 0.0000 illegal
|
||||||
|
illegal 0.0000 illegal
|
||||||
|
illegal 0.0000 illegal
|
||||||
|
illegal 0.0000 illegal
|
||||||
|
illegal 0.0000 illegal
|
||||||
|
__________ illegal __________
|
||||||
|
"""
|
||||||
|
|
||||||
|
q_values_k_0_action_east: """
|
||||||
|
__________ illegal __________
|
||||||
|
illegal 0.0000 illegal
|
||||||
|
illegal 0.0000 illegal
|
||||||
|
illegal 0.0000 illegal
|
||||||
|
illegal 0.0000 illegal
|
||||||
|
illegal 0.0000 illegal
|
||||||
|
__________ illegal __________
|
||||||
|
"""
|
||||||
|
|
||||||
|
q_values_k_0_action_exit: """
|
||||||
|
__________ 0.0000 __________
|
||||||
|
0.0000 illegal 0.0000
|
||||||
|
0.0000 illegal 0.0000
|
||||||
|
0.0000 illegal 0.0000
|
||||||
|
0.0000 illegal 0.0000
|
||||||
|
0.0000 illegal 0.0000
|
||||||
|
__________ 0.0000 __________
|
||||||
|
"""
|
||||||
|
|
||||||
|
q_values_k_0_action_south: """
|
||||||
|
__________ illegal __________
|
||||||
|
illegal 0.0000 illegal
|
||||||
|
illegal 0.0000 illegal
|
||||||
|
illegal 0.0000 illegal
|
||||||
|
illegal 0.0000 illegal
|
||||||
|
illegal 0.0000 illegal
|
||||||
|
__________ illegal __________
|
||||||
|
"""
|
||||||
|
|
||||||
|
q_values_k_0_action_west: """
|
||||||
|
__________ illegal __________
|
||||||
|
illegal 0.0000 illegal
|
||||||
|
illegal 0.0000 illegal
|
||||||
|
illegal 0.0000 illegal
|
||||||
|
illegal 0.0000 illegal
|
||||||
|
illegal 0.0000 illegal
|
||||||
|
__________ illegal __________
|
||||||
|
"""
|
||||||
|
|
||||||
|
weights_k_1: """
|
||||||
|
{((0, 1), 'exit'): 0,
|
||||||
|
((0, 2), 'exit'): 0,
|
||||||
|
((0, 3), 'exit'): 0,
|
||||||
|
((0, 4), 'exit'): 0,
|
||||||
|
((0, 5), 'exit'): -10.0,
|
||||||
|
((1, 0), 'exit'): 0,
|
||||||
|
((1, 1), 'east'): 0,
|
||||||
|
((1, 1), 'north'): 0,
|
||||||
|
((1, 1), 'south'): 0,
|
||||||
|
((1, 1), 'west'): 0,
|
||||||
|
((1, 2), 'east'): 0,
|
||||||
|
((1, 2), 'north'): 0,
|
||||||
|
((1, 2), 'south'): 0,
|
||||||
|
((1, 2), 'west'): 0,
|
||||||
|
((1, 3), 'east'): 0,
|
||||||
|
((1, 3), 'north'): 0,
|
||||||
|
((1, 3), 'south'): 0,
|
||||||
|
((1, 3), 'west'): 0,
|
||||||
|
((1, 4), 'east'): 0,
|
||||||
|
((1, 4), 'north'): 0,
|
||||||
|
((1, 4), 'south'): 0,
|
||||||
|
((1, 4), 'west'): 0,
|
||||||
|
((1, 5), 'east'): 0,
|
||||||
|
((1, 5), 'north'): 0,
|
||||||
|
((1, 5), 'south'): 0,
|
||||||
|
((1, 5), 'west'): 0,
|
||||||
|
((1, 6), 'exit'): 0,
|
||||||
|
((2, 1), 'exit'): 0,
|
||||||
|
((2, 2), 'exit'): 0,
|
||||||
|
((2, 3), 'exit'): 0,
|
||||||
|
((2, 4), 'exit'): 0,
|
||||||
|
((2, 5), 'exit'): 0}
|
||||||
|
"""
|
||||||
|
|
||||||
|
q_values_k_1_action_north: """
|
||||||
|
__________ illegal __________
|
||||||
|
illegal 0.0000 illegal
|
||||||
|
illegal 0.0000 illegal
|
||||||
|
illegal 0.0000 illegal
|
||||||
|
illegal 0.0000 illegal
|
||||||
|
illegal 0.0000 illegal
|
||||||
|
__________ illegal __________
|
||||||
|
"""
|
||||||
|
|
||||||
|
q_values_k_1_action_east: """
|
||||||
|
__________ illegal __________
|
||||||
|
illegal 0.0000 illegal
|
||||||
|
illegal 0.0000 illegal
|
||||||
|
illegal 0.0000 illegal
|
||||||
|
illegal 0.0000 illegal
|
||||||
|
illegal 0.0000 illegal
|
||||||
|
__________ illegal __________
|
||||||
|
"""
|
||||||
|
|
||||||
|
q_values_k_1_action_exit: """
|
||||||
|
__________ 0.0000 __________
|
||||||
|
-10.0000 illegal 0.0000
|
||||||
|
0.0000 illegal 0.0000
|
||||||
|
0.0000 illegal 0.0000
|
||||||
|
0.0000 illegal 0.0000
|
||||||
|
0.0000 illegal 0.0000
|
||||||
|
__________ 0.0000 __________
|
||||||
|
"""
|
||||||
|
|
||||||
|
q_values_k_1_action_south: """
|
||||||
|
__________ illegal __________
|
||||||
|
illegal 0.0000 illegal
|
||||||
|
illegal 0.0000 illegal
|
||||||
|
illegal 0.0000 illegal
|
||||||
|
illegal 0.0000 illegal
|
||||||
|
illegal 0.0000 illegal
|
||||||
|
__________ illegal __________
|
||||||
|
"""
|
||||||
|
|
||||||
|
q_values_k_1_action_west: """
|
||||||
|
__________ illegal __________
|
||||||
|
illegal 0.0000 illegal
|
||||||
|
illegal 0.0000 illegal
|
||||||
|
illegal 0.0000 illegal
|
||||||
|
illegal 0.0000 illegal
|
||||||
|
illegal 0.0000 illegal
|
||||||
|
__________ illegal __________
|
||||||
|
"""
|
||||||
|
|
||||||
|
weights_k_2: """
|
||||||
|
{((0, 1), 'exit'): 0,
|
||||||
|
((0, 2), 'exit'): 0,
|
||||||
|
((0, 3), 'exit'): 0,
|
||||||
|
((0, 4), 'exit'): 0,
|
||||||
|
((0, 5), 'exit'): -10.0,
|
||||||
|
((1, 0), 'exit'): 0,
|
||||||
|
((1, 1), 'east'): 0,
|
||||||
|
((1, 1), 'north'): 0,
|
||||||
|
((1, 1), 'south'): 0,
|
||||||
|
((1, 1), 'west'): 0,
|
||||||
|
((1, 2), 'east'): 0,
|
||||||
|
((1, 2), 'north'): 0,
|
||||||
|
((1, 2), 'south'): 0,
|
||||||
|
((1, 2), 'west'): 0,
|
||||||
|
((1, 3), 'east'): 0,
|
||||||
|
((1, 3), 'north'): 0,
|
||||||
|
((1, 3), 'south'): 0,
|
||||||
|
((1, 3), 'west'): 0,
|
||||||
|
((1, 4), 'east'): 0,
|
||||||
|
((1, 4), 'north'): 0,
|
||||||
|
((1, 4), 'south'): 0,
|
||||||
|
((1, 4), 'west'): 0,
|
||||||
|
((1, 5), 'east'): 0,
|
||||||
|
((1, 5), 'north'): 0,
|
||||||
|
((1, 5), 'south'): 0.0,
|
||||||
|
((1, 5), 'west'): 0,
|
||||||
|
((1, 6), 'exit'): 0,
|
||||||
|
((2, 1), 'exit'): 0,
|
||||||
|
((2, 2), 'exit'): 0,
|
||||||
|
((2, 3), 'exit'): 0,
|
||||||
|
((2, 4), 'exit'): 0,
|
||||||
|
((2, 5), 'exit'): 0}
|
||||||
|
"""
|
||||||
|
|
||||||
|
q_values_k_2_action_north: """
|
||||||
|
__________ illegal __________
|
||||||
|
illegal 0.0000 illegal
|
||||||
|
illegal 0.0000 illegal
|
||||||
|
illegal 0.0000 illegal
|
||||||
|
illegal 0.0000 illegal
|
||||||
|
illegal 0.0000 illegal
|
||||||
|
__________ illegal __________
|
||||||
|
"""
|
||||||
|
|
||||||
|
q_values_k_2_action_east: """
|
||||||
|
__________ illegal __________
|
||||||
|
illegal 0.0000 illegal
|
||||||
|
illegal 0.0000 illegal
|
||||||
|
illegal 0.0000 illegal
|
||||||
|
illegal 0.0000 illegal
|
||||||
|
illegal 0.0000 illegal
|
||||||
|
__________ illegal __________
|
||||||
|
"""
|
||||||
|
|
||||||
|
q_values_k_2_action_exit: """
|
||||||
|
__________ 0.0000 __________
|
||||||
|
-10.0000 illegal 0.0000
|
||||||
|
0.0000 illegal 0.0000
|
||||||
|
0.0000 illegal 0.0000
|
||||||
|
0.0000 illegal 0.0000
|
||||||
|
0.0000 illegal 0.0000
|
||||||
|
__________ 0.0000 __________
|
||||||
|
"""
|
||||||
|
|
||||||
|
q_values_k_2_action_south: """
|
||||||
|
__________ illegal __________
|
||||||
|
illegal 0.0000 illegal
|
||||||
|
illegal 0.0000 illegal
|
||||||
|
illegal 0.0000 illegal
|
||||||
|
illegal 0.0000 illegal
|
||||||
|
illegal 0.0000 illegal
|
||||||
|
__________ illegal __________
|
||||||
|
"""
|
||||||
|
|
||||||
|
q_values_k_2_action_west: """
|
||||||
|
__________ illegal __________
|
||||||
|
illegal 0.0000 illegal
|
||||||
|
illegal 0.0000 illegal
|
||||||
|
illegal 0.0000 illegal
|
||||||
|
illegal 0.0000 illegal
|
||||||
|
illegal 0.0000 illegal
|
||||||
|
__________ illegal __________
|
||||||
|
"""
|
||||||
|
|
||||||
|
weights_k_3: """
|
||||||
|
{((0, 1), 'exit'): -10.0,
|
||||||
|
((0, 2), 'exit'): 0,
|
||||||
|
((0, 3), 'exit'): 0,
|
||||||
|
((0, 4), 'exit'): 0,
|
||||||
|
((0, 5), 'exit'): -10.0,
|
||||||
|
((1, 0), 'exit'): 0,
|
||||||
|
((1, 1), 'east'): 0,
|
||||||
|
((1, 1), 'north'): 0,
|
||||||
|
((1, 1), 'south'): 0,
|
||||||
|
((1, 1), 'west'): 0,
|
||||||
|
((1, 2), 'east'): 0,
|
||||||
|
((1, 2), 'north'): 0,
|
||||||
|
((1, 2), 'south'): 0,
|
||||||
|
((1, 2), 'west'): 0,
|
||||||
|
((1, 3), 'east'): 0,
|
||||||
|
((1, 3), 'north'): 0,
|
||||||
|
((1, 3), 'south'): 0,
|
||||||
|
((1, 3), 'west'): 0,
|
||||||
|
((1, 4), 'east'): 0,
|
||||||
|
((1, 4), 'north'): 0,
|
||||||
|
((1, 4), 'south'): 0,
|
||||||
|
((1, 4), 'west'): 0,
|
||||||
|
((1, 5), 'east'): 0,
|
||||||
|
((1, 5), 'north'): 0,
|
||||||
|
((1, 5), 'south'): 0.0,
|
||||||
|
((1, 5), 'west'): 0,
|
||||||
|
((1, 6), 'exit'): 0,
|
||||||
|
((2, 1), 'exit'): 0,
|
||||||
|
((2, 2), 'exit'): 0,
|
||||||
|
((2, 3), 'exit'): 0,
|
||||||
|
((2, 4), 'exit'): 0,
|
||||||
|
((2, 5), 'exit'): 0}
|
||||||
|
"""
|
||||||
|
|
||||||
|
q_values_k_3_action_north: """
|
||||||
|
__________ illegal __________
|
||||||
|
illegal 0.0000 illegal
|
||||||
|
illegal 0.0000 illegal
|
||||||
|
illegal 0.0000 illegal
|
||||||
|
illegal 0.0000 illegal
|
||||||
|
illegal 0.0000 illegal
|
||||||
|
__________ illegal __________
|
||||||
|
"""
|
||||||
|
|
||||||
|
q_values_k_3_action_east: """
|
||||||
|
__________ illegal __________
|
||||||
|
illegal 0.0000 illegal
|
||||||
|
illegal 0.0000 illegal
|
||||||
|
illegal 0.0000 illegal
|
||||||
|
illegal 0.0000 illegal
|
||||||
|
illegal 0.0000 illegal
|
||||||
|
__________ illegal __________
|
||||||
|
"""
|
||||||
|
|
||||||
|
q_values_k_3_action_exit: """
|
||||||
|
__________ 0.0000 __________
|
||||||
|
-10.0000 illegal 0.0000
|
||||||
|
0.0000 illegal 0.0000
|
||||||
|
0.0000 illegal 0.0000
|
||||||
|
0.0000 illegal 0.0000
|
||||||
|
-10.0000 illegal 0.0000
|
||||||
|
__________ 0.0000 __________
|
||||||
|
"""
|
||||||
|
|
||||||
|
q_values_k_3_action_south: """
|
||||||
|
__________ illegal __________
|
||||||
|
illegal 0.0000 illegal
|
||||||
|
illegal 0.0000 illegal
|
||||||
|
illegal 0.0000 illegal
|
||||||
|
illegal 0.0000 illegal
|
||||||
|
illegal 0.0000 illegal
|
||||||
|
__________ illegal __________
|
||||||
|
"""
|
||||||
|
|
||||||
|
q_values_k_3_action_west: """
|
||||||
|
__________ illegal __________
|
||||||
|
illegal 0.0000 illegal
|
||||||
|
illegal 0.0000 illegal
|
||||||
|
illegal 0.0000 illegal
|
||||||
|
illegal 0.0000 illegal
|
||||||
|
illegal 0.0000 illegal
|
||||||
|
__________ illegal __________
|
||||||
|
"""
|
||||||
|
|
||||||
|
weights_k_4: """
|
||||||
|
{((0, 1), 'exit'): -10.0,
|
||||||
|
((0, 2), 'exit'): 0,
|
||||||
|
((0, 3), 'exit'): 0,
|
||||||
|
((0, 4), 'exit'): -10.0,
|
||||||
|
((0, 5), 'exit'): -10.0,
|
||||||
|
((1, 0), 'exit'): 0,
|
||||||
|
((1, 1), 'east'): 0,
|
||||||
|
((1, 1), 'north'): 0,
|
||||||
|
((1, 1), 'south'): 0,
|
||||||
|
((1, 1), 'west'): 0,
|
||||||
|
((1, 2), 'east'): 0,
|
||||||
|
((1, 2), 'north'): 0,
|
||||||
|
((1, 2), 'south'): 0,
|
||||||
|
((1, 2), 'west'): 0,
|
||||||
|
((1, 3), 'east'): 0,
|
||||||
|
((1, 3), 'north'): 0,
|
||||||
|
((1, 3), 'south'): 0,
|
||||||
|
((1, 3), 'west'): 0,
|
||||||
|
((1, 4), 'east'): 0,
|
||||||
|
((1, 4), 'north'): 0,
|
||||||
|
((1, 4), 'south'): 0,
|
||||||
|
((1, 4), 'west'): 0,
|
||||||
|
((1, 5), 'east'): 0,
|
||||||
|
((1, 5), 'north'): 0,
|
||||||
|
((1, 5), 'south'): 0.0,
|
||||||
|
((1, 5), 'west'): 0,
|
||||||
|
((1, 6), 'exit'): 0,
|
||||||
|
((2, 1), 'exit'): 0,
|
||||||
|
((2, 2), 'exit'): 0,
|
||||||
|
((2, 3), 'exit'): 0,
|
||||||
|
((2, 4), 'exit'): 0,
|
||||||
|
((2, 5), 'exit'): 0}
|
||||||
|
"""
|
||||||
|
|
||||||
|
q_values_k_4_action_north: """
|
||||||
|
__________ illegal __________
|
||||||
|
illegal 0.0000 illegal
|
||||||
|
illegal 0.0000 illegal
|
||||||
|
illegal 0.0000 illegal
|
||||||
|
illegal 0.0000 illegal
|
||||||
|
illegal 0.0000 illegal
|
||||||
|
__________ illegal __________
|
||||||
|
"""
|
||||||
|
|
||||||
|
q_values_k_4_action_east: """
|
||||||
|
__________ illegal __________
|
||||||
|
illegal 0.0000 illegal
|
||||||
|
illegal 0.0000 illegal
|
||||||
|
illegal 0.0000 illegal
|
||||||
|
illegal 0.0000 illegal
|
||||||
|
illegal 0.0000 illegal
|
||||||
|
__________ illegal __________
|
||||||
|
"""
|
||||||
|
|
||||||
|
q_values_k_4_action_exit: """
|
||||||
|
__________ 0.0000 __________
|
||||||
|
-10.0000 illegal 0.0000
|
||||||
|
-10.0000 illegal 0.0000
|
||||||
|
0.0000 illegal 0.0000
|
||||||
|
0.0000 illegal 0.0000
|
||||||
|
-10.0000 illegal 0.0000
|
||||||
|
__________ 0.0000 __________
|
||||||
|
"""
|
||||||
|
|
||||||
|
q_values_k_4_action_south: """
|
||||||
|
__________ illegal __________
|
||||||
|
illegal 0.0000 illegal
|
||||||
|
illegal 0.0000 illegal
|
||||||
|
illegal 0.0000 illegal
|
||||||
|
illegal 0.0000 illegal
|
||||||
|
illegal 0.0000 illegal
|
||||||
|
__________ illegal __________
|
||||||
|
"""
|
||||||
|
|
||||||
|
q_values_k_4_action_west: """
|
||||||
|
__________ illegal __________
|
||||||
|
illegal 0.0000 illegal
|
||||||
|
illegal 0.0000 illegal
|
||||||
|
illegal 0.0000 illegal
|
||||||
|
illegal 0.0000 illegal
|
||||||
|
illegal 0.0000 illegal
|
||||||
|
__________ illegal __________
|
||||||
|
"""
|
||||||
|
|
||||||
|
weights_k_5: """
|
||||||
|
{((0, 1), 'exit'): -10.0,
|
||||||
|
((0, 2), 'exit'): 0,
|
||||||
|
((0, 3), 'exit'): 0,
|
||||||
|
((0, 4), 'exit'): -10.0,
|
||||||
|
((0, 5), 'exit'): -10.0,
|
||||||
|
((1, 0), 'exit'): 0,
|
||||||
|
((1, 1), 'east'): 0,
|
||||||
|
((1, 1), 'north'): 0,
|
||||||
|
((1, 1), 'south'): 0,
|
||||||
|
((1, 1), 'west'): 0,
|
||||||
|
((1, 2), 'east'): 0,
|
||||||
|
((1, 2), 'north'): 0,
|
||||||
|
((1, 2), 'south'): 0,
|
||||||
|
((1, 2), 'west'): 0,
|
||||||
|
((1, 3), 'east'): 0,
|
||||||
|
((1, 3), 'north'): 0,
|
||||||
|
((1, 3), 'south'): 0,
|
||||||
|
((1, 3), 'west'): 0,
|
||||||
|
((1, 4), 'east'): 0,
|
||||||
|
((1, 4), 'north'): 0,
|
||||||
|
((1, 4), 'south'): 0,
|
||||||
|
((1, 4), 'west'): 0,
|
||||||
|
((1, 5), 'east'): 0,
|
||||||
|
((1, 5), 'north'): 0,
|
||||||
|
((1, 5), 'south'): 0.0,
|
||||||
|
((1, 5), 'west'): 0,
|
||||||
|
((1, 6), 'exit'): 0,
|
||||||
|
((2, 1), 'exit'): 0,
|
||||||
|
((2, 2), 'exit'): 0,
|
||||||
|
((2, 3), 'exit'): -10.0,
|
||||||
|
((2, 4), 'exit'): 0,
|
||||||
|
((2, 5), 'exit'): 0}
|
||||||
|
"""
|
||||||
|
|
||||||
|
q_values_k_5_action_north: """
|
||||||
|
__________ illegal __________
|
||||||
|
illegal 0.0000 illegal
|
||||||
|
illegal 0.0000 illegal
|
||||||
|
illegal 0.0000 illegal
|
||||||
|
illegal 0.0000 illegal
|
||||||
|
illegal 0.0000 illegal
|
||||||
|
__________ illegal __________
|
||||||
|
"""
|
||||||
|
|
||||||
|
q_values_k_5_action_east: """
|
||||||
|
__________ illegal __________
|
||||||
|
illegal 0.0000 illegal
|
||||||
|
illegal 0.0000 illegal
|
||||||
|
illegal 0.0000 illegal
|
||||||
|
illegal 0.0000 illegal
|
||||||
|
illegal 0.0000 illegal
|
||||||
|
__________ illegal __________
|
||||||
|
"""
|
||||||
|
|
||||||
|
q_values_k_5_action_exit: """
|
||||||
|
__________ 0.0000 __________
|
||||||
|
-10.0000 illegal 0.0000
|
||||||
|
-10.0000 illegal 0.0000
|
||||||
|
0.0000 illegal -10.0000
|
||||||
|
0.0000 illegal 0.0000
|
||||||
|
-10.0000 illegal 0.0000
|
||||||
|
__________ 0.0000 __________
|
||||||
|
"""
|
||||||
|
|
||||||
|
q_values_k_5_action_south: """
|
||||||
|
__________ illegal __________
|
||||||
|
illegal 0.0000 illegal
|
||||||
|
illegal 0.0000 illegal
|
||||||
|
illegal 0.0000 illegal
|
||||||
|
illegal 0.0000 illegal
|
||||||
|
illegal 0.0000 illegal
|
||||||
|
__________ illegal __________
|
||||||
|
"""
|
||||||
|
|
||||||
|
q_values_k_5_action_west: """
|
||||||
|
__________ illegal __________
|
||||||
|
illegal 0.0000 illegal
|
||||||
|
illegal 0.0000 illegal
|
||||||
|
illegal 0.0000 illegal
|
||||||
|
illegal 0.0000 illegal
|
||||||
|
illegal 0.0000 illegal
|
||||||
|
__________ illegal __________
|
||||||
|
"""
|
||||||
|
|
||||||
|
weights_k_6: """
|
||||||
|
{((0, 1), 'exit'): -10.0,
|
||||||
|
((0, 2), 'exit'): 0,
|
||||||
|
((0, 3), 'exit'): -10.0,
|
||||||
|
((0, 4), 'exit'): -10.0,
|
||||||
|
((0, 5), 'exit'): -10.0,
|
||||||
|
((1, 0), 'exit'): 0,
|
||||||
|
((1, 1), 'east'): 0,
|
||||||
|
((1, 1), 'north'): 0,
|
||||||
|
((1, 1), 'south'): 0,
|
||||||
|
((1, 1), 'west'): 0,
|
||||||
|
((1, 2), 'east'): 0,
|
||||||
|
((1, 2), 'north'): 0,
|
||||||
|
((1, 2), 'south'): 0,
|
||||||
|
((1, 2), 'west'): 0,
|
||||||
|
((1, 3), 'east'): 0,
|
||||||
|
((1, 3), 'north'): 0,
|
||||||
|
((1, 3), 'south'): 0,
|
||||||
|
((1, 3), 'west'): 0,
|
||||||
|
((1, 4), 'east'): 0,
|
||||||
|
((1, 4), 'north'): 0,
|
||||||
|
((1, 4), 'south'): 0,
|
||||||
|
((1, 4), 'west'): 0,
|
||||||
|
((1, 5), 'east'): 0,
|
||||||
|
((1, 5), 'north'): 0,
|
||||||
|
((1, 5), 'south'): 0.0,
|
||||||
|
((1, 5), 'west'): 0,
|
||||||
|
((1, 6), 'exit'): 0,
|
||||||
|
((2, 1), 'exit'): 0,
|
||||||
|
((2, 2), 'exit'): 0,
|
||||||
|
((2, 3), 'exit'): -10.0,
|
||||||
|
((2, 4), 'exit'): 0,
|
||||||
|
((2, 5), 'exit'): 0}
|
||||||
|
"""
|
||||||
|
|
||||||
|
q_values_k_6_action_north: """
|
||||||
|
__________ illegal __________
|
||||||
|
illegal 0.0000 illegal
|
||||||
|
illegal 0.0000 illegal
|
||||||
|
illegal 0.0000 illegal
|
||||||
|
illegal 0.0000 illegal
|
||||||
|
illegal 0.0000 illegal
|
||||||
|
__________ illegal __________
|
||||||
|
"""
|
||||||
|
|
||||||
|
q_values_k_6_action_east: """
|
||||||
|
__________ illegal __________
|
||||||
|
illegal 0.0000 illegal
|
||||||
|
illegal 0.0000 illegal
|
||||||
|
illegal 0.0000 illegal
|
||||||
|
illegal 0.0000 illegal
|
||||||
|
illegal 0.0000 illegal
|
||||||
|
__________ illegal __________
|
||||||
|
"""
|
||||||
|
|
||||||
|
q_values_k_6_action_exit: """
|
||||||
|
__________ 0.0000 __________
|
||||||
|
-10.0000 illegal 0.0000
|
||||||
|
-10.0000 illegal 0.0000
|
||||||
|
-10.0000 illegal -10.0000
|
||||||
|
0.0000 illegal 0.0000
|
||||||
|
-10.0000 illegal 0.0000
|
||||||
|
__________ 0.0000 __________
|
||||||
|
"""
|
||||||
|
|
||||||
|
q_values_k_6_action_south: """
|
||||||
|
__________ illegal __________
|
||||||
|
illegal 0.0000 illegal
|
||||||
|
illegal 0.0000 illegal
|
||||||
|
illegal 0.0000 illegal
|
||||||
|
illegal 0.0000 illegal
|
||||||
|
illegal 0.0000 illegal
|
||||||
|
__________ illegal __________
|
||||||
|
"""
|
||||||
|
|
||||||
|
q_values_k_6_action_west: """
|
||||||
|
__________ illegal __________
|
||||||
|
illegal 0.0000 illegal
|
||||||
|
illegal 0.0000 illegal
|
||||||
|
illegal 0.0000 illegal
|
||||||
|
illegal 0.0000 illegal
|
||||||
|
illegal 0.0000 illegal
|
||||||
|
__________ illegal __________
|
||||||
|
"""
|
||||||
|
|
||||||
|
weights_k_7: """
|
||||||
|
{((0, 1), 'exit'): -10.0,
|
||||||
|
((0, 2), 'exit'): 0,
|
||||||
|
((0, 3), 'exit'): -10.0,
|
||||||
|
((0, 4), 'exit'): -10.0,
|
||||||
|
((0, 5), 'exit'): -10.0,
|
||||||
|
((1, 0), 'exit'): 0,
|
||||||
|
((1, 1), 'east'): 0,
|
||||||
|
((1, 1), 'north'): 0,
|
||||||
|
((1, 1), 'south'): 0,
|
||||||
|
((1, 1), 'west'): 0,
|
||||||
|
((1, 2), 'east'): 0,
|
||||||
|
((1, 2), 'north'): 0,
|
||||||
|
((1, 2), 'south'): 0,
|
||||||
|
((1, 2), 'west'): 0,
|
||||||
|
((1, 3), 'east'): 0,
|
||||||
|
((1, 3), 'north'): 0,
|
||||||
|
((1, 3), 'south'): 0.0,
|
||||||
|
((1, 3), 'west'): 0,
|
||||||
|
((1, 4), 'east'): 0,
|
||||||
|
((1, 4), 'north'): 0,
|
||||||
|
((1, 4), 'south'): 0,
|
||||||
|
((1, 4), 'west'): 0,
|
||||||
|
((1, 5), 'east'): 0,
|
||||||
|
((1, 5), 'north'): 0,
|
||||||
|
((1, 5), 'south'): 0.0,
|
||||||
|
((1, 5), 'west'): 0,
|
||||||
|
((1, 6), 'exit'): 0,
|
||||||
|
((2, 1), 'exit'): 0,
|
||||||
|
((2, 2), 'exit'): 0,
|
||||||
|
((2, 3), 'exit'): -10.0,
|
||||||
|
((2, 4), 'exit'): 0,
|
||||||
|
((2, 5), 'exit'): 0}
|
||||||
|
"""
|
||||||
|
|
||||||
|
q_values_k_7_action_north: """
|
||||||
|
__________ illegal __________
|
||||||
|
illegal 0.0000 illegal
|
||||||
|
illegal 0.0000 illegal
|
||||||
|
illegal 0.0000 illegal
|
||||||
|
illegal 0.0000 illegal
|
||||||
|
illegal 0.0000 illegal
|
||||||
|
__________ illegal __________
|
||||||
|
"""
|
||||||
|
|
||||||
|
q_values_k_7_action_east: """
|
||||||
|
__________ illegal __________
|
||||||
|
illegal 0.0000 illegal
|
||||||
|
illegal 0.0000 illegal
|
||||||
|
illegal 0.0000 illegal
|
||||||
|
illegal 0.0000 illegal
|
||||||
|
illegal 0.0000 illegal
|
||||||
|
__________ illegal __________
|
||||||
|
"""
|
||||||
|
|
||||||
|
q_values_k_7_action_exit: """
|
||||||
|
__________ 0.0000 __________
|
||||||
|
-10.0000 illegal 0.0000
|
||||||
|
-10.0000 illegal 0.0000
|
||||||
|
-10.0000 illegal -10.0000
|
||||||
|
0.0000 illegal 0.0000
|
||||||
|
-10.0000 illegal 0.0000
|
||||||
|
__________ 0.0000 __________
|
||||||
|
"""
|
||||||
|
|
||||||
|
q_values_k_7_action_south: """
|
||||||
|
__________ illegal __________
|
||||||
|
illegal 0.0000 illegal
|
||||||
|
illegal 0.0000 illegal
|
||||||
|
illegal 0.0000 illegal
|
||||||
|
illegal 0.0000 illegal
|
||||||
|
illegal 0.0000 illegal
|
||||||
|
__________ illegal __________
|
||||||
|
"""
|
||||||
|
|
||||||
|
q_values_k_7_action_west: """
|
||||||
|
__________ illegal __________
|
||||||
|
illegal 0.0000 illegal
|
||||||
|
illegal 0.0000 illegal
|
||||||
|
illegal 0.0000 illegal
|
||||||
|
illegal 0.0000 illegal
|
||||||
|
illegal 0.0000 illegal
|
||||||
|
__________ illegal __________
|
||||||
|
"""
|
||||||
|
|
||||||
|
weights_k_8: """
|
||||||
|
{((0, 1), 'exit'): -10.0,
|
||||||
|
((0, 2), 'exit'): -10.0,
|
||||||
|
((0, 3), 'exit'): -10.0,
|
||||||
|
((0, 4), 'exit'): -10.0,
|
||||||
|
((0, 5), 'exit'): -10.0,
|
||||||
|
((1, 0), 'exit'): 0,
|
||||||
|
((1, 1), 'east'): 0,
|
||||||
|
((1, 1), 'north'): 0,
|
||||||
|
((1, 1), 'south'): 0,
|
||||||
|
((1, 1), 'west'): 0,
|
||||||
|
((1, 2), 'east'): 0,
|
||||||
|
((1, 2), 'north'): 0,
|
||||||
|
((1, 2), 'south'): 0,
|
||||||
|
((1, 2), 'west'): 0,
|
||||||
|
((1, 3), 'east'): 0,
|
||||||
|
((1, 3), 'north'): 0,
|
||||||
|
((1, 3), 'south'): 0.0,
|
||||||
|
((1, 3), 'west'): 0,
|
||||||
|
((1, 4), 'east'): 0,
|
||||||
|
((1, 4), 'north'): 0,
|
||||||
|
((1, 4), 'south'): 0,
|
||||||
|
((1, 4), 'west'): 0,
|
||||||
|
((1, 5), 'east'): 0,
|
||||||
|
((1, 5), 'north'): 0,
|
||||||
|
((1, 5), 'south'): 0.0,
|
||||||
|
((1, 5), 'west'): 0,
|
||||||
|
((1, 6), 'exit'): 0,
|
||||||
|
((2, 1), 'exit'): 0,
|
||||||
|
((2, 2), 'exit'): 0,
|
||||||
|
((2, 3), 'exit'): -10.0,
|
||||||
|
((2, 4), 'exit'): 0,
|
||||||
|
((2, 5), 'exit'): 0}
|
||||||
|
"""
|
||||||
|
|
||||||
|
q_values_k_8_action_north: """
|
||||||
|
__________ illegal __________
|
||||||
|
illegal 0.0000 illegal
|
||||||
|
illegal 0.0000 illegal
|
||||||
|
illegal 0.0000 illegal
|
||||||
|
illegal 0.0000 illegal
|
||||||
|
illegal 0.0000 illegal
|
||||||
|
__________ illegal __________
|
||||||
|
"""
|
||||||
|
|
||||||
|
q_values_k_8_action_east: """
|
||||||
|
__________ illegal __________
|
||||||
|
illegal 0.0000 illegal
|
||||||
|
illegal 0.0000 illegal
|
||||||
|
illegal 0.0000 illegal
|
||||||
|
illegal 0.0000 illegal
|
||||||
|
illegal 0.0000 illegal
|
||||||
|
__________ illegal __________
|
||||||
|
"""
|
||||||
|
|
||||||
|
q_values_k_8_action_exit: """
|
||||||
|
__________ 0.0000 __________
|
||||||
|
-10.0000 illegal 0.0000
|
||||||
|
-10.0000 illegal 0.0000
|
||||||
|
-10.0000 illegal -10.0000
|
||||||
|
-10.0000 illegal 0.0000
|
||||||
|
-10.0000 illegal 0.0000
|
||||||
|
__________ 0.0000 __________
|
||||||
|
"""
|
||||||
|
|
||||||
|
q_values_k_8_action_south: """
|
||||||
|
__________ illegal __________
|
||||||
|
illegal 0.0000 illegal
|
||||||
|
illegal 0.0000 illegal
|
||||||
|
illegal 0.0000 illegal
|
||||||
|
illegal 0.0000 illegal
|
||||||
|
illegal 0.0000 illegal
|
||||||
|
__________ illegal __________
|
||||||
|
"""
|
||||||
|
|
||||||
|
q_values_k_8_action_west: """
|
||||||
|
__________ illegal __________
|
||||||
|
illegal 0.0000 illegal
|
||||||
|
illegal 0.0000 illegal
|
||||||
|
illegal 0.0000 illegal
|
||||||
|
illegal 0.0000 illegal
|
||||||
|
illegal 0.0000 illegal
|
||||||
|
__________ illegal __________
|
||||||
|
"""
|
||||||
|
|
||||||
|
weights_k_9: """
|
||||||
|
{((0, 1), 'exit'): -10.0,
|
||||||
|
((0, 2), 'exit'): -10.0,
|
||||||
|
((0, 3), 'exit'): -10.0,
|
||||||
|
((0, 4), 'exit'): -10.0,
|
||||||
|
((0, 5), 'exit'): -10.0,
|
||||||
|
((1, 0), 'exit'): 0.1,
|
||||||
|
((1, 1), 'east'): 0,
|
||||||
|
((1, 1), 'north'): 0,
|
||||||
|
((1, 1), 'south'): 0,
|
||||||
|
((1, 1), 'west'): 0,
|
||||||
|
((1, 2), 'east'): 0,
|
||||||
|
((1, 2), 'north'): 0,
|
||||||
|
((1, 2), 'south'): 0,
|
||||||
|
((1, 2), 'west'): 0,
|
||||||
|
((1, 3), 'east'): 0,
|
||||||
|
((1, 3), 'north'): 0,
|
||||||
|
((1, 3), 'south'): 0.0,
|
||||||
|
((1, 3), 'west'): 0,
|
||||||
|
((1, 4), 'east'): 0,
|
||||||
|
((1, 4), 'north'): 0,
|
||||||
|
((1, 4), 'south'): 0,
|
||||||
|
((1, 4), 'west'): 0,
|
||||||
|
((1, 5), 'east'): 0,
|
||||||
|
((1, 5), 'north'): 0,
|
||||||
|
((1, 5), 'south'): 0.0,
|
||||||
|
((1, 5), 'west'): 0,
|
||||||
|
((1, 6), 'exit'): 0,
|
||||||
|
((2, 1), 'exit'): 0,
|
||||||
|
((2, 2), 'exit'): 0,
|
||||||
|
((2, 3), 'exit'): -10.0,
|
||||||
|
((2, 4), 'exit'): 0,
|
||||||
|
((2, 5), 'exit'): 0}
|
||||||
|
"""
|
||||||
|
|
||||||
|
q_values_k_9_action_north: """
|
||||||
|
__________ illegal __________
|
||||||
|
illegal 0.0000 illegal
|
||||||
|
illegal 0.0000 illegal
|
||||||
|
illegal 0.0000 illegal
|
||||||
|
illegal 0.0000 illegal
|
||||||
|
illegal 0.0000 illegal
|
||||||
|
__________ illegal __________
|
||||||
|
"""
|
||||||
|
|
||||||
|
q_values_k_9_action_east: """
|
||||||
|
__________ illegal __________
|
||||||
|
illegal 0.0000 illegal
|
||||||
|
illegal 0.0000 illegal
|
||||||
|
illegal 0.0000 illegal
|
||||||
|
illegal 0.0000 illegal
|
||||||
|
illegal 0.0000 illegal
|
||||||
|
__________ illegal __________
|
||||||
|
"""
|
||||||
|
|
||||||
|
q_values_k_9_action_exit: """
|
||||||
|
__________ 0.0000 __________
|
||||||
|
-10.0000 illegal 0.0000
|
||||||
|
-10.0000 illegal 0.0000
|
||||||
|
-10.0000 illegal -10.0000
|
||||||
|
-10.0000 illegal 0.0000
|
||||||
|
-10.0000 illegal 0.0000
|
||||||
|
__________ 0.1000 __________
|
||||||
|
"""
|
||||||
|
|
||||||
|
q_values_k_9_action_south: """
|
||||||
|
__________ illegal __________
|
||||||
|
illegal 0.0000 illegal
|
||||||
|
illegal 0.0000 illegal
|
||||||
|
illegal 0.0000 illegal
|
||||||
|
illegal 0.0000 illegal
|
||||||
|
illegal 0.0000 illegal
|
||||||
|
__________ illegal __________
|
||||||
|
"""
|
||||||
|
|
||||||
|
q_values_k_9_action_west: """
|
||||||
|
__________ illegal __________
|
||||||
|
illegal 0.0000 illegal
|
||||||
|
illegal 0.0000 illegal
|
||||||
|
illegal 0.0000 illegal
|
||||||
|
illegal 0.0000 illegal
|
||||||
|
illegal 0.0000 illegal
|
||||||
|
__________ illegal __________
|
||||||
|
"""
|
||||||
|
|
||||||
|
weights_k_500: """
|
||||||
|
{((0, 1), 'exit'): -92.82102012308148,
|
||||||
|
((0, 2), 'exit'): -97.21871610556306,
|
||||||
|
((0, 3), 'exit'): -89.05810108684878,
|
||||||
|
((0, 4), 'exit'): -97.74716004550608,
|
||||||
|
((0, 5), 'exit'): -96.56631617970748,
|
||||||
|
((1, 0), 'exit'): 0.9576088417247839,
|
||||||
|
((1, 1), 'east'): -31.68839649871871,
|
||||||
|
((1, 1), 'north'): -0.5871409700255297,
|
||||||
|
((1, 1), 'south'): -5.571799344704395,
|
||||||
|
((1, 1), 'west'): -20.777007017445538,
|
||||||
|
((1, 2), 'east'): -43.595242197319,
|
||||||
|
((1, 2), 'north'): -1.264202431807023,
|
||||||
|
((1, 2), 'south'): -8.401530599975509,
|
||||||
|
((1, 2), 'west'): -16.283916171605192,
|
||||||
|
((1, 3), 'east'): -3.6956691,
|
||||||
|
((1, 3), 'north'): -0.16712710492783758,
|
||||||
|
((1, 3), 'south'): -3.4722840178579073,
|
||||||
|
((1, 3), 'west'): -40.58867937480968,
|
||||||
|
((1, 4), 'east'): -26.553386621338632,
|
||||||
|
((1, 4), 'north'): -0.799493322153628,
|
||||||
|
((1, 4), 'south'): -6.727671187497919,
|
||||||
|
((1, 4), 'west'): -39.06095135014759,
|
||||||
|
((1, 5), 'east'): -17.067638934181446,
|
||||||
|
((1, 5), 'north'): -5.864753060887024,
|
||||||
|
((1, 5), 'south'): -6.83769420759525,
|
||||||
|
((1, 5), 'west'): -27.062643066307515,
|
||||||
|
((1, 6), 'exit'): 9.353891811077332,
|
||||||
|
((2, 1), 'exit'): -97.21871610556306,
|
||||||
|
((2, 2), 'exit'): -87.84233454094309,
|
||||||
|
((2, 3), 'exit'): -96.90968456173674,
|
||||||
|
((2, 4), 'exit'): -94.185026299696,
|
||||||
|
((2, 5), 'exit'): -96.90968456173674}
|
||||||
|
"""
|
||||||
|
|
||||||
|
q_values_k_500_action_north: """
|
||||||
|
__________ illegal __________
|
||||||
|
illegal -5.8648 illegal
|
||||||
|
illegal -0.7995 illegal
|
||||||
|
illegal -0.1671 illegal
|
||||||
|
illegal -1.2642 illegal
|
||||||
|
illegal -0.5871 illegal
|
||||||
|
__________ illegal __________
|
||||||
|
"""
|
||||||
|
|
||||||
|
q_values_k_500_action_east: """
|
||||||
|
__________ illegal __________
|
||||||
|
illegal -17.0676 illegal
|
||||||
|
illegal -26.5534 illegal
|
||||||
|
illegal -3.6957 illegal
|
||||||
|
illegal -43.5952 illegal
|
||||||
|
illegal -31.6884 illegal
|
||||||
|
__________ illegal __________
|
||||||
|
"""
|
||||||
|
|
||||||
|
q_values_k_500_action_exit: """
|
||||||
|
__________ 9.3539 __________
|
||||||
|
-96.5663 illegal -96.9097
|
||||||
|
-97.7472 illegal -94.1850
|
||||||
|
-89.0581 illegal -96.9097
|
||||||
|
-97.2187 illegal -87.8423
|
||||||
|
-92.8210 illegal -97.2187
|
||||||
|
__________ 0.9576 __________
|
||||||
|
"""
|
||||||
|
|
||||||
|
q_values_k_500_action_south: """
|
||||||
|
__________ illegal __________
|
||||||
|
illegal -6.8377 illegal
|
||||||
|
illegal -6.7277 illegal
|
||||||
|
illegal -3.4723 illegal
|
||||||
|
illegal -8.4015 illegal
|
||||||
|
illegal -5.5718 illegal
|
||||||
|
__________ illegal __________
|
||||||
|
"""
|
||||||
|
|
||||||
|
q_values_k_500_action_west: """
|
||||||
|
__________ illegal __________
|
||||||
|
illegal -27.0626 illegal
|
||||||
|
illegal -39.0610 illegal
|
||||||
|
illegal -40.5887 illegal
|
||||||
|
illegal -16.2839 illegal
|
||||||
|
illegal -20.7770 illegal
|
||||||
|
__________ illegal __________
|
||||||
|
"""
|
||||||
|
|
||||||
27
p3_rl/test_cases/q8/3-bridge.test
Normal file
27
p3_rl/test_cases/q8/3-bridge.test
Normal file
@@ -0,0 +1,27 @@
|
|||||||
|
class: "ApproximateQLearningTest"
|
||||||
|
|
||||||
|
# GridWorld specification
|
||||||
|
# _ is empty space
|
||||||
|
# numbers are terminal states with that value
|
||||||
|
# # is a wall
|
||||||
|
# S is a start state
|
||||||
|
#
|
||||||
|
grid: """
|
||||||
|
# 10 #
|
||||||
|
-100 _ -100
|
||||||
|
-100 _ -100
|
||||||
|
-100 _ -100
|
||||||
|
-100 _ -100
|
||||||
|
-100 S -100
|
||||||
|
# 1 #
|
||||||
|
"""
|
||||||
|
gridName: "bridgeGrid"
|
||||||
|
discount: "0.85"
|
||||||
|
noise: "0.1"
|
||||||
|
livingReward: "0.0"
|
||||||
|
epsilon: "0.5"
|
||||||
|
learningRate: "0.1"
|
||||||
|
numExperiences: "500"
|
||||||
|
valueIterations: "100"
|
||||||
|
iterations: "10000"
|
||||||
|
|
||||||
1210
p3_rl/test_cases/q8/4-discountgrid.solution
Normal file
1210
p3_rl/test_cases/q8/4-discountgrid.solution
Normal file
File diff suppressed because it is too large
Load Diff
24
p3_rl/test_cases/q8/4-discountgrid.test
Normal file
24
p3_rl/test_cases/q8/4-discountgrid.test
Normal file
@@ -0,0 +1,24 @@
|
|||||||
|
class: "ApproximateQLearningTest"
|
||||||
|
|
||||||
|
# GridWorld specification
|
||||||
|
# _ is empty space
|
||||||
|
# numbers are terminal states with that value
|
||||||
|
# # is a wall
|
||||||
|
# S is a start state
|
||||||
|
#
|
||||||
|
grid: """
|
||||||
|
-10 _ 10 _ _
|
||||||
|
-10 _ # _ _
|
||||||
|
-10 _ 1 _ _
|
||||||
|
-10 _ # # _
|
||||||
|
-10 S _ _ _
|
||||||
|
"""
|
||||||
|
discount: "0.9"
|
||||||
|
noise: "0.2"
|
||||||
|
livingReward: "0.0"
|
||||||
|
epsilon: "0.2"
|
||||||
|
learningRate: "0.1"
|
||||||
|
numExperiences: "3000"
|
||||||
|
valueIterations: "100"
|
||||||
|
iterations: "10000"
|
||||||
|
|
||||||
880
p3_rl/test_cases/q8/5-coord-extractor.solution
Normal file
880
p3_rl/test_cases/q8/5-coord-extractor.solution
Normal file
@@ -0,0 +1,880 @@
|
|||||||
|
weights_k_0: """
|
||||||
|
{'action=east': 0,
|
||||||
|
'action=exit': 0,
|
||||||
|
'action=north': 0,
|
||||||
|
'action=south': 0,
|
||||||
|
'action=west': 0,
|
||||||
|
'x=0': 0,
|
||||||
|
'x=1': 0,
|
||||||
|
'x=2': 0,
|
||||||
|
'x=3': 0,
|
||||||
|
'x=4': 0,
|
||||||
|
'y=0': 0,
|
||||||
|
'y=1': 0,
|
||||||
|
'y=2': 0,
|
||||||
|
'y=3': 0,
|
||||||
|
'y=4': 0,
|
||||||
|
(0, 0): 0,
|
||||||
|
(0, 1): 0,
|
||||||
|
(0, 2): 0,
|
||||||
|
(0, 3): 0,
|
||||||
|
(0, 4): 0,
|
||||||
|
(1, 0): 0,
|
||||||
|
(1, 1): 0,
|
||||||
|
(1, 2): 0,
|
||||||
|
(1, 3): 0,
|
||||||
|
(1, 4): 0,
|
||||||
|
(2, 0): 0,
|
||||||
|
(2, 2): 0,
|
||||||
|
(2, 4): 0,
|
||||||
|
(3, 0): 0,
|
||||||
|
(3, 2): 0,
|
||||||
|
(3, 3): 0,
|
||||||
|
(3, 4): 0,
|
||||||
|
(4, 0): 0,
|
||||||
|
(4, 1): 0,
|
||||||
|
(4, 2): 0,
|
||||||
|
(4, 3): 0,
|
||||||
|
(4, 4): 0}
|
||||||
|
"""
|
||||||
|
|
||||||
|
q_values_k_0_action_north: """
|
||||||
|
illegal 0.0000 illegal 0.0000 0.0000
|
||||||
|
illegal 0.0000 __________ 0.0000 0.0000
|
||||||
|
illegal 0.0000 illegal 0.0000 0.0000
|
||||||
|
illegal 0.0000 __________ __________ 0.0000
|
||||||
|
illegal 0.0000 0.0000 0.0000 0.0000
|
||||||
|
"""
|
||||||
|
|
||||||
|
q_values_k_0_action_east: """
|
||||||
|
illegal 0.0000 illegal 0.0000 0.0000
|
||||||
|
illegal 0.0000 __________ 0.0000 0.0000
|
||||||
|
illegal 0.0000 illegal 0.0000 0.0000
|
||||||
|
illegal 0.0000 __________ __________ 0.0000
|
||||||
|
illegal 0.0000 0.0000 0.0000 0.0000
|
||||||
|
"""
|
||||||
|
|
||||||
|
q_values_k_0_action_exit: """
|
||||||
|
0.0000 illegal 0.0000 illegal illegal
|
||||||
|
0.0000 illegal __________ illegal illegal
|
||||||
|
0.0000 illegal 0.0000 illegal illegal
|
||||||
|
0.0000 illegal __________ __________ illegal
|
||||||
|
0.0000 illegal illegal illegal illegal
|
||||||
|
"""
|
||||||
|
|
||||||
|
q_values_k_0_action_south: """
|
||||||
|
illegal 0.0000 illegal 0.0000 0.0000
|
||||||
|
illegal 0.0000 __________ 0.0000 0.0000
|
||||||
|
illegal 0.0000 illegal 0.0000 0.0000
|
||||||
|
illegal 0.0000 __________ __________ 0.0000
|
||||||
|
illegal 0.0000 0.0000 0.0000 0.0000
|
||||||
|
"""
|
||||||
|
|
||||||
|
q_values_k_0_action_west: """
|
||||||
|
illegal 0.0000 illegal 0.0000 0.0000
|
||||||
|
illegal 0.0000 __________ 0.0000 0.0000
|
||||||
|
illegal 0.0000 illegal 0.0000 0.0000
|
||||||
|
illegal 0.0000 __________ __________ 0.0000
|
||||||
|
illegal 0.0000 0.0000 0.0000 0.0000
|
||||||
|
"""
|
||||||
|
|
||||||
|
weights_k_1: """
|
||||||
|
{'action=east': 0,
|
||||||
|
'action=exit': 0,
|
||||||
|
'action=north': 0,
|
||||||
|
'action=south': 0.0,
|
||||||
|
'action=west': 0,
|
||||||
|
'x=0': 0,
|
||||||
|
'x=1': 0.0,
|
||||||
|
'x=2': 0,
|
||||||
|
'x=3': 0,
|
||||||
|
'x=4': 0,
|
||||||
|
'y=0': 0,
|
||||||
|
'y=1': 0.0,
|
||||||
|
'y=2': 0,
|
||||||
|
'y=3': 0,
|
||||||
|
'y=4': 0,
|
||||||
|
(0, 0): 0,
|
||||||
|
(0, 1): 0,
|
||||||
|
(0, 2): 0,
|
||||||
|
(0, 3): 0,
|
||||||
|
(0, 4): 0,
|
||||||
|
(1, 0): 0.0,
|
||||||
|
(1, 1): 0,
|
||||||
|
(1, 2): 0,
|
||||||
|
(1, 3): 0,
|
||||||
|
(1, 4): 0,
|
||||||
|
(2, 0): 0,
|
||||||
|
(2, 2): 0,
|
||||||
|
(2, 4): 0,
|
||||||
|
(3, 0): 0,
|
||||||
|
(3, 2): 0,
|
||||||
|
(3, 3): 0,
|
||||||
|
(3, 4): 0,
|
||||||
|
(4, 0): 0,
|
||||||
|
(4, 1): 0,
|
||||||
|
(4, 2): 0,
|
||||||
|
(4, 3): 0,
|
||||||
|
(4, 4): 0}
|
||||||
|
"""
|
||||||
|
|
||||||
|
q_values_k_1_action_north: """
|
||||||
|
illegal 0.0000 illegal 0.0000 0.0000
|
||||||
|
illegal 0.0000 __________ 0.0000 0.0000
|
||||||
|
illegal 0.0000 illegal 0.0000 0.0000
|
||||||
|
illegal 0.0000 __________ __________ 0.0000
|
||||||
|
illegal 0.0000 0.0000 0.0000 0.0000
|
||||||
|
"""
|
||||||
|
|
||||||
|
q_values_k_1_action_east: """
|
||||||
|
illegal 0.0000 illegal 0.0000 0.0000
|
||||||
|
illegal 0.0000 __________ 0.0000 0.0000
|
||||||
|
illegal 0.0000 illegal 0.0000 0.0000
|
||||||
|
illegal 0.0000 __________ __________ 0.0000
|
||||||
|
illegal 0.0000 0.0000 0.0000 0.0000
|
||||||
|
"""
|
||||||
|
|
||||||
|
q_values_k_1_action_exit: """
|
||||||
|
0.0000 illegal 0.0000 illegal illegal
|
||||||
|
0.0000 illegal __________ illegal illegal
|
||||||
|
0.0000 illegal 0.0000 illegal illegal
|
||||||
|
0.0000 illegal __________ __________ illegal
|
||||||
|
0.0000 illegal illegal illegal illegal
|
||||||
|
"""
|
||||||
|
|
||||||
|
q_values_k_1_action_south: """
|
||||||
|
illegal 0.0000 illegal 0.0000 0.0000
|
||||||
|
illegal 0.0000 __________ 0.0000 0.0000
|
||||||
|
illegal 0.0000 illegal 0.0000 0.0000
|
||||||
|
illegal 0.0000 __________ __________ 0.0000
|
||||||
|
illegal 0.0000 0.0000 0.0000 0.0000
|
||||||
|
"""
|
||||||
|
|
||||||
|
q_values_k_1_action_west: """
|
||||||
|
illegal 0.0000 illegal 0.0000 0.0000
|
||||||
|
illegal 0.0000 __________ 0.0000 0.0000
|
||||||
|
illegal 0.0000 illegal 0.0000 0.0000
|
||||||
|
illegal 0.0000 __________ __________ 0.0000
|
||||||
|
illegal 0.0000 0.0000 0.0000 0.0000
|
||||||
|
"""
|
||||||
|
|
||||||
|
weights_k_2: """
|
||||||
|
{'action=east': 0,
|
||||||
|
'action=exit': 0,
|
||||||
|
'action=north': 0,
|
||||||
|
'action=south': 0.0,
|
||||||
|
'action=west': 0,
|
||||||
|
'x=0': 0,
|
||||||
|
'x=1': 0.0,
|
||||||
|
'x=2': 0,
|
||||||
|
'x=3': 0.0,
|
||||||
|
'x=4': 0,
|
||||||
|
'y=0': 0,
|
||||||
|
'y=1': 0.0,
|
||||||
|
'y=2': 0,
|
||||||
|
'y=3': 0.0,
|
||||||
|
'y=4': 0,
|
||||||
|
(0, 0): 0,
|
||||||
|
(0, 1): 0,
|
||||||
|
(0, 2): 0,
|
||||||
|
(0, 3): 0,
|
||||||
|
(0, 4): 0,
|
||||||
|
(1, 0): 0.0,
|
||||||
|
(1, 1): 0,
|
||||||
|
(1, 2): 0,
|
||||||
|
(1, 3): 0,
|
||||||
|
(1, 4): 0,
|
||||||
|
(2, 0): 0,
|
||||||
|
(2, 2): 0,
|
||||||
|
(2, 4): 0,
|
||||||
|
(3, 0): 0.0,
|
||||||
|
(3, 2): 0,
|
||||||
|
(3, 3): 0,
|
||||||
|
(3, 4): 0,
|
||||||
|
(4, 0): 0,
|
||||||
|
(4, 1): 0,
|
||||||
|
(4, 2): 0,
|
||||||
|
(4, 3): 0,
|
||||||
|
(4, 4): 0}
|
||||||
|
"""
|
||||||
|
|
||||||
|
q_values_k_2_action_north: """
|
||||||
|
illegal 0.0000 illegal 0.0000 0.0000
|
||||||
|
illegal 0.0000 __________ 0.0000 0.0000
|
||||||
|
illegal 0.0000 illegal 0.0000 0.0000
|
||||||
|
illegal 0.0000 __________ __________ 0.0000
|
||||||
|
illegal 0.0000 0.0000 0.0000 0.0000
|
||||||
|
"""
|
||||||
|
|
||||||
|
q_values_k_2_action_east: """
|
||||||
|
illegal 0.0000 illegal 0.0000 0.0000
|
||||||
|
illegal 0.0000 __________ 0.0000 0.0000
|
||||||
|
illegal 0.0000 illegal 0.0000 0.0000
|
||||||
|
illegal 0.0000 __________ __________ 0.0000
|
||||||
|
illegal 0.0000 0.0000 0.0000 0.0000
|
||||||
|
"""
|
||||||
|
|
||||||
|
q_values_k_2_action_exit: """
|
||||||
|
0.0000 illegal 0.0000 illegal illegal
|
||||||
|
0.0000 illegal __________ illegal illegal
|
||||||
|
0.0000 illegal 0.0000 illegal illegal
|
||||||
|
0.0000 illegal __________ __________ illegal
|
||||||
|
0.0000 illegal illegal illegal illegal
|
||||||
|
"""
|
||||||
|
|
||||||
|
q_values_k_2_action_south: """
|
||||||
|
illegal 0.0000 illegal 0.0000 0.0000
|
||||||
|
illegal 0.0000 __________ 0.0000 0.0000
|
||||||
|
illegal 0.0000 illegal 0.0000 0.0000
|
||||||
|
illegal 0.0000 __________ __________ 0.0000
|
||||||
|
illegal 0.0000 0.0000 0.0000 0.0000
|
||||||
|
"""
|
||||||
|
|
||||||
|
q_values_k_2_action_west: """
|
||||||
|
illegal 0.0000 illegal 0.0000 0.0000
|
||||||
|
illegal 0.0000 __________ 0.0000 0.0000
|
||||||
|
illegal 0.0000 illegal 0.0000 0.0000
|
||||||
|
illegal 0.0000 __________ __________ 0.0000
|
||||||
|
illegal 0.0000 0.0000 0.0000 0.0000
|
||||||
|
"""
|
||||||
|
|
||||||
|
weights_k_3: """
|
||||||
|
{'action=east': 0,
|
||||||
|
'action=exit': -1.0,
|
||||||
|
'action=north': 0,
|
||||||
|
'action=south': 0.0,
|
||||||
|
'action=west': 0,
|
||||||
|
'x=0': -1.0,
|
||||||
|
'x=1': 0.0,
|
||||||
|
'x=2': 0,
|
||||||
|
'x=3': 0.0,
|
||||||
|
'x=4': 0,
|
||||||
|
'y=0': -1.0,
|
||||||
|
'y=1': 0.0,
|
||||||
|
'y=2': 0,
|
||||||
|
'y=3': 0.0,
|
||||||
|
'y=4': 0,
|
||||||
|
(0, 0): -1.0,
|
||||||
|
(0, 1): 0,
|
||||||
|
(0, 2): 0,
|
||||||
|
(0, 3): 0,
|
||||||
|
(0, 4): 0,
|
||||||
|
(1, 0): 0.0,
|
||||||
|
(1, 1): 0,
|
||||||
|
(1, 2): 0,
|
||||||
|
(1, 3): 0,
|
||||||
|
(1, 4): 0,
|
||||||
|
(2, 0): 0,
|
||||||
|
(2, 2): 0,
|
||||||
|
(2, 4): 0,
|
||||||
|
(3, 0): 0.0,
|
||||||
|
(3, 2): 0,
|
||||||
|
(3, 3): 0,
|
||||||
|
(3, 4): 0,
|
||||||
|
(4, 0): 0,
|
||||||
|
(4, 1): 0,
|
||||||
|
(4, 2): 0,
|
||||||
|
(4, 3): 0,
|
||||||
|
(4, 4): 0}
|
||||||
|
"""
|
||||||
|
|
||||||
|
q_values_k_3_action_north: """
|
||||||
|
illegal 0.0000 illegal 0.0000 0.0000
|
||||||
|
illegal 0.0000 __________ 0.0000 0.0000
|
||||||
|
illegal 0.0000 illegal 0.0000 0.0000
|
||||||
|
illegal 0.0000 __________ __________ 0.0000
|
||||||
|
illegal 0.0000 0.0000 0.0000 0.0000
|
||||||
|
"""
|
||||||
|
|
||||||
|
q_values_k_3_action_east: """
|
||||||
|
illegal 0.0000 illegal 0.0000 0.0000
|
||||||
|
illegal 0.0000 __________ 0.0000 0.0000
|
||||||
|
illegal 0.0000 illegal 0.0000 0.0000
|
||||||
|
illegal 0.0000 __________ __________ 0.0000
|
||||||
|
illegal 0.0000 0.0000 0.0000 0.0000
|
||||||
|
"""
|
||||||
|
|
||||||
|
q_values_k_3_action_exit: """
|
||||||
|
-3.0000 illegal -1.0000 illegal illegal
|
||||||
|
-3.0000 illegal __________ illegal illegal
|
||||||
|
-3.0000 illegal -1.0000 illegal illegal
|
||||||
|
-3.0000 illegal __________ __________ illegal
|
||||||
|
-4.0000 illegal illegal illegal illegal
|
||||||
|
"""
|
||||||
|
|
||||||
|
q_values_k_3_action_south: """
|
||||||
|
illegal 0.0000 illegal 0.0000 0.0000
|
||||||
|
illegal 0.0000 __________ 0.0000 0.0000
|
||||||
|
illegal 0.0000 illegal 0.0000 0.0000
|
||||||
|
illegal 0.0000 __________ __________ 0.0000
|
||||||
|
illegal 0.0000 0.0000 0.0000 0.0000
|
||||||
|
"""
|
||||||
|
|
||||||
|
q_values_k_3_action_west: """
|
||||||
|
illegal 0.0000 illegal 0.0000 0.0000
|
||||||
|
illegal 0.0000 __________ 0.0000 0.0000
|
||||||
|
illegal 0.0000 illegal 0.0000 0.0000
|
||||||
|
illegal 0.0000 __________ __________ 0.0000
|
||||||
|
illegal 0.0000 0.0000 0.0000 0.0000
|
||||||
|
"""
|
||||||
|
|
||||||
|
weights_k_4: """
|
||||||
|
{'action=east': 0.0,
|
||||||
|
'action=exit': -1.0,
|
||||||
|
'action=north': 0,
|
||||||
|
'action=south': 0.0,
|
||||||
|
'action=west': 0,
|
||||||
|
'x=0': -1.0,
|
||||||
|
'x=1': 0.0,
|
||||||
|
'x=2': 0,
|
||||||
|
'x=3': 0.0,
|
||||||
|
'x=4': 0,
|
||||||
|
'y=0': -1.0,
|
||||||
|
'y=1': 0.0,
|
||||||
|
'y=2': 0,
|
||||||
|
'y=3': 0.0,
|
||||||
|
'y=4': 0,
|
||||||
|
(0, 0): -1.0,
|
||||||
|
(0, 1): 0,
|
||||||
|
(0, 2): 0,
|
||||||
|
(0, 3): 0,
|
||||||
|
(0, 4): 0,
|
||||||
|
(1, 0): 0.0,
|
||||||
|
(1, 1): 0,
|
||||||
|
(1, 2): 0,
|
||||||
|
(1, 3): 0,
|
||||||
|
(1, 4): 0,
|
||||||
|
(2, 0): 0,
|
||||||
|
(2, 2): 0,
|
||||||
|
(2, 4): 0,
|
||||||
|
(3, 0): 0.0,
|
||||||
|
(3, 2): 0,
|
||||||
|
(3, 3): 0,
|
||||||
|
(3, 4): 0,
|
||||||
|
(4, 0): 0,
|
||||||
|
(4, 1): 0,
|
||||||
|
(4, 2): 0,
|
||||||
|
(4, 3): 0,
|
||||||
|
(4, 4): 0}
|
||||||
|
"""
|
||||||
|
|
||||||
|
q_values_k_4_action_north: """
|
||||||
|
illegal 0.0000 illegal 0.0000 0.0000
|
||||||
|
illegal 0.0000 __________ 0.0000 0.0000
|
||||||
|
illegal 0.0000 illegal 0.0000 0.0000
|
||||||
|
illegal 0.0000 __________ __________ 0.0000
|
||||||
|
illegal 0.0000 0.0000 0.0000 0.0000
|
||||||
|
"""
|
||||||
|
|
||||||
|
q_values_k_4_action_east: """
|
||||||
|
illegal 0.0000 illegal 0.0000 0.0000
|
||||||
|
illegal 0.0000 __________ 0.0000 0.0000
|
||||||
|
illegal 0.0000 illegal 0.0000 0.0000
|
||||||
|
illegal 0.0000 __________ __________ 0.0000
|
||||||
|
illegal 0.0000 0.0000 0.0000 0.0000
|
||||||
|
"""
|
||||||
|
|
||||||
|
q_values_k_4_action_exit: """
|
||||||
|
-3.0000 illegal -1.0000 illegal illegal
|
||||||
|
-3.0000 illegal __________ illegal illegal
|
||||||
|
-3.0000 illegal -1.0000 illegal illegal
|
||||||
|
-3.0000 illegal __________ __________ illegal
|
||||||
|
-4.0000 illegal illegal illegal illegal
|
||||||
|
"""
|
||||||
|
|
||||||
|
q_values_k_4_action_south: """
|
||||||
|
illegal 0.0000 illegal 0.0000 0.0000
|
||||||
|
illegal 0.0000 __________ 0.0000 0.0000
|
||||||
|
illegal 0.0000 illegal 0.0000 0.0000
|
||||||
|
illegal 0.0000 __________ __________ 0.0000
|
||||||
|
illegal 0.0000 0.0000 0.0000 0.0000
|
||||||
|
"""
|
||||||
|
|
||||||
|
q_values_k_4_action_west: """
|
||||||
|
illegal 0.0000 illegal 0.0000 0.0000
|
||||||
|
illegal 0.0000 __________ 0.0000 0.0000
|
||||||
|
illegal 0.0000 illegal 0.0000 0.0000
|
||||||
|
illegal 0.0000 __________ __________ 0.0000
|
||||||
|
illegal 0.0000 0.0000 0.0000 0.0000
|
||||||
|
"""
|
||||||
|
|
||||||
|
weights_k_5: """
|
||||||
|
{'action=east': 0.0,
|
||||||
|
'action=exit': -1.0,
|
||||||
|
'action=north': 0,
|
||||||
|
'action=south': 0.0,
|
||||||
|
'action=west': 0.0,
|
||||||
|
'x=0': -1.0,
|
||||||
|
'x=1': 0.0,
|
||||||
|
'x=2': 0,
|
||||||
|
'x=3': 0.0,
|
||||||
|
'x=4': 0.0,
|
||||||
|
'y=0': -1.0,
|
||||||
|
'y=1': 0.0,
|
||||||
|
'y=2': 0,
|
||||||
|
'y=3': 0.0,
|
||||||
|
'y=4': 0.0,
|
||||||
|
(0, 0): -1.0,
|
||||||
|
(0, 1): 0,
|
||||||
|
(0, 2): 0,
|
||||||
|
(0, 3): 0,
|
||||||
|
(0, 4): 0,
|
||||||
|
(1, 0): 0.0,
|
||||||
|
(1, 1): 0,
|
||||||
|
(1, 2): 0,
|
||||||
|
(1, 3): 0,
|
||||||
|
(1, 4): 0,
|
||||||
|
(2, 0): 0,
|
||||||
|
(2, 2): 0,
|
||||||
|
(2, 4): 0,
|
||||||
|
(3, 0): 0.0,
|
||||||
|
(3, 2): 0,
|
||||||
|
(3, 3): 0,
|
||||||
|
(3, 4): 0,
|
||||||
|
(4, 0): 0,
|
||||||
|
(4, 1): 0.0,
|
||||||
|
(4, 2): 0,
|
||||||
|
(4, 3): 0,
|
||||||
|
(4, 4): 0}
|
||||||
|
"""
|
||||||
|
|
||||||
|
q_values_k_5_action_north: """
|
||||||
|
illegal 0.0000 illegal 0.0000 0.0000
|
||||||
|
illegal 0.0000 __________ 0.0000 0.0000
|
||||||
|
illegal 0.0000 illegal 0.0000 0.0000
|
||||||
|
illegal 0.0000 __________ __________ 0.0000
|
||||||
|
illegal 0.0000 0.0000 0.0000 0.0000
|
||||||
|
"""
|
||||||
|
|
||||||
|
q_values_k_5_action_east: """
|
||||||
|
illegal 0.0000 illegal 0.0000 0.0000
|
||||||
|
illegal 0.0000 __________ 0.0000 0.0000
|
||||||
|
illegal 0.0000 illegal 0.0000 0.0000
|
||||||
|
illegal 0.0000 __________ __________ 0.0000
|
||||||
|
illegal 0.0000 0.0000 0.0000 0.0000
|
||||||
|
"""
|
||||||
|
|
||||||
|
q_values_k_5_action_exit: """
|
||||||
|
-3.0000 illegal -1.0000 illegal illegal
|
||||||
|
-3.0000 illegal __________ illegal illegal
|
||||||
|
-3.0000 illegal -1.0000 illegal illegal
|
||||||
|
-3.0000 illegal __________ __________ illegal
|
||||||
|
-4.0000 illegal illegal illegal illegal
|
||||||
|
"""
|
||||||
|
|
||||||
|
q_values_k_5_action_south: """
|
||||||
|
illegal 0.0000 illegal 0.0000 0.0000
|
||||||
|
illegal 0.0000 __________ 0.0000 0.0000
|
||||||
|
illegal 0.0000 illegal 0.0000 0.0000
|
||||||
|
illegal 0.0000 __________ __________ 0.0000
|
||||||
|
illegal 0.0000 0.0000 0.0000 0.0000
|
||||||
|
"""
|
||||||
|
|
||||||
|
q_values_k_5_action_west: """
|
||||||
|
illegal 0.0000 illegal 0.0000 0.0000
|
||||||
|
illegal 0.0000 __________ 0.0000 0.0000
|
||||||
|
illegal 0.0000 illegal 0.0000 0.0000
|
||||||
|
illegal 0.0000 __________ __________ 0.0000
|
||||||
|
illegal 0.0000 0.0000 0.0000 0.0000
|
||||||
|
"""
|
||||||
|
|
||||||
|
weights_k_6: """
|
||||||
|
{'action=east': 0.0,
|
||||||
|
'action=exit': -1.7000000000000002,
|
||||||
|
'action=north': 0,
|
||||||
|
'action=south': 0.0,
|
||||||
|
'action=west': 0.0,
|
||||||
|
'x=0': -1.7000000000000002,
|
||||||
|
'x=1': 0.0,
|
||||||
|
'x=2': 0,
|
||||||
|
'x=3': 0.0,
|
||||||
|
'x=4': 0.0,
|
||||||
|
'y=0': -1.7000000000000002,
|
||||||
|
'y=1': 0.0,
|
||||||
|
'y=2': 0,
|
||||||
|
'y=3': 0.0,
|
||||||
|
'y=4': 0.0,
|
||||||
|
(0, 0): -1.0,
|
||||||
|
(0, 1): 0,
|
||||||
|
(0, 2): 0,
|
||||||
|
(0, 3): -0.7000000000000001,
|
||||||
|
(0, 4): 0,
|
||||||
|
(1, 0): 0.0,
|
||||||
|
(1, 1): 0,
|
||||||
|
(1, 2): 0,
|
||||||
|
(1, 3): 0,
|
||||||
|
(1, 4): 0,
|
||||||
|
(2, 0): 0,
|
||||||
|
(2, 2): 0,
|
||||||
|
(2, 4): 0,
|
||||||
|
(3, 0): 0.0,
|
||||||
|
(3, 2): 0,
|
||||||
|
(3, 3): 0,
|
||||||
|
(3, 4): 0,
|
||||||
|
(4, 0): 0,
|
||||||
|
(4, 1): 0.0,
|
||||||
|
(4, 2): 0,
|
||||||
|
(4, 3): 0,
|
||||||
|
(4, 4): 0}
|
||||||
|
"""
|
||||||
|
|
||||||
|
q_values_k_6_action_north: """
|
||||||
|
illegal 0.0000 illegal 0.0000 0.0000
|
||||||
|
illegal 0.0000 __________ 0.0000 0.0000
|
||||||
|
illegal 0.0000 illegal 0.0000 0.0000
|
||||||
|
illegal 0.0000 __________ __________ 0.0000
|
||||||
|
illegal 0.0000 0.0000 0.0000 0.0000
|
||||||
|
"""
|
||||||
|
|
||||||
|
q_values_k_6_action_east: """
|
||||||
|
illegal 0.0000 illegal 0.0000 0.0000
|
||||||
|
illegal 0.0000 __________ 0.0000 0.0000
|
||||||
|
illegal 0.0000 illegal 0.0000 0.0000
|
||||||
|
illegal 0.0000 __________ __________ 0.0000
|
||||||
|
illegal 0.0000 0.0000 0.0000 0.0000
|
||||||
|
"""
|
||||||
|
|
||||||
|
q_values_k_6_action_exit: """
|
||||||
|
-5.1000 illegal -1.7000 illegal illegal
|
||||||
|
-5.8000 illegal __________ illegal illegal
|
||||||
|
-5.1000 illegal -1.7000 illegal illegal
|
||||||
|
-5.1000 illegal __________ __________ illegal
|
||||||
|
-6.1000 illegal illegal illegal illegal
|
||||||
|
"""
|
||||||
|
|
||||||
|
q_values_k_6_action_south: """
|
||||||
|
illegal 0.0000 illegal 0.0000 0.0000
|
||||||
|
illegal 0.0000 __________ 0.0000 0.0000
|
||||||
|
illegal 0.0000 illegal 0.0000 0.0000
|
||||||
|
illegal 0.0000 __________ __________ 0.0000
|
||||||
|
illegal 0.0000 0.0000 0.0000 0.0000
|
||||||
|
"""
|
||||||
|
|
||||||
|
q_values_k_6_action_west: """
|
||||||
|
illegal 0.0000 illegal 0.0000 0.0000
|
||||||
|
illegal 0.0000 __________ 0.0000 0.0000
|
||||||
|
illegal 0.0000 illegal 0.0000 0.0000
|
||||||
|
illegal 0.0000 __________ __________ 0.0000
|
||||||
|
illegal 0.0000 0.0000 0.0000 0.0000
|
||||||
|
"""
|
||||||
|
|
||||||
|
weights_k_7: """
|
||||||
|
{'action=east': 0.0,
|
||||||
|
'action=exit': -1.4300000000000002,
|
||||||
|
'action=north': 0,
|
||||||
|
'action=south': 0.0,
|
||||||
|
'action=west': 0.0,
|
||||||
|
'x=0': -1.7000000000000002,
|
||||||
|
'x=1': 0.0,
|
||||||
|
'x=2': 0.27,
|
||||||
|
'x=3': 0.0,
|
||||||
|
'x=4': 0.0,
|
||||||
|
'y=0': -1.7000000000000002,
|
||||||
|
'y=1': 0.0,
|
||||||
|
'y=2': 0.27,
|
||||||
|
'y=3': 0.0,
|
||||||
|
'y=4': 0.0,
|
||||||
|
(0, 0): -1.0,
|
||||||
|
(0, 1): 0,
|
||||||
|
(0, 2): 0,
|
||||||
|
(0, 3): -0.7000000000000001,
|
||||||
|
(0, 4): 0,
|
||||||
|
(1, 0): 0.0,
|
||||||
|
(1, 1): 0,
|
||||||
|
(1, 2): 0,
|
||||||
|
(1, 3): 0,
|
||||||
|
(1, 4): 0,
|
||||||
|
(2, 0): 0,
|
||||||
|
(2, 2): 0.27,
|
||||||
|
(2, 4): 0,
|
||||||
|
(3, 0): 0.0,
|
||||||
|
(3, 2): 0,
|
||||||
|
(3, 3): 0,
|
||||||
|
(3, 4): 0,
|
||||||
|
(4, 0): 0,
|
||||||
|
(4, 1): 0.0,
|
||||||
|
(4, 2): 0,
|
||||||
|
(4, 3): 0,
|
||||||
|
(4, 4): 0}
|
||||||
|
"""
|
||||||
|
|
||||||
|
q_values_k_7_action_north: """
|
||||||
|
illegal 0.0000 illegal 0.0000 0.0000
|
||||||
|
illegal 0.0000 __________ 0.0000 0.0000
|
||||||
|
illegal 0.0000 illegal 0.0000 0.0000
|
||||||
|
illegal 0.0000 __________ __________ 0.0000
|
||||||
|
illegal 0.0000 0.5400 0.0000 0.0000
|
||||||
|
"""
|
||||||
|
|
||||||
|
q_values_k_7_action_east: """
|
||||||
|
illegal 0.0000 illegal 0.0000 0.0000
|
||||||
|
illegal 0.0000 __________ 0.0000 0.0000
|
||||||
|
illegal 0.0000 illegal 0.0000 0.0000
|
||||||
|
illegal 0.0000 __________ __________ 0.0000
|
||||||
|
illegal 0.0000 0.5400 0.0000 0.0000
|
||||||
|
"""
|
||||||
|
|
||||||
|
q_values_k_7_action_exit: """
|
||||||
|
-4.8300 illegal -0.8900 illegal illegal
|
||||||
|
-5.5300 illegal __________ illegal illegal
|
||||||
|
-4.8300 illegal -0.6200 illegal illegal
|
||||||
|
-4.8300 illegal __________ __________ illegal
|
||||||
|
-5.8300 illegal illegal illegal illegal
|
||||||
|
"""
|
||||||
|
|
||||||
|
q_values_k_7_action_south: """
|
||||||
|
illegal 0.0000 illegal 0.0000 0.0000
|
||||||
|
illegal 0.0000 __________ 0.0000 0.0000
|
||||||
|
illegal 0.0000 illegal 0.0000 0.0000
|
||||||
|
illegal 0.0000 __________ __________ 0.0000
|
||||||
|
illegal 0.0000 0.5400 0.0000 0.0000
|
||||||
|
"""
|
||||||
|
|
||||||
|
q_values_k_7_action_west: """
|
||||||
|
illegal 0.0000 illegal 0.0000 0.0000
|
||||||
|
illegal 0.0000 __________ 0.0000 0.0000
|
||||||
|
illegal 0.0000 illegal 0.0000 0.0000
|
||||||
|
illegal 0.0000 __________ __________ 0.0000
|
||||||
|
illegal 0.0000 0.5400 0.0000 0.0000
|
||||||
|
"""
|
||||||
|
|
||||||
|
weights_k_8: """
|
||||||
|
{'action=east': 0.0,
|
||||||
|
'action=exit': -1.947,
|
||||||
|
'action=north': 0,
|
||||||
|
'action=south': 0.0,
|
||||||
|
'action=west': 0.0,
|
||||||
|
'x=0': -2.217,
|
||||||
|
'x=1': 0.0,
|
||||||
|
'x=2': 0.27,
|
||||||
|
'x=3': 0.0,
|
||||||
|
'x=4': 0.0,
|
||||||
|
'y=0': -2.217,
|
||||||
|
'y=1': 0.0,
|
||||||
|
'y=2': 0.27,
|
||||||
|
'y=3': 0.0,
|
||||||
|
'y=4': 0.0,
|
||||||
|
(0, 0): -1.0,
|
||||||
|
(0, 1): -0.517,
|
||||||
|
(0, 2): 0,
|
||||||
|
(0, 3): -0.7000000000000001,
|
||||||
|
(0, 4): 0,
|
||||||
|
(1, 0): 0.0,
|
||||||
|
(1, 1): 0,
|
||||||
|
(1, 2): 0,
|
||||||
|
(1, 3): 0,
|
||||||
|
(1, 4): 0,
|
||||||
|
(2, 0): 0,
|
||||||
|
(2, 2): 0.27,
|
||||||
|
(2, 4): 0,
|
||||||
|
(3, 0): 0.0,
|
||||||
|
(3, 2): 0,
|
||||||
|
(3, 3): 0,
|
||||||
|
(3, 4): 0,
|
||||||
|
(4, 0): 0,
|
||||||
|
(4, 1): 0.0,
|
||||||
|
(4, 2): 0,
|
||||||
|
(4, 3): 0,
|
||||||
|
(4, 4): 0}
|
||||||
|
"""
|
||||||
|
|
||||||
|
q_values_k_8_action_north: """
|
||||||
|
illegal 0.0000 illegal 0.0000 0.0000
|
||||||
|
illegal 0.0000 __________ 0.0000 0.0000
|
||||||
|
illegal 0.0000 illegal 0.0000 0.0000
|
||||||
|
illegal 0.0000 __________ __________ 0.0000
|
||||||
|
illegal 0.0000 0.5400 0.0000 0.0000
|
||||||
|
"""
|
||||||
|
|
||||||
|
q_values_k_8_action_east: """
|
||||||
|
illegal 0.0000 illegal 0.0000 0.0000
|
||||||
|
illegal 0.0000 __________ 0.0000 0.0000
|
||||||
|
illegal 0.0000 illegal 0.0000 0.0000
|
||||||
|
illegal 0.0000 __________ __________ 0.0000
|
||||||
|
illegal 0.0000 0.5400 0.0000 0.0000
|
||||||
|
"""
|
||||||
|
|
||||||
|
q_values_k_8_action_exit: """
|
||||||
|
-6.3810 illegal -1.4070 illegal illegal
|
||||||
|
-7.0810 illegal __________ illegal illegal
|
||||||
|
-6.3810 illegal -1.1370 illegal illegal
|
||||||
|
-6.8980 illegal __________ __________ illegal
|
||||||
|
-7.3810 illegal illegal illegal illegal
|
||||||
|
"""
|
||||||
|
|
||||||
|
q_values_k_8_action_south: """
|
||||||
|
illegal 0.0000 illegal 0.0000 0.0000
|
||||||
|
illegal 0.0000 __________ 0.0000 0.0000
|
||||||
|
illegal 0.0000 illegal 0.0000 0.0000
|
||||||
|
illegal 0.0000 __________ __________ 0.0000
|
||||||
|
illegal 0.0000 0.5400 0.0000 0.0000
|
||||||
|
"""
|
||||||
|
|
||||||
|
q_values_k_8_action_west: """
|
||||||
|
illegal 0.0000 illegal 0.0000 0.0000
|
||||||
|
illegal 0.0000 __________ 0.0000 0.0000
|
||||||
|
illegal 0.0000 illegal 0.0000 0.0000
|
||||||
|
illegal 0.0000 __________ __________ 0.0000
|
||||||
|
illegal 0.0000 0.5400 0.0000 0.0000
|
||||||
|
"""
|
||||||
|
|
||||||
|
weights_k_9: """
|
||||||
|
{'action=east': 0.0,
|
||||||
|
'action=exit': -1.947,
|
||||||
|
'action=north': -0.62082,
|
||||||
|
'action=south': 0.0,
|
||||||
|
'action=west': 0.0,
|
||||||
|
'x=0': -2.217,
|
||||||
|
'x=1': -0.62082,
|
||||||
|
'x=2': 0.27,
|
||||||
|
'x=3': 0.0,
|
||||||
|
'x=4': 0.0,
|
||||||
|
'y=0': -2.217,
|
||||||
|
'y=1': -0.62082,
|
||||||
|
'y=2': 0.27,
|
||||||
|
'y=3': 0.0,
|
||||||
|
'y=4': 0.0,
|
||||||
|
(0, 0): -1.0,
|
||||||
|
(0, 1): -0.517,
|
||||||
|
(0, 2): 0,
|
||||||
|
(0, 3): -0.7000000000000001,
|
||||||
|
(0, 4): 0,
|
||||||
|
(1, 0): 0.0,
|
||||||
|
(1, 1): -0.62082,
|
||||||
|
(1, 2): 0,
|
||||||
|
(1, 3): 0,
|
||||||
|
(1, 4): 0,
|
||||||
|
(2, 0): 0,
|
||||||
|
(2, 2): 0.27,
|
||||||
|
(2, 4): 0,
|
||||||
|
(3, 0): 0.0,
|
||||||
|
(3, 2): 0,
|
||||||
|
(3, 3): 0,
|
||||||
|
(3, 4): 0,
|
||||||
|
(4, 0): 0,
|
||||||
|
(4, 1): 0.0,
|
||||||
|
(4, 2): 0,
|
||||||
|
(4, 3): 0,
|
||||||
|
(4, 4): 0}
|
||||||
|
"""
|
||||||
|
|
||||||
|
q_values_k_9_action_north: """
|
||||||
|
illegal -1.8625 illegal -0.6208 -0.6208
|
||||||
|
illegal -1.8625 __________ -0.6208 -0.6208
|
||||||
|
illegal -1.8625 illegal -0.6208 -0.6208
|
||||||
|
illegal -2.4833 __________ __________ -0.6208
|
||||||
|
illegal -1.8625 -0.0808 -0.6208 -0.6208
|
||||||
|
"""
|
||||||
|
|
||||||
|
q_values_k_9_action_east: """
|
||||||
|
illegal -1.2416 illegal 0.0000 0.0000
|
||||||
|
illegal -1.2416 __________ 0.0000 0.0000
|
||||||
|
illegal -1.2416 illegal 0.0000 0.0000
|
||||||
|
illegal -1.8625 __________ __________ 0.0000
|
||||||
|
illegal -1.2416 0.5400 0.0000 0.0000
|
||||||
|
"""
|
||||||
|
|
||||||
|
q_values_k_9_action_exit: """
|
||||||
|
-6.3810 illegal -1.4070 illegal illegal
|
||||||
|
-7.0810 illegal __________ illegal illegal
|
||||||
|
-6.3810 illegal -1.1370 illegal illegal
|
||||||
|
-6.8980 illegal __________ __________ illegal
|
||||||
|
-7.3810 illegal illegal illegal illegal
|
||||||
|
"""
|
||||||
|
|
||||||
|
q_values_k_9_action_south: """
|
||||||
|
illegal -1.2416 illegal 0.0000 0.0000
|
||||||
|
illegal -1.2416 __________ 0.0000 0.0000
|
||||||
|
illegal -1.2416 illegal 0.0000 0.0000
|
||||||
|
illegal -1.8625 __________ __________ 0.0000
|
||||||
|
illegal -1.2416 0.5400 0.0000 0.0000
|
||||||
|
"""
|
||||||
|
|
||||||
|
q_values_k_9_action_west: """
|
||||||
|
illegal -1.2416 illegal 0.0000 0.0000
|
||||||
|
illegal -1.2416 __________ 0.0000 0.0000
|
||||||
|
illegal -1.2416 illegal 0.0000 0.0000
|
||||||
|
illegal -1.8625 __________ __________ 0.0000
|
||||||
|
illegal -1.2416 0.5400 0.0000 0.0000
|
||||||
|
"""
|
||||||
|
|
||||||
|
weights_k_3000: """
|
||||||
|
{'action=east': 6.719916513522846,
|
||||||
|
'action=exit': -2.2444981376861555,
|
||||||
|
'action=north': 4.568574519923728,
|
||||||
|
'action=south': 3.761510351874819,
|
||||||
|
'action=west': 1.2828606322891556,
|
||||||
|
'x=0': -3.604063955849794,
|
||||||
|
'x=1': 0.6731476152061693,
|
||||||
|
'x=2': 4.000208353074704,
|
||||||
|
'x=3': 5.988311380073477,
|
||||||
|
'x=4': 7.0307604874198235,
|
||||||
|
'y=0': -3.604063955849794,
|
||||||
|
'y=1': 0.6731476152061693,
|
||||||
|
'y=2': 4.000208353074704,
|
||||||
|
'y=3': 5.988311380073477,
|
||||||
|
'y=4': 7.0307604874198235,
|
||||||
|
(0, 0): -0.7073688447583666,
|
||||||
|
(0, 1): -0.7542862401704076,
|
||||||
|
(0, 2): -0.7043014501203066,
|
||||||
|
(0, 3): -0.7433344649617668,
|
||||||
|
(0, 4): -0.6947729558389527,
|
||||||
|
(1, 0): 2.364273811399719,
|
||||||
|
(1, 1): -0.2695405704605499,
|
||||||
|
(1, 2): -0.7105979212702271,
|
||||||
|
(1, 3): -1.4866826750327933,
|
||||||
|
(1, 4): 0.7756949705700219,
|
||||||
|
(2, 0): 2.64064253491107,
|
||||||
|
(2, 2): -3.7381118310263166,
|
||||||
|
(2, 4): 5.097677649189953,
|
||||||
|
(3, 0): 2.505262939441149,
|
||||||
|
(3, 2): 0.27218788923837256,
|
||||||
|
(3, 3): 2.2611084206093195,
|
||||||
|
(3, 4): 0.9497521307846304,
|
||||||
|
(4, 0): 1.7330586015291545,
|
||||||
|
(4, 1): 0.980194046153168,
|
||||||
|
(4, 2): 0.78786289128181,
|
||||||
|
(4, 3): 1.493343270762865,
|
||||||
|
(4, 4): 2.0363016776928333}
|
||||||
|
"""
|
||||||
|
|
||||||
|
q_values_k_3000_action_north: """
|
||||||
|
illegal 6.6906 illegal 17.4949 20.6664
|
||||||
|
illegal 4.4282 __________ 18.8063 20.1234
|
||||||
|
illegal 5.2043 illegal 16.8174 19.4180
|
||||||
|
illegal 5.6453 __________ __________ 19.6103
|
||||||
|
illegal 8.2791 15.2096 19.0505 20.3632
|
||||||
|
"""
|
||||||
|
|
||||||
|
q_values_k_3000_action_east: """
|
||||||
|
illegal 8.8419 illegal 19.6463 22.8177
|
||||||
|
illegal 6.5795 __________ 20.9576 22.2748
|
||||||
|
illegal 7.3556 illegal 18.9687 21.5693
|
||||||
|
illegal 7.7967 __________ __________ 21.7616
|
||||||
|
illegal 10.4305 17.3610 21.2018 22.5145
|
||||||
|
"""
|
||||||
|
|
||||||
|
q_values_k_3000_action_exit: """
|
||||||
|
-10.1474 illegal 10.8536 illegal illegal
|
||||||
|
-10.1960 illegal __________ illegal illegal
|
||||||
|
-10.1569 illegal 2.0178 illegal illegal
|
||||||
|
-10.2069 illegal __________ __________ illegal
|
||||||
|
-10.1600 illegal illegal illegal illegal
|
||||||
|
"""
|
||||||
|
|
||||||
|
q_values_k_3000_action_south: """
|
||||||
|
illegal 5.8835 illegal 16.6879 19.8593
|
||||||
|
illegal 3.6211 __________ 17.9992 19.3164
|
||||||
|
illegal 4.3972 illegal 16.0103 18.6109
|
||||||
|
illegal 4.8383 __________ __________ 18.8032
|
||||||
|
illegal 7.4721 14.4026 18.2434 19.5561
|
||||||
|
"""
|
||||||
|
|
||||||
|
q_values_k_3000_action_west: """
|
||||||
|
illegal 3.4049 illegal 14.2092 17.3807
|
||||||
|
illegal 1.1425 __________ 15.5206 16.8377
|
||||||
|
illegal 1.9186 illegal 13.5317 16.1322
|
||||||
|
illegal 2.3596 __________ __________ 16.3246
|
||||||
|
illegal 4.9934 11.9239 15.7647 17.0774
|
||||||
|
"""
|
||||||
|
|
||||||
25
p3_rl/test_cases/q8/5-coord-extractor.test
Normal file
25
p3_rl/test_cases/q8/5-coord-extractor.test
Normal file
@@ -0,0 +1,25 @@
|
|||||||
|
class: "ApproximateQLearningTest"
|
||||||
|
|
||||||
|
# GridWorld specification
|
||||||
|
# _ is empty space
|
||||||
|
# numbers are terminal states with that value
|
||||||
|
# # is a wall
|
||||||
|
# S is a start state
|
||||||
|
#
|
||||||
|
grid: """
|
||||||
|
-10 _ 10 _ _
|
||||||
|
-10 _ # _ _
|
||||||
|
-10 _ 1 _ _
|
||||||
|
-10 _ # # _
|
||||||
|
-10 S _ _ _
|
||||||
|
"""
|
||||||
|
discount: "0.9"
|
||||||
|
noise: "0.2"
|
||||||
|
livingReward: "0.0"
|
||||||
|
epsilon: "0.2"
|
||||||
|
learningRate: "0.1"
|
||||||
|
numExperiences: "3000"
|
||||||
|
valueIterations: "100"
|
||||||
|
iterations: "10000"
|
||||||
|
extractor: "CoordinateExtractor"
|
||||||
|
|
||||||
2
p3_rl/test_cases/q8/CONFIG
Normal file
2
p3_rl/test_cases/q8/CONFIG
Normal file
@@ -0,0 +1,2 @@
|
|||||||
|
max_points: "3"
|
||||||
|
class: "PassAllTestsQuestion"
|
||||||
81
p3_rl/textDisplay.py
Normal file
81
p3_rl/textDisplay.py
Normal file
@@ -0,0 +1,81 @@
|
|||||||
|
# textDisplay.py
|
||||||
|
# --------------
|
||||||
|
# Licensing Information: You are free to use or extend these projects for
|
||||||
|
# educational purposes provided that (1) you do not distribute or publish
|
||||||
|
# solutions, (2) you retain this notice, and (3) you provide clear
|
||||||
|
# attribution to UC Berkeley, including a link to http://ai.berkeley.edu.
|
||||||
|
#
|
||||||
|
# Attribution Information: The Pacman AI projects were developed at UC Berkeley.
|
||||||
|
# The core projects and autograders were primarily created by John DeNero
|
||||||
|
# (denero@cs.berkeley.edu) and Dan Klein (klein@cs.berkeley.edu).
|
||||||
|
# Student side autograding was added by Brad Miller, Nick Hay, and
|
||||||
|
# Pieter Abbeel (pabbeel@cs.berkeley.edu).
|
||||||
|
|
||||||
|
|
||||||
|
import time
|
||||||
|
try:
|
||||||
|
import pacman
|
||||||
|
except:
|
||||||
|
pass
|
||||||
|
|
||||||
|
DRAW_EVERY = 1
|
||||||
|
SLEEP_TIME = 0 # This can be overwritten by __init__
|
||||||
|
DISPLAY_MOVES = False
|
||||||
|
QUIET = False # Supresses output
|
||||||
|
|
||||||
|
class NullGraphics:
|
||||||
|
def initialize(self, state, isBlue = False):
|
||||||
|
pass
|
||||||
|
|
||||||
|
def update(self, state):
|
||||||
|
pass
|
||||||
|
|
||||||
|
def checkNullDisplay(self):
|
||||||
|
return True
|
||||||
|
|
||||||
|
def pause(self):
|
||||||
|
time.sleep(SLEEP_TIME)
|
||||||
|
|
||||||
|
def draw(self, state):
|
||||||
|
print state
|
||||||
|
|
||||||
|
def updateDistributions(self, dist):
|
||||||
|
pass
|
||||||
|
|
||||||
|
def finish(self):
|
||||||
|
pass
|
||||||
|
|
||||||
|
class PacmanGraphics:
|
||||||
|
def __init__(self, speed=None):
|
||||||
|
if speed != None:
|
||||||
|
global SLEEP_TIME
|
||||||
|
SLEEP_TIME = speed
|
||||||
|
|
||||||
|
def initialize(self, state, isBlue = False):
|
||||||
|
self.draw(state)
|
||||||
|
self.pause()
|
||||||
|
self.turn = 0
|
||||||
|
self.agentCounter = 0
|
||||||
|
|
||||||
|
def update(self, state):
|
||||||
|
numAgents = len(state.agentStates)
|
||||||
|
self.agentCounter = (self.agentCounter + 1) % numAgents
|
||||||
|
if self.agentCounter == 0:
|
||||||
|
self.turn += 1
|
||||||
|
if DISPLAY_MOVES:
|
||||||
|
ghosts = [pacman.nearestPoint(state.getGhostPosition(i)) for i in range(1, numAgents)]
|
||||||
|
print "%4d) P: %-8s" % (self.turn, str(pacman.nearestPoint(state.getPacmanPosition()))),'| Score: %-5d' % state.score,'| Ghosts:', ghosts
|
||||||
|
if self.turn % DRAW_EVERY == 0:
|
||||||
|
self.draw(state)
|
||||||
|
self.pause()
|
||||||
|
if state._win or state._lose:
|
||||||
|
self.draw(state)
|
||||||
|
|
||||||
|
def pause(self):
|
||||||
|
time.sleep(SLEEP_TIME)
|
||||||
|
|
||||||
|
def draw(self, state):
|
||||||
|
print state
|
||||||
|
|
||||||
|
def finish(self):
|
||||||
|
pass
|
||||||
324
p3_rl/textGridworldDisplay.py
Normal file
324
p3_rl/textGridworldDisplay.py
Normal file
@@ -0,0 +1,324 @@
|
|||||||
|
# textGridworldDisplay.py
|
||||||
|
# -----------------------
|
||||||
|
# Licensing Information: You are free to use or extend these projects for
|
||||||
|
# educational purposes provided that (1) you do not distribute or publish
|
||||||
|
# solutions, (2) you retain this notice, and (3) you provide clear
|
||||||
|
# attribution to UC Berkeley, including a link to http://ai.berkeley.edu.
|
||||||
|
#
|
||||||
|
# Attribution Information: The Pacman AI projects were developed at UC Berkeley.
|
||||||
|
# The core projects and autograders were primarily created by John DeNero
|
||||||
|
# (denero@cs.berkeley.edu) and Dan Klein (klein@cs.berkeley.edu).
|
||||||
|
# Student side autograding was added by Brad Miller, Nick Hay, and
|
||||||
|
# Pieter Abbeel (pabbeel@cs.berkeley.edu).
|
||||||
|
|
||||||
|
|
||||||
|
import util
|
||||||
|
|
||||||
|
class TextGridworldDisplay:
|
||||||
|
|
||||||
|
def __init__(self, gridworld):
|
||||||
|
self.gridworld = gridworld
|
||||||
|
|
||||||
|
def start(self):
|
||||||
|
pass
|
||||||
|
|
||||||
|
def pause(self):
|
||||||
|
pass
|
||||||
|
|
||||||
|
def displayValues(self, agent, currentState = None, message = None):
|
||||||
|
if message != None:
|
||||||
|
print message
|
||||||
|
values = util.Counter()
|
||||||
|
policy = {}
|
||||||
|
states = self.gridworld.getStates()
|
||||||
|
for state in states:
|
||||||
|
values[state] = agent.getValue(state)
|
||||||
|
policy[state] = agent.getPolicy(state)
|
||||||
|
prettyPrintValues(self.gridworld, values, policy, currentState)
|
||||||
|
|
||||||
|
def displayNullValues(self, agent, currentState = None, message = None):
|
||||||
|
if message != None: print message
|
||||||
|
prettyPrintNullValues(self.gridworld, currentState)
|
||||||
|
|
||||||
|
def displayQValues(self, agent, currentState = None, message = None):
|
||||||
|
if message != None: print message
|
||||||
|
qValues = util.Counter()
|
||||||
|
states = self.gridworld.getStates()
|
||||||
|
for state in states:
|
||||||
|
for action in self.gridworld.getPossibleActions(state):
|
||||||
|
qValues[(state, action)] = agent.getQValue(state, action)
|
||||||
|
prettyPrintQValues(self.gridworld, qValues, currentState)
|
||||||
|
|
||||||
|
|
||||||
|
def prettyPrintValues(gridWorld, values, policy=None, currentState = None):
|
||||||
|
grid = gridWorld.grid
|
||||||
|
maxLen = 11
|
||||||
|
newRows = []
|
||||||
|
for y in range(grid.height):
|
||||||
|
newRow = []
|
||||||
|
for x in range(grid.width):
|
||||||
|
state = (x, y)
|
||||||
|
value = values[state]
|
||||||
|
action = None
|
||||||
|
if policy != None and state in policy:
|
||||||
|
action = policy[state]
|
||||||
|
actions = gridWorld.getPossibleActions(state)
|
||||||
|
if action not in actions and 'exit' in actions:
|
||||||
|
action = 'exit'
|
||||||
|
valString = None
|
||||||
|
if action == 'exit':
|
||||||
|
valString = border('%.2f' % value)
|
||||||
|
else:
|
||||||
|
valString = '\n\n%.2f\n\n' % value
|
||||||
|
valString += ' '*maxLen
|
||||||
|
if grid[x][y] == 'S':
|
||||||
|
valString = '\n\nS: %.2f\n\n' % value
|
||||||
|
valString += ' '*maxLen
|
||||||
|
if grid[x][y] == '#':
|
||||||
|
valString = '\n#####\n#####\n#####\n'
|
||||||
|
valString += ' '*maxLen
|
||||||
|
pieces = [valString]
|
||||||
|
text = ("\n".join(pieces)).split('\n')
|
||||||
|
if currentState == state:
|
||||||
|
l = len(text[1])
|
||||||
|
if l == 0:
|
||||||
|
text[1] = '*'
|
||||||
|
else:
|
||||||
|
text[1] = "|" + ' ' * int((l-1)/2-1) + '*' + ' ' * int((l)/2-1) + "|"
|
||||||
|
if action == 'east':
|
||||||
|
text[2] = ' ' + text[2] + ' >'
|
||||||
|
elif action == 'west':
|
||||||
|
text[2] = '< ' + text[2] + ' '
|
||||||
|
elif action == 'north':
|
||||||
|
text[0] = ' ' * int(maxLen/2) + '^' +' ' * int(maxLen/2)
|
||||||
|
elif action == 'south':
|
||||||
|
text[4] = ' ' * int(maxLen/2) + 'v' +' ' * int(maxLen/2)
|
||||||
|
newCell = "\n".join(text)
|
||||||
|
newRow.append(newCell)
|
||||||
|
newRows.append(newRow)
|
||||||
|
numCols = grid.width
|
||||||
|
for rowNum, row in enumerate(newRows):
|
||||||
|
row.insert(0,"\n\n"+str(rowNum))
|
||||||
|
newRows.reverse()
|
||||||
|
colLabels = [str(colNum) for colNum in range(numCols)]
|
||||||
|
colLabels.insert(0,' ')
|
||||||
|
finalRows = [colLabels] + newRows
|
||||||
|
print indent(finalRows,separateRows=True,delim='|', prefix='|',postfix='|', justify='center',hasHeader=True)
|
||||||
|
|
||||||
|
|
||||||
|
def prettyPrintNullValues(gridWorld, currentState = None):
|
||||||
|
grid = gridWorld.grid
|
||||||
|
maxLen = 11
|
||||||
|
newRows = []
|
||||||
|
for y in range(grid.height):
|
||||||
|
newRow = []
|
||||||
|
for x in range(grid.width):
|
||||||
|
state = (x, y)
|
||||||
|
|
||||||
|
# value = values[state]
|
||||||
|
|
||||||
|
action = None
|
||||||
|
# if policy != None and state in policy:
|
||||||
|
# action = policy[state]
|
||||||
|
#
|
||||||
|
actions = gridWorld.getPossibleActions(state)
|
||||||
|
|
||||||
|
if action not in actions and 'exit' in actions:
|
||||||
|
action = 'exit'
|
||||||
|
|
||||||
|
valString = None
|
||||||
|
# if action == 'exit':
|
||||||
|
# valString = border('%.2f' % value)
|
||||||
|
# else:
|
||||||
|
# valString = '\n\n%.2f\n\n' % value
|
||||||
|
# valString += ' '*maxLen
|
||||||
|
|
||||||
|
if grid[x][y] == 'S':
|
||||||
|
valString = '\n\nS\n\n'
|
||||||
|
valString += ' '*maxLen
|
||||||
|
elif grid[x][y] == '#':
|
||||||
|
valString = '\n#####\n#####\n#####\n'
|
||||||
|
valString += ' '*maxLen
|
||||||
|
elif type(grid[x][y]) == float or type(grid[x][y]) == int:
|
||||||
|
valString = border('%.2f' % float(grid[x][y]))
|
||||||
|
else: valString = border(' ')
|
||||||
|
pieces = [valString]
|
||||||
|
|
||||||
|
text = ("\n".join(pieces)).split('\n')
|
||||||
|
|
||||||
|
if currentState == state:
|
||||||
|
l = len(text[1])
|
||||||
|
if l == 0:
|
||||||
|
text[1] = '*'
|
||||||
|
else:
|
||||||
|
text[1] = "|" + ' ' * int((l-1)/2-1) + '*' + ' ' * int((l)/2-1) + "|"
|
||||||
|
|
||||||
|
if action == 'east':
|
||||||
|
text[2] = ' ' + text[2] + ' >'
|
||||||
|
elif action == 'west':
|
||||||
|
text[2] = '< ' + text[2] + ' '
|
||||||
|
elif action == 'north':
|
||||||
|
text[0] = ' ' * int(maxLen/2) + '^' +' ' * int(maxLen/2)
|
||||||
|
elif action == 'south':
|
||||||
|
text[4] = ' ' * int(maxLen/2) + 'v' +' ' * int(maxLen/2)
|
||||||
|
newCell = "\n".join(text)
|
||||||
|
newRow.append(newCell)
|
||||||
|
newRows.append(newRow)
|
||||||
|
numCols = grid.width
|
||||||
|
for rowNum, row in enumerate(newRows):
|
||||||
|
row.insert(0,"\n\n"+str(rowNum))
|
||||||
|
newRows.reverse()
|
||||||
|
colLabels = [str(colNum) for colNum in range(numCols)]
|
||||||
|
colLabels.insert(0,' ')
|
||||||
|
finalRows = [colLabels] + newRows
|
||||||
|
print indent(finalRows,separateRows=True,delim='|', prefix='|',postfix='|', justify='center',hasHeader=True)
|
||||||
|
|
||||||
|
def prettyPrintQValues(gridWorld, qValues, currentState=None):
|
||||||
|
grid = gridWorld.grid
|
||||||
|
maxLen = 11
|
||||||
|
newRows = []
|
||||||
|
for y in range(grid.height):
|
||||||
|
newRow = []
|
||||||
|
for x in range(grid.width):
|
||||||
|
state = (x, y)
|
||||||
|
actions = gridWorld.getPossibleActions(state)
|
||||||
|
if actions == None or len(actions) == 0:
|
||||||
|
actions = [None]
|
||||||
|
bestQ = max([qValues[(state, action)] for action in actions])
|
||||||
|
bestActions = [action for action in actions if qValues[(state, action)] == bestQ]
|
||||||
|
|
||||||
|
# display cell
|
||||||
|
qStrings = dict([(action, "%.2f" % qValues[(state, action)]) for action in actions])
|
||||||
|
northString = ('north' in qStrings and qStrings['north']) or ' '
|
||||||
|
southString = ('south' in qStrings and qStrings['south']) or ' '
|
||||||
|
eastString = ('east' in qStrings and qStrings['east']) or ' '
|
||||||
|
westString = ('west' in qStrings and qStrings['west']) or ' '
|
||||||
|
exitString = ('exit' in qStrings and qStrings['exit']) or ' '
|
||||||
|
|
||||||
|
eastLen = len(eastString)
|
||||||
|
westLen = len(westString)
|
||||||
|
if eastLen < westLen:
|
||||||
|
eastString = ' '*(westLen-eastLen)+eastString
|
||||||
|
if westLen < eastLen:
|
||||||
|
westString = westString+' '*(eastLen-westLen)
|
||||||
|
|
||||||
|
if 'north' in bestActions:
|
||||||
|
northString = '/'+northString+'\\'
|
||||||
|
if 'south' in bestActions:
|
||||||
|
southString = '\\'+southString+'/'
|
||||||
|
if 'east' in bestActions:
|
||||||
|
eastString = ''+eastString+'>'
|
||||||
|
else:
|
||||||
|
eastString = ''+eastString+' '
|
||||||
|
if 'west' in bestActions:
|
||||||
|
westString = '<'+westString+''
|
||||||
|
else:
|
||||||
|
westString = ' '+westString+''
|
||||||
|
if 'exit' in bestActions:
|
||||||
|
exitString = '[ '+exitString+' ]'
|
||||||
|
|
||||||
|
|
||||||
|
ewString = westString + " " + eastString
|
||||||
|
if state == currentState:
|
||||||
|
ewString = westString + " * " + eastString
|
||||||
|
if state == gridWorld.getStartState():
|
||||||
|
ewString = westString + " S " + eastString
|
||||||
|
if state == currentState and state == gridWorld.getStartState():
|
||||||
|
ewString = westString + " S:* " + eastString
|
||||||
|
|
||||||
|
text = [northString, "\n"+exitString, ewString, ' '*maxLen+"\n", southString]
|
||||||
|
|
||||||
|
if grid[x][y] == '#':
|
||||||
|
text = ['', '\n#####\n#####\n#####', '']
|
||||||
|
|
||||||
|
newCell = "\n".join(text)
|
||||||
|
newRow.append(newCell)
|
||||||
|
newRows.append(newRow)
|
||||||
|
numCols = grid.width
|
||||||
|
for rowNum, row in enumerate(newRows):
|
||||||
|
row.insert(0,"\n\n\n"+str(rowNum))
|
||||||
|
newRows.reverse()
|
||||||
|
colLabels = [str(colNum) for colNum in range(numCols)]
|
||||||
|
colLabels.insert(0,' ')
|
||||||
|
finalRows = [colLabels] + newRows
|
||||||
|
|
||||||
|
print indent(finalRows,separateRows=True,delim='|',prefix='|',postfix='|', justify='center',hasHeader=True)
|
||||||
|
|
||||||
|
def border(text):
|
||||||
|
length = len(text)
|
||||||
|
pieces = ['-' * (length+2), '|'+' ' * (length+2)+'|', ' | '+text+' | ', '|'+' ' * (length+2)+'|','-' * (length+2)]
|
||||||
|
return '\n'.join(pieces)
|
||||||
|
|
||||||
|
# INDENTING CODE
|
||||||
|
|
||||||
|
# Indenting code based on a post from George Sakkis
|
||||||
|
# (http://aspn.activestate.com/ASPN/Cookbook/Python/Recipe/267662)
|
||||||
|
|
||||||
|
import cStringIO,operator
|
||||||
|
|
||||||
|
def indent(rows, hasHeader=False, headerChar='-', delim=' | ', justify='left',
|
||||||
|
separateRows=False, prefix='', postfix='', wrapfunc=lambda x:x):
|
||||||
|
"""Indents a table by column.
|
||||||
|
- rows: A sequence of sequences of items, one sequence per row.
|
||||||
|
- hasHeader: True if the first row consists of the columns' names.
|
||||||
|
- headerChar: Character to be used for the row separator line
|
||||||
|
(if hasHeader==True or separateRows==True).
|
||||||
|
- delim: The column delimiter.
|
||||||
|
- justify: Determines how are data justified in their column.
|
||||||
|
Valid values are 'left','right' and 'center'.
|
||||||
|
- separateRows: True if rows are to be separated by a line
|
||||||
|
of 'headerChar's.
|
||||||
|
- prefix: A string prepended to each printed row.
|
||||||
|
- postfix: A string appended to each printed row.
|
||||||
|
- wrapfunc: A function f(text) for wrapping text; each element in
|
||||||
|
the table is first wrapped by this function."""
|
||||||
|
# closure for breaking logical rows to physical, using wrapfunc
|
||||||
|
def rowWrapper(row):
|
||||||
|
newRows = [wrapfunc(item).split('\n') for item in row]
|
||||||
|
return [[substr or '' for substr in item] for item in map(None,*newRows)]
|
||||||
|
# break each logical row into one or more physical ones
|
||||||
|
logicalRows = [rowWrapper(row) for row in rows]
|
||||||
|
# columns of physical rows
|
||||||
|
columns = map(None,*reduce(operator.add,logicalRows))
|
||||||
|
# get the maximum of each column by the string length of its items
|
||||||
|
maxWidths = [max([len(str(item)) for item in column]) for column in columns]
|
||||||
|
rowSeparator = headerChar * (len(prefix) + len(postfix) + sum(maxWidths) + \
|
||||||
|
len(delim)*(len(maxWidths)-1))
|
||||||
|
# select the appropriate justify method
|
||||||
|
justify = {'center':str.center, 'right':str.rjust, 'left':str.ljust}[justify.lower()]
|
||||||
|
output=cStringIO.StringIO()
|
||||||
|
if separateRows: print >> output, rowSeparator
|
||||||
|
for physicalRows in logicalRows:
|
||||||
|
for row in physicalRows:
|
||||||
|
print >> output, \
|
||||||
|
prefix \
|
||||||
|
+ delim.join([justify(str(item),width) for (item,width) in zip(row,maxWidths)]) \
|
||||||
|
+ postfix
|
||||||
|
if separateRows or hasHeader: print >> output, rowSeparator; hasHeader=False
|
||||||
|
return output.getvalue()
|
||||||
|
|
||||||
|
import math
|
||||||
|
def wrap_always(text, width):
|
||||||
|
"""A simple word-wrap function that wraps text on exactly width characters.
|
||||||
|
It doesn't split the text in words."""
|
||||||
|
return '\n'.join([ text[width*i:width*(i+1)] \
|
||||||
|
for i in xrange(int(math.ceil(1.*len(text)/width))) ])
|
||||||
|
|
||||||
|
|
||||||
|
# TEST OF DISPLAY CODE
|
||||||
|
|
||||||
|
if __name__ == '__main__':
|
||||||
|
import gridworld, util
|
||||||
|
|
||||||
|
grid = gridworld.getCliffGrid3()
|
||||||
|
print grid.getStates()
|
||||||
|
|
||||||
|
policy = dict([(state,'east') for state in grid.getStates()])
|
||||||
|
values = util.Counter(dict([(state,1000.23) for state in grid.getStates()]))
|
||||||
|
prettyPrintValues(grid, values, policy, currentState = (0,0))
|
||||||
|
|
||||||
|
stateCrossActions = [[(state, action) for action in grid.getPossibleActions(state)] for state in grid.getStates()]
|
||||||
|
qStates = reduce(lambda x,y: x+y, stateCrossActions, [])
|
||||||
|
qValues = util.Counter(dict([((state, action), 10.5) for state, action in qStates]))
|
||||||
|
qValues = util.Counter(dict([((state, action), 10.5) for state, action in reduce(lambda x,y: x+y, stateCrossActions, [])]))
|
||||||
|
prettyPrintQValues(grid, qValues, currentState = (0,0))
|
||||||
653
p3_rl/util.py
Normal file
653
p3_rl/util.py
Normal file
@@ -0,0 +1,653 @@
|
|||||||
|
# util.py
|
||||||
|
# -------
|
||||||
|
# Licensing Information: You are free to use or extend these projects for
|
||||||
|
# educational purposes provided that (1) you do not distribute or publish
|
||||||
|
# solutions, (2) you retain this notice, and (3) you provide clear
|
||||||
|
# attribution to UC Berkeley, including a link to http://ai.berkeley.edu.
|
||||||
|
#
|
||||||
|
# Attribution Information: The Pacman AI projects were developed at UC Berkeley.
|
||||||
|
# The core projects and autograders were primarily created by John DeNero
|
||||||
|
# (denero@cs.berkeley.edu) and Dan Klein (klein@cs.berkeley.edu).
|
||||||
|
# Student side autograding was added by Brad Miller, Nick Hay, and
|
||||||
|
# Pieter Abbeel (pabbeel@cs.berkeley.edu).
|
||||||
|
|
||||||
|
|
||||||
|
import sys
|
||||||
|
import inspect
|
||||||
|
import heapq, random
|
||||||
|
import cStringIO
|
||||||
|
|
||||||
|
|
||||||
|
class FixedRandom:
    """
    A random-number source seeded with a hard-coded, pre-captured generator
    state, so that every run draws the identical pseudo-random sequence
    (used to make autograded results reproducible).
    """
    def __init__(self):
        # A captured return value of random.getstate():
        # (version, internal Mersenne Twister state tuple, gauss_next).
        # Do not edit the numbers -- any change alters the sequence.
        fixedState = (3, (2147483648L, 507801126L, 683453281L, 310439348L, 2597246090L, \
            2209084787L, 2267831527L, 979920060L, 3098657677L, 37650879L, 807947081L, 3974896263L, \
            881243242L, 3100634921L, 1334775171L, 3965168385L, 746264660L, 4074750168L, 500078808L, \
            776561771L, 702988163L, 1636311725L, 2559226045L, 157578202L, 2498342920L, 2794591496L, \
            4130598723L, 496985844L, 2944563015L, 3731321600L, 3514814613L, 3362575829L, 3038768745L, \
            2206497038L, 1108748846L, 1317460727L, 3134077628L, 988312410L, 1674063516L, 746456451L, \
            3958482413L, 1857117812L, 708750586L, 1583423339L, 3466495450L, 1536929345L, 1137240525L, \
            3875025632L, 2466137587L, 1235845595L, 4214575620L, 3792516855L, 657994358L, 1241843248L, \
            1695651859L, 3678946666L, 1929922113L, 2351044952L, 2317810202L, 2039319015L, 460787996L, \
            3654096216L, 4068721415L, 1814163703L, 2904112444L, 1386111013L, 574629867L, 2654529343L, \
            3833135042L, 2725328455L, 552431551L, 4006991378L, 1331562057L, 3710134542L, 303171486L, \
            1203231078L, 2670768975L, 54570816L, 2679609001L, 578983064L, 1271454725L, 3230871056L, \
            2496832891L, 2944938195L, 1608828728L, 367886575L, 2544708204L, 103775539L, 1912402393L, \
            1098482180L, 2738577070L, 3091646463L, 1505274463L, 2079416566L, 659100352L, 839995305L, \
            1696257633L, 274389836L, 3973303017L, 671127655L, 1061109122L, 517486945L, 1379749962L, \
            3421383928L, 3116950429L, 2165882425L, 2346928266L, 2892678711L, 2936066049L, 1316407868L, \
            2873411858L, 4279682888L, 2744351923L, 3290373816L, 1014377279L, 955200944L, 4220990860L, \
            2386098930L, 1772997650L, 3757346974L, 1621616438L, 2877097197L, 442116595L, 2010480266L, \
            2867861469L, 2955352695L, 605335967L, 2222936009L, 2067554933L, 4129906358L, 1519608541L, \
            1195006590L, 1942991038L, 2736562236L, 279162408L, 1415982909L, 4099901426L, 1732201505L, \
            2934657937L, 860563237L, 2479235483L, 3081651097L, 2244720867L, 3112631622L, 1636991639L, \
            3860393305L, 2312061927L, 48780114L, 1149090394L, 2643246550L, 1764050647L, 3836789087L, \
            3474859076L, 4237194338L, 1735191073L, 2150369208L, 92164394L, 756974036L, 2314453957L, \
            323969533L, 4267621035L, 283649842L, 810004843L, 727855536L, 1757827251L, 3334960421L, \
            3261035106L, 38417393L, 2660980472L, 1256633965L, 2184045390L, 811213141L, 2857482069L, \
            2237770878L, 3891003138L, 2787806886L, 2435192790L, 2249324662L, 3507764896L, 995388363L, \
            856944153L, 619213904L, 3233967826L, 3703465555L, 3286531781L, 3863193356L, 2992340714L, \
            413696855L, 3865185632L, 1704163171L, 3043634452L, 2225424707L, 2199018022L, 3506117517L, \
            3311559776L, 3374443561L, 1207829628L, 668793165L, 1822020716L, 2082656160L, 1160606415L, \
            3034757648L, 741703672L, 3094328738L, 459332691L, 2702383376L, 1610239915L, 4162939394L, \
            557861574L, 3805706338L, 3832520705L, 1248934879L, 3250424034L, 892335058L, 74323433L, \
            3209751608L, 3213220797L, 3444035873L, 3743886725L, 1783837251L, 610968664L, 580745246L, \
            4041979504L, 201684874L, 2673219253L, 1377283008L, 3497299167L, 2344209394L, 2304982920L, \
            3081403782L, 2599256854L, 3184475235L, 3373055826L, 695186388L, 2423332338L, 222864327L, \
            1258227992L, 3627871647L, 3487724980L, 4027953808L, 3053320360L, 533627073L, 3026232514L, \
            2340271949L, 867277230L, 868513116L, 2158535651L, 2487822909L, 3428235761L, 3067196046L, \
            3435119657L, 1908441839L, 788668797L, 3367703138L, 3317763187L, 908264443L, 2252100381L, \
            764223334L, 4127108988L, 384641349L, 3377374722L, 1263833251L, 1958694944L, 3847832657L, \
            1253909612L, 1096494446L, 555725445L, 2277045895L, 3340096504L, 1383318686L, 4234428127L, \
            1072582179L, 94169494L, 1064509968L, 2681151917L, 2681864920L, 734708852L, 1338914021L, \
            1270409500L, 1789469116L, 4191988204L, 1716329784L, 2213764829L, 3712538840L, 919910444L, \
            1318414447L, 3383806712L, 3054941722L, 3378649942L, 1205735655L, 1268136494L, 2214009444L, \
            2532395133L, 3232230447L, 230294038L, 342599089L, 772808141L, 4096882234L, 3146662953L, \
            2784264306L, 1860954704L, 2675279609L, 2984212876L, 2466966981L, 2627986059L, 2985545332L, \
            2578042598L, 1458940786L, 2944243755L, 3959506256L, 1509151382L, 325761900L, 942251521L, \
            4184289782L, 2756231555L, 3297811774L, 1169708099L, 3280524138L, 3805245319L, 3227360276L, \
            3199632491L, 2235795585L, 2865407118L, 36763651L, 2441503575L, 3314890374L, 1755526087L, \
            17915536L, 1196948233L, 949343045L, 3815841867L, 489007833L, 2654997597L, 2834744136L, \
            417688687L, 2843220846L, 85621843L, 747339336L, 2043645709L, 3520444394L, 1825470818L, \
            647778910L, 275904777L, 1249389189L, 3640887431L, 4200779599L, 323384601L, 3446088641L, \
            4049835786L, 1718989062L, 3563787136L, 44099190L, 3281263107L, 22910812L, 1826109246L, \
            745118154L, 3392171319L, 1571490704L, 354891067L, 815955642L, 1453450421L, 940015623L, \
            796817754L, 1260148619L, 3898237757L, 176670141L, 1870249326L, 3317738680L, 448918002L, \
            4059166594L, 2003827551L, 987091377L, 224855998L, 3520570137L, 789522610L, 2604445123L, \
            454472869L, 475688926L, 2990723466L, 523362238L, 3897608102L, 806637149L, 2642229586L, \
            2928614432L, 1564415411L, 1691381054L, 3816907227L, 4082581003L, 1895544448L, 3728217394L, \
            3214813157L, 4054301607L, 1882632454L, 2873728645L, 3694943071L, 1297991732L, 2101682438L, \
            3952579552L, 678650400L, 1391722293L, 478833748L, 2976468591L, 158586606L, 2576499787L, \
            662690848L, 3799889765L, 3328894692L, 2474578497L, 2383901391L, 1718193504L, 3003184595L, \
            3630561213L, 1929441113L, 3848238627L, 1594310094L, 3040359840L, 3051803867L, 2462788790L, \
            954409915L, 802581771L, 681703307L, 545982392L, 2738993819L, 8025358L, 2827719383L, \
            770471093L, 3484895980L, 3111306320L, 3900000891L, 2116916652L, 397746721L, 2087689510L, \
            721433935L, 1396088885L, 2751612384L, 1998988613L, 2135074843L, 2521131298L, 707009172L, \
            2398321482L, 688041159L, 2264560137L, 482388305L, 207864885L, 3735036991L, 3490348331L, \
            1963642811L, 3260224305L, 3493564223L, 1939428454L, 1128799656L, 1366012432L, 2858822447L, \
            1428147157L, 2261125391L, 1611208390L, 1134826333L, 2374102525L, 3833625209L, 2266397263L, \
            3189115077L, 770080230L, 2674657172L, 4280146640L, 3604531615L, 4235071805L, 3436987249L, \
            509704467L, 2582695198L, 4256268040L, 3391197562L, 1460642842L, 1617931012L, 457825497L, \
            1031452907L, 1330422862L, 4125947620L, 2280712485L, 431892090L, 2387410588L, 2061126784L, \
            896457479L, 3480499461L, 2488196663L, 4021103792L, 1877063114L, 2744470201L, 1046140599L, \
            2129952955L, 3583049218L, 4217723693L, 2720341743L, 820661843L, 1079873609L, 3360954200L, \
            3652304997L, 3335838575L, 2178810636L, 1908053374L, 4026721976L, 1793145418L, 476541615L, \
            973420250L, 515553040L, 919292001L, 2601786155L, 1685119450L, 3030170809L, 1590676150L, \
            1665099167L, 651151584L, 2077190587L, 957892642L, 646336572L, 2743719258L, 866169074L, \
            851118829L, 4225766285L, 963748226L, 799549420L, 1955032629L, 799460000L, 2425744063L, \
            2441291571L, 1928963772L, 528930629L, 2591962884L, 3495142819L, 1896021824L, 901320159L, \
            3181820243L, 843061941L, 3338628510L, 3782438992L, 9515330L, 1705797226L, 953535929L, \
            764833876L, 3202464965L, 2970244591L, 519154982L, 3390617541L, 566616744L, 3438031503L, \
            1853838297L, 170608755L, 1393728434L, 676900116L, 3184965776L, 1843100290L, 78995357L, \
            2227939888L, 3460264600L, 1745705055L, 1474086965L, 572796246L, 4081303004L, 882828851L, \
            1295445825L, 137639900L, 3304579600L, 2722437017L, 4093422709L, 273203373L, 2666507854L, \
            3998836510L, 493829981L, 1623949669L, 3482036755L, 3390023939L, 833233937L, 1639668730L, \
            1499455075L, 249728260L, 1210694006L, 3836497489L, 1551488720L, 3253074267L, 3388238003L, \
            2372035079L, 3945715164L, 2029501215L, 3362012634L, 2007375355L, 4074709820L, 631485888L, \
            3135015769L, 4273087084L, 3648076204L, 2739943601L, 1374020358L, 1760722448L, 3773939706L, \
            1313027823L, 1895251226L, 4224465911L, 421382535L, 1141067370L, 3660034846L, 3393185650L, \
            1850995280L, 1451917312L, 3841455409L, 3926840308L, 1397397252L, 2572864479L, 2500171350L, \
            3119920613L, 531400869L, 1626487579L, 1099320497L, 407414753L, 2438623324L, 99073255L, \
            3175491512L, 656431560L, 1153671785L, 236307875L, 2824738046L, 2320621382L, 892174056L, \
            230984053L, 719791226L, 2718891946L, 624L), None)
        self.random = random.Random()
        # Restore the captured state so every draw is reproducible.
        self.random.setstate(fixedState)
|
||||||
|
|
||||||
|
"""
|
||||||
|
Data structures useful for implementing SearchAgents
|
||||||
|
"""
|
||||||
|
|
||||||
|
class Stack:
    "A container with a last-in-first-out (LIFO) queuing policy."

    def __init__(self):
        # Items live in a plain list; the top of the stack is the end.
        self.list = []

    def push(self, item):
        "Push 'item' onto the stack"
        self.list.append(item)

    def pop(self):
        "Pop the most recently pushed item from the stack"
        return self.list.pop()

    def isEmpty(self):
        "Returns true if the stack is empty"
        return not self.list
|
||||||
|
|
||||||
|
class Queue:
    "A container with a first-in-first-out (FIFO) queuing policy."

    def __init__(self):
        # Newest items are inserted at the front; the oldest sits at the end.
        self.list = []

    def push(self, item):
        "Enqueue the 'item' into the queue"
        self.list.insert(0, item)

    def pop(self):
        """
        Dequeue the earliest enqueued item still in the queue. This
        operation removes the item from the queue.
        """
        return self.list.pop()

    def isEmpty(self):
        "Returns true if the queue is empty"
        return not self.list
|
||||||
|
|
||||||
|
class PriorityQueue:
    """
    Implements a priority queue data structure. Each inserted item
    has a priority associated with it and the client is usually interested
    in quick retrieval of the lowest-priority item in the queue. This
    data structure allows O(1) access to the lowest-priority item.

    Note that this PriorityQueue does not allow you to change the priority
    of an item. However, you may insert the same item multiple times with
    different priorities.
    """
    def __init__(self):
        self.heap = []   # heap entries are (priority, insertion index, item)
        self.count = 0   # monotone insertion index; breaks priority ties FIFO

    def push(self, item, priority):
        "Add 'item' with the given priority; ties pop in insertion order."
        # The insertion index keeps heap comparisons from ever reaching the
        # item itself (items need not be comparable to one another).
        entry = (priority, self.count, item)
        heapq.heappush(self.heap, entry)
        self.count += 1

    def pop(self):
        "Remove and return the item with the lowest priority."
        (_, _, item) = heapq.heappop(self.heap)
        return item

    def isEmpty(self):
        "Returns true if the queue is empty"
        return len(self.heap) == 0
|
||||||
|
|
||||||
|
class PriorityQueueWithFunction(PriorityQueue):
    """
    Implements a priority queue with the same push/pop signature of the
    Queue and the Stack classes. This is designed for drop-in replacement for
    those two classes. The caller has to provide a priority function, which
    extracts each item's priority.
    """

    def __init__(self, priorityFunction):
        "priorityFunction (item) -> priority"
        PriorityQueue.__init__(self)               # set up the underlying heap
        self.priorityFunction = priorityFunction   # used by push() to rank items

    def push(self, item):
        "Adds an item to the queue with priority from the priority function"
        priority = self.priorityFunction(item)
        PriorityQueue.push(self, item, priority)
|
||||||
|
|
||||||
|
|
||||||
|
def manhattanDistance(xy1, xy2):
    "Returns the Manhattan distance between points xy1 and xy2"
    dx = abs(xy1[0] - xy2[0])
    dy = abs(xy1[1] - xy2[1])
    return dx + dy
|
||||||
|
|
||||||
|
"""
|
||||||
|
Data structures and functions useful for various course projects
|
||||||
|
|
||||||
|
The search project should not need anything below this line.
|
||||||
|
"""
|
||||||
|
|
||||||
|
class Counter(dict):
|
||||||
|
"""
|
||||||
|
A counter keeps track of counts for a set of keys.
|
||||||
|
|
||||||
|
The counter class is an extension of the standard python
|
||||||
|
dictionary type. It is specialized to have number values
|
||||||
|
(integers or floats), and includes a handful of additional
|
||||||
|
functions to ease the task of counting data. In particular,
|
||||||
|
all keys are defaulted to have value 0. Using a dictionary:
|
||||||
|
|
||||||
|
a = {}
|
||||||
|
print a['test']
|
||||||
|
|
||||||
|
would give an error, while the Counter class analogue:
|
||||||
|
|
||||||
|
>>> a = Counter()
|
||||||
|
>>> print a['test']
|
||||||
|
0
|
||||||
|
|
||||||
|
returns the default 0 value. Note that to reference a key
|
||||||
|
that you know is contained in the counter,
|
||||||
|
you can still use the dictionary syntax:
|
||||||
|
|
||||||
|
>>> a = Counter()
|
||||||
|
>>> a['test'] = 2
|
||||||
|
>>> print a['test']
|
||||||
|
2
|
||||||
|
|
||||||
|
This is very useful for counting things without initializing their counts,
|
||||||
|
see for example:
|
||||||
|
|
||||||
|
>>> a['blah'] += 1
|
||||||
|
>>> print a['blah']
|
||||||
|
1
|
||||||
|
|
||||||
|
The counter also includes additional functionality useful in implementing
|
||||||
|
the classifiers for this assignment. Two counters can be added,
|
||||||
|
subtracted or multiplied together. See below for details. They can
|
||||||
|
also be normalized and their total count and arg max can be extracted.
|
||||||
|
"""
|
||||||
|
def __getitem__(self, idx):
|
||||||
|
self.setdefault(idx, 0)
|
||||||
|
return dict.__getitem__(self, idx)
|
||||||
|
|
||||||
|
def incrementAll(self, keys, count):
|
||||||
|
"""
|
||||||
|
Increments all elements of keys by the same count.
|
||||||
|
|
||||||
|
>>> a = Counter()
|
||||||
|
>>> a.incrementAll(['one','two', 'three'], 1)
|
||||||
|
>>> a['one']
|
||||||
|
1
|
||||||
|
>>> a['two']
|
||||||
|
1
|
||||||
|
"""
|
||||||
|
for key in keys:
|
||||||
|
self[key] += count
|
||||||
|
|
||||||
|
def argMax(self):
|
||||||
|
"""
|
||||||
|
Returns the key with the highest value.
|
||||||
|
"""
|
||||||
|
if len(self.keys()) == 0: return None
|
||||||
|
all = self.items()
|
||||||
|
values = [x[1] for x in all]
|
||||||
|
maxIndex = values.index(max(values))
|
||||||
|
return all[maxIndex][0]
|
||||||
|
|
||||||
|
def sortedKeys(self):
|
||||||
|
"""
|
||||||
|
Returns a list of keys sorted by their values. Keys
|
||||||
|
with the highest values will appear first.
|
||||||
|
|
||||||
|
>>> a = Counter()
|
||||||
|
>>> a['first'] = -2
|
||||||
|
>>> a['second'] = 4
|
||||||
|
>>> a['third'] = 1
|
||||||
|
>>> a.sortedKeys()
|
||||||
|
['second', 'third', 'first']
|
||||||
|
"""
|
||||||
|
sortedItems = self.items()
|
||||||
|
compare = lambda x, y: sign(y[1] - x[1])
|
||||||
|
sortedItems.sort(cmp=compare)
|
||||||
|
return [x[0] for x in sortedItems]
|
||||||
|
|
||||||
|
def totalCount(self):
|
||||||
|
"""
|
||||||
|
Returns the sum of counts for all keys.
|
||||||
|
"""
|
||||||
|
return sum(self.values())
|
||||||
|
|
||||||
|
def normalize(self):
|
||||||
|
"""
|
||||||
|
Edits the counter such that the total count of all
|
||||||
|
keys sums to 1. The ratio of counts for all keys
|
||||||
|
will remain the same. Note that normalizing an empty
|
||||||
|
Counter will result in an error.
|
||||||
|
"""
|
||||||
|
total = float(self.totalCount())
|
||||||
|
if total == 0: return
|
||||||
|
for key in self.keys():
|
||||||
|
self[key] = self[key] / total
|
||||||
|
|
||||||
|
def divideAll(self, divisor):
|
||||||
|
"""
|
||||||
|
Divides all counts by divisor
|
||||||
|
"""
|
||||||
|
divisor = float(divisor)
|
||||||
|
for key in self:
|
||||||
|
self[key] /= divisor
|
||||||
|
|
||||||
|
def copy(self):
|
||||||
|
"""
|
||||||
|
Returns a copy of the counter
|
||||||
|
"""
|
||||||
|
return Counter(dict.copy(self))
|
||||||
|
|
||||||
|
def __mul__(self, y ):
|
||||||
|
"""
|
||||||
|
Multiplying two counters gives the dot product of their vectors where
|
||||||
|
each unique label is a vector element.
|
||||||
|
|
||||||
|
>>> a = Counter()
|
||||||
|
>>> b = Counter()
|
||||||
|
>>> a['first'] = -2
|
||||||
|
>>> a['second'] = 4
|
||||||
|
>>> b['first'] = 3
|
||||||
|
>>> b['second'] = 5
|
||||||
|
>>> a['third'] = 1.5
|
||||||
|
>>> a['fourth'] = 2.5
|
||||||
|
>>> a * b
|
||||||
|
14
|
||||||
|
"""
|
||||||
|
sum = 0
|
||||||
|
x = self
|
||||||
|
if len(x) > len(y):
|
||||||
|
x,y = y,x
|
||||||
|
for key in x:
|
||||||
|
if key not in y:
|
||||||
|
continue
|
||||||
|
sum += x[key] * y[key]
|
||||||
|
return sum
|
||||||
|
|
||||||
|
def __radd__(self, y):
|
||||||
|
"""
|
||||||
|
Adding another counter to a counter increments the current counter
|
||||||
|
by the values stored in the second counter.
|
||||||
|
|
||||||
|
>>> a = Counter()
|
||||||
|
>>> b = Counter()
|
||||||
|
>>> a['first'] = -2
|
||||||
|
>>> a['second'] = 4
|
||||||
|
>>> b['first'] = 3
|
||||||
|
>>> b['third'] = 1
|
||||||
|
>>> a += b
|
||||||
|
>>> a['first']
|
||||||
|
1
|
||||||
|
"""
|
||||||
|
for key, value in y.items():
|
||||||
|
self[key] += value
|
||||||
|
|
||||||
|
def __add__( self, y ):
|
||||||
|
"""
|
||||||
|
Adding two counters gives a counter with the union of all keys and
|
||||||
|
counts of the second added to counts of the first.
|
||||||
|
|
||||||
|
>>> a = Counter()
|
||||||
|
>>> b = Counter()
|
||||||
|
>>> a['first'] = -2
|
||||||
|
>>> a['second'] = 4
|
||||||
|
>>> b['first'] = 3
|
||||||
|
>>> b['third'] = 1
|
||||||
|
>>> (a + b)['first']
|
||||||
|
1
|
||||||
|
"""
|
||||||
|
addend = Counter()
|
||||||
|
for key in self:
|
||||||
|
if key in y:
|
||||||
|
addend[key] = self[key] + y[key]
|
||||||
|
else:
|
||||||
|
addend[key] = self[key]
|
||||||
|
for key in y:
|
||||||
|
if key in self:
|
||||||
|
continue
|
||||||
|
addend[key] = y[key]
|
||||||
|
return addend
|
||||||
|
|
||||||
|
def __sub__( self, y ):
|
||||||
|
"""
|
||||||
|
Subtracting a counter from another gives a counter with the union of all keys and
|
||||||
|
counts of the second subtracted from counts of the first.
|
||||||
|
|
||||||
|
>>> a = Counter()
|
||||||
|
>>> b = Counter()
|
||||||
|
>>> a['first'] = -2
|
||||||
|
>>> a['second'] = 4
|
||||||
|
>>> b['first'] = 3
|
||||||
|
>>> b['third'] = 1
|
||||||
|
>>> (a - b)['first']
|
||||||
|
-5
|
||||||
|
"""
|
||||||
|
addend = Counter()
|
||||||
|
for key in self:
|
||||||
|
if key in y:
|
||||||
|
addend[key] = self[key] - y[key]
|
||||||
|
else:
|
||||||
|
addend[key] = self[key]
|
||||||
|
for key in y:
|
||||||
|
if key in self:
|
||||||
|
continue
|
||||||
|
addend[key] = -1 * y[key]
|
||||||
|
return addend
|
||||||
|
|
||||||
|
def raiseNotDefined():
    # Report which unimplemented (student) method was called, then abort.
    fileName = inspect.stack()[1][1]   # caller's source file
    line = inspect.stack()[1][2]       # caller's line number
    method = inspect.stack()[1][3]     # caller's function name

    print "*** Method not implemented: %s at line %s of %s" % (method, line, fileName)
    sys.exit(1)
|
||||||
|
|
||||||
|
def normalize(vectorOrCounter):
    """
    normalize a vector or counter by dividing each value by the sum of all values
    """
    if type(vectorOrCounter) == type(Counter()):
        counter = vectorOrCounter
        total = float(counter.totalCount())
        if total == 0:
            return counter  # empty / zero-total counter returned untouched
        normalized = Counter()
        for key in counter.keys():
            normalized[key] = counter[key] / total
        return normalized
    # Plain sequence case: return a new list scaled to sum to 1.
    vector = vectorOrCounter
    total = float(sum(vector))
    if total == 0:
        return vector
    return [value / total for value in vector]
|
||||||
|
|
||||||
|
def nSample(distribution, values, n):
    """
    Draw n samples from the discrete distribution over `values`,
    consuming one sorted batch of uniform draws in a single CDF sweep.
    """
    if sum(distribution) != 1:
        distribution = normalize(distribution)
    # Sorting the uniform draws lets us walk the CDF exactly once.
    draws = sorted(random.random() for _ in range(n))
    samples = []
    samplePos, distPos, cdf = 0, 0, distribution[0]
    while samplePos < n:
        if draws[samplePos] < cdf:
            samples.append(values[distPos])
            samplePos += 1
        else:
            distPos += 1
            cdf += distribution[distPos]
    return samples
|
||||||
|
|
||||||
|
def sample(distribution, values = None):
    """
    Draw a single sample.  `distribution` may be a Counter (whose keys
    become the values), or a list of weights paired with `values`.
    """
    if type(distribution) == Counter:
        items = sorted(distribution.items())
        distribution = [weight for _, weight in items]
        values = [value for value, _ in items]
    if sum(distribution) != 1:
        distribution = normalize(distribution)
    # Walk the running total (CDF) until it passes one uniform draw.
    choice = random.random()
    i = 0
    total = distribution[0]
    while choice > total:
        i += 1
        total += distribution[i]
    return values[i]
|
||||||
|
|
||||||
|
def sampleFromCounter(ctr):
    "Draw one sample from a Counter, treating its counts as weights."
    keys = []
    weights = []
    for key, weight in sorted(ctr.items()):
        keys.append(key)
        weights.append(weight)
    return sample(weights, keys)
|
||||||
|
|
||||||
|
def getProbability(value, distribution, values):
    """
    Gives the probability of a value under a discrete distribution
    defined by (distributions, values).
    """
    # Sum the weight of every position whose outcome matches `value`
    # (0.0 start keeps the result a float even when nothing matches).
    return sum((prob for prob, val in zip(distribution, values) if val == value), 0.0)
|
||||||
|
|
||||||
|
def flipCoin(p):
    "Simulate one biased coin flip: True with probability p."
    return random.random() < p
|
||||||
|
|
||||||
|
def chooseFromDistribution(distribution):
    "Takes either a counter or a list of (prob, key) pairs and samples"
    if type(distribution) == dict or type(distribution) == Counter:
        return sample(distribution)
    # List form: walk the running probability total until it passes a draw.
    draw = random.random()
    cumulative = 0.0
    for prob, element in distribution:
        cumulative += prob
        if draw <= cumulative:
            return element
|
||||||
|
|
||||||
|
def nearestPoint(pos):
    """
    Finds the nearest grid point to a position (discretizes).
    """
    current_row, current_col = pos
    # Adding 0.5 before truncation rounds non-negative coordinates to the
    # nearest integer.
    return (int(current_row + 0.5), int(current_col + 0.5))
|
||||||
|
|
||||||
|
def sign(x):
    """
    Returns 1 or -1 depending on the sign of x (zero counts as positive).
    """
    return 1 if x >= 0 else -1
|
||||||
|
|
||||||
|
def arrayInvert(array):
    """
    Inverts a matrix stored as a list of lists.
    """
    # One output row per input row; cells are redistributed by column index
    # (assumes a square matrix, as the original did).
    result = [[] for _ in array]
    for row in array:
        for col_index, cell in enumerate(row):
            result[col_index].append(cell)
    return result
|
||||||
|
|
||||||
|
def matrixAsList(matrix, value = True):
    """
    Turns a matrix into a list of coordinates matching the specified value
    """
    rows, cols = len(matrix), len(matrix[0])
    # Row-major scan, collecting (row, col) wherever the cell matches.
    return [(row, col)
            for row in range(rows)
            for col in range(cols)
            if matrix[row][col] == value]
|
||||||
|
|
||||||
|
def lookup(name, namespace):
    """
    Get a method or class from any imported module from its name.
    Usage: lookup(functionName, globals())
    """
    dots = name.count('.')
    if dots > 0:
        # Dotted name: import the module prefix, then fetch the attribute.
        moduleName, objName = '.'.join(name.split('.')[:-1]), name.split('.')[-1]
        module = __import__(moduleName)
        return getattr(module, objName)
    else:
        # Bare name: search every module visible in the namespace, then the
        # namespace's own bindings.
        modules = [obj for obj in namespace.values() if str(type(obj)) == "<type 'module'>"]
        options = [getattr(module, name) for module in modules if name in dir(module)]
        options += [obj[1] for obj in namespace.items() if obj[0] == name ]
        if len(options) == 1: return options[0]
        # NOTE(review): the format string below is never filled in
        # ('%s' lacks a trailing '% name') -- confirm and fix.
        if len(options) > 1: raise Exception, 'Name conflict for %s'
        raise Exception, '%s not found as a method or class' % name
|
||||||
|
|
||||||
|
def pause():
    """
    Pauses the output stream awaiting user feedback.
    """
    print "<Press enter/return to continue>"
    raw_input()  # block until the user presses enter
|
||||||
|
|
||||||
|
|
||||||
|
# code to handle timeouts
|
||||||
|
#
|
||||||
|
# FIXME
|
||||||
|
# NOTE: TimeoutFuncton is NOT reentrant. Later timeouts will silently
|
||||||
|
# disable earlier timeouts. Could be solved by maintaining a global list
|
||||||
|
# of active time outs. Currently, questions which have test cases calling
|
||||||
|
# this have all student code so wrapped.
|
||||||
|
#
|
||||||
|
import signal
|
||||||
|
import time
|
||||||
|
# Raised by TimeoutFunction when the wrapped call exceeds its time budget.
class TimeoutFunctionException(Exception):
    """Exception to raise on a timeout"""
    pass
|
||||||
|
|
||||||
|
|
||||||
|
class TimeoutFunction:
    """
    Wraps a callable so that invoking it raises TimeoutFunctionException
    when it runs longer than the given number of seconds.
    NOT reentrant (see the module note above this class): a nested timeout
    silently disables an enclosing one.
    """
    def __init__(self, function, timeout):
        self.timeout = timeout     # seconds allowed per call
        self.function = function   # the wrapped callable

    def handle_timeout(self, signum, frame):
        # SIGALRM handler: abort the wrapped call.
        raise TimeoutFunctionException()

    def __call__(self, *args, **keyArgs):
        # If we have SIGALRM signal, use it to cause an exception if and
        # when this function runs too long.  Otherwise check the time taken
        # after the method has returned, and throw an exception then.
        if hasattr(signal, 'SIGALRM'):
            old = signal.signal(signal.SIGALRM, self.handle_timeout)
            signal.alarm(self.timeout)
            try:
                result = self.function(*args, **keyArgs)
            finally:
                # Restore the previous handler even if the call raised.
                signal.signal(signal.SIGALRM, old)
            signal.alarm(0)  # cancel any pending alarm
        else:
            # No SIGALRM (e.g. Windows): run to completion, then compare
            # the elapsed wall-clock time against the budget.
            startTime = time.time()
            result = self.function(*args, **keyArgs)
            timeElapsed = time.time() - startTime
            if timeElapsed >= self.timeout:
                self.handle_timeout(None, None)
        return result
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
|
# Saved stream handles and mute state used by mutePrint()/unmutePrint().
_ORIGINAL_STDOUT = None
_ORIGINAL_STDERR = None
_MUTED = False
|
||||||
|
|
||||||
|
class WritableNull:
    "A file-like sink that silently discards everything written to it."
    def write(self, string):
        pass
|
||||||
|
|
||||||
|
def mutePrint():
    """
    Silence all subsequent stdout output until unmutePrint() is called.
    """
    global _ORIGINAL_STDOUT, _ORIGINAL_STDERR, _MUTED
    if _MUTED:
        return  # already muted; don't overwrite the saved real stream
    _MUTED = True

    _ORIGINAL_STDOUT = sys.stdout  # remember the real stream for restoration
    #_ORIGINAL_STDERR = sys.stderr
    sys.stdout = WritableNull()
    #sys.stderr = WritableNull()
|
||||||
|
|
||||||
|
def unmutePrint():
    """
    Restore stdout after a previous mutePrint().
    """
    global _ORIGINAL_STDOUT, _ORIGINAL_STDERR, _MUTED
    if not _MUTED:
        return  # nothing to restore
    _MUTED = False

    sys.stdout = _ORIGINAL_STDOUT
    #sys.stderr = _ORIGINAL_STDERR
|
||||||
|
|
||||||
85
p3_rl/valueIterationAgents.py
Normal file
85
p3_rl/valueIterationAgents.py
Normal file
@@ -0,0 +1,85 @@
|
|||||||
|
# valueIterationAgents.py
|
||||||
|
# -----------------------
|
||||||
|
# Licensing Information: You are free to use or extend these projects for
|
||||||
|
# educational purposes provided that (1) you do not distribute or publish
|
||||||
|
# solutions, (2) you retain this notice, and (3) you provide clear
|
||||||
|
# attribution to UC Berkeley, including a link to http://ai.berkeley.edu.
|
||||||
|
#
|
||||||
|
# Attribution Information: The Pacman AI projects were developed at UC Berkeley.
|
||||||
|
# The core projects and autograders were primarily created by John DeNero
|
||||||
|
# (denero@cs.berkeley.edu) and Dan Klein (klein@cs.berkeley.edu).
|
||||||
|
# Student side autograding was added by Brad Miller, Nick Hay, and
|
||||||
|
# Pieter Abbeel (pabbeel@cs.berkeley.edu).
|
||||||
|
|
||||||
|
|
||||||
|
import mdp, util
|
||||||
|
|
||||||
|
from learningAgents import ValueEstimationAgent
|
||||||
|
|
||||||
|
class ValueIterationAgent(ValueEstimationAgent):
    """
    * Please read learningAgents.py before reading this.*

    A ValueIterationAgent takes a Markov decision process
    (see mdp.py) on initialization and runs value iteration
    for a given number of iterations using the supplied
    discount factor.
    """
    def __init__(self, mdp, discount = 0.9, iterations = 100):
        """
        Your value iteration agent should take an mdp on
        construction, run the indicated number of iterations
        and then act according to the resulting policy.

        Some useful mdp methods you will use:
            mdp.getStates()
            mdp.getPossibleActions(state)
            mdp.getTransitionStatesAndProbs(state, action)
            mdp.getReward(state, action, nextState)
            mdp.isTerminal(state)
        """
        self.mdp = mdp                  # the MDP to plan in
        self.discount = discount        # discount factor (often called gamma)
        self.iterations = iterations    # number of value-iteration sweeps to run
        self.values = util.Counter() # A Counter is a dict with default 0

        # Write value iteration code here
        "*** YOUR CODE HERE ***"


    def getValue(self, state):
        """
        Return the value of the state (computed in __init__).
        """
        return self.values[state]


    def computeQValueFromValues(self, state, action):
        """
        Compute the Q-value of action in state from the
        value function stored in self.values.
        """
        "*** YOUR CODE HERE ***"
        util.raiseNotDefined()

    def computeActionFromValues(self, state):
        """
        The policy is the best action in the given state
        according to the values currently stored in self.values.

        You may break ties any way you see fit.  Note that if
        there are no legal actions, which is the case at the
        terminal state, you should return None.
        """
        "*** YOUR CODE HERE ***"
        util.raiseNotDefined()

    def getPolicy(self, state):
        # Alias for computeActionFromValues (part of the agent interface).
        return self.computeActionFromValues(state)

    def getAction(self, state):
        "Returns the policy at the state (no exploration)."
        return self.computeActionFromValues(state)

    def getQValue(self, state, action):
        # Alias for computeQValueFromValues (part of the agent interface).
        return self.computeQValueFromValues(state, action)
|
||||||
Reference in New Issue
Block a user