Add project 3 RL template.
This commit is contained in:
1
p3_rl/VERSION
Normal file
1
p3_rl/VERSION
Normal file
@@ -0,0 +1 @@
|
|||||||
|
v1.001
|
||||||
73
p3_rl/analysis.py
Normal file
73
p3_rl/analysis.py
Normal file
@@ -0,0 +1,73 @@
|
|||||||
|
# analysis.py
|
||||||
|
# -----------
|
||||||
|
# Licensing Information: You are free to use or extend these projects for
|
||||||
|
# educational purposes provided that (1) you do not distribute or publish
|
||||||
|
# solutions, (2) you retain this notice, and (3) you provide clear
|
||||||
|
# attribution to UC Berkeley, including a link to http://ai.berkeley.edu.
|
||||||
|
#
|
||||||
|
# Attribution Information: The Pacman AI projects were developed at UC Berkeley.
|
||||||
|
# The core projects and autograders were primarily created by John DeNero
|
||||||
|
# (denero@cs.berkeley.edu) and Dan Klein (klein@cs.berkeley.edu).
|
||||||
|
# Student side autograding was added by Brad Miller, Nick Hay, and
|
||||||
|
# Pieter Abbeel (pabbeel@cs.berkeley.edu).
|
||||||
|
|
||||||
|
|
||||||
|
######################
|
||||||
|
# ANALYSIS QUESTIONS #
|
||||||
|
######################
|
||||||
|
|
||||||
|
# Set the given parameters to obtain the specified policies through
|
||||||
|
# value iteration.
|
||||||
|
|
||||||
|
def question2():
|
||||||
|
answerDiscount = 0.9
|
||||||
|
answerNoise = 0.2
|
||||||
|
return answerDiscount, answerNoise
|
||||||
|
|
||||||
|
def question3a():
|
||||||
|
answerDiscount = None
|
||||||
|
answerNoise = None
|
||||||
|
answerLivingReward = None
|
||||||
|
return answerDiscount, answerNoise, answerLivingReward
|
||||||
|
# If not possible, return 'NOT POSSIBLE'
|
||||||
|
|
||||||
|
def question3b():
|
||||||
|
answerDiscount = None
|
||||||
|
answerNoise = None
|
||||||
|
answerLivingReward = None
|
||||||
|
return answerDiscount, answerNoise, answerLivingReward
|
||||||
|
# If not possible, return 'NOT POSSIBLE'
|
||||||
|
|
||||||
|
def question3c():
|
||||||
|
answerDiscount = None
|
||||||
|
answerNoise = None
|
||||||
|
answerLivingReward = None
|
||||||
|
return answerDiscount, answerNoise, answerLivingReward
|
||||||
|
# If not possible, return 'NOT POSSIBLE'
|
||||||
|
|
||||||
|
def question3d():
|
||||||
|
answerDiscount = None
|
||||||
|
answerNoise = None
|
||||||
|
answerLivingReward = None
|
||||||
|
return answerDiscount, answerNoise, answerLivingReward
|
||||||
|
# If not possible, return 'NOT POSSIBLE'
|
||||||
|
|
||||||
|
def question3e():
|
||||||
|
answerDiscount = None
|
||||||
|
answerNoise = None
|
||||||
|
answerLivingReward = None
|
||||||
|
return answerDiscount, answerNoise, answerLivingReward
|
||||||
|
# If not possible, return 'NOT POSSIBLE'
|
||||||
|
|
||||||
|
def question6():
|
||||||
|
answerEpsilon = None
|
||||||
|
answerLearningRate = None
|
||||||
|
return answerEpsilon, answerLearningRate
|
||||||
|
# If not possible, return 'NOT POSSIBLE'
|
||||||
|
|
||||||
|
if __name__ == '__main__':
|
||||||
|
print 'Answers to analysis questions:'
|
||||||
|
import analysis
|
||||||
|
for q in [q for q in dir(analysis) if q.startswith('question')]:
|
||||||
|
response = getattr(analysis, q)()
|
||||||
|
print ' Question %s:\t%s' % (q, str(response))
|
||||||
351
p3_rl/autograder.py
Normal file
351
p3_rl/autograder.py
Normal file
@@ -0,0 +1,351 @@
|
|||||||
|
# autograder.py
|
||||||
|
# -------------
|
||||||
|
# Licensing Information: You are free to use or extend these projects for
|
||||||
|
# educational purposes provided that (1) you do not distribute or publish
|
||||||
|
# solutions, (2) you retain this notice, and (3) you provide clear
|
||||||
|
# attribution to UC Berkeley, including a link to http://ai.berkeley.edu.
|
||||||
|
#
|
||||||
|
# Attribution Information: The Pacman AI projects were developed at UC Berkeley.
|
||||||
|
# The core projects and autograders were primarily created by John DeNero
|
||||||
|
# (denero@cs.berkeley.edu) and Dan Klein (klein@cs.berkeley.edu).
|
||||||
|
# Student side autograding was added by Brad Miller, Nick Hay, and
|
||||||
|
# Pieter Abbeel (pabbeel@cs.berkeley.edu).
|
||||||
|
|
||||||
|
|
||||||
|
# imports from python standard library
|
||||||
|
import grading
|
||||||
|
import imp
|
||||||
|
import optparse
|
||||||
|
import os
|
||||||
|
import re
|
||||||
|
import sys
|
||||||
|
import projectParams
|
||||||
|
import random
|
||||||
|
random.seed(0)
|
||||||
|
try:
|
||||||
|
from pacman import GameState
|
||||||
|
except:
|
||||||
|
pass
|
||||||
|
|
||||||
|
# register arguments and set default values
|
||||||
|
def readCommand(argv):
|
||||||
|
parser = optparse.OptionParser(description = 'Run public tests on student code')
|
||||||
|
parser.set_defaults(generateSolutions=False, edxOutput=False, muteOutput=False, printTestCase=False, noGraphics=False)
|
||||||
|
parser.add_option('--test-directory',
|
||||||
|
dest = 'testRoot',
|
||||||
|
default = 'test_cases',
|
||||||
|
help = 'Root test directory which contains subdirectories corresponding to each question')
|
||||||
|
parser.add_option('--student-code',
|
||||||
|
dest = 'studentCode',
|
||||||
|
default = projectParams.STUDENT_CODE_DEFAULT,
|
||||||
|
help = 'comma separated list of student code files')
|
||||||
|
parser.add_option('--code-directory',
|
||||||
|
dest = 'codeRoot',
|
||||||
|
default = "",
|
||||||
|
help = 'Root directory containing the student and testClass code')
|
||||||
|
parser.add_option('--test-case-code',
|
||||||
|
dest = 'testCaseCode',
|
||||||
|
default = projectParams.PROJECT_TEST_CLASSES,
|
||||||
|
help = 'class containing testClass classes for this project')
|
||||||
|
parser.add_option('--generate-solutions',
|
||||||
|
dest = 'generateSolutions',
|
||||||
|
action = 'store_true',
|
||||||
|
help = 'Write solutions generated to .solution file')
|
||||||
|
parser.add_option('--edx-output',
|
||||||
|
dest = 'edxOutput',
|
||||||
|
action = 'store_true',
|
||||||
|
help = 'Generate edX output files')
|
||||||
|
parser.add_option('--mute',
|
||||||
|
dest = 'muteOutput',
|
||||||
|
action = 'store_true',
|
||||||
|
help = 'Mute output from executing tests')
|
||||||
|
parser.add_option('--print-tests', '-p',
|
||||||
|
dest = 'printTestCase',
|
||||||
|
action = 'store_true',
|
||||||
|
help = 'Print each test case before running them.')
|
||||||
|
parser.add_option('--test', '-t',
|
||||||
|
dest = 'runTest',
|
||||||
|
default = None,
|
||||||
|
help = 'Run one particular test. Relative to test root.')
|
||||||
|
parser.add_option('--question', '-q',
|
||||||
|
dest = 'gradeQuestion',
|
||||||
|
default = None,
|
||||||
|
help = 'Grade one particular question.')
|
||||||
|
parser.add_option('--no-graphics',
|
||||||
|
dest = 'noGraphics',
|
||||||
|
action = 'store_true',
|
||||||
|
help = 'No graphics display for pacman games.')
|
||||||
|
(options, args) = parser.parse_args(argv)
|
||||||
|
return options
|
||||||
|
|
||||||
|
|
||||||
|
# confirm we should author solution files
|
||||||
|
def confirmGenerate():
|
||||||
|
print 'WARNING: this action will overwrite any solution files.'
|
||||||
|
print 'Are you sure you want to proceed? (yes/no)'
|
||||||
|
while True:
|
||||||
|
ans = sys.stdin.readline().strip()
|
||||||
|
if ans == 'yes':
|
||||||
|
break
|
||||||
|
elif ans == 'no':
|
||||||
|
sys.exit(0)
|
||||||
|
else:
|
||||||
|
print 'please answer either "yes" or "no"'
|
||||||
|
|
||||||
|
|
||||||
|
# TODO: Fix this so that it tracebacks work correctly
|
||||||
|
# Looking at source of the traceback module, presuming it works
|
||||||
|
# the same as the intepreters, it uses co_filename. This is,
|
||||||
|
# however, a readonly attribute.
|
||||||
|
def setModuleName(module, filename):
|
||||||
|
functionType = type(confirmGenerate)
|
||||||
|
classType = type(optparse.Option)
|
||||||
|
|
||||||
|
for i in dir(module):
|
||||||
|
o = getattr(module, i)
|
||||||
|
if hasattr(o, '__file__'): continue
|
||||||
|
|
||||||
|
if type(o) == functionType:
|
||||||
|
setattr(o, '__file__', filename)
|
||||||
|
elif type(o) == classType:
|
||||||
|
setattr(o, '__file__', filename)
|
||||||
|
# TODO: assign member __file__'s?
|
||||||
|
#print i, type(o)
|
||||||
|
|
||||||
|
|
||||||
|
#from cStringIO import StringIO
|
||||||
|
|
||||||
|
def loadModuleString(moduleSource):
|
||||||
|
# Below broken, imp doesn't believe its being passed a file:
|
||||||
|
# ValueError: load_module arg#2 should be a file or None
|
||||||
|
#
|
||||||
|
#f = StringIO(moduleCodeDict[k])
|
||||||
|
#tmp = imp.load_module(k, f, k, (".py", "r", imp.PY_SOURCE))
|
||||||
|
tmp = imp.new_module(k)
|
||||||
|
exec moduleCodeDict[k] in tmp.__dict__
|
||||||
|
setModuleName(tmp, k)
|
||||||
|
return tmp
|
||||||
|
|
||||||
|
import py_compile
|
||||||
|
|
||||||
|
def loadModuleFile(moduleName, filePath):
|
||||||
|
with open(filePath, 'r') as f:
|
||||||
|
return imp.load_module(moduleName, f, "%s.py" % moduleName, (".py", "r", imp.PY_SOURCE))
|
||||||
|
|
||||||
|
|
||||||
|
def readFile(path, root=""):
|
||||||
|
"Read file from disk at specified path and return as string"
|
||||||
|
with open(os.path.join(root, path), 'r') as handle:
|
||||||
|
return handle.read()
|
||||||
|
|
||||||
|
|
||||||
|
#######################################################################
|
||||||
|
# Error Hint Map
|
||||||
|
#######################################################################
|
||||||
|
|
||||||
|
# TODO: use these
|
||||||
|
ERROR_HINT_MAP = {
|
||||||
|
'q1': {
|
||||||
|
"<type 'exceptions.IndexError'>": """
|
||||||
|
We noticed that your project threw an IndexError on q1.
|
||||||
|
While many things may cause this, it may have been from
|
||||||
|
assuming a certain number of successors from a state space
|
||||||
|
or assuming a certain number of actions available from a given
|
||||||
|
state. Try making your code more general (no hardcoded indices)
|
||||||
|
and submit again!
|
||||||
|
"""
|
||||||
|
},
|
||||||
|
'q3': {
|
||||||
|
"<type 'exceptions.AttributeError'>": """
|
||||||
|
We noticed that your project threw an AttributeError on q3.
|
||||||
|
While many things may cause this, it may have been from assuming
|
||||||
|
a certain size or structure to the state space. For example, if you have
|
||||||
|
a line of code assuming that the state is (x, y) and we run your code
|
||||||
|
on a state space with (x, y, z), this error could be thrown. Try
|
||||||
|
making your code more general and submit again!
|
||||||
|
|
||||||
|
"""
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
import pprint
|
||||||
|
|
||||||
|
def splitStrings(d):
|
||||||
|
d2 = dict(d)
|
||||||
|
for k in d:
|
||||||
|
if k[0:2] == "__":
|
||||||
|
del d2[k]
|
||||||
|
continue
|
||||||
|
if d2[k].find("\n") >= 0:
|
||||||
|
d2[k] = d2[k].split("\n")
|
||||||
|
return d2
|
||||||
|
|
||||||
|
|
||||||
|
def printTest(testDict, solutionDict):
|
||||||
|
pp = pprint.PrettyPrinter(indent=4)
|
||||||
|
print "Test case:"
|
||||||
|
for line in testDict["__raw_lines__"]:
|
||||||
|
print " |", line
|
||||||
|
print "Solution:"
|
||||||
|
for line in solutionDict["__raw_lines__"]:
|
||||||
|
print " |", line
|
||||||
|
|
||||||
|
|
||||||
|
def runTest(testName, moduleDict, printTestCase=False, display=None):
|
||||||
|
import testParser
|
||||||
|
import testClasses
|
||||||
|
for module in moduleDict:
|
||||||
|
setattr(sys.modules[__name__], module, moduleDict[module])
|
||||||
|
|
||||||
|
testDict = testParser.TestParser(testName + ".test").parse()
|
||||||
|
solutionDict = testParser.TestParser(testName + ".solution").parse()
|
||||||
|
test_out_file = os.path.join('%s.test_output' % testName)
|
||||||
|
testDict['test_out_file'] = test_out_file
|
||||||
|
testClass = getattr(projectTestClasses, testDict['class'])
|
||||||
|
|
||||||
|
questionClass = getattr(testClasses, 'Question')
|
||||||
|
question = questionClass({'max_points': 0}, display)
|
||||||
|
testCase = testClass(question, testDict)
|
||||||
|
|
||||||
|
if printTestCase:
|
||||||
|
printTest(testDict, solutionDict)
|
||||||
|
|
||||||
|
# This is a fragile hack to create a stub grades object
|
||||||
|
grades = grading.Grades(projectParams.PROJECT_NAME, [(None,0)])
|
||||||
|
testCase.execute(grades, moduleDict, solutionDict)
|
||||||
|
|
||||||
|
|
||||||
|
# returns all the tests you need to run in order to run question
|
||||||
|
def getDepends(testParser, testRoot, question):
|
||||||
|
allDeps = [question]
|
||||||
|
questionDict = testParser.TestParser(os.path.join(testRoot, question, 'CONFIG')).parse()
|
||||||
|
if 'depends' in questionDict:
|
||||||
|
depends = questionDict['depends'].split()
|
||||||
|
for d in depends:
|
||||||
|
# run dependencies first
|
||||||
|
allDeps = getDepends(testParser, testRoot, d) + allDeps
|
||||||
|
return allDeps
|
||||||
|
|
||||||
|
# get list of questions to grade
|
||||||
|
def getTestSubdirs(testParser, testRoot, questionToGrade):
|
||||||
|
problemDict = testParser.TestParser(os.path.join(testRoot, 'CONFIG')).parse()
|
||||||
|
if questionToGrade != None:
|
||||||
|
questions = getDepends(testParser, testRoot, questionToGrade)
|
||||||
|
if len(questions) > 1:
|
||||||
|
print 'Note: due to dependencies, the following tests will be run: %s' % ' '.join(questions)
|
||||||
|
return questions
|
||||||
|
if 'order' in problemDict:
|
||||||
|
return problemDict['order'].split()
|
||||||
|
return sorted(os.listdir(testRoot))
|
||||||
|
|
||||||
|
|
||||||
|
# evaluate student code
|
||||||
|
def evaluate(generateSolutions, testRoot, moduleDict, exceptionMap=ERROR_HINT_MAP, edxOutput=False, muteOutput=False,
|
||||||
|
printTestCase=False, questionToGrade=None, display=None):
|
||||||
|
# imports of testbench code. note that the testClasses import must follow
|
||||||
|
# the import of student code due to dependencies
|
||||||
|
import testParser
|
||||||
|
import testClasses
|
||||||
|
for module in moduleDict:
|
||||||
|
setattr(sys.modules[__name__], module, moduleDict[module])
|
||||||
|
|
||||||
|
questions = []
|
||||||
|
questionDicts = {}
|
||||||
|
test_subdirs = getTestSubdirs(testParser, testRoot, questionToGrade)
|
||||||
|
for q in test_subdirs:
|
||||||
|
subdir_path = os.path.join(testRoot, q)
|
||||||
|
if not os.path.isdir(subdir_path) or q[0] == '.':
|
||||||
|
continue
|
||||||
|
|
||||||
|
# create a question object
|
||||||
|
questionDict = testParser.TestParser(os.path.join(subdir_path, 'CONFIG')).parse()
|
||||||
|
questionClass = getattr(testClasses, questionDict['class'])
|
||||||
|
question = questionClass(questionDict, display)
|
||||||
|
questionDicts[q] = questionDict
|
||||||
|
|
||||||
|
# load test cases into question
|
||||||
|
tests = filter(lambda t: re.match('[^#~.].*\.test\Z', t), os.listdir(subdir_path))
|
||||||
|
tests = map(lambda t: re.match('(.*)\.test\Z', t).group(1), tests)
|
||||||
|
for t in sorted(tests):
|
||||||
|
test_file = os.path.join(subdir_path, '%s.test' % t)
|
||||||
|
solution_file = os.path.join(subdir_path, '%s.solution' % t)
|
||||||
|
test_out_file = os.path.join(subdir_path, '%s.test_output' % t)
|
||||||
|
testDict = testParser.TestParser(test_file).parse()
|
||||||
|
if testDict.get("disabled", "false").lower() == "true":
|
||||||
|
continue
|
||||||
|
testDict['test_out_file'] = test_out_file
|
||||||
|
testClass = getattr(projectTestClasses, testDict['class'])
|
||||||
|
testCase = testClass(question, testDict)
|
||||||
|
def makefun(testCase, solution_file):
|
||||||
|
if generateSolutions:
|
||||||
|
# write solution file to disk
|
||||||
|
return lambda grades: testCase.writeSolution(moduleDict, solution_file)
|
||||||
|
else:
|
||||||
|
# read in solution dictionary and pass as an argument
|
||||||
|
testDict = testParser.TestParser(test_file).parse()
|
||||||
|
solutionDict = testParser.TestParser(solution_file).parse()
|
||||||
|
if printTestCase:
|
||||||
|
return lambda grades: printTest(testDict, solutionDict) or testCase.execute(grades, moduleDict, solutionDict)
|
||||||
|
else:
|
||||||
|
return lambda grades: testCase.execute(grades, moduleDict, solutionDict)
|
||||||
|
question.addTestCase(testCase, makefun(testCase, solution_file))
|
||||||
|
|
||||||
|
# Note extra function is necessary for scoping reasons
|
||||||
|
def makefun(question):
|
||||||
|
return lambda grades: question.execute(grades)
|
||||||
|
setattr(sys.modules[__name__], q, makefun(question))
|
||||||
|
questions.append((q, question.getMaxPoints()))
|
||||||
|
|
||||||
|
grades = grading.Grades(projectParams.PROJECT_NAME, questions, edxOutput=edxOutput, muteOutput=muteOutput)
|
||||||
|
if questionToGrade == None:
|
||||||
|
for q in questionDicts:
|
||||||
|
for prereq in questionDicts[q].get('depends', '').split():
|
||||||
|
grades.addPrereq(q, prereq)
|
||||||
|
|
||||||
|
grades.grade(sys.modules[__name__], bonusPic = projectParams.BONUS_PIC)
|
||||||
|
return grades.points
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
|
def getDisplay(graphicsByDefault, options=None):
|
||||||
|
graphics = graphicsByDefault
|
||||||
|
if options is not None and options.noGraphics:
|
||||||
|
graphics = False
|
||||||
|
if graphics:
|
||||||
|
try:
|
||||||
|
import graphicsDisplay
|
||||||
|
return graphicsDisplay.PacmanGraphics(1, frameTime=.05)
|
||||||
|
except ImportError:
|
||||||
|
pass
|
||||||
|
import textDisplay
|
||||||
|
return textDisplay.NullGraphics()
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
|
if __name__ == '__main__':
|
||||||
|
options = readCommand(sys.argv)
|
||||||
|
if options.generateSolutions:
|
||||||
|
confirmGenerate()
|
||||||
|
codePaths = options.studentCode.split(',')
|
||||||
|
# moduleCodeDict = {}
|
||||||
|
# for cp in codePaths:
|
||||||
|
# moduleName = re.match('.*?([^/]*)\.py', cp).group(1)
|
||||||
|
# moduleCodeDict[moduleName] = readFile(cp, root=options.codeRoot)
|
||||||
|
# moduleCodeDict['projectTestClasses'] = readFile(options.testCaseCode, root=options.codeRoot)
|
||||||
|
# moduleDict = loadModuleDict(moduleCodeDict)
|
||||||
|
|
||||||
|
moduleDict = {}
|
||||||
|
for cp in codePaths:
|
||||||
|
moduleName = re.match('.*?([^/]*)\.py', cp).group(1)
|
||||||
|
moduleDict[moduleName] = loadModuleFile(moduleName, os.path.join(options.codeRoot, cp))
|
||||||
|
moduleName = re.match('.*?([^/]*)\.py', options.testCaseCode).group(1)
|
||||||
|
moduleDict['projectTestClasses'] = loadModuleFile(moduleName, os.path.join(options.codeRoot, options.testCaseCode))
|
||||||
|
|
||||||
|
|
||||||
|
if options.runTest != None:
|
||||||
|
runTest(options.runTest, moduleDict, printTestCase=options.printTestCase, display=getDisplay(True, options))
|
||||||
|
else:
|
||||||
|
evaluate(options.generateSolutions, options.testRoot, moduleDict,
|
||||||
|
edxOutput=options.edxOutput, muteOutput=options.muteOutput, printTestCase=options.printTestCase,
|
||||||
|
questionToGrade=options.gradeQuestion, display=getDisplay(options.gradeQuestion!=None, options))
|
||||||
384
p3_rl/crawler.py
Normal file
384
p3_rl/crawler.py
Normal file
@@ -0,0 +1,384 @@
|
|||||||
|
# crawler.py
|
||||||
|
# ----------
|
||||||
|
# Licensing Information: You are free to use or extend these projects for
|
||||||
|
# educational purposes provided that (1) you do not distribute or publish
|
||||||
|
# solutions, (2) you retain this notice, and (3) you provide clear
|
||||||
|
# attribution to UC Berkeley, including a link to http://ai.berkeley.edu.
|
||||||
|
#
|
||||||
|
# Attribution Information: The Pacman AI projects were developed at UC Berkeley.
|
||||||
|
# The core projects and autograders were primarily created by John DeNero
|
||||||
|
# (denero@cs.berkeley.edu) and Dan Klein (klein@cs.berkeley.edu).
|
||||||
|
# Student side autograding was added by Brad Miller, Nick Hay, and
|
||||||
|
# Pieter Abbeel (pabbeel@cs.berkeley.edu).
|
||||||
|
|
||||||
|
|
||||||
|
#!/usr/bin/python
|
||||||
|
import math
|
||||||
|
from math import pi as PI
|
||||||
|
import time
|
||||||
|
import environment
|
||||||
|
import random
|
||||||
|
|
||||||
|
class CrawlingRobotEnvironment(environment.Environment):
|
||||||
|
|
||||||
|
def __init__(self, crawlingRobot):
|
||||||
|
|
||||||
|
self.crawlingRobot = crawlingRobot
|
||||||
|
|
||||||
|
# The state is of the form (armAngle, handAngle)
|
||||||
|
# where the angles are bucket numbers, not actual
|
||||||
|
# degree measurements
|
||||||
|
self.state = None
|
||||||
|
|
||||||
|
self.nArmStates = 9
|
||||||
|
self.nHandStates = 13
|
||||||
|
|
||||||
|
# create a list of arm buckets and hand buckets to
|
||||||
|
# discretize the state space
|
||||||
|
minArmAngle,maxArmAngle = self.crawlingRobot.getMinAndMaxArmAngles()
|
||||||
|
minHandAngle,maxHandAngle = self.crawlingRobot.getMinAndMaxHandAngles()
|
||||||
|
armIncrement = (maxArmAngle - minArmAngle) / (self.nArmStates-1)
|
||||||
|
handIncrement = (maxHandAngle - minHandAngle) / (self.nHandStates-1)
|
||||||
|
self.armBuckets = [minArmAngle+(armIncrement*i) \
|
||||||
|
for i in range(self.nArmStates)]
|
||||||
|
self.handBuckets = [minHandAngle+(handIncrement*i) \
|
||||||
|
for i in range(self.nHandStates)]
|
||||||
|
|
||||||
|
# Reset
|
||||||
|
self.reset()
|
||||||
|
|
||||||
|
def getCurrentState(self):
|
||||||
|
"""
|
||||||
|
Return the current state
|
||||||
|
of the crawling robot
|
||||||
|
"""
|
||||||
|
return self.state
|
||||||
|
|
||||||
|
def getPossibleActions(self, state):
|
||||||
|
"""
|
||||||
|
Returns possible actions
|
||||||
|
for the states in the
|
||||||
|
current state
|
||||||
|
"""
|
||||||
|
|
||||||
|
actions = list()
|
||||||
|
|
||||||
|
currArmBucket,currHandBucket = state
|
||||||
|
if currArmBucket > 0: actions.append('arm-down')
|
||||||
|
if currArmBucket < self.nArmStates-1: actions.append('arm-up')
|
||||||
|
if currHandBucket > 0: actions.append('hand-down')
|
||||||
|
if currHandBucket < self.nHandStates-1: actions.append('hand-up')
|
||||||
|
|
||||||
|
return actions
|
||||||
|
|
||||||
|
def doAction(self, action):
|
||||||
|
"""
|
||||||
|
Perform the action and update
|
||||||
|
the current state of the Environment
|
||||||
|
and return the reward for the
|
||||||
|
current state, the next state
|
||||||
|
and the taken action.
|
||||||
|
|
||||||
|
Returns:
|
||||||
|
nextState, reward
|
||||||
|
"""
|
||||||
|
nextState, reward = None, None
|
||||||
|
|
||||||
|
oldX,oldY = self.crawlingRobot.getRobotPosition()
|
||||||
|
|
||||||
|
armBucket,handBucket = self.state
|
||||||
|
armAngle,handAngle = self.crawlingRobot.getAngles()
|
||||||
|
if action == 'arm-up':
|
||||||
|
newArmAngle = self.armBuckets[armBucket+1]
|
||||||
|
self.crawlingRobot.moveArm(newArmAngle)
|
||||||
|
nextState = (armBucket+1,handBucket)
|
||||||
|
if action == 'arm-down':
|
||||||
|
newArmAngle = self.armBuckets[armBucket-1]
|
||||||
|
self.crawlingRobot.moveArm(newArmAngle)
|
||||||
|
nextState = (armBucket-1,handBucket)
|
||||||
|
if action == 'hand-up':
|
||||||
|
newHandAngle = self.handBuckets[handBucket+1]
|
||||||
|
self.crawlingRobot.moveHand(newHandAngle)
|
||||||
|
nextState = (armBucket,handBucket+1)
|
||||||
|
if action == 'hand-down':
|
||||||
|
newHandAngle = self.handBuckets[handBucket-1]
|
||||||
|
self.crawlingRobot.moveHand(newHandAngle)
|
||||||
|
nextState = (armBucket,handBucket-1)
|
||||||
|
|
||||||
|
newX,newY = self.crawlingRobot.getRobotPosition()
|
||||||
|
|
||||||
|
# a simple reward function
|
||||||
|
reward = newX - oldX
|
||||||
|
|
||||||
|
self.state = nextState
|
||||||
|
return nextState, reward
|
||||||
|
|
||||||
|
|
||||||
|
def reset(self):
|
||||||
|
"""
|
||||||
|
Resets the Environment to the initial state
|
||||||
|
"""
|
||||||
|
## Initialize the state to be the middle
|
||||||
|
## value for each parameter e.g. if there are 13 and 19
|
||||||
|
## buckets for the arm and hand parameters, then the intial
|
||||||
|
## state should be (6,9)
|
||||||
|
##
|
||||||
|
## Also call self.crawlingRobot.setAngles()
|
||||||
|
## to the initial arm and hand angle
|
||||||
|
|
||||||
|
armState = self.nArmStates/2
|
||||||
|
handState = self.nHandStates/2
|
||||||
|
self.state = armState,handState
|
||||||
|
self.crawlingRobot.setAngles(self.armBuckets[armState],self.handBuckets[handState])
|
||||||
|
self.crawlingRobot.positions = [20,self.crawlingRobot.getRobotPosition()[0]]
|
||||||
|
|
||||||
|
|
||||||
|
class CrawlingRobot:
|
||||||
|
|
||||||
|
def setAngles(self, armAngle, handAngle):
|
||||||
|
"""
|
||||||
|
set the robot's arm and hand angles
|
||||||
|
to the passed in values
|
||||||
|
"""
|
||||||
|
self.armAngle = armAngle
|
||||||
|
self.handAngle = handAngle
|
||||||
|
|
||||||
|
def getAngles(self):
|
||||||
|
"""
|
||||||
|
returns the pair of (armAngle, handAngle)
|
||||||
|
"""
|
||||||
|
return self.armAngle, self.handAngle
|
||||||
|
|
||||||
|
def getRobotPosition(self):
|
||||||
|
"""
|
||||||
|
returns the (x,y) coordinates
|
||||||
|
of the lower-left point of the
|
||||||
|
robot
|
||||||
|
"""
|
||||||
|
return self.robotPos
|
||||||
|
|
||||||
|
def moveArm(self, newArmAngle):
|
||||||
|
"""
|
||||||
|
move the robot arm to 'newArmAngle'
|
||||||
|
"""
|
||||||
|
oldArmAngle = self.armAngle
|
||||||
|
if newArmAngle > self.maxArmAngle:
|
||||||
|
raise 'Crawling Robot: Arm Raised too high. Careful!'
|
||||||
|
if newArmAngle < self.minArmAngle:
|
||||||
|
raise 'Crawling Robot: Arm Raised too low. Careful!'
|
||||||
|
disp = self.displacement(self.armAngle, self.handAngle,
|
||||||
|
newArmAngle, self.handAngle)
|
||||||
|
curXPos = self.robotPos[0]
|
||||||
|
self.robotPos = (curXPos+disp, self.robotPos[1])
|
||||||
|
self.armAngle = newArmAngle
|
||||||
|
|
||||||
|
# Position and Velocity Sign Post
|
||||||
|
self.positions.append(self.getRobotPosition()[0])
|
||||||
|
# self.angleSums.append(abs(math.degrees(oldArmAngle)-math.degrees(newArmAngle)))
|
||||||
|
if len(self.positions) > 100:
|
||||||
|
self.positions.pop(0)
|
||||||
|
# self.angleSums.pop(0)
|
||||||
|
|
||||||
|
def moveHand(self, newHandAngle):
|
||||||
|
"""
|
||||||
|
move the robot hand to 'newArmAngle'
|
||||||
|
"""
|
||||||
|
oldHandAngle = self.handAngle
|
||||||
|
|
||||||
|
if newHandAngle > self.maxHandAngle:
|
||||||
|
raise 'Crawling Robot: Hand Raised too high. Careful!'
|
||||||
|
if newHandAngle < self.minHandAngle:
|
||||||
|
raise 'Crawling Robot: Hand Raised too low. Careful!'
|
||||||
|
disp = self.displacement(self.armAngle, self.handAngle, self.armAngle, newHandAngle)
|
||||||
|
curXPos = self.robotPos[0]
|
||||||
|
self.robotPos = (curXPos+disp, self.robotPos[1])
|
||||||
|
self.handAngle = newHandAngle
|
||||||
|
|
||||||
|
# Position and Velocity Sign Post
|
||||||
|
self.positions.append(self.getRobotPosition()[0])
|
||||||
|
# self.angleSums.append(abs(math.degrees(oldHandAngle)-math.degrees(newHandAngle)))
|
||||||
|
if len(self.positions) > 100:
|
||||||
|
self.positions.pop(0)
|
||||||
|
# self.angleSums.pop(0)
|
||||||
|
|
||||||
|
def getMinAndMaxArmAngles(self):
|
||||||
|
"""
|
||||||
|
get the lower- and upper- bound
|
||||||
|
for the arm angles returns (min,max) pair
|
||||||
|
"""
|
||||||
|
return self.minArmAngle, self.maxArmAngle
|
||||||
|
|
||||||
|
def getMinAndMaxHandAngles(self):
|
||||||
|
"""
|
||||||
|
get the lower- and upper- bound
|
||||||
|
for the hand angles returns (min,max) pair
|
||||||
|
"""
|
||||||
|
return self.minHandAngle, self.maxHandAngle
|
||||||
|
|
||||||
|
def getRotationAngle(self):
|
||||||
|
"""
|
||||||
|
get the current angle the
|
||||||
|
robot body is rotated off the ground
|
||||||
|
"""
|
||||||
|
armCos, armSin = self.__getCosAndSin(self.armAngle)
|
||||||
|
handCos, handSin = self.__getCosAndSin(self.handAngle)
|
||||||
|
x = self.armLength * armCos + self.handLength * handCos + self.robotWidth
|
||||||
|
y = self.armLength * armSin + self.handLength * handSin + self.robotHeight
|
||||||
|
if y < 0:
|
||||||
|
return math.atan(-y/x)
|
||||||
|
return 0.0
|
||||||
|
|
||||||
|
|
||||||
|
## You shouldn't need methods below here
|
||||||
|
|
||||||
|
|
||||||
|
def __getCosAndSin(self, angle):
|
||||||
|
return math.cos(angle), math.sin(angle)
|
||||||
|
|
||||||
|
def displacement(self, oldArmDegree, oldHandDegree, armDegree, handDegree):
|
||||||
|
|
||||||
|
oldArmCos, oldArmSin = self.__getCosAndSin(oldArmDegree)
|
||||||
|
armCos, armSin = self.__getCosAndSin(armDegree)
|
||||||
|
oldHandCos, oldHandSin = self.__getCosAndSin(oldHandDegree)
|
||||||
|
handCos, handSin = self.__getCosAndSin(handDegree)
|
||||||
|
|
||||||
|
xOld = self.armLength * oldArmCos + self.handLength * oldHandCos + self.robotWidth
|
||||||
|
yOld = self.armLength * oldArmSin + self.handLength * oldHandSin + self.robotHeight
|
||||||
|
|
||||||
|
x = self.armLength * armCos + self.handLength * handCos + self.robotWidth
|
||||||
|
y = self.armLength * armSin + self.handLength * handSin + self.robotHeight
|
||||||
|
|
||||||
|
if y < 0:
|
||||||
|
if yOld <= 0:
|
||||||
|
return math.sqrt(xOld*xOld + yOld*yOld) - math.sqrt(x*x + y*y)
|
||||||
|
return (xOld - yOld*(x-xOld) / (y - yOld)) - math.sqrt(x*x + y*y)
|
||||||
|
else:
|
||||||
|
if yOld >= 0:
|
||||||
|
return 0.0
|
||||||
|
return -(x - y * (xOld-x)/(yOld-y)) + math.sqrt(xOld*xOld + yOld*yOld)
|
||||||
|
|
||||||
|
raise 'Never Should See This!'
|
||||||
|
|
||||||
|
def draw(self, stepCount, stepDelay):
|
||||||
|
x1, y1 = self.getRobotPosition()
|
||||||
|
x1 = x1 % self.totWidth
|
||||||
|
|
||||||
|
## Check Lower Still on the ground
|
||||||
|
if y1 != self.groundY:
|
||||||
|
raise 'Flying Robot!!'
|
||||||
|
|
||||||
|
rotationAngle = self.getRotationAngle()
|
||||||
|
cosRot, sinRot = self.__getCosAndSin(rotationAngle)
|
||||||
|
|
||||||
|
x2 = x1 + self.robotWidth * cosRot
|
||||||
|
y2 = y1 - self.robotWidth * sinRot
|
||||||
|
|
||||||
|
x3 = x1 - self.robotHeight * sinRot
|
||||||
|
y3 = y1 - self.robotHeight * cosRot
|
||||||
|
|
||||||
|
x4 = x3 + cosRot*self.robotWidth
|
||||||
|
y4 = y3 - sinRot*self.robotWidth
|
||||||
|
|
||||||
|
self.canvas.coords(self.robotBody,x1,y1,x2,y2,x4,y4,x3,y3)
|
||||||
|
|
||||||
|
armCos, armSin = self.__getCosAndSin(rotationAngle+self.armAngle)
|
||||||
|
xArm = x4 + self.armLength * armCos
|
||||||
|
yArm = y4 - self.armLength * armSin
|
||||||
|
|
||||||
|
self.canvas.coords(self.robotArm,x4,y4,xArm,yArm)
|
||||||
|
|
||||||
|
handCos, handSin = self.__getCosAndSin(self.handAngle+rotationAngle)
|
||||||
|
xHand = xArm + self.handLength * handCos
|
||||||
|
yHand = yArm - self.handLength * handSin
|
||||||
|
|
||||||
|
self.canvas.coords(self.robotHand,xArm,yArm,xHand,yHand)
|
||||||
|
|
||||||
|
|
||||||
|
# Position and Velocity Sign Post
|
||||||
|
# time = len(self.positions) + 0.5 * sum(self.angleSums)
|
||||||
|
# velocity = (self.positions[-1]-self.positions[0]) / time
|
||||||
|
# if len(self.positions) == 1: return
|
||||||
|
steps = (stepCount - self.lastStep)
|
||||||
|
if steps==0:return
|
||||||
|
# pos = self.positions[-1]
|
||||||
|
# velocity = (pos - self.lastPos) / steps
|
||||||
|
# g = .9 ** (10 * stepDelay)
|
||||||
|
# g = .99 ** steps
|
||||||
|
# self.velAvg = g * self.velAvg + (1 - g) * velocity
|
||||||
|
# g = .999 ** steps
|
||||||
|
# self.velAvg2 = g * self.velAvg2 + (1 - g) * velocity
|
||||||
|
pos = self.positions[-1]
|
||||||
|
velocity = pos - self.positions[-2]
|
||||||
|
vel2 = (pos - self.positions[0]) / len(self.positions)
|
||||||
|
self.velAvg = .9 * self.velAvg + .1 * vel2
|
||||||
|
velMsg = '100-step Avg Velocity: %.2f' % self.velAvg
|
||||||
|
# velMsg2 = '1000-step Avg Velocity: %.2f' % self.velAvg2
|
||||||
|
velocityMsg = 'Velocity: %.2f' % velocity
|
||||||
|
positionMsg = 'Position: %2.f' % pos
|
||||||
|
stepMsg = 'Step: %d' % stepCount
|
||||||
|
if 'vel_msg' in dir(self):
|
||||||
|
self.canvas.delete(self.vel_msg)
|
||||||
|
self.canvas.delete(self.pos_msg)
|
||||||
|
self.canvas.delete(self.step_msg)
|
||||||
|
self.canvas.delete(self.velavg_msg)
|
||||||
|
# self.canvas.delete(self.velavg2_msg)
|
||||||
|
# self.velavg2_msg = self.canvas.create_text(850,190,text=velMsg2)
|
||||||
|
self.velavg_msg = self.canvas.create_text(650,190,text=velMsg)
|
||||||
|
self.vel_msg = self.canvas.create_text(450,190,text=velocityMsg)
|
||||||
|
self.pos_msg = self.canvas.create_text(250,190,text=positionMsg)
|
||||||
|
self.step_msg = self.canvas.create_text(50,190,text=stepMsg)
|
||||||
|
# self.lastPos = pos
|
||||||
|
self.lastStep = stepCount
|
||||||
|
# self.lastVel = velocity
|
||||||
|
|
||||||
|
    def __init__(self, canvas):
        """
        Build the crawler-robot display on the given Tkinter *canvas*:
        velocity/step bookkeeping, arm/hand angle limits, and the canvas
        items (ground, body, arm, hand) that later updates reposition.
        """

        ## Canvas ##
        self.canvas = canvas
        self.velAvg = 0          # running 100-step average velocity shown in the HUD
        # self.velAvg2 = 0
        # self.lastPos = 0
        self.lastStep = 0        # step count at the last HUD refresh
        # self.lastVel = 0

        ## Arm and Hand Degrees ##
        # current and previous joint angles, in radians
        self.armAngle = self.oldArmDegree = 0.0
        self.handAngle = self.oldHandDegree = -PI/6

        self.maxArmAngle = PI/6
        self.minArmAngle = -PI/6

        self.maxHandAngle = 0
        self.minHandAngle = -(5.0/6.0) * PI

        ## Draw Ground ##
        self.totWidth = canvas.winfo_reqwidth()
        self.totHeight = canvas.winfo_reqheight()
        self.groundHeight = 40
        self.groundY = self.totHeight - self.groundHeight

        self.ground = canvas.create_rectangle(0,
            self.groundY,self.totWidth,self.totHeight, fill='blue')

        ## Robot Body ##
        self.robotWidth = 80
        self.robotHeight = 40
        self.robotPos = (20, self.groundY)
        # placeholder coordinates; the real polygon is positioned on redraw
        self.robotBody = canvas.create_polygon(0,0,0,0,0,0,0,0, fill='green')

        ## Robot Arm ##
        self.armLength = 60
        self.robotArm = canvas.create_line(0,0,0,0,fill='orange',width=5)

        ## Robot Hand ##
        self.handLength = 40
        self.robotHand = canvas.create_line(0,0,0,0,fill='red',width=3)

        # last two robot x-positions; seeded with zeros so velocity is defined
        self.positions = [0,0]
        # self.angleSums = [0,0]
|
|
||||||
|
|
||||||
|
|
||||||
|
if __name__ == '__main__':
    # Launched directly: start the interactive crawler GUI.
    from graphicsCrawlerDisplay import *
    run()
|
||||||
56
p3_rl/environment.py
Normal file
56
p3_rl/environment.py
Normal file
@@ -0,0 +1,56 @@
|
|||||||
|
# environment.py
|
||||||
|
# --------------
|
||||||
|
# Licensing Information: You are free to use or extend these projects for
|
||||||
|
# educational purposes provided that (1) you do not distribute or publish
|
||||||
|
# solutions, (2) you retain this notice, and (3) you provide clear
|
||||||
|
# attribution to UC Berkeley, including a link to http://ai.berkeley.edu.
|
||||||
|
#
|
||||||
|
# Attribution Information: The Pacman AI projects were developed at UC Berkeley.
|
||||||
|
# The core projects and autograders were primarily created by John DeNero
|
||||||
|
# (denero@cs.berkeley.edu) and Dan Klein (klein@cs.berkeley.edu).
|
||||||
|
# Student side autograding was added by Brad Miller, Nick Hay, and
|
||||||
|
# Pieter Abbeel (pabbeel@cs.berkeley.edu).
|
||||||
|
|
||||||
|
|
||||||
|
#!/usr/bin/python
|
||||||
|
|
||||||
|
class Environment:
    """
    Abstract base class for reinforcement-learning environments.

    Subclasses supply the state space, action space, and transition
    dynamics. Unimplemented methods evaluate the undefined name
    ``abstract``, so calling them raises NameError.
    """

    def getCurrentState(self):
        """Return the current state of the environment."""
        abstract

    def getPossibleActions(self, state):
        """
        Return the actions the agent can take in *state*.  May be the
        empty list when *state* is terminal.
        """
        abstract

    def doAction(self, action):
        """
        Perform *action* in the current state and update the
        environment.  Returns a (reward, nextState) pair.
        """
        abstract

    def reset(self):
        """Reset the current state to the start state."""
        abstract

    def isTerminal(self):
        """
        True when the environment is in a terminal state, i.e. the
        current state has no possible actions (no successors).
        """
        return len(self.getPossibleActions(self.getCurrentState())) == 0
|
||||||
103
p3_rl/featureExtractors.py
Normal file
103
p3_rl/featureExtractors.py
Normal file
@@ -0,0 +1,103 @@
|
|||||||
|
# featureExtractors.py
|
||||||
|
# --------------------
|
||||||
|
# Licensing Information: You are free to use or extend these projects for
|
||||||
|
# educational purposes provided that (1) you do not distribute or publish
|
||||||
|
# solutions, (2) you retain this notice, and (3) you provide clear
|
||||||
|
# attribution to UC Berkeley, including a link to http://ai.berkeley.edu.
|
||||||
|
#
|
||||||
|
# Attribution Information: The Pacman AI projects were developed at UC Berkeley.
|
||||||
|
# The core projects and autograders were primarily created by John DeNero
|
||||||
|
# (denero@cs.berkeley.edu) and Dan Klein (klein@cs.berkeley.edu).
|
||||||
|
# Student side autograding was added by Brad Miller, Nick Hay, and
|
||||||
|
# Pieter Abbeel (pabbeel@cs.berkeley.edu).
|
||||||
|
|
||||||
|
|
||||||
|
"Feature extractors for Pacman game states"
|
||||||
|
|
||||||
|
from game import Directions, Actions
|
||||||
|
import util
|
||||||
|
|
||||||
|
class FeatureExtractor:
    """Interface for mapping (state, action) pairs to feature vectors."""

    def getFeatures(self, state, action):
        """
        Return a dict-like util.Counter from feature names to values;
        indicator features usually carry the value 1.0.
        """
        util.raiseNotDefined()
|
||||||
|
|
||||||
|
class IdentityExtractor(FeatureExtractor):
    """A single indicator feature per (state, action) pair."""

    def getFeatures(self, state, action):
        features = util.Counter()
        features[(state, action)] = 1.0
        return features
|
||||||
|
|
||||||
|
class CoordinateExtractor(FeatureExtractor):
    """
    Indicator features for the state itself, its x and y coordinates,
    and the chosen action.  Assumes *state* is an (x, y)-indexable pair.
    """

    def getFeatures(self, state, action):
        feats = util.Counter()
        feats[state] = 1.0
        feats['x=%d' % state[0]] = 1.0
        # Bug fix: the y-coordinate feature previously read state[0],
        # which duplicated the x feature and dropped y entirely.
        feats['y=%d' % state[1]] = 1.0
        feats['action=%s' % action] = 1.0
        return feats
|
||||||
|
|
||||||
|
def closestFood(pos, food, walls):
    """
    Return the maze distance from *pos* to the nearest food dot, or
    None when no food is reachable.

    Breadth-first search over legal (non-wall) grid positions, so the
    first food cell dequeued is guaranteed to be the closest one.
    """
    # deque gives O(1) popleft; the original list.pop(0) was O(n) per pop,
    # making the search quadratic on large open boards.
    from collections import deque
    fringe = deque([(pos[0], pos[1], 0)])
    expanded = set()
    while fringe:
        pos_x, pos_y, dist = fringe.popleft()
        if (pos_x, pos_y) in expanded:
            continue
        expanded.add((pos_x, pos_y))
        # if we find a food at this location then exit
        if food[pos_x][pos_y]:
            return dist
        # otherwise spread out from the location to its neighbours
        for nbr_x, nbr_y in Actions.getLegalNeighbors((pos_x, pos_y), walls):
            fringe.append((nbr_x, nbr_y, dist + 1))
    # no food found
    return None
|
||||||
|
|
||||||
|
class SimpleExtractor(FeatureExtractor):
    """
    Simple reflex features for Pacman:
    - whether food will be eaten
    - how far away the next food is
    - whether a ghost collision is imminent
    - whether a ghost is one step away
    """

    def getFeatures(self, state, action):
        # grid of food, wall layout, and current ghost locations
        food = state.getFood()
        walls = state.getWalls()
        ghosts = state.getGhostPositions()

        features = util.Counter()
        features["bias"] = 1.0

        # where Pacman ends up after taking the action
        x, y = state.getPacmanPosition()
        dx, dy = Actions.directionToVector(action)
        next_x, next_y = int(x + dx), int(y + dy)

        # how many ghosts could reach the new position in a single step
        adjacent_ghosts = 0
        for g in ghosts:
            if (next_x, next_y) in Actions.getLegalNeighbors(g, walls):
                adjacent_ghosts += 1
        features["#-of-ghosts-1-step-away"] = adjacent_ghosts

        # only credit eating when no ghost threatens the square
        if not features["#-of-ghosts-1-step-away"] and food[next_x][next_y]:
            features["eats-food"] = 1.0

        dist = closestFood((next_x, next_y), food, walls)
        if dist is not None:
            # scale below one so the weight update doesn't diverge
            features["closest-food"] = float(dist) / (walls.width * walls.height)
        features.divideAll(10.0)
        return features
|
||||||
729
p3_rl/game.py
Normal file
729
p3_rl/game.py
Normal file
@@ -0,0 +1,729 @@
|
|||||||
|
# game.py
|
||||||
|
# -------
|
||||||
|
# Licensing Information: You are free to use or extend these projects for
|
||||||
|
# educational purposes provided that (1) you do not distribute or publish
|
||||||
|
# solutions, (2) you retain this notice, and (3) you provide clear
|
||||||
|
# attribution to UC Berkeley, including a link to http://ai.berkeley.edu.
|
||||||
|
#
|
||||||
|
# Attribution Information: The Pacman AI projects were developed at UC Berkeley.
|
||||||
|
# The core projects and autograders were primarily created by John DeNero
|
||||||
|
# (denero@cs.berkeley.edu) and Dan Klein (klein@cs.berkeley.edu).
|
||||||
|
# Student side autograding was added by Brad Miller, Nick Hay, and
|
||||||
|
# Pieter Abbeel (pabbeel@cs.berkeley.edu).
|
||||||
|
|
||||||
|
|
||||||
|
# game.py
|
||||||
|
# -------
|
||||||
|
# Licensing Information: Please do not distribute or publish solutions to this
|
||||||
|
# project. You are free to use and extend these projects for educational
|
||||||
|
# purposes. The Pacman AI projects were developed at UC Berkeley, primarily by
|
||||||
|
# John DeNero (denero@cs.berkeley.edu) and Dan Klein (klein@cs.berkeley.edu).
|
||||||
|
# For more info, see http://inst.eecs.berkeley.edu/~cs188/sp09/pacman.html
|
||||||
|
|
||||||
|
from util import *
|
||||||
|
import time, os
|
||||||
|
import traceback
|
||||||
|
import sys
|
||||||
|
|
||||||
|
#######################
|
||||||
|
# Parts worth reading #
|
||||||
|
#######################
|
||||||
|
|
||||||
|
class Agent:
    """
    Base class for game agents.  Subclasses must implement getAction;
    the game also invokes optional hooks when they exist, e.g.

        def registerInitialState(self, state):  # inspects the starting state
    """

    def __init__(self, index=0):
        # this agent's slot in the game's agent list
        self.index = index

    def getAction(self, state):
        """
        Receive a GameState (from pacman, capture, or sonar) and return
        an action from Directions.{North, South, East, West, Stop}.
        """
        raiseNotDefined()
|
||||||
|
|
||||||
|
class Directions:
    """Canonical direction names plus left/right/reverse rotation maps."""
    NORTH = 'North'
    SOUTH = 'South'
    EAST = 'East'
    WEST = 'West'
    STOP = 'Stop'

    # direction reached by turning 90 degrees left
    LEFT = {NORTH: WEST,
            SOUTH: EAST,
            EAST: NORTH,
            WEST: SOUTH,
            STOP: STOP}

    # turning right is exactly the inverse of turning left
    RIGHT = dict((v, k) for k, v in LEFT.items())

    # 180-degree turn
    REVERSE = {NORTH: SOUTH,
               SOUTH: NORTH,
               EAST: WEST,
               WEST: EAST,
               STOP: STOP}
|
||||||
|
|
||||||
|
class Configuration:
    """
    Holds the (x, y) coordinate of a character along with its traveling
    direction.

    Positions follow graph convention: (0, 0) is the lower-left corner,
    x increases to the right and y increases upward, so North is the
    direction of increasing y, i.e. (0, 1).
    """

    def __init__(self, pos, direction):
        self.pos = pos
        self.direction = direction

    def getPosition(self):
        return self.pos

    def getDirection(self):
        return self.direction

    def isInteger(self):
        # true exactly when the position lies on a grid point
        x, y = self.pos
        return x == int(x) and y == int(y)

    def __eq__(self, other):
        if other is None:
            return False
        return self.pos == other.pos and self.direction == other.direction

    def __hash__(self):
        return hash(hash(self.pos) + 13 * hash(self.direction))

    def __str__(self):
        return "(x,y)=" + str(self.pos) + ", " + str(self.direction)

    def generateSuccessor(self, vector):
        """
        Return the configuration reached by translating this one by the
        movement *vector*.  Low-level call: it does not check whether
        the move is legal.
        """
        x, y = self.pos
        dx, dy = vector
        direction = Actions.vectorToDirection(vector)
        if direction == Directions.STOP:
            direction = self.direction  # There is no stop direction
        return Configuration((x + dx, y + dy), direction)
|
||||||
|
|
||||||
|
class AgentState:
    """
    Holds the state of one agent: configuration, scared timer, and the
    food-carrying counters used by capture-style games.
    """

    def __init__(self, startConfiguration, isPacman):
        self.start = startConfiguration
        self.configuration = startConfiguration
        self.isPacman = isPacman
        self.scaredTimer = 0
        self.numCarrying = 0
        self.numReturned = 0

    def __str__(self):
        label = "Pacman: " if self.isPacman else "Ghost: "
        return label + str(self.configuration)

    def __eq__(self, other):
        # equality compares position/direction and scared timer only
        if other is None:
            return False
        return (self.configuration == other.configuration
                and self.scaredTimer == other.scaredTimer)

    def __hash__(self):
        return hash(hash(self.configuration) + 13 * hash(self.scaredTimer))

    def copy(self):
        clone = AgentState(self.start, self.isPacman)
        clone.configuration = self.configuration
        clone.scaredTimer = self.scaredTimer
        clone.numCarrying = self.numCarrying
        clone.numReturned = self.numReturned
        return clone

    def getPosition(self):
        if self.configuration is None:
            return None
        return self.configuration.getPosition()

    def getDirection(self):
        return self.configuration.getDirection()
|
||||||
|
|
||||||
|
class Grid:
    """
    A 2-dimensional array of objects backed by a list of lists.  Data is accessed
    via grid[x][y] where (x,y) are positions on a Pacman map with x horizontal,
    y vertical and the origin (0,0) in the bottom left corner.

    The __str__ method constructs an output that is oriented like a pacman board.
    """
    def __init__(self, width, height, initialValue=False, bitRepresentation=None):
        if initialValue not in [False, True]: raise Exception('Grids can only contain booleans')
        # how many cells are packed into a single integer by packBits/_unpackBits
        self.CELLS_PER_INT = 30

        self.width = width
        self.height = height
        # column-major storage: data[x][y]
        self.data = [[initialValue for y in range(height)] for x in range(width)]
        if bitRepresentation:
            self._unpackBits(bitRepresentation)

    def __getitem__(self, i):
        # grid[x] yields a column list, so grid[x][y] addresses one cell
        return self.data[i]

    def __setitem__(self, key, item):
        self.data[key] = item

    def __str__(self):
        # transpose then flip vertically so the printout matches board orientation
        out = [[str(self.data[x][y])[0] for x in range(self.width)] for y in range(self.height)]
        out.reverse()
        return '\n'.join([''.join(x) for x in out])

    def __eq__(self, other):
        if other == None: return False
        return self.data == other.data

    def __hash__(self):
        # return hash(str(self))
        # hash the truthiness of every cell as one big bit pattern
        base = 1
        h = 0
        for l in self.data:
            for i in l:
                if i:
                    h += base
                base *= 2
        return hash(h)

    def copy(self):
        # copies each column list, so the new grid's cells are independent
        g = Grid(self.width, self.height)
        g.data = [x[:] for x in self.data]
        return g

    def deepCopy(self):
        return self.copy()

    def shallowCopy(self):
        # shares the underlying data with the original grid
        g = Grid(self.width, self.height)
        g.data = self.data
        return g

    def count(self, item =True ):
        # number of cells equal to *item*
        return sum([x.count(item) for x in self.data])

    def asList(self, key = True):
        # list of (x, y) positions whose cell equals *key*
        list = []
        for x in range(self.width):
            for y in range(self.height):
                if self[x][y] == key: list.append( (x,y) )
        return list

    def packBits(self):
        """
        Returns an efficient int list representation

        (width, height, bitPackedInts...)
        """
        bits = [self.width, self.height]
        currentInt = 0
        for i in range(self.height * self.width):
            # most-significant bit of the current packed integer comes first
            bit = self.CELLS_PER_INT - (i % self.CELLS_PER_INT) - 1
            x, y = self._cellIndexToPosition(i)
            if self[x][y]:
                currentInt += 2 ** bit
            if (i + 1) % self.CELLS_PER_INT == 0:
                bits.append(currentInt)
                currentInt = 0
        bits.append(currentInt)
        return tuple(bits)

    def _cellIndexToPosition(self, index):
        # NOTE: relies on Python 2 '/' performing integer division here
        x = index / self.height
        y = index % self.height
        return x, y

    def _unpackBits(self, bits):
        """
        Fills in data from a bit-level representation
        """
        cell = 0
        for packed in bits:
            for bit in self._unpackInt(packed, self.CELLS_PER_INT):
                if cell == self.width * self.height: break
                x, y = self._cellIndexToPosition(cell)
                self[x][y] = bit
                cell += 1

    def _unpackInt(self, packed, size):
        # expand one packed integer back into CELLS_PER_INT booleans, MSB first
        # (Python 2 raise syntax, kept as-is for this Python 2 codebase)
        bools = []
        if packed < 0: raise ValueError, "must be a positive integer"
        for i in range(size):
            n = 2 ** (self.CELLS_PER_INT - i - 1)
            if packed >= n:
                bools.append(True)
                packed -= n
            else:
                bools.append(False)
        return bools
|
||||||
|
|
||||||
|
def reconstituteGrid(bitRep):
    """
    Rebuild a Grid from a packed-bits tuple (width, height, ints...);
    anything that is not exactly a tuple is returned unchanged.
    """
    # strict type check on purpose: only a genuine tuple is a bit representation
    if type(bitRep) is not tuple:
        return bitRep
    width, height = bitRep[:2]
    return Grid(width, height, bitRepresentation=bitRep[2:])
|
||||||
|
|
||||||
|
####################################
|
||||||
|
# Parts you shouldn't have to read #
|
||||||
|
####################################
|
||||||
|
|
||||||
|
class Actions:
    """
    A collection of static methods for manipulating move actions.
    """
    # unit movement vector for each direction
    _directions = {Directions.NORTH: (0, 1),
                   Directions.SOUTH: (0, -1),
                   Directions.EAST: (1, 0),
                   Directions.WEST: (-1, 0),
                   Directions.STOP: (0, 0)}

    _directionsAsList = _directions.items()

    # how far off a grid point an agent may be while still "on" it
    TOLERANCE = .001

    def reverseDirection(action):
        """Return the 180-degree opposite of *action*; unknown actions pass through."""
        return Directions.REVERSE.get(action, action)
    reverseDirection = staticmethod(reverseDirection)

    def vectorToDirection(vector):
        """Map a movement vector to a direction name; y takes priority over x."""
        dx, dy = vector
        if dy > 0:
            return Directions.NORTH
        if dy < 0:
            return Directions.SOUTH
        if dx < 0:
            return Directions.WEST
        if dx > 0:
            return Directions.EAST
        return Directions.STOP
    vectorToDirection = staticmethod(vectorToDirection)

    def directionToVector(direction, speed = 1.0):
        """Return the movement vector for *direction*, scaled by *speed*."""
        dx, dy = Actions._directions[direction]
        return (dx * speed, dy * speed)
    directionToVector = staticmethod(directionToVector)

    def getPossibleActions(config, walls):
        """Return the legal directions from *config* given the wall grid."""
        x, y = config.pos
        col, row = int(x + 0.5), int(y + 0.5)

        # In between grid points, all agents must continue straight
        if (abs(x - col) + abs(y - row) > Actions.TOLERANCE):
            return [config.getDirection()]

        legal = []
        for direction, (dx, dy) in Actions._directionsAsList:
            if not walls[col + dx][row + dy]:
                legal.append(direction)
        return legal
    getPossibleActions = staticmethod(getPossibleActions)

    def getLegalNeighbors(position, walls):
        """Return in-bounds, non-wall grid positions one step from *position*."""
        x, y = position
        col, row = int(x + 0.5), int(y + 0.5)
        neighbors = []
        for direction, (dx, dy) in Actions._directionsAsList:
            nx = col + dx
            if nx < 0 or nx == walls.width: continue
            ny = row + dy
            if ny < 0 or ny == walls.height: continue
            if not walls[nx][ny]: neighbors.append((nx, ny))
        return neighbors
    getLegalNeighbors = staticmethod(getLegalNeighbors)

    def getSuccessor(position, action):
        """Return the position reached by applying *action* at *position*."""
        dx, dy = Actions.directionToVector(action)
        x, y = position
        return (x + dx, y + dy)
    getSuccessor = staticmethod(getSuccessor)
|
||||||
|
|
||||||
|
class GameStateData:
    """
    Mutable data behind a GameState: the food grid, capsule list, agent
    states, layout, score, and per-move change flags that the display
    and rules layers consume.
    """
    def __init__( self, prevState = None ):
        """
        Generates a new data packet by copying information from its predecessor.
        """
        if prevState != None:
            # food is shared (shallow) with the predecessor; capsules are copied
            self.food = prevState.food.shallowCopy()
            self.capsules = prevState.capsules[:]
            self.agentStates = self.copyAgentStates( prevState.agentStates )
            self.layout = prevState.layout
            self._eaten = prevState._eaten
            self.score = prevState.score

        # per-move bookkeeping consumed by the graphics/rules layers
        self._foodEaten = None
        self._foodAdded = None
        self._capsuleEaten = None
        self._agentMoved = None
        self._lose = False
        self._win = False
        self.scoreChange = 0

    def deepCopy( self ):
        # NOTE(review): agentStates and capsules come from the predecessor
        # copy in __init__; only food and layout get an extra deep copy here
        state = GameStateData( self )
        state.food = self.food.deepCopy()
        state.layout = self.layout.deepCopy()
        state._agentMoved = self._agentMoved
        state._foodEaten = self._foodEaten
        state._foodAdded = self._foodAdded
        state._capsuleEaten = self._capsuleEaten
        return state

    def copyAgentStates( self, agentStates ):
        # element-wise copy so the new list's states are independent
        copiedStates = []
        for agentState in agentStates:
            copiedStates.append( agentState.copy() )
        return copiedStates

    def __eq__( self, other ):
        """
        Allows two states to be compared.
        """
        if other == None: return False
        # TODO Check for type of other
        if not self.agentStates == other.agentStates: return False
        if not self.food == other.food: return False
        if not self.capsules == other.capsules: return False
        if not self.score == other.score: return False
        return True

    def __hash__( self ):
        """
        Allows states to be keys of dictionaries.
        """
        # debug aid: surface any unhashable agent state before combining hashes
        for i, state in enumerate( self.agentStates ):
            try:
                int(hash(state))
            except TypeError, e:
                print e
                #hash(state)
        return int((hash(tuple(self.agentStates)) + 13*hash(self.food) + 113* hash(tuple(self.capsules)) + 7 * hash(self.score)) % 1048575 )

    def __str__( self ):
        # render the board as text: food/walls first, then agents, then capsules
        width, height = self.layout.width, self.layout.height
        map = Grid(width, height)
        # food may still be in packed-bits tuple form; rebuild it lazily
        if type(self.food) == type((1,2)):
            self.food = reconstituteGrid(self.food)
        for x in range(width):
            for y in range(height):
                food, walls = self.food, self.layout.walls
                map[x][y] = self._foodWallStr(food[x][y], walls[x][y])

        for agentState in self.agentStates:
            if agentState == None: continue
            if agentState.configuration == None: continue
            x,y = [int( i ) for i in nearestPoint( agentState.configuration.pos )]
            agent_dir = agentState.configuration.direction
            if agentState.isPacman:
                map[x][y] = self._pacStr( agent_dir )
            else:
                map[x][y] = self._ghostStr( agent_dir )

        for x, y in self.capsules:
            map[x][y] = 'o'

        return str(map) + ("\nScore: %d\n" % self.score)

    def _foodWallStr( self, hasFood, hasWall ):
        # one-character cell glyph: food beats wall beats empty
        if hasFood:
            return '.'
        elif hasWall:
            return '%'
        else:
            return ' '

    def _pacStr( self, dir ):
        # the glyph's "mouth" opens opposite to the travel direction
        if dir == Directions.NORTH:
            return 'v'
        if dir == Directions.SOUTH:
            return '^'
        if dir == Directions.WEST:
            return '>'
        return '<'

    def _ghostStr( self, dir ):
        # NOTE(review): the early return makes the direction-specific
        # glyphs below unreachable, so ghosts always render as 'G'
        return 'G'
        if dir == Directions.NORTH:
            return 'M'
        if dir == Directions.SOUTH:
            return 'W'
        if dir == Directions.WEST:
            return '3'
        return 'E'

    def initialize( self, layout, numGhostAgents ):
        """
        Creates an initial game state from a layout array (see layout.py).
        """
        self.food = layout.food.copy()
        #self.capsules = []
        self.capsules = layout.capsules[:]
        self.layout = layout
        self.score = 0
        self.scoreChange = 0

        self.agentStates = []
        numGhosts = 0
        for isPacman, pos in layout.agentPositions:
            if not isPacman:
                if numGhosts == numGhostAgents: continue # Max ghosts reached already
                else: numGhosts += 1
            self.agentStates.append( AgentState( Configuration( pos, Directions.STOP), isPacman) )
        self._eaten = [False for a in self.agentStates]
|
||||||
|
|
||||||
|
# Optional BOINC distributed-computing support: enabled only when the
# boinc module is importable; any import failure silently disables it.
try:
    import boinc
    _BOINC_ENABLED = True
except:
    _BOINC_ENABLED = False
|
||||||
|
|
||||||
|
class Game:
|
||||||
|
"""
|
||||||
|
The Game manages the control flow, soliciting actions from agents.
|
||||||
|
"""
|
||||||
|
|
||||||
|
    def __init__( self, agents, display, rules, startingIndex=0, muteAgents=False, catchExceptions=False ):
        """
        agents: list of Agent objects, indexed by turn order
        display: graphics/text display with initialize/update hooks
        rules: game-specific rules object (timeouts, crash handling, progress)
        muteAgents: capture each agent's stdout/stderr into a buffer
        catchExceptions: run agents under timeouts and crash handling
        """
        self.agentCrashed = False
        self.agents = agents
        self.display = display
        self.rules = rules
        self.startingIndex = startingIndex
        self.gameOver = False
        self.muteAgents = muteAgents
        self.catchExceptions = catchExceptions
        self.moveHistory = []
        # per-agent cumulative move time and warning counts, used for timeouts
        self.totalAgentTimes = [0 for agent in agents]
        self.totalAgentTimeWarnings = [0 for agent in agents]
        self.agentTimeout = False
        # one capture buffer per agent for muted output (Python 2 module)
        import cStringIO
        self.agentOutput = [cStringIO.StringIO() for agent in agents]
|
||||||
|
|
||||||
|
    def getProgress(self):
        # Fraction of the game completed, in [0, 1]; delegated to the
        # rules object until the game is over.
        if self.gameOver:
            return 1.0
        else:
            return self.rules.getProgress(self)
|
||||||
|
|
||||||
|
    def _agentCrash( self, agentIndex, quiet=False):
        "Helper method for handling agent crashes"
        # quiet=True suppresses the traceback (used for timeouts)
        if not quiet: traceback.print_exc()
        self.gameOver = True
        self.agentCrashed = True
        # let the rules decide the consequences (e.g. which side wins)
        self.rules.agentCrash(self, agentIndex)
|
||||||
|
|
||||||
|
    # NOTE(review): mute()/unmute() declare OLD_STDOUT/OLD_STDERR as module
    # globals via 'global', so these class attributes are never actually
    # read or written; the saved stream handles live at module level.
    OLD_STDOUT = None
    OLD_STDERR = None
|
||||||
|
|
||||||
|
    def mute(self, agentIndex):
        # Redirect stdout/stderr into the given agent's capture buffer,
        # saving the real streams in module globals for unmute().
        if not self.muteAgents: return
        global OLD_STDOUT, OLD_STDERR
        import cStringIO  # NOTE(review): unused here; buffers are created in __init__
        OLD_STDOUT = sys.stdout
        OLD_STDERR = sys.stderr
        sys.stdout = self.agentOutput[agentIndex]
        sys.stderr = self.agentOutput[agentIndex]
|
||||||
|
|
||||||
|
    def unmute(self):
        # Restore the real stdout/stderr saved by mute().
        if not self.muteAgents: return
        global OLD_STDOUT, OLD_STDERR
        # Revert stdout/stderr to originals
        sys.stdout = OLD_STDOUT
        sys.stderr = OLD_STDERR
|
||||||
|
|
||||||
|
|
||||||
|
def run( self ):
|
||||||
|
"""
|
||||||
|
Main control loop for game play.
|
||||||
|
"""
|
||||||
|
self.display.initialize(self.state.data)
|
||||||
|
self.numMoves = 0
|
||||||
|
|
||||||
|
###self.display.initialize(self.state.makeObservation(1).data)
|
||||||
|
# inform learning agents of the game start
|
||||||
|
for i in range(len(self.agents)):
|
||||||
|
agent = self.agents[i]
|
||||||
|
if not agent:
|
||||||
|
self.mute(i)
|
||||||
|
# this is a null agent, meaning it failed to load
|
||||||
|
# the other team wins
|
||||||
|
print >>sys.stderr, "Agent %d failed to load" % i
|
||||||
|
self.unmute()
|
||||||
|
self._agentCrash(i, quiet=True)
|
||||||
|
return
|
||||||
|
if ("registerInitialState" in dir(agent)):
|
||||||
|
self.mute(i)
|
||||||
|
if self.catchExceptions:
|
||||||
|
try:
|
||||||
|
timed_func = TimeoutFunction(agent.registerInitialState, int(self.rules.getMaxStartupTime(i)))
|
||||||
|
try:
|
||||||
|
start_time = time.time()
|
||||||
|
timed_func(self.state.deepCopy())
|
||||||
|
time_taken = time.time() - start_time
|
||||||
|
self.totalAgentTimes[i] += time_taken
|
||||||
|
except TimeoutFunctionException:
|
||||||
|
print >>sys.stderr, "Agent %d ran out of time on startup!" % i
|
||||||
|
self.unmute()
|
||||||
|
self.agentTimeout = True
|
||||||
|
self._agentCrash(i, quiet=True)
|
||||||
|
return
|
||||||
|
except Exception,data:
|
||||||
|
self._agentCrash(i, quiet=False)
|
||||||
|
self.unmute()
|
||||||
|
return
|
||||||
|
else:
|
||||||
|
agent.registerInitialState(self.state.deepCopy())
|
||||||
|
## TODO: could this exceed the total time
|
||||||
|
self.unmute()
|
||||||
|
|
||||||
|
agentIndex = self.startingIndex
|
||||||
|
numAgents = len( self.agents )
|
||||||
|
|
||||||
|
while not self.gameOver:
|
||||||
|
# Fetch the next agent
|
||||||
|
agent = self.agents[agentIndex]
|
||||||
|
move_time = 0
|
||||||
|
skip_action = False
|
||||||
|
# Generate an observation of the state
|
||||||
|
if 'observationFunction' in dir( agent ):
|
||||||
|
self.mute(agentIndex)
|
||||||
|
if self.catchExceptions:
|
||||||
|
try:
|
||||||
|
timed_func = TimeoutFunction(agent.observationFunction, int(self.rules.getMoveTimeout(agentIndex)))
|
||||||
|
try:
|
||||||
|
start_time = time.time()
|
||||||
|
observation = timed_func(self.state.deepCopy())
|
||||||
|
except TimeoutFunctionException:
|
||||||
|
skip_action = True
|
||||||
|
move_time += time.time() - start_time
|
||||||
|
self.unmute()
|
||||||
|
except Exception,data:
|
||||||
|
self._agentCrash(agentIndex, quiet=False)
|
||||||
|
self.unmute()
|
||||||
|
return
|
||||||
|
else:
|
||||||
|
observation = agent.observationFunction(self.state.deepCopy())
|
||||||
|
self.unmute()
|
||||||
|
else:
|
||||||
|
observation = self.state.deepCopy()
|
||||||
|
|
||||||
|
# Solicit an action
|
||||||
|
action = None
|
||||||
|
self.mute(agentIndex)
|
||||||
|
if self.catchExceptions:
|
||||||
|
try:
|
||||||
|
timed_func = TimeoutFunction(agent.getAction, int(self.rules.getMoveTimeout(agentIndex)) - int(move_time))
|
||||||
|
try:
|
||||||
|
start_time = time.time()
|
||||||
|
if skip_action:
|
||||||
|
raise TimeoutFunctionException()
|
||||||
|
action = timed_func( observation )
|
||||||
|
except TimeoutFunctionException:
|
||||||
|
print >>sys.stderr, "Agent %d timed out on a single move!" % agentIndex
|
||||||
|
self.agentTimeout = True
|
||||||
|
self._agentCrash(agentIndex, quiet=True)
|
||||||
|
self.unmute()
|
||||||
|
return
|
||||||
|
|
||||||
|
move_time += time.time() - start_time
|
||||||
|
|
||||||
|
if move_time > self.rules.getMoveWarningTime(agentIndex):
|
||||||
|
self.totalAgentTimeWarnings[agentIndex] += 1
|
||||||
|
print >>sys.stderr, "Agent %d took too long to make a move! This is warning %d" % (agentIndex, self.totalAgentTimeWarnings[agentIndex])
|
||||||
|
if self.totalAgentTimeWarnings[agentIndex] > self.rules.getMaxTimeWarnings(agentIndex):
|
||||||
|
print >>sys.stderr, "Agent %d exceeded the maximum number of warnings: %d" % (agentIndex, self.totalAgentTimeWarnings[agentIndex])
|
||||||
|
self.agentTimeout = True
|
||||||
|
self._agentCrash(agentIndex, quiet=True)
|
||||||
|
self.unmute()
|
||||||
|
return
|
||||||
|
|
||||||
|
self.totalAgentTimes[agentIndex] += move_time
|
||||||
|
#print "Agent: %d, time: %f, total: %f" % (agentIndex, move_time, self.totalAgentTimes[agentIndex])
|
||||||
|
if self.totalAgentTimes[agentIndex] > self.rules.getMaxTotalTime(agentIndex):
|
||||||
|
print >>sys.stderr, "Agent %d ran out of time! (time: %1.2f)" % (agentIndex, self.totalAgentTimes[agentIndex])
|
||||||
|
self.agentTimeout = True
|
||||||
|
self._agentCrash(agentIndex, quiet=True)
|
||||||
|
self.unmute()
|
||||||
|
return
|
||||||
|
self.unmute()
|
||||||
|
except Exception,data:
|
||||||
|
self._agentCrash(agentIndex)
|
||||||
|
self.unmute()
|
||||||
|
return
|
||||||
|
else:
|
||||||
|
action = agent.getAction(observation)
|
||||||
|
self.unmute()
|
||||||
|
|
||||||
|
# Execute the action
|
||||||
|
self.moveHistory.append( (agentIndex, action) )
|
||||||
|
if self.catchExceptions:
|
||||||
|
try:
|
||||||
|
self.state = self.state.generateSuccessor( agentIndex, action )
|
||||||
|
except Exception,data:
|
||||||
|
self.mute(agentIndex)
|
||||||
|
self._agentCrash(agentIndex)
|
||||||
|
self.unmute()
|
||||||
|
return
|
||||||
|
else:
|
||||||
|
self.state = self.state.generateSuccessor( agentIndex, action )
|
||||||
|
|
||||||
|
# Change the display
|
||||||
|
self.display.update( self.state.data )
|
||||||
|
###idx = agentIndex - agentIndex % 2 + 1
|
||||||
|
###self.display.update( self.state.makeObservation(idx).data )
|
||||||
|
|
||||||
|
# Allow for game specific conditions (winning, losing, etc.)
|
||||||
|
self.rules.process(self.state, self)
|
||||||
|
# Track progress
|
||||||
|
if agentIndex == numAgents + 1: self.numMoves += 1
|
||||||
|
# Next agent
|
||||||
|
agentIndex = ( agentIndex + 1 ) % numAgents
|
||||||
|
|
||||||
|
if _BOINC_ENABLED:
|
||||||
|
boinc.set_fraction_done(self.getProgress())
|
||||||
|
|
||||||
|
# inform a learning agent of the game result
|
||||||
|
for agentIndex, agent in enumerate(self.agents):
|
||||||
|
if "final" in dir( agent ) :
|
||||||
|
try:
|
||||||
|
self.mute(agentIndex)
|
||||||
|
agent.final( self.state )
|
||||||
|
self.unmute()
|
||||||
|
except Exception,data:
|
||||||
|
if not self.catchExceptions: raise
|
||||||
|
self._agentCrash(agentIndex)
|
||||||
|
self.unmute()
|
||||||
|
return
|
||||||
|
self.display.finish()
|
||||||
81
p3_rl/ghostAgents.py
Normal file
81
p3_rl/ghostAgents.py
Normal file
@@ -0,0 +1,81 @@
|
|||||||
|
# ghostAgents.py
|
||||||
|
# --------------
|
||||||
|
# Licensing Information: You are free to use or extend these projects for
|
||||||
|
# educational purposes provided that (1) you do not distribute or publish
|
||||||
|
# solutions, (2) you retain this notice, and (3) you provide clear
|
||||||
|
# attribution to UC Berkeley, including a link to http://ai.berkeley.edu.
|
||||||
|
#
|
||||||
|
# Attribution Information: The Pacman AI projects were developed at UC Berkeley.
|
||||||
|
# The core projects and autograders were primarily created by John DeNero
|
||||||
|
# (denero@cs.berkeley.edu) and Dan Klein (klein@cs.berkeley.edu).
|
||||||
|
# Student side autograding was added by Brad Miller, Nick Hay, and
|
||||||
|
# Pieter Abbeel (pabbeel@cs.berkeley.edu).
|
||||||
|
|
||||||
|
|
||||||
|
from game import Agent
|
||||||
|
from game import Actions
|
||||||
|
from game import Directions
|
||||||
|
import random
|
||||||
|
from util import manhattanDistance
|
||||||
|
import util
|
||||||
|
|
||||||
|
class GhostAgent( Agent ):
    """Base class for ghosts: picks a move by sampling the subclass's action distribution."""

    def __init__(self, index):
        self.index = index  # which agent slot this ghost occupies in the game state

    def getAction(self, state):
        # Sample from the distribution supplied by the subclass; if no action
        # carries any weight, stay put.
        actionDist = self.getDistribution(state)
        if actionDist:
            return util.chooseFromDistribution(actionDist)
        return Directions.STOP

    def getDistribution(self, state):
        "Returns a Counter encoding a distribution over actions from the provided state."
        util.raiseNotDefined()
|
|
||||||
|
class RandomGhost( GhostAgent ):
    """A ghost that chooses among its legal actions uniformly at random."""

    def getDistribution(self, state):
        # Give every legal action equal weight, then normalize to a probability
        # distribution (sums to 1).
        legal = state.getLegalActions(self.index)
        dist = util.Counter()
        for action in legal:
            dist[action] = 1.0
        dist.normalize()
        return dist
|
||||||
|
class DirectionalGhost( GhostAgent ):
    """A ghost that prefers to rush Pacman, or flee when scared."""

    def __init__(self, index, prob_attack=0.8, prob_scaredFlee=0.8):
        self.index = index
        self.prob_attack = prob_attack          # mass placed on closing in when brave
        self.prob_scaredFlee = prob_scaredFlee  # mass placed on retreating when scared

    def getDistribution(self, state):
        # Pull the pieces of game state this policy depends on.
        ghostState = state.getGhostState(self.index)
        legalActions = state.getLegalActions(self.index)
        ghostPos = state.getGhostPosition(self.index)
        isScared = ghostState.scaredTimer > 0

        # Scared ghosts move at half speed.
        speed = 0.5 if isScared else 1

        # Position the ghost would occupy after each legal action.
        moveVectors = [Actions.directionToVector(a, speed) for a in legalActions]
        successorPositions = [(ghostPos[0] + dx, ghostPos[1] + dy) for dx, dy in moveVectors]
        pacmanPosition = state.getPacmanPosition()

        # Score successors by Manhattan distance to Pacman: maximize it while
        # scared (flee), minimize it otherwise (attack).
        distances = [manhattanDistance(p, pacmanPosition) for p in successorPositions]
        if isScared:
            bestScore, bestProb = max(distances), self.prob_scaredFlee
        else:
            bestScore, bestProb = min(distances), self.prob_attack
        bestActions = [a for a, d in zip(legalActions, distances) if d == bestScore]

        # Mixture: bestProb spread evenly over the best actions, plus a uniform
        # (1 - bestProb) component over all legal actions.
        dist = util.Counter()
        for a in bestActions:
            dist[a] = bestProb / len(bestActions)
        for a in legalActions:
            dist[a] += (1 - bestProb) / len(legalActions)
        dist.normalize()
        return dist
||||||
282
p3_rl/grading.py
Normal file
282
p3_rl/grading.py
Normal file
@@ -0,0 +1,282 @@
|
|||||||
|
# grading.py
|
||||||
|
# ----------
|
||||||
|
# Licensing Information: You are free to use or extend these projects for
|
||||||
|
# educational purposes provided that (1) you do not distribute or publish
|
||||||
|
# solutions, (2) you retain this notice, and (3) you provide clear
|
||||||
|
# attribution to UC Berkeley, including a link to http://ai.berkeley.edu.
|
||||||
|
#
|
||||||
|
# Attribution Information: The Pacman AI projects were developed at UC Berkeley.
|
||||||
|
# The core projects and autograders were primarily created by John DeNero
|
||||||
|
# (denero@cs.berkeley.edu) and Dan Klein (klein@cs.berkeley.edu).
|
||||||
|
# Student side autograding was added by Brad Miller, Nick Hay, and
|
||||||
|
# Pieter Abbeel (pabbeel@cs.berkeley.edu).
|
||||||
|
|
||||||
|
|
||||||
|
"Common code for autograders"
|
||||||
|
|
||||||
|
import cgi
|
||||||
|
import time
|
||||||
|
import sys
|
||||||
|
import traceback
|
||||||
|
import pdb
|
||||||
|
from collections import defaultdict
|
||||||
|
import util
|
||||||
|
|
||||||
|
class Grades:
    "A data structure for project grades, along with formatting code to display them"

    def __init__(self, projectName, questionsAndMaxesList, edxOutput=False, muteOutput=False):
        """
        Defines the grading scheme for a project
          projectName: project name
          questionsAndMaxesDict: a list of (question name, max points per question)
        """
        # Preserve the caller's question order; maxes maps question name -> max points.
        self.questions = [el[0] for el in questionsAndMaxesList]
        self.maxes = dict(questionsAndMaxesList)
        self.points = Counter()  # question name -> points earned (missing keys read as 0)
        self.messages = dict([(q, []) for q in self.questions])  # per-question transcript lines
        self.project = projectName
        self.start = time.localtime()[1:6]  # (month, day, hour, minute, second)
        self.sane = True  # Sanity checks
        self.currentQuestion = None  # Which question we're grading
        self.edxOutput = edxOutput
        self.mute = muteOutput
        self.prereqs = defaultdict(set)  # question -> set of prerequisite question names

        #print 'Autograder transcript for %s' % self.project
        print 'Starting on %d-%d at %d:%02d:%02d' % self.start

    def addPrereq(self, question, prereq):
        # Record that `prereq` must earn full credit before `question` is graded.
        self.prereqs[question].add(prereq)

    def grade(self, gradingModule, exceptionMap = {}, bonusPic = False):
        """
        Grades each question
          gradingModule: the module with all the grading functions (pass in with sys.modules[__name__])
        """

        completedQuestions = set([])
        for q in self.questions:
            print '\nQuestion %s' % q
            print '=' * (9 + len(q))
            print
            self.currentQuestion = q

            # Skip this question if any prerequisite has not been fully solved yet.
            incompleted = self.prereqs[q].difference(completedQuestions)
            if len(incompleted) > 0:
                prereq = incompleted.pop()
                print \
"""*** NOTE: Make sure to complete Question %s before working on Question %s,
*** because Question %s builds upon your answer for Question %s.
""" % (prereq, q, q, prereq)
                continue

            if self.mute: util.mutePrint()
            try:
                # Run the question's grading function with a 300-second timeout.
                util.TimeoutFunction(getattr(gradingModule, q),300)(self) # Call the question's function
                #TimeoutFunction(getattr(gradingModule, q),1200)(self) # Call the question's function
            except Exception, inst:
                # q is a name like 'q1'; q[1] passes just the digit to the hint lookup.
                self.addExceptionMessage(q, inst, traceback)
                self.addErrorHints(exceptionMap, inst, q[1])
            except:
                # Python 2 permits raising non-Exception objects (e.g. strings).
                self.fail('FAIL: Terminated with a string exception.')
            finally:
                if self.mute: util.unmutePrint()

            # Only a full-credit question unlocks its dependents.
            if self.points[q] >= self.maxes[q]:
                completedQuestions.add(q)

            print '\n### Question %s: %d/%d ###\n' % (q, self.points[q], self.maxes[q])

        print '\nFinished at %d:%02d:%02d' % time.localtime()[3:6]
        print "\nProvisional grades\n=================="

        for q in self.questions:
            print 'Question %s: %d/%d' % (q, self.points[q], self.maxes[q])
        print '------------------'
        print 'Total: %d/%d' % (self.points.totalCount(), sum(self.maxes.values()))
        if bonusPic and self.points.totalCount() == 25:
            # NOTE(review): the leading whitespace of this ASCII art was lost in
            # extraction; the alignment below is an approximate reconstruction.
            print """

                     ALL HAIL GRANDPAC.
              LONG LIVE THE GHOSTBUSTING KING.

                  ---      ----      ---
                  |  \    /  + \    /  |
                  | + \--/      \--/ + |
                  |   +     +          |
                  | +     +        +   |
                  @@@@@@@@@@@@@@@@@@@@@@@@@@
                @@@@@@@@@@@@@@@@@@@@@@@@@@@@@@
              @@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@
             @@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@
          \   @@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@
           \ /  @@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@
            V     @@@@@@@@@@@@@@@@@@@@@@@@@@@@
              \ /  @@@@@@@@@@@@@@@@@@@@@@@@@@
               V     @@@@@@@@@@@@@@@@@@@@@@@@
                      @@@@@@@@@@@@@@@@@@@@@@
              /\      @@@@@@@@@@@@@@@@@@@@@@
             /  \   @@@@@@@@@@@@@@@@@@@@@@@@@
            /\  /   @@@@@@@@@@@@@@@@@@@@@@@@@@@
           /  \    @@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@
          /    @@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@
              @@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@
               @@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@
                 @@@@@@@@@@@@@@@@@@@@@@@@@@@@@@
                    @@@@@@@@@@@@@@@@@@@@@@@@@@
                         @@@@@@@@@@@@@@@@@@

"""
        print """
Your grades are NOT yet registered. To register your grades, make sure
to follow your instructor's guidelines to receive credit on your project.
"""

        if self.edxOutput:
            self.produceOutput()

    def addExceptionMessage(self, q, inst, traceback):
        """
        Method to format the exception message, this is more complicated because
        we need to cgi.escape the traceback but wrap the exception in a <pre> tag
        """
        self.fail('FAIL: Exception raised: %s' % inst)
        self.addMessage('')
        # Record the full traceback line-by-line in the question's transcript.
        for line in traceback.format_exc().split('\n'):
            self.addMessage(line)

    def addErrorHints(self, exceptionMap, errorInstance, questionNum):
        # Look up a human-readable hint for this exception type, preferring a
        # question-specific entry over the general one.
        typeOf = str(type(errorInstance))
        questionName = 'q' + questionNum
        errorHint = ''

        # question specific error hints
        if exceptionMap.get(questionName):
            questionMap = exceptionMap.get(questionName)
            if (questionMap.get(typeOf)):
                errorHint = questionMap.get(typeOf)
        # fall back to general error messages if a question specific
        # one does not exist
        if (exceptionMap.get(typeOf)):
            errorHint = exceptionMap.get(typeOf)

        # dont include the HTML if we have no error hint
        if not errorHint:
            return ''

        for line in errorHint.split('\n'):
            self.addMessage(line)

    def produceOutput(self):
        # Write the edX-formatted HTML transcript plus a one-number grade file.
        # NOTE(review): leading whitespace inside the HTML template strings was
        # lost in extraction; indentation below is approximate (cosmetic only).
        edxOutput = open('edx_response.html', 'w')
        edxOutput.write("<div>")

        # first sum
        total_possible = sum(self.maxes.values())
        total_score = sum(self.points.values())
        checkOrX = '<span class="incorrect"/>'
        if (total_score >= total_possible):
            checkOrX = '<span class="correct"/>'
        header = """
        <h3>
            Total score ({total_score} / {total_possible})
        </h3>
    """.format(total_score = total_score,
      total_possible = total_possible,
      checkOrX = checkOrX
    )
        edxOutput.write(header)

        for q in self.questions:
            # A 2-character question id like 'q1' is displayed as just its number.
            if len(q) == 2:
                name = q[1]
            else:
                name = q
            checkOrX = '<span class="incorrect"/>'
            if (self.points[q] == self.maxes[q]):
                checkOrX = '<span class="correct"/>'
            #messages = '\n<br/>\n'.join(self.messages[q])
            messages = "<pre>%s</pre>" % '\n'.join(self.messages[q])
            output = """
        <div class="test">
          <section>
          <div class="shortform">
            Question {q} ({points}/{max}) {checkOrX}
          </div>
        <div class="longform">
          {messages}
        </div>
        </section>
      </div>
      """.format(q = name,
        max = self.maxes[q],
        messages = messages,
        checkOrX = checkOrX,
        points = self.points[q]
      )
            # print "*** output for Question %s " % q[1]
            # print output
            edxOutput.write(output)
        edxOutput.write("</div>")
        edxOutput.close()
        edxOutput = open('edx_grade', 'w')
        edxOutput.write(str(self.points.totalCount()))
        edxOutput.close()

    def fail(self, message, raw=False):
        "Sets sanity check bit to false and outputs a message"
        self.sane = False
        self.assignZeroCredit()
        self.addMessage(message, raw)

    def assignZeroCredit(self):
        # Zero out the current question's score.
        self.points[self.currentQuestion] = 0

    def addPoints(self, amt):
        self.points[self.currentQuestion] += amt

    def deductPoints(self, amt):
        self.points[self.currentQuestion] -= amt

    def assignFullCredit(self, message="", raw=False):
        # Award the maximum for the current question, optionally logging a message.
        self.points[self.currentQuestion] = self.maxes[self.currentQuestion]
        if message != "":
            self.addMessage(message, raw)

    def addMessage(self, message, raw=False):
        # Append a line to the current question's transcript; non-raw messages
        # are echoed to stdout and HTML-escaped for the edX output.
        if not raw:
            # We assume raw messages, formatted for HTML, are printed separately
            if self.mute: util.unmutePrint()
            print '*** ' + message
            if self.mute: util.mutePrint()
            message = cgi.escape(message)
        self.messages[self.currentQuestion].append(message)

    def addMessageToEmail(self, message):
        # Deprecated: kept only so old callers do not break; body is a no-op loop.
        print "WARNING**** addMessageToEmail is deprecated %s" % message
        for line in message.split('\n'):
            pass
            #print '%%% ' + line + ' %%%'
            #self.messages[self.currentQuestion].append(line)
|
|
||||||
|
class Counter(dict):
    """
    Dict with default 0
    """

    def __getitem__(self, idx):
        # Missing keys read as 0 rather than raising KeyError.
        return dict.get(self, idx, 0)

    def totalCount(self):
        """
        Returns the sum of counts for all keys.
        """
        return sum(self.values())
|
|
||||||
333
p3_rl/graphicsCrawlerDisplay.py
Normal file
333
p3_rl/graphicsCrawlerDisplay.py
Normal file
@@ -0,0 +1,333 @@
|
|||||||
|
# graphicsCrawlerDisplay.py
|
||||||
|
# -------------------------
|
||||||
|
# Licensing Information: You are free to use or extend these projects for
|
||||||
|
# educational purposes provided that (1) you do not distribute or publish
|
||||||
|
# solutions, (2) you retain this notice, and (3) you provide clear
|
||||||
|
# attribution to UC Berkeley, including a link to http://ai.berkeley.edu.
|
||||||
|
#
|
||||||
|
# Attribution Information: The Pacman AI projects were developed at UC Berkeley.
|
||||||
|
# The core projects and autograders were primarily created by John DeNero
|
||||||
|
# (denero@cs.berkeley.edu) and Dan Klein (klein@cs.berkeley.edu).
|
||||||
|
# Student side autograding was added by Brad Miller, Nick Hay, and
|
||||||
|
# Pieter Abbeel (pabbeel@cs.berkeley.edu).
|
||||||
|
|
||||||
|
|
||||||
|
# graphicsCrawlerDisplay.py
|
||||||
|
# -------------------------
|
||||||
|
# Licensing Information: Please do not distribute or publish solutions to this
|
||||||
|
# project. You are free to use and extend these projects for educational
|
||||||
|
# purposes. The Pacman AI projects were developed at UC Berkeley, primarily by
|
||||||
|
# John DeNero (denero@cs.berkeley.edu) and Dan Klein (klein@cs.berkeley.edu).
|
||||||
|
# Student side autograding was added by Brad Miller, Nick Hay, and Pieter
|
||||||
|
# Abbeel in Spring 2013.
|
||||||
|
# For more info, see http://inst.eecs.berkeley.edu/~cs188/pacman/pacman.html
|
||||||
|
|
||||||
|
import Tkinter
|
||||||
|
import qlearningAgents
|
||||||
|
import time
|
||||||
|
import threading
|
||||||
|
import sys
|
||||||
|
import crawler
|
||||||
|
#import pendulum
|
||||||
|
import math
|
||||||
|
from math import pi as PI
|
||||||
|
|
||||||
|
robotType = 'crawler'
|
||||||
|
|
||||||
|
class Application:
    # GUI application driving a Q-learning crawler robot: Tkinter controls for
    # epsilon / discount / learning rate / step delay, plus a background thread
    # that steps the learner. NOTE(review): original leading indentation was
    # lost in extraction; structure below follows the visible control flow.

    def sigmoid(self, x):
        # Squash an unbounded slider counter into (0, 1); base-2 variant of the
        # logistic function.
        return 1.0 / (1.0 + 2.0 ** (-x))

    def incrementSpeed(self, inc):
        # Multiplicative speed control: inc < 1 speeds up, inc > 1 slows down.
        self.tickTime *= inc
        # self.epsilon = min(1.0, self.epsilon)
        # self.epsilon = max(0.0,self.epsilon)
        # self.learner.setSpeed(self.epsilon)
        self.speed_label['text'] = 'Step Delay: %.5f' % (self.tickTime)

    def incrementEpsilon(self, inc):
        # Adjust exploration rate via the sigmoid-mapped counter self.ep.
        self.ep += inc
        self.epsilon = self.sigmoid(self.ep)
        self.learner.setEpsilon(self.epsilon)
        self.epsilon_label['text'] = 'Epsilon: %.3f' % (self.epsilon)

    def incrementGamma(self, inc):
        # Adjust the discount factor via the sigmoid-mapped counter self.ga.
        self.ga += inc
        self.gamma = self.sigmoid(self.ga)
        self.learner.setDiscount(self.gamma)
        self.gamma_label['text'] = 'Discount: %.3f' % (self.gamma)

    def incrementAlpha(self, inc):
        # Adjust the learning rate via the sigmoid-mapped counter self.al.
        self.al += inc
        self.alpha = self.sigmoid(self.al)
        self.learner.setLearningRate(self.alpha)
        self.alpha_label['text'] = 'Learning Rate: %.3f' % (self.alpha)

    def __initGUI(self, win):
        ## Window ##
        self.win = win

        ## Initialize Frame ##
        win.grid()
        self.dec = -.5  # decrement applied by "-" buttons
        self.inc = .5   # increment applied by "+" buttons
        self.tickTime = 0.1  # seconds between simulation steps

        ## Epsilon Button + Label ##
        self.setupSpeedButtonAndLabel(win)

        self.setupEpsilonButtonAndLabel(win)

        ## Gamma Button + Label ##
        self.setUpGammaButtonAndLabel(win)

        ## Alpha Button + Label ##
        self.setupAlphaButtonAndLabel(win)

        ## Exit Button ##
        #self.exit_button = Tkinter.Button(win,text='Quit', command=self.exit)
        #self.exit_button.grid(row=0, column=9)

        ## Simulation Buttons ##
        # self.setupSimulationButtons(win)

        ## Canvas ##
        # NOTE(review): uses the module-level global `root`, not `win`.
        self.canvas = Tkinter.Canvas(root, height=200, width=1000)
        self.canvas.grid(row=2,columnspan=10)

    def setupAlphaButtonAndLabel(self, win):
        # "-" / label / "+" trio controlling the learning rate (grid row 1).
        self.alpha_minus = Tkinter.Button(win,
        text="-",command=(lambda: self.incrementAlpha(self.dec)))
        self.alpha_minus.grid(row=1, column=3, padx=10)

        self.alpha = self.sigmoid(self.al)
        self.alpha_label = Tkinter.Label(win, text='Learning Rate: %.3f' % (self.alpha))
        self.alpha_label.grid(row=1, column=4)

        self.alpha_plus = Tkinter.Button(win,
        text="+",command=(lambda: self.incrementAlpha(self.inc)))
        self.alpha_plus.grid(row=1, column=5, padx=10)

    def setUpGammaButtonAndLabel(self, win):
        # "-" / label / "+" trio controlling the discount factor (grid row 1).
        self.gamma_minus = Tkinter.Button(win,
        text="-",command=(lambda: self.incrementGamma(self.dec)))
        self.gamma_minus.grid(row=1, column=0, padx=10)

        self.gamma = self.sigmoid(self.ga)
        self.gamma_label = Tkinter.Label(win, text='Discount: %.3f' % (self.gamma))
        self.gamma_label.grid(row=1, column=1)

        self.gamma_plus = Tkinter.Button(win,
        text="+",command=(lambda: self.incrementGamma(self.inc)))
        self.gamma_plus.grid(row=1, column=2, padx=10)

    def setupEpsilonButtonAndLabel(self, win):
        # "-" / label / "+" trio controlling exploration epsilon (grid row 0).
        self.epsilon_minus = Tkinter.Button(win,
        text="-",command=(lambda: self.incrementEpsilon(self.dec)))
        self.epsilon_minus.grid(row=0, column=3)

        self.epsilon = self.sigmoid(self.ep)
        self.epsilon_label = Tkinter.Label(win, text='Epsilon: %.3f' % (self.epsilon))
        self.epsilon_label.grid(row=0, column=4)

        self.epsilon_plus = Tkinter.Button(win,
        text="+",command=(lambda: self.incrementEpsilon(self.inc)))
        self.epsilon_plus.grid(row=0, column=5)

    def setupSpeedButtonAndLabel(self, win):
        # "-" / label / "+" trio controlling step delay; note the multiplicative
        # factors (.5 halves the delay, 2 doubles it).
        self.speed_minus = Tkinter.Button(win,
        text="-",command=(lambda: self.incrementSpeed(.5)))
        self.speed_minus.grid(row=0, column=0)

        self.speed_label = Tkinter.Label(win, text='Step Delay: %.5f' % (self.tickTime))
        self.speed_label.grid(row=0, column=1)

        self.speed_plus = Tkinter.Button(win,
        text="+",command=(lambda: self.incrementSpeed(2)))
        self.speed_plus.grid(row=0, column=2)

    def skip5kSteps(self):
        # Fast-forward: the run loop will execute 5000 steps without drawing delay.
        self.stepsToSkip = 5000

    def __init__(self, win):

        # Slider counters feeding sigmoid(): ep -> epsilon, ga -> gamma, al -> alpha.
        self.ep = 0
        self.ga = 2
        self.al = 2
        self.stepCount = 0
        ## Init Gui

        self.__initGUI(win)

        # Init environment
        if robotType == 'crawler':
            self.robot = crawler.CrawlingRobot(self.canvas)
            self.robotEnvironment = crawler.CrawlingRobotEnvironment(self.robot)
        elif robotType == 'pendulum':
            # NOTE(review): the `pendulum` import is commented out at module
            # level; this branch would raise NameError if selected.
            self.robot = pendulum.PendulumRobot(self.canvas)
            self.robotEnvironment = \
                pendulum.PendulumRobotEnvironment(self.robot)
        else:
            # Python 2 string exception (illegal in Python 3).
            raise "Unknown RobotType"

        # Init Agent
        simulationFn = lambda agent: \
          simulation.SimulationEnvironment(self.robotEnvironment,agent)
        actionFn = lambda state: \
          self.robotEnvironment.getPossibleActions(state)
        self.learner = qlearningAgents.QLearningAgent(actionFn=actionFn)

        self.learner.setEpsilon(self.epsilon)
        self.learner.setLearningRate(self.alpha)
        self.learner.setDiscount(self.gamma)

        # Start GUI
        self.running = True
        self.stopped = False
        self.stepsToSkip = 0
        # Learning runs on a background thread so the Tk mainloop stays responsive.
        self.thread = threading.Thread(target=self.run)
        self.thread.start()

    def exit(self):
        # Ask the run loop to stop, wait up to ~0.5s for it, then tear down Tk.
        self.running = False
        for i in range(5):
            if not self.stopped:
                time.sleep(0.1)
        try:
            self.win.destroy()
        except:
            pass
        sys.exit(0)

    def step(self):
        # One Q-learning transition: observe state, pick an action, apply it,
        # and feed the (s, a, s', r) tuple back to the learner.

        self.stepCount += 1

        state = self.robotEnvironment.getCurrentState()
        actions = self.robotEnvironment.getPossibleActions(state)
        # Terminal state (no actions): reset the environment and continue.
        if len(actions) == 0.0:
            self.robotEnvironment.reset()
            state = self.robotEnvironment.getCurrentState()
            actions = self.robotEnvironment.getPossibleActions(state)
            print 'Reset!'
        action = self.learner.getAction(state)
        if action == None:
            # Python 2 string exception; signals an unimplemented student agent.
            raise 'None action returned: Code Not Complete'
        nextState, reward = self.robotEnvironment.doAction(action)
        self.learner.observeTransition(state, action, nextState, reward)

    def animatePolicy(self):
        # Paint a 100x100 (angle x velocity) map of the greedy policy onto the
        # canvas; pendulum-only.
        if robotType != 'pendulum':
            raise 'Only pendulum can animatePolicy'

        totWidth = self.canvas.winfo_reqwidth()
        totHeight = self.canvas.winfo_reqheight()

        length = 0.48 * min(totWidth, totHeight)
        x,y = totWidth-length-30, length+10

        angleMin, angleMax = self.robot.getMinAndMaxAngle()
        velMin, velMax = self.robot.getMinAndMaxAngleVelocity()

        # Draw the bounding box and legend only once.
        if not 'animatePolicyBox' in dir(self):
            self.canvas.create_line(x,y,x+length,y)
            self.canvas.create_line(x+length,y,x+length,y-length)
            self.canvas.create_line(x+length,y-length,x,y-length)
            self.canvas.create_line(x,y-length,x,y)
            self.animatePolicyBox = 1
            self.canvas.create_text(x+length/2,y+10,text='angle')
            self.canvas.create_text(x-30,y-length/2,text='velocity')
            self.canvas.create_text(x-60,y-length/4,text='Blue = kickLeft')
            self.canvas.create_text(x-60,y-length/4+20,text='Red = kickRight')
            self.canvas.create_text(x-60,y-length/4+40,text='White = doNothing')

        angleDelta = (angleMax-angleMin) / 100
        velDelta = (velMax-velMin) / 100
        for i in range(100):
            angle = angleMin + i * angleDelta

            for j in range(100):
                vel = velMin + j * velDelta
                state = self.robotEnvironment.getState(angle,vel)
                # NOTE(review): `max` shadows the builtin within this loop.
                max, argMax = None, None
                if not self.learner.seenState(state):
                    argMax = 'unseen'
                else:
                    for action in ('kickLeft','kickRight','doNothing'):
                        qVal = self.learner.getQValue(state, action)
                        if max == None or qVal > max:
                            max, argMax = qVal, action
                if argMax != 'unseen':
                    if argMax == 'kickLeft':
                        color = 'blue'
                    elif argMax == 'kickRight':
                        color = 'red'
                    elif argMax == 'doNothing':
                        color = 'white'
                    dx = length / 100.0
                    dy = length / 100.0
                    x0, y0 = x+i*dx, y-j*dy
                    self.canvas.create_rectangle(x0,y0,x0+dx,y0+dy,fill=color)

    def run(self):
        # Background learning loop: sleep tickTime between steps, batching extra
        # steps when the requested delay is below the minimum sleep resolution.
        self.stepCount = 0
        self.learner.startEpisode()
        while True:
            minSleep = .01
            tm = max(minSleep, self.tickTime)
            time.sleep(tm)
            self.stepsToSkip = int(tm / self.tickTime) - 1

            if not self.running:
                self.stopped = True
                return
            for i in range(self.stepsToSkip):
                self.step()
            self.stepsToSkip = 0
            self.step()
            # self.robot.draw()
        # NOTE(review): unreachable — the while loop only exits via `return`.
        self.learner.stopEpisode()

    def start(self):
        # Enter the Tk event loop; blocks until the window closes.
        self.win.mainloop()
||||||
|
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
|
def run():
    # Entry point for the crawler GUI: builds the Tk root window, starts the
    # Application (which spawns the learning thread), and schedules a 10 ms
    # redraw timer on the Tk event loop.
    global root
    root = Tkinter.Tk()
    root.title( 'Crawler GUI' )
    root.resizable( 0, 0 )

    # root.mainloop()

    app = Application(root)
    def update_gui():
        # Periodic redraw on the Tk thread; reschedules itself every 10 ms.
        app.robot.draw(app.stepCount, app.tickTime)
        root.after(10, update_gui)
    update_gui()

    # Route the window-close button through the app's clean shutdown path.
    root.protocol( 'WM_DELETE_WINDOW', app.exit)
    try:
        app.start()
    except:
        # Any mainloop failure (including KeyboardInterrupt) triggers shutdown.
        app.exit()
||||||
679
p3_rl/graphicsDisplay.py
Normal file
679
p3_rl/graphicsDisplay.py
Normal file
@@ -0,0 +1,679 @@
|
|||||||
|
# graphicsDisplay.py
|
||||||
|
# ------------------
|
||||||
|
# Licensing Information: You are free to use or extend these projects for
|
||||||
|
# educational purposes provided that (1) you do not distribute or publish
|
||||||
|
# solutions, (2) you retain this notice, and (3) you provide clear
|
||||||
|
# attribution to UC Berkeley, including a link to http://ai.berkeley.edu.
|
||||||
|
#
|
||||||
|
# Attribution Information: The Pacman AI projects were developed at UC Berkeley.
|
||||||
|
# The core projects and autograders were primarily created by John DeNero
|
||||||
|
# (denero@cs.berkeley.edu) and Dan Klein (klein@cs.berkeley.edu).
|
||||||
|
# Student side autograding was added by Brad Miller, Nick Hay, and
|
||||||
|
# Pieter Abbeel (pabbeel@cs.berkeley.edu).
|
||||||
|
|
||||||
|
|
||||||
|
from graphicsUtils import *
|
||||||
|
import math, time
|
||||||
|
from game import Directions
|
||||||
|
|
||||||
|
###########################
|
||||||
|
# GRAPHICS DISPLAY CODE #
|
||||||
|
###########################
|
||||||
|
|
||||||
|
# Most code by Dan Klein and John Denero written or rewritten for cs188, UC Berkeley.
|
||||||
|
# Some code from a Pacman implementation by LiveWires, and used / modified with permission.
|
||||||
|
|
||||||
|
# Display geometry and palette for the Pacman board renderer.
DEFAULT_GRID_SIZE = 30.0  # pixels per maze cell
INFO_PANE_HEIGHT = 35     # pixels reserved below the maze for the score pane
BACKGROUND_COLOR = formatColor(0,0,0)
WALL_COLOR = formatColor(0.0/255.0, 51.0/255.0, 255.0/255.0)
INFO_PANE_COLOR = formatColor(.4,.4,0)
SCORE_COLOR = formatColor(.9, .9, .9)
PACMAN_OUTLINE_WIDTH = 2
PACMAN_CAPTURE_OUTLINE_WIDTH = 4

# Ghost body colors, indexed by ghost number.
GHOST_COLORS = []
GHOST_COLORS.append(formatColor(.9,0,0)) # Red
GHOST_COLORS.append(formatColor(0,.3,.9)) # Blue
GHOST_COLORS.append(formatColor(.98,.41,.07)) # Orange
GHOST_COLORS.append(formatColor(.1,.75,.7)) # Green
GHOST_COLORS.append(formatColor(1.0,0.6,0.0)) # Yellow
GHOST_COLORS.append(formatColor(.4,0.13,0.91)) # Purple

TEAM_COLORS = GHOST_COLORS[:2]  # capture mode: red team, blue team

# Ghost outline as (x, y) vertices in cell-relative coordinates.
GHOST_SHAPE = [
    ( 0, 0.3 ),
    ( 0.25, 0.75 ),
    ( 0.5, 0.3 ),
    ( 0.75, 0.75 ),
    ( 0.75, -0.5 ),
    ( 0.5, -0.75 ),
    (-0.5, -0.75 ),
    (-0.75, -0.5 ),
    (-0.75, 0.75 ),
    (-0.5, 0.3 ),
    (-0.25, 0.75 )
  ]
GHOST_SIZE = 0.65
SCARED_COLOR = formatColor(1,1,1)

# Python 2 `map` returns a list here; under Python 3 this would be a lazy iterator.
GHOST_VEC_COLORS = map(colorToVector, GHOST_COLORS)

PACMAN_COLOR = formatColor(255.0/255.0,255.0/255.0,61.0/255)
PACMAN_SCALE = 0.5
#pacman_speed = 0.25

# Food
FOOD_COLOR = formatColor(1,1,1)
FOOD_SIZE = 0.1

# Laser
LASER_COLOR = formatColor(1,0,0)
LASER_SIZE = 0.02

# Capsule graphics
CAPSULE_COLOR = formatColor(1,1,1)
CAPSULE_SIZE = 0.25

# Drawing walls
WALL_RADIUS = 0.15
|
|
||||||
|
class InfoPane:
    """The score/status strip rendered beneath the maze.

    Coordinates handed to the graphics calls are expressed relative to the
    bottom-left corner of the pane (see toScreen).
    """

    def __init__(self, layout, gridSize):
        self.gridSize = gridSize
        self.width = (layout.width) * gridSize
        self.base = (layout.height + 1) * gridSize  # pane sits just below the maze
        self.height = INFO_PANE_HEIGHT
        self.fontSize = 24
        self.textColor = PACMAN_COLOR
        self.drawPane()

    def toScreen(self, pos, y = None):
        """
        Translates a point relative from the bottom left of the info pane.
        Accepts either toScreen((x, y)) or toScreen(x, y).
        """
        if y is None:
            x, y = pos
        else:
            x = pos

        x = self.gridSize + x # Margin
        y = self.base + y
        return x, y

    def drawPane(self):
        """Create the score label."""
        self.scoreText = text( self.toScreen(0, 0 ), self.textColor, "SCORE: 0", "Times", self.fontSize, "bold")

    def initializeGhostDistances(self, distances):
        """Create one ghost-distance label per ghost, font scaled to pane width."""
        self.ghostDistanceText = []

        size = 20
        if self.width < 240:
            size = 12
        if self.width < 160:
            size = 10

        for i, d in enumerate(distances):
            t = text( self.toScreen(self.width/2 + self.width/8 * i, 0), GHOST_COLORS[i+1], d, "Times", size, "bold")
            self.ghostDistanceText.append(t)

    def updateScore(self, score):
        """Redraw the score label with the current score."""
        changeText(self.scoreText, "SCORE: % 4d" % score)

    def setTeam(self, isBlue):
        """Display which team this display belongs to (capture mode)."""
        # BUG FIX: the original stored the label in a local named `text`,
        # which shadowed the graphics `text()` function called on the next
        # line and raised "TypeError: 'str' object is not callable".
        label = "RED TEAM"
        if isBlue: label = "BLUE TEAM"
        self.teamText = text( self.toScreen(300, 0 ), self.textColor, label, "Times", self.fontSize, "bold")

    def updateGhostDistances(self, distances):
        """Refresh ghost-distance labels, creating them lazily on first use."""
        if len(distances) == 0: return
        if 'ghostDistanceText' not in dir(self): self.initializeGhostDistances(distances)
        else:
            for i, d in enumerate(distances):
                changeText(self.ghostDistanceText[i], d)

    # The remaining hooks are part of the display interface but are
    # intentionally no-ops for this pane.
    def drawGhost(self):
        pass

    def drawPacman(self):
        pass

    def drawWarning(self):
        pass

    def clearIcon(self):
        pass

    def updateMessage(self, message):
        pass

    def clearMessage(self):
        pass
||||||
|
|
||||||
|
|
||||||
|
class PacmanGraphics:
|
||||||
|
def __init__(self, zoom=1.0, frameTime=0.0, capture=False):
|
||||||
|
self.have_window = 0
|
||||||
|
self.currentGhostImages = {}
|
||||||
|
self.pacmanImage = None
|
||||||
|
self.zoom = zoom
|
||||||
|
self.gridSize = DEFAULT_GRID_SIZE * zoom
|
||||||
|
self.capture = capture
|
||||||
|
self.frameTime = frameTime
|
||||||
|
|
||||||
|
    def checkNullDisplay(self):
        # This is a real graphical display (cf. the text/null displays),
        # so always report False.
        return False
|
||||||
|
|
||||||
|
def initialize(self, state, isBlue = False):
|
||||||
|
self.isBlue = isBlue
|
||||||
|
self.startGraphics(state)
|
||||||
|
|
||||||
|
# self.drawDistributions(state)
|
||||||
|
self.distributionImages = None # Initialized lazily
|
||||||
|
self.drawStaticObjects(state)
|
||||||
|
self.drawAgentObjects(state)
|
||||||
|
|
||||||
|
# Information
|
||||||
|
self.previousState = state
|
||||||
|
|
||||||
|
def startGraphics(self, state):
|
||||||
|
self.layout = state.layout
|
||||||
|
layout = self.layout
|
||||||
|
self.width = layout.width
|
||||||
|
self.height = layout.height
|
||||||
|
self.make_window(self.width, self.height)
|
||||||
|
self.infoPane = InfoPane(layout, self.gridSize)
|
||||||
|
self.currentState = layout
|
||||||
|
|
||||||
|
def drawDistributions(self, state):
|
||||||
|
walls = state.layout.walls
|
||||||
|
dist = []
|
||||||
|
for x in range(walls.width):
|
||||||
|
distx = []
|
||||||
|
dist.append(distx)
|
||||||
|
for y in range(walls.height):
|
||||||
|
( screen_x, screen_y ) = self.to_screen( (x, y) )
|
||||||
|
block = square( (screen_x, screen_y),
|
||||||
|
0.5 * self.gridSize,
|
||||||
|
color = BACKGROUND_COLOR,
|
||||||
|
filled = 1, behind=2)
|
||||||
|
distx.append(block)
|
||||||
|
self.distributionImages = dist
|
||||||
|
|
||||||
|
def drawStaticObjects(self, state):
|
||||||
|
layout = self.layout
|
||||||
|
self.drawWalls(layout.walls)
|
||||||
|
self.food = self.drawFood(layout.food)
|
||||||
|
self.capsules = self.drawCapsules(layout.capsules)
|
||||||
|
refresh()
|
||||||
|
|
||||||
|
def drawAgentObjects(self, state):
|
||||||
|
self.agentImages = [] # (agentState, image)
|
||||||
|
for index, agent in enumerate(state.agentStates):
|
||||||
|
if agent.isPacman:
|
||||||
|
image = self.drawPacman(agent, index)
|
||||||
|
self.agentImages.append( (agent, image) )
|
||||||
|
else:
|
||||||
|
image = self.drawGhost(agent, index)
|
||||||
|
self.agentImages.append( (agent, image) )
|
||||||
|
refresh()
|
||||||
|
|
||||||
|
def swapImages(self, agentIndex, newState):
|
||||||
|
"""
|
||||||
|
Changes an image from a ghost to a pacman or vis versa (for capture)
|
||||||
|
"""
|
||||||
|
prevState, prevImage = self.agentImages[agentIndex]
|
||||||
|
for item in prevImage: remove_from_screen(item)
|
||||||
|
if newState.isPacman:
|
||||||
|
image = self.drawPacman(newState, agentIndex)
|
||||||
|
self.agentImages[agentIndex] = (newState, image )
|
||||||
|
else:
|
||||||
|
image = self.drawGhost(newState, agentIndex)
|
||||||
|
self.agentImages[agentIndex] = (newState, image )
|
||||||
|
refresh()
|
||||||
|
|
||||||
|
def update(self, newState):
|
||||||
|
agentIndex = newState._agentMoved
|
||||||
|
agentState = newState.agentStates[agentIndex]
|
||||||
|
|
||||||
|
if self.agentImages[agentIndex][0].isPacman != agentState.isPacman: self.swapImages(agentIndex, agentState)
|
||||||
|
prevState, prevImage = self.agentImages[agentIndex]
|
||||||
|
if agentState.isPacman:
|
||||||
|
self.animatePacman(agentState, prevState, prevImage)
|
||||||
|
else:
|
||||||
|
self.moveGhost(agentState, agentIndex, prevState, prevImage)
|
||||||
|
self.agentImages[agentIndex] = (agentState, prevImage)
|
||||||
|
|
||||||
|
if newState._foodEaten != None:
|
||||||
|
self.removeFood(newState._foodEaten, self.food)
|
||||||
|
if newState._capsuleEaten != None:
|
||||||
|
self.removeCapsule(newState._capsuleEaten, self.capsules)
|
||||||
|
self.infoPane.updateScore(newState.score)
|
||||||
|
if 'ghostDistances' in dir(newState):
|
||||||
|
self.infoPane.updateGhostDistances(newState.ghostDistances)
|
||||||
|
|
||||||
|
def make_window(self, width, height):
|
||||||
|
grid_width = (width-1) * self.gridSize
|
||||||
|
grid_height = (height-1) * self.gridSize
|
||||||
|
screen_width = 2*self.gridSize + grid_width
|
||||||
|
screen_height = 2*self.gridSize + grid_height + INFO_PANE_HEIGHT
|
||||||
|
|
||||||
|
begin_graphics(screen_width,
|
||||||
|
screen_height,
|
||||||
|
BACKGROUND_COLOR,
|
||||||
|
"CS188 Pacman")
|
||||||
|
|
||||||
|
def drawPacman(self, pacman, index):
|
||||||
|
position = self.getPosition(pacman)
|
||||||
|
screen_point = self.to_screen(position)
|
||||||
|
endpoints = self.getEndpoints(self.getDirection(pacman))
|
||||||
|
|
||||||
|
width = PACMAN_OUTLINE_WIDTH
|
||||||
|
outlineColor = PACMAN_COLOR
|
||||||
|
fillColor = PACMAN_COLOR
|
||||||
|
|
||||||
|
if self.capture:
|
||||||
|
outlineColor = TEAM_COLORS[index % 2]
|
||||||
|
fillColor = GHOST_COLORS[index]
|
||||||
|
width = PACMAN_CAPTURE_OUTLINE_WIDTH
|
||||||
|
|
||||||
|
return [circle(screen_point, PACMAN_SCALE * self.gridSize,
|
||||||
|
fillColor = fillColor, outlineColor = outlineColor,
|
||||||
|
endpoints = endpoints,
|
||||||
|
width = width)]
|
||||||
|
|
||||||
|
def getEndpoints(self, direction, position=(0,0)):
|
||||||
|
x, y = position
|
||||||
|
pos = x - int(x) + y - int(y)
|
||||||
|
width = 30 + 80 * math.sin(math.pi* pos)
|
||||||
|
|
||||||
|
delta = width / 2
|
||||||
|
if (direction == 'West'):
|
||||||
|
endpoints = (180+delta, 180-delta)
|
||||||
|
elif (direction == 'North'):
|
||||||
|
endpoints = (90+delta, 90-delta)
|
||||||
|
elif (direction == 'South'):
|
||||||
|
endpoints = (270+delta, 270-delta)
|
||||||
|
else:
|
||||||
|
endpoints = (0+delta, 0-delta)
|
||||||
|
return endpoints
|
||||||
|
|
||||||
|
def movePacman(self, position, direction, image):
|
||||||
|
screenPosition = self.to_screen(position)
|
||||||
|
endpoints = self.getEndpoints( direction, position )
|
||||||
|
r = PACMAN_SCALE * self.gridSize
|
||||||
|
moveCircle(image[0], screenPosition, r, endpoints)
|
||||||
|
refresh()
|
||||||
|
|
||||||
|
def animatePacman(self, pacman, prevPacman, image):
|
||||||
|
if self.frameTime < 0:
|
||||||
|
print 'Press any key to step forward, "q" to play'
|
||||||
|
keys = wait_for_keys()
|
||||||
|
if 'q' in keys:
|
||||||
|
self.frameTime = 0.1
|
||||||
|
if self.frameTime > 0.01 or self.frameTime < 0:
|
||||||
|
start = time.time()
|
||||||
|
fx, fy = self.getPosition(prevPacman)
|
||||||
|
px, py = self.getPosition(pacman)
|
||||||
|
frames = 4.0
|
||||||
|
for i in range(1,int(frames) + 1):
|
||||||
|
pos = px*i/frames + fx*(frames-i)/frames, py*i/frames + fy*(frames-i)/frames
|
||||||
|
self.movePacman(pos, self.getDirection(pacman), image)
|
||||||
|
refresh()
|
||||||
|
sleep(abs(self.frameTime) / frames)
|
||||||
|
else:
|
||||||
|
self.movePacman(self.getPosition(pacman), self.getDirection(pacman), image)
|
||||||
|
refresh()
|
||||||
|
|
||||||
|
def getGhostColor(self, ghost, ghostIndex):
|
||||||
|
if ghost.scaredTimer > 0:
|
||||||
|
return SCARED_COLOR
|
||||||
|
else:
|
||||||
|
return GHOST_COLORS[ghostIndex]
|
||||||
|
|
||||||
|
def drawGhost(self, ghost, agentIndex):
|
||||||
|
pos = self.getPosition(ghost)
|
||||||
|
dir = self.getDirection(ghost)
|
||||||
|
(screen_x, screen_y) = (self.to_screen(pos) )
|
||||||
|
coords = []
|
||||||
|
for (x, y) in GHOST_SHAPE:
|
||||||
|
coords.append((x*self.gridSize*GHOST_SIZE + screen_x, y*self.gridSize*GHOST_SIZE + screen_y))
|
||||||
|
|
||||||
|
colour = self.getGhostColor(ghost, agentIndex)
|
||||||
|
body = polygon(coords, colour, filled = 1)
|
||||||
|
WHITE = formatColor(1.0, 1.0, 1.0)
|
||||||
|
BLACK = formatColor(0.0, 0.0, 0.0)
|
||||||
|
|
||||||
|
dx = 0
|
||||||
|
dy = 0
|
||||||
|
if dir == 'North':
|
||||||
|
dy = -0.2
|
||||||
|
if dir == 'South':
|
||||||
|
dy = 0.2
|
||||||
|
if dir == 'East':
|
||||||
|
dx = 0.2
|
||||||
|
if dir == 'West':
|
||||||
|
dx = -0.2
|
||||||
|
leftEye = circle((screen_x+self.gridSize*GHOST_SIZE*(-0.3+dx/1.5), screen_y-self.gridSize*GHOST_SIZE*(0.3-dy/1.5)), self.gridSize*GHOST_SIZE*0.2, WHITE, WHITE)
|
||||||
|
rightEye = circle((screen_x+self.gridSize*GHOST_SIZE*(0.3+dx/1.5), screen_y-self.gridSize*GHOST_SIZE*(0.3-dy/1.5)), self.gridSize*GHOST_SIZE*0.2, WHITE, WHITE)
|
||||||
|
leftPupil = circle((screen_x+self.gridSize*GHOST_SIZE*(-0.3+dx), screen_y-self.gridSize*GHOST_SIZE*(0.3-dy)), self.gridSize*GHOST_SIZE*0.08, BLACK, BLACK)
|
||||||
|
rightPupil = circle((screen_x+self.gridSize*GHOST_SIZE*(0.3+dx), screen_y-self.gridSize*GHOST_SIZE*(0.3-dy)), self.gridSize*GHOST_SIZE*0.08, BLACK, BLACK)
|
||||||
|
ghostImageParts = []
|
||||||
|
ghostImageParts.append(body)
|
||||||
|
ghostImageParts.append(leftEye)
|
||||||
|
ghostImageParts.append(rightEye)
|
||||||
|
ghostImageParts.append(leftPupil)
|
||||||
|
ghostImageParts.append(rightPupil)
|
||||||
|
|
||||||
|
return ghostImageParts
|
||||||
|
|
||||||
|
def moveEyes(self, pos, dir, eyes):
|
||||||
|
(screen_x, screen_y) = (self.to_screen(pos) )
|
||||||
|
dx = 0
|
||||||
|
dy = 0
|
||||||
|
if dir == 'North':
|
||||||
|
dy = -0.2
|
||||||
|
if dir == 'South':
|
||||||
|
dy = 0.2
|
||||||
|
if dir == 'East':
|
||||||
|
dx = 0.2
|
||||||
|
if dir == 'West':
|
||||||
|
dx = -0.2
|
||||||
|
moveCircle(eyes[0],(screen_x+self.gridSize*GHOST_SIZE*(-0.3+dx/1.5), screen_y-self.gridSize*GHOST_SIZE*(0.3-dy/1.5)), self.gridSize*GHOST_SIZE*0.2)
|
||||||
|
moveCircle(eyes[1],(screen_x+self.gridSize*GHOST_SIZE*(0.3+dx/1.5), screen_y-self.gridSize*GHOST_SIZE*(0.3-dy/1.5)), self.gridSize*GHOST_SIZE*0.2)
|
||||||
|
moveCircle(eyes[2],(screen_x+self.gridSize*GHOST_SIZE*(-0.3+dx), screen_y-self.gridSize*GHOST_SIZE*(0.3-dy)), self.gridSize*GHOST_SIZE*0.08)
|
||||||
|
moveCircle(eyes[3],(screen_x+self.gridSize*GHOST_SIZE*(0.3+dx), screen_y-self.gridSize*GHOST_SIZE*(0.3-dy)), self.gridSize*GHOST_SIZE*0.08)
|
||||||
|
|
||||||
|
def moveGhost(self, ghost, ghostIndex, prevGhost, ghostImageParts):
|
||||||
|
old_x, old_y = self.to_screen(self.getPosition(prevGhost))
|
||||||
|
new_x, new_y = self.to_screen(self.getPosition(ghost))
|
||||||
|
delta = new_x - old_x, new_y - old_y
|
||||||
|
|
||||||
|
for ghostImagePart in ghostImageParts:
|
||||||
|
move_by(ghostImagePart, delta)
|
||||||
|
refresh()
|
||||||
|
|
||||||
|
if ghost.scaredTimer > 0:
|
||||||
|
color = SCARED_COLOR
|
||||||
|
else:
|
||||||
|
color = GHOST_COLORS[ghostIndex]
|
||||||
|
edit(ghostImageParts[0], ('fill', color), ('outline', color))
|
||||||
|
self.moveEyes(self.getPosition(ghost), self.getDirection(ghost), ghostImageParts[-4:])
|
||||||
|
refresh()
|
||||||
|
|
||||||
|
def getPosition(self, agentState):
|
||||||
|
if agentState.configuration == None: return (-1000, -1000)
|
||||||
|
return agentState.getPosition()
|
||||||
|
|
||||||
|
def getDirection(self, agentState):
|
||||||
|
if agentState.configuration == None: return Directions.STOP
|
||||||
|
return agentState.configuration.getDirection()
|
||||||
|
|
||||||
|
    def finish(self):
        """Close the graphics window."""
        end_graphics()
|
||||||
|
|
||||||
|
def to_screen(self, point):
|
||||||
|
( x, y ) = point
|
||||||
|
#y = self.height - y
|
||||||
|
x = (x + 1)*self.gridSize
|
||||||
|
y = (self.height - y)*self.gridSize
|
||||||
|
return ( x, y )
|
||||||
|
|
||||||
|
# Fixes some TK issue with off-center circles
|
||||||
|
def to_screen2(self, point):
|
||||||
|
( x, y ) = point
|
||||||
|
#y = self.height - y
|
||||||
|
x = (x + 1)*self.gridSize
|
||||||
|
y = (self.height - y)*self.gridSize
|
||||||
|
return ( x, y )
|
||||||
|
|
||||||
|
def drawWalls(self, wallMatrix):
|
||||||
|
wallColor = WALL_COLOR
|
||||||
|
for xNum, x in enumerate(wallMatrix):
|
||||||
|
if self.capture and (xNum * 2) < wallMatrix.width: wallColor = TEAM_COLORS[0]
|
||||||
|
if self.capture and (xNum * 2) >= wallMatrix.width: wallColor = TEAM_COLORS[1]
|
||||||
|
|
||||||
|
for yNum, cell in enumerate(x):
|
||||||
|
if cell: # There's a wall here
|
||||||
|
pos = (xNum, yNum)
|
||||||
|
screen = self.to_screen(pos)
|
||||||
|
screen2 = self.to_screen2(pos)
|
||||||
|
|
||||||
|
# draw each quadrant of the square based on adjacent walls
|
||||||
|
wIsWall = self.isWall(xNum-1, yNum, wallMatrix)
|
||||||
|
eIsWall = self.isWall(xNum+1, yNum, wallMatrix)
|
||||||
|
nIsWall = self.isWall(xNum, yNum+1, wallMatrix)
|
||||||
|
sIsWall = self.isWall(xNum, yNum-1, wallMatrix)
|
||||||
|
nwIsWall = self.isWall(xNum-1, yNum+1, wallMatrix)
|
||||||
|
swIsWall = self.isWall(xNum-1, yNum-1, wallMatrix)
|
||||||
|
neIsWall = self.isWall(xNum+1, yNum+1, wallMatrix)
|
||||||
|
seIsWall = self.isWall(xNum+1, yNum-1, wallMatrix)
|
||||||
|
|
||||||
|
# NE quadrant
|
||||||
|
if (not nIsWall) and (not eIsWall):
|
||||||
|
# inner circle
|
||||||
|
circle(screen2, WALL_RADIUS * self.gridSize, wallColor, wallColor, (0,91), 'arc')
|
||||||
|
if (nIsWall) and (not eIsWall):
|
||||||
|
# vertical line
|
||||||
|
line(add(screen, (self.gridSize*WALL_RADIUS, 0)), add(screen, (self.gridSize*WALL_RADIUS, self.gridSize*(-0.5)-1)), wallColor)
|
||||||
|
if (not nIsWall) and (eIsWall):
|
||||||
|
# horizontal line
|
||||||
|
line(add(screen, (0, self.gridSize*(-1)*WALL_RADIUS)), add(screen, (self.gridSize*0.5+1, self.gridSize*(-1)*WALL_RADIUS)), wallColor)
|
||||||
|
if (nIsWall) and (eIsWall) and (not neIsWall):
|
||||||
|
# outer circle
|
||||||
|
circle(add(screen2, (self.gridSize*2*WALL_RADIUS, self.gridSize*(-2)*WALL_RADIUS)), WALL_RADIUS * self.gridSize-1, wallColor, wallColor, (180,271), 'arc')
|
||||||
|
line(add(screen, (self.gridSize*2*WALL_RADIUS-1, self.gridSize*(-1)*WALL_RADIUS)), add(screen, (self.gridSize*0.5+1, self.gridSize*(-1)*WALL_RADIUS)), wallColor)
|
||||||
|
line(add(screen, (self.gridSize*WALL_RADIUS, self.gridSize*(-2)*WALL_RADIUS+1)), add(screen, (self.gridSize*WALL_RADIUS, self.gridSize*(-0.5))), wallColor)
|
||||||
|
|
||||||
|
# NW quadrant
|
||||||
|
if (not nIsWall) and (not wIsWall):
|
||||||
|
# inner circle
|
||||||
|
circle(screen2, WALL_RADIUS * self.gridSize, wallColor, wallColor, (90,181), 'arc')
|
||||||
|
if (nIsWall) and (not wIsWall):
|
||||||
|
# vertical line
|
||||||
|
line(add(screen, (self.gridSize*(-1)*WALL_RADIUS, 0)), add(screen, (self.gridSize*(-1)*WALL_RADIUS, self.gridSize*(-0.5)-1)), wallColor)
|
||||||
|
if (not nIsWall) and (wIsWall):
|
||||||
|
# horizontal line
|
||||||
|
line(add(screen, (0, self.gridSize*(-1)*WALL_RADIUS)), add(screen, (self.gridSize*(-0.5)-1, self.gridSize*(-1)*WALL_RADIUS)), wallColor)
|
||||||
|
if (nIsWall) and (wIsWall) and (not nwIsWall):
|
||||||
|
# outer circle
|
||||||
|
circle(add(screen2, (self.gridSize*(-2)*WALL_RADIUS, self.gridSize*(-2)*WALL_RADIUS)), WALL_RADIUS * self.gridSize-1, wallColor, wallColor, (270,361), 'arc')
|
||||||
|
line(add(screen, (self.gridSize*(-2)*WALL_RADIUS+1, self.gridSize*(-1)*WALL_RADIUS)), add(screen, (self.gridSize*(-0.5), self.gridSize*(-1)*WALL_RADIUS)), wallColor)
|
||||||
|
line(add(screen, (self.gridSize*(-1)*WALL_RADIUS, self.gridSize*(-2)*WALL_RADIUS+1)), add(screen, (self.gridSize*(-1)*WALL_RADIUS, self.gridSize*(-0.5))), wallColor)
|
||||||
|
|
||||||
|
# SE quadrant
|
||||||
|
if (not sIsWall) and (not eIsWall):
|
||||||
|
# inner circle
|
||||||
|
circle(screen2, WALL_RADIUS * self.gridSize, wallColor, wallColor, (270,361), 'arc')
|
||||||
|
if (sIsWall) and (not eIsWall):
|
||||||
|
# vertical line
|
||||||
|
line(add(screen, (self.gridSize*WALL_RADIUS, 0)), add(screen, (self.gridSize*WALL_RADIUS, self.gridSize*(0.5)+1)), wallColor)
|
||||||
|
if (not sIsWall) and (eIsWall):
|
||||||
|
# horizontal line
|
||||||
|
line(add(screen, (0, self.gridSize*(1)*WALL_RADIUS)), add(screen, (self.gridSize*0.5+1, self.gridSize*(1)*WALL_RADIUS)), wallColor)
|
||||||
|
if (sIsWall) and (eIsWall) and (not seIsWall):
|
||||||
|
# outer circle
|
||||||
|
circle(add(screen2, (self.gridSize*2*WALL_RADIUS, self.gridSize*(2)*WALL_RADIUS)), WALL_RADIUS * self.gridSize-1, wallColor, wallColor, (90,181), 'arc')
|
||||||
|
line(add(screen, (self.gridSize*2*WALL_RADIUS-1, self.gridSize*(1)*WALL_RADIUS)), add(screen, (self.gridSize*0.5, self.gridSize*(1)*WALL_RADIUS)), wallColor)
|
||||||
|
line(add(screen, (self.gridSize*WALL_RADIUS, self.gridSize*(2)*WALL_RADIUS-1)), add(screen, (self.gridSize*WALL_RADIUS, self.gridSize*(0.5))), wallColor)
|
||||||
|
|
||||||
|
# SW quadrant
|
||||||
|
if (not sIsWall) and (not wIsWall):
|
||||||
|
# inner circle
|
||||||
|
circle(screen2, WALL_RADIUS * self.gridSize, wallColor, wallColor, (180,271), 'arc')
|
||||||
|
if (sIsWall) and (not wIsWall):
|
||||||
|
# vertical line
|
||||||
|
line(add(screen, (self.gridSize*(-1)*WALL_RADIUS, 0)), add(screen, (self.gridSize*(-1)*WALL_RADIUS, self.gridSize*(0.5)+1)), wallColor)
|
||||||
|
if (not sIsWall) and (wIsWall):
|
||||||
|
# horizontal line
|
||||||
|
line(add(screen, (0, self.gridSize*(1)*WALL_RADIUS)), add(screen, (self.gridSize*(-0.5)-1, self.gridSize*(1)*WALL_RADIUS)), wallColor)
|
||||||
|
if (sIsWall) and (wIsWall) and (not swIsWall):
|
||||||
|
# outer circle
|
||||||
|
circle(add(screen2, (self.gridSize*(-2)*WALL_RADIUS, self.gridSize*(2)*WALL_RADIUS)), WALL_RADIUS * self.gridSize-1, wallColor, wallColor, (0,91), 'arc')
|
||||||
|
line(add(screen, (self.gridSize*(-2)*WALL_RADIUS+1, self.gridSize*(1)*WALL_RADIUS)), add(screen, (self.gridSize*(-0.5), self.gridSize*(1)*WALL_RADIUS)), wallColor)
|
||||||
|
line(add(screen, (self.gridSize*(-1)*WALL_RADIUS, self.gridSize*(2)*WALL_RADIUS-1)), add(screen, (self.gridSize*(-1)*WALL_RADIUS, self.gridSize*(0.5))), wallColor)
|
||||||
|
|
||||||
|
def isWall(self, x, y, walls):
|
||||||
|
if x < 0 or y < 0:
|
||||||
|
return False
|
||||||
|
if x >= walls.width or y >= walls.height:
|
||||||
|
return False
|
||||||
|
return walls[x][y]
|
||||||
|
|
||||||
|
def drawFood(self, foodMatrix ):
|
||||||
|
foodImages = []
|
||||||
|
color = FOOD_COLOR
|
||||||
|
for xNum, x in enumerate(foodMatrix):
|
||||||
|
if self.capture and (xNum * 2) <= foodMatrix.width: color = TEAM_COLORS[0]
|
||||||
|
if self.capture and (xNum * 2) > foodMatrix.width: color = TEAM_COLORS[1]
|
||||||
|
imageRow = []
|
||||||
|
foodImages.append(imageRow)
|
||||||
|
for yNum, cell in enumerate(x):
|
||||||
|
if cell: # There's food here
|
||||||
|
screen = self.to_screen((xNum, yNum ))
|
||||||
|
dot = circle( screen,
|
||||||
|
FOOD_SIZE * self.gridSize,
|
||||||
|
outlineColor = color, fillColor = color,
|
||||||
|
width = 1)
|
||||||
|
imageRow.append(dot)
|
||||||
|
else:
|
||||||
|
imageRow.append(None)
|
||||||
|
return foodImages
|
||||||
|
|
||||||
|
def drawCapsules(self, capsules ):
|
||||||
|
capsuleImages = {}
|
||||||
|
for capsule in capsules:
|
||||||
|
( screen_x, screen_y ) = self.to_screen(capsule)
|
||||||
|
dot = circle( (screen_x, screen_y),
|
||||||
|
CAPSULE_SIZE * self.gridSize,
|
||||||
|
outlineColor = CAPSULE_COLOR,
|
||||||
|
fillColor = CAPSULE_COLOR,
|
||||||
|
width = 1)
|
||||||
|
capsuleImages[capsule] = dot
|
||||||
|
return capsuleImages
|
||||||
|
|
||||||
|
    def removeFood(self, cell, foodImages ):
        """Erase the food dot drawn at grid position `cell`.

        `foodImages` is the nested list returned by drawFood, indexed [x][y].
        """
        x, y = cell
        remove_from_screen(foodImages[x][y])
|
||||||
|
|
||||||
|
    def removeCapsule(self, cell, capsuleImages ):
        """Erase the capsule drawn at grid position `cell`.

        Unlike foodImages, `capsuleImages` is a dict keyed by (x, y) tuples
        (see drawCapsules).
        """
        x, y = cell
        remove_from_screen(capsuleImages[(x, y)])
|
||||||
|
|
||||||
|
def drawExpandedCells(self, cells):
|
||||||
|
"""
|
||||||
|
Draws an overlay of expanded grid positions for search agents
|
||||||
|
"""
|
||||||
|
n = float(len(cells))
|
||||||
|
baseColor = [1.0, 0.0, 0.0]
|
||||||
|
self.clearExpandedCells()
|
||||||
|
self.expandedCells = []
|
||||||
|
for k, cell in enumerate(cells):
|
||||||
|
screenPos = self.to_screen( cell)
|
||||||
|
cellColor = formatColor(*[(n-k) * c * .5 / n + .25 for c in baseColor])
|
||||||
|
block = square(screenPos,
|
||||||
|
0.5 * self.gridSize,
|
||||||
|
color = cellColor,
|
||||||
|
filled = 1, behind=2)
|
||||||
|
self.expandedCells.append(block)
|
||||||
|
if self.frameTime < 0:
|
||||||
|
refresh()
|
||||||
|
|
||||||
|
    def clearExpandedCells(self):
        """Remove any previously drawn search-expansion overlay.

        Safe to call before any overlay exists: the attribute is created
        lazily by drawExpandedCells, hence the dir(self) guard.
        """
        if 'expandedCells' in dir(self) and len(self.expandedCells) > 0:
            for cell in self.expandedCells:
                remove_from_screen(cell)
|
||||||
|
|
||||||
|
|
||||||
|
def updateDistributions(self, distributions):
|
||||||
|
"Draws an agent's belief distributions"
|
||||||
|
# copy all distributions so we don't change their state
|
||||||
|
distributions = map(lambda x: x.copy(), distributions)
|
||||||
|
if self.distributionImages == None:
|
||||||
|
self.drawDistributions(self.previousState)
|
||||||
|
for x in range(len(self.distributionImages)):
|
||||||
|
for y in range(len(self.distributionImages[0])):
|
||||||
|
image = self.distributionImages[x][y]
|
||||||
|
weights = [dist[ (x,y) ] for dist in distributions]
|
||||||
|
|
||||||
|
if sum(weights) != 0:
|
||||||
|
pass
|
||||||
|
# Fog of war
|
||||||
|
color = [0.0,0.0,0.0]
|
||||||
|
colors = GHOST_VEC_COLORS[1:] # With Pacman
|
||||||
|
if self.capture: colors = GHOST_VEC_COLORS
|
||||||
|
for weight, gcolor in zip(weights, colors):
|
||||||
|
color = [min(1.0, c + 0.95 * g * weight ** .3) for c,g in zip(color, gcolor)]
|
||||||
|
changeColor(image, formatColor(*color))
|
||||||
|
refresh()
|
||||||
|
|
||||||
|
class FirstPersonPacmanGraphics(PacmanGraphics):
    """Display variant that can hide ghosts (for partially observable games)
    and never applies the scared-ghost color."""

    def __init__(self, zoom = 1.0, showGhosts = True, capture = False, frameTime=0):
        PacmanGraphics.__init__(self, zoom, frameTime=frameTime)
        self.showGhosts = showGhosts
        self.capture = capture

    def initialize(self, state, isBlue = False):
        """Set up the window and draw the initial board."""
        self.isBlue = isBlue
        PacmanGraphics.startGraphics(self, state)
        # Initialize distribution images
        # NOTE(review): `walls` and `dist` are assigned but never used in
        # this method — confirm whether they are leftovers.
        walls = state.layout.walls
        dist = []
        self.layout = state.layout

        # Draw the rest
        self.distributionImages = None  # Initialized lazily
        self.drawStaticObjects(state)
        self.drawAgentObjects(state)

        # Information
        self.previousState = state

    def lookAhead(self, config, state):
        """Draw only the ghosts currently visible from Pacman's viewpoint."""
        if config.getDirection() == 'Stop':
            return
        else:
            pass
            # Draw relevant ghosts
            allGhosts = state.getGhostStates()
            visibleGhosts = state.getVisibleGhosts()
            for i, ghost in enumerate(allGhosts):
                if ghost in visibleGhosts:
                    self.drawGhost(ghost, i)
                else:
                    # Invisible ghosts get no image this frame.
                    self.currentGhostImages[i] = None

    def getGhostColor(self, ghost, ghostIndex):
        # Override: never use the scared (white) color in first-person mode.
        return GHOST_COLORS[ghostIndex]

    def getPosition(self, ghostState):
        # Hide non-Pacman agents above the bottom rows when showGhosts is
        # off by reporting a far off-screen position.
        if not self.showGhosts and not ghostState.isPacman and ghostState.getPosition()[1] > 1:
            return (-1000, -1000)
        else:
            return PacmanGraphics.getPosition(self, ghostState)
|
||||||
|
|
||||||
|
def add(x, y):
    """Component-wise sum of two 2D points/vectors."""
    (x0, y0), (x1, y1) = x, y
    return (x0 + x1, y0 + y1)
|
||||||
|
|
||||||
|
|
||||||
|
# Saving graphical output
|
||||||
|
# -----------------------
|
||||||
|
# Note: to make an animated gif from this postscript output, try the command:
|
||||||
|
# convert -delay 7 -loop 1 -compress lzw -layers optimize frame* out.gif
|
||||||
|
# convert is part of imagemagick (freeware)
|
||||||
|
|
||||||
|
SAVE_POSTSCRIPT = False
|
||||||
|
POSTSCRIPT_OUTPUT_DIR = 'frames'
|
||||||
|
FRAME_NUMBER = 0
|
||||||
|
import os
|
||||||
|
|
||||||
|
def saveFrame():
    "Saves the current graphical output as a postscript file"
    global SAVE_POSTSCRIPT, FRAME_NUMBER, POSTSCRIPT_OUTPUT_DIR
    # No-op unless postscript capture has been enabled globally.
    if not SAVE_POSTSCRIPT: return
    # Lazily create the output directory on first use.
    if not os.path.exists(POSTSCRIPT_OUTPUT_DIR): os.mkdir(POSTSCRIPT_OUTPUT_DIR)
    # Zero-padded frame counter keeps files sortable for gif assembly.
    name = os.path.join(POSTSCRIPT_OUTPUT_DIR, 'frame_%08d.ps' % FRAME_NUMBER)
    FRAME_NUMBER += 1
    writePostscript(name) # writes the current canvas
|
||||||
348
p3_rl/graphicsGridworldDisplay.py
Normal file
348
p3_rl/graphicsGridworldDisplay.py
Normal file
@@ -0,0 +1,348 @@
|
|||||||
|
# graphicsGridworldDisplay.py
|
||||||
|
# ---------------------------
|
||||||
|
# Licensing Information: You are free to use or extend these projects for
|
||||||
|
# educational purposes provided that (1) you do not distribute or publish
|
||||||
|
# solutions, (2) you retain this notice, and (3) you provide clear
|
||||||
|
# attribution to UC Berkeley, including a link to http://ai.berkeley.edu.
|
||||||
|
#
|
||||||
|
# Attribution Information: The Pacman AI projects were developed at UC Berkeley.
|
||||||
|
# The core projects and autograders were primarily created by John DeNero
|
||||||
|
# (denero@cs.berkeley.edu) and Dan Klein (klein@cs.berkeley.edu).
|
||||||
|
# Student side autograding was added by Brad Miller, Nick Hay, and
|
||||||
|
# Pieter Abbeel (pabbeel@cs.berkeley.edu).
|
||||||
|
|
||||||
|
|
||||||
|
import util
|
||||||
|
from graphicsUtils import *
|
||||||
|
|
||||||
|
class GraphicsGridworldDisplay:
    """Graphical front-end for a Gridworld MDP: renders state values,
    Q-values, and the current agent location."""

    def __init__(self, gridworld, size=120, speed=1.0):
        # size: pixel size of one grid cell; speed > 1 shortens redraw pauses.
        self.gridworld = gridworld
        self.size = size
        self.speed = speed

    def start(self):
        # Open the graphics window sized to this gridworld.
        setup(self.gridworld, size=self.size)

    def pause(self):
        # Block until the user presses a key.
        wait_for_keys()

    def displayValues(self, agent, currentState = None, message = 'Agent Values'):
        """Query the agent for V(s) and pi(s) at every state and draw them."""
        values = util.Counter()
        policy = {}
        states = self.gridworld.getStates()
        for state in states:
            values[state] = agent.getValue(state)
            policy[state] = agent.getPolicy(state)
        drawValues(self.gridworld, values, policy, currentState, message)
        sleep(0.05 / self.speed)

    def displayNullValues(self, currentState = None, message = ''):
        """Draw the grid with no value information (all zeros)."""
        values = util.Counter()
        #policy = {}
        states = self.gridworld.getStates()
        for state in states:
            values[state] = 0.0
            #policy[state] = agent.getPolicy(state)
        drawNullValues(self.gridworld, currentState,'')
        # drawValues(self.gridworld, values, policy, currentState, message)
        sleep(0.05 / self.speed)

    def displayQValues(self, agent, currentState = None, message = 'Agent Q-Values'):
        """Query the agent for Q(s, a) at every state-action pair and draw them."""
        qValues = util.Counter()
        states = self.gridworld.getStates()
        for state in states:
            for action in self.gridworld.getPossibleActions(state):
                qValues[(state, action)] = agent.getQValue(state, action)
        drawQValues(self.gridworld, qValues, currentState, message)
        sleep(0.05 / self.speed)
|
||||||
|
|
||||||
|
BACKGROUND_COLOR = formatColor(0,0,0)
|
||||||
|
EDGE_COLOR = formatColor(1,1,1)
|
||||||
|
OBSTACLE_COLOR = formatColor(0.5,0.5,0.5)
|
||||||
|
TEXT_COLOR = formatColor(1,1,1)
|
||||||
|
MUTED_TEXT_COLOR = formatColor(0.7,0.7,0.7)
|
||||||
|
LOCATION_COLOR = formatColor(0,0,1)
|
||||||
|
|
||||||
|
WINDOW_SIZE = -1
|
||||||
|
GRID_SIZE = -1
|
||||||
|
GRID_HEIGHT = -1
|
||||||
|
MARGIN = -1
|
||||||
|
|
||||||
|
def setup(gridworld, title = "Gridworld Display", size = 120):
    # Open the graphics window sized to fit the gridworld grid.
    # NOTE(review): WINDOW_SIZE is assigned below but is not in the
    # `global` statement, so it only binds a dead local — the module-level
    # WINDOW_SIZE stays -1. Likewise SCREEN_WIDTH/SCREEN_HEIGHT are
    # declared global but only lowercase locals are assigned. Confirm
    # whether anything relies on those module globals before changing.
    global GRID_SIZE, MARGIN, SCREEN_WIDTH, SCREEN_HEIGHT, GRID_HEIGHT
    grid = gridworld.grid
    WINDOW_SIZE = size
    GRID_SIZE = size
    GRID_HEIGHT = grid.height
    MARGIN = GRID_SIZE * 0.75
    screen_width = (grid.width - 1) * GRID_SIZE + MARGIN * 2
    screen_height = (grid.height - 0.5) * GRID_SIZE + MARGIN * 2

    begin_graphics(screen_width,
                   screen_height,
                   BACKGROUND_COLOR, title=title)
|
||||||
|
|
||||||
|
def drawNullValues(gridworld, currentState = None, message = ''):
|
||||||
|
grid = gridworld.grid
|
||||||
|
blank()
|
||||||
|
for x in range(grid.width):
|
||||||
|
for y in range(grid.height):
|
||||||
|
state = (x, y)
|
||||||
|
gridType = grid[x][y]
|
||||||
|
isExit = (str(gridType) != gridType)
|
||||||
|
isCurrent = (currentState == state)
|
||||||
|
if gridType == '#':
|
||||||
|
drawSquare(x, y, 0, 0, 0, None, None, True, False, isCurrent)
|
||||||
|
else:
|
||||||
|
drawNullSquare(gridworld.grid, x, y, False, isExit, isCurrent)
|
||||||
|
pos = to_screen(((grid.width - 1.0) / 2.0, - 0.8))
|
||||||
|
text( pos, TEXT_COLOR, message, "Courier", -32, "bold", "c")
|
||||||
|
|
||||||
|
|
||||||
|
def drawValues(gridworld, values, policy, currentState = None, message = 'State Values'):
|
||||||
|
grid = gridworld.grid
|
||||||
|
blank()
|
||||||
|
valueList = [values[state] for state in gridworld.getStates()] + [0.0]
|
||||||
|
minValue = min(valueList)
|
||||||
|
maxValue = max(valueList)
|
||||||
|
for x in range(grid.width):
|
||||||
|
for y in range(grid.height):
|
||||||
|
state = (x, y)
|
||||||
|
gridType = grid[x][y]
|
||||||
|
isExit = (str(gridType) != gridType)
|
||||||
|
isCurrent = (currentState == state)
|
||||||
|
if gridType == '#':
|
||||||
|
drawSquare(x, y, 0, 0, 0, None, None, True, False, isCurrent)
|
||||||
|
else:
|
||||||
|
value = values[state]
|
||||||
|
action = None
|
||||||
|
if policy != None and state in policy:
|
||||||
|
action = policy[state]
|
||||||
|
actions = gridworld.getPossibleActions(state)
|
||||||
|
if action not in actions and 'exit' in actions:
|
||||||
|
action = 'exit'
|
||||||
|
valString = '%.2f' % value
|
||||||
|
drawSquare(x, y, value, minValue, maxValue, valString, action, False, isExit, isCurrent)
|
||||||
|
pos = to_screen(((grid.width - 1.0) / 2.0, - 0.8))
|
||||||
|
text( pos, TEXT_COLOR, message, "Courier", -32, "bold", "c")
|
||||||
|
|
||||||
|
def drawQValues(gridworld, qValues, currentState = None, message = 'State-Action Q-Values'):
    """Render per-action Q-values as colored wedges in each cell, plus a caption."""
    grid = gridworld.grid
    blank()
    # Flatten all (state, action) pairs to find the global Q-value range.
    stateCrossActions = [[(s, a) for a in gridworld.getPossibleActions(s)]
                         for s in gridworld.getStates()]
    qStates = sum(stateCrossActions, [])
    allQ = [qValues[pair] for pair in qStates] + [0.0]
    lo, hi = min(allQ), max(allQ)
    for x in range(grid.width):
        for y in range(grid.height):
            state = (x, y)
            cell = grid[x][y]
            isExit = (str(cell) != cell)
            isCurrent = (currentState == state)
            actions = gridworld.getPossibleActions(state)
            if actions == None or len(actions) == 0:
                actions = [None]
            bestQ = max([qValues[(state, a)] for a in actions])
            bestActions = [a for a in actions if qValues[(state, a)] == bestQ]

            # Collect each action's value and its display string.
            q = util.Counter()
            valStrings = {}
            for a in actions:
                v = qValues[(state, a)]
                q[a] += v
                valStrings[a] = '%.2f' % v
            if cell == '#':
                drawSquare(x, y, 0, 0, 0, None, None, True, False, isCurrent)
            elif isExit:
                # Exit cells have a single 'exit' action; show it as a value square.
                action = 'exit'
                value = q[action]
                drawSquare(x, y, value, lo, hi, '%.2f' % value, action, False, isExit, isCurrent)
            else:
                drawSquareQ(x, y, q, lo, hi, valStrings, actions, isCurrent)
    pos = to_screen(((grid.width - 1.0) / 2.0, -0.8))
    text(pos, TEXT_COLOR, message, "Courier", -32, "bold", "c")
def blank():
    """Wipe the canvas back to the background color."""
    clear_screen()
def drawNullSquare(grid, x, y, isObstacle, isTerminal, isCurrent):
    """Draw one cell with no value shading (used before values exist)."""
    if isObstacle:
        square_color = OBSTACLE_COLOR
    else:
        square_color = getColor(0, -1, 1)

    (screen_x, screen_y) = to_screen((x, y))
    # Filled interior, then a heavier border on top.
    square((screen_x, screen_y), 0.5 * GRID_SIZE, color=square_color, filled=1, width=1)
    square((screen_x, screen_y), 0.5 * GRID_SIZE, color=EDGE_COLOR, filled=0, width=3)

    if isTerminal and not isObstacle:
        # Inner box plus the cell's label marks an exit/terminal cell.
        square((screen_x, screen_y), 0.4 * GRID_SIZE, color=EDGE_COLOR, filled=0, width=2)
        text((screen_x, screen_y), TEXT_COLOR, str(grid[x][y]), "Courier", -24, "bold", "c")

    if not isObstacle and isCurrent:
        # Dot marking the agent's current location.
        circle((screen_x, screen_y), 0.1 * GRID_SIZE, LOCATION_COLOR, fillColor=LOCATION_COLOR)

    # if not isObstacle:
    #   text( (screen_x, screen_y), text_color, valStr, "Courier", 24, "bold", "c")
def drawSquare(x, y, val, minVal, maxVal, valStr, action, isObstacle, isTerminal, isCurrent):
    """Draw one value cell: shaded interior, border, optional policy arrow, label.

    Fix: the 'min'/'max' parameters shadowed the builtins; renamed to
    minVal/maxVal (all in-file callers pass these positionally).
    """
    square_color = getColor(val, minVal, maxVal)
    if isObstacle:
        square_color = OBSTACLE_COLOR

    (screen_x, screen_y) = to_screen((x, y))
    # Filled interior, then a heavier border on top.
    square((screen_x, screen_y), 0.5 * GRID_SIZE, color=square_color, filled=1, width=1)
    square((screen_x, screen_y), 0.5 * GRID_SIZE, color=EDGE_COLOR, filled=0, width=3)
    if isTerminal and not isObstacle:
        # Inner box marks an exit/terminal cell.
        square((screen_x, screen_y), 0.4 * GRID_SIZE, color=EDGE_COLOR, filled=0, width=2)

    # Policy arrow: a small triangle on the edge the action points toward.
    if action == 'north':
        polygon([(screen_x, screen_y - 0.45 * GRID_SIZE), (screen_x + 0.05 * GRID_SIZE, screen_y - 0.40 * GRID_SIZE), (screen_x - 0.05 * GRID_SIZE, screen_y - 0.40 * GRID_SIZE)], EDGE_COLOR, filled=1, smoothed=False)
    if action == 'south':
        polygon([(screen_x, screen_y + 0.45 * GRID_SIZE), (screen_x + 0.05 * GRID_SIZE, screen_y + 0.40 * GRID_SIZE), (screen_x - 0.05 * GRID_SIZE, screen_y + 0.40 * GRID_SIZE)], EDGE_COLOR, filled=1, smoothed=False)
    if action == 'west':
        polygon([(screen_x - 0.45 * GRID_SIZE, screen_y), (screen_x - 0.4 * GRID_SIZE, screen_y + 0.05 * GRID_SIZE), (screen_x - 0.4 * GRID_SIZE, screen_y - 0.05 * GRID_SIZE)], EDGE_COLOR, filled=1, smoothed=False)
    if action == 'east':
        polygon([(screen_x + 0.45 * GRID_SIZE, screen_y), (screen_x + 0.4 * GRID_SIZE, screen_y + 0.05 * GRID_SIZE), (screen_x + 0.4 * GRID_SIZE, screen_y - 0.05 * GRID_SIZE)], EDGE_COLOR, filled=1, smoothed=False)

    text_color = TEXT_COLOR

    if not isObstacle and isCurrent:
        # Dot marking the agent's current location.
        circle((screen_x, screen_y), 0.1 * GRID_SIZE, outlineColor=LOCATION_COLOR, fillColor=LOCATION_COLOR)

    if not isObstacle:
        text((screen_x, screen_y), text_color, valStr, "Courier", -30, "bold", "c")
def drawSquareQ(x, y, qVals, minVal, maxVal, valStrs, bestActions, isCurrent):
    """Draw one cell as four triangular wedges, one per action's Q-value."""

    (screen_x, screen_y) = to_screen((x, y))

    # Corner and edge anchor points of the cell, in screen coordinates.
    center = (screen_x, screen_y)
    nw = (screen_x-0.5*GRID_SIZE, screen_y-0.5*GRID_SIZE)
    ne = (screen_x+0.5*GRID_SIZE, screen_y-0.5*GRID_SIZE)
    se = (screen_x+0.5*GRID_SIZE, screen_y+0.5*GRID_SIZE)
    sw = (screen_x-0.5*GRID_SIZE, screen_y+0.5*GRID_SIZE)
    n = (screen_x, screen_y-0.5*GRID_SIZE+5)
    s = (screen_x, screen_y+0.5*GRID_SIZE-5)
    w = (screen_x-0.5*GRID_SIZE+5, screen_y)
    e = (screen_x+0.5*GRID_SIZE-5, screen_y)

    # First pass: fill one wedge per action, shaded by its Q-value.
    actions = qVals.keys()
    for action in actions:

        wedge_color = getColor(qVals[action], minVal, maxVal)

        if action == 'north':
            polygon( (center, nw, ne), wedge_color, filled = 1, smoothed = False)
            #text(n, text_color, valStr, "Courier", 8, "bold", "n")
        if action == 'south':
            polygon( (center, sw, se), wedge_color, filled = 1, smoothed = False)
            #text(s, text_color, valStr, "Courier", 8, "bold", "s")
        if action == 'east':
            polygon( (center, ne, se), wedge_color, filled = 1, smoothed = False)
            #text(e, text_color, valStr, "Courier", 8, "bold", "e")
        if action == 'west':
            polygon( (center, nw, sw), wedge_color, filled = 1, smoothed = False)
            #text(w, text_color, valStr, "Courier", 8, "bold", "w")

    # Border and the two diagonals separating the wedges.
    square( (screen_x, screen_y),
                   0.5* GRID_SIZE,
                   color = EDGE_COLOR,
                   filled = 0,
                   width = 3)
    line(ne, sw, color = EDGE_COLOR)
    line(nw, se, color = EDGE_COLOR)

    if isCurrent:
        # Dot marking the agent's current location.
        circle( (screen_x, screen_y), 0.1*GRID_SIZE, LOCATION_COLOR, fillColor=LOCATION_COLOR )

    # Second pass: numeric labels; non-best actions use the muted color.
    # NOTE(review): 'bestActions' is accepted but unused — best-ness is
    # recomputed here from qVals; confirm this is intentional.
    for action in actions:
        text_color = TEXT_COLOR
        if qVals[action] < max(qVals.values()): text_color = MUTED_TEXT_COLOR
        valStr = ""
        if action in valStrs:
            valStr = valStrs[action]
        h = -20
        if action == 'north':
            #polygon( (center, nw, ne), wedge_color, filled = 1, smooth = 0)
            text(n, text_color, valStr, "Courier", h, "bold", "n")
        if action == 'south':
            #polygon( (center, sw, se), wedge_color, filled = 1, smooth = 0)
            text(s, text_color, valStr, "Courier", h, "bold", "s")
        if action == 'east':
            #polygon( (center, ne, se), wedge_color, filled = 1, smooth = 0)
            text(e, text_color, valStr, "Courier", h, "bold", "e")
        if action == 'west':
            #polygon( (center, nw, sw), wedge_color, filled = 1, smooth = 0)
            text(w, text_color, valStr, "Courier", h, "bold", "w")
def getColor(val, minVal, maxVal):
    """Map a value in [minVal, maxVal] to a red (negative) / green (positive) shade.

    Fix: the third parameter was named 'max', shadowing the builtin; renamed
    to maxVal (all in-file callers pass it positionally).
    """
    r, g = 0.0, 0.0
    if val < 0 and minVal < 0:
        # Fraction of the most-negative value, scaled to at most 0.65 red.
        r = val * 0.65 / minVal
    if val > 0 and maxVal > 0:
        # Fraction of the most-positive value, scaled to at most 0.65 green.
        g = val * 0.65 / maxVal
    return formatColor(r, g, 0.0)
def square(pos, size, color, filled, width):
    """Axis-aligned square of half-width `size` centered at `pos`."""
    (cx, cy) = pos
    corners = [(cx - size, cy - size), (cx - size, cy + size),
               (cx + size, cy + size), (cx + size, cy - size)]
    return polygon(corners, outlineColor=color, fillColor=color,
                   filled=filled, width=width, smoothed=False)
def to_screen(point):
    """Convert grid coordinates (origin bottom-left, y up) to pixel coordinates."""
    (gamex, gamey) = point
    screen_x = gamex * GRID_SIZE + MARGIN
    # Screen y grows downward, so flip the row index.
    screen_y = (GRID_HEIGHT - gamey - 1) * GRID_SIZE + MARGIN
    return (screen_x, screen_y)
def to_grid(point):
|
||||||
|
(x, y) = point
|
||||||
|
x = int ((y - MARGIN + GRID_SIZE * 0.5) / GRID_SIZE)
|
||||||
|
y = int ((x - MARGIN + GRID_SIZE * 0.5) / GRID_SIZE)
|
||||||
|
print point, "-->", (x, y)
|
||||||
|
return (x, y)
|
||||||
398
p3_rl/graphicsUtils.py
Normal file
398
p3_rl/graphicsUtils.py
Normal file
@@ -0,0 +1,398 @@
|
|||||||
|
# graphicsUtils.py
|
||||||
|
# ----------------
|
||||||
|
# Licensing Information: You are free to use or extend these projects for
|
||||||
|
# educational purposes provided that (1) you do not distribute or publish
|
||||||
|
# solutions, (2) you retain this notice, and (3) you provide clear
|
||||||
|
# attribution to UC Berkeley, including a link to http://ai.berkeley.edu.
|
||||||
|
#
|
||||||
|
# Attribution Information: The Pacman AI projects were developed at UC Berkeley.
|
||||||
|
# The core projects and autograders were primarily created by John DeNero
|
||||||
|
# (denero@cs.berkeley.edu) and Dan Klein (klein@cs.berkeley.edu).
|
||||||
|
# Student side autograding was added by Brad Miller, Nick Hay, and
|
||||||
|
# Pieter Abbeel (pabbeel@cs.berkeley.edu).
|
||||||
|
|
||||||
|
|
||||||
|
import sys
|
||||||
|
import math
|
||||||
|
import random
|
||||||
|
import string
|
||||||
|
import time
|
||||||
|
import types
|
||||||
|
import Tkinter
|
||||||
|
|
||||||
|
_Windows = sys.platform == 'win32'  # True if on Win95/98/NT

# Module-level graphics state, populated by begin_graphics().
_root_window = None      # The root window for graphics output
_canvas = None      # The canvas which holds graphics
_canvas_xs = None      # Size of canvas object
_canvas_ys = None
_canvas_x = None      # Current position on canvas
_canvas_y = None
_canvas_col = None      # Current colour (set to black below)
_canvas_tsize = 12       # Default text size
_canvas_tserifs = 0      # Default serif flag
def formatColor(r, g, b):
    """Pack RGB floats in [0, 1] into a Tk '#rrggbb' hex color string."""
    scaled = [int(channel * 255) for channel in (r, g, b)]
    return '#%02x%02x%02x' % tuple(scaled)
def colorToVector(color):
    """Inverse of formatColor: '#rrggbb' -> per-channel floats divided by 256."""
    channels = [color[1:3], color[3:5], color[5:7]]
    return map(lambda hexpair: int(hexpair, 16) / 256.0, channels)
# Platform-dependent default text fonts.
if _Windows:
    _canvas_tfonts = ['times new roman', 'lucida console']
else:
    _canvas_tfonts = ['times', 'lucidasans-24']
    pass # XXX need defaults here
def sleep(secs):
    """Pause for secs: plain time.sleep when no window exists, otherwise
    spin the Tk event loop so the GUI stays responsive."""
    global _root_window
    if _root_window is None:
        time.sleep(secs)
        return
    _root_window.update_idletasks()
    # Schedule quit() so mainloop() returns after the delay.
    _root_window.after(int(1000 * secs), _root_window.quit)
    _root_window.mainloop()
def begin_graphics(width=640, height=480, color=formatColor(0, 0, 0), title=None):
    """Create (or recreate) the Tk root window and drawing canvas."""

    global _root_window, _canvas, _canvas_x, _canvas_y, _canvas_xs, _canvas_ys, _bg_color

    # Check for duplicate call
    if _root_window is not None:
        # Lose the window.
        _root_window.destroy()

    # Save the canvas size parameters
    _canvas_xs, _canvas_ys = width - 1, height - 1
    _canvas_x, _canvas_y = 0, _canvas_ys
    _bg_color = color

    # Create the root window
    _root_window = Tkinter.Tk()
    _root_window.protocol('WM_DELETE_WINDOW', _destroy_window)
    _root_window.title(title or 'Graphics Window')
    _root_window.resizable(0, 0)

    # Create the canvas object
    try:
        _canvas = Tkinter.Canvas(_root_window, width=width, height=height)
        _canvas.pack()
        draw_background()
        _canvas.update()
    except:
        # Creation failed: reset module state so a retry is possible, then re-raise.
        _root_window = None
        raise

    # Bind to key-down and key-up events
    _root_window.bind( "<KeyPress>", _keypress )
    _root_window.bind( "<KeyRelease>", _keyrelease )
    _root_window.bind( "<FocusIn>", _clear_keys )
    _root_window.bind( "<FocusOut>", _clear_keys )
    _root_window.bind( "<Button-1>", _leftclick )
    _root_window.bind( "<Button-2>", _rightclick )
    _root_window.bind( "<Button-3>", _rightclick )
    _root_window.bind( "<Control-Button-1>", _ctrl_leftclick)
    _clear_keys()
# Most recent click location of each kind, polled by wait_for_click();
# None until such a click occurs, reset to None once consumed.
_leftclick_loc = None
_rightclick_loc = None
_ctrl_leftclick_loc = None

def _leftclick(event):
    # Record the pixel position of the latest left click.
    global _leftclick_loc
    _leftclick_loc = (event.x, event.y)

def _rightclick(event):
    # Record the pixel position of the latest right (or middle) click.
    global _rightclick_loc
    _rightclick_loc = (event.x, event.y)

def _ctrl_leftclick(event):
    # Record the pixel position of the latest ctrl+left click.
    global _ctrl_leftclick_loc
    _ctrl_leftclick_loc = (event.x, event.y)
def wait_for_click():
    """Block until any click arrives; return ((x, y), kind) and consume it."""
    global _leftclick_loc, _rightclick_loc, _ctrl_leftclick_loc
    while True:
        if _leftclick_loc is not None:
            loc, _leftclick_loc = _leftclick_loc, None
            return loc, 'left'
        if _rightclick_loc is not None:
            loc, _rightclick_loc = _rightclick_loc, None
            return loc, 'right'
        if _ctrl_leftclick_loc is not None:
            loc, _ctrl_leftclick_loc = _ctrl_leftclick_loc, None
            return loc, 'ctrl_left'
        # Nothing yet: let the event loop run, then poll again.
        sleep(0.05)
def draw_background():
    # Paint a background-colored polygon covering the whole canvas.
    corners = [(0,0), (0, _canvas_ys), (_canvas_xs, _canvas_ys), (_canvas_xs, 0)]
    polygon(corners, _bg_color, fillColor=_bg_color, filled=True, smoothed=False)

def _destroy_window(event=None):
    # Window-manager close handler: terminate the whole process.
    sys.exit(0)
    # global _root_window
    # _root_window.destroy()
    # _root_window = None
    #print "DESTROY"
def end_graphics():
|
||||||
|
global _root_window, _canvas, _mouse_enabled
|
||||||
|
try:
|
||||||
|
try:
|
||||||
|
sleep(1)
|
||||||
|
if _root_window != None:
|
||||||
|
_root_window.destroy()
|
||||||
|
except SystemExit, e:
|
||||||
|
print 'Ending graphics raised an exception:', e
|
||||||
|
finally:
|
||||||
|
_root_window = None
|
||||||
|
_canvas = None
|
||||||
|
_mouse_enabled = 0
|
||||||
|
_clear_keys()
|
||||||
|
|
||||||
|
def clear_screen(background=None):
    # Wipe all canvas items, repaint the background, and reset the pen position.
    # NOTE(review): 'background' is accepted but never used — verify callers.
    global _canvas_x, _canvas_y
    _canvas.delete('all')
    draw_background()
    _canvas_x, _canvas_y = 0, _canvas_ys
def polygon(coords, outlineColor, fillColor=None, filled=1, smoothed=1, behind=0, width=1):
    """Create a canvas polygon from (x, y) pairs; returns the canvas item id."""
    # Tk wants a flat [x0, y0, x1, y1, ...] coordinate list.
    flat = []
    for coord in coords:
        flat.extend([coord[0], coord[1]])
    if fillColor is None:
        fillColor = outlineColor
    if filled == 0:
        fillColor = ""  # Tk convention: empty fill string means unfilled
    poly = _canvas.create_polygon(flat, outline=outlineColor, fill=fillColor,
                                  smooth=smoothed, width=width)
    if behind > 0:
        _canvas.tag_lower(poly, behind)  # Higher should be more visible
    return poly
def square(pos, r, color, filled=1, behind=0):
    """Square of half-width r centered at pos, drawn via polygon()."""
    (cx, cy) = pos
    corners = [(cx - r, cy - r), (cx + r, cy - r), (cx + r, cy + r), (cx - r, cy + r)]
    return polygon(corners, color, color, filled, 0, behind=behind)
def circle(pos, r, outlineColor, fillColor, endpoints=None, style='pieslice', width=2):
    """Draw an arc/circle centered at pos; endpoints are start/stop angles in degrees."""
    (cx, cy) = pos
    x0, x1 = cx - r - 1, cx + r
    y0, y1 = cy - r - 1, cy + r
    if endpoints is None:
        e = [0, 359]
    else:
        e = list(endpoints)
    # Normalize so the sweep from e[0] to e[1] is positive.
    while e[0] > e[1]:
        e[1] = e[1] + 360

    return _canvas.create_arc(x0, y0, x1, y1, outline=outlineColor, fill=fillColor,
                              extent=e[1] - e[0], start=e[0], style=style, width=width)
def image(pos, file="../../blueghost.gif"):
    # Place a GIF image anchored at its top-left (NW) corner.
    x, y = pos
    # img = PhotoImage(file=file)
    return _canvas.create_image(x, y, image = Tkinter.PhotoImage(file=file), anchor = Tkinter.NW)


def refresh():
    # Flush pending canvas redraws without processing user events.
    _canvas.update_idletasks()

def moveCircle(id, pos, r, endpoints=None):
    # Reposition an existing arc item and update its sweep angles.
    global _canvas_x, _canvas_y

    x, y = pos
    #  x0, x1 = x - r, x + r + 1
    #  y0, y1 = y - r, y + r + 1
    x0, x1 = x - r - 1, x + r
    y0, y1 = y - r - 1, y + r
    if endpoints == None:
        e = [0, 359]
    else:
        e = list(endpoints)
    # Normalize so the sweep from e[0] to e[1] is positive.
    while e[0] > e[1]: e[1] = e[1] + 360

    edit(id, ('start', e[0]), ('extent', e[1] - e[0]))
    move_to(id, x0, y0)

def edit(id, *args):
    # Apply (option, value) pairs to an existing canvas item.
    _canvas.itemconfigure(id, **dict(args))
def text(pos, color, contents, font='Helvetica', size=12, style='normal', anchor="nw"):
    """Draw a text item; Tk treats negative sizes as pixel heights."""
    global _canvas_x, _canvas_y
    (x, y) = pos
    tk_font = (font, str(size), style)
    return _canvas.create_text(x, y, fill=color, text=contents, font=tk_font, anchor=anchor)
def changeText(id, newText, font=None, size=12, style='normal'):
    # Replace a text item's contents; optionally restyle its font.
    _canvas.itemconfigure(id, text=newText)
    if font != None:
        _canvas.itemconfigure(id, font=(font, '-%d' % size, style))

def changeColor(id, newColor):
    # Recolor an existing canvas item.
    _canvas.itemconfigure(id, fill=newColor)
def line(here, there, color=formatColor(0, 0, 0), width=2):
    # Straight segment between two (x, y) points; returns the canvas item id.
    x0, y0 = here[0], here[1]
    x1, y1 = there[0], there[1]
    return _canvas.create_line(x0, y0, x1, y1, fill=color, width=width)
##############################################################################
### Keypress handling ########################################################
##############################################################################

# We bind to key-down and key-up events.

# Keys currently held down, and keys seen since the last keys_waiting() call.
_keysdown = {}
_keyswaiting = {}
# This holds an unprocessed key release.  We delay key releases by up to
# one call to keys_pressed() to get round a problem with auto repeat.
_got_release = None

def _keypress(event):
    # KeyPress handler: mark the key as held and as newly seen.
    global _got_release
    #remap_arrows(event)
    _keysdown[event.keysym] = 1
    _keyswaiting[event.keysym] = 1
    # print event.char, event.keycode
    _got_release = None

def _keyrelease(event):
    # KeyRelease handler: forget the key; remember a release happened so
    # keys_pressed() can process it on its next call.
    global _got_release
    #remap_arrows(event)
    try:
        del _keysdown[event.keysym]
    except:
        pass
    _got_release = 1

def remap_arrows(event):
    # TURN ARROW PRESSES INTO LETTERS (SHOULD BE IN KEYBOARD AGENT)
    if event.char in ['a', 's', 'd', 'w']:
        return
    if event.keycode in [37, 101]: # LEFT ARROW (win / x)
        event.char = 'a'
    if event.keycode in [38, 99]: # UP ARROW
        event.char = 'w'
    if event.keycode in [39, 102]: # RIGHT ARROW
        event.char = 'd'
    if event.keycode in [40, 104]: # DOWN ARROW
        event.char = 's'

def _clear_keys(event=None):
    # Reset all keyboard state (also bound to window focus changes).
    global _keysdown, _got_release, _keyswaiting
    _keysdown = {}
    _keyswaiting = {}
    _got_release = None
def keys_pressed(d_o_e=Tkinter.tkinter.dooneevent,
                 d_w=Tkinter.tkinter.DONT_WAIT):
    # Pump pending Tk events, then report which keys are currently held.
    d_o_e(d_w)
    if _got_release:
        # Process one more event so a delayed key release is observed.
        d_o_e(d_w)
    return _keysdown.keys()
def keys_waiting():
    """Return keys pressed since the last call, clearing the buffer."""
    global _keyswaiting
    pending = _keyswaiting.keys()
    _keyswaiting = {}
    return pending
# Block for a list of keys...

def wait_for_keys():
    """Poll until at least one key is down; return the held keys."""
    pressed = []
    while not pressed:
        pressed = keys_pressed()
        sleep(0.05)  # note: also sleeps once after keys are detected
    return pressed
def remove_from_screen(x,
                       d_o_e=Tkinter.tkinter.dooneevent,
                       d_w=Tkinter.tkinter.DONT_WAIT):
    # Delete a canvas item, then pump one Tk event so the change shows.
    _canvas.delete(x)
    d_o_e(d_w)

def _adjust_coords(coord_list, x, y):
    # Shift a flat [x0, y0, x1, y1, ...] coordinate list in place by (x, y).
    for i in range(0, len(coord_list), 2):
        coord_list[i] = coord_list[i] + x
        coord_list[i + 1] = coord_list[i + 1] + y
    return coord_list
def move_to(object, x, y=None,
            d_o_e=Tkinter.tkinter.dooneevent,
            d_w=Tkinter.tkinter.DONT_WAIT):
    """Move a canvas item so its first coordinate pair lands at (x, y).

    Accepts either move_to(obj, x, y) or move_to(obj, (x, y)).
    Bug fix: the original raised a plain string ('incomprehensible
    coordinates'), which is a TypeError at runtime in any modern Python;
    it now raises Exception, matching move_by below.
    """
    if y is None:
        try:
            x, y = x
        except:
            raise Exception('incomprehensible coordinates')

    horiz = True
    newCoords = []
    current_x, current_y = _canvas.coords(object)[0:2]  # first point
    for coord in _canvas.coords(object):
        # Coordinates alternate x, y; shift every point by the same delta.
        if horiz:
            inc = x - current_x
        else:
            inc = y - current_y
        horiz = not horiz
        newCoords.append(coord + inc)

    _canvas.coords(object, *newCoords)
    d_o_e(d_w)
def move_by(object, x, y=None,
            d_o_e=Tkinter.tkinter.dooneevent,
            d_w=Tkinter.tkinter.DONT_WAIT, lift=False):
    """Shift a canvas item by (x, y); optionally raise it above other items.

    Accepts either move_by(obj, x, y) or move_by(obj, (x, y)).
    """
    if y is None:
        try:
            x, y = x
        except:
            raise Exception('incomprehensible coordinates')

    # Coordinates alternate x, y; add the matching delta to each.
    shifted = []
    horiz = True
    for coord in _canvas.coords(object):
        if horiz:
            delta = x
        else:
            delta = y
        horiz = not horiz
        shifted.append(coord + delta)

    _canvas.coords(object, *shifted)
    d_o_e(d_w)
    if lift:
        _canvas.tag_raise(object)
def writePostscript(filename):
    """Writes the current canvas to a postscript file."""
    # Fix: the 'file()' builtin is long-deprecated (removed in Python 3) and
    # the handle was not closed on error; use open() with a context manager.
    with open(filename, 'w') as psfile:
        psfile.write(_canvas.postscript(pageanchor='sw',
                                        y='0.c',
                                        x='0.c'))
# Pacman-style ghost outline in unit coordinates, used by the demo below.
ghost_shape = [
    (0, - 0.5),
    (0.25, - 0.75),
    (0.5, - 0.5),
    (0.75, - 0.75),
    (0.75, 0.5),
    (0.5, 0.75),
    (- 0.5, 0.75),
    (- 0.75, 0.5),
    (- 0.75, - 0.75),
    (- 0.5, - 0.5),
    (- 0.25, - 0.75)
    ]

if __name__ == '__main__':
    # Smoke test: draw a ghost and an arc, then linger briefly.
    begin_graphics()
    clear_screen()
    ghost_shape = [(x * 10 + 20, y * 10 + 20) for x, y in ghost_shape]
    g = polygon(ghost_shape, formatColor(1, 1, 1))
    move_to(g, (50, 50))
    # NOTE(review): circle() declares a required fillColor parameter that this
    # call omits — looks like it would raise TypeError if run; verify.
    circle((150, 150), 20, formatColor(0.7, 0.3, 0.0), endpoints=[15, - 15])
    sleep(2)
585
p3_rl/gridworld.py
Normal file
585
p3_rl/gridworld.py
Normal file
@@ -0,0 +1,585 @@
|
|||||||
|
# gridworld.py
|
||||||
|
# ------------
|
||||||
|
# Licensing Information: You are free to use or extend these projects for
|
||||||
|
# educational purposes provided that (1) you do not distribute or publish
|
||||||
|
# solutions, (2) you retain this notice, and (3) you provide clear
|
||||||
|
# attribution to UC Berkeley, including a link to http://ai.berkeley.edu.
|
||||||
|
#
|
||||||
|
# Attribution Information: The Pacman AI projects were developed at UC Berkeley.
|
||||||
|
# The core projects and autograders were primarily created by John DeNero
|
||||||
|
# (denero@cs.berkeley.edu) and Dan Klein (klein@cs.berkeley.edu).
|
||||||
|
# Student side autograding was added by Brad Miller, Nick Hay, and
|
||||||
|
# Pieter Abbeel (pabbeel@cs.berkeley.edu).
|
||||||
|
|
||||||
|
|
||||||
|
import random
|
||||||
|
import sys
|
||||||
|
import mdp
|
||||||
|
import environment
|
||||||
|
import util
|
||||||
|
import optparse
|
||||||
|
|
||||||
|
class Gridworld(mdp.MarkovDecisionProcess):
    """
    Gridworld MDP: states are (x, y) cells plus one true terminal state.

    Bug fixes: getStartState and getTransitionStatesAndProbs raised plain
    strings; string exceptions are a TypeError at runtime in any modern
    Python, so both now raise Exception instances.
    """
    def __init__(self, grid):
        # layout: accept either a Grid instance or a list of strings.
        if type(grid) == type([]): grid = makeGrid(grid)
        self.grid = grid

        # parameters
        self.livingReward = 0.0
        self.noise = 0.2

    def setLivingReward(self, reward):
        """
        The (negative) reward for exiting "normal" states.

        Note that in the R+N text, this reward is on entering
        a state and therefore is not clearly part of the state's
        future rewards.
        """
        self.livingReward = reward

    def setNoise(self, noise):
        """
        The probability of moving in an unintended direction.
        """
        self.noise = noise


    def getPossibleActions(self, state):
        """
        Returns list of valid actions for 'state'.

        Note that you can request moves into walls and
        that "exit" states transition to the terminal
        state under the special action "done".
        """
        if state == self.grid.terminalState:
            return ()
        x,y = state
        # Numeric cells are exit states with a single 'exit' action.
        if type(self.grid[x][y]) == int:
            return ('exit',)
        return ('north','west','south','east')

    def getStates(self):
        """
        Return list of all states.
        """
        # The true terminal state.
        states = [self.grid.terminalState]
        for x in range(self.grid.width):
            for y in range(self.grid.height):
                if self.grid[x][y] != '#':
                    state = (x,y)
                    states.append(state)
        return states

    def getReward(self, state, action, nextState):
        """
        Get reward for state, action, nextState transition.

        Note that the reward depends only on the state being
        departed (as in the R+N book examples, which more or
        less use this convention).
        """
        if state == self.grid.terminalState:
            return 0.0
        x, y = state
        cell = self.grid[x][y]
        # Numeric cells carry their own exit reward.
        if type(cell) == int or type(cell) == float:
            return cell
        return self.livingReward

    def getStartState(self):
        # The start cell is marked 'S' in the layout.
        for x in range(self.grid.width):
            for y in range(self.grid.height):
                if self.grid[x][y] == 'S':
                    return (x, y)
        raise Exception('Grid has no start state')

    def isTerminal(self, state):
        """
        Only the TERMINAL_STATE state is *actually* a terminal state.
        The other "exit" states are technically non-terminals with
        a single action "exit" which leads to the true terminal state.
        This convention is to make the grids line up with the examples
        in the R+N textbook.
        """
        return state == self.grid.terminalState


    def getTransitionStatesAndProbs(self, state, action):
        """
        Returns list of (nextState, prob) pairs
        representing the states reachable
        from 'state' by taking 'action' along
        with their transition probabilities.
        """

        if action not in self.getPossibleActions(state):
            raise Exception("Illegal action!")

        if self.isTerminal(state):
            return []

        x, y = state

        # Exit cells (numeric rewards) go straight to the terminal state.
        if type(self.grid[x][y]) == int or type(self.grid[x][y]) == float:
            termState = self.grid.terminalState
            return [(termState, 1.0)]

        successors = []

        # Where each compass move would land (bumps into walls/edges stay put).
        northState = (self.__isAllowed(y+1,x) and (x,y+1)) or state
        westState = (self.__isAllowed(y,x-1) and (x-1,y)) or state
        southState = (self.__isAllowed(y-1,x) and (x,y-1)) or state
        eastState = (self.__isAllowed(y,x+1) and (x+1,y)) or state

        # Intended direction gets 1 - noise; the two perpendicular
        # directions split the remaining probability mass equally.
        if action == 'north' or action == 'south':
            if action == 'north':
                successors.append((northState,1-self.noise))
            else:
                successors.append((southState,1-self.noise))

            massLeft = self.noise
            successors.append((westState,massLeft/2.0))
            successors.append((eastState,massLeft/2.0))

        if action == 'west' or action == 'east':
            if action == 'west':
                successors.append((westState,1-self.noise))
            else:
                successors.append((eastState,1-self.noise))

            massLeft = self.noise
            successors.append((northState,massLeft/2.0))
            successors.append((southState,massLeft/2.0))

        # Merge duplicate next states (e.g. several moves blocked by walls).
        successors = self.__aggregate(successors)

        return successors

    def __aggregate(self, statesAndProbs):
        # Sum probabilities of repeated states into a single entry each.
        counter = util.Counter()
        for state, prob in statesAndProbs:
            counter[state] += prob
        newStatesAndProbs = []
        for state, prob in counter.items():
            newStatesAndProbs.append((state, prob))
        return newStatesAndProbs

    def __isAllowed(self, y, x):
        # True if (x, y) is inside the grid and not a wall.  NOTE: takes y first.
        if y < 0 or y >= self.grid.height: return False
        if x < 0 or x >= self.grid.width: return False
        return self.grid[x][y] != '#'
class GridworldEnvironment(environment.Environment):
    """Stateful simulation wrapper around a Gridworld MDP.

    Bug fix: the two probability sanity checks in getRandomNextState raised
    plain strings, which is a TypeError at runtime in any modern Python;
    both now raise Exception instances.
    """

    def __init__(self, gridWorld):
        self.gridWorld = gridWorld
        self.reset()

    def getCurrentState(self):
        return self.state

    def getPossibleActions(self, state):
        return self.gridWorld.getPossibleActions(state)

    def doAction(self, action):
        """Sample a transition from the current state; returns (nextState, reward)."""
        state = self.getCurrentState()
        (nextState, reward) = self.getRandomNextState(state, action)
        self.state = nextState
        return (nextState, reward)

    def getRandomNextState(self, state, action, randObj=None):
        """Sample (nextState, reward) from the MDP's transition distribution.

        randObj: optional random.Random instance for reproducible sampling.
        """
        rand = -1.0
        if randObj is None:
            rand = random.random()
        else:
            rand = randObj.random()
        # Walk the cumulative distribution until the sample falls inside it.
        sum = 0.0
        successors = self.gridWorld.getTransitionStatesAndProbs(state, action)
        for nextState, prob in successors:
            sum += prob
            if sum > 1.0:
                raise Exception('Total transition probability more than one; sample failure.')
            if rand < sum:
                reward = self.gridWorld.getReward(state, action, nextState)
                return (nextState, reward)
        raise Exception('Total transition probability less than one; sample failure.')

    def reset(self):
        # Return the agent to the MDP's start state.
        self.state = self.gridWorld.getStartState()
class Grid:
    """
    A 2-dimensional array of immutables backed by a list of lists.  Data is accessed
    via grid[x][y] where (x,y) are cartesian coordinates with x horizontal,
    y vertical and the origin (0,0) in the bottom left corner.

    The __str__ method constructs an output that is oriented appropriately.
    """
    def __init__(self, width, height, initialValue=' '):
        self.width = width
        self.height = height
        # Column-major storage: data[x][y] matches the grid[x][y] indexing.
        self.data = [[initialValue for y in range(height)] for x in range(width)]
        self.terminalState = 'TERMINAL_STATE'

    def __getitem__(self, i):
        # Returns column i (a list), so grid[x][y] reads data[x][y].
        return self.data[i]

    def __setitem__(self, key, item):
        self.data[key] = item

    def __eq__(self, other):
        if other is None: return False
        return self.data == other.data

    def __hash__(self):
        # Bug fix: the original hashed self.data directly, but lists are
        # unhashable, so hash(grid) always raised TypeError.  Hash an
        # immutable tuple-of-tuples snapshot of the contents instead, which
        # is consistent with __eq__ (equal data => equal hash).
        return hash(tuple(tuple(column) for column in self.data))

    def copy(self):
        # Copies the columns, so cell writes to the copy don't affect self.
        g = Grid(self.width, self.height)
        g.data = [x[:] for x in self.data]
        return g

    def deepCopy(self):
        return self.copy()

    def shallowCopy(self):
        # Shares the underlying data lists with self (writes are visible to both).
        g = Grid(self.width, self.height)
        g.data = self.data
        return g

    def _getLegacyText(self):
        # Row-major rows with the top row first -- the orientation used for printing.
        t = [[self.data[x][y] for x in range(self.width)] for y in range(self.height)]
        t.reverse()
        return t

    def __str__(self):
        return str(self._getLegacyText())
|
||||||
|
|
||||||
|
def makeGrid(gridString):
    """Build a Grid from a list of rows given top row first, flipping the
    vertical axis so that (0,0) ends up in the bottom-left corner."""
    height = len(gridString)
    width = len(gridString[0])
    grid = Grid(width, height)
    for rowIndex, row in enumerate(gridString):
        targetY = height - rowIndex - 1
        for x, cell in enumerate(row):
            grid[x][targetY] = cell
    return grid
|
||||||
|
|
||||||
|
def getCliffGrid():
    # Rows are listed top-to-bottom; makeGrid flips them so (0,0) is the
    # bottom-left cell.  'S' is the start, integers are terminal exit
    # rewards, and the bottom row of -100s is the cliff.
    grid = [[' ',' ',' ',' ',' '],
            ['S',' ',' ',' ',10],
            [-100,-100, -100, -100, -100]]
    return Gridworld(makeGrid(grid))
|
||||||
|
|
||||||
|
def getCliffGrid2():
    # Cliff variant with an extra +8 exit immediately left of the start.
    # NOTE(review): unlike getCliffGrid this passes the raw row list without
    # makeGrid -- presumably Gridworld's constructor converts lists itself;
    # confirm against Gridworld.__init__.
    grid = [[' ',' ',' ',' ',' '],
            [8,'S',' ',' ',10],
            [-100,-100, -100, -100, -100]]
    return Gridworld(grid)
|
||||||
|
|
||||||
|
def getDiscountGrid():
    # '#' cells are walls.  The agent must weigh the nearby +1 exit against
    # the distant +10 exit while avoiding the -10 row at the bottom.
    grid = [[' ',' ',' ',' ',' '],
            [' ','#',' ',' ',' '],
            [' ','#', 1,'#', 10],
            ['S',' ',' ',' ',' '],
            [-10,-10, -10, -10, -10]]
    return Gridworld(grid)
|
||||||
|
|
||||||
|
def getBridgeGrid():
    # A narrow bridge between a small +1 exit and a large +10 exit, with
    # -100 chasms on both sides.
    grid = [[ '#',-100, -100, -100, -100, -100, '#'],
            [ 1, 'S', ' ', ' ', ' ', ' ', 10],
            [ '#',-100, -100, -100, -100, -100, '#']]
    return Gridworld(grid)
|
||||||
|
|
||||||
|
def getBookGrid():
    # The classic 4x3 gridworld from Russell & Norvig: +1 and -1 exits in
    # the top-right region, one wall, start in the bottom-left.
    grid = [[' ',' ',' ',+1],
            [' ','#',' ',-1],
            ['S',' ',' ',' ']]
    return Gridworld(grid)
|
||||||
|
|
||||||
|
def getMazeGrid():
    # A small maze with a single +1 exit at the top-right and the start in
    # the bottom-left corner; '#' cells are walls.
    grid = [[' ',' ',' ',+1],
            ['#','#',' ','#'],
            [' ','#',' ',' '],
            [' ','#','#',' '],
            ['S',' ',' ',' ']]
    return Gridworld(grid)
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
|
def getUserAction(state, actionFunction):
    """
    Get an action from the user (rather than the agent).

    Used for debugging and lecture demos.

    state          -- current state, passed to actionFunction
    actionFunction -- callable returning the legal actions for a state
    Blocks on keyboard input; 'q' exits the program.
    """
    import graphicsUtils
    action = None
    while True:
        keys = graphicsUtils.wait_for_keys()
        if 'Up' in keys: action = 'north'
        if 'Down' in keys: action = 'south'
        if 'Left' in keys: action = 'west'
        if 'Right' in keys: action = 'east'
        if 'q' in keys: sys.exit(0)
        # Idiom fix: identity comparison for None (was '== None').
        if action is None: continue
        break
    actions = actionFunction(state)
    if action not in actions:
        # Requested direction is illegal here: fall back to the first legal action.
        action = actions[0]
    return action
|
||||||
|
|
||||||
|
def printString(x): print x  # message callback used by runEpisode (Python 2 print statement)
|
||||||
|
|
||||||
|
def runEpisode(agent, environment, discount, decision, display, message, pause, episode):
    """Run one episode in the environment and return its discounted return.

    agent       -- learner; startEpisode/observeTransition/stopEpisode hooks
                   are invoked only if the agent defines them
    environment -- must support reset/getCurrentState/getPossibleActions/doAction
    discount    -- per-step discount factor applied to rewards
    decision    -- callable mapping state -> action (agent- or user-controlled)
    display, message, pause -- UI callbacks invoked each step
    episode     -- episode number, used only in status messages
    """
    returns = 0
    totalDiscount = 1.0
    environment.reset()
    if 'startEpisode' in dir(agent): agent.startEpisode()
    message("BEGINNING EPISODE: "+str(episode)+"\n")
    while True:

        # DISPLAY CURRENT STATE
        state = environment.getCurrentState()
        display(state)
        pause()

        # END IF IN A TERMINAL STATE
        actions = environment.getPossibleActions(state)
        if len(actions) == 0:
            message("EPISODE "+str(episode)+" COMPLETE: RETURN WAS "+str(returns)+"\n")
            return returns

        # GET ACTION (USUALLY FROM AGENT)
        action = decision(state)
        if action is None:
            # Bug fix: raising a bare string is illegal in modern Python
            # (TypeError); raise a real exception instead.
            raise Exception('Error: Agent returned None action')

        # EXECUTE ACTION
        nextState, reward = environment.doAction(action)
        message("Started in state: "+str(state)+
                "\nTook action: "+str(action)+
                "\nEnded in state: "+str(nextState)+
                "\nGot reward: "+str(reward)+"\n")
        # UPDATE LEARNER
        if 'observeTransition' in dir(agent):
            agent.observeTransition(state, action, nextState, reward)

        returns += reward * totalDiscount
        totalDiscount *= discount

    # NOTE(review): unreachable -- the loop above only exits via 'return',
    # so stopEpisode is never called from here (preserved from the original).
    if 'stopEpisode' in dir(agent):
        agent.stopEpisode()
|
||||||
|
|
||||||
|
def parseOptions():
    """Parse the gridworld command-line options and return the opts object.

    Also resolves conflicts between options: manual mode disables non-q
    agents and forces pausing, while text-only or quiet display disables
    pausing.
    """
    optParser = optparse.OptionParser()
    optParser.add_option('-d', '--discount',action='store',
                         type='float',dest='discount',default=0.9,
                         help='Discount on future (default %default)')
    optParser.add_option('-r', '--livingReward',action='store',
                         type='float',dest='livingReward',default=0.0,
                         metavar="R", help='Reward for living for a time step (default %default)')
    optParser.add_option('-n', '--noise',action='store',
                         type='float',dest='noise',default=0.2,
                         metavar="P", help='How often action results in ' +
                         'unintended direction (default %default)' )
    optParser.add_option('-e', '--epsilon',action='store',
                         type='float',dest='epsilon',default=0.3,
                         metavar="E", help='Chance of taking a random action in q-learning (default %default)')
    optParser.add_option('-l', '--learningRate',action='store',
                         type='float',dest='learningRate',default=0.5,
                         metavar="P", help='TD learning rate (default %default)' )
    optParser.add_option('-i', '--iterations',action='store',
                         type='int',dest='iters',default=10,
                         metavar="K", help='Number of rounds of value iteration (default %default)')
    optParser.add_option('-k', '--episodes',action='store',
                         type='int',dest='episodes',default=1,
                         metavar="K", help='Number of epsiodes of the MDP to run (default %default)')
    optParser.add_option('-g', '--grid',action='store',
                         metavar="G", type='string',dest='grid',default="BookGrid",
                         help='Grid to use (case sensitive; options are BookGrid, BridgeGrid, CliffGrid, MazeGrid, default %default)' )
    optParser.add_option('-w', '--windowSize', metavar="X", type='int',dest='gridSize',default=150,
                         help='Request a window width of X pixels *per grid cell* (default %default)')
    optParser.add_option('-a', '--agent',action='store', metavar="A",
                         type='string',dest='agent',default="random",
                         help='Agent type (options are \'random\', \'value\' and \'q\', default %default)')
    optParser.add_option('-t', '--text',action='store_true',
                         dest='textDisplay',default=False,
                         help='Use text-only ASCII display')
    optParser.add_option('-p', '--pause',action='store_true',
                         dest='pause',default=False,
                         help='Pause GUI after each time step when running the MDP')
    optParser.add_option('-q', '--quiet',action='store_true',
                         dest='quiet',default=False,
                         help='Skip display of any learning episodes')
    # NOTE(review): '-s' passes the builtin float (not the string 'float');
    # optparse converts builtin type objects to their names, so this works.
    optParser.add_option('-s', '--speed',action='store', metavar="S", type=float,
                         dest='speed',default=1.0,
                         help='Speed of animation, S > 1.0 is faster, 0.0 < S < 1.0 is slower (default %default)')
    optParser.add_option('-m', '--manual',action='store_true',
                         dest='manual',default=False,
                         help='Manually control agent')
    optParser.add_option('-v', '--valueSteps',action='store_true' ,default=False,
                         help='Display each step of value iteration')

    opts, args = optParser.parse_args()

    # Manual control only makes sense for the q-learning agent.
    if opts.manual and opts.agent != 'q':
        print '## Disabling Agents in Manual Mode (-m) ##'
        opts.agent = None

    # MANAGE CONFLICTS
    if opts.textDisplay or opts.quiet:
        # if opts.quiet:
        opts.pause = False
        # opts.manual = False

    if opts.manual:
        opts.pause = True

    return opts
|
||||||
|
|
||||||
|
|
||||||
|
if __name__ == '__main__':
|
||||||
|
|
||||||
|
opts = parseOptions()
|
||||||
|
|
||||||
|
###########################
|
||||||
|
# GET THE GRIDWORLD
|
||||||
|
###########################
|
||||||
|
|
||||||
|
import gridworld
|
||||||
|
mdpFunction = getattr(gridworld, "get"+opts.grid)
|
||||||
|
mdp = mdpFunction()
|
||||||
|
mdp.setLivingReward(opts.livingReward)
|
||||||
|
mdp.setNoise(opts.noise)
|
||||||
|
env = gridworld.GridworldEnvironment(mdp)
|
||||||
|
|
||||||
|
|
||||||
|
###########################
|
||||||
|
# GET THE DISPLAY ADAPTER
|
||||||
|
###########################
|
||||||
|
|
||||||
|
import textGridworldDisplay
|
||||||
|
display = textGridworldDisplay.TextGridworldDisplay(mdp)
|
||||||
|
if not opts.textDisplay:
|
||||||
|
import graphicsGridworldDisplay
|
||||||
|
display = graphicsGridworldDisplay.GraphicsGridworldDisplay(mdp, opts.gridSize, opts.speed)
|
||||||
|
try:
|
||||||
|
display.start()
|
||||||
|
except KeyboardInterrupt:
|
||||||
|
sys.exit(0)
|
||||||
|
|
||||||
|
###########################
|
||||||
|
# GET THE AGENT
|
||||||
|
###########################
|
||||||
|
|
||||||
|
import valueIterationAgents, qlearningAgents
|
||||||
|
a = None
|
||||||
|
if opts.agent == 'value':
|
||||||
|
a = valueIterationAgents.ValueIterationAgent(mdp, opts.discount, opts.iters)
|
||||||
|
elif opts.agent == 'q':
|
||||||
|
#env.getPossibleActions, opts.discount, opts.learningRate, opts.epsilon
|
||||||
|
#simulationFn = lambda agent, state: simulation.GridworldSimulation(agent,state,mdp)
|
||||||
|
gridWorldEnv = GridworldEnvironment(mdp)
|
||||||
|
actionFn = lambda state: mdp.getPossibleActions(state)
|
||||||
|
qLearnOpts = {'gamma': opts.discount,
|
||||||
|
'alpha': opts.learningRate,
|
||||||
|
'epsilon': opts.epsilon,
|
||||||
|
'actionFn': actionFn}
|
||||||
|
a = qlearningAgents.QLearningAgent(**qLearnOpts)
|
||||||
|
elif opts.agent == 'random':
|
||||||
|
# # No reason to use the random agent without episodes
|
||||||
|
if opts.episodes == 0:
|
||||||
|
opts.episodes = 10
|
||||||
|
class RandomAgent:
|
||||||
|
def getAction(self, state):
|
||||||
|
return random.choice(mdp.getPossibleActions(state))
|
||||||
|
def getValue(self, state):
|
||||||
|
return 0.0
|
||||||
|
def getQValue(self, state, action):
|
||||||
|
return 0.0
|
||||||
|
def getPolicy(self, state):
|
||||||
|
"NOTE: 'random' is a special policy value; don't use it in your code."
|
||||||
|
return 'random'
|
||||||
|
def update(self, state, action, nextState, reward):
|
||||||
|
pass
|
||||||
|
a = RandomAgent()
|
||||||
|
else:
|
||||||
|
if not opts.manual: raise 'Unknown agent type: '+opts.agent
|
||||||
|
|
||||||
|
|
||||||
|
###########################
|
||||||
|
# RUN EPISODES
|
||||||
|
###########################
|
||||||
|
# DISPLAY Q/V VALUES BEFORE SIMULATION OF EPISODES
|
||||||
|
try:
|
||||||
|
if not opts.manual and opts.agent == 'value':
|
||||||
|
if opts.valueSteps:
|
||||||
|
for i in range(opts.iters):
|
||||||
|
tempAgent = valueIterationAgents.ValueIterationAgent(mdp, opts.discount, i)
|
||||||
|
display.displayValues(tempAgent, message = "VALUES AFTER "+str(i)+" ITERATIONS")
|
||||||
|
display.pause()
|
||||||
|
|
||||||
|
display.displayValues(a, message = "VALUES AFTER "+str(opts.iters)+" ITERATIONS")
|
||||||
|
display.pause()
|
||||||
|
display.displayQValues(a, message = "Q-VALUES AFTER "+str(opts.iters)+" ITERATIONS")
|
||||||
|
display.pause()
|
||||||
|
except KeyboardInterrupt:
|
||||||
|
sys.exit(0)
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
|
# FIGURE OUT WHAT TO DISPLAY EACH TIME STEP (IF ANYTHING)
|
||||||
|
displayCallback = lambda x: None
|
||||||
|
if not opts.quiet:
|
||||||
|
if opts.manual and opts.agent == None:
|
||||||
|
displayCallback = lambda state: display.displayNullValues(state)
|
||||||
|
else:
|
||||||
|
if opts.agent == 'random': displayCallback = lambda state: display.displayValues(a, state, "CURRENT VALUES")
|
||||||
|
if opts.agent == 'value': displayCallback = lambda state: display.displayValues(a, state, "CURRENT VALUES")
|
||||||
|
if opts.agent == 'q': displayCallback = lambda state: display.displayQValues(a, state, "CURRENT Q-VALUES")
|
||||||
|
|
||||||
|
messageCallback = lambda x: printString(x)
|
||||||
|
if opts.quiet:
|
||||||
|
messageCallback = lambda x: None
|
||||||
|
|
||||||
|
# FIGURE OUT WHETHER TO WAIT FOR A KEY PRESS AFTER EACH TIME STEP
|
||||||
|
pauseCallback = lambda : None
|
||||||
|
if opts.pause:
|
||||||
|
pauseCallback = lambda : display.pause()
|
||||||
|
|
||||||
|
# FIGURE OUT WHETHER THE USER WANTS MANUAL CONTROL (FOR DEBUGGING AND DEMOS)
|
||||||
|
if opts.manual:
|
||||||
|
decisionCallback = lambda state : getUserAction(state, mdp.getPossibleActions)
|
||||||
|
else:
|
||||||
|
decisionCallback = a.getAction
|
||||||
|
|
||||||
|
# RUN EPISODES
|
||||||
|
if opts.episodes > 0:
|
||||||
|
print
|
||||||
|
print "RUNNING", opts.episodes, "EPISODES"
|
||||||
|
print
|
||||||
|
returns = 0
|
||||||
|
for episode in range(1, opts.episodes+1):
|
||||||
|
returns += runEpisode(a, env, opts.discount, decisionCallback, displayCallback, messageCallback, pauseCallback, episode)
|
||||||
|
if opts.episodes > 0:
|
||||||
|
print
|
||||||
|
print "AVERAGE RETURNS FROM START STATE: "+str((returns+0.0) / opts.episodes)
|
||||||
|
print
|
||||||
|
print
|
||||||
|
|
||||||
|
# DISPLAY POST-LEARNING VALUES / Q-VALUES
|
||||||
|
if opts.agent == 'q' and not opts.manual:
|
||||||
|
try:
|
||||||
|
display.displayQValues(a, message = "Q-VALUES AFTER "+str(opts.episodes)+" EPISODES")
|
||||||
|
display.pause()
|
||||||
|
display.displayValues(a, message = "VALUES AFTER "+str(opts.episodes)+" EPISODES")
|
||||||
|
display.pause()
|
||||||
|
except KeyboardInterrupt:
|
||||||
|
sys.exit(0)
|
||||||
84
p3_rl/keyboardAgents.py
Normal file
84
p3_rl/keyboardAgents.py
Normal file
@@ -0,0 +1,84 @@
|
|||||||
|
# keyboardAgents.py
|
||||||
|
# -----------------
|
||||||
|
# Licensing Information: You are free to use or extend these projects for
|
||||||
|
# educational purposes provided that (1) you do not distribute or publish
|
||||||
|
# solutions, (2) you retain this notice, and (3) you provide clear
|
||||||
|
# attribution to UC Berkeley, including a link to http://ai.berkeley.edu.
|
||||||
|
#
|
||||||
|
# Attribution Information: The Pacman AI projects were developed at UC Berkeley.
|
||||||
|
# The core projects and autograders were primarily created by John DeNero
|
||||||
|
# (denero@cs.berkeley.edu) and Dan Klein (klein@cs.berkeley.edu).
|
||||||
|
# Student side autograding was added by Brad Miller, Nick Hay, and
|
||||||
|
# Pieter Abbeel (pabbeel@cs.berkeley.edu).
|
||||||
|
|
||||||
|
|
||||||
|
from game import Agent
|
||||||
|
from game import Directions
|
||||||
|
import random
|
||||||
|
|
||||||
|
class KeyboardAgent(Agent):
    """
    An agent controlled by the keyboard (WASD or the arrow keys).
    """
    # NOTE: Arrow keys also work.
    WEST_KEY = 'a'
    EAST_KEY = 'd'
    NORTH_KEY = 'w'
    SOUTH_KEY = 's'
    STOP_KEY = 'q'

    def __init__( self, index = 0 ):
        # Direction of the previous move; reused when no key maps to a move.
        self.lastMove = Directions.STOP
        self.index = index
        # Most recent non-empty set of pressed/queued keys.
        self.keys = []

    def getAction( self, state):
        """Return a legal action based on the current keyboard state."""
        from graphicsUtils import keys_waiting
        from graphicsUtils import keys_pressed
        keys = keys_waiting() + keys_pressed()
        # Idiom fix: truthiness test instead of 'keys != []'.
        if keys:
            self.keys = keys

        legal = state.getLegalActions(self.index)
        move = self.getMove(legal)

        if move == Directions.STOP:
            # Try to move in the same direction as before
            if self.lastMove in legal:
                move = self.lastMove

        if (self.STOP_KEY in self.keys) and Directions.STOP in legal: move = Directions.STOP

        if move not in legal:
            # No usable keypress: pick a random legal action so the game keeps moving.
            move = random.choice(legal)

        self.lastMove = move
        return move

    def getMove(self, legal):
        """Map pressed keys (letters or arrows) to a legal direction, or STOP."""
        move = Directions.STOP
        if   (self.WEST_KEY in self.keys or 'Left' in self.keys) and Directions.WEST in legal:  move = Directions.WEST
        if   (self.EAST_KEY in self.keys or 'Right' in self.keys) and Directions.EAST in legal: move = Directions.EAST
        if   (self.NORTH_KEY in self.keys or 'Up' in self.keys) and Directions.NORTH in legal:   move = Directions.NORTH
        if   (self.SOUTH_KEY in self.keys or 'Down' in self.keys) and Directions.SOUTH in legal: move = Directions.SOUTH
        return move
|
||||||
|
|
||||||
|
class KeyboardAgent2(KeyboardAgent):
    """
    A second agent controlled by the keyboard (IJKL keys; no arrow aliases).
    """
    # NOTE: Arrow keys also work.
    WEST_KEY = 'j'
    EAST_KEY = "l"
    NORTH_KEY = 'i'
    SOUTH_KEY = 'k'
    STOP_KEY = 'u'

    def getMove(self, legal):
        """Map this agent's keys to a legal direction, or STOP.

        Later bindings win when several keys are held, matching the
        base-class precedence (west, east, north, south).
        """
        bindings = ((self.WEST_KEY, Directions.WEST),
                    (self.EAST_KEY, Directions.EAST),
                    (self.NORTH_KEY, Directions.NORTH),
                    (self.SOUTH_KEY, Directions.SOUTH))
        move = Directions.STOP
        for key, direction in bindings:
            if key in self.keys and direction in legal:
                move = direction
        return move
|
||||||
149
p3_rl/layout.py
Normal file
149
p3_rl/layout.py
Normal file
@@ -0,0 +1,149 @@
|
|||||||
|
# layout.py
|
||||||
|
# ---------
|
||||||
|
# Licensing Information: You are free to use or extend these projects for
|
||||||
|
# educational purposes provided that (1) you do not distribute or publish
|
||||||
|
# solutions, (2) you retain this notice, and (3) you provide clear
|
||||||
|
# attribution to UC Berkeley, including a link to http://ai.berkeley.edu.
|
||||||
|
#
|
||||||
|
# Attribution Information: The Pacman AI projects were developed at UC Berkeley.
|
||||||
|
# The core projects and autograders were primarily created by John DeNero
|
||||||
|
# (denero@cs.berkeley.edu) and Dan Klein (klein@cs.berkeley.edu).
|
||||||
|
# Student side autograding was added by Brad Miller, Nick Hay, and
|
||||||
|
# Pieter Abbeel (pabbeel@cs.berkeley.edu).
|
||||||
|
|
||||||
|
|
||||||
|
from util import manhattanDistance
|
||||||
|
from game import Grid
|
||||||
|
import os
|
||||||
|
import random
|
||||||
|
|
||||||
|
VISIBILITY_MATRIX_CACHE = {}
|
||||||
|
|
||||||
|
class Layout:
    """
    A Layout manages the static information about the game board:
    walls, food, capsules, and initial agent positions.
    """

    def __init__(self, layoutText):
        # layoutText is a list of equal-length strings, top row first.
        self.width = len(layoutText[0])
        self.height= len(layoutText)
        self.walls = Grid(self.width, self.height, False)
        self.food = Grid(self.width, self.height, False)
        self.capsules = []
        # List of (agentIndex, (x, y)); converted to (isPacman, pos) below.
        self.agentPositions = []
        self.numGhosts = 0
        self.processLayoutText(layoutText)
        self.layoutText = layoutText
        self.totalFood = len(self.food.asList())
        # self.initializeVisibilityMatrix()

    def getNumGhosts(self):
        return self.numGhosts

    def initializeVisibilityMatrix(self):
        # Precompute, for each open cell and direction, the set of half-step
        # positions visible along that ray until a wall is hit.  Cached per
        # layout text since this is expensive.
        global VISIBILITY_MATRIX_CACHE
        if reduce(str.__add__, self.layoutText) not in VISIBILITY_MATRIX_CACHE:
            from game import Directions
            vecs = [(-0.5,0), (0.5,0),(0,-0.5),(0,0.5)]
            dirs = [Directions.NORTH, Directions.SOUTH, Directions.WEST, Directions.EAST]
            vis = Grid(self.width, self.height, {Directions.NORTH:set(), Directions.SOUTH:set(), Directions.EAST:set(), Directions.WEST:set(), Directions.STOP:set()})
            for x in range(self.width):
                for y in range(self.height):
                    if self.walls[x][y] == False:
                        for vec, direction in zip(vecs, dirs):
                            dx, dy = vec
                            nextx, nexty = x + dx, y + dy
                            while (nextx + nexty) != int(nextx) + int(nexty) or not self.walls[int(nextx)][int(nexty)] :
                                vis[x][y][direction].add((nextx, nexty))
                                # Bug fix: advance the ray from its current
                                # position.  The original re-assigned
                                # 'x + dx, y + dy' (the first step) here,
                                # so the loop never terminated once entered.
                                nextx, nexty = nextx + dx, nexty + dy
            self.visibility = vis
            VISIBILITY_MATRIX_CACHE[reduce(str.__add__, self.layoutText)] = vis
        else:
            self.visibility = VISIBILITY_MATRIX_CACHE[reduce(str.__add__, self.layoutText)]

    def isWall(self, pos):
        x, col = pos
        return self.walls[x][col]

    def getRandomLegalPosition(self):
        # Rejection-sample until a non-wall cell is found.
        x = random.choice(range(self.width))
        y = random.choice(range(self.height))
        while self.isWall( (x, y) ):
            x = random.choice(range(self.width))
            y = random.choice(range(self.height))
        return (x,y)

    def getRandomCorner(self):
        poses = [(1,1), (1, self.height - 2), (self.width - 2, 1), (self.width - 2, self.height - 2)]
        return random.choice(poses)

    def getFurthestCorner(self, pacPos):
        # Corner maximizing Manhattan distance from pacPos.
        poses = [(1,1), (1, self.height - 2), (self.width - 2, 1), (self.width - 2, self.height - 2)]
        dist, pos = max([(manhattanDistance(p, pacPos), p) for p in poses])
        return pos

    def isVisibleFrom(self, ghostPos, pacPos, pacDirection):
        row, col = [int(x) for x in pacPos]
        return ghostPos in self.visibility[row][col][pacDirection]

    def __str__(self):
        return "\n".join(self.layoutText)

    def deepCopy(self):
        return Layout(self.layoutText[:])

    def processLayoutText(self, layoutText):
        """
        Coordinates are flipped from the input format to the (x,y) convention here

        The shape of the maze.  Each character
        represents a different type of object.
         % - Wall
         . - Food
         o - Capsule
         G - Ghost
         P - Pacman
        Other characters are ignored.
        """
        maxY = self.height - 1
        for y in range(self.height):
            for x in range(self.width):
                layoutChar = layoutText[maxY - y][x]
                self.processLayoutChar(x, y, layoutChar)
        self.agentPositions.sort()
        # First agent (index 0) is Pacman; the rest are ghosts.
        self.agentPositions = [ ( i == 0, pos) for i, pos in self.agentPositions]

    def processLayoutChar(self, x, y, layoutChar):
        # Record a single layout character at grid position (x, y).
        if layoutChar == '%':
            self.walls[x][y] = True
        elif layoutChar == '.':
            self.food[x][y] = True
        elif layoutChar == 'o':
            self.capsules.append((x, y))
        elif layoutChar == 'P':
            self.agentPositions.append( (0, (x, y) ) )
        elif layoutChar in ['G']:
            self.agentPositions.append( (1, (x, y) ) )
            self.numGhosts += 1
        elif layoutChar in ['1', '2', '3', '4']:
            # Numbered ghosts keep their explicit agent index.
            self.agentPositions.append( (int(layoutChar), (x,y)))
            self.numGhosts += 1
|
||||||
|
def getLayout(name, back = 2):
    """Load a layout by name, searching ./layouts then the current directory,
    then retrying from up to `back` parent directories.  Returns the Layout,
    or None if the file cannot be found anywhere.
    """
    # Idiom fix throughout: identity comparison for None (was '== None').
    if name.endswith('.lay'):
        layout = tryToLoad('layouts/' + name)
        if layout is None: layout = tryToLoad(name)
    else:
        layout = tryToLoad('layouts/' + name + '.lay')
        if layout is None: layout = tryToLoad(name + '.lay')
    if layout is None and back >= 0:
        # Retry from the parent directory, restoring the cwd afterwards.
        curdir = os.path.abspath('.')
        os.chdir('..')
        layout = getLayout(name, back -1)
        os.chdir(curdir)
    return layout
|
||||||
|
|
||||||
|
def tryToLoad(fullname):
    """Return a Layout parsed from the file at fullname, or None if absent."""
    if not os.path.exists(fullname):
        return None
    # 'with' closes the file on all paths, like the original try/finally.
    with open(fullname) as layoutFile:
        return Layout([line.strip() for line in layoutFile])
|
||||||
7
p3_rl/layouts/capsuleClassic.lay
Normal file
7
p3_rl/layouts/capsuleClassic.lay
Normal file
@@ -0,0 +1,7 @@
|
|||||||
|
%%%%%%%%%%%%%%%%%%%
|
||||||
|
%G. G ....%
|
||||||
|
%.% % %%%%%% %.%%.%
|
||||||
|
%.%o% % o% %.o%.%
|
||||||
|
%.%%%.% %%% %..%.%
|
||||||
|
%..... P %..%G%
|
||||||
|
%%%%%%%%%%%%%%%%%%%%
|
||||||
9
p3_rl/layouts/contestClassic.lay
Normal file
9
p3_rl/layouts/contestClassic.lay
Normal file
@@ -0,0 +1,9 @@
|
|||||||
|
%%%%%%%%%%%%%%%%%%%%
|
||||||
|
%o...%........%...o%
|
||||||
|
%.%%.%.%%..%%.%.%%.%
|
||||||
|
%...... G GG%......%
|
||||||
|
%.%.%%.%% %%%.%%.%.%
|
||||||
|
%.%....% ooo%.%..%.%
|
||||||
|
%.%.%%.% %% %.%.%%.%
|
||||||
|
%o%......P....%....%
|
||||||
|
%%%%%%%%%%%%%%%%%%%%
|
||||||
11
p3_rl/layouts/mediumClassic.lay
Normal file
11
p3_rl/layouts/mediumClassic.lay
Normal file
@@ -0,0 +1,11 @@
|
|||||||
|
%%%%%%%%%%%%%%%%%%%%
|
||||||
|
%o...%........%....%
|
||||||
|
%.%%.%.%%%%%%.%.%%.%
|
||||||
|
%.%..............%.%
|
||||||
|
%.%.%%.%% %%.%%.%.%
|
||||||
|
%......%G G%......%
|
||||||
|
%.%.%%.%%%%%%.%%.%.%
|
||||||
|
%.%..............%.%
|
||||||
|
%.%%.%.%%%%%%.%.%%.%
|
||||||
|
%....%...P....%...o%
|
||||||
|
%%%%%%%%%%%%%%%%%%%%
|
||||||
7
p3_rl/layouts/mediumGrid.lay
Normal file
7
p3_rl/layouts/mediumGrid.lay
Normal file
@@ -0,0 +1,7 @@
|
|||||||
|
%%%%%%%%
|
||||||
|
%P %
|
||||||
|
% .% . %
|
||||||
|
% % %
|
||||||
|
% .% . %
|
||||||
|
% G%
|
||||||
|
%%%%%%%%
|
||||||
5
p3_rl/layouts/minimaxClassic.lay
Normal file
5
p3_rl/layouts/minimaxClassic.lay
Normal file
@@ -0,0 +1,5 @@
|
|||||||
|
%%%%%%%%%
|
||||||
|
%.P G%
|
||||||
|
% %.%G%%%
|
||||||
|
%G %%%
|
||||||
|
%%%%%%%%%
|
||||||
9
p3_rl/layouts/openClassic.lay
Normal file
9
p3_rl/layouts/openClassic.lay
Normal file
@@ -0,0 +1,9 @@
|
|||||||
|
%%%%%%%%%%%%%%%%%%%%%%%%%
|
||||||
|
%.. P .... .... %
|
||||||
|
%.. ... ... ... ... %
|
||||||
|
%.. ... ... ... ... %
|
||||||
|
%.. .... .... G %
|
||||||
|
%.. ... ... ... ... %
|
||||||
|
%.. ... ... ... ... %
|
||||||
|
%.. .... .... o%
|
||||||
|
%%%%%%%%%%%%%%%%%%%%%%%%%
|
||||||
27
p3_rl/layouts/originalClassic.lay
Normal file
27
p3_rl/layouts/originalClassic.lay
Normal file
@@ -0,0 +1,27 @@
|
|||||||
|
%%%%%%%%%%%%%%%%%%%%%%%%%%%%
|
||||||
|
%............%%............%
|
||||||
|
%.%%%%.%%%%%.%%.%%%%%.%%%%.%
|
||||||
|
%o%%%%.%%%%%.%%.%%%%%.%%%%o%
|
||||||
|
%.%%%%.%%%%%.%%.%%%%%.%%%%.%
|
||||||
|
%..........................%
|
||||||
|
%.%%%%.%%.%%%%%%%%.%%.%%%%.%
|
||||||
|
%.%%%%.%%.%%%%%%%%.%%.%%%%.%
|
||||||
|
%......%%....%%....%%......%
|
||||||
|
%%%%%%.%%%%% %% %%%%%.%%%%%%
|
||||||
|
%%%%%%.%%%%% %% %%%%%.%%%%%%
|
||||||
|
%%%%%%.% %.%%%%%%
|
||||||
|
%%%%%%.% %%%% %%%% %.%%%%%%
|
||||||
|
% . %G GG G% . %
|
||||||
|
%%%%%%.% %%%%%%%%%% %.%%%%%%
|
||||||
|
%%%%%%.% %.%%%%%%
|
||||||
|
%%%%%%.% %%%%%%%%%% %.%%%%%%
|
||||||
|
%............%%............%
|
||||||
|
%.%%%%.%%%%%.%%.%%%%%.%%%%.%
|
||||||
|
%.%%%%.%%%%%.%%.%%%%%.%%%%.%
|
||||||
|
%o..%%....... .......%%..o%
|
||||||
|
%%%.%%.%%.%%%%%%%%.%%.%%.%%%
|
||||||
|
%%%.%%.%%.%%%%%%%%.%%.%%.%%%
|
||||||
|
%......%%....%%....%%......%
|
||||||
|
%.%%%%%%%%%%.%%.%%%%%%%%%%.%
|
||||||
|
%.............P............%
|
||||||
|
%%%%%%%%%%%%%%%%%%%%%%%%%%%%
|
||||||
7
p3_rl/layouts/smallClassic.lay
Normal file
7
p3_rl/layouts/smallClassic.lay
Normal file
@@ -0,0 +1,7 @@
|
|||||||
|
%%%%%%%%%%%%%%%%%%%%
|
||||||
|
%......%G G%......%
|
||||||
|
%.%%...%% %%...%%.%
|
||||||
|
%.%o.%........%.o%.%
|
||||||
|
%.%%.%.%%%%%%.%.%%.%
|
||||||
|
%........P.........%
|
||||||
|
%%%%%%%%%%%%%%%%%%%%
|
||||||
7
p3_rl/layouts/smallGrid.lay
Normal file
7
p3_rl/layouts/smallGrid.lay
Normal file
@@ -0,0 +1,7 @@
|
|||||||
|
%%%%%%%
|
||||||
|
% P %
|
||||||
|
% %%% %
|
||||||
|
% %. %
|
||||||
|
% %%% %
|
||||||
|
%. G %
|
||||||
|
%%%%%%%
|
||||||
10
p3_rl/layouts/testClassic.lay
Normal file
10
p3_rl/layouts/testClassic.lay
Normal file
@@ -0,0 +1,10 @@
|
|||||||
|
%%%%%
|
||||||
|
% . %
|
||||||
|
%.G.%
|
||||||
|
% . %
|
||||||
|
%. .%
|
||||||
|
% %
|
||||||
|
% .%
|
||||||
|
% %
|
||||||
|
%P .%
|
||||||
|
%%%%%
|
||||||
5
p3_rl/layouts/trappedClassic.lay
Normal file
5
p3_rl/layouts/trappedClassic.lay
Normal file
@@ -0,0 +1,5 @@
|
|||||||
|
%%%%%%%%
|
||||||
|
% P G%
|
||||||
|
%G%%%%%%
|
||||||
|
%.... %
|
||||||
|
%%%%%%%%
|
||||||
13
p3_rl/layouts/trickyClassic.lay
Normal file
13
p3_rl/layouts/trickyClassic.lay
Normal file
@@ -0,0 +1,13 @@
|
|||||||
|
%%%%%%%%%%%%%%%%%%%%
|
||||||
|
%o...%........%...o%
|
||||||
|
%.%%.%.%%..%%.%.%%.%
|
||||||
|
%.%.....%..%.....%.%
|
||||||
|
%.%.%%.%% %%.%%.%.%
|
||||||
|
%...... GGGG%.%....%
|
||||||
|
%.%....%%%%%%.%..%.%
|
||||||
|
%.%....% oo%.%..%.%
|
||||||
|
%.%....% %%%%.%..%.%
|
||||||
|
%.%...........%..%.%
|
||||||
|
%.%%.%.%%%%%%.%.%%.%
|
||||||
|
%o...%...P....%...o%
|
||||||
|
%%%%%%%%%%%%%%%%%%%%
|
||||||
258
p3_rl/learningAgents.py
Normal file
258
p3_rl/learningAgents.py
Normal file
@@ -0,0 +1,258 @@
|
|||||||
|
# learningAgents.py
|
||||||
|
# -----------------
|
||||||
|
# Licensing Information: You are free to use or extend these projects for
|
||||||
|
# educational purposes provided that (1) you do not distribute or publish
|
||||||
|
# solutions, (2) you retain this notice, and (3) you provide clear
|
||||||
|
# attribution to UC Berkeley, including a link to http://ai.berkeley.edu.
|
||||||
|
#
|
||||||
|
# Attribution Information: The Pacman AI projects were developed at UC Berkeley.
|
||||||
|
# The core projects and autograders were primarily created by John DeNero
|
||||||
|
# (denero@cs.berkeley.edu) and Dan Klein (klein@cs.berkeley.edu).
|
||||||
|
# Student side autograding was added by Brad Miller, Nick Hay, and
|
||||||
|
# Pieter Abbeel (pabbeel@cs.berkeley.edu).
|
||||||
|
|
||||||
|
|
||||||
|
from game import Directions, Agent, Actions
|
||||||
|
|
||||||
|
import random,util,time
|
||||||
|
|
||||||
|
class ValueEstimationAgent(Agent):
    """
    Abstract agent which assigns values to (state,action)
    Q-Values for an environment. As well as a value to a
    state and a policy given respectively by,

    V(s) = max_{a in actions} Q(s,a)
    policy(s) = arg_max_{a in actions} Q(s,a)

    Both ValueIterationAgent and QLearningAgent inherit
    from this agent. While a ValueIterationAgent has
    a model of the environment via a MarkovDecisionProcess
    (see mdp.py) that is used to estimate Q-Values before
    ever actually acting, the QLearningAgent estimates
    Q-Values while acting in the environment.
    """

    def __init__(self, alpha=1.0, epsilon=0.05, gamma=0.8, numTraining = 10):
        """
        Sets options, which can be passed in via the Pacman command line using -a alpha=0.5,...
        alpha    - learning rate
        epsilon  - exploration rate
        gamma    - discount factor
        numTraining - number of training episodes, i.e. no learning after these many episodes
        """
        # Coerce to canonical types so string values from the command line work too.
        self.alpha = float(alpha)
        self.epsilon = float(epsilon)
        self.discount = float(gamma)
        self.numTraining = int(numTraining)

    ####################################
    #    Override These Functions      #
    ####################################
    def getQValue(self, state, action):
        """
        Should return Q(state,action)
        """
        util.raiseNotDefined()

    def getValue(self, state):
        """
        What is the value of this state under the best action?
        Concretely, this is given by

        V(s) = max_{a in actions} Q(s,a)
        """
        util.raiseNotDefined()

    def getPolicy(self, state):
        """
        What is the best action to take in the state. Note that because
        we might want to explore, this might not coincide with getAction
        Concretely, this is given by

        policy(s) = arg_max_{a in actions} Q(s,a)

        If many actions achieve the maximal Q-value,
        it doesn't matter which is selected.
        """
        util.raiseNotDefined()

    def getAction(self, state):
        """
        state: can call state.getLegalActions()
        Choose an action and return it.
        """
        util.raiseNotDefined()
|
||||||
|
|
||||||
|
class ReinforcementAgent(ValueEstimationAgent):
    """
    Abstract Reinforcemnt Agent: A ValueEstimationAgent
    which estimates Q-Values (as well as policies) from experience
    rather than a model

    What you need to know:
        - The environment will call
          observeTransition(state,action,nextState,deltaReward),
          which will call update(state, action, nextState, deltaReward)
          which you should override.
        - Use self.getLegalActions(state) to know which actions
          are available in a state
    """
    ####################################
    #    Override These Functions      #
    ####################################

    def update(self, state, action, nextState, reward):
        """
        This class will call this function, which you write, after
        observing a transition and reward
        """
        util.raiseNotDefined()

    ####################################
    #    Read These Functions          #
    ####################################

    def getLegalActions(self,state):
        """
        Get the actions available for a given
        state. This is what you should use to
        obtain legal actions for a state
        """
        return self.actionFn(state)

    def observeTransition(self, state,action,nextState,deltaReward):
        """
        Called by environment to inform agent that a transition has
        been observed. This will result in a call to self.update
        on the same arguments

        NOTE: Do *not* override or call this function
        """
        # Accumulate the episode's total reward, then delegate learning
        # to the subclass's update().
        self.episodeRewards += deltaReward
        self.update(state,action,nextState,deltaReward)

    def startEpisode(self):
        """
        Called by environment when new episode is starting
        """
        self.lastState = None
        self.lastAction = None
        self.episodeRewards = 0.0

    def stopEpisode(self):
        """
        Called by environment when episode is done
        """
        # Rewards are tallied separately for training and testing episodes.
        if self.episodesSoFar < self.numTraining:
            self.accumTrainRewards += self.episodeRewards
        else:
            self.accumTestRewards += self.episodeRewards
        self.episodesSoFar += 1
        if self.episodesSoFar >= self.numTraining:
            # Take off the training wheels
            self.epsilon = 0.0    # no exploration
            self.alpha = 0.0      # no learning

    def isInTraining(self):
        # True while fewer than numTraining episodes have completed.
        return self.episodesSoFar < self.numTraining

    def isInTesting(self):
        return not self.isInTraining()

    def __init__(self, actionFn = None, numTraining=100, epsilon=0.5, alpha=0.5, gamma=1):
        """
        actionFn: Function which takes a state and returns the list of legal actions

        alpha    - learning rate
        epsilon  - exploration rate
        gamma    - discount factor
        numTraining - number of training episodes, i.e. no learning after these many episodes
        """
        # Default: ask the state itself for its legal actions.
        if actionFn == None:
            actionFn = lambda state: state.getLegalActions()
        self.actionFn = actionFn
        self.episodesSoFar = 0        # episodes completed so far
        self.accumTrainRewards = 0.0  # total reward over training episodes
        self.accumTestRewards = 0.0   # total reward over testing episodes
        self.numTraining = int(numTraining)
        self.epsilon = float(epsilon)
        self.alpha = float(alpha)
        self.discount = float(gamma)

    ################################
    # Controls needed for Crawler  #
    ################################
    def setEpsilon(self, epsilon):
        self.epsilon = epsilon

    def setLearningRate(self, alpha):
        self.alpha = alpha

    def setDiscount(self, discount):
        self.discount = discount

    def doAction(self,state,action):
        """
        Called by inherited class when
        an action is taken in a state
        """
        # Remember the (state, action) pair so the next observation can
        # form a complete transition.
        self.lastState = state
        self.lastAction = action

    ###################
    # Pacman Specific #
    ###################
    def observationFunction(self, state):
        """
        This is where we ended up after our last action.
        The simulation should somehow ensure this is called
        """
        if not self.lastState is None:
            # Reward is the change in game score since the last state.
            reward = state.getScore() - self.lastState.getScore()
            self.observeTransition(self.lastState, self.lastAction, state, reward)
        return state

    def registerInitialState(self, state):
        self.startEpisode()
        if self.episodesSoFar == 0:
            print 'Beginning %d episodes of Training' % (self.numTraining)

    def final(self, state):
        """
        Called by Pacman game at the terminal state
        """
        # Record the final transition of the episode, then close it out.
        deltaReward = state.getScore() - self.lastState.getScore()
        self.observeTransition(self.lastState, self.lastAction, state, deltaReward)
        self.stopEpisode()

        # Make sure we have this var (first call may precede initialization)
        if not 'episodeStartTime' in self.__dict__:
            self.episodeStartTime = time.time()
        if not 'lastWindowAccumRewards' in self.__dict__:
            self.lastWindowAccumRewards = 0.0
        self.lastWindowAccumRewards += state.getScore()

        # Print a progress report every NUM_EPS_UPDATE episodes.
        NUM_EPS_UPDATE = 100
        if self.episodesSoFar % NUM_EPS_UPDATE == 0:
            print 'Reinforcement Learning Status:'
            windowAvg = self.lastWindowAccumRewards / float(NUM_EPS_UPDATE)
            if self.episodesSoFar <= self.numTraining:
                trainAvg = self.accumTrainRewards / float(self.episodesSoFar)
                print '\tCompleted %d out of %d training episodes' % (
                       self.episodesSoFar,self.numTraining)
                print '\tAverage Rewards over all training: %.2f' % (
                        trainAvg)
            else:
                testAvg = float(self.accumTestRewards) / (self.episodesSoFar - self.numTraining)
                print '\tCompleted %d test episodes' % (self.episodesSoFar - self.numTraining)
                print '\tAverage Rewards over testing: %.2f' % testAvg
            print '\tAverage Rewards for last %d episodes: %.2f' % (
                    NUM_EPS_UPDATE,windowAvg)
            print '\tEpisode took %.2f seconds' % (time.time() - self.episodeStartTime)
            self.lastWindowAccumRewards = 0.0
            self.episodeStartTime = time.time()

        if self.episodesSoFar == self.numTraining:
            msg = 'Training Done (turning off epsilon and alpha)'
            print '%s\n%s' % (msg,'-' * len(msg))
||||||
67
p3_rl/mdp.py
Normal file
67
p3_rl/mdp.py
Normal file
@@ -0,0 +1,67 @@
|
|||||||
|
# mdp.py
|
||||||
|
# ------
|
||||||
|
# Licensing Information: You are free to use or extend these projects for
|
||||||
|
# educational purposes provided that (1) you do not distribute or publish
|
||||||
|
# solutions, (2) you retain this notice, and (3) you provide clear
|
||||||
|
# attribution to UC Berkeley, including a link to http://ai.berkeley.edu.
|
||||||
|
#
|
||||||
|
# Attribution Information: The Pacman AI projects were developed at UC Berkeley.
|
||||||
|
# The core projects and autograders were primarily created by John DeNero
|
||||||
|
# (denero@cs.berkeley.edu) and Dan Klein (klein@cs.berkeley.edu).
|
||||||
|
# Student side autograding was added by Brad Miller, Nick Hay, and
|
||||||
|
# Pieter Abbeel (pabbeel@cs.berkeley.edu).
|
||||||
|
|
||||||
|
|
||||||
|
import random
|
||||||
|
|
||||||
|
class MarkovDecisionProcess:
    # Abstract interface for an MDP: states, actions, transition model and
    # rewards.  Each method body is the bare name 'abstract', which raises
    # a NameError if a subclass forgets to override it.

    def getStates(self):
        """
        Return a list of all states in the MDP.
        Not generally possible for large MDPs.
        """
        abstract

    def getStartState(self):
        """
        Return the start state of the MDP.
        """
        abstract

    def getPossibleActions(self, state):
        """
        Return list of possible actions from 'state'.
        """
        abstract

    def getTransitionStatesAndProbs(self, state, action):
        """
        Returns list of (nextState, prob) pairs
        representing the states reachable
        from 'state' by taking 'action' along
        with their transition probabilities.

        Note that in Q-Learning and reinforcment
        learning in general, we do not know these
        probabilities nor do we directly model them.
        """
        abstract

    def getReward(self, state, action, nextState):
        """
        Get the reward for the state, action, nextState transition.

        Not available in reinforcement learning.
        """
        abstract

    def isTerminal(self, state):
        """
        Returns true if the current state is a terminal state. By convention,
        a terminal state has zero future rewards. Sometimes the terminal state(s)
        may have no possible actions. It is also common to think of the terminal
        state as having a self-loop action 'pass' with zero reward; the formulations
        are equivalent.
        """
        abstract
||||||
684
p3_rl/pacman.py
Normal file
684
p3_rl/pacman.py
Normal file
@@ -0,0 +1,684 @@
|
|||||||
|
# pacman.py
|
||||||
|
# ---------
|
||||||
|
# Licensing Information: You are free to use or extend these projects for
|
||||||
|
# educational purposes provided that (1) you do not distribute or publish
|
||||||
|
# solutions, (2) you retain this notice, and (3) you provide clear
|
||||||
|
# attribution to UC Berkeley, including a link to http://ai.berkeley.edu.
|
||||||
|
#
|
||||||
|
# Attribution Information: The Pacman AI projects were developed at UC Berkeley.
|
||||||
|
# The core projects and autograders were primarily created by John DeNero
|
||||||
|
# (denero@cs.berkeley.edu) and Dan Klein (klein@cs.berkeley.edu).
|
||||||
|
# Student side autograding was added by Brad Miller, Nick Hay, and
|
||||||
|
# Pieter Abbeel (pabbeel@cs.berkeley.edu).
|
||||||
|
|
||||||
|
|
||||||
|
"""
|
||||||
|
Pacman.py holds the logic for the classic pacman game along with the main
|
||||||
|
code to run a game. This file is divided into three sections:
|
||||||
|
|
||||||
|
(i) Your interface to the pacman world:
|
||||||
|
Pacman is a complex environment. You probably don't want to
|
||||||
|
read through all of the code we wrote to make the game runs
|
||||||
|
correctly. This section contains the parts of the code
|
||||||
|
that you will need to understand in order to complete the
|
||||||
|
project. There is also some code in game.py that you should
|
||||||
|
understand.
|
||||||
|
|
||||||
|
(ii) The hidden secrets of pacman:
|
||||||
|
This section contains all of the logic code that the pacman
|
||||||
|
environment uses to decide who can move where, who dies when
|
||||||
|
things collide, etc. You shouldn't need to read this section
|
||||||
|
of code, but you can if you want.
|
||||||
|
|
||||||
|
(iii) Framework to start a game:
|
||||||
|
The final section contains the code for reading the command
|
||||||
|
you use to set up the game, then starting up a new game, along with
|
||||||
|
linking in all the external parts (agent functions, graphics).
|
||||||
|
Check this section out to see all the options available to you.
|
||||||
|
|
||||||
|
To play your first game, type 'python pacman.py' from the command line.
|
||||||
|
The keys are 'a', 's', 'd', and 'w' to move (or arrow keys). Have fun!
|
||||||
|
"""
|
||||||
|
from game import GameStateData
|
||||||
|
from game import Game
|
||||||
|
from game import Directions
|
||||||
|
from game import Actions
|
||||||
|
from util import nearestPoint
|
||||||
|
from util import manhattanDistance
|
||||||
|
import util, layout
|
||||||
|
import sys, types, time, random, os
|
||||||
|
|
||||||
|
###################################################
|
||||||
|
# YOUR INTERFACE TO THE PACMAN WORLD: A GameState #
|
||||||
|
###################################################
|
||||||
|
|
||||||
|
class GameState:
    """
    A GameState specifies the full game state, including the food, capsules,
    agent configurations and score changes.

    GameStates are used by the Game object to capture the actual state of the game and
    can be used by agents to reason about the game.

    Much of the information in a GameState is stored in a GameStateData object. We
    strongly suggest that you access that data via the accessor methods below rather
    than referring to the GameStateData object directly.

    Note that in classic Pacman, Pacman is always agent 0.
    """

    ####################################################
    # Accessor methods: use these to access state data #
    ####################################################

    # static variable keeps track of which states have had getLegalActions called
    explored = set()
    def getAndResetExplored():
        # Return a copy of the explored set and clear the original.
        tmp = GameState.explored.copy()
        GameState.explored = set()
        return tmp
    getAndResetExplored = staticmethod(getAndResetExplored)

    def getLegalActions( self, agentIndex=0 ):
        """
        Returns the legal actions for the agent specified.
        """
#        GameState.explored.add(self)
        # Terminal states have no legal actions.
        if self.isWin() or self.isLose(): return []

        if agentIndex == 0:  # Pacman is moving
            return PacmanRules.getLegalActions( self )
        else:
            return GhostRules.getLegalActions( self, agentIndex )

    def generateSuccessor( self, agentIndex, action):
        """
        Returns the successor state after the specified agent takes the action.
        """
        # Check that successors exist
        if self.isWin() or self.isLose(): raise Exception('Can\'t generate a successor of a terminal state.')

        # Copy current state
        state = GameState(self)

        # Let agent's logic deal with its action's effects on the board
        if agentIndex == 0:  # Pacman is moving
            state.data._eaten = [False for i in range(state.getNumAgents())]
            PacmanRules.applyAction( state, action )
        else:                # A ghost is moving
            GhostRules.applyAction( state, action, agentIndex )

        # Time passes
        if agentIndex == 0:
            state.data.scoreChange += -TIME_PENALTY # Penalty for waiting around
        else:
            GhostRules.decrementTimer( state.data.agentStates[agentIndex] )

        # Resolve multi-agent effects (Pacman/ghost collisions)
        GhostRules.checkDeath( state, agentIndex )

        # Book keeping
        state.data._agentMoved = agentIndex
        state.data.score += state.data.scoreChange
        GameState.explored.add(self)
        GameState.explored.add(state)
        return state

    def getLegalPacmanActions( self ):
        return self.getLegalActions( 0 )

    def generatePacmanSuccessor( self, action ):
        """
        Generates the successor state after the specified pacman move
        """
        return self.generateSuccessor( 0, action )

    def getPacmanState( self ):
        """
        Returns an AgentState object for pacman (in game.py)

        state.pos gives the current position
        state.direction gives the travel vector
        """
        # Return a copy so callers cannot mutate the stored agent state.
        return self.data.agentStates[0].copy()

    def getPacmanPosition( self ):
        return self.data.agentStates[0].getPosition()

    def getGhostStates( self ):
        return self.data.agentStates[1:]

    def getGhostState( self, agentIndex ):
        # Agent 0 is Pacman; ghosts occupy indices 1..numAgents-1.
        if agentIndex == 0 or agentIndex >= self.getNumAgents():
            raise Exception("Invalid index passed to getGhostState")
        return self.data.agentStates[agentIndex]

    def getGhostPosition( self, agentIndex ):
        if agentIndex == 0:
            raise Exception("Pacman's index passed to getGhostPosition")
        return self.data.agentStates[agentIndex].getPosition()

    def getGhostPositions(self):
        return [s.getPosition() for s in self.getGhostStates()]

    def getNumAgents( self ):
        return len( self.data.agentStates )

    def getScore( self ):
        return float(self.data.score)

    def getCapsules(self):
        """
        Returns a list of positions (x,y) of the remaining capsules.
        """
        return self.data.capsules

    def getNumFood( self ):
        return self.data.food.count()

    def getFood(self):
        """
        Returns a Grid of boolean food indicator variables.

        Grids can be accessed via list notation, so to check
        if there is food at (x,y), just call

        currentFood = state.getFood()
        if currentFood[x][y] == True: ...
        """
        return self.data.food

    def getWalls(self):
        """
        Returns a Grid of boolean wall indicator variables.

        Grids can be accessed via list notation, so to check
        if there is a wall at (x,y), just call

        walls = state.getWalls()
        if walls[x][y] == True: ...
        """
        return self.data.layout.walls

    def hasFood(self, x, y):
        return self.data.food[x][y]

    def hasWall(self, x, y):
        return self.data.layout.walls[x][y]

    def isLose( self ):
        return self.data._lose

    def isWin( self ):
        return self.data._win

    #############################################
    #             Helper methods:               #
    # You shouldn't need to call these directly #
    #############################################

    def __init__( self, prevState = None ):
        """
        Generates a new state by copying information from its predecessor.
        """
        if prevState != None: # Copy the predecessor's data
            self.data = GameStateData(prevState.data)
        else:                 # Initial (empty) state
            self.data = GameStateData()

    def deepCopy( self ):
        state = GameState( self )
        state.data = self.data.deepCopy()
        return state

    def __eq__( self, other ):
        """
        Allows two states to be compared.
        """
        return hasattr(other, 'data') and self.data == other.data

    def __hash__( self ):
        """
        Allows states to be keys of dictionaries.
        """
        return hash( self.data )

    def __str__( self ):

        return str(self.data)

    def initialize( self, layout, numGhostAgents=1000 ):
        """
        Creates an initial game state from a layout array (see layout.py).
        """
        self.data.initialize(layout, numGhostAgents)
||||||
|
|
||||||
|
############################################################################
|
||||||
|
# THE HIDDEN SECRETS OF PACMAN #
|
||||||
|
# #
|
||||||
|
# You shouldn't need to look through the code in this section of the file. #
|
||||||
|
############################################################################
|
||||||
|
|
||||||
|
SCARED_TIME = 40    # Moves ghosts stay scared after Pacman eats a capsule
COLLISION_TOLERANCE = 0.7 # How close (Manhattan distance) ghosts must be to Pacman to kill
TIME_PENALTY = 1 # Number of points lost each round Pacman moves
||||||
|
|
||||||
|
class ClassicGameRules:
    """
    These game rules manage the control flow of a game, deciding when
    and how the game starts and ends.
    """
    def __init__(self, timeout=30):
        # timeout: time budget (reported by the getMax*/getMove* accessors
        # below) handed to the game framework for agent moves and startup.
        self.timeout = timeout

    def newGame( self, layout, pacmanAgent, ghostAgents, display, quiet = False, catchExceptions=False):
        # Use only as many ghosts as the layout provides starting slots for.
        agents = [pacmanAgent] + ghostAgents[:layout.getNumGhosts()]
        initState = GameState()
        initState.initialize( layout, len(ghostAgents) )
        game = Game(agents, display, self, catchExceptions=catchExceptions)
        game.state = initState
        # Keep a pristine copy of the start state for progress reporting.
        self.initialState = initState.deepCopy()
        self.quiet = quiet
        return game

    def process(self, state, game):
        """
        Checks to see whether it is time to end the game.
        """
        if state.isWin(): self.win(state, game)
        if state.isLose(): self.lose(state, game)

    def win( self, state, game ):
        if not self.quiet: print "Pacman emerges victorious! Score: %d" % state.data.score
        game.gameOver = True

    def lose( self, state, game ):
        if not self.quiet: print "Pacman died! Score: %d" % state.data.score
        game.gameOver = True

    def getProgress(self, game):
        # Fraction of the initial food still remaining (1.0 at the start).
        return float(game.state.getNumFood()) / self.initialState.getNumFood()

    def agentCrash(self, game, agentIndex):
        # Agent 0 is always Pacman; all other indices are ghosts.
        if agentIndex == 0:
            print "Pacman crashed"
        else:
            print "A ghost crashed"

    def getMaxTotalTime(self, agentIndex):
        return self.timeout

    def getMaxStartupTime(self, agentIndex):
        return self.timeout

    def getMoveWarningTime(self, agentIndex):
        return self.timeout

    def getMoveTimeout(self, agentIndex):
        return self.timeout

    def getMaxTimeWarnings(self, agentIndex):
        return 0
|
||||||
|
class PacmanRules:
    """
    These functions govern how pacman interacts with his environment under
    the classic game rules.
    """
    # Distance Pacman travels per move (grid units).
    PACMAN_SPEED=1

    def getLegalActions( state ):
        """
        Returns a list of possible actions.
        """
        return Actions.getPossibleActions( state.getPacmanState().configuration, state.data.layout.walls )
    getLegalActions = staticmethod( getLegalActions )

    def applyAction( state, action ):
        """
        Edits the state to reflect the results of the action.
        """
        legal = PacmanRules.getLegalActions( state )
        if action not in legal:
            raise Exception("Illegal action " + str(action))

        pacmanState = state.data.agentStates[0]

        # Update Configuration
        vector = Actions.directionToVector( action, PacmanRules.PACMAN_SPEED )
        pacmanState.configuration = pacmanState.configuration.generateSuccessor( vector )

        # Eat: consume whatever sits at the nearest grid point if Pacman is
        # within half a cell of it.
        # NOTE(review): 'next' shadows the builtin of the same name.
        next = pacmanState.configuration.getPosition()
        nearest = nearestPoint( next )
        if manhattanDistance( nearest, next ) <= 0.5 :
            # Remove food
            PacmanRules.consume( nearest, state )
    applyAction = staticmethod( applyAction )

    def consume( position, state ):
        # Apply the effects of Pacman occupying 'position': eat a food
        # pellet (+10; +500 and win on the last one) or a capsule (scares
        # every ghost for SCARED_TIME moves).
        x,y = position
        # Eat food
        if state.data.food[x][y]:
            state.data.scoreChange += 10
            # Copy the grid before mutating — presumably so predecessor
            # states keep their own food grid unchanged.
            state.data.food = state.data.food.copy()
            state.data.food[x][y] = False
            state.data._foodEaten = position
            # TODO: cache numFood?
            numFood = state.getNumFood()
            if numFood == 0 and not state.data._lose:
                state.data.scoreChange += 500
                state.data._win = True
        # Eat capsule
        if( position in state.getCapsules() ):
            state.data.capsules.remove( position )
            state.data._capsuleEaten = position
            # Reset all ghosts' scared timers
            for index in range( 1, len( state.data.agentStates ) ):
                state.data.agentStates[index].scaredTimer = SCARED_TIME
    consume = staticmethod( consume )
|
||||||
|
class GhostRules:
    """
    These functions dictate how ghosts interact with their environment.
    """
    # Base ghost speed; halved while a ghost is scared (see applyAction).
    GHOST_SPEED=1.0
    def getLegalActions( state, ghostIndex ):
        """
        Ghosts cannot stop, and cannot turn around unless they
        reach a dead end, but can turn 90 degrees at intersections.
        """
        conf = state.getGhostState( ghostIndex ).configuration
        possibleActions = Actions.getPossibleActions( conf, state.data.layout.walls )
        reverse = Actions.reverseDirection( conf.direction )
        if Directions.STOP in possibleActions:
            possibleActions.remove( Directions.STOP )
        # Forbid reversing unless it is the only remaining option (dead end).
        if reverse in possibleActions and len( possibleActions ) > 1:
            possibleActions.remove( reverse )
        return possibleActions
    getLegalActions = staticmethod( getLegalActions )

    def applyAction( state, action, ghostIndex):
        # Move the ghost; scared ghosts travel at half speed.
        legal = GhostRules.getLegalActions( state, ghostIndex )
        if action not in legal:
            raise Exception("Illegal ghost action " + str(action))

        ghostState = state.data.agentStates[ghostIndex]
        speed = GhostRules.GHOST_SPEED
        if ghostState.scaredTimer > 0: speed /= 2.0
        vector = Actions.directionToVector( action, speed )
        ghostState.configuration = ghostState.configuration.generateSuccessor( vector )
    applyAction = staticmethod( applyAction )

    def decrementTimer( ghostState):
        # Count down the scared timer; on the final scared move, snap the
        # ghost back onto a grid point (half-speed motion can leave it
        # between cells).
        timer = ghostState.scaredTimer
        if timer == 1:
            ghostState.configuration.pos = nearestPoint( ghostState.configuration.pos )
        ghostState.scaredTimer = max( 0, timer - 1 )
    decrementTimer = staticmethod( decrementTimer )

    def checkDeath( state, agentIndex):
        # Resolve Pacman/ghost collisions after agent 'agentIndex' moved.
        pacmanPosition = state.getPacmanPosition()
        if agentIndex == 0: # Pacman just moved; Anyone can kill him
            for index in range( 1, len( state.data.agentStates ) ):
                ghostState = state.data.agentStates[index]
                ghostPosition = ghostState.configuration.getPosition()
                if GhostRules.canKill( pacmanPosition, ghostPosition ):
                    GhostRules.collide( state, ghostState, index )
        else:
            # A ghost just moved; only that ghost can collide with Pacman.
            ghostState = state.data.agentStates[agentIndex]
            ghostPosition = ghostState.configuration.getPosition()
            if GhostRules.canKill( pacmanPosition, ghostPosition ):
                GhostRules.collide( state, ghostState, agentIndex )
    checkDeath = staticmethod( checkDeath )

    def collide( state, ghostState, agentIndex):
        # Scared ghost: Pacman eats it (+200) and it respawns at its start.
        # Otherwise Pacman dies (-500) unless the game is already won.
        if ghostState.scaredTimer > 0:
            state.data.scoreChange += 200
            GhostRules.placeGhost(state, ghostState)
            ghostState.scaredTimer = 0
            # Added for first-person
            state.data._eaten[agentIndex] = True
        else:
            if not state.data._win:
                state.data.scoreChange -= 500
                state.data._lose = True
    collide = staticmethod( collide )

    def canKill( pacmanPosition, ghostPosition ):
        # A collision occurs within COLLISION_TOLERANCE (Manhattan distance).
        return manhattanDistance( ghostPosition, pacmanPosition ) <= COLLISION_TOLERANCE
    canKill = staticmethod( canKill )

    def placeGhost(state, ghostState):
        # Send the ghost back to its starting configuration.
        ghostState.configuration = ghostState.start
    placeGhost = staticmethod( placeGhost )
|
|
||||||
|
#############################
|
||||||
|
# FRAMEWORK TO START A GAME #
|
||||||
|
#############################
|
||||||
|
|
||||||
|
def default(text):
    """
    Append the optparse default-value marker to a help string.

    text: the base help text for an option.
    Returns the text followed by ' [Default: %default]', which optparse
    expands to the option's actual default value when printing help.

    (Parameter renamed from 'str', which shadowed the builtin; every
    caller in this file passes it positionally.)
    """
    return text + ' [Default: %default]'
|
||||||
|
def parseAgentArgs(text):
    """
    Parse a comma-separated key=value option string into a dict.

    text: e.g. "epsilon=0.1,alpha=0.5" -> {'epsilon': '0.1', 'alpha': '0.5'}.
    A piece with no '=' becomes a flag mapped to 1, e.g. "quiet" ->
    {'quiet': 1}.  Values are kept as strings; agents convert them.
    Returns {} when text is None.

    (Parameter renamed from 'str', which shadowed the builtin.)
    """
    if text is None: return {}
    opts = {}
    for piece in text.split(','):
        if '=' in piece:
            # Split on the first '=' only, so values may themselves contain
            # '=' (the original split raised ValueError in that case).
            key, val = piece.split('=', 1)
        else:
            key, val = piece, 1
        opts[key] = val
    return opts
|
|
||||||
|
def readCommand( argv ):
    """
    Processes the command used to run pacman from the command line.

    Returns a dict of keyword arguments for runGames(): layout, pacman,
    ghosts, display, numGames, record, catchExceptions, timeout and
    (when -x is given) numTraining.  With --replay, replays the recorded
    game and calls sys.exit(0) instead of returning.
    """
    from optparse import OptionParser
    usageStr = """
    USAGE:      python pacman.py <options>
    EXAMPLES:   (1) python pacman.py
                    - starts an interactive game
                (2) python pacman.py --layout smallClassic --zoom 2
                OR  python pacman.py -l smallClassic -z 2
                    - starts an interactive game on a smaller board, zoomed in
    """
    parser = OptionParser(usageStr)

    parser.add_option('-n', '--numGames', dest='numGames', type='int',
                      help=default('the number of GAMES to play'), metavar='GAMES', default=1)
    parser.add_option('-l', '--layout', dest='layout',
                      help=default('the LAYOUT_FILE from which to load the map layout'),
                      metavar='LAYOUT_FILE', default='mediumClassic')
    parser.add_option('-p', '--pacman', dest='pacman',
                      help=default('the agent TYPE in the pacmanAgents module to use'),
                      metavar='TYPE', default='KeyboardAgent')
    parser.add_option('-t', '--textGraphics', action='store_true', dest='textGraphics',
                      help='Display output as text only', default=False)
    parser.add_option('-q', '--quietTextGraphics', action='store_true', dest='quietGraphics',
                      help='Generate minimal output and no graphics', default=False)
    parser.add_option('-g', '--ghosts', dest='ghost',
                      help=default('the ghost agent TYPE in the ghostAgents module to use'),
                      metavar = 'TYPE', default='RandomGhost')
    parser.add_option('-k', '--numghosts', type='int', dest='numGhosts',
                      help=default('The maximum number of ghosts to use'), default=4)
    parser.add_option('-z', '--zoom', type='float', dest='zoom',
                      help=default('Zoom the size of the graphics window'), default=1.0)
    parser.add_option('-f', '--fixRandomSeed', action='store_true', dest='fixRandomSeed',
                      help='Fixes the random seed to always play the same game', default=False)
    parser.add_option('-r', '--recordActions', action='store_true', dest='record',
                      help='Writes game histories to a file (named by the time they were played)', default=False)
    parser.add_option('--replay', dest='gameToReplay',
                      help='A recorded game file (pickle) to replay', default=None)
    parser.add_option('-a','--agentArgs',dest='agentArgs',
                      help='Comma separated values sent to agent. e.g. "opt1=val1,opt2,opt3=val3"')
    parser.add_option('-x', '--numTraining', dest='numTraining', type='int',
                      help=default('How many episodes are training (suppresses output)'), default=0)
    parser.add_option('--frameTime', dest='frameTime', type='float',
                      help=default('Time to delay between frames; <0 means keyboard'), default=0.1)
    parser.add_option('-c', '--catchExceptions', action='store_true', dest='catchExceptions',
                      help='Turns on exception handling and timeouts during games', default=False)
    parser.add_option('--timeout', dest='timeout', type='int',
                      help=default('Maximum length of time an agent can spend computing in a single game'), default=30)

    options, otherjunk = parser.parse_args(argv)
    if len(otherjunk) != 0:
        raise Exception('Command line input not understood: ' + str(otherjunk))
    args = dict()

    # Fix the random seed
    if options.fixRandomSeed: random.seed('cs188')

    # Choose a layout
    args['layout'] = layout.getLayout( options.layout )
    if args['layout'] == None: raise Exception("The layout " + options.layout + " cannot be found")

    # Choose a Pacman agent.  The keyboard agent is only allowed when a
    # graphical display will actually be shown.
    noKeyboard = options.gameToReplay == None and (options.textGraphics or options.quietGraphics)
    pacmanType = loadAgent(options.pacman, noKeyboard)
    agentOpts = parseAgentArgs(options.agentArgs)
    if options.numTraining > 0:
        args['numTraining'] = options.numTraining
        if 'numTraining' not in agentOpts: agentOpts['numTraining'] = options.numTraining
    pacman = pacmanType(**agentOpts) # Instantiate Pacman with agentArgs
    args['pacman'] = pacman

    # Don't display training games
    # NOTE(review): this branch checks the key 'numTrain' while -x above
    # stores 'numTraining' -- confirm which key agents actually pass.
    if 'numTrain' in agentOpts:
        options.numQuiet = int(agentOpts['numTrain'])
        options.numIgnore = int(agentOpts['numTrain'])

    # Choose a ghost agent (one instance per ghost, indexed from 1)
    ghostType = loadAgent(options.ghost, noKeyboard)
    args['ghosts'] = [ghostType( i+1 ) for i in range( options.numGhosts )]

    # Choose a display format
    if options.quietGraphics:
        import textDisplay
        args['display'] = textDisplay.NullGraphics()
    elif options.textGraphics:
        import textDisplay
        textDisplay.SLEEP_TIME = options.frameTime
        args['display'] = textDisplay.PacmanGraphics()
    else:
        import graphicsDisplay
        args['display'] = graphicsDisplay.PacmanGraphics(options.zoom, frameTime = options.frameTime)
    args['numGames'] = options.numGames
    args['record'] = options.record
    args['catchExceptions'] = options.catchExceptions
    args['timeout'] = options.timeout

    # Special case: recorded games don't use the runGames method or args structure
    if options.gameToReplay != None:
        print 'Replaying recorded game %s.' % options.gameToReplay
        import cPickle
        f = open(options.gameToReplay)
        try: recorded = cPickle.load(f)
        finally: f.close()
        recorded['display'] = args['display']
        replayGame(**recorded)
        sys.exit(0)

    return args
|
||||||
|
|
||||||
|
def loadAgent(pacman, nographics):
    """
    Search every directory on $PYTHONPATH (plus '.') for a module whose
    filename ends in 'gents.py' and which defines an agent class named
    `pacman`; return that class.

    Raises an Exception when no such agent exists, or when the agent lives
    in keyboardAgents.py but graphics are disabled (the keyboard agent
    needs a graphical display).
    """
    pythonPathStr = os.path.expandvars("$PYTHONPATH")
    # Windows joins path entries with ';', Unix with ':'.
    separator = ';' if ';' in pythonPathStr else ':'
    pythonPathDirs = pythonPathStr.split(separator)
    pythonPathDirs.append('.')

    for moduleDir in pythonPathDirs:
        if not os.path.isdir(moduleDir):
            continue
        candidates = [f for f in os.listdir(moduleDir) if f.endswith('gents.py')]
        for modulename in candidates:
            try:
                module = __import__(modulename[:-3])
            except ImportError:
                continue
            if pacman not in dir(module):
                continue
            if nographics and modulename == 'keyboardAgents.py':
                raise Exception('Using the keyboard requires graphics (not text display)')
            return getattr(module, pacman)
    raise Exception('The agent ' + pacman + ' is not specified in any *Agents.py.')
|
||||||
|
|
||||||
|
def replayGame( layout, actions, display ):
    """Re-run a recorded game by replaying its saved action sequence."""
    import pacmanAgents, ghostAgents
    rules = ClassicGameRules()
    # Agent choice is irrelevant during replay -- every move comes from
    # the recorded action list -- so use a greedy Pacman plus one random
    # ghost per ghost slot in the layout.
    agents = [pacmanAgents.GreedyAgent()]
    agents += [ghostAgents.RandomGhost(i + 1) for i in range(layout.getNumGhosts())]
    game = rules.newGame( layout, agents[0], agents[1:], display )
    state = game.state
    display.initialize(state.data)

    for action in actions:
        # Execute the recorded action
        state = state.generateSuccessor( *action )
        # Refresh the display
        display.update( state.data )
        # Allow for game specific conditions (winning, losing, etc.)
        rules.process(state, game)

    display.finish()
|
||||||
|
|
||||||
|
def runGames( layout, pacman, ghosts, display, numGames, record, numTraining = 0, catchExceptions=False, timeout=30 ):
    """
    Play numGames games of Pacman and return the list of non-training Game
    objects.

    The first numTraining games are run quietly (null graphics, suppressed
    output) and excluded from the returned list and statistics.  If record
    is true, each game's move history is pickled to a timestamped file.
    Aggregate score / win statistics are printed for non-training games.
    """
    import __main__
    # The graphics code reads the active display out of __main__'s globals.
    __main__.__dict__['_display'] = display

    rules = ClassicGameRules(timeout)
    games = []

    for i in range( numGames ):
        # Training episodes run before the displayed games.
        beQuiet = i < numTraining
        if beQuiet:
            # Suppress output and graphics
            import textDisplay
            gameDisplay = textDisplay.NullGraphics()
            rules.quiet = True
        else:
            gameDisplay = display
            rules.quiet = False
        game = rules.newGame( layout, pacman, ghosts, gameDisplay, beQuiet, catchExceptions)
        game.run()
        if not beQuiet: games.append(game)

        if record:
            import time, cPickle
            # File name combines the game index with month/day/hour/min/sec.
            fname = ('recorded-game-%d' % (i + 1)) + '-'.join([str(t) for t in time.localtime()[1:6]])
            f = file(fname, 'w')
            components = {'layout': layout, 'actions': game.moveHistory}
            cPickle.dump(components, f)
            f.close()

    # Only print statistics when at least one non-training game was played.
    if (numGames-numTraining) > 0:
        scores = [game.state.getScore() for game in games]
        wins = [game.state.isWin() for game in games]
        winRate = wins.count(True)/ float(len(wins))
        print 'Average Score:', sum(scores) / float(len(scores))
        print 'Scores:       ', ', '.join([str(score) for score in scores])
        print 'Win Rate:      %d/%d (%.2f)' % (wins.count(True), len(wins), winRate)
        print 'Record:       ', ', '.join([ ['Loss', 'Win'][int(w)] for w in wins])

    return games
|
||||||
|
|
||||||
|
if __name__ == '__main__':
    """
    The main function called when pacman.py is run
    from the command line:

    > python pacman.py

    See the usage string for more details.

    > python pacman.py --help
    """
    # Parse the command line into runGames() keyword arguments, then play.
    args = readCommand( sys.argv[1:] ) # Get game components based on input
    runGames( **args )

    # import cProfile
    # cProfile.run("runGames( **args )")
    pass
|
||||||
52
p3_rl/pacmanAgents.py
Normal file
52
p3_rl/pacmanAgents.py
Normal file
@@ -0,0 +1,52 @@
|
|||||||
|
# pacmanAgents.py
|
||||||
|
# ---------------
|
||||||
|
# Licensing Information: You are free to use or extend these projects for
|
||||||
|
# educational purposes provided that (1) you do not distribute or publish
|
||||||
|
# solutions, (2) you retain this notice, and (3) you provide clear
|
||||||
|
# attribution to UC Berkeley, including a link to http://ai.berkeley.edu.
|
||||||
|
#
|
||||||
|
# Attribution Information: The Pacman AI projects were developed at UC Berkeley.
|
||||||
|
# The core projects and autograders were primarily created by John DeNero
|
||||||
|
# (denero@cs.berkeley.edu) and Dan Klein (klein@cs.berkeley.edu).
|
||||||
|
# Student side autograding was added by Brad Miller, Nick Hay, and
|
||||||
|
# Pieter Abbeel (pabbeel@cs.berkeley.edu).
|
||||||
|
|
||||||
|
|
||||||
|
from pacman import Directions
|
||||||
|
from game import Agent
|
||||||
|
import random
|
||||||
|
import game
|
||||||
|
import util
|
||||||
|
|
||||||
|
class LeftTurnAgent(game.Agent):
    "An agent that turns left at every opportunity"

    def getAction(self, state):
        """Prefer turning left; fall back to straight, right, then a U-turn."""
        legal = state.getLegalPacmanActions()
        heading = state.getPacmanState().configuration.direction
        if heading == Directions.STOP:
            heading = Directions.NORTH
        left = Directions.LEFT[heading]
        # Preference order: left turn, straight ahead, right turn, U-turn.
        for choice in (left, heading, Directions.RIGHT[heading], Directions.LEFT[left]):
            if choice in legal:
                return choice
        return Directions.STOP
|
||||||
|
|
||||||
|
class GreedyAgent(Agent):
    """Chooses uniformly at random among the moves whose successor state
    scores highest under the configured evaluation function."""

    def __init__(self, evalFn="scoreEvaluation"):
        # Resolve the evaluation function by name from this module.
        self.evaluationFunction = util.lookup(evalFn, globals())
        assert self.evaluationFunction != None

    def getAction(self, state):
        "Pick a best-scoring successor move (never Stop, ties broken randomly)."
        # Candidate actions: everything legal except stopping in place.
        legal = state.getLegalPacmanActions()
        if Directions.STOP in legal: legal.remove(Directions.STOP)

        # Score each candidate by evaluating the state it leads to.
        scored = []
        for action in legal:
            successor = state.generateSuccessor(0, action)
            scored.append((self.evaluationFunction(successor), action))
        bestScore = max(scored)[0]
        bestActions = [action for score, action in scored if score == bestScore]
        return random.choice(bestActions)
|
||||||
|
|
||||||
|
def scoreEvaluation(state):
    """Evaluate a game state as its current score."""
    score = state.getScore()
    return score
|
||||||
18
p3_rl/projectParams.py
Normal file
18
p3_rl/projectParams.py
Normal file
@@ -0,0 +1,18 @@
|
|||||||
|
# projectParams.py
|
||||||
|
# ----------------
|
||||||
|
# Licensing Information: You are free to use or extend these projects for
|
||||||
|
# educational purposes provided that (1) you do not distribute or publish
|
||||||
|
# solutions, (2) you retain this notice, and (3) you provide clear
|
||||||
|
# attribution to UC Berkeley, including a link to http://ai.berkeley.edu.
|
||||||
|
#
|
||||||
|
# Attribution Information: The Pacman AI projects were developed at UC Berkeley.
|
||||||
|
# The core projects and autograders were primarily created by John DeNero
|
||||||
|
# (denero@cs.berkeley.edu) and Dan Klein (klein@cs.berkeley.edu).
|
||||||
|
# Student side autograding was added by Brad Miller, Nick Hay, and
|
||||||
|
# Pieter Abbeel (pabbeel@cs.berkeley.edu).
|
||||||
|
|
||||||
|
|
||||||
|
STUDENT_CODE_DEFAULT = 'analysis.py,qlearningAgents.py,valueIterationAgents.py'
|
||||||
|
PROJECT_TEST_CLASSES = 'reinforcementTestClasses.py'
|
||||||
|
PROJECT_NAME = 'Project 3: Reinforcement learning'
|
||||||
|
BONUS_PIC = False
|
||||||
186
p3_rl/qlearningAgents.py
Normal file
186
p3_rl/qlearningAgents.py
Normal file
@@ -0,0 +1,186 @@
|
|||||||
|
# qlearningAgents.py
|
||||||
|
# ------------------
|
||||||
|
# Licensing Information: You are free to use or extend these projects for
|
||||||
|
# educational purposes provided that (1) you do not distribute or publish
|
||||||
|
# solutions, (2) you retain this notice, and (3) you provide clear
|
||||||
|
# attribution to UC Berkeley, including a link to http://ai.berkeley.edu.
|
||||||
|
#
|
||||||
|
# Attribution Information: The Pacman AI projects were developed at UC Berkeley.
|
||||||
|
# The core projects and autograders were primarily created by John DeNero
|
||||||
|
# (denero@cs.berkeley.edu) and Dan Klein (klein@cs.berkeley.edu).
|
||||||
|
# Student side autograding was added by Brad Miller, Nick Hay, and
|
||||||
|
# Pieter Abbeel (pabbeel@cs.berkeley.edu).
|
||||||
|
|
||||||
|
|
||||||
|
from game import *
|
||||||
|
from learningAgents import ReinforcementAgent
|
||||||
|
from featureExtractors import *
|
||||||
|
|
||||||
|
import random,util,math
|
||||||
|
|
||||||
|
class QLearningAgent(ReinforcementAgent):
    """
      Q-Learning Agent

      Functions you should fill in:
        - computeValueFromQValues
        - computeActionFromQValues
        - getQValue
        - getAction
        - update

      Instance variables you have access to
        - self.epsilon (exploration prob)
        - self.alpha (learning rate)
        - self.discount (discount rate)

      Functions you should use
        - self.getLegalActions(state)
          which returns legal actions for a state
    """
    def __init__(self, **args):
        "You can initialize Q-values here..."
        ReinforcementAgent.__init__(self, **args)

        "*** YOUR CODE HERE ***"

    def getQValue(self, state, action):
        """
          Returns Q(state,action)
          Should return 0.0 if we have never seen a state
          or the Q node value otherwise
        """
        "*** YOUR CODE HERE ***"
        util.raiseNotDefined()


    def computeValueFromQValues(self, state):
        """
          Returns max_action Q(state,action)
          where the max is over legal actions.  Note that if
          there are no legal actions, which is the case at the
          terminal state, you should return a value of 0.0.
        """
        "*** YOUR CODE HERE ***"
        util.raiseNotDefined()

    def computeActionFromQValues(self, state):
        """
          Compute the best action to take in a state.  Note that if there
          are no legal actions, which is the case at the terminal state,
          you should return None.
        """
        "*** YOUR CODE HERE ***"
        util.raiseNotDefined()

    def getAction(self, state):
        """
          Compute the action to take in the current state.  With
          probability self.epsilon, we should take a random action and
          take the best policy action otherwise.  Note that if there are
          no legal actions, which is the case at the terminal state, you
          should choose None as the action.

          HINT: You might want to use util.flipCoin(prob)
          HINT: To pick randomly from a list, use random.choice(list)
        """
        # Pick Action
        legalActions = self.getLegalActions(state)
        action = None
        "*** YOUR CODE HERE ***"
        util.raiseNotDefined()

        return action

    def update(self, state, action, nextState, reward):
        """
          The parent class calls this to observe a
          state = action => nextState and reward transition.
          You should do your Q-Value update here

          NOTE: You should never call this function,
          it will be called on your behalf
        """
        "*** YOUR CODE HERE ***"
        util.raiseNotDefined()

    # Convenience aliases kept for the autograder's expected interface.
    def getPolicy(self, state):
        return self.computeActionFromQValues(state)

    def getValue(self, state):
        return self.computeValueFromQValues(state)
|
||||||
|
|
||||||
|
|
||||||
|
class PacmanQAgent(QLearningAgent):
    "Exactly the same as QLearningAgent, but with different default parameters"

    def __init__(self, epsilon=0.05,gamma=0.8,alpha=0.2, numTraining=0, **args):
        """
        These default parameters can be changed from the pacman.py command line.
        For example, to change the exploration rate, try:
            python pacman.py -p PacmanQLearningAgent -a epsilon=0.1

        alpha    - learning rate
        epsilon  - exploration rate
        gamma    - discount factor
        numTraining - number of training episodes, i.e. no learning after these many episodes
        """
        # Forward the Pacman-tuned defaults to the generic Q-learning agent.
        args['epsilon'] = epsilon
        args['gamma'] = gamma
        args['alpha'] = alpha
        args['numTraining'] = numTraining
        self.index = 0  # This is always Pacman
        QLearningAgent.__init__(self, **args)

    def getAction(self, state):
        """
        Simply calls the getAction method of QLearningAgent and then
        informs parent of action for Pacman.  Do not change or remove this
        method.
        """
        action = QLearningAgent.getAction(self,state)
        self.doAction(state,action)
        return action
|
||||||
|
|
||||||
|
|
||||||
|
class ApproximateQAgent(PacmanQAgent):
    """
       ApproximateQLearningAgent

       You should only have to overwrite getQValue
       and update.  All other QLearningAgent functions
       should work as is.
    """
    def __init__(self, extractor='IdentityExtractor', **args):
        # Resolve the feature extractor class by name and instantiate it.
        self.featExtractor = util.lookup(extractor, globals())()
        PacmanQAgent.__init__(self, **args)
        # Feature weights, shared across all states/actions.
        self.weights = util.Counter()

    def getWeights(self):
        return self.weights

    def getQValue(self, state, action):
        """
          Should return Q(state,action) = w * featureVector
          where * is the dotProduct operator
        """
        "*** YOUR CODE HERE ***"
        util.raiseNotDefined()

    def update(self, state, action, nextState, reward):
        """
           Should update your weights based on transition
        """
        "*** YOUR CODE HERE ***"
        util.raiseNotDefined()

    def final(self, state):
        "Called at the end of each game."
        # call the super-class final method
        PacmanQAgent.final(self, state)

        # did we finish training?
        if self.episodesSoFar == self.numTraining:
            # you might want to print your weights here for debugging
            "*** YOUR CODE HERE ***"
            pass
|
||||||
924
p3_rl/reinforcementTestClasses.py
Normal file
924
p3_rl/reinforcementTestClasses.py
Normal file
@@ -0,0 +1,924 @@
|
|||||||
|
# reinforcementTestClasses.py
|
||||||
|
# ---------------------------
|
||||||
|
# Licensing Information: You are free to use or extend these projects for
|
||||||
|
# educational purposes provided that (1) you do not distribute or publish
|
||||||
|
# solutions, (2) you retain this notice, and (3) you provide clear
|
||||||
|
# attribution to UC Berkeley, including a link to http://ai.berkeley.edu.
|
||||||
|
#
|
||||||
|
# Attribution Information: The Pacman AI projects were developed at UC Berkeley.
|
||||||
|
# The core projects and autograders were primarily created by John DeNero
|
||||||
|
# (denero@cs.berkeley.edu) and Dan Klein (klein@cs.berkeley.edu).
|
||||||
|
# Student side autograding was added by Brad Miller, Nick Hay, and
|
||||||
|
# Pieter Abbeel (pabbeel@cs.berkeley.edu).
|
||||||
|
|
||||||
|
|
||||||
|
import testClasses
|
||||||
|
import random, math, traceback, sys, os
|
||||||
|
import layout, textDisplay, pacman, gridworld
|
||||||
|
import time
|
||||||
|
from util import Counter, TimeoutFunction, FixedRandom
|
||||||
|
from collections import defaultdict
|
||||||
|
from pprint import PrettyPrinter
|
||||||
|
from hashlib import sha1
|
||||||
|
pp = PrettyPrinter()
|
||||||
|
VERBOSE = False
|
||||||
|
|
||||||
|
import gridworld
|
||||||
|
|
||||||
|
LIVINGREWARD = -0.1
|
||||||
|
NOISE = 0.2
|
||||||
|
|
||||||
|
class ValueIterationTest(testClasses.TestCase):
    """
    Autograder test for the student's ValueIterationAgent.

    Runs the agent on a Gridworld for several iteration counts and compares
    its values, Q-values and (at the final count) its policy against the
    solution file, within a small numeric tolerance.  Detailed failure
    output is written to a side file to keep console output short.
    """

    def __init__(self, question, testDict):
        super(ValueIterationTest, self).__init__(question, testDict)
        self.discount = float(testDict['discount'])
        self.grid = gridworld.Gridworld(parseGrid(testDict['grid']))
        iterations = int(testDict['valueIterations'])
        if 'noise' in testDict: self.grid.setNoise(float(testDict['noise']))
        if 'livingReward' in testDict: self.grid.setLivingReward(float(testDict['livingReward']))
        maxPreIterations = 10
        # Check each of the first few iterations individually, plus (when the
        # test asks for more) the final iteration count.
        self.numsIterationsForDisplay = range(min(iterations, maxPreIterations))
        self.testOutFile = testDict['test_out_file']
        if maxPreIterations < iterations:
            self.numsIterationsForDisplay.append(iterations)

    def writeFailureFile(self, string):
        # Full failure details go to the test output file.
        with open(self.testOutFile, 'w') as handle:
            handle.write(string)

    def removeFailureFileIfExists(self):
        # A stale failure file from an earlier run would be misleading.
        if os.path.exists(self.testOutFile):
            os.remove(self.testOutFile)

    def execute(self, grades, moduleDict, solutionDict):
        # Run the per-iteration checks; stop and fail on the first mismatch.
        failureOutputFileString = ''
        failureOutputStdString = ''
        for n in self.numsIterationsForDisplay:
            # The policy is only compared at the final iteration count.
            checkPolicy = (n == self.numsIterationsForDisplay[-1])
            testPass, stdOutString, fileOutString = self.executeNIterations(grades, moduleDict, solutionDict, n, checkPolicy)
            failureOutputStdString += stdOutString
            failureOutputFileString += fileOutString
            if not testPass:
                self.addMessage(failureOutputStdString)
                self.addMessage('For more details to help you debug, see test output file %s\n\n' % self.testOutFile)
                self.writeFailureFile(failureOutputFileString)
                return self.testFail(grades)
        self.removeFailureFileIfExists()
        return self.testPass(grades)

    def executeNIterations(self, grades, moduleDict, solutionDict, n, checkPolicy):
        # Compare values, per-action Q-values and (optionally) the policy
        # after n iterations against the solution file.
        testPass = True
        valuesPretty, qValuesPretty, actions, policyPretty = self.runAgent(moduleDict, n)
        stdOutString = ''
        fileOutString = ''
        valuesKey = "values_k_%d" % n
        if self.comparePrettyValues(valuesPretty, solutionDict[valuesKey]):
            fileOutString += "Values at iteration %d are correct.\n" % n
            fileOutString += "   Student/correct solution:\n %s\n" % self.prettyValueSolutionString(valuesKey, valuesPretty)
        else:
            testPass = False
            outString = "Values at iteration %d are NOT correct.\n" % n
            outString += "   Student solution:\n %s\n" % self.prettyValueSolutionString(valuesKey, valuesPretty)
            outString += "   Correct solution:\n %s\n" % self.prettyValueSolutionString(valuesKey, solutionDict[valuesKey])
            stdOutString += outString
            fileOutString += outString
        for action in actions:
            qValuesKey = 'q_values_k_%d_action_%s' % (n, action)
            qValues = qValuesPretty[action]
            if self.comparePrettyValues(qValues, solutionDict[qValuesKey]):
                fileOutString += "Q-Values at iteration %d for action %s are correct.\n" % (n, action)
                fileOutString += "   Student/correct solution:\n %s\n" % self.prettyValueSolutionString(qValuesKey, qValues)
            else:
                testPass = False
                outString = "Q-Values at iteration %d for action %s are NOT correct.\n" % (n, action)
                outString += "   Student solution:\n %s\n" % self.prettyValueSolutionString(qValuesKey, qValues)
                outString += "   Correct solution:\n %s\n" % self.prettyValueSolutionString(qValuesKey, solutionDict[qValuesKey])
                stdOutString += outString
                fileOutString += outString
        if checkPolicy:
            if not self.comparePrettyValues(policyPretty, solutionDict['policy']):
                testPass = False
                outString = "Policy is NOT correct.\n"
                outString += "   Student solution:\n %s\n" % self.prettyValueSolutionString('policy', policyPretty)
                outString += "   Correct solution:\n %s\n" % self.prettyValueSolutionString('policy', solutionDict['policy'])
                stdOutString += outString
                fileOutString += outString
        return testPass, stdOutString, fileOutString

    def writeSolution(self, moduleDict, filePath):
        # Generate the solution file from a reference implementation.
        with open(filePath, 'w') as handle:
            policyPretty = ''
            actions = []
            for n in self.numsIterationsForDisplay:
                valuesPretty, qValuesPretty, actions, policyPretty = self.runAgent(moduleDict, n)
                handle.write(self.prettyValueSolutionString('values_k_%d' % n, valuesPretty))
                for action in actions:
                    handle.write(self.prettyValueSolutionString('q_values_k_%d_action_%s' % (n, action), qValuesPretty[action]))
            handle.write(self.prettyValueSolutionString('policy', policyPretty))
            handle.write(self.prettyValueSolutionString('actions', '\n'.join(actions) + '\n'))
        return True

    def runAgent(self, moduleDict, numIterations):
        # Run the student's agent and collect pretty-printed values,
        # Q-values and policy for every state of the grid.
        agent = moduleDict['valueIterationAgents'].ValueIterationAgent(self.grid, discount=self.discount, iterations=numIterations)
        states = self.grid.getStates()
        # Union of the legal actions over all states.
        actions = list(reduce(lambda a, b: set(a).union(b), [self.grid.getPossibleActions(state) for state in states]))
        values = {}
        qValues = {}
        policy = {}
        for state in states:
            values[state] = agent.getValue(state)
            policy[state] = agent.computeActionFromValues(state)
            possibleActions = self.grid.getPossibleActions(state)
            for action in actions:
                if not qValues.has_key(action):
                    qValues[action] = {}
                if action in possibleActions:
                    qValues[action][state] = agent.computeQValueFromValues(state, action)
                else:
                    # Action not legal in this state; rendered as 'illegal'.
                    qValues[action][state] = None
        valuesPretty = self.prettyValues(values)
        policyPretty = self.prettyPolicy(policy)
        qValuesPretty = {}
        for action in actions:
            qValuesPretty[action] = self.prettyValues(qValues[action])
        return (valuesPretty, qValuesPretty, actions, policyPretty)

    def prettyPrint(self, elements, formatString):
        # Render a {state: value} map as a grid, top row first.
        pretty = ''
        states = self.grid.getStates()
        for ybar in range(self.grid.grid.height):
            y = self.grid.grid.height-1-ybar
            row = []
            for x in range(self.grid.grid.width):
                if (x, y) in states:
                    value = elements[(x, y)]
                    if value is None:
                        row.append('   illegal')
                    else:
                        row.append(formatString.format(elements[(x,y)]))
                else:
                    # Walls / non-states are rendered as underscores.
                    row.append('_' * 10)
            pretty += '        %s\n' % ("   ".join(row), )
        pretty += '\n'
        return pretty

    def prettyValues(self, values):
        return self.prettyPrint(values, '{0:10.4f}')

    def prettyPolicy(self, policy):
        return self.prettyPrint(policy, '{0:10s}')

    def prettyValueSolutionString(self, name, pretty):
        return '%s: """\n%s\n"""\n\n' % (name, pretty.rstrip())

    def comparePrettyValues(self, aPretty, bPretty, tolerance=0.01):
        # Token-wise comparison: numbers within tolerance, text exactly.
        aList = self.parsePrettyValues(aPretty)
        bList = self.parsePrettyValues(bPretty)
        if len(aList) != len(bList):
            return False
        for a, b in zip(aList, bList):
            try:
                aNum = float(a)
                bNum = float(b)
                # error = abs((aNum - bNum) / ((aNum + bNum) / 2.0))
                error = abs(aNum - bNum)
                if error > tolerance:
                    return False
            except ValueError:
                if a.strip() != b.strip():
                    return False
        return True

    def parsePrettyValues(self, pretty):
        values = pretty.split()
        return values
|
||||||
|
|
||||||
|
|
||||||
|
class ApproximateQLearningTest(testClasses.TestCase):
    """Grades an ApproximateQAgent: replays a fixed, seeded sequence of
    experiences and compares the learned weights and Q-values against a
    solution file at several checkpoints."""

    def __init__(self, question, testDict):
        super(ApproximateQLearningTest, self).__init__(question, testDict)
        self.discount = float(testDict['discount'])
        self.grid = gridworld.Gridworld(parseGrid(testDict['grid']))
        if 'noise' in testDict: self.grid.setNoise(float(testDict['noise']))
        if 'livingReward' in testDict: self.grid.setLivingReward(float(testDict['livingReward']))
        # NOTE(review): rebuilding the grid here discards the noise/livingReward
        # applied just above. Solution files were generated with this behavior,
        # so it is preserved -- confirm before "fixing".
        self.grid = gridworld.Gridworld(parseGrid(testDict['grid']))
        self.env = gridworld.GridworldEnvironment(self.grid)
        self.epsilon = float(testDict['epsilon'])
        self.learningRate = float(testDict['learningRate'])
        self.extractor = 'IdentityExtractor'
        if 'extractor' in testDict:
            self.extractor = testDict['extractor']
        self.opts = {'actionFn': self.env.getPossibleActions, 'epsilon': self.epsilon, 'gamma': self.discount, 'alpha': self.learningRate}
        numExperiences = int(testDict['numExperiences'])
        maxPreExperiences = 10
        # Fix: materialize the range so append() below also works on Python 3,
        # where range() is an immutable sequence, not a list.
        self.numsExperiencesForDisplay = list(range(min(numExperiences, maxPreExperiences)))
        self.testOutFile = testDict['test_out_file']
        if maxPreExperiences < numExperiences:
            self.numsExperiencesForDisplay.append(numExperiences)

    def writeFailureFile(self, string):
        """Dump the detailed failure transcript to the test output file."""
        with open(self.testOutFile, 'w') as handle:
            handle.write(string)

    def removeFailureFileIfExists(self):
        """Delete a stale failure transcript from a previous run, if any."""
        if os.path.exists(self.testOutFile):
            os.remove(self.testOutFile)

    def execute(self, grades, moduleDict, solutionDict):
        """Run the checks at every display point; fail fast on the first bad one."""
        failureOutputFileString = ''
        failureOutputStdString = ''
        for n in self.numsExperiencesForDisplay:
            testPass, stdOutString, fileOutString = self.executeNExperiences(grades, moduleDict, solutionDict, n)
            failureOutputStdString += stdOutString
            failureOutputFileString += fileOutString
            if not testPass:
                self.addMessage(failureOutputStdString)
                self.addMessage('For more details to help you debug, see test output file %s\n\n' % self.testOutFile)
                self.writeFailureFile(failureOutputFileString)
                return self.testFail(grades)
        self.removeFailureFileIfExists()
        return self.testPass(grades)

    def executeNExperiences(self, grades, moduleDict, solutionDict, n):
        """Compare weights and per-action Q-values after n experiences.

        Returns (testPass, stdOutString, fileOutString)."""
        testPass = True
        qValuesPretty, weights, actions, lastExperience = self.runAgent(moduleDict, n)
        stdOutString = ''
        fileOutString = "==================== Iteration %d ====================\n" % n
        if lastExperience is not None:
            fileOutString += "Agent observed the transition (startState = %s, action = %s, endState = %s, reward = %f)\n\n" % lastExperience
        weightsKey = 'weights_k_%d' % n
        # eval() is acceptable only because the solution file is instructor-
        # generated, never untrusted student input.
        if weights == eval(solutionDict[weightsKey]):
            fileOutString += "Weights at iteration %d are correct." % n
            fileOutString += " Student/correct solution:\n\n%s\n\n" % pp.pformat(weights)
        # NOTE(review): a weight mismatch alone does not set testPass = False;
        # the Q-value comparison below is what fails the test -- confirm intended.
        for action in actions:
            qValuesKey = 'q_values_k_%d_action_%s' % (n, action)
            qValues = qValuesPretty[action]
            if self.comparePrettyValues(qValues, solutionDict[qValuesKey]):
                fileOutString += "Q-Values at iteration %d for action '%s' are correct." % (n, action)
                fileOutString += " Student/correct solution:\n\t%s" % self.prettyValueSolutionString(qValuesKey, qValues)
            else:
                testPass = False
                outString = "Q-Values at iteration %d for action '%s' are NOT correct." % (n, action)
                outString += " Student solution:\n\t%s" % self.prettyValueSolutionString(qValuesKey, qValues)
                outString += " Correct solution:\n\t%s" % self.prettyValueSolutionString(qValuesKey, solutionDict[qValuesKey])
                stdOutString += outString
                fileOutString += outString
        return testPass, stdOutString, fileOutString

    def writeSolution(self, moduleDict, filePath):
        """Record weights and Q-values at each display point as the solution."""
        with open(filePath, 'w') as handle:
            for n in self.numsExperiencesForDisplay:
                qValuesPretty, weights, actions, _ = self.runAgent(moduleDict, n)
                handle.write(self.prettyValueSolutionString('weights_k_%d' % n, pp.pformat(weights)))
                for action in actions:
                    handle.write(self.prettyValueSolutionString('q_values_k_%d_action_%s' % (n, action), qValuesPretty[action]))
        return True

    def runAgent(self, moduleDict, numExperiences):
        """Feed numExperiences seeded experiences to a fresh ApproximateQAgent.

        Returns (qValuesPretty, weights, actions, lastExperience)."""
        agent = moduleDict['qlearningAgents'].ApproximateQAgent(extractor=self.extractor, **self.opts)
        # Fix: sorted() materializes a list, so this works whether filter()
        # yields a list (Python 2) or an iterator (Python 3).
        states = sorted(filter(lambda state: len(self.grid.getPossibleActions(state)) > 0, self.grid.getStates()))
        randObj = FixedRandom().random
        # choose a random start state and a random possible action from that state
        # get the next state and reward from the transition function
        lastExperience = None
        for i in range(numExperiences):
            startState = randObj.choice(states)
            action = randObj.choice(self.grid.getPossibleActions(startState))
            (endState, reward) = self.env.getRandomNextState(startState, action, randObj=randObj)
            lastExperience = (startState, action, endState, reward)
            agent.update(*lastExperience)
        actions = list(reduce(lambda a, b: set(a).union(b), [self.grid.getPossibleActions(state) for state in states]))
        qValues = {}
        weights = agent.getWeights()
        for state in states:
            possibleActions = self.grid.getPossibleActions(state)
            for action in actions:
                # Fix: dict.has_key() was removed in Python 3; `in` is the
                # equivalent membership test on both versions.
                if action not in qValues:
                    qValues[action] = {}
                if action in possibleActions:
                    qValues[action][state] = agent.getQValue(state, action)
                else:
                    qValues[action][state] = None
        qValuesPretty = {}
        for action in actions:
            qValuesPretty[action] = self.prettyValues(qValues[action])
        return (qValuesPretty, weights, actions, lastExperience)

    def prettyPrint(self, elements, formatString):
        """Render a {state: value} map as a grid-shaped text table.

        Wall cells print as underscores, None values as 'illegal'."""
        pretty = ''
        states = self.grid.getStates()
        for ybar in range(self.grid.grid.height):
            y = self.grid.grid.height - 1 - ybar  # print the top row first
            row = []
            for x in range(self.grid.grid.width):
                if (x, y) in states:
                    value = elements[(x, y)]
                    if value is None:
                        row.append(' illegal')
                    else:
                        row.append(formatString.format(elements[(x, y)]))
                else:
                    row.append('_' * 10)
            pretty += ' %s\n' % (" ".join(row),)
        pretty += '\n'
        return pretty

    def prettyValues(self, values):
        """Format numeric cell values to 4 decimal places in 10-char cells."""
        return self.prettyPrint(values, '{0:10.4f}')

    def prettyPolicy(self, policy):
        """Format action strings in 10-char cells."""
        return self.prettyPrint(policy, '{0:10s}')

    def prettyValueSolutionString(self, name, pretty):
        """Wrap a pretty table as a `name: triple-quoted` solution-file entry."""
        return '%s: """\n%s\n"""\n\n' % (name, pretty.rstrip())

    def comparePrettyValues(self, aPretty, bPretty, tolerance=0.01):
        """Cell-wise comparison of two pretty tables: numeric cells must agree
        within `tolerance` (absolute error); other cells must match exactly."""
        aList = self.parsePrettyValues(aPretty)
        bList = self.parsePrettyValues(bPretty)
        if len(aList) != len(bList):
            return False
        for a, b in zip(aList, bList):
            try:
                aNum = float(a)
                bNum = float(b)
                # error = abs((aNum - bNum) / ((aNum + bNum) / 2.0))
                error = abs(aNum - bNum)
                if error > tolerance:
                    return False
            except ValueError:
                # Non-numeric cells (e.g. 'illegal', '___') compare as text.
                if a.strip() != b.strip():
                    return False
        return True

    def parsePrettyValues(self, pretty):
        """Tokenize a pretty table into whitespace-separated cells."""
        values = pretty.split()
        return values
|
||||||
|
|
||||||
|
|
||||||
|
class QLearningTest(testClasses.TestCase):
    """Grades a QLearningAgent: replays a fixed, seeded experience sequence
    and compares Q-values (and, at the final checkpoint, values and policy)
    against a solution file."""

    def __init__(self, question, testDict):
        super(QLearningTest, self).__init__(question, testDict)
        self.discount = float(testDict['discount'])
        self.grid = gridworld.Gridworld(parseGrid(testDict['grid']))
        if 'noise' in testDict: self.grid.setNoise(float(testDict['noise']))
        if 'livingReward' in testDict: self.grid.setLivingReward(float(testDict['livingReward']))
        # NOTE(review): rebuilding the grid here discards the noise/livingReward
        # applied just above; solution files were generated with this behavior.
        self.grid = gridworld.Gridworld(parseGrid(testDict['grid']))
        self.env = gridworld.GridworldEnvironment(self.grid)
        self.epsilon = float(testDict['epsilon'])
        self.learningRate = float(testDict['learningRate'])
        self.opts = {'actionFn': self.env.getPossibleActions, 'epsilon': self.epsilon, 'gamma': self.discount, 'alpha': self.learningRate}
        numExperiences = int(testDict['numExperiences'])
        maxPreExperiences = 10
        # Fix: materialize the range so append() below also works on Python 3,
        # where range() is an immutable sequence, not a list.
        self.numsExperiencesForDisplay = list(range(min(numExperiences, maxPreExperiences)))
        self.testOutFile = testDict['test_out_file']
        if maxPreExperiences < numExperiences:
            self.numsExperiencesForDisplay.append(numExperiences)

    def writeFailureFile(self, string):
        """Dump the detailed failure transcript to the test output file."""
        with open(self.testOutFile, 'w') as handle:
            handle.write(string)

    def removeFailureFileIfExists(self):
        """Delete a stale failure transcript from a previous run, if any."""
        if os.path.exists(self.testOutFile):
            os.remove(self.testOutFile)

    def execute(self, grades, moduleDict, solutionDict):
        """Run checks at each display point; values/policy only at the last."""
        failureOutputFileString = ''
        failureOutputStdString = ''
        for n in self.numsExperiencesForDisplay:
            # Values and policy are only recorded for the final display point.
            checkValuesAndPolicy = (n == self.numsExperiencesForDisplay[-1])
            testPass, stdOutString, fileOutString = self.executeNExperiences(grades, moduleDict, solutionDict, n, checkValuesAndPolicy)
            failureOutputStdString += stdOutString
            failureOutputFileString += fileOutString
            if not testPass:
                self.addMessage(failureOutputStdString)
                self.addMessage('For more details to help you debug, see test output file %s\n\n' % self.testOutFile)
                self.writeFailureFile(failureOutputFileString)
                return self.testFail(grades)
        self.removeFailureFileIfExists()
        return self.testPass(grades)

    def executeNExperiences(self, grades, moduleDict, solutionDict, n, checkValuesAndPolicy):
        """Compare Q-values (and optionally values/policy) after n experiences.

        Returns (testPass, stdOutString, fileOutString)."""
        testPass = True
        valuesPretty, qValuesPretty, actions, policyPretty, lastExperience = self.runAgent(moduleDict, n)
        stdOutString = ''
        fileOutString = "==================== Iteration %d ====================\n" % n
        if lastExperience is not None:
            fileOutString += "Agent observed the transition (startState = %s, action = %s, endState = %s, reward = %f)\n\n\n" % lastExperience
        for action in actions:
            qValuesKey = 'q_values_k_%d_action_%s' % (n, action)
            qValues = qValuesPretty[action]
            if self.comparePrettyValues(qValues, solutionDict[qValuesKey]):
                fileOutString += "Q-Values at iteration %d for action '%s' are correct." % (n, action)
                fileOutString += " Student/correct solution:\n\t%s" % self.prettyValueSolutionString(qValuesKey, qValues)
            else:
                testPass = False
                outString = "Q-Values at iteration %d for action '%s' are NOT correct." % (n, action)
                outString += " Student solution:\n\t%s" % self.prettyValueSolutionString(qValuesKey, qValues)
                outString += " Correct solution:\n\t%s" % self.prettyValueSolutionString(qValuesKey, solutionDict[qValuesKey])
                stdOutString += outString
                fileOutString += outString
        if checkValuesAndPolicy:
            if not self.comparePrettyValues(valuesPretty, solutionDict['values']):
                testPass = False
                outString = "Values are NOT correct."
                outString += " Student solution:\n\t%s" % self.prettyValueSolutionString('values', valuesPretty)
                outString += " Correct solution:\n\t%s" % self.prettyValueSolutionString('values', solutionDict['values'])
                stdOutString += outString
                fileOutString += outString
            if not self.comparePrettyValues(policyPretty, solutionDict['policy']):
                testPass = False
                outString = "Policy is NOT correct."
                outString += " Student solution:\n\t%s" % self.prettyValueSolutionString('policy', policyPretty)
                outString += " Correct solution:\n\t%s" % self.prettyValueSolutionString('policy', solutionDict['policy'])
                stdOutString += outString
                fileOutString += outString
        return testPass, stdOutString, fileOutString

    def writeSolution(self, moduleDict, filePath):
        """Record Q-values at each display point, plus the final values/policy."""
        with open(filePath, 'w') as handle:
            valuesPretty = ''
            policyPretty = ''
            for n in self.numsExperiencesForDisplay:
                valuesPretty, qValuesPretty, actions, policyPretty, _ = self.runAgent(moduleDict, n)
                for action in actions:
                    handle.write(self.prettyValueSolutionString('q_values_k_%d_action_%s' % (n, action), qValuesPretty[action]))
            handle.write(self.prettyValueSolutionString('values', valuesPretty))
            handle.write(self.prettyValueSolutionString('policy', policyPretty))
        return True

    def runAgent(self, moduleDict, numExperiences):
        """Feed numExperiences seeded experiences to a fresh QLearningAgent.

        Returns (valuesPretty, qValuesPretty, actions, policyPretty, lastExperience)."""
        agent = moduleDict['qlearningAgents'].QLearningAgent(**self.opts)
        # Fix: sorted() materializes a list, so this works whether filter()
        # yields a list (Python 2) or an iterator (Python 3).
        states = sorted(filter(lambda state: len(self.grid.getPossibleActions(state)) > 0, self.grid.getStates()))
        randObj = FixedRandom().random
        # choose a random start state and a random possible action from that state
        # get the next state and reward from the transition function
        lastExperience = None
        for i in range(numExperiences):
            startState = randObj.choice(states)
            action = randObj.choice(self.grid.getPossibleActions(startState))
            (endState, reward) = self.env.getRandomNextState(startState, action, randObj=randObj)
            lastExperience = (startState, action, endState, reward)
            agent.update(*lastExperience)
        actions = list(reduce(lambda a, b: set(a).union(b), [self.grid.getPossibleActions(state) for state in states]))
        values = {}
        qValues = {}
        policy = {}
        for state in states:
            values[state] = agent.computeValueFromQValues(state)
            policy[state] = agent.computeActionFromQValues(state)
            possibleActions = self.grid.getPossibleActions(state)
            for action in actions:
                # Fix: dict.has_key() was removed in Python 3; use `in`.
                if action not in qValues:
                    qValues[action] = {}
                if action in possibleActions:
                    qValues[action][state] = agent.getQValue(state, action)
                else:
                    qValues[action][state] = None
        valuesPretty = self.prettyValues(values)
        policyPretty = self.prettyPolicy(policy)
        qValuesPretty = {}
        for action in actions:
            qValuesPretty[action] = self.prettyValues(qValues[action])
        return (valuesPretty, qValuesPretty, actions, policyPretty, lastExperience)

    def prettyPrint(self, elements, formatString):
        """Render a {state: value} map as a grid-shaped text table.

        Wall cells print as underscores, None values as 'illegal'."""
        pretty = ''
        states = self.grid.getStates()
        for ybar in range(self.grid.grid.height):
            y = self.grid.grid.height - 1 - ybar  # print the top row first
            row = []
            for x in range(self.grid.grid.width):
                if (x, y) in states:
                    value = elements[(x, y)]
                    if value is None:
                        row.append(' illegal')
                    else:
                        row.append(formatString.format(elements[(x, y)]))
                else:
                    row.append('_' * 10)
            pretty += ' %s\n' % (" ".join(row),)
        pretty += '\n'
        return pretty

    def prettyValues(self, values):
        """Format numeric cell values to 4 decimal places in 10-char cells."""
        return self.prettyPrint(values, '{0:10.4f}')

    def prettyPolicy(self, policy):
        """Format action strings in 10-char cells."""
        return self.prettyPrint(policy, '{0:10s}')

    def prettyValueSolutionString(self, name, pretty):
        """Wrap a pretty table as a `name: triple-quoted` solution-file entry."""
        return '%s: """\n%s\n"""\n\n' % (name, pretty.rstrip())

    def comparePrettyValues(self, aPretty, bPretty, tolerance=0.01):
        """Cell-wise comparison of two pretty tables: numeric cells must agree
        within `tolerance` (absolute error); other cells must match exactly."""
        aList = self.parsePrettyValues(aPretty)
        bList = self.parsePrettyValues(bPretty)
        if len(aList) != len(bList):
            return False
        for a, b in zip(aList, bList):
            try:
                aNum = float(a)
                bNum = float(b)
                # error = abs((aNum - bNum) / ((aNum + bNum) / 2.0))
                error = abs(aNum - bNum)
                if error > tolerance:
                    return False
            except ValueError:
                # Non-numeric cells (e.g. 'illegal', '___') compare as text.
                if a.strip() != b.strip():
                    return False
        return True

    def parsePrettyValues(self, pretty):
        """Tokenize a pretty table into whitespace-separated cells."""
        values = pretty.split()
        return values
|
||||||
|
|
||||||
|
|
||||||
|
class EpsilonGreedyTest(testClasses.TestCase):
    """Empirically checks that getAction is epsilon-greedy by sampling it many
    times per state and estimating the exploration rate."""

    def __init__(self, question, testDict):
        super(EpsilonGreedyTest, self).__init__(question, testDict)
        self.discount = float(testDict['discount'])
        self.grid = gridworld.Gridworld(parseGrid(testDict['grid']))
        if 'noise' in testDict: self.grid.setNoise(float(testDict['noise']))
        if 'livingReward' in testDict: self.grid.setLivingReward(float(testDict['livingReward']))

        # NOTE(review): rebuilding the grid here discards the noise/livingReward
        # applied just above; preserved because graded behavior depends on it.
        self.grid = gridworld.Gridworld(parseGrid(testDict['grid']))
        self.env = gridworld.GridworldEnvironment(self.grid)
        self.epsilon = float(testDict['epsilon'])
        self.learningRate = float(testDict['learningRate'])
        self.numExperiences = int(testDict['numExperiences'])
        self.numIterations = int(testDict['iterations'])
        self.opts = {'actionFn': self.env.getPossibleActions, 'epsilon': self.epsilon, 'gamma': self.discount, 'alpha': self.learningRate}

    def execute(self, grades, moduleDict, solutionDict):
        """Pass/fail based solely on the empirical epsilon check."""
        if self.testEpsilonGreedy(moduleDict):
            return self.testPass(grades)
        else:
            return self.testFail(grades)

    def writeSolution(self, moduleDict, filePath):
        """This test needs no solution data; write a placeholder file."""
        with open(filePath, 'w') as handle:
            handle.write('# This is the solution file for %s.\n' % self.path)
            handle.write('# File intentionally blank.\n')
        return True

    def runAgent(self, moduleDict):
        """Train a fresh QLearningAgent on the fixed, seeded experience stream."""
        agent = moduleDict['qlearningAgents'].QLearningAgent(**self.opts)
        # Fix: sorted() materializes a list, so this works whether filter()
        # yields a list (Python 2) or an iterator (Python 3).
        states = sorted(filter(lambda state: len(self.grid.getPossibleActions(state)) > 0, self.grid.getStates()))
        randObj = FixedRandom().random
        # choose a random start state and a random possible action from that state
        # get the next state and reward from the transition function
        for i in range(self.numExperiences):
            startState = randObj.choice(states)
            action = randObj.choice(self.grid.getPossibleActions(startState))
            (endState, reward) = self.env.getRandomNextState(startState, action, randObj=randObj)
            agent.update(startState, action, endState, reward)
        return agent

    def testEpsilonGreedy(self, moduleDict, tolerance=0.025):
        """Sample getAction numIterations times per state and check that the
        implied exploration rate matches self.epsilon within tolerance."""
        agent = self.runAgent(moduleDict)
        for state in self.grid.getStates():
            numLegalActions = len(agent.getLegalActions(state))
            if numLegalActions <= 1:
                # With <= 1 legal action, greedy and random coincide and the
                # estimator below would divide by zero.
                continue
            numGreedyChoices = 0
            optimalAction = agent.computeActionFromQValues(state)
            for iteration in range(self.numIterations):
                # assume that their computeActionFromQValues implementation is correct (q4 tests this)
                if agent.getAction(state) == optimalAction:
                    numGreedyChoices += 1
            # e = epsilon, g = # greedy actions, n = numIterations, k = numLegalActions
            # g = n * [(1-e) + e/k] -> e = (n - g) / (n - n/k)
            empiricalEpsilonNumerator = self.numIterations - numGreedyChoices
            empiricalEpsilonDenominator = self.numIterations - self.numIterations / float(numLegalActions)
            empiricalEpsilon = empiricalEpsilonNumerator / empiricalEpsilonDenominator
            error = abs(empiricalEpsilon - self.epsilon)
            if error > tolerance:
                self.addMessage("Epsilon-greedy action selection is not correct.")
                self.addMessage("Actual epsilon = %f; student empirical epsilon = %f; error = %f > tolerance = %f" % (self.epsilon, empiricalEpsilon, error, tolerance))
                return False
        return True
|
||||||
|
|
||||||
|
|
||||||
|
### q6
|
||||||
|
class Question6Test(testClasses.TestCase):
    """Checks analysis.question6() against a hashed reference answer, so the
    expected answer is not revealed in the test file."""

    def __init__(self, question, testDict):
        super(Question6Test, self).__init__(question, testDict)

    def execute(self, grades, moduleDict, solutionDict):
        """Normalize the student's answer, hash it, and compare."""
        studentSolution = moduleDict['analysis'].question6()
        studentSolution = str(studentSolution).strip().lower()
        # Fix: encode before hashing -- hashlib.sha1 requires bytes on
        # Python 3; .encode('utf-8') is a harmless round-trip on Python 2 str.
        hashedSolution = sha1(studentSolution.encode('utf-8')).hexdigest()
        if hashedSolution == '46729c96bb1e4081fdc81a8ff74b3e5db8fba415':
            return self.testPass(grades)
        else:
            self.addMessage("Solution is not correct.")
            self.addMessage(" Student solution: %s" % (studentSolution,))
            return self.testFail(grades)

    def writeSolution(self, moduleDict, filePath):
        """No solution data needed; write a placeholder file.

        Fix: use a context manager so the handle is closed even on error,
        matching the other writeSolution implementations in this file."""
        with open(filePath, 'w') as handle:
            handle.write('# This is the solution file for %s.\n' % self.path)
            handle.write('# File intentionally blank.\n')
        return True
|
||||||
|
|
||||||
|
|
||||||
|
### q7/q8
|
||||||
|
### =====
|
||||||
|
## Average wins of a pacman agent
|
||||||
|
|
||||||
|
class EvalAgentTest(testClasses.TestCase):
    """Runs a pacman agent and awards partial credit based on average score,
    games-not-timed-out, and win counts against configured thresholds."""

    def __init__(self, question, testDict):
        super(EvalAgentTest, self).__init__(question, testDict)
        self.pacmanParams = testDict['pacmanParams']

        # Hard minimums: falling below any of these zeroes the whole test.
        self.scoreMinimum = int(testDict['scoreMinimum']) if 'scoreMinimum' in testDict else None
        self.nonTimeoutMinimum = int(testDict['nonTimeoutMinimum']) if 'nonTimeoutMinimum' in testDict else None
        self.winsMinimum = int(testDict['winsMinimum']) if 'winsMinimum' in testDict else None

        # Each threshold reached earns one point for that metric.
        self.scoreThresholds = [int(s) for s in testDict.get('scoreThresholds', '').split()]
        self.nonTimeoutThresholds = [int(s) for s in testDict.get('nonTimeoutThresholds', '').split()]
        self.winsThresholds = [int(s) for s in testDict.get('winsThresholds', '').split()]

        self.maxPoints = sum([len(t) for t in [self.scoreThresholds, self.nonTimeoutThresholds, self.winsThresholds]])

    def execute(self, grades, moduleDict, solutionDict):
        """Play the configured games, aggregate stats, and award points."""
        self.addMessage('Grading agent using command: python pacman.py %s' % (self.pacmanParams,))

        startTime = time.time()
        games = pacman.runGames(**pacman.readCommand(self.pacmanParams.split(' ')))
        totalTime = time.time() - startTime
        numGames = len(games)

        stats = {'time': totalTime, 'wins': [g.state.isWin() for g in games].count(True),
                 'games': games, 'scores': [g.state.getScore() for g in games],
                 'timeouts': [g.agentTimeout for g in games].count(True), 'crashes': [g.agentCrashed for g in games].count(True)}

        averageScore = sum(stats['scores']) / float(len(stats['scores']))
        nonTimeouts = numGames - stats['timeouts']
        wins = stats['wins']

        def gradeThreshold(value, minimum, thresholds, name):
            # One point per threshold reached, but only when the hard minimum
            # (if any) is satisfied.
            # Fix: identity comparison with None (PEP 8), not ==.
            points = 0
            passed = (minimum is None) or (value >= minimum)
            if passed:
                for t in thresholds:
                    if value >= t:
                        points += 1
            return (passed, points, value, minimum, thresholds, name)

        results = [gradeThreshold(averageScore, self.scoreMinimum, self.scoreThresholds, "average score"),
                   gradeThreshold(nonTimeouts, self.nonTimeoutMinimum, self.nonTimeoutThresholds, "games not timed out"),
                   gradeThreshold(wins, self.winsMinimum, self.winsThresholds, "wins")]

        totalPoints = 0
        for passed, points, value, minimum, thresholds, name in results:
            if minimum is None and len(thresholds) == 0:
                continue  # metric not graded for this test

            # print passed, points, value, minimum, thresholds, name
            totalPoints += points
            if not passed:
                assert points == 0
                self.addMessage("%s %s (fail: below minimum value %s)" % (value, name, minimum))
            else:
                self.addMessage("%s %s (%s of %s points)" % (value, name, points, len(thresholds)))

            if minimum is not None:
                self.addMessage(" Grading scheme:")
                self.addMessage(" < %s: fail" % (minimum,))
                if len(thresholds) == 0 or minimum != thresholds[0]:
                    self.addMessage(" >= %s: 0 points" % (minimum,))
                for idx, threshold in enumerate(thresholds):
                    self.addMessage(" >= %s: %s points" % (threshold, idx + 1))
            elif len(thresholds) > 0:
                self.addMessage(" Grading scheme:")
                self.addMessage(" < %s: 0 points" % (thresholds[0],))
                for idx, threshold in enumerate(thresholds):
                    self.addMessage(" >= %s: %s points" % (threshold, idx + 1))

        # Failing any hard minimum zeroes the whole test.
        if any([not passed for passed, _, _, _, _, _ in results]):
            totalPoints = 0

        return self.testPartial(grades, totalPoints, self.maxPoints)

    def writeSolution(self, moduleDict, filePath):
        """No solution data needed; write a placeholder file."""
        with open(filePath, 'w') as handle:
            handle.write('# This is the solution file for %s.\n' % self.path)
            handle.write('# File intentionally blank.\n')
        return True
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
|
### q2/q3
|
||||||
|
### =====
|
||||||
|
## For each parameter setting, compute the optimal policy, see if it satisfies some properties
|
||||||
|
|
||||||
|
def followPath(policy, start, numSteps=100):
    """Follow `policy` deterministically (ignoring noise) from `start`.

    Returns the visited states as "(x,y)" strings, appending 'TERMINAL_STATE'
    when an exit (or undefined) action is taken. Stops after `numSteps` steps
    or when a state has no policy entry.
    """
    state = start
    path = []
    for i in range(numSteps):
        if state not in policy:
            break
        action = policy[state]
        path.append("(%s,%s)" % state)
        if action == 'north': nextState = state[0], state[1] + 1
        if action == 'south': nextState = state[0], state[1] - 1
        if action == 'east': nextState = state[0] + 1, state[1]
        if action == 'west': nextState = state[0] - 1, state[1]
        # Fix: identity comparison with None (PEP 8); == None relies on __eq__.
        if action == 'exit' or action is None:
            path.append('TERMINAL_STATE')
            break
        state = nextState

    return path
|
||||||
|
|
||||||
|
def parseGrid(string):
    """Parse a whitespace-delimited grid description into a gridworld grid.

    Numeric tokens become ints, '_' becomes a blank cell, and everything else
    ('#' walls, 'S' start, action letters) is kept as its string token.
    """
    grid = [[entry.strip() for entry in line.split()] for line in string.split('\n')]
    for row in grid:
        for x, col in enumerate(row):
            try:
                col = int(col)
            # Fix: catch only the conversion failure instead of a bare except,
            # which would also swallow KeyboardInterrupt/SystemExit.
            except ValueError:
                pass
            if col == "_":
                col = ' '
            row[x] = col
    return gridworld.makeGrid(grid)
|
||||||
|
|
||||||
|
|
||||||
|
def computePolicy(moduleDict, grid, discount):
    """Run the student's ValueIterationAgent on `grid` with the given discount
    and return its greedy policy as a {state: action} dict."""
    valueIterator = moduleDict['valueIterationAgents'].ValueIterationAgent(grid, discount=discount)
    return {state: valueIterator.computeActionFromValues(state) for state in grid.getStates()}
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
|
class GridPolicyTest(testClasses.TestCase):
    """Checks the parameters returned by an analysis question: runs value
    iteration with them on a specified grid and verifies both the resulting
    policy and the path it follows from the start state."""

    def __init__(self, question, testDict):
        super(GridPolicyTest, self).__init__(question, testDict)

        # Function in module in analysis that returns (discount, noise)
        self.parameterFn = testDict['parameterFn']
        self.question2 = testDict.get('question2', 'false').lower() == 'true'

        # GridWorld specification
        #   _ is empty space
        #   numbers are terminal states with that value
        #   # is a wall
        #   S is a start state
        #
        self.gridText = testDict['grid']
        self.grid = gridworld.Gridworld(parseGrid(testDict['grid']))
        self.gridName = testDict['gridName']

        # Policy specification
        #   _ policy choice not checked
        #   N, E, S, W policy action must be north, east, south, west
        #
        self.policy = parseGrid(testDict['policy'])

        # State the most probable path must visit
        #   (x,y) for a particular location; (0,0) is bottom left
        #   terminal for the terminal state
        self.pathVisits = testDict.get('pathVisits', None)

        # State the most probable path must not visit
        #   (x,y) for a particular location; (0,0) is bottom left
        #   terminal for the terminal state
        self.pathNotVisits = testDict.get('pathNotVisits', None)

    def execute(self, grades, moduleDict, solutionDict):
        """Grade the student's parameters for this question's grid."""
        if not hasattr(moduleDict['analysis'], self.parameterFn):
            self.addMessage('Method not implemented: analysis.%s' % (self.parameterFn,))
            return self.testFail(grades)

        result = getattr(moduleDict['analysis'], self.parameterFn)()

        # A 'NOT POSSIBLE' answer is only accepted where it is actually true.
        # Fix: isinstance() instead of comparing type() for the type check.
        if isinstance(result, str) and result.lower()[0:3] == "not":
            self.addMessage('Actually, it is possible!')
            return self.testFail(grades)

        if self.question2:
            livingReward = None
            try:
                discount, noise = result
                discount = float(discount)
                noise = float(noise)
            # Fix: narrow the bare except so KeyboardInterrupt/SystemExit
            # still propagate.
            except Exception:
                self.addMessage('Did not return a (discount, noise) pair; instead analysis.%s returned: %s' % (self.parameterFn, result))
                return self.testFail(grades)
            if discount != 0.9 and noise != 0.2:
                self.addMessage('Must change either the discount or the noise, not both. Returned (discount, noise) = %s' % (result,))
                return self.testFail(grades)
        else:
            try:
                discount, noise, livingReward = result
                discount = float(discount)
                noise = float(noise)
                livingReward = float(livingReward)
            except Exception:
                self.addMessage('Did not return a (discount, noise, living reward) triple; instead analysis.%s returned: %s' % (self.parameterFn, result))
                return self.testFail(grades)

        self.grid.setNoise(noise)
        # Fix: identity comparison with None (PEP 8).
        if livingReward is not None:
            self.grid.setLivingReward(livingReward)

        start = self.grid.getStartState()
        policy = computePolicy(moduleDict, self.grid, discount)

        ## check policy
        actionMap = {'N': 'north', 'E': 'east', 'S': 'south', 'W': 'west', 'X': 'exit'}
        width, height = self.policy.width, self.policy.height
        policyPassed = True
        for x in range(width):
            for y in range(height):
                if self.policy[x][y] in actionMap and policy[(x, y)] != actionMap[self.policy[x][y]]:
                    differPoint = (x, y)
                    policyPassed = False

        if not policyPassed:
            self.addMessage('Policy not correct.')
            self.addMessage(' Student policy at %s: %s' % (differPoint, policy[differPoint]))
            self.addMessage(' Correct policy at %s: %s' % (differPoint, actionMap[self.policy[differPoint[0]][differPoint[1]]]))
            self.addMessage(' Student policy:')
            self.printPolicy(policy, False)
            self.addMessage(" Legend: N,S,E,W at states which move north etc, X at states which exit,")
            self.addMessage(" . at states where the policy is not defined (e.g. walls)")
            self.addMessage(' Correct policy specification:')
            self.printPolicy(self.policy, True)
            self.addMessage(" Legend: N,S,E,W for states in which the student policy must move north etc,")
            self.addMessage(" _ for states where it doesn't matter what the student policy does.")
            self.printGridworld()
            return self.testFail(grades)

        ## check path
        path = followPath(policy, self.grid.getStartState())

        if self.pathVisits is not None and self.pathVisits not in path:
            self.addMessage('Policy does not visit state %s when moving without noise.' % (self.pathVisits,))
            self.addMessage(' States visited: %s' % (path,))
            self.addMessage(' Student policy:')
            self.printPolicy(policy, False)
            self.addMessage(" Legend: N,S,E,W at states which move north etc, X at states which exit,")
            self.addMessage(" . at states where policy not defined")
            self.printGridworld()
            return self.testFail(grades)

        if self.pathNotVisits is not None and self.pathNotVisits in path:
            self.addMessage('Policy visits state %s when moving without noise.' % (self.pathNotVisits,))
            self.addMessage(' States visited: %s' % (path,))
            self.addMessage(' Student policy:')
            self.printPolicy(policy, False)
            self.addMessage(" Legend: N,S,E,W at states which move north etc, X at states which exit,")
            self.addMessage(" . at states where policy not defined")
            self.printGridworld()
            return self.testFail(grades)

        return self.testPass(grades)

    def printGridworld(self):
        """Echo the raw grid text into the grading messages."""
        self.addMessage(' Gridworld:')
        for line in self.gridText.split('\n'):
            self.addMessage(' ' + line)
        self.addMessage(' Legend: # wall, _ empty, S start, numbers terminal states with that reward.')

    def printPolicy(self, policy, policyTypeIsGrid):
        """Print a policy: either the parsed specification grid (True) or a
        {state: action} dict from value iteration (False)."""
        if policyTypeIsGrid:
            legend = {'N': 'N', 'E': 'E', 'S': 'S', 'W': 'W', ' ': '_'}
        else:
            legend = {'north': 'N', 'east': 'E', 'south': 'S', 'west': 'W', 'exit': 'X', '.': '.', ' ': '_'}

        for ybar in range(self.grid.grid.height):
            y = self.grid.grid.height - 1 - ybar  # print the top row first
            if policyTypeIsGrid:
                self.addMessage(" %s" % (" ".join([legend[policy[x][y]] for x in range(self.grid.grid.width)]),))
            else:
                self.addMessage(" %s" % (" ".join([legend[policy.get((x, y), '.')] for x in range(self.grid.grid.width)]),))
        # for state in sorted(self.grid.getStates()):
        #     if state != 'TERMINAL_STATE':
        #         self.addMessage('   (%s,%s) %s' % (state[0], state[1], policy[state]))

    def writeSolution(self, moduleDict, filePath):
        """No solution data needed; write a placeholder file."""
        with open(filePath, 'w') as handle:
            handle.write('# This is the solution file for %s.\n' % self.path)
            handle.write('# File intentionally blank.\n')
        return True
|
||||||
|
|
||||||
189
p3_rl/testClasses.py
Normal file
189
p3_rl/testClasses.py
Normal file
@@ -0,0 +1,189 @@
|
|||||||
|
# testClasses.py
|
||||||
|
# --------------
|
||||||
|
# Licensing Information: You are free to use or extend these projects for
|
||||||
|
# educational purposes provided that (1) you do not distribute or publish
|
||||||
|
# solutions, (2) you retain this notice, and (3) you provide clear
|
||||||
|
# attribution to UC Berkeley, including a link to http://ai.berkeley.edu.
|
||||||
|
#
|
||||||
|
# Attribution Information: The Pacman AI projects were developed at UC Berkeley.
|
||||||
|
# The core projects and autograders were primarily created by John DeNero
|
||||||
|
# (denero@cs.berkeley.edu) and Dan Klein (klein@cs.berkeley.edu).
|
||||||
|
# Student side autograding was added by Brad Miller, Nick Hay, and
|
||||||
|
# Pieter Abbeel (pabbeel@cs.berkeley.edu).
|
||||||
|
|
||||||
|
|
||||||
|
# import modules from python standard library
|
||||||
|
import inspect
|
||||||
|
import re
|
||||||
|
import sys
|
||||||
|
|
||||||
|
|
||||||
|
# Class which models a question in a project. Note that questions have a
|
||||||
|
# maximum number of points they are worth, and are composed of a series of
|
||||||
|
# test cases
|
||||||
|
class Question(object):
|
||||||
|
|
||||||
|
def raiseNotDefined(self):
|
||||||
|
print 'Method not implemented: %s' % inspect.stack()[1][3]
|
||||||
|
sys.exit(1)
|
||||||
|
|
||||||
|
def __init__(self, questionDict, display):
|
||||||
|
self.maxPoints = int(questionDict['max_points'])
|
||||||
|
self.testCases = []
|
||||||
|
self.display = display
|
||||||
|
|
||||||
|
def getDisplay(self):
|
||||||
|
return self.display
|
||||||
|
|
||||||
|
def getMaxPoints(self):
|
||||||
|
return self.maxPoints
|
||||||
|
|
||||||
|
# Note that 'thunk' must be a function which accepts a single argument,
|
||||||
|
# namely a 'grading' object
|
||||||
|
def addTestCase(self, testCase, thunk):
|
||||||
|
self.testCases.append((testCase, thunk))
|
||||||
|
|
||||||
|
def execute(self, grades):
|
||||||
|
self.raiseNotDefined()
|
||||||
|
|
||||||
|
# Question in which all test cases must be passed in order to receive credit
|
||||||
|
class PassAllTestsQuestion(Question):
|
||||||
|
|
||||||
|
def execute(self, grades):
|
||||||
|
# TODO: is this the right way to use grades? The autograder doesn't seem to use it.
|
||||||
|
testsFailed = False
|
||||||
|
grades.assignZeroCredit()
|
||||||
|
for _, f in self.testCases:
|
||||||
|
if not f(grades):
|
||||||
|
testsFailed = True
|
||||||
|
if testsFailed:
|
||||||
|
grades.fail("Tests failed.")
|
||||||
|
else:
|
||||||
|
grades.assignFullCredit()
|
||||||
|
|
||||||
|
|
||||||
|
# Question in which predict credit is given for test cases with a ``points'' property.
|
||||||
|
# All other tests are mandatory and must be passed.
|
||||||
|
class HackedPartialCreditQuestion(Question):
|
||||||
|
|
||||||
|
def execute(self, grades):
|
||||||
|
# TODO: is this the right way to use grades? The autograder doesn't seem to use it.
|
||||||
|
grades.assignZeroCredit()
|
||||||
|
|
||||||
|
points = 0
|
||||||
|
passed = True
|
||||||
|
for testCase, f in self.testCases:
|
||||||
|
testResult = f(grades)
|
||||||
|
if "points" in testCase.testDict:
|
||||||
|
if testResult: points += float(testCase.testDict["points"])
|
||||||
|
else:
|
||||||
|
passed = passed and testResult
|
||||||
|
|
||||||
|
## FIXME: Below terrible hack to match q3's logic
|
||||||
|
if int(points) == self.maxPoints and not passed:
|
||||||
|
grades.assignZeroCredit()
|
||||||
|
else:
|
||||||
|
grades.addPoints(int(points))
|
||||||
|
|
||||||
|
|
||||||
|
class Q6PartialCreditQuestion(Question):
|
||||||
|
"""Fails any test which returns False, otherwise doesn't effect the grades object.
|
||||||
|
Partial credit tests will add the required points."""
|
||||||
|
|
||||||
|
def execute(self, grades):
|
||||||
|
grades.assignZeroCredit()
|
||||||
|
|
||||||
|
results = []
|
||||||
|
for _, f in self.testCases:
|
||||||
|
results.append(f(grades))
|
||||||
|
if False in results:
|
||||||
|
grades.assignZeroCredit()
|
||||||
|
|
||||||
|
class PartialCreditQuestion(Question):
|
||||||
|
"""Fails any test which returns False, otherwise doesn't effect the grades object.
|
||||||
|
Partial credit tests will add the required points."""
|
||||||
|
|
||||||
|
def execute(self, grades):
|
||||||
|
grades.assignZeroCredit()
|
||||||
|
|
||||||
|
for _, f in self.testCases:
|
||||||
|
if not f(grades):
|
||||||
|
grades.assignZeroCredit()
|
||||||
|
grades.fail("Tests failed.")
|
||||||
|
return False
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
|
class NumberPassedQuestion(Question):
|
||||||
|
"""Grade is the number of test cases passed."""
|
||||||
|
|
||||||
|
def execute(self, grades):
|
||||||
|
grades.addPoints([f(grades) for _, f in self.testCases].count(True))
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
|
# Template modeling a generic test case
|
||||||
|
class TestCase(object):
|
||||||
|
|
||||||
|
def raiseNotDefined(self):
|
||||||
|
print 'Method not implemented: %s' % inspect.stack()[1][3]
|
||||||
|
sys.exit(1)
|
||||||
|
|
||||||
|
def getPath(self):
|
||||||
|
return self.path
|
||||||
|
|
||||||
|
def __init__(self, question, testDict):
|
||||||
|
self.question = question
|
||||||
|
self.testDict = testDict
|
||||||
|
self.path = testDict['path']
|
||||||
|
self.messages = []
|
||||||
|
|
||||||
|
def __str__(self):
|
||||||
|
self.raiseNotDefined()
|
||||||
|
|
||||||
|
def execute(self, grades, moduleDict, solutionDict):
|
||||||
|
self.raiseNotDefined()
|
||||||
|
|
||||||
|
def writeSolution(self, moduleDict, filePath):
|
||||||
|
self.raiseNotDefined()
|
||||||
|
return True
|
||||||
|
|
||||||
|
# Tests should call the following messages for grading
|
||||||
|
# to ensure a uniform format for test output.
|
||||||
|
#
|
||||||
|
# TODO: this is hairy, but we need to fix grading.py's interface
|
||||||
|
# to get a nice hierarchical project - question - test structure,
|
||||||
|
# then these should be moved into Question proper.
|
||||||
|
def testPass(self, grades):
|
||||||
|
grades.addMessage('PASS: %s' % (self.path,))
|
||||||
|
for line in self.messages:
|
||||||
|
grades.addMessage(' %s' % (line,))
|
||||||
|
return True
|
||||||
|
|
||||||
|
def testFail(self, grades):
|
||||||
|
grades.addMessage('FAIL: %s' % (self.path,))
|
||||||
|
for line in self.messages:
|
||||||
|
grades.addMessage(' %s' % (line,))
|
||||||
|
return False
|
||||||
|
|
||||||
|
# This should really be question level?
|
||||||
|
#
|
||||||
|
def testPartial(self, grades, points, maxPoints):
|
||||||
|
grades.addPoints(points)
|
||||||
|
extraCredit = max(0, points - maxPoints)
|
||||||
|
regularCredit = points - extraCredit
|
||||||
|
|
||||||
|
grades.addMessage('%s: %s (%s of %s points)' % ("PASS" if points >= maxPoints else "FAIL", self.path, regularCredit, maxPoints))
|
||||||
|
if extraCredit > 0:
|
||||||
|
grades.addMessage('EXTRA CREDIT: %s points' % (extraCredit,))
|
||||||
|
|
||||||
|
for line in self.messages:
|
||||||
|
grades.addMessage(' %s' % (line,))
|
||||||
|
|
||||||
|
return True
|
||||||
|
|
||||||
|
def addMessage(self, message):
|
||||||
|
self.messages.extend(message.split('\n'))
|
||||||
|
|
||||||
85
p3_rl/testParser.py
Normal file
85
p3_rl/testParser.py
Normal file
@@ -0,0 +1,85 @@
|
|||||||
|
# testParser.py
|
||||||
|
# -------------
|
||||||
|
# Licensing Information: You are free to use or extend these projects for
|
||||||
|
# educational purposes provided that (1) you do not distribute or publish
|
||||||
|
# solutions, (2) you retain this notice, and (3) you provide clear
|
||||||
|
# attribution to UC Berkeley, including a link to http://ai.berkeley.edu.
|
||||||
|
#
|
||||||
|
# Attribution Information: The Pacman AI projects were developed at UC Berkeley.
|
||||||
|
# The core projects and autograders were primarily created by John DeNero
|
||||||
|
# (denero@cs.berkeley.edu) and Dan Klein (klein@cs.berkeley.edu).
|
||||||
|
# Student side autograding was added by Brad Miller, Nick Hay, and
|
||||||
|
# Pieter Abbeel (pabbeel@cs.berkeley.edu).
|
||||||
|
|
||||||
|
|
||||||
|
import re
|
||||||
|
import sys
|
||||||
|
|
||||||
|
class TestParser(object):
|
||||||
|
|
||||||
|
def __init__(self, path):
|
||||||
|
# save the path to the test file
|
||||||
|
self.path = path
|
||||||
|
|
||||||
|
def removeComments(self, rawlines):
|
||||||
|
# remove any portion of a line following a '#' symbol
|
||||||
|
fixed_lines = []
|
||||||
|
for l in rawlines:
|
||||||
|
idx = l.find('#')
|
||||||
|
if idx == -1:
|
||||||
|
fixed_lines.append(l)
|
||||||
|
else:
|
||||||
|
fixed_lines.append(l[0:idx])
|
||||||
|
return '\n'.join(fixed_lines)
|
||||||
|
|
||||||
|
def parse(self):
|
||||||
|
# read in the test case and remove comments
|
||||||
|
test = {}
|
||||||
|
with open(self.path) as handle:
|
||||||
|
raw_lines = handle.read().split('\n')
|
||||||
|
|
||||||
|
test_text = self.removeComments(raw_lines)
|
||||||
|
test['__raw_lines__'] = raw_lines
|
||||||
|
test['path'] = self.path
|
||||||
|
test['__emit__'] = []
|
||||||
|
lines = test_text.split('\n')
|
||||||
|
i = 0
|
||||||
|
# read a property in each loop cycle
|
||||||
|
while(i < len(lines)):
|
||||||
|
# skip blank lines
|
||||||
|
if re.match('\A\s*\Z', lines[i]):
|
||||||
|
test['__emit__'].append(("raw", raw_lines[i]))
|
||||||
|
i += 1
|
||||||
|
continue
|
||||||
|
m = re.match('\A([^"]*?):\s*"([^"]*)"\s*\Z', lines[i])
|
||||||
|
if m:
|
||||||
|
test[m.group(1)] = m.group(2)
|
||||||
|
test['__emit__'].append(("oneline", m.group(1)))
|
||||||
|
i += 1
|
||||||
|
continue
|
||||||
|
m = re.match('\A([^"]*?):\s*"""\s*\Z', lines[i])
|
||||||
|
if m:
|
||||||
|
msg = []
|
||||||
|
i += 1
|
||||||
|
while(not re.match('\A\s*"""\s*\Z', lines[i])):
|
||||||
|
msg.append(raw_lines[i])
|
||||||
|
i += 1
|
||||||
|
test[m.group(1)] = '\n'.join(msg)
|
||||||
|
test['__emit__'].append(("multiline", m.group(1)))
|
||||||
|
i += 1
|
||||||
|
continue
|
||||||
|
print 'error parsing test file: %s' % self.path
|
||||||
|
sys.exit(1)
|
||||||
|
return test
|
||||||
|
|
||||||
|
|
||||||
|
def emitTestDict(testDict, handle):
|
||||||
|
for kind, data in testDict['__emit__']:
|
||||||
|
if kind == "raw":
|
||||||
|
handle.write(data + "\n")
|
||||||
|
elif kind == "oneline":
|
||||||
|
handle.write('%s: "%s"\n' % (data, testDict[data]))
|
||||||
|
elif kind == "multiline":
|
||||||
|
handle.write('%s: """\n%s\n"""\n' % (data, testDict[data]))
|
||||||
|
else:
|
||||||
|
raise Exception("Bad __emit__")
|
||||||
0
p3_rl/test_cases/CONFIG
Normal file
0
p3_rl/test_cases/CONFIG
Normal file
410
p3_rl/test_cases/q1/1-tinygrid.solution
Normal file
410
p3_rl/test_cases/q1/1-tinygrid.solution
Normal file
@@ -0,0 +1,410 @@
|
|||||||
|
values_k_0: """
|
||||||
|
0.0000
|
||||||
|
0.0000
|
||||||
|
0.0000
|
||||||
|
"""
|
||||||
|
|
||||||
|
q_values_k_0_action_north: """
|
||||||
|
illegal
|
||||||
|
0.0000
|
||||||
|
illegal
|
||||||
|
"""
|
||||||
|
|
||||||
|
q_values_k_0_action_east: """
|
||||||
|
illegal
|
||||||
|
0.0000
|
||||||
|
illegal
|
||||||
|
"""
|
||||||
|
|
||||||
|
q_values_k_0_action_exit: """
|
||||||
|
-10.0000
|
||||||
|
illegal
|
||||||
|
10.0000
|
||||||
|
"""
|
||||||
|
|
||||||
|
q_values_k_0_action_south: """
|
||||||
|
illegal
|
||||||
|
0.0000
|
||||||
|
illegal
|
||||||
|
"""
|
||||||
|
|
||||||
|
q_values_k_0_action_west: """
|
||||||
|
illegal
|
||||||
|
0.0000
|
||||||
|
illegal
|
||||||
|
"""
|
||||||
|
|
||||||
|
values_k_1: """
|
||||||
|
-10.0000
|
||||||
|
0.0000
|
||||||
|
10.0000
|
||||||
|
"""
|
||||||
|
|
||||||
|
q_values_k_1_action_north: """
|
||||||
|
illegal
|
||||||
|
-5.0000
|
||||||
|
illegal
|
||||||
|
"""
|
||||||
|
|
||||||
|
q_values_k_1_action_east: """
|
||||||
|
illegal
|
||||||
|
0.0000
|
||||||
|
illegal
|
||||||
|
"""
|
||||||
|
|
||||||
|
q_values_k_1_action_exit: """
|
||||||
|
-10.0000
|
||||||
|
illegal
|
||||||
|
10.0000
|
||||||
|
"""
|
||||||
|
|
||||||
|
q_values_k_1_action_south: """
|
||||||
|
illegal
|
||||||
|
5.0000
|
||||||
|
illegal
|
||||||
|
"""
|
||||||
|
|
||||||
|
q_values_k_1_action_west: """
|
||||||
|
illegal
|
||||||
|
0.0000
|
||||||
|
illegal
|
||||||
|
"""
|
||||||
|
|
||||||
|
values_k_2: """
|
||||||
|
-10.0000
|
||||||
|
5.0000
|
||||||
|
10.0000
|
||||||
|
"""
|
||||||
|
|
||||||
|
q_values_k_2_action_north: """
|
||||||
|
illegal
|
||||||
|
-5.0000
|
||||||
|
illegal
|
||||||
|
"""
|
||||||
|
|
||||||
|
q_values_k_2_action_east: """
|
||||||
|
illegal
|
||||||
|
2.5000
|
||||||
|
illegal
|
||||||
|
"""
|
||||||
|
|
||||||
|
q_values_k_2_action_exit: """
|
||||||
|
-10.0000
|
||||||
|
illegal
|
||||||
|
10.0000
|
||||||
|
"""
|
||||||
|
|
||||||
|
q_values_k_2_action_south: """
|
||||||
|
illegal
|
||||||
|
5.0000
|
||||||
|
illegal
|
||||||
|
"""
|
||||||
|
|
||||||
|
q_values_k_2_action_west: """
|
||||||
|
illegal
|
||||||
|
2.5000
|
||||||
|
illegal
|
||||||
|
"""
|
||||||
|
|
||||||
|
values_k_3: """
|
||||||
|
-10.0000
|
||||||
|
5.0000
|
||||||
|
10.0000
|
||||||
|
"""
|
||||||
|
|
||||||
|
q_values_k_3_action_north: """
|
||||||
|
illegal
|
||||||
|
-5.0000
|
||||||
|
illegal
|
||||||
|
"""
|
||||||
|
|
||||||
|
q_values_k_3_action_east: """
|
||||||
|
illegal
|
||||||
|
2.5000
|
||||||
|
illegal
|
||||||
|
"""
|
||||||
|
|
||||||
|
q_values_k_3_action_exit: """
|
||||||
|
-10.0000
|
||||||
|
illegal
|
||||||
|
10.0000
|
||||||
|
"""
|
||||||
|
|
||||||
|
q_values_k_3_action_south: """
|
||||||
|
illegal
|
||||||
|
5.0000
|
||||||
|
illegal
|
||||||
|
"""
|
||||||
|
|
||||||
|
q_values_k_3_action_west: """
|
||||||
|
illegal
|
||||||
|
2.5000
|
||||||
|
illegal
|
||||||
|
"""
|
||||||
|
|
||||||
|
values_k_4: """
|
||||||
|
-10.0000
|
||||||
|
5.0000
|
||||||
|
10.0000
|
||||||
|
"""
|
||||||
|
|
||||||
|
q_values_k_4_action_north: """
|
||||||
|
illegal
|
||||||
|
-5.0000
|
||||||
|
illegal
|
||||||
|
"""
|
||||||
|
|
||||||
|
q_values_k_4_action_east: """
|
||||||
|
illegal
|
||||||
|
2.5000
|
||||||
|
illegal
|
||||||
|
"""
|
||||||
|
|
||||||
|
q_values_k_4_action_exit: """
|
||||||
|
-10.0000
|
||||||
|
illegal
|
||||||
|
10.0000
|
||||||
|
"""
|
||||||
|
|
||||||
|
q_values_k_4_action_south: """
|
||||||
|
illegal
|
||||||
|
5.0000
|
||||||
|
illegal
|
||||||
|
"""
|
||||||
|
|
||||||
|
q_values_k_4_action_west: """
|
||||||
|
illegal
|
||||||
|
2.5000
|
||||||
|
illegal
|
||||||
|
"""
|
||||||
|
|
||||||
|
values_k_5: """
|
||||||
|
-10.0000
|
||||||
|
5.0000
|
||||||
|
10.0000
|
||||||
|
"""
|
||||||
|
|
||||||
|
q_values_k_5_action_north: """
|
||||||
|
illegal
|
||||||
|
-5.0000
|
||||||
|
illegal
|
||||||
|
"""
|
||||||
|
|
||||||
|
q_values_k_5_action_east: """
|
||||||
|
illegal
|
||||||
|
2.5000
|
||||||
|
illegal
|
||||||
|
"""
|
||||||
|
|
||||||
|
q_values_k_5_action_exit: """
|
||||||
|
-10.0000
|
||||||
|
illegal
|
||||||
|
10.0000
|
||||||
|
"""
|
||||||
|
|
||||||
|
q_values_k_5_action_south: """
|
||||||
|
illegal
|
||||||
|
5.0000
|
||||||
|
illegal
|
||||||
|
"""
|
||||||
|
|
||||||
|
q_values_k_5_action_west: """
|
||||||
|
illegal
|
||||||
|
2.5000
|
||||||
|
illegal
|
||||||
|
"""
|
||||||
|
|
||||||
|
values_k_6: """
|
||||||
|
-10.0000
|
||||||
|
5.0000
|
||||||
|
10.0000
|
||||||
|
"""
|
||||||
|
|
||||||
|
q_values_k_6_action_north: """
|
||||||
|
illegal
|
||||||
|
-5.0000
|
||||||
|
illegal
|
||||||
|
"""
|
||||||
|
|
||||||
|
q_values_k_6_action_east: """
|
||||||
|
illegal
|
||||||
|
2.5000
|
||||||
|
illegal
|
||||||
|
"""
|
||||||
|
|
||||||
|
q_values_k_6_action_exit: """
|
||||||
|
-10.0000
|
||||||
|
illegal
|
||||||
|
10.0000
|
||||||
|
"""
|
||||||
|
|
||||||
|
q_values_k_6_action_south: """
|
||||||
|
illegal
|
||||||
|
5.0000
|
||||||
|
illegal
|
||||||
|
"""
|
||||||
|
|
||||||
|
q_values_k_6_action_west: """
|
||||||
|
illegal
|
||||||
|
2.5000
|
||||||
|
illegal
|
||||||
|
"""
|
||||||
|
|
||||||
|
values_k_7: """
|
||||||
|
-10.0000
|
||||||
|
5.0000
|
||||||
|
10.0000
|
||||||
|
"""
|
||||||
|
|
||||||
|
q_values_k_7_action_north: """
|
||||||
|
illegal
|
||||||
|
-5.0000
|
||||||
|
illegal
|
||||||
|
"""
|
||||||
|
|
||||||
|
q_values_k_7_action_east: """
|
||||||
|
illegal
|
||||||
|
2.5000
|
||||||
|
illegal
|
||||||
|
"""
|
||||||
|
|
||||||
|
q_values_k_7_action_exit: """
|
||||||
|
-10.0000
|
||||||
|
illegal
|
||||||
|
10.0000
|
||||||
|
"""
|
||||||
|
|
||||||
|
q_values_k_7_action_south: """
|
||||||
|
illegal
|
||||||
|
5.0000
|
||||||
|
illegal
|
||||||
|
"""
|
||||||
|
|
||||||
|
q_values_k_7_action_west: """
|
||||||
|
illegal
|
||||||
|
2.5000
|
||||||
|
illegal
|
||||||
|
"""
|
||||||
|
|
||||||
|
values_k_8: """
|
||||||
|
-10.0000
|
||||||
|
5.0000
|
||||||
|
10.0000
|
||||||
|
"""
|
||||||
|
|
||||||
|
q_values_k_8_action_north: """
|
||||||
|
illegal
|
||||||
|
-5.0000
|
||||||
|
illegal
|
||||||
|
"""
|
||||||
|
|
||||||
|
q_values_k_8_action_east: """
|
||||||
|
illegal
|
||||||
|
2.5000
|
||||||
|
illegal
|
||||||
|
"""
|
||||||
|
|
||||||
|
q_values_k_8_action_exit: """
|
||||||
|
-10.0000
|
||||||
|
illegal
|
||||||
|
10.0000
|
||||||
|
"""
|
||||||
|
|
||||||
|
q_values_k_8_action_south: """
|
||||||
|
illegal
|
||||||
|
5.0000
|
||||||
|
illegal
|
||||||
|
"""
|
||||||
|
|
||||||
|
q_values_k_8_action_west: """
|
||||||
|
illegal
|
||||||
|
2.5000
|
||||||
|
illegal
|
||||||
|
"""
|
||||||
|
|
||||||
|
values_k_9: """
|
||||||
|
-10.0000
|
||||||
|
5.0000
|
||||||
|
10.0000
|
||||||
|
"""
|
||||||
|
|
||||||
|
q_values_k_9_action_north: """
|
||||||
|
illegal
|
||||||
|
-5.0000
|
||||||
|
illegal
|
||||||
|
"""
|
||||||
|
|
||||||
|
q_values_k_9_action_east: """
|
||||||
|
illegal
|
||||||
|
2.5000
|
||||||
|
illegal
|
||||||
|
"""
|
||||||
|
|
||||||
|
q_values_k_9_action_exit: """
|
||||||
|
-10.0000
|
||||||
|
illegal
|
||||||
|
10.0000
|
||||||
|
"""
|
||||||
|
|
||||||
|
q_values_k_9_action_south: """
|
||||||
|
illegal
|
||||||
|
5.0000
|
||||||
|
illegal
|
||||||
|
"""
|
||||||
|
|
||||||
|
q_values_k_9_action_west: """
|
||||||
|
illegal
|
||||||
|
2.5000
|
||||||
|
illegal
|
||||||
|
"""
|
||||||
|
|
||||||
|
values_k_100: """
|
||||||
|
-10.0000
|
||||||
|
5.0000
|
||||||
|
10.0000
|
||||||
|
"""
|
||||||
|
|
||||||
|
q_values_k_100_action_north: """
|
||||||
|
illegal
|
||||||
|
-5.0000
|
||||||
|
illegal
|
||||||
|
"""
|
||||||
|
|
||||||
|
q_values_k_100_action_east: """
|
||||||
|
illegal
|
||||||
|
2.5000
|
||||||
|
illegal
|
||||||
|
"""
|
||||||
|
|
||||||
|
q_values_k_100_action_exit: """
|
||||||
|
-10.0000
|
||||||
|
illegal
|
||||||
|
10.0000
|
||||||
|
"""
|
||||||
|
|
||||||
|
q_values_k_100_action_south: """
|
||||||
|
illegal
|
||||||
|
5.0000
|
||||||
|
illegal
|
||||||
|
"""
|
||||||
|
|
||||||
|
q_values_k_100_action_west: """
|
||||||
|
illegal
|
||||||
|
2.5000
|
||||||
|
illegal
|
||||||
|
"""
|
||||||
|
|
||||||
|
policy: """
|
||||||
|
exit
|
||||||
|
south
|
||||||
|
exit
|
||||||
|
"""
|
||||||
|
|
||||||
|
actions: """
|
||||||
|
north
|
||||||
|
east
|
||||||
|
exit
|
||||||
|
south
|
||||||
|
west
|
||||||
|
"""
|
||||||
|
|
||||||
22
p3_rl/test_cases/q1/1-tinygrid.test
Normal file
22
p3_rl/test_cases/q1/1-tinygrid.test
Normal file
@@ -0,0 +1,22 @@
|
|||||||
|
class: "ValueIterationTest"
|
||||||
|
|
||||||
|
# GridWorld specification
|
||||||
|
# _ is empty space
|
||||||
|
# numbers are terminal states with that value
|
||||||
|
# # is a wall
|
||||||
|
# S is a start state
|
||||||
|
#
|
||||||
|
grid: """
|
||||||
|
-10
|
||||||
|
S
|
||||||
|
10
|
||||||
|
"""
|
||||||
|
discount: "0.5"
|
||||||
|
noise: "0.0"
|
||||||
|
livingReward: "0.0"
|
||||||
|
epsilon: "0.5"
|
||||||
|
learningRate: "0.1"
|
||||||
|
numExperiences: "100"
|
||||||
|
valueIterations: "100"
|
||||||
|
iterations: "10000"
|
||||||
|
|
||||||
410
p3_rl/test_cases/q1/2-tinygrid-noisy.solution
Normal file
410
p3_rl/test_cases/q1/2-tinygrid-noisy.solution
Normal file
@@ -0,0 +1,410 @@
|
|||||||
|
values_k_0: """
|
||||||
|
0.0000
|
||||||
|
0.0000
|
||||||
|
0.0000
|
||||||
|
"""
|
||||||
|
|
||||||
|
q_values_k_0_action_north: """
|
||||||
|
illegal
|
||||||
|
0.0000
|
||||||
|
illegal
|
||||||
|
"""
|
||||||
|
|
||||||
|
q_values_k_0_action_east: """
|
||||||
|
illegal
|
||||||
|
0.0000
|
||||||
|
illegal
|
||||||
|
"""
|
||||||
|
|
||||||
|
q_values_k_0_action_exit: """
|
||||||
|
-10.0000
|
||||||
|
illegal
|
||||||
|
10.0000
|
||||||
|
"""
|
||||||
|
|
||||||
|
q_values_k_0_action_south: """
|
||||||
|
illegal
|
||||||
|
0.0000
|
||||||
|
illegal
|
||||||
|
"""
|
||||||
|
|
||||||
|
q_values_k_0_action_west: """
|
||||||
|
illegal
|
||||||
|
0.0000
|
||||||
|
illegal
|
||||||
|
"""
|
||||||
|
|
||||||
|
values_k_1: """
|
||||||
|
-10.0000
|
||||||
|
0.0000
|
||||||
|
10.0000
|
||||||
|
"""
|
||||||
|
|
||||||
|
q_values_k_1_action_north: """
|
||||||
|
illegal
|
||||||
|
-5.6250
|
||||||
|
illegal
|
||||||
|
"""
|
||||||
|
|
||||||
|
q_values_k_1_action_east: """
|
||||||
|
illegal
|
||||||
|
0.0000
|
||||||
|
illegal
|
||||||
|
"""
|
||||||
|
|
||||||
|
q_values_k_1_action_exit: """
|
||||||
|
-10.0000
|
||||||
|
illegal
|
||||||
|
10.0000
|
||||||
|
"""
|
||||||
|
|
||||||
|
q_values_k_1_action_south: """
|
||||||
|
illegal
|
||||||
|
5.6250
|
||||||
|
illegal
|
||||||
|
"""
|
||||||
|
|
||||||
|
q_values_k_1_action_west: """
|
||||||
|
illegal
|
||||||
|
0.0000
|
||||||
|
illegal
|
||||||
|
"""
|
||||||
|
|
||||||
|
values_k_2: """
|
||||||
|
-10.0000
|
||||||
|
5.6250
|
||||||
|
10.0000
|
||||||
|
"""
|
||||||
|
|
||||||
|
q_values_k_2_action_north: """
|
||||||
|
illegal
|
||||||
|
-4.5703
|
||||||
|
illegal
|
||||||
|
"""
|
||||||
|
|
||||||
|
q_values_k_2_action_east: """
|
||||||
|
illegal
|
||||||
|
3.1641
|
||||||
|
illegal
|
||||||
|
"""
|
||||||
|
|
||||||
|
q_values_k_2_action_exit: """
|
||||||
|
-10.0000
|
||||||
|
illegal
|
||||||
|
10.0000
|
||||||
|
"""
|
||||||
|
|
||||||
|
q_values_k_2_action_south: """
|
||||||
|
illegal
|
||||||
|
6.6797
|
||||||
|
illegal
|
||||||
|
"""
|
||||||
|
|
||||||
|
q_values_k_2_action_west: """
|
||||||
|
illegal
|
||||||
|
3.1641
|
||||||
|
illegal
|
||||||
|
"""
|
||||||
|
|
||||||
|
values_k_3: """
|
||||||
|
-10.0000
|
||||||
|
6.6797
|
||||||
|
10.0000
|
||||||
|
"""
|
||||||
|
|
||||||
|
q_values_k_3_action_north: """
|
||||||
|
illegal
|
||||||
|
-4.3726
|
||||||
|
illegal
|
||||||
|
"""
|
||||||
|
|
||||||
|
q_values_k_3_action_east: """
|
||||||
|
illegal
|
||||||
|
3.7573
|
||||||
|
illegal
|
||||||
|
"""
|
||||||
|
|
||||||
|
q_values_k_3_action_exit: """
|
||||||
|
-10.0000
|
||||||
|
illegal
|
||||||
|
10.0000
|
||||||
|
"""
|
||||||
|
|
||||||
|
q_values_k_3_action_south: """
|
||||||
|
illegal
|
||||||
|
6.8774
|
||||||
|
illegal
|
||||||
|
"""
|
||||||
|
|
||||||
|
q_values_k_3_action_west: """
|
||||||
|
illegal
|
||||||
|
3.7573
|
||||||
|
illegal
|
||||||
|
"""
|
||||||
|
|
||||||
|
values_k_4: """
|
||||||
|
-10.0000
|
||||||
|
6.8774
|
||||||
|
10.0000
|
||||||
|
"""
|
||||||
|
|
||||||
|
q_values_k_4_action_north: """
|
||||||
|
illegal
|
||||||
|
-4.3355
|
||||||
|
illegal
|
||||||
|
"""
|
||||||
|
|
||||||
|
q_values_k_4_action_east: """
|
||||||
|
illegal
|
||||||
|
3.8686
|
||||||
|
illegal
|
||||||
|
"""
|
||||||
|
|
||||||
|
q_values_k_4_action_exit: """
|
||||||
|
-10.0000
|
||||||
|
illegal
|
||||||
|
10.0000
|
||||||
|
"""
|
||||||
|
|
||||||
|
q_values_k_4_action_south: """
|
||||||
|
illegal
|
||||||
|
6.9145
|
||||||
|
illegal
|
||||||
|
"""
|
||||||
|
|
||||||
|
q_values_k_4_action_west: """
|
||||||
|
illegal
|
||||||
|
3.8686
|
||||||
|
illegal
|
||||||
|
"""
|
||||||
|
|
||||||
|
values_k_5: """
|
||||||
|
-10.0000
|
||||||
|
6.9145
|
||||||
|
10.0000
|
||||||
|
"""
|
||||||
|
|
||||||
|
q_values_k_5_action_north: """
|
||||||
|
illegal
|
||||||
|
-4.3285
|
||||||
|
illegal
|
||||||
|
"""
|
||||||
|
|
||||||
|
q_values_k_5_action_east: """
|
||||||
|
illegal
|
||||||
|
3.8894
|
||||||
|
illegal
|
||||||
|
"""
|
||||||
|
|
||||||
|
q_values_k_5_action_exit: """
|
||||||
|
-10.0000
|
||||||
|
illegal
|
||||||
|
10.0000
|
||||||
|
"""
|
||||||
|
|
||||||
|
q_values_k_5_action_south: """
|
||||||
|
illegal
|
||||||
|
6.9215
|
||||||
|
illegal
|
||||||
|
"""
|
||||||
|
|
||||||
|
q_values_k_5_action_west: """
|
||||||
|
illegal
|
||||||
|
3.8894
|
||||||
|
illegal
|
||||||
|
"""
|
||||||
|
|
||||||
|
values_k_6: """
|
||||||
|
-10.0000
|
||||||
|
6.9215
|
||||||
|
10.0000
|
||||||
|
"""
|
||||||
|
|
||||||
|
q_values_k_6_action_north: """
|
||||||
|
illegal
|
||||||
|
-4.3272
|
||||||
|
illegal
|
||||||
|
"""
|
||||||
|
|
||||||
|
q_values_k_6_action_east: """
|
||||||
|
illegal
|
||||||
|
3.8933
|
||||||
|
illegal
|
||||||
|
"""
|
||||||
|
|
||||||
|
q_values_k_6_action_exit: """
|
||||||
|
-10.0000
|
||||||
|
illegal
|
||||||
|
10.0000
|
||||||
|
"""
|
||||||
|
|
||||||
|
q_values_k_6_action_south: """
|
||||||
|
illegal
|
||||||
|
6.9228
|
||||||
|
illegal
|
||||||
|
"""
|
||||||
|
|
||||||
|
q_values_k_6_action_west: """
|
||||||
|
illegal
|
||||||
|
3.8933
|
||||||
|
illegal
|
||||||
|
"""
|
||||||
|
|
||||||
|
values_k_7: """
|
||||||
|
-10.0000
|
||||||
|
6.9228
|
||||||
|
10.0000
|
||||||
|
"""
|
||||||
|
|
||||||
|
q_values_k_7_action_north: """
|
||||||
|
illegal
|
||||||
|
-4.3270
|
||||||
|
illegal
|
||||||
|
"""
|
||||||
|
|
||||||
|
q_values_k_7_action_east: """
|
||||||
|
illegal
|
||||||
|
3.8941
|
||||||
|
illegal
|
||||||
|
"""
|
||||||
|
|
||||||
|
q_values_k_7_action_exit: """
|
||||||
|
-10.0000
|
||||||
|
illegal
|
||||||
|
10.0000
|
||||||
|
"""
|
||||||
|
|
||||||
|
q_values_k_7_action_south: """
|
||||||
|
illegal
|
||||||
|
6.9230
|
||||||
|
illegal
|
||||||
|
"""
|
||||||
|
|
||||||
|
q_values_k_7_action_west: """
|
||||||
|
illegal
|
||||||
|
3.8941
|
||||||
|
illegal
|
||||||
|
"""
|
||||||
|
|
||||||
|
values_k_8: """
|
||||||
|
-10.0000
|
||||||
|
6.9230
|
||||||
|
10.0000
|
||||||
|
"""
|
||||||
|
|
||||||
|
q_values_k_8_action_north: """
|
||||||
|
illegal
|
||||||
|
-4.3269
|
||||||
|
illegal
|
||||||
|
"""
|
||||||
|
|
||||||
|
q_values_k_8_action_east: """
|
||||||
|
illegal
|
||||||
|
3.8942
|
||||||
|
illegal
|
||||||
|
"""
|
||||||
|
|
||||||
|
q_values_k_8_action_exit: """
|
||||||
|
-10.0000
|
||||||
|
illegal
|
||||||
|
10.0000
|
||||||
|
"""
|
||||||
|
|
||||||
|
q_values_k_8_action_south: """
|
||||||
|
illegal
|
||||||
|
6.9231
|
||||||
|
illegal
|
||||||
|
"""
|
||||||
|
|
||||||
|
q_values_k_8_action_west: """
|
||||||
|
illegal
|
||||||
|
3.8942
|
||||||
|
illegal
|
||||||
|
"""
|
||||||
|
|
||||||
|
values_k_9: """
|
||||||
|
-10.0000
|
||||||
|
6.9231
|
||||||
|
10.0000
|
||||||
|
"""
|
||||||
|
|
||||||
|
q_values_k_9_action_north: """
|
||||||
|
illegal
|
||||||
|
-4.3269
|
||||||
|
illegal
|
||||||
|
"""
|
||||||
|
|
||||||
|
q_values_k_9_action_east: """
|
||||||
|
illegal
|
||||||
|
3.8942
|
||||||
|
illegal
|
||||||
|
"""
|
||||||
|
|
||||||
|
q_values_k_9_action_exit: """
|
||||||
|
-10.0000
|
||||||
|
illegal
|
||||||
|
10.0000
|
||||||
|
"""
|
||||||
|
|
||||||
|
q_values_k_9_action_south: """
|
||||||
|
illegal
|
||||||
|
6.9231
|
||||||
|
illegal
|
||||||
|
"""
|
||||||
|
|
||||||
|
q_values_k_9_action_west: """
|
||||||
|
illegal
|
||||||
|
3.8942
|
||||||
|
illegal
|
||||||
|
"""
|
||||||
|
|
||||||
|
values_k_100: """
|
||||||
|
-10.0000
|
||||||
|
6.9231
|
||||||
|
10.0000
|
||||||
|
"""
|
||||||
|
|
||||||
|
q_values_k_100_action_north: """
|
||||||
|
illegal
|
||||||
|
-4.3269
|
||||||
|
illegal
|
||||||
|
"""
|
||||||
|
|
||||||
|
q_values_k_100_action_east: """
|
||||||
|
illegal
|
||||||
|
3.8942
|
||||||
|
illegal
|
||||||
|
"""
|
||||||
|
|
||||||
|
q_values_k_100_action_exit: """
|
||||||
|
-10.0000
|
||||||
|
illegal
|
||||||
|
10.0000
|
||||||
|
"""
|
||||||
|
|
||||||
|
q_values_k_100_action_south: """
|
||||||
|
illegal
|
||||||
|
6.9231
|
||||||
|
illegal
|
||||||
|
"""
|
||||||
|
|
||||||
|
q_values_k_100_action_west: """
|
||||||
|
illegal
|
||||||
|
3.8942
|
||||||
|
illegal
|
||||||
|
"""
|
||||||
|
|
||||||
|
policy: """
|
||||||
|
exit
|
||||||
|
south
|
||||||
|
exit
|
||||||
|
"""
|
||||||
|
|
||||||
|
actions: """
|
||||||
|
north
|
||||||
|
east
|
||||||
|
exit
|
||||||
|
south
|
||||||
|
west
|
||||||
|
"""
|
||||||
|
|
||||||
22
p3_rl/test_cases/q1/2-tinygrid-noisy.test
Normal file
22
p3_rl/test_cases/q1/2-tinygrid-noisy.test
Normal file
@@ -0,0 +1,22 @@
|
|||||||
|
class: "ValueIterationTest"
|
||||||
|
|
||||||
|
# GridWorld specification
|
||||||
|
# _ is empty space
|
||||||
|
# numbers are terminal states with that value
|
||||||
|
# # is a wall
|
||||||
|
# S is a start state
|
||||||
|
#
|
||||||
|
grid: """
|
||||||
|
-10
|
||||||
|
S
|
||||||
|
10
|
||||||
|
"""
|
||||||
|
discount: "0.75"
|
||||||
|
noise: "0.25"
|
||||||
|
livingReward: "0.0"
|
||||||
|
epsilon: "0.5"
|
||||||
|
learningRate: "0.1"
|
||||||
|
numExperiences: "100"
|
||||||
|
valueIterations: "100"
|
||||||
|
iterations: "10000"
|
||||||
|
|
||||||
678
p3_rl/test_cases/q1/3-bridge.solution
Normal file
678
p3_rl/test_cases/q1/3-bridge.solution
Normal file
@@ -0,0 +1,678 @@
|
|||||||
|
values_k_0: """
|
||||||
|
__________ 0.0000 __________
|
||||||
|
0.0000 0.0000 0.0000
|
||||||
|
0.0000 0.0000 0.0000
|
||||||
|
0.0000 0.0000 0.0000
|
||||||
|
0.0000 0.0000 0.0000
|
||||||
|
0.0000 0.0000 0.0000
|
||||||
|
__________ 0.0000 __________
|
||||||
|
"""
|
||||||
|
|
||||||
|
q_values_k_0_action_north: """
|
||||||
|
__________ illegal __________
|
||||||
|
illegal 0.0000 illegal
|
||||||
|
illegal 0.0000 illegal
|
||||||
|
illegal 0.0000 illegal
|
||||||
|
illegal 0.0000 illegal
|
||||||
|
illegal 0.0000 illegal
|
||||||
|
__________ illegal __________
|
||||||
|
"""
|
||||||
|
|
||||||
|
q_values_k_0_action_east: """
|
||||||
|
__________ illegal __________
|
||||||
|
illegal 0.0000 illegal
|
||||||
|
illegal 0.0000 illegal
|
||||||
|
illegal 0.0000 illegal
|
||||||
|
illegal 0.0000 illegal
|
||||||
|
illegal 0.0000 illegal
|
||||||
|
__________ illegal __________
|
||||||
|
"""
|
||||||
|
|
||||||
|
q_values_k_0_action_exit: """
|
||||||
|
__________ 10.0000 __________
|
||||||
|
-100.0000 illegal -100.0000
|
||||||
|
-100.0000 illegal -100.0000
|
||||||
|
-100.0000 illegal -100.0000
|
||||||
|
-100.0000 illegal -100.0000
|
||||||
|
-100.0000 illegal -100.0000
|
||||||
|
__________ 1.0000 __________
|
||||||
|
"""
|
||||||
|
|
||||||
|
q_values_k_0_action_south: """
|
||||||
|
__________ illegal __________
|
||||||
|
illegal 0.0000 illegal
|
||||||
|
illegal 0.0000 illegal
|
||||||
|
illegal 0.0000 illegal
|
||||||
|
illegal 0.0000 illegal
|
||||||
|
illegal 0.0000 illegal
|
||||||
|
__________ illegal __________
|
||||||
|
"""
|
||||||
|
|
||||||
|
q_values_k_0_action_west: """
|
||||||
|
__________ illegal __________
|
||||||
|
illegal 0.0000 illegal
|
||||||
|
illegal 0.0000 illegal
|
||||||
|
illegal 0.0000 illegal
|
||||||
|
illegal 0.0000 illegal
|
||||||
|
illegal 0.0000 illegal
|
||||||
|
__________ illegal __________
|
||||||
|
"""
|
||||||
|
|
||||||
|
values_k_1: """
|
||||||
|
__________ 10.0000 __________
|
||||||
|
-100.0000 0.0000 -100.0000
|
||||||
|
-100.0000 0.0000 -100.0000
|
||||||
|
-100.0000 0.0000 -100.0000
|
||||||
|
-100.0000 0.0000 -100.0000
|
||||||
|
-100.0000 0.0000 -100.0000
|
||||||
|
__________ 1.0000 __________
|
||||||
|
"""
|
||||||
|
|
||||||
|
q_values_k_1_action_north: """
|
||||||
|
__________ illegal __________
|
||||||
|
illegal -0.8500 illegal
|
||||||
|
illegal -8.5000 illegal
|
||||||
|
illegal -8.5000 illegal
|
||||||
|
illegal -8.5000 illegal
|
||||||
|
illegal -8.5000 illegal
|
||||||
|
__________ illegal __________
|
||||||
|
"""
|
||||||
|
|
||||||
|
q_values_k_1_action_east: """
|
||||||
|
__________ illegal __________
|
||||||
|
illegal -76.0750 illegal
|
||||||
|
illegal -76.5000 illegal
|
||||||
|
illegal -76.5000 illegal
|
||||||
|
illegal -76.5000 illegal
|
||||||
|
illegal -76.4575 illegal
|
||||||
|
__________ illegal __________
|
||||||
|
"""
|
||||||
|
|
||||||
|
q_values_k_1_action_exit: """
|
||||||
|
__________ 10.0000 __________
|
||||||
|
-100.0000 illegal -100.0000
|
||||||
|
-100.0000 illegal -100.0000
|
||||||
|
-100.0000 illegal -100.0000
|
||||||
|
-100.0000 illegal -100.0000
|
||||||
|
-100.0000 illegal -100.0000
|
||||||
|
__________ 1.0000 __________
|
||||||
|
"""
|
||||||
|
|
||||||
|
q_values_k_1_action_south: """
|
||||||
|
__________ illegal __________
|
||||||
|
illegal -8.5000 illegal
|
||||||
|
illegal -8.5000 illegal
|
||||||
|
illegal -8.5000 illegal
|
||||||
|
illegal -8.5000 illegal
|
||||||
|
illegal -7.7350 illegal
|
||||||
|
__________ illegal __________
|
||||||
|
"""
|
||||||
|
|
||||||
|
q_values_k_1_action_west: """
|
||||||
|
__________ illegal __________
|
||||||
|
illegal -76.0750 illegal
|
||||||
|
illegal -76.5000 illegal
|
||||||
|
illegal -76.5000 illegal
|
||||||
|
illegal -76.5000 illegal
|
||||||
|
illegal -76.4575 illegal
|
||||||
|
__________ illegal __________
|
||||||
|
"""
|
||||||
|
|
||||||
|
values_k_2: """
|
||||||
|
__________ 10.0000 __________
|
||||||
|
-100.0000 -0.8500 -100.0000
|
||||||
|
-100.0000 -8.5000 -100.0000
|
||||||
|
-100.0000 -8.5000 -100.0000
|
||||||
|
-100.0000 -8.5000 -100.0000
|
||||||
|
-100.0000 -7.7350 -100.0000
|
||||||
|
__________ 1.0000 __________
|
||||||
|
"""
|
||||||
|
|
||||||
|
q_values_k_2_action_north: """
|
||||||
|
__________ illegal __________
|
||||||
|
illegal -0.8500 illegal
|
||||||
|
illegal -9.1502 illegal
|
||||||
|
illegal -15.0025 illegal
|
||||||
|
illegal -15.0025 illegal
|
||||||
|
illegal -15.0025 illegal
|
||||||
|
__________ illegal __________
|
||||||
|
"""
|
||||||
|
|
||||||
|
q_values_k_2_action_east: """
|
||||||
|
__________ illegal __________
|
||||||
|
illegal -76.4363 illegal
|
||||||
|
illegal -76.8974 illegal
|
||||||
|
illegal -77.2225 illegal
|
||||||
|
illegal -77.1900 illegal
|
||||||
|
illegal -76.8187 illegal
|
||||||
|
__________ illegal __________
|
||||||
|
"""
|
||||||
|
|
||||||
|
q_values_k_2_action_exit: """
|
||||||
|
__________ 10.0000 __________
|
||||||
|
-100.0000 illegal -100.0000
|
||||||
|
-100.0000 illegal -100.0000
|
||||||
|
-100.0000 illegal -100.0000
|
||||||
|
-100.0000 illegal -100.0000
|
||||||
|
-100.0000 illegal -100.0000
|
||||||
|
__________ 1.0000 __________
|
||||||
|
"""
|
||||||
|
|
||||||
|
q_values_k_2_action_south: """
|
||||||
|
__________ illegal __________
|
||||||
|
illegal -15.0025 illegal
|
||||||
|
illegal -15.0025 illegal
|
||||||
|
illegal -15.0025 illegal
|
||||||
|
illegal -14.4173 illegal
|
||||||
|
illegal -7.7350 illegal
|
||||||
|
__________ illegal __________
|
||||||
|
"""
|
||||||
|
|
||||||
|
q_values_k_2_action_west: """
|
||||||
|
__________ illegal __________
|
||||||
|
illegal -76.4363 illegal
|
||||||
|
illegal -76.8974 illegal
|
||||||
|
illegal -77.2225 illegal
|
||||||
|
illegal -77.1900 illegal
|
||||||
|
illegal -76.8187 illegal
|
||||||
|
__________ illegal __________
|
||||||
|
"""
|
||||||
|
|
||||||
|
values_k_3: """
|
||||||
|
__________ 10.0000 __________
|
||||||
|
-100.0000 -0.8500 -100.0000
|
||||||
|
-100.0000 -9.1502 -100.0000
|
||||||
|
-100.0000 -15.0025 -100.0000
|
||||||
|
-100.0000 -14.4173 -100.0000
|
||||||
|
-100.0000 -7.7350 -100.0000
|
||||||
|
__________ 1.0000 __________
|
||||||
|
"""
|
||||||
|
|
||||||
|
q_values_k_3_action_north: """
|
||||||
|
__________ illegal __________
|
||||||
|
illegal -0.8500 illegal
|
||||||
|
illegal -9.1502 illegal
|
||||||
|
illegal -15.4999 illegal
|
||||||
|
illegal -19.9769 illegal
|
||||||
|
illegal -19.5292 illegal
|
||||||
|
__________ illegal __________
|
||||||
|
"""
|
||||||
|
|
||||||
|
q_values_k_3_action_east: """
|
||||||
|
__________ illegal __________
|
||||||
|
illegal -76.4639 illegal
|
||||||
|
illegal -77.1737 illegal
|
||||||
|
illegal -77.5016 illegal
|
||||||
|
illegal -77.4663 illegal
|
||||||
|
illegal -77.0702 illegal
|
||||||
|
__________ illegal __________
|
||||||
|
"""
|
||||||
|
|
||||||
|
q_values_k_3_action_exit: """
|
||||||
|
__________ 10.0000 __________
|
||||||
|
-100.0000 illegal -100.0000
|
||||||
|
-100.0000 illegal -100.0000
|
||||||
|
-100.0000 illegal -100.0000
|
||||||
|
-100.0000 illegal -100.0000
|
||||||
|
-100.0000 illegal -100.0000
|
||||||
|
__________ 1.0000 __________
|
||||||
|
"""
|
||||||
|
|
||||||
|
q_values_k_3_action_south: """
|
||||||
|
__________ illegal __________
|
||||||
|
illegal -15.4999 illegal
|
||||||
|
illegal -19.9769 illegal
|
||||||
|
illegal -19.5292 illegal
|
||||||
|
illegal -14.4173 illegal
|
||||||
|
illegal -7.7350 illegal
|
||||||
|
__________ illegal __________
|
||||||
|
"""
|
||||||
|
|
||||||
|
q_values_k_3_action_west: """
|
||||||
|
__________ illegal __________
|
||||||
|
illegal -76.4639 illegal
|
||||||
|
illegal -77.1737 illegal
|
||||||
|
illegal -77.5016 illegal
|
||||||
|
illegal -77.4663 illegal
|
||||||
|
illegal -77.0702 illegal
|
||||||
|
__________ illegal __________
|
||||||
|
"""
|
||||||
|
|
||||||
|
values_k_4: """
|
||||||
|
__________ 10.0000 __________
|
||||||
|
-100.0000 -0.8500 -100.0000
|
||||||
|
-100.0000 -9.1502 -100.0000
|
||||||
|
-100.0000 -15.4999 -100.0000
|
||||||
|
-100.0000 -14.4173 -100.0000
|
||||||
|
-100.0000 -7.7350 -100.0000
|
||||||
|
__________ 1.0000 __________
|
||||||
|
"""
|
||||||
|
|
||||||
|
q_values_k_4_action_north: """
|
||||||
|
__________ illegal __________
|
||||||
|
illegal -0.8500 illegal
|
||||||
|
illegal -9.1502 illegal
|
||||||
|
illegal -15.4999 illegal
|
||||||
|
illegal -20.3575 illegal
|
||||||
|
illegal -19.5292 illegal
|
||||||
|
__________ illegal __________
|
||||||
|
"""
|
||||||
|
|
||||||
|
q_values_k_4_action_east: """
|
||||||
|
__________ illegal __________
|
||||||
|
illegal -76.4639 illegal
|
||||||
|
illegal -77.1949 illegal
|
||||||
|
illegal -77.5016 illegal
|
||||||
|
illegal -77.4875 illegal
|
||||||
|
illegal -77.0702 illegal
|
||||||
|
__________ illegal __________
|
||||||
|
"""
|
||||||
|
|
||||||
|
q_values_k_4_action_exit: """
|
||||||
|
__________ 10.0000 __________
|
||||||
|
-100.0000 illegal -100.0000
|
||||||
|
-100.0000 illegal -100.0000
|
||||||
|
-100.0000 illegal -100.0000
|
||||||
|
-100.0000 illegal -100.0000
|
||||||
|
-100.0000 illegal -100.0000
|
||||||
|
__________ 1.0000 __________
|
||||||
|
"""
|
||||||
|
|
||||||
|
q_values_k_4_action_south: """
|
||||||
|
__________ illegal __________
|
||||||
|
illegal -15.4999 illegal
|
||||||
|
illegal -20.3575 illegal
|
||||||
|
illegal -19.5292 illegal
|
||||||
|
illegal -14.4173 illegal
|
||||||
|
illegal -7.7350 illegal
|
||||||
|
__________ illegal __________
|
||||||
|
"""
|
||||||
|
|
||||||
|
q_values_k_4_action_west: """
|
||||||
|
__________ illegal __________
|
||||||
|
illegal -76.4639 illegal
|
||||||
|
illegal -77.1949 illegal
|
||||||
|
illegal -77.5016 illegal
|
||||||
|
illegal -77.4875 illegal
|
||||||
|
illegal -77.0702 illegal
|
||||||
|
__________ illegal __________
|
||||||
|
"""
|
||||||
|
|
||||||
|
values_k_5: """
|
||||||
|
__________ 10.0000 __________
|
||||||
|
-100.0000 -0.8500 -100.0000
|
||||||
|
-100.0000 -9.1502 -100.0000
|
||||||
|
-100.0000 -15.4999 -100.0000
|
||||||
|
-100.0000 -14.4173 -100.0000
|
||||||
|
-100.0000 -7.7350 -100.0000
|
||||||
|
__________ 1.0000 __________
|
||||||
|
"""
|
||||||
|
|
||||||
|
q_values_k_5_action_north: """
|
||||||
|
__________ illegal __________
|
||||||
|
illegal -0.8500 illegal
|
||||||
|
illegal -9.1502 illegal
|
||||||
|
illegal -15.4999 illegal
|
||||||
|
illegal -20.3575 illegal
|
||||||
|
illegal -19.5292 illegal
|
||||||
|
__________ illegal __________
|
||||||
|
"""
|
||||||
|
|
||||||
|
q_values_k_5_action_east: """
|
||||||
|
__________ illegal __________
|
||||||
|
illegal -76.4639 illegal
|
||||||
|
illegal -77.1949 illegal
|
||||||
|
illegal -77.5016 illegal
|
||||||
|
illegal -77.4875 illegal
|
||||||
|
illegal -77.0702 illegal
|
||||||
|
__________ illegal __________
|
||||||
|
"""
|
||||||
|
|
||||||
|
q_values_k_5_action_exit: """
|
||||||
|
__________ 10.0000 __________
|
||||||
|
-100.0000 illegal -100.0000
|
||||||
|
-100.0000 illegal -100.0000
|
||||||
|
-100.0000 illegal -100.0000
|
||||||
|
-100.0000 illegal -100.0000
|
||||||
|
-100.0000 illegal -100.0000
|
||||||
|
__________ 1.0000 __________
|
||||||
|
"""
|
||||||
|
|
||||||
|
q_values_k_5_action_south: """
|
||||||
|
__________ illegal __________
|
||||||
|
illegal -15.4999 illegal
|
||||||
|
illegal -20.3575 illegal
|
||||||
|
illegal -19.5292 illegal
|
||||||
|
illegal -14.4173 illegal
|
||||||
|
illegal -7.7350 illegal
|
||||||
|
__________ illegal __________
|
||||||
|
"""
|
||||||
|
|
||||||
|
q_values_k_5_action_west: """
|
||||||
|
__________ illegal __________
|
||||||
|
illegal -76.4639 illegal
|
||||||
|
illegal -77.1949 illegal
|
||||||
|
illegal -77.5016 illegal
|
||||||
|
illegal -77.4875 illegal
|
||||||
|
illegal -77.0702 illegal
|
||||||
|
__________ illegal __________
|
||||||
|
"""
|
||||||
|
|
||||||
|
values_k_6: """
|
||||||
|
__________ 10.0000 __________
|
||||||
|
-100.0000 -0.8500 -100.0000
|
||||||
|
-100.0000 -9.1502 -100.0000
|
||||||
|
-100.0000 -15.4999 -100.0000
|
||||||
|
-100.0000 -14.4173 -100.0000
|
||||||
|
-100.0000 -7.7350 -100.0000
|
||||||
|
__________ 1.0000 __________
|
||||||
|
"""
|
||||||
|
|
||||||
|
q_values_k_6_action_north: """
|
||||||
|
__________ illegal __________
|
||||||
|
illegal -0.8500 illegal
|
||||||
|
illegal -9.1502 illegal
|
||||||
|
illegal -15.4999 illegal
|
||||||
|
illegal -20.3575 illegal
|
||||||
|
illegal -19.5292 illegal
|
||||||
|
__________ illegal __________
|
||||||
|
"""
|
||||||
|
|
||||||
|
q_values_k_6_action_east: """
|
||||||
|
__________ illegal __________
|
||||||
|
illegal -76.4639 illegal
|
||||||
|
illegal -77.1949 illegal
|
||||||
|
illegal -77.5016 illegal
|
||||||
|
illegal -77.4875 illegal
|
||||||
|
illegal -77.0702 illegal
|
||||||
|
__________ illegal __________
|
||||||
|
"""
|
||||||
|
|
||||||
|
q_values_k_6_action_exit: """
|
||||||
|
__________ 10.0000 __________
|
||||||
|
-100.0000 illegal -100.0000
|
||||||
|
-100.0000 illegal -100.0000
|
||||||
|
-100.0000 illegal -100.0000
|
||||||
|
-100.0000 illegal -100.0000
|
||||||
|
-100.0000 illegal -100.0000
|
||||||
|
__________ 1.0000 __________
|
||||||
|
"""
|
||||||
|
|
||||||
|
q_values_k_6_action_south: """
|
||||||
|
__________ illegal __________
|
||||||
|
illegal -15.4999 illegal
|
||||||
|
illegal -20.3575 illegal
|
||||||
|
illegal -19.5292 illegal
|
||||||
|
illegal -14.4173 illegal
|
||||||
|
illegal -7.7350 illegal
|
||||||
|
__________ illegal __________
|
||||||
|
"""
|
||||||
|
|
||||||
|
q_values_k_6_action_west: """
|
||||||
|
__________ illegal __________
|
||||||
|
illegal -76.4639 illegal
|
||||||
|
illegal -77.1949 illegal
|
||||||
|
illegal -77.5016 illegal
|
||||||
|
illegal -77.4875 illegal
|
||||||
|
illegal -77.0702 illegal
|
||||||
|
__________ illegal __________
|
||||||
|
"""
|
||||||
|
|
||||||
|
values_k_7: """
|
||||||
|
__________ 10.0000 __________
|
||||||
|
-100.0000 -0.8500 -100.0000
|
||||||
|
-100.0000 -9.1502 -100.0000
|
||||||
|
-100.0000 -15.4999 -100.0000
|
||||||
|
-100.0000 -14.4173 -100.0000
|
||||||
|
-100.0000 -7.7350 -100.0000
|
||||||
|
__________ 1.0000 __________
|
||||||
|
"""
|
||||||
|
|
||||||
|
q_values_k_7_action_north: """
|
||||||
|
__________ illegal __________
|
||||||
|
illegal -0.8500 illegal
|
||||||
|
illegal -9.1502 illegal
|
||||||
|
illegal -15.4999 illegal
|
||||||
|
illegal -20.3575 illegal
|
||||||
|
illegal -19.5292 illegal
|
||||||
|
__________ illegal __________
|
||||||
|
"""
|
||||||
|
|
||||||
|
q_values_k_7_action_east: """
|
||||||
|
__________ illegal __________
|
||||||
|
illegal -76.4639 illegal
|
||||||
|
illegal -77.1949 illegal
|
||||||
|
illegal -77.5016 illegal
|
||||||
|
illegal -77.4875 illegal
|
||||||
|
illegal -77.0702 illegal
|
||||||
|
__________ illegal __________
|
||||||
|
"""
|
||||||
|
|
||||||
|
q_values_k_7_action_exit: """
|
||||||
|
__________ 10.0000 __________
|
||||||
|
-100.0000 illegal -100.0000
|
||||||
|
-100.0000 illegal -100.0000
|
||||||
|
-100.0000 illegal -100.0000
|
||||||
|
-100.0000 illegal -100.0000
|
||||||
|
-100.0000 illegal -100.0000
|
||||||
|
__________ 1.0000 __________
|
||||||
|
"""
|
||||||
|
|
||||||
|
q_values_k_7_action_south: """
|
||||||
|
__________ illegal __________
|
||||||
|
illegal -15.4999 illegal
|
||||||
|
illegal -20.3575 illegal
|
||||||
|
illegal -19.5292 illegal
|
||||||
|
illegal -14.4173 illegal
|
||||||
|
illegal -7.7350 illegal
|
||||||
|
__________ illegal __________
|
||||||
|
"""
|
||||||
|
|
||||||
|
q_values_k_7_action_west: """
|
||||||
|
__________ illegal __________
|
||||||
|
illegal -76.4639 illegal
|
||||||
|
illegal -77.1949 illegal
|
||||||
|
illegal -77.5016 illegal
|
||||||
|
illegal -77.4875 illegal
|
||||||
|
illegal -77.0702 illegal
|
||||||
|
__________ illegal __________
|
||||||
|
"""
|
||||||
|
|
||||||
|
values_k_8: """
|
||||||
|
__________ 10.0000 __________
|
||||||
|
-100.0000 -0.8500 -100.0000
|
||||||
|
-100.0000 -9.1502 -100.0000
|
||||||
|
-100.0000 -15.4999 -100.0000
|
||||||
|
-100.0000 -14.4173 -100.0000
|
||||||
|
-100.0000 -7.7350 -100.0000
|
||||||
|
__________ 1.0000 __________
|
||||||
|
"""
|
||||||
|
|
||||||
|
q_values_k_8_action_north: """
|
||||||
|
__________ illegal __________
|
||||||
|
illegal -0.8500 illegal
|
||||||
|
illegal -9.1502 illegal
|
||||||
|
illegal -15.4999 illegal
|
||||||
|
illegal -20.3575 illegal
|
||||||
|
illegal -19.5292 illegal
|
||||||
|
__________ illegal __________
|
||||||
|
"""
|
||||||
|
|
||||||
|
q_values_k_8_action_east: """
|
||||||
|
__________ illegal __________
|
||||||
|
illegal -76.4639 illegal
|
||||||
|
illegal -77.1949 illegal
|
||||||
|
illegal -77.5016 illegal
|
||||||
|
illegal -77.4875 illegal
|
||||||
|
illegal -77.0702 illegal
|
||||||
|
__________ illegal __________
|
||||||
|
"""
|
||||||
|
|
||||||
|
q_values_k_8_action_exit: """
|
||||||
|
__________ 10.0000 __________
|
||||||
|
-100.0000 illegal -100.0000
|
||||||
|
-100.0000 illegal -100.0000
|
||||||
|
-100.0000 illegal -100.0000
|
||||||
|
-100.0000 illegal -100.0000
|
||||||
|
-100.0000 illegal -100.0000
|
||||||
|
__________ 1.0000 __________
|
||||||
|
"""
|
||||||
|
|
||||||
|
q_values_k_8_action_south: """
|
||||||
|
__________ illegal __________
|
||||||
|
illegal -15.4999 illegal
|
||||||
|
illegal -20.3575 illegal
|
||||||
|
illegal -19.5292 illegal
|
||||||
|
illegal -14.4173 illegal
|
||||||
|
illegal -7.7350 illegal
|
||||||
|
__________ illegal __________
|
||||||
|
"""
|
||||||
|
|
||||||
|
q_values_k_8_action_west: """
|
||||||
|
__________ illegal __________
|
||||||
|
illegal -76.4639 illegal
|
||||||
|
illegal -77.1949 illegal
|
||||||
|
illegal -77.5016 illegal
|
||||||
|
illegal -77.4875 illegal
|
||||||
|
illegal -77.0702 illegal
|
||||||
|
__________ illegal __________
|
||||||
|
"""
|
||||||
|
|
||||||
|
values_k_9: """
|
||||||
|
__________ 10.0000 __________
|
||||||
|
-100.0000 -0.8500 -100.0000
|
||||||
|
-100.0000 -9.1502 -100.0000
|
||||||
|
-100.0000 -15.4999 -100.0000
|
||||||
|
-100.0000 -14.4173 -100.0000
|
||||||
|
-100.0000 -7.7350 -100.0000
|
||||||
|
__________ 1.0000 __________
|
||||||
|
"""
|
||||||
|
|
||||||
|
q_values_k_9_action_north: """
|
||||||
|
__________ illegal __________
|
||||||
|
illegal -0.8500 illegal
|
||||||
|
illegal -9.1502 illegal
|
||||||
|
illegal -15.4999 illegal
|
||||||
|
illegal -20.3575 illegal
|
||||||
|
illegal -19.5292 illegal
|
||||||
|
__________ illegal __________
|
||||||
|
"""
|
||||||
|
|
||||||
|
q_values_k_9_action_east: """
|
||||||
|
__________ illegal __________
|
||||||
|
illegal -76.4639 illegal
|
||||||
|
illegal -77.1949 illegal
|
||||||
|
illegal -77.5016 illegal
|
||||||
|
illegal -77.4875 illegal
|
||||||
|
illegal -77.0702 illegal
|
||||||
|
__________ illegal __________
|
||||||
|
"""
|
||||||
|
|
||||||
|
q_values_k_9_action_exit: """
|
||||||
|
__________ 10.0000 __________
|
||||||
|
-100.0000 illegal -100.0000
|
||||||
|
-100.0000 illegal -100.0000
|
||||||
|
-100.0000 illegal -100.0000
|
||||||
|
-100.0000 illegal -100.0000
|
||||||
|
-100.0000 illegal -100.0000
|
||||||
|
__________ 1.0000 __________
|
||||||
|
"""
|
||||||
|
|
||||||
|
q_values_k_9_action_south: """
|
||||||
|
__________ illegal __________
|
||||||
|
illegal -15.4999 illegal
|
||||||
|
illegal -20.3575 illegal
|
||||||
|
illegal -19.5292 illegal
|
||||||
|
illegal -14.4173 illegal
|
||||||
|
illegal -7.7350 illegal
|
||||||
|
__________ illegal __________
|
||||||
|
"""
|
||||||
|
|
||||||
|
q_values_k_9_action_west: """
|
||||||
|
__________ illegal __________
|
||||||
|
illegal -76.4639 illegal
|
||||||
|
illegal -77.1949 illegal
|
||||||
|
illegal -77.5016 illegal
|
||||||
|
illegal -77.4875 illegal
|
||||||
|
illegal -77.0702 illegal
|
||||||
|
__________ illegal __________
|
||||||
|
"""
|
||||||
|
|
||||||
|
values_k_100: """
|
||||||
|
__________ 10.0000 __________
|
||||||
|
-100.0000 -0.8500 -100.0000
|
||||||
|
-100.0000 -9.1502 -100.0000
|
||||||
|
-100.0000 -15.4999 -100.0000
|
||||||
|
-100.0000 -14.4173 -100.0000
|
||||||
|
-100.0000 -7.7350 -100.0000
|
||||||
|
__________ 1.0000 __________
|
||||||
|
"""
|
||||||
|
|
||||||
|
q_values_k_100_action_north: """
|
||||||
|
__________ illegal __________
|
||||||
|
illegal -0.8500 illegal
|
||||||
|
illegal -9.1502 illegal
|
||||||
|
illegal -15.4999 illegal
|
||||||
|
illegal -20.3575 illegal
|
||||||
|
illegal -19.5292 illegal
|
||||||
|
__________ illegal __________
|
||||||
|
"""
|
||||||
|
|
||||||
|
q_values_k_100_action_east: """
|
||||||
|
__________ illegal __________
|
||||||
|
illegal -76.4639 illegal
|
||||||
|
illegal -77.1949 illegal
|
||||||
|
illegal -77.5016 illegal
|
||||||
|
illegal -77.4875 illegal
|
||||||
|
illegal -77.0702 illegal
|
||||||
|
__________ illegal __________
|
||||||
|
"""
|
||||||
|
|
||||||
|
q_values_k_100_action_exit: """
|
||||||
|
__________ 10.0000 __________
|
||||||
|
-100.0000 illegal -100.0000
|
||||||
|
-100.0000 illegal -100.0000
|
||||||
|
-100.0000 illegal -100.0000
|
||||||
|
-100.0000 illegal -100.0000
|
||||||
|
-100.0000 illegal -100.0000
|
||||||
|
__________ 1.0000 __________
|
||||||
|
"""
|
||||||
|
|
||||||
|
q_values_k_100_action_south: """
|
||||||
|
__________ illegal __________
|
||||||
|
illegal -15.4999 illegal
|
||||||
|
illegal -20.3575 illegal
|
||||||
|
illegal -19.5292 illegal
|
||||||
|
illegal -14.4173 illegal
|
||||||
|
illegal -7.7350 illegal
|
||||||
|
__________ illegal __________
|
||||||
|
"""
|
||||||
|
|
||||||
|
q_values_k_100_action_west: """
|
||||||
|
__________ illegal __________
|
||||||
|
illegal -76.4639 illegal
|
||||||
|
illegal -77.1949 illegal
|
||||||
|
illegal -77.5016 illegal
|
||||||
|
illegal -77.4875 illegal
|
||||||
|
illegal -77.0702 illegal
|
||||||
|
__________ illegal __________
|
||||||
|
"""
|
||||||
|
|
||||||
|
policy: """
|
||||||
|
__________ exit __________
|
||||||
|
exit north exit
|
||||||
|
exit north exit
|
||||||
|
exit north exit
|
||||||
|
exit south exit
|
||||||
|
exit south exit
|
||||||
|
__________ exit __________
|
||||||
|
"""
|
||||||
|
|
||||||
|
actions: """
|
||||||
|
north
|
||||||
|
east
|
||||||
|
exit
|
||||||
|
south
|
||||||
|
west
|
||||||
|
"""
|
||||||
|
|
||||||
27
p3_rl/test_cases/q1/3-bridge.test
Normal file
27
p3_rl/test_cases/q1/3-bridge.test
Normal file
@@ -0,0 +1,27 @@
|
|||||||
|
class: "ValueIterationTest"
|
||||||
|
|
||||||
|
# GridWorld specification
|
||||||
|
# _ is empty space
|
||||||
|
# numbers are terminal states with that value
|
||||||
|
# # is a wall
|
||||||
|
# S is a start state
|
||||||
|
#
|
||||||
|
grid: """
|
||||||
|
# 10 #
|
||||||
|
-100 _ -100
|
||||||
|
-100 _ -100
|
||||||
|
-100 _ -100
|
||||||
|
-100 _ -100
|
||||||
|
-100 S -100
|
||||||
|
# 1 #
|
||||||
|
"""
|
||||||
|
gridName: "bridgeGrid"
|
||||||
|
discount: "0.85"
|
||||||
|
noise: "0.1"
|
||||||
|
livingReward: "0.0"
|
||||||
|
epsilon: "0.5"
|
||||||
|
learningRate: "0.1"
|
||||||
|
numExperiences: "500"
|
||||||
|
valueIterations: "100"
|
||||||
|
iterations: "10000"
|
||||||
|
|
||||||
544
p3_rl/test_cases/q1/4-discountgrid.solution
Normal file
544
p3_rl/test_cases/q1/4-discountgrid.solution
Normal file
@@ -0,0 +1,544 @@
|
|||||||
|
values_k_0: """
|
||||||
|
0.0000 0.0000 0.0000 0.0000 0.0000
|
||||||
|
0.0000 0.0000 __________ 0.0000 0.0000
|
||||||
|
0.0000 0.0000 0.0000 0.0000 0.0000
|
||||||
|
0.0000 0.0000 __________ __________ 0.0000
|
||||||
|
0.0000 0.0000 0.0000 0.0000 0.0000
|
||||||
|
"""
|
||||||
|
|
||||||
|
q_values_k_0_action_north: """
|
||||||
|
illegal 0.0000 illegal 0.0000 0.0000
|
||||||
|
illegal 0.0000 __________ 0.0000 0.0000
|
||||||
|
illegal 0.0000 illegal 0.0000 0.0000
|
||||||
|
illegal 0.0000 __________ __________ 0.0000
|
||||||
|
illegal 0.0000 0.0000 0.0000 0.0000
|
||||||
|
"""
|
||||||
|
|
||||||
|
q_values_k_0_action_east: """
|
||||||
|
illegal 0.0000 illegal 0.0000 0.0000
|
||||||
|
illegal 0.0000 __________ 0.0000 0.0000
|
||||||
|
illegal 0.0000 illegal 0.0000 0.0000
|
||||||
|
illegal 0.0000 __________ __________ 0.0000
|
||||||
|
illegal 0.0000 0.0000 0.0000 0.0000
|
||||||
|
"""
|
||||||
|
|
||||||
|
q_values_k_0_action_exit: """
|
||||||
|
-10.0000 illegal 10.0000 illegal illegal
|
||||||
|
-10.0000 illegal __________ illegal illegal
|
||||||
|
-10.0000 illegal 1.0000 illegal illegal
|
||||||
|
-10.0000 illegal __________ __________ illegal
|
||||||
|
-10.0000 illegal illegal illegal illegal
|
||||||
|
"""
|
||||||
|
|
||||||
|
q_values_k_0_action_south: """
|
||||||
|
illegal 0.0000 illegal 0.0000 0.0000
|
||||||
|
illegal 0.0000 __________ 0.0000 0.0000
|
||||||
|
illegal 0.0000 illegal 0.0000 0.0000
|
||||||
|
illegal 0.0000 __________ __________ 0.0000
|
||||||
|
illegal 0.0000 0.0000 0.0000 0.0000
|
||||||
|
"""
|
||||||
|
|
||||||
|
q_values_k_0_action_west: """
|
||||||
|
illegal 0.0000 illegal 0.0000 0.0000
|
||||||
|
illegal 0.0000 __________ 0.0000 0.0000
|
||||||
|
illegal 0.0000 illegal 0.0000 0.0000
|
||||||
|
illegal 0.0000 __________ __________ 0.0000
|
||||||
|
illegal 0.0000 0.0000 0.0000 0.0000
|
||||||
|
"""
|
||||||
|
|
||||||
|
values_k_1: """
|
||||||
|
-10.0000 0.0000 10.0000 0.0000 0.0000
|
||||||
|
-10.0000 0.0000 __________ 0.0000 0.0000
|
||||||
|
-10.0000 0.0000 1.0000 0.0000 0.0000
|
||||||
|
-10.0000 0.0000 __________ __________ 0.0000
|
||||||
|
-10.0000 0.0000 0.0000 0.0000 0.0000
|
||||||
|
"""
|
||||||
|
|
||||||
|
q_values_k_1_action_north: """
|
||||||
|
illegal 0.0000 illegal 0.9000 0.0000
|
||||||
|
illegal -0.9000 __________ 0.0000 0.0000
|
||||||
|
illegal -0.8100 illegal 0.0900 0.0000
|
||||||
|
illegal -0.9000 __________ __________ 0.0000
|
||||||
|
illegal -0.9000 0.0000 0.0000 0.0000
|
||||||
|
"""
|
||||||
|
|
||||||
|
q_values_k_1_action_east: """
|
||||||
|
illegal 7.2000 illegal 0.0000 0.0000
|
||||||
|
illegal 0.0000 __________ 0.0000 0.0000
|
||||||
|
illegal 0.7200 illegal 0.0000 0.0000
|
||||||
|
illegal 0.0000 __________ __________ 0.0000
|
||||||
|
illegal 0.0000 0.0000 0.0000 0.0000
|
||||||
|
"""
|
||||||
|
|
||||||
|
q_values_k_1_action_exit: """
|
||||||
|
-10.0000 illegal 10.0000 illegal illegal
|
||||||
|
-10.0000 illegal __________ illegal illegal
|
||||||
|
-10.0000 illegal 1.0000 illegal illegal
|
||||||
|
-10.0000 illegal __________ __________ illegal
|
||||||
|
-10.0000 illegal illegal illegal illegal
|
||||||
|
"""
|
||||||
|
|
||||||
|
q_values_k_1_action_south: """
|
||||||
|
illegal 0.0000 illegal 0.9000 0.0000
|
||||||
|
illegal -0.9000 __________ 0.0000 0.0000
|
||||||
|
illegal -0.8100 illegal 0.0900 0.0000
|
||||||
|
illegal -0.9000 __________ __________ 0.0000
|
||||||
|
illegal -0.9000 0.0000 0.0000 0.0000
|
||||||
|
"""
|
||||||
|
|
||||||
|
q_values_k_1_action_west: """
|
||||||
|
illegal -7.2000 illegal 7.2000 0.0000
|
||||||
|
illegal -7.2000 __________ 0.0000 0.0000
|
||||||
|
illegal -7.2000 illegal 0.7200 0.0000
|
||||||
|
illegal -7.2000 __________ __________ 0.0000
|
||||||
|
illegal -7.2000 0.0000 0.0000 0.0000
|
||||||
|
"""
|
||||||
|
|
||||||
|
values_k_2: """
|
||||||
|
-10.0000 7.2000 10.0000 7.2000 0.0000
|
||||||
|
-10.0000 0.0000 __________ 0.0000 0.0000
|
||||||
|
-10.0000 0.7200 1.0000 0.7200 0.0000
|
||||||
|
-10.0000 0.0000 __________ __________ 0.0000
|
||||||
|
-10.0000 0.0000 0.0000 0.0000 0.0000
|
||||||
|
"""
|
||||||
|
|
||||||
|
q_values_k_2_action_north: """
|
||||||
|
illegal 5.1840 illegal 6.0840 0.6480
|
||||||
|
illegal 4.2840 __________ 5.1840 0.0000
|
||||||
|
illegal -0.8100 illegal 0.0900 0.0648
|
||||||
|
illegal -0.3816 __________ __________ 0.0000
|
||||||
|
illegal -0.9000 0.0000 0.0000 0.0000
|
||||||
|
"""
|
||||||
|
|
||||||
|
q_values_k_2_action_east: """
|
||||||
|
illegal 7.8480 illegal 0.6480 0.0000
|
||||||
|
illegal 0.7128 __________ 0.7128 0.0000
|
||||||
|
illegal 0.7200 illegal 0.0648 0.0000
|
||||||
|
illegal 0.0648 __________ __________ 0.0000
|
||||||
|
illegal 0.0000 0.0000 0.0000 0.0000
|
||||||
|
"""
|
||||||
|
|
||||||
|
q_values_k_2_action_exit: """
|
||||||
|
-10.0000 illegal 10.0000 illegal illegal
|
||||||
|
-10.0000 illegal __________ illegal illegal
|
||||||
|
-10.0000 illegal 1.0000 illegal illegal
|
||||||
|
-10.0000 illegal __________ __________ illegal
|
||||||
|
-10.0000 illegal illegal illegal illegal
|
||||||
|
"""
|
||||||
|
|
||||||
|
q_values_k_2_action_south: """
|
||||||
|
illegal 0.0000 illegal 0.9000 0.6480
|
||||||
|
illegal -0.3816 __________ 0.5184 0.0000
|
||||||
|
illegal -0.8100 illegal 0.6084 0.0648
|
||||||
|
illegal -0.9000 __________ __________ 0.0000
|
||||||
|
illegal -0.9000 0.0000 0.0000 0.0000
|
||||||
|
"""
|
||||||
|
|
||||||
|
q_values_k_2_action_west: """
|
||||||
|
illegal -6.5520 illegal 7.8480 5.1840
|
||||||
|
illegal -6.4872 __________ 0.7128 0.0000
|
||||||
|
illegal -7.2000 illegal 0.7848 0.5184
|
||||||
|
illegal -7.1352 __________ __________ 0.0000
|
||||||
|
illegal -7.2000 0.0000 0.0000 0.0000
|
||||||
|
"""
|
||||||
|
|
||||||
|
values_k_3: """
|
||||||
|
-10.0000 7.8480 10.0000 7.8480 5.1840
|
||||||
|
-10.0000 4.2840 __________ 5.1840 0.0000
|
||||||
|
-10.0000 0.7200 1.0000 0.7848 0.5184
|
||||||
|
-10.0000 0.0648 __________ __________ 0.0000
|
||||||
|
-10.0000 0.0000 0.0000 0.0000 0.0000
|
||||||
|
"""
|
||||||
|
|
||||||
|
q_values_k_3_action_north: """
|
||||||
|
illegal 5.6506 illegal 7.0171 4.9054
|
||||||
|
illegal 5.1361 __________ 6.1171 4.1990
|
||||||
|
illegal 2.2745 illegal 3.8691 0.1173
|
||||||
|
illegal -0.3758 __________ __________ 0.3732
|
||||||
|
illegal -0.8533 0.0000 0.0000 0.0000
|
||||||
|
"""
|
||||||
|
|
||||||
|
q_values_k_3_action_east: """
|
||||||
|
illegal 8.2919 illegal 4.9054 4.1990
|
||||||
|
illegal 3.8556 __________ 0.7770 0.5132
|
||||||
|
illegal 1.1114 illegal 0.9104 0.3732
|
||||||
|
illegal 0.1115 __________ __________ 0.0467
|
||||||
|
illegal 0.0058 0.0000 0.0000 0.0000
|
||||||
|
"""
|
||||||
|
|
||||||
|
q_values_k_3_action_exit: """
|
||||||
|
-10.0000 illegal 10.0000 illegal illegal
|
||||||
|
-10.0000 illegal __________ illegal illegal
|
||||||
|
-10.0000 illegal 1.0000 illegal illegal
|
||||||
|
-10.0000 illegal __________ __________ illegal
|
||||||
|
-10.0000 illegal illegal illegal illegal
|
||||||
|
"""
|
||||||
|
|
||||||
|
q_values_k_3_action_south: """
|
||||||
|
illegal 3.0845 illegal 5.0990 1.1729
|
||||||
|
illegal 0.0040 __________ 1.0316 0.8398
|
||||||
|
illegal -0.7633 illegal 0.7017 0.1173
|
||||||
|
illegal -0.8942 __________ __________ 0.0000
|
||||||
|
illegal -0.9000 0.0000 0.0000 0.0000
|
||||||
|
"""
|
||||||
|
|
||||||
|
q_values_k_3_action_west: """
|
||||||
|
illegal -6.1081 illegal 8.3729 6.1171
|
||||||
|
illegal -6.4289 __________ 4.5094 4.2457
|
||||||
|
illegal -6.8086 illegal 1.2572 0.5651
|
||||||
|
illegal -7.1352 __________ __________ 0.0467
|
||||||
|
illegal -7.1942 0.0000 0.0000 0.0000
|
||||||
|
"""
|
||||||
|
|
||||||
|
values_k_4: """
|
||||||
|
-10.0000 8.2919 10.0000 8.3729 6.1171
|
||||||
|
-10.0000 5.1361 __________ 6.1171 4.2457
|
||||||
|
-10.0000 2.2745 1.0000 3.8691 0.5651
|
||||||
|
-10.0000 0.1115 __________ __________ 0.3732
|
||||||
|
-10.0000 0.0058 0.0000 0.0000 0.0000
|
||||||
|
"""
|
||||||
|
|
||||||
|
q_values_k_4_action_north: """
|
||||||
|
illegal 5.9702 illegal 7.4790 5.7084
|
||||||
|
illegal 5.5324 __________ 6.9611 5.3370
|
||||||
|
illegal 2.8880 illegal 4.5452 3.4560
|
||||||
|
illegal 0.7477 __________ __________ 0.4740
|
||||||
|
illegal -0.8198 0.0005 0.0000 0.2687
|
||||||
|
"""
|
||||||
|
|
||||||
|
q_values_k_4_action_east: """
|
||||||
|
illegal 8.4085 illegal 5.7084 5.3370
|
||||||
|
illegal 4.6490 __________ 4.1587 3.6583
|
||||||
|
illegal 1.1923 illegal 1.3056 0.8225
|
||||||
|
illegal 0.2855 __________ __________ 0.3196
|
||||||
|
illegal 0.0106 0.0000 0.0000 0.0336
|
||||||
|
"""
|
||||||
|
|
||||||
|
q_values_k_4_action_exit: """
|
||||||
|
-10.0000 illegal 10.0000 illegal illegal
|
||||||
|
-10.0000 illegal __________ illegal illegal
|
||||||
|
-10.0000 illegal 1.0000 illegal illegal
|
||||||
|
-10.0000 illegal __________ __________ illegal
|
||||||
|
-10.0000 illegal illegal illegal illegal
|
||||||
|
"""
|
||||||
|
|
||||||
|
q_values_k_4_action_south: """
|
||||||
|
illegal 3.6980 illegal 5.8549 4.3610
|
||||||
|
illegal 1.1999 __________ 3.7184 1.3395
|
||||||
|
illegal -0.7298 illegal 2.9266 0.6678
|
||||||
|
illegal -0.8858 __________ __________ 0.0672
|
||||||
|
illegal -0.8958 0.0005 0.0000 0.0000
|
||||||
|
"""
|
||||||
|
|
||||||
|
q_values_k_4_action_west: """
|
||||||
|
illegal -5.9915 illegal 8.5041 6.9611
|
||||||
|
illegal -6.2490 __________ 5.5061 5.0057
|
||||||
|
illegal -6.7277 illegal 1.6188 3.2015
|
||||||
|
illegal -6.9948 __________ __________ 0.3196
|
||||||
|
illegal -7.1894 0.0042 0.0000 0.0336
|
||||||
|
"""
|
||||||
|
|
||||||
|
values_k_5: """
|
||||||
|
-10.0000 8.4085 10.0000 8.5041 6.9611
|
||||||
|
-10.0000 5.5324 __________ 6.9611 5.3370
|
||||||
|
-10.0000 2.8880 1.0000 4.5452 3.4560
|
||||||
|
-10.0000 0.7477 __________ __________ 0.4740
|
||||||
|
-10.0000 0.0106 0.0042 0.0000 0.2687
|
||||||
|
"""
|
||||||
|
|
||||||
|
q_values_k_5_action_north: """
|
||||||
|
illegal 6.0541 illegal 7.6495 6.4039
|
||||||
|
illegal 5.6521 __________ 7.2298 6.1188
|
||||||
|
illegal 3.1733 illegal 5.4130 4.5627
|
||||||
|
illegal 1.2467 __________ __________ 2.5736
|
||||||
|
illegal -0.3613 0.0040 0.0246 0.3655
|
||||||
|
"""
|
||||||
|
|
||||||
|
q_values_k_5_action_east: """
|
||||||
|
illegal 8.4547 illegal 6.4039 6.1188
|
||||||
|
illegal 5.0000 __________ 5.0171 4.7802
|
||||||
|
illegal 1.2852 illegal 3.5239 3.0113
|
||||||
|
illegal 0.7992 __________ __________ 0.6765
|
||||||
|
illegal 0.0713 0.0008 0.1935 0.2603
|
||||||
|
"""
|
||||||
|
|
||||||
|
q_values_k_5_action_exit: """
|
||||||
|
-10.0000 illegal 10.0000 illegal illegal
|
||||||
|
-10.0000 illegal __________ illegal illegal
|
||||||
|
-10.0000 illegal 1.0000 illegal illegal
|
||||||
|
-10.0000 illegal __________ __________ illegal
|
||||||
|
-10.0000 illegal illegal illegal illegal
|
||||||
|
"""
|
||||||
|
|
||||||
|
q_values_k_5_action_south: """
|
||||||
|
illegal 3.9833 illegal 6.5385 5.2345
|
||||||
|
illegal 1.6773 __________ 4.3794 3.5951
|
||||||
|
illegal -0.2717 illegal 3.6736 1.0614
|
||||||
|
illegal -0.8251 __________ __________ 0.2788
|
||||||
|
illegal -0.8920 0.0040 0.0246 0.2177
|
||||||
|
"""
|
||||||
|
|
||||||
|
q_values_k_5_action_west: """
|
||||||
|
illegal -5.9453 illegal 8.5919 7.2298
|
||||||
|
illegal -6.1833 __________ 6.1864 5.9496
|
||||||
|
illegal -6.6348 illegal 1.7556 3.7955
|
||||||
|
illegal -6.9391 __________ __________ 0.6765
|
||||||
|
illegal -7.1318 0.0084 0.0030 0.0668
|
||||||
|
"""
|
||||||
|
|
||||||
|
values_k_6: """
|
||||||
|
-10.0000 8.4547 10.0000 8.5919 7.2298
|
||||||
|
-10.0000 5.6521 __________ 7.2298 6.1188
|
||||||
|
-10.0000 3.1733 1.0000 5.4130 4.5627
|
||||||
|
-10.0000 1.2467 __________ __________ 2.5736
|
||||||
|
-10.0000 0.0713 0.0084 0.1935 0.3655
|
||||||
|
"""
|
||||||
|
|
||||||
|
q_values_k_6_action_north: """
|
||||||
|
illegal 6.0874 illegal 7.7368 6.6294
|
||||||
|
illegal 5.6961 __________ 7.3875 6.4068
|
||||||
|
illegal 3.2595 illegal 5.7061 5.3034
|
||||||
|
illegal 1.4970 __________ __________ 3.7484
|
||||||
|
illegal -0.0017 0.0298 0.1730 1.9033
|
||||||
|
"""
|
||||||
|
|
||||||
|
q_values_k_6_action_east: """
|
||||||
|
illegal 8.4696 illegal 6.6294 6.4068
|
||||||
|
illegal 5.1160 __________ 5.6660 5.4669
|
||||||
|
illegal 1.3409 illegal 4.4230 4.0675
|
||||||
|
illegal 1.1896 __________ __________ 2.2966
|
||||||
|
illegal 0.1246 0.1408 0.2980 0.5277
|
||||||
|
"""
|
||||||
|
|
||||||
|
q_values_k_6_action_exit: """
|
||||||
|
-10.0000 illegal 10.0000 illegal illegal
|
||||||
|
-10.0000 illegal __________ illegal illegal
|
||||||
|
-10.0000 illegal 1.0000 illegal illegal
|
||||||
|
-10.0000 illegal __________ __________ illegal
|
||||||
|
-10.0000 illegal illegal illegal illegal
|
||||||
|
"""
|
||||||
|
|
||||||
|
q_values_k_6_action_south: """
|
||||||
|
illegal 4.0695 illegal 6.7561 5.8295
|
||||||
|
illegal 1.8935 __________ 5.0988 4.4865
|
||||||
|
illegal 0.0876 illegal 4.3980 2.7508
|
||||||
|
illegal -0.7365 __________ __________ 0.7264
|
||||||
|
illegal -0.8479 0.0298 0.1730 0.3135
|
||||||
|
"""
|
||||||
|
|
||||||
|
q_values_k_6_action_west: """
|
||||||
|
illegal -5.9304 illegal 8.6239 7.3875
|
||||||
|
illegal -6.1535 __________ 6.4659 6.2668
|
||||||
|
illegal -6.5791 illegal 1.8579 4.6797
|
||||||
|
illegal -6.9080 __________ __________ 2.2966
|
||||||
|
illegal -7.0814 0.0528 0.0408 0.4038
|
||||||
|
"""
|
||||||
|
|
||||||
|
values_k_7: """
|
||||||
|
-10.0000 8.4696 10.0000 8.6239 7.3875
|
||||||
|
-10.0000 5.6961 __________ 7.3875 6.4068
|
||||||
|
-10.0000 3.2595 1.0000 5.7061 5.3034
|
||||||
|
-10.0000 1.4970 __________ __________ 3.7484
|
||||||
|
-10.0000 0.1246 0.1408 0.2980 1.9033
|
||||||
|
"""
|
||||||
|
|
||||||
|
q_values_k_7_action_north: """
|
||||||
|
illegal 6.0981 illegal 7.7741 6.7600
|
||||||
|
illegal 5.7108 __________ 7.4507 6.5605
|
||||||
|
illegal 3.2912 illegal 5.8863 5.6038
|
||||||
|
illegal 1.5816 __________ __________ 4.4932
|
||||||
|
illegal 0.1905 0.1394 0.3985 2.8970
|
||||||
|
"""
|
||||||
|
|
||||||
|
q_values_k_7_action_east: """
|
||||||
|
illegal 8.4749 illegal 6.7600 6.5605
|
||||||
|
illegal 5.1568 __________ 5.9026 5.7551
|
||||||
|
illegal 1.3674 illegal 4.9969 4.7324
|
||||||
|
illegal 1.3824 __________ __________ 3.3475
|
||||||
|
illegal 0.2473 0.2399 1.4240 1.8790
|
||||||
|
"""
|
||||||
|
|
||||||
|
q_values_k_7_action_exit: """
|
||||||
|
-10.0000 illegal 10.0000 illegal illegal
|
||||||
|
-10.0000 illegal __________ illegal illegal
|
||||||
|
-10.0000 illegal 1.0000 illegal illegal
|
||||||
|
-10.0000 illegal __________ __________ illegal
|
||||||
|
-10.0000 illegal illegal illegal illegal
|
||||||
|
"""
|
||||||
|
|
||||||
|
q_values_k_7_action_south: """
|
||||||
|
illegal 4.1012 illegal 6.8839 6.0539
|
||||||
|
illegal 1.9595 __________ 5.3499 5.0599
|
||||||
|
illegal 0.2678 illegal 4.6757 3.6897
|
||||||
|
illegal -0.6755 __________ __________ 2.0451
|
||||||
|
illegal -0.7976 0.1394 0.3985 1.5685
|
||||||
|
"""
|
||||||
|
|
||||||
|
q_values_k_7_action_west: """
|
||||||
|
illegal -5.9251 illegal 8.6410 7.4507
|
||||||
|
illegal -6.1444 __________ 6.6087 6.4612
|
||||||
|
illegal -6.5526 illegal 1.8984 5.0224
|
||||||
|
illegal -6.8954 __________ __________ 3.3475
|
||||||
|
illegal -7.0541 0.1151 0.1550 0.7232
|
||||||
|
"""
|
||||||
|
|
||||||
|
values_k_8: """
|
||||||
|
-10.0000 8.4749 10.0000 8.6410 7.4507
|
||||||
|
-10.0000 5.7108 __________ 7.4507 6.5605
|
||||||
|
-10.0000 3.2912 1.0000 5.8863 5.6038
|
||||||
|
-10.0000 1.5816 __________ __________ 4.4932
|
||||||
|
-10.0000 0.2473 0.2399 1.4240 2.8970
|
||||||
|
"""
|
||||||
|
|
||||||
|
q_values_k_8_action_north: """
|
||||||
|
illegal 6.1019 illegal 7.7921 6.8128
|
||||||
|
illegal 5.7159 __________ 7.4826 6.6255
|
||||||
|
illegal 3.3017 illegal 5.9589 5.7577
|
||||||
|
illegal 1.6120 __________ __________ 4.8435
|
||||||
|
illegal 0.2603 0.3231 1.3076 3.6240
|
||||||
|
"""
|
||||||
|
|
||||||
|
q_values_k_8_action_east: """
|
||||||
|
illegal 8.4767 illegal 6.8128 6.6255
|
||||||
|
illegal 5.1707 __________ 6.0310 5.8985
|
||||||
|
illegal 1.3763 illegal 5.2350 5.0295
|
||||||
|
illegal 1.4572 __________ __________ 4.0001
|
||||||
|
illegal 0.3373 1.0685 2.3421 2.7509
|
||||||
|
"""
|
||||||
|
|
||||||
|
q_values_k_8_action_exit: """
|
||||||
|
-10.0000 illegal 10.0000 illegal illegal
|
||||||
|
-10.0000 illegal __________ illegal illegal
|
||||||
|
-10.0000 illegal 1.0000 illegal illegal
|
||||||
|
-10.0000 illegal __________ __________ illegal
|
||||||
|
-10.0000 illegal illegal illegal illegal
|
||||||
|
"""
|
||||||
|
|
||||||
|
q_values_k_8_action_south: """
|
||||||
|
illegal 4.1117 illegal 6.9351 6.1718
|
||||||
|
illegal 1.9836 __________ 5.4992 5.2957
|
||||||
|
illegal 0.3287 illegal 4.8325 4.2692
|
||||||
|
illegal -0.5796 __________ __________ 2.8946
|
||||||
|
illegal -0.7003 0.3231 1.3076 2.4747
|
||||||
|
"""
|
||||||
|
|
||||||
|
q_values_k_8_action_west: """
|
||||||
|
illegal -5.9233 illegal 8.6483 7.4826
|
||||||
|
illegal -6.1411 __________ 6.6720 6.5394
|
||||||
|
illegal -6.5437 illegal 1.9203 5.2330
|
||||||
|
illegal -6.8815 __________ __________ 4.0001
|
||||||
|
illegal -7.0354 0.2213 0.4290 1.6904
|
||||||
|
"""
|
||||||
|
|
||||||
|
values_k_9: """
|
||||||
|
-10.0000 8.4767 10.0000 8.6483 7.4826
|
||||||
|
-10.0000 5.7159 __________ 7.4826 6.6255
|
||||||
|
-10.0000 3.3017 1.0000 5.9589 5.7577
|
||||||
|
-10.0000 1.6120 __________ __________ 4.8435
|
||||||
|
-10.0000 0.3373 1.0685 2.3421 3.6240
|
||||||
|
"""
|
||||||
|
|
||||||
|
q_values_k_9_action_north: """
|
||||||
|
illegal 6.1032 illegal 7.8002 6.8392
|
||||||
|
illegal 5.7177 __________ 7.4965 6.6572
|
||||||
|
illegal 3.3055 illegal 5.9956 5.8249
|
||||||
|
illegal 1.6223 __________ __________ 5.0174
|
||||||
|
illegal 0.3568 1.0105 2.1087 4.0243
|
||||||
|
"""
|
||||||
|
|
||||||
|
q_values_k_9_action_east: """
|
||||||
|
illegal 8.4773 illegal 6.8392 6.6572
|
||||||
|
illegal 5.1755 __________ 6.0850 5.9620
|
||||||
|
illegal 1.3795 illegal 5.3553 5.1777
|
||||||
|
illegal 1.4881 __________ __________ 4.3316
|
||||||
|
illegal 0.9447 1.8787 3.0308 3.3713
|
||||||
|
"""
|
||||||
|
|
||||||
|
q_values_k_9_action_exit: """
|
||||||
|
-10.0000 illegal 10.0000 illegal illegal
|
||||||
|
-10.0000 illegal __________ illegal illegal
|
||||||
|
-10.0000 illegal 1.0000 illegal illegal
|
||||||
|
-10.0000 illegal __________ __________ illegal
|
||||||
|
-10.0000 illegal illegal illegal illegal
|
||||||
|
"""
|
||||||
|
|
||||||
|
q_values_k_9_action_south: """
|
||||||
|
illegal 4.1155 illegal 6.9609 6.2222
|
||||||
|
illegal 1.9917 __________ 5.5601 5.4153
|
||||||
|
illegal 0.3506 illegal 4.8986 4.5418
|
||||||
|
illegal -0.5121 __________ __________ 3.4811
|
||||||
|
illegal -0.5610 1.0105 2.1087 3.1462
|
||||||
|
"""
|
||||||
|
|
||||||
|
q_values_k_9_action_west: """
|
||||||
|
illegal -5.9227 illegal 8.6518 7.4965
|
||||||
|
illegal -6.1399 __________ 6.7021 6.5791
|
||||||
|
illegal -6.5405 illegal 1.9297 5.3226
|
||||||
|
illegal -6.8725 __________ __________ 4.3316
|
||||||
|
illegal -7.0246 0.4352 1.1909 2.4484
|
||||||
|
"""
|
||||||
|
|
||||||
|
values_k_100: """
|
||||||
|
-10.0000 8.4777 10.0000 8.6547 7.5087
|
||||||
|
-10.0000 5.7186 __________ 7.5087 6.6836
|
||||||
|
-10.0000 3.3074 1.0000 6.0258 5.8841
|
||||||
|
-10.0000 2.0045 __________ __________ 5.1665
|
||||||
|
-10.0000 2.9289 3.4513 3.9306 4.4765
|
||||||
|
"""
|
||||||
|
|
||||||
|
q_values_k_100_action_north: """
|
||||||
|
illegal 6.1039 illegal 7.8072 6.8610
|
||||||
|
illegal 5.7186 __________ 7.5087 6.6836
|
||||||
|
illegal 3.3074 illegal 6.0258 5.8841
|
||||||
|
illegal 1.6617 __________ __________ 5.1665
|
||||||
|
illegal 0.8539 3.1023 3.5435 4.4765
|
||||||
|
"""
|
||||||
|
|
||||||
|
q_values_k_100_action_east: """
|
||||||
|
illegal 8.4777 illegal 6.8610 6.6836
|
||||||
|
illegal 5.1780 __________ 6.1334 6.0175
|
||||||
|
illegal 1.4151 illegal 5.4546 5.3030
|
||||||
|
illegal 2.0045 __________ __________ 4.6523
|
||||||
|
illegal 2.9289 3.4513 3.9306 4.0910
|
||||||
|
"""
|
||||||
|
|
||||||
|
q_values_k_100_action_exit: """
|
||||||
|
-10.0000 illegal 10.0000 illegal illegal
|
||||||
|
-10.0000 illegal __________ illegal illegal
|
||||||
|
-10.0000 illegal 1.0000 illegal illegal
|
||||||
|
-10.0000 illegal __________ __________ illegal
|
||||||
|
-10.0000 illegal illegal illegal illegal
|
||||||
|
"""
|
||||||
|
|
||||||
|
q_values_k_100_action_south: """
|
||||||
|
illegal 4.1174 illegal 6.9820 6.2669
|
||||||
|
illegal 1.9960 __________ 5.6159 5.5138
|
||||||
|
illegal 0.6333 illegal 4.9582 4.7918
|
||||||
|
illegal 1.3892 __________ __________ 4.1531
|
||||||
|
illegal 1.5194 3.1023 3.5435 3.9797
|
||||||
|
"""
|
||||||
|
|
||||||
|
q_values_k_100_action_west: """
|
||||||
|
illegal -5.9223 illegal 8.6547 7.5087
|
||||||
|
illegal -6.1393 __________ 6.7275 6.6116
|
||||||
|
illegal -6.5049 illegal 1.9381 5.4051
|
||||||
|
illegal -6.6387 __________ __________ 4.6523
|
||||||
|
illegal -6.7560 2.7300 3.1924 3.6979
|
||||||
|
"""
|
||||||
|
|
||||||
|
policy: """
|
||||||
|
exit east exit west west
|
||||||
|
exit north __________ north north
|
||||||
|
exit north exit north north
|
||||||
|
exit east __________ __________ north
|
||||||
|
exit east east east north
|
||||||
|
"""
|
||||||
|
|
||||||
|
actions: """
|
||||||
|
north
|
||||||
|
east
|
||||||
|
exit
|
||||||
|
south
|
||||||
|
west
|
||||||
|
"""
|
||||||
|
|
||||||
24
p3_rl/test_cases/q1/4-discountgrid.test
Normal file
24
p3_rl/test_cases/q1/4-discountgrid.test
Normal file
@@ -0,0 +1,24 @@
|
|||||||
|
class: "ValueIterationTest"
|
||||||
|
|
||||||
|
# GridWorld specification
|
||||||
|
# _ is empty space
|
||||||
|
# numbers are terminal states with that value
|
||||||
|
# # is a wall
|
||||||
|
# S is a start state
|
||||||
|
#
|
||||||
|
grid: """
|
||||||
|
-10 _ 10 _ _
|
||||||
|
-10 _ # _ _
|
||||||
|
-10 _ 1 _ _
|
||||||
|
-10 _ # # _
|
||||||
|
-10 S _ _ _
|
||||||
|
"""
|
||||||
|
discount: "0.9"
|
||||||
|
noise: "0.2"
|
||||||
|
livingReward: "0.0"
|
||||||
|
epsilon: "0.2"
|
||||||
|
learningRate: "0.1"
|
||||||
|
numExperiences: "3000"
|
||||||
|
valueIterations: "100"
|
||||||
|
iterations: "10000"
|
||||||
|
|
||||||
2
p3_rl/test_cases/q1/CONFIG
Normal file
2
p3_rl/test_cases/q1/CONFIG
Normal file
@@ -0,0 +1,2 @@
|
|||||||
|
max_points: "6"
|
||||||
|
class: "PassAllTestsQuestion"
|
||||||
2
p3_rl/test_cases/q2/1-bridge-grid.solution
Normal file
2
p3_rl/test_cases/q2/1-bridge-grid.solution
Normal file
@@ -0,0 +1,2 @@
|
|||||||
|
# This is the solution file for test_cases/q2/1-bridge-grid.test.
|
||||||
|
# File intentionally blank.
|
||||||
29
p3_rl/test_cases/q2/1-bridge-grid.test
Normal file
29
p3_rl/test_cases/q2/1-bridge-grid.test
Normal file
@@ -0,0 +1,29 @@
|
|||||||
|
class: "GridPolicyTest"
|
||||||
|
|
||||||
|
# Function in module in analysis that returns (discount, noise)
|
||||||
|
parameterFn: "question2"
|
||||||
|
question2: "true"
|
||||||
|
|
||||||
|
# GridWorld specification
|
||||||
|
# _ is empty space
|
||||||
|
# numbers are terminal states with that value
|
||||||
|
# # is a wall
|
||||||
|
# S is a start state
|
||||||
|
#
|
||||||
|
grid: """
|
||||||
|
# -100 -100 -100 -100 -100 #
|
||||||
|
1 S _ _ _ _ 10
|
||||||
|
# -100 -100 -100 -100 -100 #
|
||||||
|
"""
|
||||||
|
gridName: "bridgeGrid"
|
||||||
|
|
||||||
|
# Policy specification
|
||||||
|
# _ policy choice not checked
|
||||||
|
# N, E, S, W policy action must be north, east, south, west
|
||||||
|
#
|
||||||
|
policy: """
|
||||||
|
_ _ _ _ _ _ _
|
||||||
|
_ E _ _ _ _ _
|
||||||
|
_ _ _ _ _ _ _
|
||||||
|
"""
|
||||||
|
|
||||||
2
p3_rl/test_cases/q2/CONFIG
Normal file
2
p3_rl/test_cases/q2/CONFIG
Normal file
@@ -0,0 +1,2 @@
|
|||||||
|
max_points: "1"
|
||||||
|
class: "PassAllTestsQuestion"
|
||||||
2
p3_rl/test_cases/q3/1-question-3.1.solution
Normal file
2
p3_rl/test_cases/q3/1-question-3.1.solution
Normal file
@@ -0,0 +1,2 @@
|
|||||||
|
# This is the solution file for test_cases/q3/1-question-3.1.test.
|
||||||
|
# File intentionally blank.
|
||||||
31
p3_rl/test_cases/q3/1-question-3.1.test
Normal file
31
p3_rl/test_cases/q3/1-question-3.1.test
Normal file
@@ -0,0 +1,31 @@
|
|||||||
|
class: "GridPolicyTest"
|
||||||
|
|
||||||
|
# Function in module in analysis that returns (discount, noise)
|
||||||
|
parameterFn: "question3a"
|
||||||
|
|
||||||
|
# GridWorld specification
|
||||||
|
# _ is empty space
|
||||||
|
# numbers are terminal states with that value
|
||||||
|
# # is a wall
|
||||||
|
# S is a start state
|
||||||
|
#
|
||||||
|
grid: """
|
||||||
|
_ _ _ _ _
|
||||||
|
_ # _ _ _
|
||||||
|
_ # 1 # 10
|
||||||
|
S _ _ _ _
|
||||||
|
-10 -10 -10 -10 -10
|
||||||
|
"""
|
||||||
|
gridName: "discountGrid"
|
||||||
|
|
||||||
|
# Policy specification
|
||||||
|
# _ policy choice not checked
|
||||||
|
# N, E, S, W policy action must be north, east, south, west
|
||||||
|
#
|
||||||
|
policy: """
|
||||||
|
_ _ _ _ _
|
||||||
|
_ _ _ _ _
|
||||||
|
_ _ _ _ _
|
||||||
|
E E N _ _
|
||||||
|
_ _ _ _ _
|
||||||
|
"""
|
||||||
2
p3_rl/test_cases/q3/2-question-3.2.solution
Normal file
2
p3_rl/test_cases/q3/2-question-3.2.solution
Normal file
@@ -0,0 +1,2 @@
|
|||||||
|
# This is the solution file for test_cases/q3/2-question-3.2.test.
|
||||||
|
# File intentionally blank.
|
||||||
31
p3_rl/test_cases/q3/2-question-3.2.test
Normal file
31
p3_rl/test_cases/q3/2-question-3.2.test
Normal file
@@ -0,0 +1,31 @@
|
|||||||
|
class: "GridPolicyTest"
|
||||||
|
|
||||||
|
# Function in module in analysis that returns (discount, noise)
|
||||||
|
parameterFn: "question3b"
|
||||||
|
|
||||||
|
# GridWorld specification
|
||||||
|
# _ is empty space
|
||||||
|
# numbers are terminal states with that value
|
||||||
|
# # is a wall
|
||||||
|
# S is a start state
|
||||||
|
#
|
||||||
|
grid: """
|
||||||
|
_ _ _ _ _
|
||||||
|
_ # _ _ _
|
||||||
|
_ # 1 # 10
|
||||||
|
S _ _ _ _
|
||||||
|
-10 -10 -10 -10 -10
|
||||||
|
"""
|
||||||
|
gridName: "discountGrid"
|
||||||
|
|
||||||
|
# Policy specification
|
||||||
|
# _ policy choice not checked
|
||||||
|
# N, E, S, W policy action must be north, east, south, west
|
||||||
|
#
|
||||||
|
policy: """
|
||||||
|
E E S _ _
|
||||||
|
N _ S _ _
|
||||||
|
N _ _ _ _
|
||||||
|
N _ _ _ _
|
||||||
|
_ _ _ _ _
|
||||||
|
"""
|
||||||
2
p3_rl/test_cases/q3/3-question-3.3.solution
Normal file
2
p3_rl/test_cases/q3/3-question-3.3.solution
Normal file
@@ -0,0 +1,2 @@
|
|||||||
|
# This is the solution file for test_cases/q3/3-question-3.3.test.
|
||||||
|
# File intentionally blank.
|
||||||
31
p3_rl/test_cases/q3/3-question-3.3.test
Normal file
31
p3_rl/test_cases/q3/3-question-3.3.test
Normal file
@@ -0,0 +1,31 @@
|
|||||||
|
class: "GridPolicyTest"
|
||||||
|
|
||||||
|
# Function in module in analysis that returns (discount, noise)
|
||||||
|
parameterFn: "question3c"
|
||||||
|
|
||||||
|
# GridWorld specification
|
||||||
|
# _ is empty space
|
||||||
|
# numbers are terminal states with that value
|
||||||
|
# # is a wall
|
||||||
|
# S is a start state
|
||||||
|
#
|
||||||
|
grid: """
|
||||||
|
_ _ _ _ _
|
||||||
|
_ # _ _ _
|
||||||
|
_ # 1 # 10
|
||||||
|
S _ _ _ _
|
||||||
|
-10 -10 -10 -10 -10
|
||||||
|
"""
|
||||||
|
gridName: "discountGrid"
|
||||||
|
|
||||||
|
# Policy specification
|
||||||
|
# _ policy choice not checked
|
||||||
|
# N, E, S, W policy action must be north, east, south, west
|
||||||
|
#
|
||||||
|
policy: """
|
||||||
|
_ _ _ _ _
|
||||||
|
_ _ _ _ _
|
||||||
|
_ _ _ _ _
|
||||||
|
E E E E N
|
||||||
|
_ _ _ _ _
|
||||||
|
"""
|
||||||
2
p3_rl/test_cases/q3/4-question-3.4.solution
Normal file
2
p3_rl/test_cases/q3/4-question-3.4.solution
Normal file
@@ -0,0 +1,2 @@
|
|||||||
|
# This is the solution file for test_cases/q3/4-question-3.4.test.
|
||||||
|
# File intentionally blank.
|
||||||
36
p3_rl/test_cases/q3/4-question-3.4.test
Normal file
36
p3_rl/test_cases/q3/4-question-3.4.test
Normal file
@@ -0,0 +1,36 @@
|
|||||||
|
class: "GridPolicyTest"
|
||||||
|
|
||||||
|
# Function in module in analysis that returns (discount, noise)
|
||||||
|
parameterFn: "question3d"
|
||||||
|
|
||||||
|
# GridWorld specification
|
||||||
|
# _ is empty space
|
||||||
|
# numbers are terminal states with that value
|
||||||
|
# # is a wall
|
||||||
|
# S is a start state
|
||||||
|
#
|
||||||
|
grid: """
|
||||||
|
_ _ _ _ _
|
||||||
|
_ # _ _ _
|
||||||
|
_ # 1 # 10
|
||||||
|
S _ _ _ _
|
||||||
|
-10 -10 -10 -10 -10
|
||||||
|
"""
|
||||||
|
gridName: "discountGrid"
|
||||||
|
|
||||||
|
# Policy specification
|
||||||
|
# _ policy choice not checked
|
||||||
|
# N, E, S, W policy action must be north, east, south, west
|
||||||
|
#
|
||||||
|
policy: """
|
||||||
|
_ _ _ _ _
|
||||||
|
_ _ _ _ _
|
||||||
|
_ _ _ _ _
|
||||||
|
N _ _ _ _
|
||||||
|
_ _ _ _ _
|
||||||
|
"""
|
||||||
|
|
||||||
|
# State the most probable path must visit
|
||||||
|
# (x,y) for a particular location; (0,0) is bottom left
|
||||||
|
# TERMINAL_STATE for the terminal state
|
||||||
|
pathVisits: "(4,2)"
|
||||||
2
p3_rl/test_cases/q3/5-question-3.5.solution
Normal file
2
p3_rl/test_cases/q3/5-question-3.5.solution
Normal file
@@ -0,0 +1,2 @@
|
|||||||
|
# This is the solution file for test_cases/q3/5-question-3.5.test.
|
||||||
|
# File intentionally blank.
|
||||||
36
p3_rl/test_cases/q3/5-question-3.5.test
Normal file
36
p3_rl/test_cases/q3/5-question-3.5.test
Normal file
@@ -0,0 +1,36 @@
|
|||||||
|
class: "GridPolicyTest"
|
||||||
|
|
||||||
|
# Function in module in analysis that returns (discount, noise)
|
||||||
|
parameterFn: "question3e"
|
||||||
|
|
||||||
|
# GridWorld specification
|
||||||
|
# _ is empty space
|
||||||
|
# numbers are terminal states with that value
|
||||||
|
# # is a wall
|
||||||
|
# S is a start state
|
||||||
|
#
|
||||||
|
grid: """
|
||||||
|
_ _ _ _ _
|
||||||
|
_ # _ _ _
|
||||||
|
_ # 1 # 10
|
||||||
|
S _ _ _ _
|
||||||
|
-10 -10 -10 -10 -10
|
||||||
|
"""
|
||||||
|
gridName: "discountGrid"
|
||||||
|
|
||||||
|
# Policy specification
|
||||||
|
# _ policy choice not checked
|
||||||
|
# N, E, S, W policy action must be north, east, south, west
|
||||||
|
#
|
||||||
|
policy: """
|
||||||
|
_ _ _ _ _
|
||||||
|
_ _ _ _ _
|
||||||
|
_ _ _ _ _
|
||||||
|
_ _ _ _ _
|
||||||
|
_ _ _ _ _
|
||||||
|
"""
|
||||||
|
|
||||||
|
# State the most probable path must not visit
|
||||||
|
# (x,y) for a particular location; (0,0) is bottom left
|
||||||
|
# TERMINAL_STATE for the terminal state
|
||||||
|
pathNotVisits: "TERMINAL_STATE"
|
||||||
2
p3_rl/test_cases/q3/CONFIG
Normal file
2
p3_rl/test_cases/q3/CONFIG
Normal file
@@ -0,0 +1,2 @@
|
|||||||
|
max_points: "5"
|
||||||
|
class: "NumberPassedQuestion"
|
||||||
342
p3_rl/test_cases/q4/1-tinygrid.solution
Normal file
342
p3_rl/test_cases/q4/1-tinygrid.solution
Normal file
@@ -0,0 +1,342 @@
|
|||||||
|
q_values_k_0_action_north: """
|
||||||
|
illegal
|
||||||
|
0.0000
|
||||||
|
illegal
|
||||||
|
"""
|
||||||
|
|
||||||
|
q_values_k_0_action_east: """
|
||||||
|
illegal
|
||||||
|
0.0000
|
||||||
|
illegal
|
||||||
|
"""
|
||||||
|
|
||||||
|
q_values_k_0_action_exit: """
|
||||||
|
0.0000
|
||||||
|
illegal
|
||||||
|
0.0000
|
||||||
|
"""
|
||||||
|
|
||||||
|
q_values_k_0_action_south: """
|
||||||
|
illegal
|
||||||
|
0.0000
|
||||||
|
illegal
|
||||||
|
"""
|
||||||
|
|
||||||
|
q_values_k_0_action_west: """
|
||||||
|
illegal
|
||||||
|
0.0000
|
||||||
|
illegal
|
||||||
|
"""
|
||||||
|
|
||||||
|
q_values_k_1_action_north: """
|
||||||
|
illegal
|
||||||
|
0.0000
|
||||||
|
illegal
|
||||||
|
"""
|
||||||
|
|
||||||
|
q_values_k_1_action_east: """
|
||||||
|
illegal
|
||||||
|
0.0000
|
||||||
|
illegal
|
||||||
|
"""
|
||||||
|
|
||||||
|
q_values_k_1_action_exit: """
|
||||||
|
0.0000
|
||||||
|
illegal
|
||||||
|
1.0000
|
||||||
|
"""
|
||||||
|
|
||||||
|
q_values_k_1_action_south: """
|
||||||
|
illegal
|
||||||
|
0.0000
|
||||||
|
illegal
|
||||||
|
"""
|
||||||
|
|
||||||
|
q_values_k_1_action_west: """
|
||||||
|
illegal
|
||||||
|
0.0000
|
||||||
|
illegal
|
||||||
|
"""
|
||||||
|
|
||||||
|
q_values_k_2_action_north: """
|
||||||
|
illegal
|
||||||
|
0.0000
|
||||||
|
illegal
|
||||||
|
"""
|
||||||
|
|
||||||
|
q_values_k_2_action_east: """
|
||||||
|
illegal
|
||||||
|
0.0000
|
||||||
|
illegal
|
||||||
|
"""
|
||||||
|
|
||||||
|
q_values_k_2_action_exit: """
|
||||||
|
0.0000
|
||||||
|
illegal
|
||||||
|
1.0000
|
||||||
|
"""
|
||||||
|
|
||||||
|
q_values_k_2_action_south: """
|
||||||
|
illegal
|
||||||
|
0.0000
|
||||||
|
illegal
|
||||||
|
"""
|
||||||
|
|
||||||
|
q_values_k_2_action_west: """
|
||||||
|
illegal
|
||||||
|
0.0000
|
||||||
|
illegal
|
||||||
|
"""
|
||||||
|
|
||||||
|
q_values_k_3_action_north: """
|
||||||
|
illegal
|
||||||
|
0.0000
|
||||||
|
illegal
|
||||||
|
"""
|
||||||
|
|
||||||
|
q_values_k_3_action_east: """
|
||||||
|
illegal
|
||||||
|
0.0000
|
||||||
|
illegal
|
||||||
|
"""
|
||||||
|
|
||||||
|
q_values_k_3_action_exit: """
|
||||||
|
0.0000
|
||||||
|
illegal
|
||||||
|
1.9000
|
||||||
|
"""
|
||||||
|
|
||||||
|
q_values_k_3_action_south: """
|
||||||
|
illegal
|
||||||
|
0.0000
|
||||||
|
illegal
|
||||||
|
"""
|
||||||
|
|
||||||
|
q_values_k_3_action_west: """
|
||||||
|
illegal
|
||||||
|
0.0000
|
||||||
|
illegal
|
||||||
|
"""
|
||||||
|
|
||||||
|
q_values_k_4_action_north: """
|
||||||
|
illegal
|
||||||
|
0.0000
|
||||||
|
illegal
|
||||||
|
"""
|
||||||
|
|
||||||
|
q_values_k_4_action_east: """
|
||||||
|
illegal
|
||||||
|
0.0000
|
||||||
|
illegal
|
||||||
|
"""
|
||||||
|
|
||||||
|
q_values_k_4_action_exit: """
|
||||||
|
0.0000
|
||||||
|
illegal
|
||||||
|
2.7100
|
||||||
|
"""
|
||||||
|
|
||||||
|
q_values_k_4_action_south: """
|
||||||
|
illegal
|
||||||
|
0.0000
|
||||||
|
illegal
|
||||||
|
"""
|
||||||
|
|
||||||
|
q_values_k_4_action_west: """
|
||||||
|
illegal
|
||||||
|
0.0000
|
||||||
|
illegal
|
||||||
|
"""
|
||||||
|
|
||||||
|
q_values_k_5_action_north: """
|
||||||
|
illegal
|
||||||
|
0.0000
|
||||||
|
illegal
|
||||||
|
"""
|
||||||
|
|
||||||
|
q_values_k_5_action_east: """
|
||||||
|
illegal
|
||||||
|
0.0000
|
||||||
|
illegal
|
||||||
|
"""
|
||||||
|
|
||||||
|
q_values_k_5_action_exit: """
|
||||||
|
-1.0000
|
||||||
|
illegal
|
||||||
|
2.7100
|
||||||
|
"""
|
||||||
|
|
||||||
|
q_values_k_5_action_south: """
|
||||||
|
illegal
|
||||||
|
0.0000
|
||||||
|
illegal
|
||||||
|
"""
|
||||||
|
|
||||||
|
q_values_k_5_action_west: """
|
||||||
|
illegal
|
||||||
|
0.0000
|
||||||
|
illegal
|
||||||
|
"""
|
||||||
|
|
||||||
|
q_values_k_6_action_north: """
|
||||||
|
illegal
|
||||||
|
0.0000
|
||||||
|
illegal
|
||||||
|
"""
|
||||||
|
|
||||||
|
q_values_k_6_action_east: """
|
||||||
|
illegal
|
||||||
|
0.0000
|
||||||
|
illegal
|
||||||
|
"""
|
||||||
|
|
||||||
|
q_values_k_6_action_exit: """
|
||||||
|
-1.0000
|
||||||
|
illegal
|
||||||
|
3.4390
|
||||||
|
"""
|
||||||
|
|
||||||
|
q_values_k_6_action_south: """
|
||||||
|
illegal
|
||||||
|
0.0000
|
||||||
|
illegal
|
||||||
|
"""
|
||||||
|
|
||||||
|
q_values_k_6_action_west: """
|
||||||
|
illegal
|
||||||
|
0.0000
|
||||||
|
illegal
|
||||||
|
"""
|
||||||
|
|
||||||
|
q_values_k_7_action_north: """
|
||||||
|
illegal
|
||||||
|
0.0000
|
||||||
|
illegal
|
||||||
|
"""
|
||||||
|
|
||||||
|
q_values_k_7_action_east: """
|
||||||
|
illegal
|
||||||
|
0.0000
|
||||||
|
illegal
|
||||||
|
"""
|
||||||
|
|
||||||
|
q_values_k_7_action_exit: """
|
||||||
|
-1.0000
|
||||||
|
illegal
|
||||||
|
3.4390
|
||||||
|
"""
|
||||||
|
|
||||||
|
q_values_k_7_action_south: """
|
||||||
|
illegal
|
||||||
|
0.1720
|
||||||
|
illegal
|
||||||
|
"""
|
||||||
|
|
||||||
|
q_values_k_7_action_west: """
|
||||||
|
illegal
|
||||||
|
0.0000
|
||||||
|
illegal
|
||||||
|
"""
|
||||||
|
|
||||||
|
q_values_k_8_action_north: """
|
||||||
|
illegal
|
||||||
|
0.0000
|
||||||
|
illegal
|
||||||
|
"""
|
||||||
|
|
||||||
|
q_values_k_8_action_east: """
|
||||||
|
illegal
|
||||||
|
0.0000
|
||||||
|
illegal
|
||||||
|
"""
|
||||||
|
|
||||||
|
q_values_k_8_action_exit: """
|
||||||
|
-1.0000
|
||||||
|
illegal
|
||||||
|
4.0951
|
||||||
|
"""
|
||||||
|
|
||||||
|
q_values_k_8_action_south: """
|
||||||
|
illegal
|
||||||
|
0.1720
|
||||||
|
illegal
|
||||||
|
"""
|
||||||
|
|
||||||
|
q_values_k_8_action_west: """
|
||||||
|
illegal
|
||||||
|
0.0000
|
||||||
|
illegal
|
||||||
|
"""
|
||||||
|
|
||||||
|
q_values_k_9_action_north: """
|
||||||
|
illegal
|
||||||
|
0.0000
|
||||||
|
illegal
|
||||||
|
"""
|
||||||
|
|
||||||
|
q_values_k_9_action_east: """
|
||||||
|
illegal
|
||||||
|
0.0000
|
||||||
|
illegal
|
||||||
|
"""
|
||||||
|
|
||||||
|
q_values_k_9_action_exit: """
|
||||||
|
-1.0000
|
||||||
|
illegal
|
||||||
|
4.6856
|
||||||
|
"""
|
||||||
|
|
||||||
|
q_values_k_9_action_south: """
|
||||||
|
illegal
|
||||||
|
0.1720
|
||||||
|
illegal
|
||||||
|
"""
|
||||||
|
|
||||||
|
q_values_k_9_action_west: """
|
||||||
|
illegal
|
||||||
|
0.0000
|
||||||
|
illegal
|
||||||
|
"""
|
||||||
|
|
||||||
|
q_values_k_100_action_north: """
|
||||||
|
illegal
|
||||||
|
-0.4534
|
||||||
|
illegal
|
||||||
|
"""
|
||||||
|
|
||||||
|
q_values_k_100_action_east: """
|
||||||
|
illegal
|
||||||
|
0.4063
|
||||||
|
illegal
|
||||||
|
"""
|
||||||
|
|
||||||
|
q_values_k_100_action_exit: """
|
||||||
|
-9.4767
|
||||||
|
illegal
|
||||||
|
9.8175
|
||||||
|
"""
|
||||||
|
|
||||||
|
q_values_k_100_action_south: """
|
||||||
|
illegal
|
||||||
|
2.1267
|
||||||
|
illegal
|
||||||
|
"""
|
||||||
|
|
||||||
|
q_values_k_100_action_west: """
|
||||||
|
illegal
|
||||||
|
0.3919
|
||||||
|
illegal
|
||||||
|
"""
|
||||||
|
|
||||||
|
values: """
|
||||||
|
-9.4767
|
||||||
|
2.1267
|
||||||
|
9.8175
|
||||||
|
"""
|
||||||
|
|
||||||
|
policy: """
|
||||||
|
exit
|
||||||
|
south
|
||||||
|
exit
|
||||||
|
"""
|
||||||
|
|
||||||
22
p3_rl/test_cases/q4/1-tinygrid.test
Normal file
22
p3_rl/test_cases/q4/1-tinygrid.test
Normal file
@@ -0,0 +1,22 @@
|
|||||||
|
class: "QLearningTest"
|
||||||
|
|
||||||
|
# GridWorld specification
|
||||||
|
# _ is empty space
|
||||||
|
# numbers are terminal states with that value
|
||||||
|
# # is a wall
|
||||||
|
# S is a start state
|
||||||
|
#
|
||||||
|
grid: """
|
||||||
|
-10
|
||||||
|
S
|
||||||
|
10
|
||||||
|
"""
|
||||||
|
discount: "0.5"
|
||||||
|
noise: "0.0"
|
||||||
|
livingReward: "0.0"
|
||||||
|
epsilon: "0.5"
|
||||||
|
learningRate: "0.1"
|
||||||
|
numExperiences: "100"
|
||||||
|
valueIterations: "100"
|
||||||
|
iterations: "10000"
|
||||||
|
|
||||||
342
p3_rl/test_cases/q4/2-tinygrid-noisy.solution
Normal file
342
p3_rl/test_cases/q4/2-tinygrid-noisy.solution
Normal file
@@ -0,0 +1,342 @@
|
|||||||
|
q_values_k_0_action_north: """
|
||||||
|
illegal
|
||||||
|
0.0000
|
||||||
|
illegal
|
||||||
|
"""
|
||||||
|
|
||||||
|
q_values_k_0_action_east: """
|
||||||
|
illegal
|
||||||
|
0.0000
|
||||||
|
illegal
|
||||||
|
"""
|
||||||
|
|
||||||
|
q_values_k_0_action_exit: """
|
||||||
|
0.0000
|
||||||
|
illegal
|
||||||
|
0.0000
|
||||||
|
"""
|
||||||
|
|
||||||
|
q_values_k_0_action_south: """
|
||||||
|
illegal
|
||||||
|
0.0000
|
||||||
|
illegal
|
||||||
|
"""
|
||||||
|
|
||||||
|
q_values_k_0_action_west: """
|
||||||
|
illegal
|
||||||
|
0.0000
|
||||||
|
illegal
|
||||||
|
"""
|
||||||
|
|
||||||
|
q_values_k_1_action_north: """
|
||||||
|
illegal
|
||||||
|
0.0000
|
||||||
|
illegal
|
||||||
|
"""
|
||||||
|
|
||||||
|
q_values_k_1_action_east: """
|
||||||
|
illegal
|
||||||
|
0.0000
|
||||||
|
illegal
|
||||||
|
"""
|
||||||
|
|
||||||
|
q_values_k_1_action_exit: """
|
||||||
|
0.0000
|
||||||
|
illegal
|
||||||
|
1.0000
|
||||||
|
"""
|
||||||
|
|
||||||
|
q_values_k_1_action_south: """
|
||||||
|
illegal
|
||||||
|
0.0000
|
||||||
|
illegal
|
||||||
|
"""
|
||||||
|
|
||||||
|
q_values_k_1_action_west: """
|
||||||
|
illegal
|
||||||
|
0.0000
|
||||||
|
illegal
|
||||||
|
"""
|
||||||
|
|
||||||
|
q_values_k_2_action_north: """
|
||||||
|
illegal
|
||||||
|
0.0000
|
||||||
|
illegal
|
||||||
|
"""
|
||||||
|
|
||||||
|
q_values_k_2_action_east: """
|
||||||
|
illegal
|
||||||
|
0.0000
|
||||||
|
illegal
|
||||||
|
"""
|
||||||
|
|
||||||
|
q_values_k_2_action_exit: """
|
||||||
|
0.0000
|
||||||
|
illegal
|
||||||
|
1.0000
|
||||||
|
"""
|
||||||
|
|
||||||
|
q_values_k_2_action_south: """
|
||||||
|
illegal
|
||||||
|
0.0000
|
||||||
|
illegal
|
||||||
|
"""
|
||||||
|
|
||||||
|
q_values_k_2_action_west: """
|
||||||
|
illegal
|
||||||
|
0.0000
|
||||||
|
illegal
|
||||||
|
"""
|
||||||
|
|
||||||
|
q_values_k_3_action_north: """
|
||||||
|
illegal
|
||||||
|
0.0000
|
||||||
|
illegal
|
||||||
|
"""
|
||||||
|
|
||||||
|
q_values_k_3_action_east: """
|
||||||
|
illegal
|
||||||
|
0.0000
|
||||||
|
illegal
|
||||||
|
"""
|
||||||
|
|
||||||
|
q_values_k_3_action_exit: """
|
||||||
|
0.0000
|
||||||
|
illegal
|
||||||
|
1.9000
|
||||||
|
"""
|
||||||
|
|
||||||
|
q_values_k_3_action_south: """
|
||||||
|
illegal
|
||||||
|
0.0000
|
||||||
|
illegal
|
||||||
|
"""
|
||||||
|
|
||||||
|
q_values_k_3_action_west: """
|
||||||
|
illegal
|
||||||
|
0.0000
|
||||||
|
illegal
|
||||||
|
"""
|
||||||
|
|
||||||
|
q_values_k_4_action_north: """
|
||||||
|
illegal
|
||||||
|
0.0000
|
||||||
|
illegal
|
||||||
|
"""
|
||||||
|
|
||||||
|
q_values_k_4_action_east: """
|
||||||
|
illegal
|
||||||
|
0.0000
|
||||||
|
illegal
|
||||||
|
"""
|
||||||
|
|
||||||
|
q_values_k_4_action_exit: """
|
||||||
|
0.0000
|
||||||
|
illegal
|
||||||
|
2.7100
|
||||||
|
"""
|
||||||
|
|
||||||
|
q_values_k_4_action_south: """
|
||||||
|
illegal
|
||||||
|
0.0000
|
||||||
|
illegal
|
||||||
|
"""
|
||||||
|
|
||||||
|
q_values_k_4_action_west: """
|
||||||
|
illegal
|
||||||
|
0.0000
|
||||||
|
illegal
|
||||||
|
"""
|
||||||
|
|
||||||
|
q_values_k_5_action_north: """
|
||||||
|
illegal
|
||||||
|
0.0000
|
||||||
|
illegal
|
||||||
|
"""
|
||||||
|
|
||||||
|
q_values_k_5_action_east: """
|
||||||
|
illegal
|
||||||
|
0.0000
|
||||||
|
illegal
|
||||||
|
"""
|
||||||
|
|
||||||
|
q_values_k_5_action_exit: """
|
||||||
|
-1.0000
|
||||||
|
illegal
|
||||||
|
2.7100
|
||||||
|
"""
|
||||||
|
|
||||||
|
q_values_k_5_action_south: """
|
||||||
|
illegal
|
||||||
|
0.0000
|
||||||
|
illegal
|
||||||
|
"""
|
||||||
|
|
||||||
|
q_values_k_5_action_west: """
|
||||||
|
illegal
|
||||||
|
0.0000
|
||||||
|
illegal
|
||||||
|
"""
|
||||||
|
|
||||||
|
q_values_k_6_action_north: """
|
||||||
|
illegal
|
||||||
|
0.0000
|
||||||
|
illegal
|
||||||
|
"""
|
||||||
|
|
||||||
|
q_values_k_6_action_east: """
|
||||||
|
illegal
|
||||||
|
0.0000
|
||||||
|
illegal
|
||||||
|
"""
|
||||||
|
|
||||||
|
q_values_k_6_action_exit: """
|
||||||
|
-1.0000
|
||||||
|
illegal
|
||||||
|
3.4390
|
||||||
|
"""
|
||||||
|
|
||||||
|
q_values_k_6_action_south: """
|
||||||
|
illegal
|
||||||
|
0.0000
|
||||||
|
illegal
|
||||||
|
"""
|
||||||
|
|
||||||
|
q_values_k_6_action_west: """
|
||||||
|
illegal
|
||||||
|
0.0000
|
||||||
|
illegal
|
||||||
|
"""
|
||||||
|
|
||||||
|
q_values_k_7_action_north: """
|
||||||
|
illegal
|
||||||
|
0.0000
|
||||||
|
illegal
|
||||||
|
"""
|
||||||
|
|
||||||
|
q_values_k_7_action_east: """
|
||||||
|
illegal
|
||||||
|
0.0000
|
||||||
|
illegal
|
||||||
|
"""
|
||||||
|
|
||||||
|
q_values_k_7_action_exit: """
|
||||||
|
-1.0000
|
||||||
|
illegal
|
||||||
|
3.4390
|
||||||
|
"""
|
||||||
|
|
||||||
|
q_values_k_7_action_south: """
|
||||||
|
illegal
|
||||||
|
0.2579
|
||||||
|
illegal
|
||||||
|
"""
|
||||||
|
|
||||||
|
q_values_k_7_action_west: """
|
||||||
|
illegal
|
||||||
|
0.0000
|
||||||
|
illegal
|
||||||
|
"""
|
||||||
|
|
||||||
|
q_values_k_8_action_north: """
|
||||||
|
illegal
|
||||||
|
0.0000
|
||||||
|
illegal
|
||||||
|
"""
|
||||||
|
|
||||||
|
q_values_k_8_action_east: """
|
||||||
|
illegal
|
||||||
|
0.0000
|
||||||
|
illegal
|
||||||
|
"""
|
||||||
|
|
||||||
|
q_values_k_8_action_exit: """
|
||||||
|
-1.0000
|
||||||
|
illegal
|
||||||
|
4.0951
|
||||||
|
"""
|
||||||
|
|
||||||
|
q_values_k_8_action_south: """
|
||||||
|
illegal
|
||||||
|
0.2579
|
||||||
|
illegal
|
||||||
|
"""
|
||||||
|
|
||||||
|
q_values_k_8_action_west: """
|
||||||
|
illegal
|
||||||
|
0.0000
|
||||||
|
illegal
|
||||||
|
"""
|
||||||
|
|
||||||
|
q_values_k_9_action_north: """
|
||||||
|
illegal
|
||||||
|
0.0000
|
||||||
|
illegal
|
||||||
|
"""
|
||||||
|
|
||||||
|
q_values_k_9_action_east: """
|
||||||
|
illegal
|
||||||
|
0.0000
|
||||||
|
illegal
|
||||||
|
"""
|
||||||
|
|
||||||
|
q_values_k_9_action_exit: """
|
||||||
|
-1.0000
|
||||||
|
illegal
|
||||||
|
4.6856
|
||||||
|
"""
|
||||||
|
|
||||||
|
q_values_k_9_action_south: """
|
||||||
|
illegal
|
||||||
|
0.2579
|
||||||
|
illegal
|
||||||
|
"""
|
||||||
|
|
||||||
|
q_values_k_9_action_west: """
|
||||||
|
illegal
|
||||||
|
0.0000
|
||||||
|
illegal
|
||||||
|
"""
|
||||||
|
|
||||||
|
q_values_k_100_action_north: """
|
||||||
|
illegal
|
||||||
|
-0.6670
|
||||||
|
illegal
|
||||||
|
"""
|
||||||
|
|
||||||
|
q_values_k_100_action_east: """
|
||||||
|
illegal
|
||||||
|
0.9499
|
||||||
|
illegal
|
||||||
|
"""
|
||||||
|
|
||||||
|
q_values_k_100_action_exit: """
|
||||||
|
-9.4767
|
||||||
|
illegal
|
||||||
|
9.8175
|
||||||
|
"""
|
||||||
|
|
||||||
|
q_values_k_100_action_south: """
|
||||||
|
illegal
|
||||||
|
3.2562
|
||||||
|
illegal
|
||||||
|
"""
|
||||||
|
|
||||||
|
q_values_k_100_action_west: """
|
||||||
|
illegal
|
||||||
|
0.8236
|
||||||
|
illegal
|
||||||
|
"""
|
||||||
|
|
||||||
|
values: """
|
||||||
|
-9.4767
|
||||||
|
3.2562
|
||||||
|
9.8175
|
||||||
|
"""
|
||||||
|
|
||||||
|
policy: """
|
||||||
|
exit
|
||||||
|
south
|
||||||
|
exit
|
||||||
|
"""
|
||||||
|
|
||||||
22
p3_rl/test_cases/q4/2-tinygrid-noisy.test
Normal file
22
p3_rl/test_cases/q4/2-tinygrid-noisy.test
Normal file
@@ -0,0 +1,22 @@
|
|||||||
|
class: "QLearningTest"
|
||||||
|
|
||||||
|
# GridWorld specification
|
||||||
|
# _ is empty space
|
||||||
|
# numbers are terminal states with that value
|
||||||
|
# # is a wall
|
||||||
|
# S is a start state
|
||||||
|
#
|
||||||
|
grid: """
|
||||||
|
-10
|
||||||
|
S
|
||||||
|
10
|
||||||
|
"""
|
||||||
|
discount: "0.75"
|
||||||
|
noise: "0.25"
|
||||||
|
livingReward: "0.0"
|
||||||
|
epsilon: "0.5"
|
||||||
|
learningRate: "0.1"
|
||||||
|
numExperiences: "100"
|
||||||
|
valueIterations: "100"
|
||||||
|
iterations: "10000"
|
||||||
|
|
||||||
570
p3_rl/test_cases/q4/3-bridge.solution
Normal file
570
p3_rl/test_cases/q4/3-bridge.solution
Normal file
@@ -0,0 +1,570 @@
|
|||||||
|
q_values_k_0_action_north: """
|
||||||
|
__________ illegal __________
|
||||||
|
illegal 0.0000 illegal
|
||||||
|
illegal 0.0000 illegal
|
||||||
|
illegal 0.0000 illegal
|
||||||
|
illegal 0.0000 illegal
|
||||||
|
illegal 0.0000 illegal
|
||||||
|
__________ illegal __________
|
||||||
|
"""
|
||||||
|
|
||||||
|
q_values_k_0_action_east: """
|
||||||
|
__________ illegal __________
|
||||||
|
illegal 0.0000 illegal
|
||||||
|
illegal 0.0000 illegal
|
||||||
|
illegal 0.0000 illegal
|
||||||
|
illegal 0.0000 illegal
|
||||||
|
illegal 0.0000 illegal
|
||||||
|
__________ illegal __________
|
||||||
|
"""
|
||||||
|
|
||||||
|
q_values_k_0_action_exit: """
|
||||||
|
__________ 0.0000 __________
|
||||||
|
0.0000 illegal 0.0000
|
||||||
|
0.0000 illegal 0.0000
|
||||||
|
0.0000 illegal 0.0000
|
||||||
|
0.0000 illegal 0.0000
|
||||||
|
0.0000 illegal 0.0000
|
||||||
|
__________ 0.0000 __________
|
||||||
|
"""
|
||||||
|
|
||||||
|
q_values_k_0_action_south: """
|
||||||
|
__________ illegal __________
|
||||||
|
illegal 0.0000 illegal
|
||||||
|
illegal 0.0000 illegal
|
||||||
|
illegal 0.0000 illegal
|
||||||
|
illegal 0.0000 illegal
|
||||||
|
illegal 0.0000 illegal
|
||||||
|
__________ illegal __________
|
||||||
|
"""
|
||||||
|
|
||||||
|
q_values_k_0_action_west: """
|
||||||
|
__________ illegal __________
|
||||||
|
illegal 0.0000 illegal
|
||||||
|
illegal 0.0000 illegal
|
||||||
|
illegal 0.0000 illegal
|
||||||
|
illegal 0.0000 illegal
|
||||||
|
illegal 0.0000 illegal
|
||||||
|
__________ illegal __________
|
||||||
|
"""
|
||||||
|
|
||||||
|
q_values_k_1_action_north: """
|
||||||
|
__________ illegal __________
|
||||||
|
illegal 0.0000 illegal
|
||||||
|
illegal 0.0000 illegal
|
||||||
|
illegal 0.0000 illegal
|
||||||
|
illegal 0.0000 illegal
|
||||||
|
illegal 0.0000 illegal
|
||||||
|
__________ illegal __________
|
||||||
|
"""
|
||||||
|
|
||||||
|
q_values_k_1_action_east: """
|
||||||
|
__________ illegal __________
|
||||||
|
illegal 0.0000 illegal
|
||||||
|
illegal 0.0000 illegal
|
||||||
|
illegal 0.0000 illegal
|
||||||
|
illegal 0.0000 illegal
|
||||||
|
illegal 0.0000 illegal
|
||||||
|
__________ illegal __________
|
||||||
|
"""
|
||||||
|
|
||||||
|
q_values_k_1_action_exit: """
|
||||||
|
__________ 0.0000 __________
|
||||||
|
-10.0000 illegal 0.0000
|
||||||
|
0.0000 illegal 0.0000
|
||||||
|
0.0000 illegal 0.0000
|
||||||
|
0.0000 illegal 0.0000
|
||||||
|
0.0000 illegal 0.0000
|
||||||
|
__________ 0.0000 __________
|
||||||
|
"""
|
||||||
|
|
||||||
|
q_values_k_1_action_south: """
|
||||||
|
__________ illegal __________
|
||||||
|
illegal 0.0000 illegal
|
||||||
|
illegal 0.0000 illegal
|
||||||
|
illegal 0.0000 illegal
|
||||||
|
illegal 0.0000 illegal
|
||||||
|
illegal 0.0000 illegal
|
||||||
|
__________ illegal __________
|
||||||
|
"""
|
||||||
|
|
||||||
|
q_values_k_1_action_west: """
|
||||||
|
__________ illegal __________
|
||||||
|
illegal 0.0000 illegal
|
||||||
|
illegal 0.0000 illegal
|
||||||
|
illegal 0.0000 illegal
|
||||||
|
illegal 0.0000 illegal
|
||||||
|
illegal 0.0000 illegal
|
||||||
|
__________ illegal __________
|
||||||
|
"""
|
||||||
|
|
||||||
|
q_values_k_2_action_north: """
|
||||||
|
__________ illegal __________
|
||||||
|
illegal 0.0000 illegal
|
||||||
|
illegal 0.0000 illegal
|
||||||
|
illegal 0.0000 illegal
|
||||||
|
illegal 0.0000 illegal
|
||||||
|
illegal 0.0000 illegal
|
||||||
|
__________ illegal __________
|
||||||
|
"""
|
||||||
|
|
||||||
|
q_values_k_2_action_east: """
|
||||||
|
__________ illegal __________
|
||||||
|
illegal 0.0000 illegal
|
||||||
|
illegal 0.0000 illegal
|
||||||
|
illegal 0.0000 illegal
|
||||||
|
illegal 0.0000 illegal
|
||||||
|
illegal 0.0000 illegal
|
||||||
|
__________ illegal __________
|
||||||
|
"""
|
||||||
|
|
||||||
|
q_values_k_2_action_exit: """
|
||||||
|
__________ 0.0000 __________
|
||||||
|
-10.0000 illegal 0.0000
|
||||||
|
0.0000 illegal 0.0000
|
||||||
|
0.0000 illegal 0.0000
|
||||||
|
0.0000 illegal 0.0000
|
||||||
|
0.0000 illegal 0.0000
|
||||||
|
__________ 0.0000 __________
|
||||||
|
"""
|
||||||
|
|
||||||
|
q_values_k_2_action_south: """
|
||||||
|
__________ illegal __________
|
||||||
|
illegal 0.0000 illegal
|
||||||
|
illegal 0.0000 illegal
|
||||||
|
illegal 0.0000 illegal
|
||||||
|
illegal 0.0000 illegal
|
||||||
|
illegal 0.0000 illegal
|
||||||
|
__________ illegal __________
|
||||||
|
"""
|
||||||
|
|
||||||
|
q_values_k_2_action_west: """
|
||||||
|
__________ illegal __________
|
||||||
|
illegal 0.0000 illegal
|
||||||
|
illegal 0.0000 illegal
|
||||||
|
illegal 0.0000 illegal
|
||||||
|
illegal 0.0000 illegal
|
||||||
|
illegal 0.0000 illegal
|
||||||
|
__________ illegal __________
|
||||||
|
"""
|
||||||
|
|
||||||
|
q_values_k_3_action_north: """
|
||||||
|
__________ illegal __________
|
||||||
|
illegal 0.0000 illegal
|
||||||
|
illegal 0.0000 illegal
|
||||||
|
illegal 0.0000 illegal
|
||||||
|
illegal 0.0000 illegal
|
||||||
|
illegal 0.0000 illegal
|
||||||
|
__________ illegal __________
|
||||||
|
"""
|
||||||
|
|
||||||
|
q_values_k_3_action_east: """
|
||||||
|
__________ illegal __________
|
||||||
|
illegal 0.0000 illegal
|
||||||
|
illegal 0.0000 illegal
|
||||||
|
illegal 0.0000 illegal
|
||||||
|
illegal 0.0000 illegal
|
||||||
|
illegal 0.0000 illegal
|
||||||
|
__________ illegal __________
|
||||||
|
"""
|
||||||
|
|
||||||
|
q_values_k_3_action_exit: """
|
||||||
|
__________ 0.0000 __________
|
||||||
|
-10.0000 illegal 0.0000
|
||||||
|
0.0000 illegal 0.0000
|
||||||
|
0.0000 illegal 0.0000
|
||||||
|
0.0000 illegal 0.0000
|
||||||
|
-10.0000 illegal 0.0000
|
||||||
|
__________ 0.0000 __________
|
||||||
|
"""
|
||||||
|
|
||||||
|
q_values_k_3_action_south: """
|
||||||
|
__________ illegal __________
|
||||||
|
illegal 0.0000 illegal
|
||||||
|
illegal 0.0000 illegal
|
||||||
|
illegal 0.0000 illegal
|
||||||
|
illegal 0.0000 illegal
|
||||||
|
illegal 0.0000 illegal
|
||||||
|
__________ illegal __________
|
||||||
|
"""
|
||||||
|
|
||||||
|
q_values_k_3_action_west: """
|
||||||
|
__________ illegal __________
|
||||||
|
illegal 0.0000 illegal
|
||||||
|
illegal 0.0000 illegal
|
||||||
|
illegal 0.0000 illegal
|
||||||
|
illegal 0.0000 illegal
|
||||||
|
illegal 0.0000 illegal
|
||||||
|
__________ illegal __________
|
||||||
|
"""
|
||||||
|
|
||||||
|
q_values_k_4_action_north: """
|
||||||
|
__________ illegal __________
|
||||||
|
illegal 0.0000 illegal
|
||||||
|
illegal 0.0000 illegal
|
||||||
|
illegal 0.0000 illegal
|
||||||
|
illegal 0.0000 illegal
|
||||||
|
illegal 0.0000 illegal
|
||||||
|
__________ illegal __________
|
||||||
|
"""
|
||||||
|
|
||||||
|
q_values_k_4_action_east: """
|
||||||
|
__________ illegal __________
|
||||||
|
illegal 0.0000 illegal
|
||||||
|
illegal 0.0000 illegal
|
||||||
|
illegal 0.0000 illegal
|
||||||
|
illegal 0.0000 illegal
|
||||||
|
illegal 0.0000 illegal
|
||||||
|
__________ illegal __________
|
||||||
|
"""
|
||||||
|
|
||||||
|
q_values_k_4_action_exit: """
|
||||||
|
__________ 0.0000 __________
|
||||||
|
-10.0000 illegal 0.0000
|
||||||
|
-10.0000 illegal 0.0000
|
||||||
|
0.0000 illegal 0.0000
|
||||||
|
0.0000 illegal 0.0000
|
||||||
|
-10.0000 illegal 0.0000
|
||||||
|
__________ 0.0000 __________
|
||||||
|
"""
|
||||||
|
|
||||||
|
q_values_k_4_action_south: """
|
||||||
|
__________ illegal __________
|
||||||
|
illegal 0.0000 illegal
|
||||||
|
illegal 0.0000 illegal
|
||||||
|
illegal 0.0000 illegal
|
||||||
|
illegal 0.0000 illegal
|
||||||
|
illegal 0.0000 illegal
|
||||||
|
__________ illegal __________
|
||||||
|
"""
|
||||||
|
|
||||||
|
q_values_k_4_action_west: """
|
||||||
|
__________ illegal __________
|
||||||
|
illegal 0.0000 illegal
|
||||||
|
illegal 0.0000 illegal
|
||||||
|
illegal 0.0000 illegal
|
||||||
|
illegal 0.0000 illegal
|
||||||
|
illegal 0.0000 illegal
|
||||||
|
__________ illegal __________
|
||||||
|
"""
|
||||||
|
|
||||||
|
q_values_k_5_action_north: """
|
||||||
|
__________ illegal __________
|
||||||
|
illegal 0.0000 illegal
|
||||||
|
illegal 0.0000 illegal
|
||||||
|
illegal 0.0000 illegal
|
||||||
|
illegal 0.0000 illegal
|
||||||
|
illegal 0.0000 illegal
|
||||||
|
__________ illegal __________
|
||||||
|
"""
|
||||||
|
|
||||||
|
q_values_k_5_action_east: """
|
||||||
|
__________ illegal __________
|
||||||
|
illegal 0.0000 illegal
|
||||||
|
illegal 0.0000 illegal
|
||||||
|
illegal 0.0000 illegal
|
||||||
|
illegal 0.0000 illegal
|
||||||
|
illegal 0.0000 illegal
|
||||||
|
__________ illegal __________
|
||||||
|
"""
|
||||||
|
|
||||||
|
q_values_k_5_action_exit: """
|
||||||
|
__________ 0.0000 __________
|
||||||
|
-10.0000 illegal 0.0000
|
||||||
|
-10.0000 illegal 0.0000
|
||||||
|
0.0000 illegal -10.0000
|
||||||
|
0.0000 illegal 0.0000
|
||||||
|
-10.0000 illegal 0.0000
|
||||||
|
__________ 0.0000 __________
|
||||||
|
"""
|
||||||
|
|
||||||
|
q_values_k_5_action_south: """
|
||||||
|
__________ illegal __________
|
||||||
|
illegal 0.0000 illegal
|
||||||
|
illegal 0.0000 illegal
|
||||||
|
illegal 0.0000 illegal
|
||||||
|
illegal 0.0000 illegal
|
||||||
|
illegal 0.0000 illegal
|
||||||
|
__________ illegal __________
|
||||||
|
"""
|
||||||
|
|
||||||
|
q_values_k_5_action_west: """
|
||||||
|
__________ illegal __________
|
||||||
|
illegal 0.0000 illegal
|
||||||
|
illegal 0.0000 illegal
|
||||||
|
illegal 0.0000 illegal
|
||||||
|
illegal 0.0000 illegal
|
||||||
|
illegal 0.0000 illegal
|
||||||
|
__________ illegal __________
|
||||||
|
"""
|
||||||
|
|
||||||
|
q_values_k_6_action_north: """
|
||||||
|
__________ illegal __________
|
||||||
|
illegal 0.0000 illegal
|
||||||
|
illegal 0.0000 illegal
|
||||||
|
illegal 0.0000 illegal
|
||||||
|
illegal 0.0000 illegal
|
||||||
|
illegal 0.0000 illegal
|
||||||
|
__________ illegal __________
|
||||||
|
"""
|
||||||
|
|
||||||
|
q_values_k_6_action_east: """
|
||||||
|
__________ illegal __________
|
||||||
|
illegal 0.0000 illegal
|
||||||
|
illegal 0.0000 illegal
|
||||||
|
illegal 0.0000 illegal
|
||||||
|
illegal 0.0000 illegal
|
||||||
|
illegal 0.0000 illegal
|
||||||
|
__________ illegal __________
|
||||||
|
"""
|
||||||
|
|
||||||
|
q_values_k_6_action_exit: """
|
||||||
|
__________ 0.0000 __________
|
||||||
|
-10.0000 illegal 0.0000
|
||||||
|
-10.0000 illegal 0.0000
|
||||||
|
-10.0000 illegal -10.0000
|
||||||
|
0.0000 illegal 0.0000
|
||||||
|
-10.0000 illegal 0.0000
|
||||||
|
__________ 0.0000 __________
|
||||||
|
"""
|
||||||
|
|
||||||
|
q_values_k_6_action_south: """
|
||||||
|
__________ illegal __________
|
||||||
|
illegal 0.0000 illegal
|
||||||
|
illegal 0.0000 illegal
|
||||||
|
illegal 0.0000 illegal
|
||||||
|
illegal 0.0000 illegal
|
||||||
|
illegal 0.0000 illegal
|
||||||
|
__________ illegal __________
|
||||||
|
"""
|
||||||
|
|
||||||
|
q_values_k_6_action_west: """
|
||||||
|
__________ illegal __________
|
||||||
|
illegal 0.0000 illegal
|
||||||
|
illegal 0.0000 illegal
|
||||||
|
illegal 0.0000 illegal
|
||||||
|
illegal 0.0000 illegal
|
||||||
|
illegal 0.0000 illegal
|
||||||
|
__________ illegal __________
|
||||||
|
"""
|
||||||
|
|
||||||
|
q_values_k_7_action_north: """
|
||||||
|
__________ illegal __________
|
||||||
|
illegal 0.0000 illegal
|
||||||
|
illegal 0.0000 illegal
|
||||||
|
illegal 0.0000 illegal
|
||||||
|
illegal 0.0000 illegal
|
||||||
|
illegal 0.0000 illegal
|
||||||
|
__________ illegal __________
|
||||||
|
"""
|
||||||
|
|
||||||
|
q_values_k_7_action_east: """
|
||||||
|
__________ illegal __________
|
||||||
|
illegal 0.0000 illegal
|
||||||
|
illegal 0.0000 illegal
|
||||||
|
illegal 0.0000 illegal
|
||||||
|
illegal 0.0000 illegal
|
||||||
|
illegal 0.0000 illegal
|
||||||
|
__________ illegal __________
|
||||||
|
"""
|
||||||
|
|
||||||
|
q_values_k_7_action_exit: """
|
||||||
|
__________ 0.0000 __________
|
||||||
|
-10.0000 illegal 0.0000
|
||||||
|
-10.0000 illegal 0.0000
|
||||||
|
-10.0000 illegal -10.0000
|
||||||
|
0.0000 illegal 0.0000
|
||||||
|
-10.0000 illegal 0.0000
|
||||||
|
__________ 0.0000 __________
|
||||||
|
"""
|
||||||
|
|
||||||
|
q_values_k_7_action_south: """
|
||||||
|
__________ illegal __________
|
||||||
|
illegal 0.0000 illegal
|
||||||
|
illegal 0.0000 illegal
|
||||||
|
illegal 0.0000 illegal
|
||||||
|
illegal 0.0000 illegal
|
||||||
|
illegal 0.0000 illegal
|
||||||
|
__________ illegal __________
|
||||||
|
"""
|
||||||
|
|
||||||
|
q_values_k_7_action_west: """
|
||||||
|
__________ illegal __________
|
||||||
|
illegal 0.0000 illegal
|
||||||
|
illegal 0.0000 illegal
|
||||||
|
illegal 0.0000 illegal
|
||||||
|
illegal 0.0000 illegal
|
||||||
|
illegal 0.0000 illegal
|
||||||
|
__________ illegal __________
|
||||||
|
"""
|
||||||
|
|
||||||
|
q_values_k_8_action_north: """
|
||||||
|
__________ illegal __________
|
||||||
|
illegal 0.0000 illegal
|
||||||
|
illegal 0.0000 illegal
|
||||||
|
illegal 0.0000 illegal
|
||||||
|
illegal 0.0000 illegal
|
||||||
|
illegal 0.0000 illegal
|
||||||
|
__________ illegal __________
|
||||||
|
"""
|
||||||
|
|
||||||
|
q_values_k_8_action_east: """
|
||||||
|
__________ illegal __________
|
||||||
|
illegal 0.0000 illegal
|
||||||
|
illegal 0.0000 illegal
|
||||||
|
illegal 0.0000 illegal
|
||||||
|
illegal 0.0000 illegal
|
||||||
|
illegal 0.0000 illegal
|
||||||
|
__________ illegal __________
|
||||||
|
"""
|
||||||
|
|
||||||
|
q_values_k_8_action_exit: """
|
||||||
|
__________ 0.0000 __________
|
||||||
|
-10.0000 illegal 0.0000
|
||||||
|
-10.0000 illegal 0.0000
|
||||||
|
-10.0000 illegal -10.0000
|
||||||
|
-10.0000 illegal 0.0000
|
||||||
|
-10.0000 illegal 0.0000
|
||||||
|
__________ 0.0000 __________
|
||||||
|
"""
|
||||||
|
|
||||||
|
q_values_k_8_action_south: """
|
||||||
|
__________ illegal __________
|
||||||
|
illegal 0.0000 illegal
|
||||||
|
illegal 0.0000 illegal
|
||||||
|
illegal 0.0000 illegal
|
||||||
|
illegal 0.0000 illegal
|
||||||
|
illegal 0.0000 illegal
|
||||||
|
__________ illegal __________
|
||||||
|
"""
|
||||||
|
|
||||||
|
q_values_k_8_action_west: """
|
||||||
|
__________ illegal __________
|
||||||
|
illegal 0.0000 illegal
|
||||||
|
illegal 0.0000 illegal
|
||||||
|
illegal 0.0000 illegal
|
||||||
|
illegal 0.0000 illegal
|
||||||
|
illegal 0.0000 illegal
|
||||||
|
__________ illegal __________
|
||||||
|
"""
|
||||||
|
|
||||||
|
q_values_k_9_action_north: """
|
||||||
|
__________ illegal __________
|
||||||
|
illegal 0.0000 illegal
|
||||||
|
illegal 0.0000 illegal
|
||||||
|
illegal 0.0000 illegal
|
||||||
|
illegal 0.0000 illegal
|
||||||
|
illegal 0.0000 illegal
|
||||||
|
__________ illegal __________
|
||||||
|
"""
|
||||||
|
|
||||||
|
q_values_k_9_action_east: """
|
||||||
|
__________ illegal __________
|
||||||
|
illegal 0.0000 illegal
|
||||||
|
illegal 0.0000 illegal
|
||||||
|
illegal 0.0000 illegal
|
||||||
|
illegal 0.0000 illegal
|
||||||
|
illegal 0.0000 illegal
|
||||||
|
__________ illegal __________
|
||||||
|
"""
|
||||||
|
|
||||||
|
q_values_k_9_action_exit: """
|
||||||
|
__________ 0.0000 __________
|
||||||
|
-10.0000 illegal 0.0000
|
||||||
|
-10.0000 illegal 0.0000
|
||||||
|
-10.0000 illegal -10.0000
|
||||||
|
-10.0000 illegal 0.0000
|
||||||
|
-10.0000 illegal 0.0000
|
||||||
|
__________ 0.1000 __________
|
||||||
|
"""
|
||||||
|
|
||||||
|
q_values_k_9_action_south: """
|
||||||
|
__________ illegal __________
|
||||||
|
illegal 0.0000 illegal
|
||||||
|
illegal 0.0000 illegal
|
||||||
|
illegal 0.0000 illegal
|
||||||
|
illegal 0.0000 illegal
|
||||||
|
illegal 0.0000 illegal
|
||||||
|
__________ illegal __________
|
||||||
|
"""
|
||||||
|
|
||||||
|
q_values_k_9_action_west: """
|
||||||
|
__________ illegal __________
|
||||||
|
illegal 0.0000 illegal
|
||||||
|
illegal 0.0000 illegal
|
||||||
|
illegal 0.0000 illegal
|
||||||
|
illegal 0.0000 illegal
|
||||||
|
illegal 0.0000 illegal
|
||||||
|
__________ illegal __________
|
||||||
|
"""
|
||||||
|
|
||||||
|
q_values_k_500_action_north: """
|
||||||
|
__________ illegal __________
|
||||||
|
illegal -5.8648 illegal
|
||||||
|
illegal -0.7995 illegal
|
||||||
|
illegal -0.1671 illegal
|
||||||
|
illegal -1.2642 illegal
|
||||||
|
illegal -0.5871 illegal
|
||||||
|
__________ illegal __________
|
||||||
|
"""
|
||||||
|
|
||||||
|
q_values_k_500_action_east: """
|
||||||
|
__________ illegal __________
|
||||||
|
illegal -17.0676 illegal
|
||||||
|
illegal -26.5534 illegal
|
||||||
|
illegal -3.6957 illegal
|
||||||
|
illegal -43.5952 illegal
|
||||||
|
illegal -31.6884 illegal
|
||||||
|
__________ illegal __________
|
||||||
|
"""
|
||||||
|
|
||||||
|
q_values_k_500_action_exit: """
|
||||||
|
__________ 9.3539 __________
|
||||||
|
-96.5663 illegal -96.9097
|
||||||
|
-97.7472 illegal -94.1850
|
||||||
|
-89.0581 illegal -96.9097
|
||||||
|
-97.2187 illegal -87.8423
|
||||||
|
-92.8210 illegal -97.2187
|
||||||
|
__________ 0.9576 __________
|
||||||
|
"""
|
||||||
|
|
||||||
|
q_values_k_500_action_south: """
|
||||||
|
__________ illegal __________
|
||||||
|
illegal -6.8377 illegal
|
||||||
|
illegal -6.7277 illegal
|
||||||
|
illegal -3.4723 illegal
|
||||||
|
illegal -8.4015 illegal
|
||||||
|
illegal -5.5718 illegal
|
||||||
|
__________ illegal __________
|
||||||
|
"""
|
||||||
|
|
||||||
|
q_values_k_500_action_west: """
|
||||||
|
__________ illegal __________
|
||||||
|
illegal -27.0626 illegal
|
||||||
|
illegal -39.0610 illegal
|
||||||
|
illegal -40.5887 illegal
|
||||||
|
illegal -16.2839 illegal
|
||||||
|
illegal -20.7770 illegal
|
||||||
|
__________ illegal __________
|
||||||
|
"""
|
||||||
|
|
||||||
|
values: """
|
||||||
|
__________ 9.3539 __________
|
||||||
|
-96.5663 -5.8648 -96.9097
|
||||||
|
-97.7472 -0.7995 -94.1850
|
||||||
|
-89.0581 -0.1671 -96.9097
|
||||||
|
-97.2187 -1.2642 -87.8423
|
||||||
|
-92.8210 -0.5871 -97.2187
|
||||||
|
__________ 0.9576 __________
|
||||||
|
"""
|
||||||
|
|
||||||
|
policy: """
|
||||||
|
__________ exit __________
|
||||||
|
exit north exit
|
||||||
|
exit north exit
|
||||||
|
exit north exit
|
||||||
|
exit north exit
|
||||||
|
exit north exit
|
||||||
|
__________ exit __________
|
||||||
|
"""
|
||||||
|
|
||||||
27
p3_rl/test_cases/q4/3-bridge.test
Normal file
27
p3_rl/test_cases/q4/3-bridge.test
Normal file
@@ -0,0 +1,27 @@
|
|||||||
|
class: "QLearningTest"
|
||||||
|
|
||||||
|
# GridWorld specification
|
||||||
|
# _ is empty space
|
||||||
|
# numbers are terminal states with that value
|
||||||
|
# # is a wall
|
||||||
|
# S is a start state
|
||||||
|
#
|
||||||
|
grid: """
|
||||||
|
# 10 #
|
||||||
|
-100 _ -100
|
||||||
|
-100 _ -100
|
||||||
|
-100 _ -100
|
||||||
|
-100 _ -100
|
||||||
|
-100 S -100
|
||||||
|
# 1 #
|
||||||
|
"""
|
||||||
|
gridName: "bridgeGrid"
|
||||||
|
discount: "0.85"
|
||||||
|
noise: "0.1"
|
||||||
|
livingReward: "0.0"
|
||||||
|
epsilon: "0.5"
|
||||||
|
learningRate: "0.1"
|
||||||
|
numExperiences: "500"
|
||||||
|
valueIterations: "100"
|
||||||
|
iterations: "10000"
|
||||||
|
|
||||||
456
p3_rl/test_cases/q4/4-discountgrid.solution
Normal file
456
p3_rl/test_cases/q4/4-discountgrid.solution
Normal file
@@ -0,0 +1,456 @@
|
|||||||
|
q_values_k_0_action_north: """
|
||||||
|
illegal 0.0000 illegal 0.0000 0.0000
|
||||||
|
illegal 0.0000 __________ 0.0000 0.0000
|
||||||
|
illegal 0.0000 illegal 0.0000 0.0000
|
||||||
|
illegal 0.0000 __________ __________ 0.0000
|
||||||
|
illegal 0.0000 0.0000 0.0000 0.0000
|
||||||
|
"""
|
||||||
|
|
||||||
|
q_values_k_0_action_east: """
|
||||||
|
illegal 0.0000 illegal 0.0000 0.0000
|
||||||
|
illegal 0.0000 __________ 0.0000 0.0000
|
||||||
|
illegal 0.0000 illegal 0.0000 0.0000
|
||||||
|
illegal 0.0000 __________ __________ 0.0000
|
||||||
|
illegal 0.0000 0.0000 0.0000 0.0000
|
||||||
|
"""
|
||||||
|
|
||||||
|
q_values_k_0_action_exit: """
|
||||||
|
0.0000 illegal 0.0000 illegal illegal
|
||||||
|
0.0000 illegal __________ illegal illegal
|
||||||
|
0.0000 illegal 0.0000 illegal illegal
|
||||||
|
0.0000 illegal __________ __________ illegal
|
||||||
|
0.0000 illegal illegal illegal illegal
|
||||||
|
"""
|
||||||
|
|
||||||
|
q_values_k_0_action_south: """
|
||||||
|
illegal 0.0000 illegal 0.0000 0.0000
|
||||||
|
illegal 0.0000 __________ 0.0000 0.0000
|
||||||
|
illegal 0.0000 illegal 0.0000 0.0000
|
||||||
|
illegal 0.0000 __________ __________ 0.0000
|
||||||
|
illegal 0.0000 0.0000 0.0000 0.0000
|
||||||
|
"""
|
||||||
|
|
||||||
|
q_values_k_0_action_west: """
|
||||||
|
illegal 0.0000 illegal 0.0000 0.0000
|
||||||
|
illegal 0.0000 __________ 0.0000 0.0000
|
||||||
|
illegal 0.0000 illegal 0.0000 0.0000
|
||||||
|
illegal 0.0000 __________ __________ 0.0000
|
||||||
|
illegal 0.0000 0.0000 0.0000 0.0000
|
||||||
|
"""
|
||||||
|
|
||||||
|
q_values_k_1_action_north: """
|
||||||
|
illegal 0.0000 illegal 0.0000 0.0000
|
||||||
|
illegal 0.0000 __________ 0.0000 0.0000
|
||||||
|
illegal 0.0000 illegal 0.0000 0.0000
|
||||||
|
illegal 0.0000 __________ __________ 0.0000
|
||||||
|
illegal 0.0000 0.0000 0.0000 0.0000
|
||||||
|
"""
|
||||||
|
|
||||||
|
q_values_k_1_action_east: """
|
||||||
|
illegal 0.0000 illegal 0.0000 0.0000
|
||||||
|
illegal 0.0000 __________ 0.0000 0.0000
|
||||||
|
illegal 0.0000 illegal 0.0000 0.0000
|
||||||
|
illegal 0.0000 __________ __________ 0.0000
|
||||||
|
illegal 0.0000 0.0000 0.0000 0.0000
|
||||||
|
"""
|
||||||
|
|
||||||
|
q_values_k_1_action_exit: """
|
||||||
|
0.0000 illegal 0.0000 illegal illegal
|
||||||
|
0.0000 illegal __________ illegal illegal
|
||||||
|
0.0000 illegal 0.0000 illegal illegal
|
||||||
|
0.0000 illegal __________ __________ illegal
|
||||||
|
0.0000 illegal illegal illegal illegal
|
||||||
|
"""
|
||||||
|
|
||||||
|
q_values_k_1_action_south: """
|
||||||
|
illegal 0.0000 illegal 0.0000 0.0000
|
||||||
|
illegal 0.0000 __________ 0.0000 0.0000
|
||||||
|
illegal 0.0000 illegal 0.0000 0.0000
|
||||||
|
illegal 0.0000 __________ __________ 0.0000
|
||||||
|
illegal 0.0000 0.0000 0.0000 0.0000
|
||||||
|
"""
|
||||||
|
|
||||||
|
q_values_k_1_action_west: """
|
||||||
|
illegal 0.0000 illegal 0.0000 0.0000
|
||||||
|
illegal 0.0000 __________ 0.0000 0.0000
|
||||||
|
illegal 0.0000 illegal 0.0000 0.0000
|
||||||
|
illegal 0.0000 __________ __________ 0.0000
|
||||||
|
illegal 0.0000 0.0000 0.0000 0.0000
|
||||||
|
"""
|
||||||
|
|
||||||
|
q_values_k_2_action_north: """
|
||||||
|
illegal 0.0000 illegal 0.0000 0.0000
|
||||||
|
illegal 0.0000 __________ 0.0000 0.0000
|
||||||
|
illegal 0.0000 illegal 0.0000 0.0000
|
||||||
|
illegal 0.0000 __________ __________ 0.0000
|
||||||
|
illegal 0.0000 0.0000 0.0000 0.0000
|
||||||
|
"""
|
||||||
|
|
||||||
|
q_values_k_2_action_east: """
|
||||||
|
illegal 0.0000 illegal 0.0000 0.0000
|
||||||
|
illegal 0.0000 __________ 0.0000 0.0000
|
||||||
|
illegal 0.0000 illegal 0.0000 0.0000
|
||||||
|
illegal 0.0000 __________ __________ 0.0000
|
||||||
|
illegal 0.0000 0.0000 0.0000 0.0000
|
||||||
|
"""
|
||||||
|
|
||||||
|
q_values_k_2_action_exit: """
|
||||||
|
0.0000 illegal 0.0000 illegal illegal
|
||||||
|
0.0000 illegal __________ illegal illegal
|
||||||
|
0.0000 illegal 0.0000 illegal illegal
|
||||||
|
0.0000 illegal __________ __________ illegal
|
||||||
|
0.0000 illegal illegal illegal illegal
|
||||||
|
"""
|
||||||
|
|
||||||
|
q_values_k_2_action_south: """
|
||||||
|
illegal 0.0000 illegal 0.0000 0.0000
|
||||||
|
illegal 0.0000 __________ 0.0000 0.0000
|
||||||
|
illegal 0.0000 illegal 0.0000 0.0000
|
||||||
|
illegal 0.0000 __________ __________ 0.0000
|
||||||
|
illegal 0.0000 0.0000 0.0000 0.0000
|
||||||
|
"""
|
||||||
|
|
||||||
|
q_values_k_2_action_west: """
|
||||||
|
illegal 0.0000 illegal 0.0000 0.0000
|
||||||
|
illegal 0.0000 __________ 0.0000 0.0000
|
||||||
|
illegal 0.0000 illegal 0.0000 0.0000
|
||||||
|
illegal 0.0000 __________ __________ 0.0000
|
||||||
|
illegal 0.0000 0.0000 0.0000 0.0000
|
||||||
|
"""
|
||||||
|
|
||||||
|
q_values_k_3_action_north: """
|
||||||
|
illegal 0.0000 illegal 0.0000 0.0000
|
||||||
|
illegal 0.0000 __________ 0.0000 0.0000
|
||||||
|
illegal 0.0000 illegal 0.0000 0.0000
|
||||||
|
illegal 0.0000 __________ __________ 0.0000
|
||||||
|
illegal 0.0000 0.0000 0.0000 0.0000
|
||||||
|
"""
|
||||||
|
|
||||||
|
q_values_k_3_action_east: """
|
||||||
|
illegal 0.0000 illegal 0.0000 0.0000
|
||||||
|
illegal 0.0000 __________ 0.0000 0.0000
|
||||||
|
illegal 0.0000 illegal 0.0000 0.0000
|
||||||
|
illegal 0.0000 __________ __________ 0.0000
|
||||||
|
illegal 0.0000 0.0000 0.0000 0.0000
|
||||||
|
"""
|
||||||
|
|
||||||
|
q_values_k_3_action_exit: """
|
||||||
|
0.0000 illegal 0.0000 illegal illegal
|
||||||
|
0.0000 illegal __________ illegal illegal
|
||||||
|
0.0000 illegal 0.0000 illegal illegal
|
||||||
|
0.0000 illegal __________ __________ illegal
|
||||||
|
-1.0000 illegal illegal illegal illegal
|
||||||
|
"""
|
||||||
|
|
||||||
|
q_values_k_3_action_south: """
|
||||||
|
illegal 0.0000 illegal 0.0000 0.0000
|
||||||
|
illegal 0.0000 __________ 0.0000 0.0000
|
||||||
|
illegal 0.0000 illegal 0.0000 0.0000
|
||||||
|
illegal 0.0000 __________ __________ 0.0000
|
||||||
|
illegal 0.0000 0.0000 0.0000 0.0000
|
||||||
|
"""
|
||||||
|
|
||||||
|
q_values_k_3_action_west: """
|
||||||
|
illegal 0.0000 illegal 0.0000 0.0000
|
||||||
|
illegal 0.0000 __________ 0.0000 0.0000
|
||||||
|
illegal 0.0000 illegal 0.0000 0.0000
|
||||||
|
illegal 0.0000 __________ __________ 0.0000
|
||||||
|
illegal 0.0000 0.0000 0.0000 0.0000
|
||||||
|
"""
|
||||||
|
|
||||||
|
q_values_k_4_action_north: """
|
||||||
|
illegal 0.0000 illegal 0.0000 0.0000
|
||||||
|
illegal 0.0000 __________ 0.0000 0.0000
|
||||||
|
illegal 0.0000 illegal 0.0000 0.0000
|
||||||
|
illegal 0.0000 __________ __________ 0.0000
|
||||||
|
illegal 0.0000 0.0000 0.0000 0.0000
|
||||||
|
"""
|
||||||
|
|
||||||
|
q_values_k_4_action_east: """
|
||||||
|
illegal 0.0000 illegal 0.0000 0.0000
|
||||||
|
illegal 0.0000 __________ 0.0000 0.0000
|
||||||
|
illegal 0.0000 illegal 0.0000 0.0000
|
||||||
|
illegal 0.0000 __________ __________ 0.0000
|
||||||
|
illegal 0.0000 0.0000 0.0000 0.0000
|
||||||
|
"""
|
||||||
|
|
||||||
|
q_values_k_4_action_exit: """
|
||||||
|
0.0000 illegal 0.0000 illegal illegal
|
||||||
|
0.0000 illegal __________ illegal illegal
|
||||||
|
0.0000 illegal 0.0000 illegal illegal
|
||||||
|
0.0000 illegal __________ __________ illegal
|
||||||
|
-1.0000 illegal illegal illegal illegal
|
||||||
|
"""
|
||||||
|
|
||||||
|
q_values_k_4_action_south: """
|
||||||
|
illegal 0.0000 illegal 0.0000 0.0000
|
||||||
|
illegal 0.0000 __________ 0.0000 0.0000
|
||||||
|
illegal 0.0000 illegal 0.0000 0.0000
|
||||||
|
illegal 0.0000 __________ __________ 0.0000
|
||||||
|
illegal 0.0000 0.0000 0.0000 0.0000
|
||||||
|
"""
|
||||||
|
|
||||||
|
q_values_k_4_action_west: """
|
||||||
|
illegal 0.0000 illegal 0.0000 0.0000
|
||||||
|
illegal 0.0000 __________ 0.0000 0.0000
|
||||||
|
illegal 0.0000 illegal 0.0000 0.0000
|
||||||
|
illegal 0.0000 __________ __________ 0.0000
|
||||||
|
illegal 0.0000 0.0000 0.0000 0.0000
|
||||||
|
"""
|
||||||
|
|
||||||
|
q_values_k_5_action_north: """
|
||||||
|
illegal 0.0000 illegal 0.0000 0.0000
|
||||||
|
illegal 0.0000 __________ 0.0000 0.0000
|
||||||
|
illegal 0.0000 illegal 0.0000 0.0000
|
||||||
|
illegal 0.0000 __________ __________ 0.0000
|
||||||
|
illegal 0.0000 0.0000 0.0000 0.0000
|
||||||
|
"""
|
||||||
|
|
||||||
|
q_values_k_5_action_east: """
|
||||||
|
illegal 0.0000 illegal 0.0000 0.0000
|
||||||
|
illegal 0.0000 __________ 0.0000 0.0000
|
||||||
|
illegal 0.0000 illegal 0.0000 0.0000
|
||||||
|
illegal 0.0000 __________ __________ 0.0000
|
||||||
|
illegal 0.0000 0.0000 0.0000 0.0000
|
||||||
|
"""
|
||||||
|
|
||||||
|
q_values_k_5_action_exit: """
|
||||||
|
0.0000 illegal 0.0000 illegal illegal
|
||||||
|
0.0000 illegal __________ illegal illegal
|
||||||
|
0.0000 illegal 0.0000 illegal illegal
|
||||||
|
0.0000 illegal __________ __________ illegal
|
||||||
|
-1.0000 illegal illegal illegal illegal
|
||||||
|
"""
|
||||||
|
|
||||||
|
q_values_k_5_action_south: """
|
||||||
|
illegal 0.0000 illegal 0.0000 0.0000
|
||||||
|
illegal 0.0000 __________ 0.0000 0.0000
|
||||||
|
illegal 0.0000 illegal 0.0000 0.0000
|
||||||
|
illegal 0.0000 __________ __________ 0.0000
|
||||||
|
illegal 0.0000 0.0000 0.0000 0.0000
|
||||||
|
"""
|
||||||
|
|
||||||
|
q_values_k_5_action_west: """
|
||||||
|
illegal 0.0000 illegal 0.0000 0.0000
|
||||||
|
illegal 0.0000 __________ 0.0000 0.0000
|
||||||
|
illegal 0.0000 illegal 0.0000 0.0000
|
||||||
|
illegal 0.0000 __________ __________ 0.0000
|
||||||
|
illegal 0.0000 0.0000 0.0000 0.0000
|
||||||
|
"""
|
||||||
|
|
||||||
|
q_values_k_6_action_north: """
|
||||||
|
illegal 0.0000 illegal 0.0000 0.0000
|
||||||
|
illegal 0.0000 __________ 0.0000 0.0000
|
||||||
|
illegal 0.0000 illegal 0.0000 0.0000
|
||||||
|
illegal 0.0000 __________ __________ 0.0000
|
||||||
|
illegal 0.0000 0.0000 0.0000 0.0000
|
||||||
|
"""
|
||||||
|
|
||||||
|
q_values_k_6_action_east: """
|
||||||
|
illegal 0.0000 illegal 0.0000 0.0000
|
||||||
|
illegal 0.0000 __________ 0.0000 0.0000
|
||||||
|
illegal 0.0000 illegal 0.0000 0.0000
|
||||||
|
illegal 0.0000 __________ __________ 0.0000
|
||||||
|
illegal 0.0000 0.0000 0.0000 0.0000
|
||||||
|
"""
|
||||||
|
|
||||||
|
q_values_k_6_action_exit: """
|
||||||
|
0.0000 illegal 0.0000 illegal illegal
|
||||||
|
-1.0000 illegal __________ illegal illegal
|
||||||
|
0.0000 illegal 0.0000 illegal illegal
|
||||||
|
0.0000 illegal __________ __________ illegal
|
||||||
|
-1.0000 illegal illegal illegal illegal
|
||||||
|
"""
|
||||||
|
|
||||||
|
q_values_k_6_action_south: """
|
||||||
|
illegal 0.0000 illegal 0.0000 0.0000
|
||||||
|
illegal 0.0000 __________ 0.0000 0.0000
|
||||||
|
illegal 0.0000 illegal 0.0000 0.0000
|
||||||
|
illegal 0.0000 __________ __________ 0.0000
|
||||||
|
illegal 0.0000 0.0000 0.0000 0.0000
|
||||||
|
"""
|
||||||
|
|
||||||
|
q_values_k_6_action_west: """
|
||||||
|
illegal 0.0000 illegal 0.0000 0.0000
|
||||||
|
illegal 0.0000 __________ 0.0000 0.0000
|
||||||
|
illegal 0.0000 illegal 0.0000 0.0000
|
||||||
|
illegal 0.0000 __________ __________ 0.0000
|
||||||
|
illegal 0.0000 0.0000 0.0000 0.0000
|
||||||
|
"""
|
||||||
|
|
||||||
|
q_values_k_7_action_north: """
|
||||||
|
illegal 0.0000 illegal 0.0000 0.0000
|
||||||
|
illegal 0.0000 __________ 0.0000 0.0000
|
||||||
|
illegal 0.0000 illegal 0.0000 0.0000
|
||||||
|
illegal 0.0000 __________ __________ 0.0000
|
||||||
|
illegal 0.0000 0.0000 0.0000 0.0000
|
||||||
|
"""
|
||||||
|
|
||||||
|
q_values_k_7_action_east: """
|
||||||
|
illegal 0.0000 illegal 0.0000 0.0000
|
||||||
|
illegal 0.0000 __________ 0.0000 0.0000
|
||||||
|
illegal 0.0000 illegal 0.0000 0.0000
|
||||||
|
illegal 0.0000 __________ __________ 0.0000
|
||||||
|
illegal 0.0000 0.0000 0.0000 0.0000
|
||||||
|
"""
|
||||||
|
|
||||||
|
q_values_k_7_action_exit: """
|
||||||
|
0.0000 illegal 0.0000 illegal illegal
|
||||||
|
-1.0000 illegal __________ illegal illegal
|
||||||
|
0.0000 illegal 0.1000 illegal illegal
|
||||||
|
0.0000 illegal __________ __________ illegal
|
||||||
|
-1.0000 illegal illegal illegal illegal
|
||||||
|
"""
|
||||||
|
|
||||||
|
q_values_k_7_action_south: """
|
||||||
|
illegal 0.0000 illegal 0.0000 0.0000
|
||||||
|
illegal 0.0000 __________ 0.0000 0.0000
|
||||||
|
illegal 0.0000 illegal 0.0000 0.0000
|
||||||
|
illegal 0.0000 __________ __________ 0.0000
|
||||||
|
illegal 0.0000 0.0000 0.0000 0.0000
|
||||||
|
"""
|
||||||
|
|
||||||
|
q_values_k_7_action_west: """
|
||||||
|
illegal 0.0000 illegal 0.0000 0.0000
|
||||||
|
illegal 0.0000 __________ 0.0000 0.0000
|
||||||
|
illegal 0.0000 illegal 0.0000 0.0000
|
||||||
|
illegal 0.0000 __________ __________ 0.0000
|
||||||
|
illegal 0.0000 0.0000 0.0000 0.0000
|
||||||
|
"""
|
||||||
|
|
||||||
|
q_values_k_8_action_north: """
|
||||||
|
illegal 0.0000 illegal 0.0000 0.0000
|
||||||
|
illegal 0.0000 __________ 0.0000 0.0000
|
||||||
|
illegal 0.0000 illegal 0.0000 0.0000
|
||||||
|
illegal 0.0000 __________ __________ 0.0000
|
||||||
|
illegal 0.0000 0.0000 0.0000 0.0000
|
||||||
|
"""
|
||||||
|
|
||||||
|
q_values_k_8_action_east: """
|
||||||
|
illegal 0.0000 illegal 0.0000 0.0000
|
||||||
|
illegal 0.0000 __________ 0.0000 0.0000
|
||||||
|
illegal 0.0000 illegal 0.0000 0.0000
|
||||||
|
illegal 0.0000 __________ __________ 0.0000
|
||||||
|
illegal 0.0000 0.0000 0.0000 0.0000
|
||||||
|
"""
|
||||||
|
|
||||||
|
q_values_k_8_action_exit: """
|
||||||
|
0.0000 illegal 0.0000 illegal illegal
|
||||||
|
-1.0000 illegal __________ illegal illegal
|
||||||
|
0.0000 illegal 0.1000 illegal illegal
|
||||||
|
-1.0000 illegal __________ __________ illegal
|
||||||
|
-1.0000 illegal illegal illegal illegal
|
||||||
|
"""
|
||||||
|
|
||||||
|
q_values_k_8_action_south: """
|
||||||
|
illegal 0.0000 illegal 0.0000 0.0000
|
||||||
|
illegal 0.0000 __________ 0.0000 0.0000
|
||||||
|
illegal 0.0000 illegal 0.0000 0.0000
|
||||||
|
illegal 0.0000 __________ __________ 0.0000
|
||||||
|
illegal 0.0000 0.0000 0.0000 0.0000
|
||||||
|
"""
|
||||||
|
|
||||||
|
q_values_k_8_action_west: """
|
||||||
|
illegal 0.0000 illegal 0.0000 0.0000
|
||||||
|
illegal 0.0000 __________ 0.0000 0.0000
|
||||||
|
illegal 0.0000 illegal 0.0000 0.0000
|
||||||
|
illegal 0.0000 __________ __________ 0.0000
|
||||||
|
illegal 0.0000 0.0000 0.0000 0.0000
|
||||||
|
"""
|
||||||
|
|
||||||
|
q_values_k_9_action_north: """
|
||||||
|
illegal 0.0000 illegal 0.0000 0.0000
|
||||||
|
illegal 0.0000 __________ 0.0000 0.0000
|
||||||
|
illegal 0.0000 illegal 0.0000 0.0000
|
||||||
|
illegal -0.0900 __________ __________ 0.0000
|
||||||
|
illegal 0.0000 0.0000 0.0000 0.0000
|
||||||
|
"""
|
||||||
|
|
||||||
|
q_values_k_9_action_east: """
|
||||||
|
illegal 0.0000 illegal 0.0000 0.0000
|
||||||
|
illegal 0.0000 __________ 0.0000 0.0000
|
||||||
|
illegal 0.0000 illegal 0.0000 0.0000
|
||||||
|
illegal 0.0000 __________ __________ 0.0000
|
||||||
|
illegal 0.0000 0.0000 0.0000 0.0000
|
||||||
|
"""
|
||||||
|
|
||||||
|
q_values_k_9_action_exit: """
|
||||||
|
0.0000 illegal 0.0000 illegal illegal
|
||||||
|
-1.0000 illegal __________ illegal illegal
|
||||||
|
0.0000 illegal 0.1000 illegal illegal
|
||||||
|
-1.0000 illegal __________ __________ illegal
|
||||||
|
-1.0000 illegal illegal illegal illegal
|
||||||
|
"""
|
||||||
|
|
||||||
|
q_values_k_9_action_south: """
|
||||||
|
illegal 0.0000 illegal 0.0000 0.0000
|
||||||
|
illegal 0.0000 __________ 0.0000 0.0000
|
||||||
|
illegal 0.0000 illegal 0.0000 0.0000
|
||||||
|
illegal 0.0000 __________ __________ 0.0000
|
||||||
|
illegal 0.0000 0.0000 0.0000 0.0000
|
||||||
|
"""
|
||||||
|
|
||||||
|
q_values_k_9_action_west: """
|
||||||
|
illegal 0.0000 illegal 0.0000 0.0000
|
||||||
|
illegal 0.0000 __________ 0.0000 0.0000
|
||||||
|
illegal 0.0000 illegal 0.0000 0.0000
|
||||||
|
illegal 0.0000 __________ __________ 0.0000
|
||||||
|
illegal 0.0000 0.0000 0.0000 0.0000
|
||||||
|
"""
|
||||||
|
|
||||||
|
q_values_k_3000_action_north: """
|
||||||
|
illegal 4.3205 illegal 6.1517 3.8095
|
||||||
|
illegal 4.4238 __________ 5.2284 3.5129
|
||||||
|
illegal 1.0694 illegal 3.6867 2.0418
|
||||||
|
illegal 0.3423 __________ __________ 1.0655
|
||||||
|
illegal 0.0073 0.0079 0.0484 0.3768
|
||||||
|
"""
|
||||||
|
|
||||||
|
q_values_k_3000_action_east: """
|
||||||
|
illegal 8.0584 illegal 3.7245 3.3947
|
||||||
|
illegal 2.0499 __________ 3.2373 2.1742
|
||||||
|
illegal 0.8687 illegal 1.7398 1.2671
|
||||||
|
illegal 0.2927 __________ __________ 0.6669
|
||||||
|
illegal 0.0239 0.0097 0.1611 0.2051
|
||||||
|
"""
|
||||||
|
|
||||||
|
q_values_k_3000_action_exit: """
|
||||||
|
-10.0000 illegal 10.0000 illegal illegal
|
||||||
|
-10.0000 illegal __________ illegal illegal
|
||||||
|
-10.0000 illegal 1.0000 illegal illegal
|
||||||
|
-10.0000 illegal __________ __________ illegal
|
||||||
|
-9.9999 illegal illegal illegal illegal
|
||||||
|
"""
|
||||||
|
|
||||||
|
q_values_k_3000_action_south: """
|
||||||
|
illegal -0.3521 illegal 3.6948 2.9139
|
||||||
|
illegal -0.5605 __________ 2.1346 1.5674
|
||||||
|
illegal 0.2093 illegal 1.5389 0.5521
|
||||||
|
illegal -0.5505 __________ __________ 0.1006
|
||||||
|
illegal -1.8501 0.0060 0.0514 0.1223
|
||||||
|
"""
|
||||||
|
|
||||||
|
q_values_k_3000_action_west: """
|
||||||
|
illegal -6.2001 illegal 7.5146 4.9014
|
||||||
|
illegal -5.4013 __________ 4.0484 3.4126
|
||||||
|
illegal -8.0399 illegal 0.9653 1.6081
|
||||||
|
illegal -7.4767 __________ __________ 0.3934
|
||||||
|
illegal -6.3432 0.0179 0.0188 0.1028
|
||||||
|
"""
|
||||||
|
|
||||||
|
values: """
|
||||||
|
-10.0000 8.0584 10.0000 7.5146 4.9014
|
||||||
|
-10.0000 4.4238 __________ 5.2284 3.5129
|
||||||
|
-10.0000 1.0694 1.0000 3.6867 2.0418
|
||||||
|
-10.0000 0.3423 __________ __________ 1.0655
|
||||||
|
-9.9999 0.0239 0.0179 0.1611 0.3768
|
||||||
|
"""
|
||||||
|
|
||||||
|
policy: """
|
||||||
|
exit east exit west west
|
||||||
|
exit north __________ north north
|
||||||
|
exit north exit north north
|
||||||
|
exit north __________ __________ north
|
||||||
|
exit east west east north
|
||||||
|
"""
|
||||||
|
|
||||||
24
p3_rl/test_cases/q4/4-discountgrid.test
Normal file
24
p3_rl/test_cases/q4/4-discountgrid.test
Normal file
@@ -0,0 +1,24 @@
|
|||||||
|
class: "QLearningTest"
|
||||||
|
|
||||||
|
# GridWorld specification
|
||||||
|
# _ is empty space
|
||||||
|
# numbers are terminal states with that value
|
||||||
|
# # is a wall
|
||||||
|
# S is a start state
|
||||||
|
#
|
||||||
|
grid: """
|
||||||
|
-10 _ 10 _ _
|
||||||
|
-10 _ # _ _
|
||||||
|
-10 _ 1 _ _
|
||||||
|
-10 _ # # _
|
||||||
|
-10 S _ _ _
|
||||||
|
"""
|
||||||
|
discount: "0.9"
|
||||||
|
noise: "0.2"
|
||||||
|
livingReward: "0.0"
|
||||||
|
epsilon: "0.2"
|
||||||
|
learningRate: "0.1"
|
||||||
|
numExperiences: "3000"
|
||||||
|
valueIterations: "100"
|
||||||
|
iterations: "10000"
|
||||||
|
|
||||||
2
p3_rl/test_cases/q4/CONFIG
Normal file
2
p3_rl/test_cases/q4/CONFIG
Normal file
@@ -0,0 +1,2 @@
|
|||||||
|
max_points: "5"
|
||||||
|
class: "PassAllTestsQuestion"
|
||||||
2
p3_rl/test_cases/q5/1-tinygrid.solution
Normal file
2
p3_rl/test_cases/q5/1-tinygrid.solution
Normal file
@@ -0,0 +1,2 @@
|
|||||||
|
# This is the solution file for test_cases/q5/1-tinygrid.test.
|
||||||
|
# File intentionally blank.
|
||||||
22
p3_rl/test_cases/q5/1-tinygrid.test
Normal file
22
p3_rl/test_cases/q5/1-tinygrid.test
Normal file
@@ -0,0 +1,22 @@
|
|||||||
|
class: "EpsilonGreedyTest"
|
||||||
|
|
||||||
|
# GridWorld specification
|
||||||
|
# _ is empty space
|
||||||
|
# numbers are terminal states with that value
|
||||||
|
# # is a wall
|
||||||
|
# S is a start state
|
||||||
|
#
|
||||||
|
grid: """
|
||||||
|
-10
|
||||||
|
S
|
||||||
|
10
|
||||||
|
"""
|
||||||
|
discount: "0.5"
|
||||||
|
noise: "0.0"
|
||||||
|
livingReward: "0.0"
|
||||||
|
epsilon: "0.5"
|
||||||
|
learningRate: "0.1"
|
||||||
|
numExperiences: "100"
|
||||||
|
valueIterations: "100"
|
||||||
|
iterations: "10000"
|
||||||
|
|
||||||
2
p3_rl/test_cases/q5/2-tinygrid-noisy.solution
Normal file
2
p3_rl/test_cases/q5/2-tinygrid-noisy.solution
Normal file
@@ -0,0 +1,2 @@
|
|||||||
|
# This is the solution file for test_cases/q5/2-tinygrid-noisy.test.
|
||||||
|
# File intentionally blank.
|
||||||
22
p3_rl/test_cases/q5/2-tinygrid-noisy.test
Normal file
22
p3_rl/test_cases/q5/2-tinygrid-noisy.test
Normal file
@@ -0,0 +1,22 @@
|
|||||||
|
class: "EpsilonGreedyTest"
|
||||||
|
|
||||||
|
# GridWorld specification
|
||||||
|
# _ is empty space
|
||||||
|
# numbers are terminal states with that value
|
||||||
|
# # is a wall
|
||||||
|
# S is a start state
|
||||||
|
#
|
||||||
|
grid: """
|
||||||
|
-10
|
||||||
|
S
|
||||||
|
10
|
||||||
|
"""
|
||||||
|
discount: "0.75"
|
||||||
|
noise: "0.25"
|
||||||
|
livingReward: "0.0"
|
||||||
|
epsilon: "0.5"
|
||||||
|
learningRate: "0.1"
|
||||||
|
numExperiences: "100"
|
||||||
|
valueIterations: "100"
|
||||||
|
iterations: "10000"
|
||||||
|
|
||||||
2
p3_rl/test_cases/q5/3-bridge.solution
Normal file
2
p3_rl/test_cases/q5/3-bridge.solution
Normal file
@@ -0,0 +1,2 @@
|
|||||||
|
# This is the solution file for test_cases/q5/3-bridge.test.
|
||||||
|
# File intentionally blank.
|
||||||
27
p3_rl/test_cases/q5/3-bridge.test
Normal file
27
p3_rl/test_cases/q5/3-bridge.test
Normal file
@@ -0,0 +1,27 @@
|
|||||||
|
class: "EpsilonGreedyTest"
|
||||||
|
|
||||||
|
# GridWorld specification
|
||||||
|
# _ is empty space
|
||||||
|
# numbers are terminal states with that value
|
||||||
|
# # is a wall
|
||||||
|
# S is a start state
|
||||||
|
#
|
||||||
|
grid: """
|
||||||
|
# 10 #
|
||||||
|
-100 _ -100
|
||||||
|
-100 _ -100
|
||||||
|
-100 _ -100
|
||||||
|
-100 _ -100
|
||||||
|
-100 S -100
|
||||||
|
# 1 #
|
||||||
|
"""
|
||||||
|
gridName: "bridgeGrid"
|
||||||
|
discount: "0.85"
|
||||||
|
noise: "0.1"
|
||||||
|
livingReward: "0.0"
|
||||||
|
epsilon: "0.5"
|
||||||
|
learningRate: "0.1"
|
||||||
|
numExperiences: "500"
|
||||||
|
valueIterations: "100"
|
||||||
|
iterations: "10000"
|
||||||
|
|
||||||
2
p3_rl/test_cases/q5/4-discountgrid.solution
Normal file
2
p3_rl/test_cases/q5/4-discountgrid.solution
Normal file
@@ -0,0 +1,2 @@
|
|||||||
|
# This is the solution file for test_cases/q5/4-discountgrid.test.
|
||||||
|
# File intentionally blank.
|
||||||
24
p3_rl/test_cases/q5/4-discountgrid.test
Normal file
24
p3_rl/test_cases/q5/4-discountgrid.test
Normal file
@@ -0,0 +1,24 @@
|
|||||||
|
class: "EpsilonGreedyTest"
|
||||||
|
|
||||||
|
# GridWorld specification
|
||||||
|
# _ is empty space
|
||||||
|
# numbers are terminal states with that value
|
||||||
|
# # is a wall
|
||||||
|
# S is a start state
|
||||||
|
#
|
||||||
|
grid: """
|
||||||
|
-10 _ 10 _ _
|
||||||
|
-10 _ # _ _
|
||||||
|
-10 _ 1 _ _
|
||||||
|
-10 _ # # _
|
||||||
|
-10 S _ _ _
|
||||||
|
"""
|
||||||
|
discount: "0.9"
|
||||||
|
noise: "0.2"
|
||||||
|
livingReward: "0.0"
|
||||||
|
epsilon: "0.2"
|
||||||
|
learningRate: "0.1"
|
||||||
|
numExperiences: "3000"
|
||||||
|
valueIterations: "100"
|
||||||
|
iterations: "10000"
|
||||||
|
|
||||||
2
p3_rl/test_cases/q5/CONFIG
Normal file
2
p3_rl/test_cases/q5/CONFIG
Normal file
@@ -0,0 +1,2 @@
|
|||||||
|
max_points: "3"
|
||||||
|
class: "PassAllTestsQuestion"
|
||||||
2
p3_rl/test_cases/q6/CONFIG
Normal file
2
p3_rl/test_cases/q6/CONFIG
Normal file
@@ -0,0 +1,2 @@
|
|||||||
|
max_points: "1"
|
||||||
|
class: "PassAllTestsQuestion"
|
||||||
2
p3_rl/test_cases/q6/grade-agent.solution
Normal file
2
p3_rl/test_cases/q6/grade-agent.solution
Normal file
@@ -0,0 +1,2 @@
|
|||||||
|
# This is the solution file for test_cases/q6/grade-agent.test.
|
||||||
|
# File intentionally blank.
|
||||||
2
p3_rl/test_cases/q6/grade-agent.test
Normal file
2
p3_rl/test_cases/q6/grade-agent.test
Normal file
@@ -0,0 +1,2 @@
|
|||||||
|
class: "Question6Test"
|
||||||
|
|
||||||
2
p3_rl/test_cases/q7/CONFIG
Normal file
2
p3_rl/test_cases/q7/CONFIG
Normal file
@@ -0,0 +1,2 @@
|
|||||||
|
max_points: "1"
|
||||||
|
class: "PartialCreditQuestion"
|
||||||
2
p3_rl/test_cases/q7/grade-agent.solution
Normal file
2
p3_rl/test_cases/q7/grade-agent.solution
Normal file
@@ -0,0 +1,2 @@
|
|||||||
|
# This is the solution file for test_cases/q7/grade-agent.test.
|
||||||
|
# File intentionally blank.
|
||||||
6
p3_rl/test_cases/q7/grade-agent.test
Normal file
6
p3_rl/test_cases/q7/grade-agent.test
Normal file
@@ -0,0 +1,6 @@
|
|||||||
|
class: "EvalAgentTest"
|
||||||
|
|
||||||
|
# 100 test games after 2000 training games
|
||||||
|
pacmanParams: "-p PacmanQAgent -x 2000 -n 2100 -l smallGrid -q -f --fixRandomSeed"
|
||||||
|
|
||||||
|
winsThresholds: "70"
|
||||||
429
p3_rl/test_cases/q8/1-tinygrid.solution
Normal file
429
p3_rl/test_cases/q8/1-tinygrid.solution
Normal file
@@ -0,0 +1,429 @@
|
|||||||
|
weights_k_0: """
|
||||||
|
{((0, 0), 'exit'): 0,
|
||||||
|
((0, 1), 'east'): 0,
|
||||||
|
((0, 1), 'north'): 0,
|
||||||
|
((0, 1), 'south'): 0,
|
||||||
|
((0, 1), 'west'): 0,
|
||||||
|
((0, 2), 'exit'): 0}
|
||||||
|
"""
|
||||||
|
|
||||||
|
q_values_k_0_action_north: """
|
||||||
|
illegal
|
||||||
|
0.0000
|
||||||
|
illegal
|
||||||
|
"""
|
||||||
|
|
||||||
|
q_values_k_0_action_east: """
|
||||||
|
illegal
|
||||||
|
0.0000
|
||||||
|
illegal
|
||||||
|
"""
|
||||||
|
|
||||||
|
q_values_k_0_action_exit: """
|
||||||
|
0.0000
|
||||||
|
illegal
|
||||||
|
0.0000
|
||||||
|
"""
|
||||||
|
|
||||||
|
q_values_k_0_action_south: """
|
||||||
|
illegal
|
||||||
|
0.0000
|
||||||
|
illegal
|
||||||
|
"""
|
||||||
|
|
||||||
|
q_values_k_0_action_west: """
|
||||||
|
illegal
|
||||||
|
0.0000
|
||||||
|
illegal
|
||||||
|
"""
|
||||||
|
|
||||||
|
weights_k_1: """
|
||||||
|
{((0, 0), 'exit'): 1.0,
|
||||||
|
((0, 1), 'east'): 0,
|
||||||
|
((0, 1), 'north'): 0,
|
||||||
|
((0, 1), 'south'): 0,
|
||||||
|
((0, 1), 'west'): 0,
|
||||||
|
((0, 2), 'exit'): 0}
|
||||||
|
"""
|
||||||
|
|
||||||
|
q_values_k_1_action_north: """
|
||||||
|
illegal
|
||||||
|
0.0000
|
||||||
|
illegal
|
||||||
|
"""
|
||||||
|
|
||||||
|
q_values_k_1_action_east: """
|
||||||
|
illegal
|
||||||
|
0.0000
|
||||||
|
illegal
|
||||||
|
"""
|
||||||
|
|
||||||
|
q_values_k_1_action_exit: """
|
||||||
|
0.0000
|
||||||
|
illegal
|
||||||
|
1.0000
|
||||||
|
"""
|
||||||
|
|
||||||
|
q_values_k_1_action_south: """
|
||||||
|
illegal
|
||||||
|
0.0000
|
||||||
|
illegal
|
||||||
|
"""
|
||||||
|
|
||||||
|
q_values_k_1_action_west: """
|
||||||
|
illegal
|
||||||
|
0.0000
|
||||||
|
illegal
|
||||||
|
"""
|
||||||
|
|
||||||
|
weights_k_2: """
|
||||||
|
{((0, 0), 'exit'): 1.0,
|
||||||
|
((0, 1), 'east'): 0,
|
||||||
|
((0, 1), 'north'): 0,
|
||||||
|
((0, 1), 'south'): 0.0,
|
||||||
|
((0, 1), 'west'): 0,
|
||||||
|
((0, 2), 'exit'): 0}
|
||||||
|
"""
|
||||||
|
|
||||||
|
q_values_k_2_action_north: """
|
||||||
|
illegal
|
||||||
|
0.0000
|
||||||
|
illegal
|
||||||
|
"""
|
||||||
|
|
||||||
|
q_values_k_2_action_east: """
|
||||||
|
illegal
|
||||||
|
0.0000
|
||||||
|
illegal
|
||||||
|
"""
|
||||||
|
|
||||||
|
q_values_k_2_action_exit: """
|
||||||
|
0.0000
|
||||||
|
illegal
|
||||||
|
1.0000
|
||||||
|
"""
|
||||||
|
|
||||||
|
q_values_k_2_action_south: """
|
||||||
|
illegal
|
||||||
|
0.0000
|
||||||
|
illegal
|
||||||
|
"""
|
||||||
|
|
||||||
|
q_values_k_2_action_west: """
|
||||||
|
illegal
|
||||||
|
0.0000
|
||||||
|
illegal
|
||||||
|
"""
|
||||||
|
|
||||||
|
weights_k_3: """
|
||||||
|
{((0, 0), 'exit'): 1.9,
|
||||||
|
((0, 1), 'east'): 0,
|
||||||
|
((0, 1), 'north'): 0,
|
||||||
|
((0, 1), 'south'): 0.0,
|
||||||
|
((0, 1), 'west'): 0,
|
||||||
|
((0, 2), 'exit'): 0}
|
||||||
|
"""
|
||||||
|
|
||||||
|
q_values_k_3_action_north: """
|
||||||
|
illegal
|
||||||
|
0.0000
|
||||||
|
illegal
|
||||||
|
"""
|
||||||
|
|
||||||
|
q_values_k_3_action_east: """
|
||||||
|
illegal
|
||||||
|
0.0000
|
||||||
|
illegal
|
||||||
|
"""
|
||||||
|
|
||||||
|
q_values_k_3_action_exit: """
|
||||||
|
0.0000
|
||||||
|
illegal
|
||||||
|
1.9000
|
||||||
|
"""
|
||||||
|
|
||||||
|
q_values_k_3_action_south: """
|
||||||
|
illegal
|
||||||
|
0.0000
|
||||||
|
illegal
|
||||||
|
"""
|
||||||
|
|
||||||
|
q_values_k_3_action_west: """
|
||||||
|
illegal
|
||||||
|
0.0000
|
||||||
|
illegal
|
||||||
|
"""
|
||||||
|
|
||||||
|
weights_k_4: """
|
||||||
|
{((0, 0), 'exit'): 2.71,
|
||||||
|
((0, 1), 'east'): 0,
|
||||||
|
((0, 1), 'north'): 0,
|
||||||
|
((0, 1), 'south'): 0.0,
|
||||||
|
((0, 1), 'west'): 0,
|
||||||
|
((0, 2), 'exit'): 0}
|
||||||
|
"""
|
||||||
|
|
||||||
|
q_values_k_4_action_north: """
|
||||||
|
illegal
|
||||||
|
0.0000
|
||||||
|
illegal
|
||||||
|
"""
|
||||||
|
|
||||||
|
q_values_k_4_action_east: """
|
||||||
|
illegal
|
||||||
|
0.0000
|
||||||
|
illegal
|
||||||
|
"""
|
||||||
|
|
||||||
|
q_values_k_4_action_exit: """
|
||||||
|
0.0000
|
||||||
|
illegal
|
||||||
|
2.7100
|
||||||
|
"""
|
||||||
|
|
||||||
|
q_values_k_4_action_south: """
|
||||||
|
illegal
|
||||||
|
0.0000
|
||||||
|
illegal
|
||||||
|
"""
|
||||||
|
|
||||||
|
q_values_k_4_action_west: """
|
||||||
|
illegal
|
||||||
|
0.0000
|
||||||
|
illegal
|
||||||
|
"""
|
||||||
|
|
||||||
|
weights_k_5: """
|
||||||
|
{((0, 0), 'exit'): 2.71,
|
||||||
|
((0, 1), 'east'): 0,
|
||||||
|
((0, 1), 'north'): 0,
|
||||||
|
((0, 1), 'south'): 0.0,
|
||||||
|
((0, 1), 'west'): 0,
|
||||||
|
((0, 2), 'exit'): -1.0}
|
||||||
|
"""
|
||||||
|
|
||||||
|
q_values_k_5_action_north: """
|
||||||
|
illegal
|
||||||
|
0.0000
|
||||||
|
illegal
|
||||||
|
"""
|
||||||
|
|
||||||
|
q_values_k_5_action_east: """
|
||||||
|
illegal
|
||||||
|
0.0000
|
||||||
|
illegal
|
||||||
|
"""
|
||||||
|
|
||||||
|
q_values_k_5_action_exit: """
|
||||||
|
-1.0000
|
||||||
|
illegal
|
||||||
|
2.7100
|
||||||
|
"""
|
||||||
|
|
||||||
|
q_values_k_5_action_south: """
|
||||||
|
illegal
|
||||||
|
0.0000
|
||||||
|
illegal
|
||||||
|
"""
|
||||||
|
|
||||||
|
q_values_k_5_action_west: """
|
||||||
|
illegal
|
||||||
|
0.0000
|
||||||
|
illegal
|
||||||
|
"""
|
||||||
|
|
||||||
|
weights_k_6: """
|
||||||
|
{((0, 0), 'exit'): 3.439,
|
||||||
|
((0, 1), 'east'): 0,
|
||||||
|
((0, 1), 'north'): 0,
|
||||||
|
((0, 1), 'south'): 0.0,
|
||||||
|
((0, 1), 'west'): 0,
|
||||||
|
((0, 2), 'exit'): -1.0}
|
||||||
|
"""
|
||||||
|
|
||||||
|
q_values_k_6_action_north: """
|
||||||
|
illegal
|
||||||
|
0.0000
|
||||||
|
illegal
|
||||||
|
"""
|
||||||
|
|
||||||
|
q_values_k_6_action_east: """
|
||||||
|
illegal
|
||||||
|
0.0000
|
||||||
|
illegal
|
||||||
|
"""
|
||||||
|
|
||||||
|
q_values_k_6_action_exit: """
|
||||||
|
-1.0000
|
||||||
|
illegal
|
||||||
|
3.4390
|
||||||
|
"""
|
||||||
|
|
||||||
|
q_values_k_6_action_south: """
|
||||||
|
illegal
|
||||||
|
0.0000
|
||||||
|
illegal
|
||||||
|
"""
|
||||||
|
|
||||||
|
q_values_k_6_action_west: """
|
||||||
|
illegal
|
||||||
|
0.0000
|
||||||
|
illegal
|
||||||
|
"""
|
||||||
|
|
||||||
|
weights_k_7: """
|
||||||
|
{((0, 0), 'exit'): 3.439,
|
||||||
|
((0, 1), 'east'): 0,
|
||||||
|
((0, 1), 'north'): 0,
|
||||||
|
((0, 1), 'south'): 0.17195000000000002,
|
||||||
|
((0, 1), 'west'): 0,
|
||||||
|
((0, 2), 'exit'): -1.0}
|
||||||
|
"""
|
||||||
|
|
||||||
|
q_values_k_7_action_north: """
|
||||||
|
illegal
|
||||||
|
0.0000
|
||||||
|
illegal
|
||||||
|
"""
|
||||||
|
|
||||||
|
q_values_k_7_action_east: """
|
||||||
|
illegal
|
||||||
|
0.0000
|
||||||
|
illegal
|
||||||
|
"""
|
||||||
|
|
||||||
|
q_values_k_7_action_exit: """
|
||||||
|
-1.0000
|
||||||
|
illegal
|
||||||
|
3.4390
|
||||||
|
"""
|
||||||
|
|
||||||
|
q_values_k_7_action_south: """
|
||||||
|
illegal
|
||||||
|
0.1720
|
||||||
|
illegal
|
||||||
|
"""
|
||||||
|
|
||||||
|
q_values_k_7_action_west: """
|
||||||
|
illegal
|
||||||
|
0.0000
|
||||||
|
illegal
|
||||||
|
"""
|
||||||
|
|
||||||
|
weights_k_8: """
|
||||||
|
{((0, 0), 'exit'): 4.0951,
|
||||||
|
((0, 1), 'east'): 0,
|
||||||
|
((0, 1), 'north'): 0,
|
||||||
|
((0, 1), 'south'): 0.17195000000000002,
|
||||||
|
((0, 1), 'west'): 0,
|
||||||
|
((0, 2), 'exit'): -1.0}
|
||||||
|
"""
|
||||||
|
|
||||||
|
q_values_k_8_action_north: """
|
||||||
|
illegal
|
||||||
|
0.0000
|
||||||
|
illegal
|
||||||
|
"""
|
||||||
|
|
||||||
|
q_values_k_8_action_east: """
|
||||||
|
illegal
|
||||||
|
0.0000
|
||||||
|
illegal
|
||||||
|
"""
|
||||||
|
|
||||||
|
q_values_k_8_action_exit: """
|
||||||
|
-1.0000
|
||||||
|
illegal
|
||||||
|
4.0951
|
||||||
|
"""
|
||||||
|
|
||||||
|
q_values_k_8_action_south: """
|
||||||
|
illegal
|
||||||
|
0.1720
|
||||||
|
illegal
|
||||||
|
"""
|
||||||
|
|
||||||
|
q_values_k_8_action_west: """
|
||||||
|
illegal
|
||||||
|
0.0000
|
||||||
|
illegal
|
||||||
|
"""
|
||||||
|
|
||||||
|
weights_k_9: """
|
||||||
|
{((0, 0), 'exit'): 4.68559,
|
||||||
|
((0, 1), 'east'): 0,
|
||||||
|
((0, 1), 'north'): 0,
|
||||||
|
((0, 1), 'south'): 0.17195000000000002,
|
||||||
|
((0, 1), 'west'): 0,
|
||||||
|
((0, 2), 'exit'): -1.0}
|
||||||
|
"""
|
||||||
|
|
||||||
|
q_values_k_9_action_north: """
|
||||||
|
illegal
|
||||||
|
0.0000
|
||||||
|
illegal
|
||||||
|
"""
|
||||||
|
|
||||||
|
q_values_k_9_action_east: """
|
||||||
|
illegal
|
||||||
|
0.0000
|
||||||
|
illegal
|
||||||
|
"""
|
||||||
|
|
||||||
|
q_values_k_9_action_exit: """
|
||||||
|
-1.0000
|
||||||
|
illegal
|
||||||
|
4.6856
|
||||||
|
"""
|
||||||
|
|
||||||
|
q_values_k_9_action_south: """
|
||||||
|
illegal
|
||||||
|
0.1720
|
||||||
|
illegal
|
||||||
|
"""
|
||||||
|
|
||||||
|
q_values_k_9_action_west: """
|
||||||
|
illegal
|
||||||
|
0.0000
|
||||||
|
illegal
|
||||||
|
"""
|
||||||
|
|
||||||
|
weights_k_100: """
|
||||||
|
{((0, 0), 'exit'): 9.817519963685992,
|
||||||
|
((0, 1), 'east'): 0.40629236674335106,
|
||||||
|
((0, 1), 'north'): -0.4534185789984799,
|
||||||
|
((0, 1), 'south'): 2.126721095524319,
|
||||||
|
((0, 1), 'west'): 0.39193283364906867,
|
||||||
|
((0, 2), 'exit'): -9.476652366972639}
|
||||||
|
"""
|
||||||
|
|
||||||
|
q_values_k_100_action_north: """
|
||||||
|
illegal
|
||||||
|
-0.4534
|
||||||
|
illegal
|
||||||
|
"""
|
||||||
|
|
||||||
|
q_values_k_100_action_east: """
|
||||||
|
illegal
|
||||||
|
0.4063
|
||||||
|
illegal
|
||||||
|
"""
|
||||||
|
|
||||||
|
q_values_k_100_action_exit: """
|
||||||
|
-9.4767
|
||||||
|
illegal
|
||||||
|
9.8175
|
||||||
|
"""
|
||||||
|
|
||||||
|
q_values_k_100_action_south: """
|
||||||
|
illegal
|
||||||
|
2.1267
|
||||||
|
illegal
|
||||||
|
"""
|
||||||
|
|
||||||
|
q_values_k_100_action_west: """
|
||||||
|
illegal
|
||||||
|
0.3919
|
||||||
|
illegal
|
||||||
|
"""
|
||||||
|
|
||||||
22
p3_rl/test_cases/q8/1-tinygrid.test
Normal file
22
p3_rl/test_cases/q8/1-tinygrid.test
Normal file
@@ -0,0 +1,22 @@
|
|||||||
|
class: "ApproximateQLearningTest"
|
||||||
|
|
||||||
|
# GridWorld specification
|
||||||
|
# _ is empty space
|
||||||
|
# numbers are terminal states with that value
|
||||||
|
# # is a wall
|
||||||
|
# S is a start state
|
||||||
|
#
|
||||||
|
grid: """
|
||||||
|
-10
|
||||||
|
S
|
||||||
|
10
|
||||||
|
"""
|
||||||
|
discount: "0.5"
|
||||||
|
noise: "0.0"
|
||||||
|
livingReward: "0.0"
|
||||||
|
epsilon: "0.5"
|
||||||
|
learningRate: "0.1"
|
||||||
|
numExperiences: "100"
|
||||||
|
valueIterations: "100"
|
||||||
|
iterations: "10000"
|
||||||
|
|
||||||
429
p3_rl/test_cases/q8/2-tinygrid-noisy.solution
Normal file
429
p3_rl/test_cases/q8/2-tinygrid-noisy.solution
Normal file
@@ -0,0 +1,429 @@
|
|||||||
|
weights_k_0: """
|
||||||
|
{((0, 0), 'exit'): 0,
|
||||||
|
((0, 1), 'east'): 0,
|
||||||
|
((0, 1), 'north'): 0,
|
||||||
|
((0, 1), 'south'): 0,
|
||||||
|
((0, 1), 'west'): 0,
|
||||||
|
((0, 2), 'exit'): 0}
|
||||||
|
"""
|
||||||
|
|
||||||
|
q_values_k_0_action_north: """
|
||||||
|
illegal
|
||||||
|
0.0000
|
||||||
|
illegal
|
||||||
|
"""
|
||||||
|
|
||||||
|
q_values_k_0_action_east: """
|
||||||
|
illegal
|
||||||
|
0.0000
|
||||||
|
illegal
|
||||||
|
"""
|
||||||
|
|
||||||
|
q_values_k_0_action_exit: """
|
||||||
|
0.0000
|
||||||
|
illegal
|
||||||
|
0.0000
|
||||||
|
"""
|
||||||
|
|
||||||
|
q_values_k_0_action_south: """
|
||||||
|
illegal
|
||||||
|
0.0000
|
||||||
|
illegal
|
||||||
|
"""
|
||||||
|
|
||||||
|
q_values_k_0_action_west: """
|
||||||
|
illegal
|
||||||
|
0.0000
|
||||||
|
illegal
|
||||||
|
"""
|
||||||
|
|
||||||
|
weights_k_1: """
|
||||||
|
{((0, 0), 'exit'): 1.0,
|
||||||
|
((0, 1), 'east'): 0,
|
||||||
|
((0, 1), 'north'): 0,
|
||||||
|
((0, 1), 'south'): 0,
|
||||||
|
((0, 1), 'west'): 0,
|
||||||
|
((0, 2), 'exit'): 0}
|
||||||
|
"""
|
||||||
|
|
||||||
|
q_values_k_1_action_north: """
|
||||||
|
illegal
|
||||||
|
0.0000
|
||||||
|
illegal
|
||||||
|
"""
|
||||||
|
|
||||||
|
q_values_k_1_action_east: """
|
||||||
|
illegal
|
||||||
|
0.0000
|
||||||
|
illegal
|
||||||
|
"""
|
||||||
|
|
||||||
|
q_values_k_1_action_exit: """
|
||||||
|
0.0000
|
||||||
|
illegal
|
||||||
|
1.0000
|
||||||
|
"""
|
||||||
|
|
||||||
|
q_values_k_1_action_south: """
|
||||||
|
illegal
|
||||||
|
0.0000
|
||||||
|
illegal
|
||||||
|
"""
|
||||||
|
|
||||||
|
q_values_k_1_action_west: """
|
||||||
|
illegal
|
||||||
|
0.0000
|
||||||
|
illegal
|
||||||
|
"""
|
||||||
|
|
||||||
|
weights_k_2: """
|
||||||
|
{((0, 0), 'exit'): 1.0,
|
||||||
|
((0, 1), 'east'): 0,
|
||||||
|
((0, 1), 'north'): 0,
|
||||||
|
((0, 1), 'south'): 0.0,
|
||||||
|
((0, 1), 'west'): 0,
|
||||||
|
((0, 2), 'exit'): 0}
|
||||||
|
"""
|
||||||
|
|
||||||
|
q_values_k_2_action_north: """
|
||||||
|
illegal
|
||||||
|
0.0000
|
||||||
|
illegal
|
||||||
|
"""
|
||||||
|
|
||||||
|
q_values_k_2_action_east: """
|
||||||
|
illegal
|
||||||
|
0.0000
|
||||||
|
illegal
|
||||||
|
"""
|
||||||
|
|
||||||
|
q_values_k_2_action_exit: """
|
||||||
|
0.0000
|
||||||
|
illegal
|
||||||
|
1.0000
|
||||||
|
"""
|
||||||
|
|
||||||
|
q_values_k_2_action_south: """
|
||||||
|
illegal
|
||||||
|
0.0000
|
||||||
|
illegal
|
||||||
|
"""
|
||||||
|
|
||||||
|
q_values_k_2_action_west: """
|
||||||
|
illegal
|
||||||
|
0.0000
|
||||||
|
illegal
|
||||||
|
"""
|
||||||
|
|
||||||
|
weights_k_3: """
|
||||||
|
{((0, 0), 'exit'): 1.9,
|
||||||
|
((0, 1), 'east'): 0,
|
||||||
|
((0, 1), 'north'): 0,
|
||||||
|
((0, 1), 'south'): 0.0,
|
||||||
|
((0, 1), 'west'): 0,
|
||||||
|
((0, 2), 'exit'): 0}
|
||||||
|
"""
|
||||||
|
|
||||||
|
q_values_k_3_action_north: """
|
||||||
|
illegal
|
||||||
|
0.0000
|
||||||
|
illegal
|
||||||
|
"""
|
||||||
|
|
||||||
|
q_values_k_3_action_east: """
|
||||||
|
illegal
|
||||||
|
0.0000
|
||||||
|
illegal
|
||||||
|
"""
|
||||||
|
|
||||||
|
q_values_k_3_action_exit: """
|
||||||
|
0.0000
|
||||||
|
illegal
|
||||||
|
1.9000
|
||||||
|
"""
|
||||||
|
|
||||||
|
q_values_k_3_action_south: """
|
||||||
|
illegal
|
||||||
|
0.0000
|
||||||
|
illegal
|
||||||
|
"""
|
||||||
|
|
||||||
|
q_values_k_3_action_west: """
|
||||||
|
illegal
|
||||||
|
0.0000
|
||||||
|
illegal
|
||||||
|
"""
|
||||||
|
|
||||||
|
weights_k_4: """
|
||||||
|
{((0, 0), 'exit'): 2.71,
|
||||||
|
((0, 1), 'east'): 0,
|
||||||
|
((0, 1), 'north'): 0,
|
||||||
|
((0, 1), 'south'): 0.0,
|
||||||
|
((0, 1), 'west'): 0,
|
||||||
|
((0, 2), 'exit'): 0}
|
||||||
|
"""
|
||||||
|
|
||||||
|
q_values_k_4_action_north: """
|
||||||
|
illegal
|
||||||
|
0.0000
|
||||||
|
illegal
|
||||||
|
"""
|
||||||
|
|
||||||
|
q_values_k_4_action_east: """
|
||||||
|
illegal
|
||||||
|
0.0000
|
||||||
|
illegal
|
||||||
|
"""
|
||||||
|
|
||||||
|
q_values_k_4_action_exit: """
|
||||||
|
0.0000
|
||||||
|
illegal
|
||||||
|
2.7100
|
||||||
|
"""
|
||||||
|
|
||||||
|
q_values_k_4_action_south: """
|
||||||
|
illegal
|
||||||
|
0.0000
|
||||||
|
illegal
|
||||||
|
"""
|
||||||
|
|
||||||
|
q_values_k_4_action_west: """
|
||||||
|
illegal
|
||||||
|
0.0000
|
||||||
|
illegal
|
||||||
|
"""
|
||||||
|
|
||||||
|
weights_k_5: """
|
||||||
|
{((0, 0), 'exit'): 2.71,
|
||||||
|
((0, 1), 'east'): 0,
|
||||||
|
((0, 1), 'north'): 0,
|
||||||
|
((0, 1), 'south'): 0.0,
|
||||||
|
((0, 1), 'west'): 0,
|
||||||
|
((0, 2), 'exit'): -1.0}
|
||||||
|
"""
|
||||||
|
|
||||||
|
q_values_k_5_action_north: """
|
||||||
|
illegal
|
||||||
|
0.0000
|
||||||
|
illegal
|
||||||
|
"""
|
||||||
|
|
||||||
|
q_values_k_5_action_east: """
|
||||||
|
illegal
|
||||||
|
0.0000
|
||||||
|
illegal
|
||||||
|
"""
|
||||||
|
|
||||||
|
q_values_k_5_action_exit: """
|
||||||
|
-1.0000
|
||||||
|
illegal
|
||||||
|
2.7100
|
||||||
|
"""
|
||||||
|
|
||||||
|
q_values_k_5_action_south: """
|
||||||
|
illegal
|
||||||
|
0.0000
|
||||||
|
illegal
|
||||||
|
"""
|
||||||
|
|
||||||
|
q_values_k_5_action_west: """
|
||||||
|
illegal
|
||||||
|
0.0000
|
||||||
|
illegal
|
||||||
|
"""
|
||||||
|
|
||||||
|
weights_k_6: """
|
||||||
|
{((0, 0), 'exit'): 3.439,
|
||||||
|
((0, 1), 'east'): 0,
|
||||||
|
((0, 1), 'north'): 0,
|
||||||
|
((0, 1), 'south'): 0.0,
|
||||||
|
((0, 1), 'west'): 0,
|
||||||
|
((0, 2), 'exit'): -1.0}
|
||||||
|
"""
|
||||||
|
|
||||||
|
q_values_k_6_action_north: """
|
||||||
|
illegal
|
||||||
|
0.0000
|
||||||
|
illegal
|
||||||
|
"""
|
||||||
|
|
||||||
|
q_values_k_6_action_east: """
|
||||||
|
illegal
|
||||||
|
0.0000
|
||||||
|
illegal
|
||||||
|
"""
|
||||||
|
|
||||||
|
q_values_k_6_action_exit: """
|
||||||
|
-1.0000
|
||||||
|
illegal
|
||||||
|
3.4390
|
||||||
|
"""
|
||||||
|
|
||||||
|
q_values_k_6_action_south: """
|
||||||
|
illegal
|
||||||
|
0.0000
|
||||||
|
illegal
|
||||||
|
"""
|
||||||
|
|
||||||
|
q_values_k_6_action_west: """
|
||||||
|
illegal
|
||||||
|
0.0000
|
||||||
|
illegal
|
||||||
|
"""
|
||||||
|
|
||||||
|
weights_k_7: """
|
||||||
|
{((0, 0), 'exit'): 3.439,
|
||||||
|
((0, 1), 'east'): 0,
|
||||||
|
((0, 1), 'north'): 0,
|
||||||
|
((0, 1), 'south'): 0.257925,
|
||||||
|
((0, 1), 'west'): 0,
|
||||||
|
((0, 2), 'exit'): -1.0}
|
||||||
|
"""
|
||||||
|
|
||||||
|
q_values_k_7_action_north: """
|
||||||
|
illegal
|
||||||
|
0.0000
|
||||||
|
illegal
|
||||||
|
"""
|
||||||
|
|
||||||
|
q_values_k_7_action_east: """
|
||||||
|
illegal
|
||||||
|
0.0000
|
||||||
|
illegal
|
||||||
|
"""
|
||||||
|
|
||||||
|
q_values_k_7_action_exit: """
|
||||||
|
-1.0000
|
||||||
|
illegal
|
||||||
|
3.4390
|
||||||
|
"""
|
||||||
|
|
||||||
|
q_values_k_7_action_south: """
|
||||||
|
illegal
|
||||||
|
0.2579
|
||||||
|
illegal
|
||||||
|
"""
|
||||||
|
|
||||||
|
q_values_k_7_action_west: """
|
||||||
|
illegal
|
||||||
|
0.0000
|
||||||
|
illegal
|
||||||
|
"""
|
||||||
|
|
||||||
|
weights_k_8: """
|
||||||
|
{((0, 0), 'exit'): 4.0951,
|
||||||
|
((0, 1), 'east'): 0,
|
||||||
|
((0, 1), 'north'): 0,
|
||||||
|
((0, 1), 'south'): 0.257925,
|
||||||
|
((0, 1), 'west'): 0,
|
||||||
|
((0, 2), 'exit'): -1.0}
|
||||||
|
"""
|
||||||
|
|
||||||
|
q_values_k_8_action_north: """
|
||||||
|
illegal
|
||||||
|
0.0000
|
||||||
|
illegal
|
||||||
|
"""
|
||||||
|
|
||||||
|
q_values_k_8_action_east: """
|
||||||
|
illegal
|
||||||
|
0.0000
|
||||||
|
illegal
|
||||||
|
"""
|
||||||
|
|
||||||
|
q_values_k_8_action_exit: """
|
||||||
|
-1.0000
|
||||||
|
illegal
|
||||||
|
4.0951
|
||||||
|
"""
|
||||||
|
|
||||||
|
q_values_k_8_action_south: """
|
||||||
|
illegal
|
||||||
|
0.2579
|
||||||
|
illegal
|
||||||
|
"""
|
||||||
|
|
||||||
|
q_values_k_8_action_west: """
|
||||||
|
illegal
|
||||||
|
0.0000
|
||||||
|
illegal
|
||||||
|
"""
|
||||||
|
|
||||||
|
weights_k_9: """
|
||||||
|
{((0, 0), 'exit'): 4.68559,
|
||||||
|
((0, 1), 'east'): 0,
|
||||||
|
((0, 1), 'north'): 0,
|
||||||
|
((0, 1), 'south'): 0.257925,
|
||||||
|
((0, 1), 'west'): 0,
|
||||||
|
((0, 2), 'exit'): -1.0}
|
||||||
|
"""
|
||||||
|
|
||||||
|
q_values_k_9_action_north: """
|
||||||
|
illegal
|
||||||
|
0.0000
|
||||||
|
illegal
|
||||||
|
"""
|
||||||
|
|
||||||
|
q_values_k_9_action_east: """
|
||||||
|
illegal
|
||||||
|
0.0000
|
||||||
|
illegal
|
||||||
|
"""
|
||||||
|
|
||||||
|
q_values_k_9_action_exit: """
|
||||||
|
-1.0000
|
||||||
|
illegal
|
||||||
|
4.6856
|
||||||
|
"""
|
||||||
|
|
||||||
|
q_values_k_9_action_south: """
|
||||||
|
illegal
|
||||||
|
0.2579
|
||||||
|
illegal
|
||||||
|
"""
|
||||||
|
|
||||||
|
q_values_k_9_action_west: """
|
||||||
|
illegal
|
||||||
|
0.0000
|
||||||
|
illegal
|
||||||
|
"""
|
||||||
|
|
||||||
|
weights_k_100: """
|
||||||
|
{((0, 0), 'exit'): 9.817519963685992,
|
||||||
|
((0, 1), 'east'): 0.9498968104823575,
|
||||||
|
((0, 1), 'north'): -0.66699795412272,
|
||||||
|
((0, 1), 'south'): 3.256207905310105,
|
||||||
|
((0, 1), 'west'): 0.8236280735014627,
|
||||||
|
((0, 2), 'exit'): -9.476652366972639}
|
||||||
|
"""
|
||||||
|
|
||||||
|
q_values_k_100_action_north: """
|
||||||
|
illegal
|
||||||
|
-0.6670
|
||||||
|
illegal
|
||||||
|
"""
|
||||||
|
|
||||||
|
q_values_k_100_action_east: """
|
||||||
|
illegal
|
||||||
|
0.9499
|
||||||
|
illegal
|
||||||
|
"""
|
||||||
|
|
||||||
|
q_values_k_100_action_exit: """
|
||||||
|
-9.4767
|
||||||
|
illegal
|
||||||
|
9.8175
|
||||||
|
"""
|
||||||
|
|
||||||
|
q_values_k_100_action_south: """
|
||||||
|
illegal
|
||||||
|
3.2562
|
||||||
|
illegal
|
||||||
|
"""
|
||||||
|
|
||||||
|
q_values_k_100_action_west: """
|
||||||
|
illegal
|
||||||
|
0.8236
|
||||||
|
illegal
|
||||||
|
"""
|
||||||
|
|
||||||
22
p3_rl/test_cases/q8/2-tinygrid-noisy.test
Normal file
22
p3_rl/test_cases/q8/2-tinygrid-noisy.test
Normal file
@@ -0,0 +1,22 @@
|
|||||||
|
class: "ApproximateQLearningTest"
|
||||||
|
|
||||||
|
# GridWorld specification
|
||||||
|
# _ is empty space
|
||||||
|
# numbers are terminal states with that value
|
||||||
|
# # is a wall
|
||||||
|
# S is a start state
|
||||||
|
#
|
||||||
|
grid: """
|
||||||
|
-10
|
||||||
|
S
|
||||||
|
10
|
||||||
|
"""
|
||||||
|
discount: "0.75"
|
||||||
|
noise: "0.25"
|
||||||
|
livingReward: "0.0"
|
||||||
|
epsilon: "0.5"
|
||||||
|
learningRate: "0.1"
|
||||||
|
numExperiences: "100"
|
||||||
|
valueIterations: "100"
|
||||||
|
iterations: "10000"
|
||||||
|
|
||||||
935
p3_rl/test_cases/q8/3-bridge.solution
Normal file
935
p3_rl/test_cases/q8/3-bridge.solution
Normal file
@@ -0,0 +1,935 @@
|
|||||||
|
weights_k_0: """
|
||||||
|
{((0, 1), 'exit'): 0,
|
||||||
|
((0, 2), 'exit'): 0,
|
||||||
|
((0, 3), 'exit'): 0,
|
||||||
|
((0, 4), 'exit'): 0,
|
||||||
|
((0, 5), 'exit'): 0,
|
||||||
|
((1, 0), 'exit'): 0,
|
||||||
|
((1, 1), 'east'): 0,
|
||||||
|
((1, 1), 'north'): 0,
|
||||||
|
((1, 1), 'south'): 0,
|
||||||
|
((1, 1), 'west'): 0,
|
||||||
|
((1, 2), 'east'): 0,
|
||||||
|
((1, 2), 'north'): 0,
|
||||||
|
((1, 2), 'south'): 0,
|
||||||
|
((1, 2), 'west'): 0,
|
||||||
|
((1, 3), 'east'): 0,
|
||||||
|
((1, 3), 'north'): 0,
|
||||||
|
((1, 3), 'south'): 0,
|
||||||
|
((1, 3), 'west'): 0,
|
||||||
|
((1, 4), 'east'): 0,
|
||||||
|
((1, 4), 'north'): 0,
|
||||||
|
((1, 4), 'south'): 0,
|
||||||
|
((1, 4), 'west'): 0,
|
||||||
|
((1, 5), 'east'): 0,
|
||||||
|
((1, 5), 'north'): 0,
|
||||||
|
((1, 5), 'south'): 0,
|
||||||
|
((1, 5), 'west'): 0,
|
||||||
|
((1, 6), 'exit'): 0,
|
||||||
|
((2, 1), 'exit'): 0,
|
||||||
|
((2, 2), 'exit'): 0,
|
||||||
|
((2, 3), 'exit'): 0,
|
||||||
|
((2, 4), 'exit'): 0,
|
||||||
|
((2, 5), 'exit'): 0}
|
||||||
|
"""
|
||||||
|
|
||||||
|
q_values_k_0_action_north: """
|
||||||
|
__________ illegal __________
|
||||||
|
illegal 0.0000 illegal
|
||||||
|
illegal 0.0000 illegal
|
||||||
|
illegal 0.0000 illegal
|
||||||
|
illegal 0.0000 illegal
|
||||||
|
illegal 0.0000 illegal
|
||||||
|
__________ illegal __________
|
||||||
|
"""
|
||||||
|
|
||||||
|
q_values_k_0_action_east: """
|
||||||
|
__________ illegal __________
|
||||||
|
illegal 0.0000 illegal
|
||||||
|
illegal 0.0000 illegal
|
||||||
|
illegal 0.0000 illegal
|
||||||
|
illegal 0.0000 illegal
|
||||||
|
illegal 0.0000 illegal
|
||||||
|
__________ illegal __________
|
||||||
|
"""
|
||||||
|
|
||||||
|
q_values_k_0_action_exit: """
|
||||||
|
__________ 0.0000 __________
|
||||||
|
0.0000 illegal 0.0000
|
||||||
|
0.0000 illegal 0.0000
|
||||||
|
0.0000 illegal 0.0000
|
||||||
|
0.0000 illegal 0.0000
|
||||||
|
0.0000 illegal 0.0000
|
||||||
|
__________ 0.0000 __________
|
||||||
|
"""
|
||||||
|
|
||||||
|
q_values_k_0_action_south: """
|
||||||
|
__________ illegal __________
|
||||||
|
illegal 0.0000 illegal
|
||||||
|
illegal 0.0000 illegal
|
||||||
|
illegal 0.0000 illegal
|
||||||
|
illegal 0.0000 illegal
|
||||||
|
illegal 0.0000 illegal
|
||||||
|
__________ illegal __________
|
||||||
|
"""
|
||||||
|
|
||||||
|
q_values_k_0_action_west: """
|
||||||
|
__________ illegal __________
|
||||||
|
illegal 0.0000 illegal
|
||||||
|
illegal 0.0000 illegal
|
||||||
|
illegal 0.0000 illegal
|
||||||
|
illegal 0.0000 illegal
|
||||||
|
illegal 0.0000 illegal
|
||||||
|
__________ illegal __________
|
||||||
|
"""
|
||||||
|
|
||||||
|
weights_k_1: """
|
||||||
|
{((0, 1), 'exit'): 0,
|
||||||
|
((0, 2), 'exit'): 0,
|
||||||
|
((0, 3), 'exit'): 0,
|
||||||
|
((0, 4), 'exit'): 0,
|
||||||
|
((0, 5), 'exit'): -10.0,
|
||||||
|
((1, 0), 'exit'): 0,
|
||||||
|
((1, 1), 'east'): 0,
|
||||||
|
((1, 1), 'north'): 0,
|
||||||
|
((1, 1), 'south'): 0,
|
||||||
|
((1, 1), 'west'): 0,
|
||||||
|
((1, 2), 'east'): 0,
|
||||||
|
((1, 2), 'north'): 0,
|
||||||
|
((1, 2), 'south'): 0,
|
||||||
|
((1, 2), 'west'): 0,
|
||||||
|
((1, 3), 'east'): 0,
|
||||||
|
((1, 3), 'north'): 0,
|
||||||
|
((1, 3), 'south'): 0,
|
||||||
|
((1, 3), 'west'): 0,
|
||||||
|
((1, 4), 'east'): 0,
|
||||||
|
((1, 4), 'north'): 0,
|
||||||
|
((1, 4), 'south'): 0,
|
||||||
|
((1, 4), 'west'): 0,
|
||||||
|
((1, 5), 'east'): 0,
|
||||||
|
((1, 5), 'north'): 0,
|
||||||
|
((1, 5), 'south'): 0,
|
||||||
|
((1, 5), 'west'): 0,
|
||||||
|
((1, 6), 'exit'): 0,
|
||||||
|
((2, 1), 'exit'): 0,
|
||||||
|
((2, 2), 'exit'): 0,
|
||||||
|
((2, 3), 'exit'): 0,
|
||||||
|
((2, 4), 'exit'): 0,
|
||||||
|
((2, 5), 'exit'): 0}
|
||||||
|
"""
|
||||||
|
|
||||||
|
q_values_k_1_action_north: """
|
||||||
|
__________ illegal __________
|
||||||
|
illegal 0.0000 illegal
|
||||||
|
illegal 0.0000 illegal
|
||||||
|
illegal 0.0000 illegal
|
||||||
|
illegal 0.0000 illegal
|
||||||
|
illegal 0.0000 illegal
|
||||||
|
__________ illegal __________
|
||||||
|
"""
|
||||||
|
|
||||||
|
q_values_k_1_action_east: """
|
||||||
|
__________ illegal __________
|
||||||
|
illegal 0.0000 illegal
|
||||||
|
illegal 0.0000 illegal
|
||||||
|
illegal 0.0000 illegal
|
||||||
|
illegal 0.0000 illegal
|
||||||
|
illegal 0.0000 illegal
|
||||||
|
__________ illegal __________
|
||||||
|
"""
|
||||||
|
|
||||||
|
q_values_k_1_action_exit: """
|
||||||
|
__________ 0.0000 __________
|
||||||
|
-10.0000 illegal 0.0000
|
||||||
|
0.0000 illegal 0.0000
|
||||||
|
0.0000 illegal 0.0000
|
||||||
|
0.0000 illegal 0.0000
|
||||||
|
0.0000 illegal 0.0000
|
||||||
|
__________ 0.0000 __________
|
||||||
|
"""
|
||||||
|
|
||||||
|
q_values_k_1_action_south: """
|
||||||
|
__________ illegal __________
|
||||||
|
illegal 0.0000 illegal
|
||||||
|
illegal 0.0000 illegal
|
||||||
|
illegal 0.0000 illegal
|
||||||
|
illegal 0.0000 illegal
|
||||||
|
illegal 0.0000 illegal
|
||||||
|
__________ illegal __________
|
||||||
|
"""
|
||||||
|
|
||||||
|
q_values_k_1_action_west: """
|
||||||
|
__________ illegal __________
|
||||||
|
illegal 0.0000 illegal
|
||||||
|
illegal 0.0000 illegal
|
||||||
|
illegal 0.0000 illegal
|
||||||
|
illegal 0.0000 illegal
|
||||||
|
illegal 0.0000 illegal
|
||||||
|
__________ illegal __________
|
||||||
|
"""
|
||||||
|
|
||||||
|
weights_k_2: """
|
||||||
|
{((0, 1), 'exit'): 0,
|
||||||
|
((0, 2), 'exit'): 0,
|
||||||
|
((0, 3), 'exit'): 0,
|
||||||
|
((0, 4), 'exit'): 0,
|
||||||
|
((0, 5), 'exit'): -10.0,
|
||||||
|
((1, 0), 'exit'): 0,
|
||||||
|
((1, 1), 'east'): 0,
|
||||||
|
((1, 1), 'north'): 0,
|
||||||
|
((1, 1), 'south'): 0,
|
||||||
|
((1, 1), 'west'): 0,
|
||||||
|
((1, 2), 'east'): 0,
|
||||||
|
((1, 2), 'north'): 0,
|
||||||
|
((1, 2), 'south'): 0,
|
||||||
|
((1, 2), 'west'): 0,
|
||||||
|
((1, 3), 'east'): 0,
|
||||||
|
((1, 3), 'north'): 0,
|
||||||
|
((1, 3), 'south'): 0,
|
||||||
|
((1, 3), 'west'): 0,
|
||||||
|
((1, 4), 'east'): 0,
|
||||||
|
((1, 4), 'north'): 0,
|
||||||
|
((1, 4), 'south'): 0,
|
||||||
|
((1, 4), 'west'): 0,
|
||||||
|
((1, 5), 'east'): 0,
|
||||||
|
((1, 5), 'north'): 0,
|
||||||
|
((1, 5), 'south'): 0.0,
|
||||||
|
((1, 5), 'west'): 0,
|
||||||
|
((1, 6), 'exit'): 0,
|
||||||
|
((2, 1), 'exit'): 0,
|
||||||
|
((2, 2), 'exit'): 0,
|
||||||
|
((2, 3), 'exit'): 0,
|
||||||
|
((2, 4), 'exit'): 0,
|
||||||
|
((2, 5), 'exit'): 0}
|
||||||
|
"""
|
||||||
|
|
||||||
|
q_values_k_2_action_north: """
|
||||||
|
__________ illegal __________
|
||||||
|
illegal 0.0000 illegal
|
||||||
|
illegal 0.0000 illegal
|
||||||
|
illegal 0.0000 illegal
|
||||||
|
illegal 0.0000 illegal
|
||||||
|
illegal 0.0000 illegal
|
||||||
|
__________ illegal __________
|
||||||
|
"""
|
||||||
|
|
||||||
|
q_values_k_2_action_east: """
|
||||||
|
__________ illegal __________
|
||||||
|
illegal 0.0000 illegal
|
||||||
|
illegal 0.0000 illegal
|
||||||
|
illegal 0.0000 illegal
|
||||||
|
illegal 0.0000 illegal
|
||||||
|
illegal 0.0000 illegal
|
||||||
|
__________ illegal __________
|
||||||
|
"""
|
||||||
|
|
||||||
|
q_values_k_2_action_exit: """
|
||||||
|
__________ 0.0000 __________
|
||||||
|
-10.0000 illegal 0.0000
|
||||||
|
0.0000 illegal 0.0000
|
||||||
|
0.0000 illegal 0.0000
|
||||||
|
0.0000 illegal 0.0000
|
||||||
|
0.0000 illegal 0.0000
|
||||||
|
__________ 0.0000 __________
|
||||||
|
"""
|
||||||
|
|
||||||
|
q_values_k_2_action_south: """
|
||||||
|
__________ illegal __________
|
||||||
|
illegal 0.0000 illegal
|
||||||
|
illegal 0.0000 illegal
|
||||||
|
illegal 0.0000 illegal
|
||||||
|
illegal 0.0000 illegal
|
||||||
|
illegal 0.0000 illegal
|
||||||
|
__________ illegal __________
|
||||||
|
"""
|
||||||
|
|
||||||
|
q_values_k_2_action_west: """
|
||||||
|
__________ illegal __________
|
||||||
|
illegal 0.0000 illegal
|
||||||
|
illegal 0.0000 illegal
|
||||||
|
illegal 0.0000 illegal
|
||||||
|
illegal 0.0000 illegal
|
||||||
|
illegal 0.0000 illegal
|
||||||
|
__________ illegal __________
|
||||||
|
"""
|
||||||
|
|
||||||
|
weights_k_3: """
|
||||||
|
{((0, 1), 'exit'): -10.0,
|
||||||
|
((0, 2), 'exit'): 0,
|
||||||
|
((0, 3), 'exit'): 0,
|
||||||
|
((0, 4), 'exit'): 0,
|
||||||
|
((0, 5), 'exit'): -10.0,
|
||||||
|
((1, 0), 'exit'): 0,
|
||||||
|
((1, 1), 'east'): 0,
|
||||||
|
((1, 1), 'north'): 0,
|
||||||
|
((1, 1), 'south'): 0,
|
||||||
|
((1, 1), 'west'): 0,
|
||||||
|
((1, 2), 'east'): 0,
|
||||||
|
((1, 2), 'north'): 0,
|
||||||
|
((1, 2), 'south'): 0,
|
||||||
|
((1, 2), 'west'): 0,
|
||||||
|
((1, 3), 'east'): 0,
|
||||||
|
((1, 3), 'north'): 0,
|
||||||
|
((1, 3), 'south'): 0,
|
||||||
|
((1, 3), 'west'): 0,
|
||||||
|
((1, 4), 'east'): 0,
|
||||||
|
((1, 4), 'north'): 0,
|
||||||
|
((1, 4), 'south'): 0,
|
||||||
|
((1, 4), 'west'): 0,
|
||||||
|
((1, 5), 'east'): 0,
|
||||||
|
((1, 5), 'north'): 0,
|
||||||
|
((1, 5), 'south'): 0.0,
|
||||||
|
((1, 5), 'west'): 0,
|
||||||
|
((1, 6), 'exit'): 0,
|
||||||
|
((2, 1), 'exit'): 0,
|
||||||
|
((2, 2), 'exit'): 0,
|
||||||
|
((2, 3), 'exit'): 0,
|
||||||
|
((2, 4), 'exit'): 0,
|
||||||
|
((2, 5), 'exit'): 0}
|
||||||
|
"""
|
||||||
|
|
||||||
|
q_values_k_3_action_north: """
|
||||||
|
__________ illegal __________
|
||||||
|
illegal 0.0000 illegal
|
||||||
|
illegal 0.0000 illegal
|
||||||
|
illegal 0.0000 illegal
|
||||||
|
illegal 0.0000 illegal
|
||||||
|
illegal 0.0000 illegal
|
||||||
|
__________ illegal __________
|
||||||
|
"""
|
||||||
|
|
||||||
|
q_values_k_3_action_east: """
|
||||||
|
__________ illegal __________
|
||||||
|
illegal 0.0000 illegal
|
||||||
|
illegal 0.0000 illegal
|
||||||
|
illegal 0.0000 illegal
|
||||||
|
illegal 0.0000 illegal
|
||||||
|
illegal 0.0000 illegal
|
||||||
|
__________ illegal __________
|
||||||
|
"""
|
||||||
|
|
||||||
|
q_values_k_3_action_exit: """
|
||||||
|
__________ 0.0000 __________
|
||||||
|
-10.0000 illegal 0.0000
|
||||||
|
0.0000 illegal 0.0000
|
||||||
|
0.0000 illegal 0.0000
|
||||||
|
0.0000 illegal 0.0000
|
||||||
|
-10.0000 illegal 0.0000
|
||||||
|
__________ 0.0000 __________
|
||||||
|
"""
|
||||||
|
|
||||||
|
q_values_k_3_action_south: """
|
||||||
|
__________ illegal __________
|
||||||
|
illegal 0.0000 illegal
|
||||||
|
illegal 0.0000 illegal
|
||||||
|
illegal 0.0000 illegal
|
||||||
|
illegal 0.0000 illegal
|
||||||
|
illegal 0.0000 illegal
|
||||||
|
__________ illegal __________
|
||||||
|
"""
|
||||||
|
|
||||||
|
q_values_k_3_action_west: """
|
||||||
|
__________ illegal __________
|
||||||
|
illegal 0.0000 illegal
|
||||||
|
illegal 0.0000 illegal
|
||||||
|
illegal 0.0000 illegal
|
||||||
|
illegal 0.0000 illegal
|
||||||
|
illegal 0.0000 illegal
|
||||||
|
__________ illegal __________
|
||||||
|
"""
|
||||||
|
|
||||||
|
weights_k_4: """
|
||||||
|
{((0, 1), 'exit'): -10.0,
|
||||||
|
((0, 2), 'exit'): 0,
|
||||||
|
((0, 3), 'exit'): 0,
|
||||||
|
((0, 4), 'exit'): -10.0,
|
||||||
|
((0, 5), 'exit'): -10.0,
|
||||||
|
((1, 0), 'exit'): 0,
|
||||||
|
((1, 1), 'east'): 0,
|
||||||
|
((1, 1), 'north'): 0,
|
||||||
|
((1, 1), 'south'): 0,
|
||||||
|
((1, 1), 'west'): 0,
|
||||||
|
((1, 2), 'east'): 0,
|
||||||
|
((1, 2), 'north'): 0,
|
||||||
|
((1, 2), 'south'): 0,
|
||||||
|
((1, 2), 'west'): 0,
|
||||||
|
((1, 3), 'east'): 0,
|
||||||
|
((1, 3), 'north'): 0,
|
||||||
|
((1, 3), 'south'): 0,
|
||||||
|
((1, 3), 'west'): 0,
|
||||||
|
((1, 4), 'east'): 0,
|
||||||
|
((1, 4), 'north'): 0,
|
||||||
|
((1, 4), 'south'): 0,
|
||||||
|
((1, 4), 'west'): 0,
|
||||||
|
((1, 5), 'east'): 0,
|
||||||
|
((1, 5), 'north'): 0,
|
||||||
|
((1, 5), 'south'): 0.0,
|
||||||
|
((1, 5), 'west'): 0,
|
||||||
|
((1, 6), 'exit'): 0,
|
||||||
|
((2, 1), 'exit'): 0,
|
||||||
|
((2, 2), 'exit'): 0,
|
||||||
|
((2, 3), 'exit'): 0,
|
||||||
|
((2, 4), 'exit'): 0,
|
||||||
|
((2, 5), 'exit'): 0}
|
||||||
|
"""
|
||||||
|
|
||||||
|
q_values_k_4_action_north: """
|
||||||
|
__________ illegal __________
|
||||||
|
illegal 0.0000 illegal
|
||||||
|
illegal 0.0000 illegal
|
||||||
|
illegal 0.0000 illegal
|
||||||
|
illegal 0.0000 illegal
|
||||||
|
illegal 0.0000 illegal
|
||||||
|
__________ illegal __________
|
||||||
|
"""
|
||||||
|
|
||||||
|
q_values_k_4_action_east: """
|
||||||
|
__________ illegal __________
|
||||||
|
illegal 0.0000 illegal
|
||||||
|
illegal 0.0000 illegal
|
||||||
|
illegal 0.0000 illegal
|
||||||
|
illegal 0.0000 illegal
|
||||||
|
illegal 0.0000 illegal
|
||||||
|
__________ illegal __________
|
||||||
|
"""
|
||||||
|
|
||||||
|
q_values_k_4_action_exit: """
|
||||||
|
__________ 0.0000 __________
|
||||||
|
-10.0000 illegal 0.0000
|
||||||
|
-10.0000 illegal 0.0000
|
||||||
|
0.0000 illegal 0.0000
|
||||||
|
0.0000 illegal 0.0000
|
||||||
|
-10.0000 illegal 0.0000
|
||||||
|
__________ 0.0000 __________
|
||||||
|
"""
|
||||||
|
|
||||||
|
q_values_k_4_action_south: """
|
||||||
|
__________ illegal __________
|
||||||
|
illegal 0.0000 illegal
|
||||||
|
illegal 0.0000 illegal
|
||||||
|
illegal 0.0000 illegal
|
||||||
|
illegal 0.0000 illegal
|
||||||
|
illegal 0.0000 illegal
|
||||||
|
__________ illegal __________
|
||||||
|
"""
|
||||||
|
|
||||||
|
q_values_k_4_action_west: """
|
||||||
|
__________ illegal __________
|
||||||
|
illegal 0.0000 illegal
|
||||||
|
illegal 0.0000 illegal
|
||||||
|
illegal 0.0000 illegal
|
||||||
|
illegal 0.0000 illegal
|
||||||
|
illegal 0.0000 illegal
|
||||||
|
__________ illegal __________
|
||||||
|
"""
|
||||||
|
|
||||||
|
weights_k_5: """
|
||||||
|
{((0, 1), 'exit'): -10.0,
|
||||||
|
((0, 2), 'exit'): 0,
|
||||||
|
((0, 3), 'exit'): 0,
|
||||||
|
((0, 4), 'exit'): -10.0,
|
||||||
|
((0, 5), 'exit'): -10.0,
|
||||||
|
((1, 0), 'exit'): 0,
|
||||||
|
((1, 1), 'east'): 0,
|
||||||
|
((1, 1), 'north'): 0,
|
||||||
|
((1, 1), 'south'): 0,
|
||||||
|
((1, 1), 'west'): 0,
|
||||||
|
((1, 2), 'east'): 0,
|
||||||
|
((1, 2), 'north'): 0,
|
||||||
|
((1, 2), 'south'): 0,
|
||||||
|
((1, 2), 'west'): 0,
|
||||||
|
((1, 3), 'east'): 0,
|
||||||
|
((1, 3), 'north'): 0,
|
||||||
|
((1, 3), 'south'): 0,
|
||||||
|
((1, 3), 'west'): 0,
|
||||||
|
((1, 4), 'east'): 0,
|
||||||
|
((1, 4), 'north'): 0,
|
||||||
|
((1, 4), 'south'): 0,
|
||||||
|
((1, 4), 'west'): 0,
|
||||||
|
((1, 5), 'east'): 0,
|
||||||
|
((1, 5), 'north'): 0,
|
||||||
|
((1, 5), 'south'): 0.0,
|
||||||
|
((1, 5), 'west'): 0,
|
||||||
|
((1, 6), 'exit'): 0,
|
||||||
|
((2, 1), 'exit'): 0,
|
||||||
|
((2, 2), 'exit'): 0,
|
||||||
|
((2, 3), 'exit'): -10.0,
|
||||||
|
((2, 4), 'exit'): 0,
|
||||||
|
((2, 5), 'exit'): 0}
|
||||||
|
"""
|
||||||
|
|
||||||
|
q_values_k_5_action_north: """
|
||||||
|
__________ illegal __________
|
||||||
|
illegal 0.0000 illegal
|
||||||
|
illegal 0.0000 illegal
|
||||||
|
illegal 0.0000 illegal
|
||||||
|
illegal 0.0000 illegal
|
||||||
|
illegal 0.0000 illegal
|
||||||
|
__________ illegal __________
|
||||||
|
"""
|
||||||
|
|
||||||
|
q_values_k_5_action_east: """
|
||||||
|
__________ illegal __________
|
||||||
|
illegal 0.0000 illegal
|
||||||
|
illegal 0.0000 illegal
|
||||||
|
illegal 0.0000 illegal
|
||||||
|
illegal 0.0000 illegal
|
||||||
|
illegal 0.0000 illegal
|
||||||
|
__________ illegal __________
|
||||||
|
"""
|
||||||
|
|
||||||
|
q_values_k_5_action_exit: """
|
||||||
|
__________ 0.0000 __________
|
||||||
|
-10.0000 illegal 0.0000
|
||||||
|
-10.0000 illegal 0.0000
|
||||||
|
0.0000 illegal -10.0000
|
||||||
|
0.0000 illegal 0.0000
|
||||||
|
-10.0000 illegal 0.0000
|
||||||
|
__________ 0.0000 __________
|
||||||
|
"""
|
||||||
|
|
||||||
|
q_values_k_5_action_south: """
|
||||||
|
__________ illegal __________
|
||||||
|
illegal 0.0000 illegal
|
||||||
|
illegal 0.0000 illegal
|
||||||
|
illegal 0.0000 illegal
|
||||||
|
illegal 0.0000 illegal
|
||||||
|
illegal 0.0000 illegal
|
||||||
|
__________ illegal __________
|
||||||
|
"""
|
||||||
|
|
||||||
|
q_values_k_5_action_west: """
|
||||||
|
__________ illegal __________
|
||||||
|
illegal 0.0000 illegal
|
||||||
|
illegal 0.0000 illegal
|
||||||
|
illegal 0.0000 illegal
|
||||||
|
illegal 0.0000 illegal
|
||||||
|
illegal 0.0000 illegal
|
||||||
|
__________ illegal __________
|
||||||
|
"""
|
||||||
|
|
||||||
|
weights_k_6: """
|
||||||
|
{((0, 1), 'exit'): -10.0,
|
||||||
|
((0, 2), 'exit'): 0,
|
||||||
|
((0, 3), 'exit'): -10.0,
|
||||||
|
((0, 4), 'exit'): -10.0,
|
||||||
|
((0, 5), 'exit'): -10.0,
|
||||||
|
((1, 0), 'exit'): 0,
|
||||||
|
((1, 1), 'east'): 0,
|
||||||
|
((1, 1), 'north'): 0,
|
||||||
|
((1, 1), 'south'): 0,
|
||||||
|
((1, 1), 'west'): 0,
|
||||||
|
((1, 2), 'east'): 0,
|
||||||
|
((1, 2), 'north'): 0,
|
||||||
|
((1, 2), 'south'): 0,
|
||||||
|
((1, 2), 'west'): 0,
|
||||||
|
((1, 3), 'east'): 0,
|
||||||
|
((1, 3), 'north'): 0,
|
||||||
|
((1, 3), 'south'): 0,
|
||||||
|
((1, 3), 'west'): 0,
|
||||||
|
((1, 4), 'east'): 0,
|
||||||
|
((1, 4), 'north'): 0,
|
||||||
|
((1, 4), 'south'): 0,
|
||||||
|
((1, 4), 'west'): 0,
|
||||||
|
((1, 5), 'east'): 0,
|
||||||
|
((1, 5), 'north'): 0,
|
||||||
|
((1, 5), 'south'): 0.0,
|
||||||
|
((1, 5), 'west'): 0,
|
||||||
|
((1, 6), 'exit'): 0,
|
||||||
|
((2, 1), 'exit'): 0,
|
||||||
|
((2, 2), 'exit'): 0,
|
||||||
|
((2, 3), 'exit'): -10.0,
|
||||||
|
((2, 4), 'exit'): 0,
|
||||||
|
((2, 5), 'exit'): 0}
|
||||||
|
"""
|
||||||
|
|
||||||
|
q_values_k_6_action_north: """
|
||||||
|
__________ illegal __________
|
||||||
|
illegal 0.0000 illegal
|
||||||
|
illegal 0.0000 illegal
|
||||||
|
illegal 0.0000 illegal
|
||||||
|
illegal 0.0000 illegal
|
||||||
|
illegal 0.0000 illegal
|
||||||
|
__________ illegal __________
|
||||||
|
"""
|
||||||
|
|
||||||
|
q_values_k_6_action_east: """
|
||||||
|
__________ illegal __________
|
||||||
|
illegal 0.0000 illegal
|
||||||
|
illegal 0.0000 illegal
|
||||||
|
illegal 0.0000 illegal
|
||||||
|
illegal 0.0000 illegal
|
||||||
|
illegal 0.0000 illegal
|
||||||
|
__________ illegal __________
|
||||||
|
"""
|
||||||
|
|
||||||
|
q_values_k_6_action_exit: """
|
||||||
|
__________ 0.0000 __________
|
||||||
|
-10.0000 illegal 0.0000
|
||||||
|
-10.0000 illegal 0.0000
|
||||||
|
-10.0000 illegal -10.0000
|
||||||
|
0.0000 illegal 0.0000
|
||||||
|
-10.0000 illegal 0.0000
|
||||||
|
__________ 0.0000 __________
|
||||||
|
"""
|
||||||
|
|
||||||
|
q_values_k_6_action_south: """
|
||||||
|
__________ illegal __________
|
||||||
|
illegal 0.0000 illegal
|
||||||
|
illegal 0.0000 illegal
|
||||||
|
illegal 0.0000 illegal
|
||||||
|
illegal 0.0000 illegal
|
||||||
|
illegal 0.0000 illegal
|
||||||
|
__________ illegal __________
|
||||||
|
"""
|
||||||
|
|
||||||
|
q_values_k_6_action_west: """
|
||||||
|
__________ illegal __________
|
||||||
|
illegal 0.0000 illegal
|
||||||
|
illegal 0.0000 illegal
|
||||||
|
illegal 0.0000 illegal
|
||||||
|
illegal 0.0000 illegal
|
||||||
|
illegal 0.0000 illegal
|
||||||
|
__________ illegal __________
|
||||||
|
"""
|
||||||
|
|
||||||
|
weights_k_7: """
|
||||||
|
{((0, 1), 'exit'): -10.0,
|
||||||
|
((0, 2), 'exit'): 0,
|
||||||
|
((0, 3), 'exit'): -10.0,
|
||||||
|
((0, 4), 'exit'): -10.0,
|
||||||
|
((0, 5), 'exit'): -10.0,
|
||||||
|
((1, 0), 'exit'): 0,
|
||||||
|
((1, 1), 'east'): 0,
|
||||||
|
((1, 1), 'north'): 0,
|
||||||
|
((1, 1), 'south'): 0,
|
||||||
|
((1, 1), 'west'): 0,
|
||||||
|
((1, 2), 'east'): 0,
|
||||||
|
((1, 2), 'north'): 0,
|
||||||
|
((1, 2), 'south'): 0,
|
||||||
|
((1, 2), 'west'): 0,
|
||||||
|
((1, 3), 'east'): 0,
|
||||||
|
((1, 3), 'north'): 0,
|
||||||
|
((1, 3), 'south'): 0.0,
|
||||||
|
((1, 3), 'west'): 0,
|
||||||
|
((1, 4), 'east'): 0,
|
||||||
|
((1, 4), 'north'): 0,
|
||||||
|
((1, 4), 'south'): 0,
|
||||||
|
((1, 4), 'west'): 0,
|
||||||
|
((1, 5), 'east'): 0,
|
||||||
|
((1, 5), 'north'): 0,
|
||||||
|
((1, 5), 'south'): 0.0,
|
||||||
|
((1, 5), 'west'): 0,
|
||||||
|
((1, 6), 'exit'): 0,
|
||||||
|
((2, 1), 'exit'): 0,
|
||||||
|
((2, 2), 'exit'): 0,
|
||||||
|
((2, 3), 'exit'): -10.0,
|
||||||
|
((2, 4), 'exit'): 0,
|
||||||
|
((2, 5), 'exit'): 0}
|
||||||
|
"""
|
||||||
|
|
||||||
|
q_values_k_7_action_north: """
|
||||||
|
__________ illegal __________
|
||||||
|
illegal 0.0000 illegal
|
||||||
|
illegal 0.0000 illegal
|
||||||
|
illegal 0.0000 illegal
|
||||||
|
illegal 0.0000 illegal
|
||||||
|
illegal 0.0000 illegal
|
||||||
|
__________ illegal __________
|
||||||
|
"""
|
||||||
|
|
||||||
|
q_values_k_7_action_east: """
|
||||||
|
__________ illegal __________
|
||||||
|
illegal 0.0000 illegal
|
||||||
|
illegal 0.0000 illegal
|
||||||
|
illegal 0.0000 illegal
|
||||||
|
illegal 0.0000 illegal
|
||||||
|
illegal 0.0000 illegal
|
||||||
|
__________ illegal __________
|
||||||
|
"""
|
||||||
|
|
||||||
|
q_values_k_7_action_exit: """
|
||||||
|
__________ 0.0000 __________
|
||||||
|
-10.0000 illegal 0.0000
|
||||||
|
-10.0000 illegal 0.0000
|
||||||
|
-10.0000 illegal -10.0000
|
||||||
|
0.0000 illegal 0.0000
|
||||||
|
-10.0000 illegal 0.0000
|
||||||
|
__________ 0.0000 __________
|
||||||
|
"""
|
||||||
|
|
||||||
|
q_values_k_7_action_south: """
|
||||||
|
__________ illegal __________
|
||||||
|
illegal 0.0000 illegal
|
||||||
|
illegal 0.0000 illegal
|
||||||
|
illegal 0.0000 illegal
|
||||||
|
illegal 0.0000 illegal
|
||||||
|
illegal 0.0000 illegal
|
||||||
|
__________ illegal __________
|
||||||
|
"""
|
||||||
|
|
||||||
|
q_values_k_7_action_west: """
|
||||||
|
__________ illegal __________
|
||||||
|
illegal 0.0000 illegal
|
||||||
|
illegal 0.0000 illegal
|
||||||
|
illegal 0.0000 illegal
|
||||||
|
illegal 0.0000 illegal
|
||||||
|
illegal 0.0000 illegal
|
||||||
|
__________ illegal __________
|
||||||
|
"""
|
||||||
|
|
||||||
|
weights_k_8: """
|
||||||
|
{((0, 1), 'exit'): -10.0,
|
||||||
|
((0, 2), 'exit'): -10.0,
|
||||||
|
((0, 3), 'exit'): -10.0,
|
||||||
|
((0, 4), 'exit'): -10.0,
|
||||||
|
((0, 5), 'exit'): -10.0,
|
||||||
|
((1, 0), 'exit'): 0,
|
||||||
|
((1, 1), 'east'): 0,
|
||||||
|
((1, 1), 'north'): 0,
|
||||||
|
((1, 1), 'south'): 0,
|
||||||
|
((1, 1), 'west'): 0,
|
||||||
|
((1, 2), 'east'): 0,
|
||||||
|
((1, 2), 'north'): 0,
|
||||||
|
((1, 2), 'south'): 0,
|
||||||
|
((1, 2), 'west'): 0,
|
||||||
|
((1, 3), 'east'): 0,
|
||||||
|
((1, 3), 'north'): 0,
|
||||||
|
((1, 3), 'south'): 0.0,
|
||||||
|
((1, 3), 'west'): 0,
|
||||||
|
((1, 4), 'east'): 0,
|
||||||
|
((1, 4), 'north'): 0,
|
||||||
|
((1, 4), 'south'): 0,
|
||||||
|
((1, 4), 'west'): 0,
|
||||||
|
((1, 5), 'east'): 0,
|
||||||
|
((1, 5), 'north'): 0,
|
||||||
|
((1, 5), 'south'): 0.0,
|
||||||
|
((1, 5), 'west'): 0,
|
||||||
|
((1, 6), 'exit'): 0,
|
||||||
|
((2, 1), 'exit'): 0,
|
||||||
|
((2, 2), 'exit'): 0,
|
||||||
|
((2, 3), 'exit'): -10.0,
|
||||||
|
((2, 4), 'exit'): 0,
|
||||||
|
((2, 5), 'exit'): 0}
|
||||||
|
"""
|
||||||
|
|
||||||
|
q_values_k_8_action_north: """
|
||||||
|
__________ illegal __________
|
||||||
|
illegal 0.0000 illegal
|
||||||
|
illegal 0.0000 illegal
|
||||||
|
illegal 0.0000 illegal
|
||||||
|
illegal 0.0000 illegal
|
||||||
|
illegal 0.0000 illegal
|
||||||
|
__________ illegal __________
|
||||||
|
"""
|
||||||
|
|
||||||
|
q_values_k_8_action_east: """
|
||||||
|
__________ illegal __________
|
||||||
|
illegal 0.0000 illegal
|
||||||
|
illegal 0.0000 illegal
|
||||||
|
illegal 0.0000 illegal
|
||||||
|
illegal 0.0000 illegal
|
||||||
|
illegal 0.0000 illegal
|
||||||
|
__________ illegal __________
|
||||||
|
"""
|
||||||
|
|
||||||
|
q_values_k_8_action_exit: """
|
||||||
|
__________ 0.0000 __________
|
||||||
|
-10.0000 illegal 0.0000
|
||||||
|
-10.0000 illegal 0.0000
|
||||||
|
-10.0000 illegal -10.0000
|
||||||
|
-10.0000 illegal 0.0000
|
||||||
|
-10.0000 illegal 0.0000
|
||||||
|
__________ 0.0000 __________
|
||||||
|
"""
|
||||||
|
|
||||||
|
q_values_k_8_action_south: """
|
||||||
|
__________ illegal __________
|
||||||
|
illegal 0.0000 illegal
|
||||||
|
illegal 0.0000 illegal
|
||||||
|
illegal 0.0000 illegal
|
||||||
|
illegal 0.0000 illegal
|
||||||
|
illegal 0.0000 illegal
|
||||||
|
__________ illegal __________
|
||||||
|
"""
|
||||||
|
|
||||||
|
q_values_k_8_action_west: """
|
||||||
|
__________ illegal __________
|
||||||
|
illegal 0.0000 illegal
|
||||||
|
illegal 0.0000 illegal
|
||||||
|
illegal 0.0000 illegal
|
||||||
|
illegal 0.0000 illegal
|
||||||
|
illegal 0.0000 illegal
|
||||||
|
__________ illegal __________
|
||||||
|
"""
|
||||||
|
|
||||||
|
weights_k_9: """
|
||||||
|
{((0, 1), 'exit'): -10.0,
|
||||||
|
((0, 2), 'exit'): -10.0,
|
||||||
|
((0, 3), 'exit'): -10.0,
|
||||||
|
((0, 4), 'exit'): -10.0,
|
||||||
|
((0, 5), 'exit'): -10.0,
|
||||||
|
((1, 0), 'exit'): 0.1,
|
||||||
|
((1, 1), 'east'): 0,
|
||||||
|
((1, 1), 'north'): 0,
|
||||||
|
((1, 1), 'south'): 0,
|
||||||
|
((1, 1), 'west'): 0,
|
||||||
|
((1, 2), 'east'): 0,
|
||||||
|
((1, 2), 'north'): 0,
|
||||||
|
((1, 2), 'south'): 0,
|
||||||
|
((1, 2), 'west'): 0,
|
||||||
|
((1, 3), 'east'): 0,
|
||||||
|
((1, 3), 'north'): 0,
|
||||||
|
((1, 3), 'south'): 0.0,
|
||||||
|
((1, 3), 'west'): 0,
|
||||||
|
((1, 4), 'east'): 0,
|
||||||
|
((1, 4), 'north'): 0,
|
||||||
|
((1, 4), 'south'): 0,
|
||||||
|
((1, 4), 'west'): 0,
|
||||||
|
((1, 5), 'east'): 0,
|
||||||
|
((1, 5), 'north'): 0,
|
||||||
|
((1, 5), 'south'): 0.0,
|
||||||
|
((1, 5), 'west'): 0,
|
||||||
|
((1, 6), 'exit'): 0,
|
||||||
|
((2, 1), 'exit'): 0,
|
||||||
|
((2, 2), 'exit'): 0,
|
||||||
|
((2, 3), 'exit'): -10.0,
|
||||||
|
((2, 4), 'exit'): 0,
|
||||||
|
((2, 5), 'exit'): 0}
|
||||||
|
"""
|
||||||
|
|
||||||
|
q_values_k_9_action_north: """
|
||||||
|
__________ illegal __________
|
||||||
|
illegal 0.0000 illegal
|
||||||
|
illegal 0.0000 illegal
|
||||||
|
illegal 0.0000 illegal
|
||||||
|
illegal 0.0000 illegal
|
||||||
|
illegal 0.0000 illegal
|
||||||
|
__________ illegal __________
|
||||||
|
"""
|
||||||
|
|
||||||
|
q_values_k_9_action_east: """
|
||||||
|
__________ illegal __________
|
||||||
|
illegal 0.0000 illegal
|
||||||
|
illegal 0.0000 illegal
|
||||||
|
illegal 0.0000 illegal
|
||||||
|
illegal 0.0000 illegal
|
||||||
|
illegal 0.0000 illegal
|
||||||
|
__________ illegal __________
|
||||||
|
"""
|
||||||
|
|
||||||
|
q_values_k_9_action_exit: """
|
||||||
|
__________ 0.0000 __________
|
||||||
|
-10.0000 illegal 0.0000
|
||||||
|
-10.0000 illegal 0.0000
|
||||||
|
-10.0000 illegal -10.0000
|
||||||
|
-10.0000 illegal 0.0000
|
||||||
|
-10.0000 illegal 0.0000
|
||||||
|
__________ 0.1000 __________
|
||||||
|
"""
|
||||||
|
|
||||||
|
q_values_k_9_action_south: """
|
||||||
|
__________ illegal __________
|
||||||
|
illegal 0.0000 illegal
|
||||||
|
illegal 0.0000 illegal
|
||||||
|
illegal 0.0000 illegal
|
||||||
|
illegal 0.0000 illegal
|
||||||
|
illegal 0.0000 illegal
|
||||||
|
__________ illegal __________
|
||||||
|
"""
|
||||||
|
|
||||||
|
q_values_k_9_action_west: """
|
||||||
|
__________ illegal __________
|
||||||
|
illegal 0.0000 illegal
|
||||||
|
illegal 0.0000 illegal
|
||||||
|
illegal 0.0000 illegal
|
||||||
|
illegal 0.0000 illegal
|
||||||
|
illegal 0.0000 illegal
|
||||||
|
__________ illegal __________
|
||||||
|
"""
|
||||||
|
|
||||||
|
weights_k_500: """
|
||||||
|
{((0, 1), 'exit'): -92.82102012308148,
|
||||||
|
((0, 2), 'exit'): -97.21871610556306,
|
||||||
|
((0, 3), 'exit'): -89.05810108684878,
|
||||||
|
((0, 4), 'exit'): -97.74716004550608,
|
||||||
|
((0, 5), 'exit'): -96.56631617970748,
|
||||||
|
((1, 0), 'exit'): 0.9576088417247839,
|
||||||
|
((1, 1), 'east'): -31.68839649871871,
|
||||||
|
((1, 1), 'north'): -0.5871409700255297,
|
||||||
|
((1, 1), 'south'): -5.571799344704395,
|
||||||
|
((1, 1), 'west'): -20.777007017445538,
|
||||||
|
((1, 2), 'east'): -43.595242197319,
|
||||||
|
((1, 2), 'north'): -1.264202431807023,
|
||||||
|
((1, 2), 'south'): -8.401530599975509,
|
||||||
|
((1, 2), 'west'): -16.283916171605192,
|
||||||
|
((1, 3), 'east'): -3.6956691,
|
||||||
|
((1, 3), 'north'): -0.16712710492783758,
|
||||||
|
((1, 3), 'south'): -3.4722840178579073,
|
||||||
|
((1, 3), 'west'): -40.58867937480968,
|
||||||
|
((1, 4), 'east'): -26.553386621338632,
|
||||||
|
((1, 4), 'north'): -0.799493322153628,
|
||||||
|
((1, 4), 'south'): -6.727671187497919,
|
||||||
|
((1, 4), 'west'): -39.06095135014759,
|
||||||
|
((1, 5), 'east'): -17.067638934181446,
|
||||||
|
((1, 5), 'north'): -5.864753060887024,
|
||||||
|
((1, 5), 'south'): -6.83769420759525,
|
||||||
|
((1, 5), 'west'): -27.062643066307515,
|
||||||
|
((1, 6), 'exit'): 9.353891811077332,
|
||||||
|
((2, 1), 'exit'): -97.21871610556306,
|
||||||
|
((2, 2), 'exit'): -87.84233454094309,
|
||||||
|
((2, 3), 'exit'): -96.90968456173674,
|
||||||
|
((2, 4), 'exit'): -94.185026299696,
|
||||||
|
((2, 5), 'exit'): -96.90968456173674}
|
||||||
|
"""
|
||||||
|
|
||||||
|
q_values_k_500_action_north: """
|
||||||
|
__________ illegal __________
|
||||||
|
illegal -5.8648 illegal
|
||||||
|
illegal -0.7995 illegal
|
||||||
|
illegal -0.1671 illegal
|
||||||
|
illegal -1.2642 illegal
|
||||||
|
illegal -0.5871 illegal
|
||||||
|
__________ illegal __________
|
||||||
|
"""
|
||||||
|
|
||||||
|
q_values_k_500_action_east: """
|
||||||
|
__________ illegal __________
|
||||||
|
illegal -17.0676 illegal
|
||||||
|
illegal -26.5534 illegal
|
||||||
|
illegal -3.6957 illegal
|
||||||
|
illegal -43.5952 illegal
|
||||||
|
illegal -31.6884 illegal
|
||||||
|
__________ illegal __________
|
||||||
|
"""
|
||||||
|
|
||||||
|
q_values_k_500_action_exit: """
|
||||||
|
__________ 9.3539 __________
|
||||||
|
-96.5663 illegal -96.9097
|
||||||
|
-97.7472 illegal -94.1850
|
||||||
|
-89.0581 illegal -96.9097
|
||||||
|
-97.2187 illegal -87.8423
|
||||||
|
-92.8210 illegal -97.2187
|
||||||
|
__________ 0.9576 __________
|
||||||
|
"""
|
||||||
|
|
||||||
|
q_values_k_500_action_south: """
|
||||||
|
__________ illegal __________
|
||||||
|
illegal -6.8377 illegal
|
||||||
|
illegal -6.7277 illegal
|
||||||
|
illegal -3.4723 illegal
|
||||||
|
illegal -8.4015 illegal
|
||||||
|
illegal -5.5718 illegal
|
||||||
|
__________ illegal __________
|
||||||
|
"""
|
||||||
|
|
||||||
|
q_values_k_500_action_west: """
|
||||||
|
__________ illegal __________
|
||||||
|
illegal -27.0626 illegal
|
||||||
|
illegal -39.0610 illegal
|
||||||
|
illegal -40.5887 illegal
|
||||||
|
illegal -16.2839 illegal
|
||||||
|
illegal -20.7770 illegal
|
||||||
|
__________ illegal __________
|
||||||
|
"""
|
||||||
|
|
||||||
27
p3_rl/test_cases/q8/3-bridge.test
Normal file
27
p3_rl/test_cases/q8/3-bridge.test
Normal file
@@ -0,0 +1,27 @@
|
|||||||
|
class: "ApproximateQLearningTest"
|
||||||
|
|
||||||
|
# GridWorld specification
|
||||||
|
# _ is empty space
|
||||||
|
# numbers are terminal states with that value
|
||||||
|
# # is a wall
|
||||||
|
# S is a start state
|
||||||
|
#
|
||||||
|
grid: """
|
||||||
|
# 10 #
|
||||||
|
-100 _ -100
|
||||||
|
-100 _ -100
|
||||||
|
-100 _ -100
|
||||||
|
-100 _ -100
|
||||||
|
-100 S -100
|
||||||
|
# 1 #
|
||||||
|
"""
|
||||||
|
gridName: "bridgeGrid"
|
||||||
|
discount: "0.85"
|
||||||
|
noise: "0.1"
|
||||||
|
livingReward: "0.0"
|
||||||
|
epsilon: "0.5"
|
||||||
|
learningRate: "0.1"
|
||||||
|
numExperiences: "500"
|
||||||
|
valueIterations: "100"
|
||||||
|
iterations: "10000"
|
||||||
|
|
||||||
1210
p3_rl/test_cases/q8/4-discountgrid.solution
Normal file
1210
p3_rl/test_cases/q8/4-discountgrid.solution
Normal file
File diff suppressed because it is too large
Load Diff
24
p3_rl/test_cases/q8/4-discountgrid.test
Normal file
24
p3_rl/test_cases/q8/4-discountgrid.test
Normal file
@@ -0,0 +1,24 @@
|
|||||||
|
class: "ApproximateQLearningTest"
|
||||||
|
|
||||||
|
# GridWorld specification
|
||||||
|
# _ is empty space
|
||||||
|
# numbers are terminal states with that value
|
||||||
|
# # is a wall
|
||||||
|
# S is a start state
|
||||||
|
#
|
||||||
|
grid: """
|
||||||
|
-10 _ 10 _ _
|
||||||
|
-10 _ # _ _
|
||||||
|
-10 _ 1 _ _
|
||||||
|
-10 _ # # _
|
||||||
|
-10 S _ _ _
|
||||||
|
"""
|
||||||
|
discount: "0.9"
|
||||||
|
noise: "0.2"
|
||||||
|
livingReward: "0.0"
|
||||||
|
epsilon: "0.2"
|
||||||
|
learningRate: "0.1"
|
||||||
|
numExperiences: "3000"
|
||||||
|
valueIterations: "100"
|
||||||
|
iterations: "10000"
|
||||||
|
|
||||||
880
p3_rl/test_cases/q8/5-coord-extractor.solution
Normal file
880
p3_rl/test_cases/q8/5-coord-extractor.solution
Normal file
@@ -0,0 +1,880 @@
|
|||||||
|
weights_k_0: """
|
||||||
|
{'action=east': 0,
|
||||||
|
'action=exit': 0,
|
||||||
|
'action=north': 0,
|
||||||
|
'action=south': 0,
|
||||||
|
'action=west': 0,
|
||||||
|
'x=0': 0,
|
||||||
|
'x=1': 0,
|
||||||
|
'x=2': 0,
|
||||||
|
'x=3': 0,
|
||||||
|
'x=4': 0,
|
||||||
|
'y=0': 0,
|
||||||
|
'y=1': 0,
|
||||||
|
'y=2': 0,
|
||||||
|
'y=3': 0,
|
||||||
|
'y=4': 0,
|
||||||
|
(0, 0): 0,
|
||||||
|
(0, 1): 0,
|
||||||
|
(0, 2): 0,
|
||||||
|
(0, 3): 0,
|
||||||
|
(0, 4): 0,
|
||||||
|
(1, 0): 0,
|
||||||
|
(1, 1): 0,
|
||||||
|
(1, 2): 0,
|
||||||
|
(1, 3): 0,
|
||||||
|
(1, 4): 0,
|
||||||
|
(2, 0): 0,
|
||||||
|
(2, 2): 0,
|
||||||
|
(2, 4): 0,
|
||||||
|
(3, 0): 0,
|
||||||
|
(3, 2): 0,
|
||||||
|
(3, 3): 0,
|
||||||
|
(3, 4): 0,
|
||||||
|
(4, 0): 0,
|
||||||
|
(4, 1): 0,
|
||||||
|
(4, 2): 0,
|
||||||
|
(4, 3): 0,
|
||||||
|
(4, 4): 0}
|
||||||
|
"""
|
||||||
|
|
||||||
|
q_values_k_0_action_north: """
|
||||||
|
illegal 0.0000 illegal 0.0000 0.0000
|
||||||
|
illegal 0.0000 __________ 0.0000 0.0000
|
||||||
|
illegal 0.0000 illegal 0.0000 0.0000
|
||||||
|
illegal 0.0000 __________ __________ 0.0000
|
||||||
|
illegal 0.0000 0.0000 0.0000 0.0000
|
||||||
|
"""
|
||||||
|
|
||||||
|
q_values_k_0_action_east: """
|
||||||
|
illegal 0.0000 illegal 0.0000 0.0000
|
||||||
|
illegal 0.0000 __________ 0.0000 0.0000
|
||||||
|
illegal 0.0000 illegal 0.0000 0.0000
|
||||||
|
illegal 0.0000 __________ __________ 0.0000
|
||||||
|
illegal 0.0000 0.0000 0.0000 0.0000
|
||||||
|
"""
|
||||||
|
|
||||||
|
q_values_k_0_action_exit: """
|
||||||
|
0.0000 illegal 0.0000 illegal illegal
|
||||||
|
0.0000 illegal __________ illegal illegal
|
||||||
|
0.0000 illegal 0.0000 illegal illegal
|
||||||
|
0.0000 illegal __________ __________ illegal
|
||||||
|
0.0000 illegal illegal illegal illegal
|
||||||
|
"""
|
||||||
|
|
||||||
|
q_values_k_0_action_south: """
|
||||||
|
illegal 0.0000 illegal 0.0000 0.0000
|
||||||
|
illegal 0.0000 __________ 0.0000 0.0000
|
||||||
|
illegal 0.0000 illegal 0.0000 0.0000
|
||||||
|
illegal 0.0000 __________ __________ 0.0000
|
||||||
|
illegal 0.0000 0.0000 0.0000 0.0000
|
||||||
|
"""
|
||||||
|
|
||||||
|
q_values_k_0_action_west: """
|
||||||
|
illegal 0.0000 illegal 0.0000 0.0000
|
||||||
|
illegal 0.0000 __________ 0.0000 0.0000
|
||||||
|
illegal 0.0000 illegal 0.0000 0.0000
|
||||||
|
illegal 0.0000 __________ __________ 0.0000
|
||||||
|
illegal 0.0000 0.0000 0.0000 0.0000
|
||||||
|
"""
|
||||||
|
|
||||||
|
weights_k_1: """
|
||||||
|
{'action=east': 0,
|
||||||
|
'action=exit': 0,
|
||||||
|
'action=north': 0,
|
||||||
|
'action=south': 0.0,
|
||||||
|
'action=west': 0,
|
||||||
|
'x=0': 0,
|
||||||
|
'x=1': 0.0,
|
||||||
|
'x=2': 0,
|
||||||
|
'x=3': 0,
|
||||||
|
'x=4': 0,
|
||||||
|
'y=0': 0,
|
||||||
|
'y=1': 0.0,
|
||||||
|
'y=2': 0,
|
||||||
|
'y=3': 0,
|
||||||
|
'y=4': 0,
|
||||||
|
(0, 0): 0,
|
||||||
|
(0, 1): 0,
|
||||||
|
(0, 2): 0,
|
||||||
|
(0, 3): 0,
|
||||||
|
(0, 4): 0,
|
||||||
|
(1, 0): 0.0,
|
||||||
|
(1, 1): 0,
|
||||||
|
(1, 2): 0,
|
||||||
|
(1, 3): 0,
|
||||||
|
(1, 4): 0,
|
||||||
|
(2, 0): 0,
|
||||||
|
(2, 2): 0,
|
||||||
|
(2, 4): 0,
|
||||||
|
(3, 0): 0,
|
||||||
|
(3, 2): 0,
|
||||||
|
(3, 3): 0,
|
||||||
|
(3, 4): 0,
|
||||||
|
(4, 0): 0,
|
||||||
|
(4, 1): 0,
|
||||||
|
(4, 2): 0,
|
||||||
|
(4, 3): 0,
|
||||||
|
(4, 4): 0}
|
||||||
|
"""
|
||||||
|
|
||||||
|
q_values_k_1_action_north: """
|
||||||
|
illegal 0.0000 illegal 0.0000 0.0000
|
||||||
|
illegal 0.0000 __________ 0.0000 0.0000
|
||||||
|
illegal 0.0000 illegal 0.0000 0.0000
|
||||||
|
illegal 0.0000 __________ __________ 0.0000
|
||||||
|
illegal 0.0000 0.0000 0.0000 0.0000
|
||||||
|
"""
|
||||||
|
|
||||||
|
q_values_k_1_action_east: """
|
||||||
|
illegal 0.0000 illegal 0.0000 0.0000
|
||||||
|
illegal 0.0000 __________ 0.0000 0.0000
|
||||||
|
illegal 0.0000 illegal 0.0000 0.0000
|
||||||
|
illegal 0.0000 __________ __________ 0.0000
|
||||||
|
illegal 0.0000 0.0000 0.0000 0.0000
|
||||||
|
"""
|
||||||
|
|
||||||
|
q_values_k_1_action_exit: """
|
||||||
|
0.0000 illegal 0.0000 illegal illegal
|
||||||
|
0.0000 illegal __________ illegal illegal
|
||||||
|
0.0000 illegal 0.0000 illegal illegal
|
||||||
|
0.0000 illegal __________ __________ illegal
|
||||||
|
0.0000 illegal illegal illegal illegal
|
||||||
|
"""
|
||||||
|
|
||||||
|
q_values_k_1_action_south: """
|
||||||
|
illegal 0.0000 illegal 0.0000 0.0000
|
||||||
|
illegal 0.0000 __________ 0.0000 0.0000
|
||||||
|
illegal 0.0000 illegal 0.0000 0.0000
|
||||||
|
illegal 0.0000 __________ __________ 0.0000
|
||||||
|
illegal 0.0000 0.0000 0.0000 0.0000
|
||||||
|
"""
|
||||||
|
|
||||||
|
q_values_k_1_action_west: """
|
||||||
|
illegal 0.0000 illegal 0.0000 0.0000
|
||||||
|
illegal 0.0000 __________ 0.0000 0.0000
|
||||||
|
illegal 0.0000 illegal 0.0000 0.0000
|
||||||
|
illegal 0.0000 __________ __________ 0.0000
|
||||||
|
illegal 0.0000 0.0000 0.0000 0.0000
|
||||||
|
"""
|
||||||
|
|
||||||
|
weights_k_2: """
|
||||||
|
{'action=east': 0,
|
||||||
|
'action=exit': 0,
|
||||||
|
'action=north': 0,
|
||||||
|
'action=south': 0.0,
|
||||||
|
'action=west': 0,
|
||||||
|
'x=0': 0,
|
||||||
|
'x=1': 0.0,
|
||||||
|
'x=2': 0,
|
||||||
|
'x=3': 0.0,
|
||||||
|
'x=4': 0,
|
||||||
|
'y=0': 0,
|
||||||
|
'y=1': 0.0,
|
||||||
|
'y=2': 0,
|
||||||
|
'y=3': 0.0,
|
||||||
|
'y=4': 0,
|
||||||
|
(0, 0): 0,
|
||||||
|
(0, 1): 0,
|
||||||
|
(0, 2): 0,
|
||||||
|
(0, 3): 0,
|
||||||
|
(0, 4): 0,
|
||||||
|
(1, 0): 0.0,
|
||||||
|
(1, 1): 0,
|
||||||
|
(1, 2): 0,
|
||||||
|
(1, 3): 0,
|
||||||
|
(1, 4): 0,
|
||||||
|
(2, 0): 0,
|
||||||
|
(2, 2): 0,
|
||||||
|
(2, 4): 0,
|
||||||
|
(3, 0): 0.0,
|
||||||
|
(3, 2): 0,
|
||||||
|
(3, 3): 0,
|
||||||
|
(3, 4): 0,
|
||||||
|
(4, 0): 0,
|
||||||
|
(4, 1): 0,
|
||||||
|
(4, 2): 0,
|
||||||
|
(4, 3): 0,
|
||||||
|
(4, 4): 0}
|
||||||
|
"""
|
||||||
|
|
||||||
|
q_values_k_2_action_north: """
|
||||||
|
illegal 0.0000 illegal 0.0000 0.0000
|
||||||
|
illegal 0.0000 __________ 0.0000 0.0000
|
||||||
|
illegal 0.0000 illegal 0.0000 0.0000
|
||||||
|
illegal 0.0000 __________ __________ 0.0000
|
||||||
|
illegal 0.0000 0.0000 0.0000 0.0000
|
||||||
|
"""
|
||||||
|
|
||||||
|
q_values_k_2_action_east: """
|
||||||
|
illegal 0.0000 illegal 0.0000 0.0000
|
||||||
|
illegal 0.0000 __________ 0.0000 0.0000
|
||||||
|
illegal 0.0000 illegal 0.0000 0.0000
|
||||||
|
illegal 0.0000 __________ __________ 0.0000
|
||||||
|
illegal 0.0000 0.0000 0.0000 0.0000
|
||||||
|
"""
|
||||||
|
|
||||||
|
q_values_k_2_action_exit: """
|
||||||
|
0.0000 illegal 0.0000 illegal illegal
|
||||||
|
0.0000 illegal __________ illegal illegal
|
||||||
|
0.0000 illegal 0.0000 illegal illegal
|
||||||
|
0.0000 illegal __________ __________ illegal
|
||||||
|
0.0000 illegal illegal illegal illegal
|
||||||
|
"""
|
||||||
|
|
||||||
|
q_values_k_2_action_south: """
|
||||||
|
illegal 0.0000 illegal 0.0000 0.0000
|
||||||
|
illegal 0.0000 __________ 0.0000 0.0000
|
||||||
|
illegal 0.0000 illegal 0.0000 0.0000
|
||||||
|
illegal 0.0000 __________ __________ 0.0000
|
||||||
|
illegal 0.0000 0.0000 0.0000 0.0000
|
||||||
|
"""
|
||||||
|
|
||||||
|
q_values_k_2_action_west: """
|
||||||
|
illegal 0.0000 illegal 0.0000 0.0000
|
||||||
|
illegal 0.0000 __________ 0.0000 0.0000
|
||||||
|
illegal 0.0000 illegal 0.0000 0.0000
|
||||||
|
illegal 0.0000 __________ __________ 0.0000
|
||||||
|
illegal 0.0000 0.0000 0.0000 0.0000
|
||||||
|
"""
|
||||||
|
|
||||||
|
weights_k_3: """
|
||||||
|
{'action=east': 0,
|
||||||
|
'action=exit': -1.0,
|
||||||
|
'action=north': 0,
|
||||||
|
'action=south': 0.0,
|
||||||
|
'action=west': 0,
|
||||||
|
'x=0': -1.0,
|
||||||
|
'x=1': 0.0,
|
||||||
|
'x=2': 0,
|
||||||
|
'x=3': 0.0,
|
||||||
|
'x=4': 0,
|
||||||
|
'y=0': -1.0,
|
||||||
|
'y=1': 0.0,
|
||||||
|
'y=2': 0,
|
||||||
|
'y=3': 0.0,
|
||||||
|
'y=4': 0,
|
||||||
|
(0, 0): -1.0,
|
||||||
|
(0, 1): 0,
|
||||||
|
(0, 2): 0,
|
||||||
|
(0, 3): 0,
|
||||||
|
(0, 4): 0,
|
||||||
|
(1, 0): 0.0,
|
||||||
|
(1, 1): 0,
|
||||||
|
(1, 2): 0,
|
||||||
|
(1, 3): 0,
|
||||||
|
(1, 4): 0,
|
||||||
|
(2, 0): 0,
|
||||||
|
(2, 2): 0,
|
||||||
|
(2, 4): 0,
|
||||||
|
(3, 0): 0.0,
|
||||||
|
(3, 2): 0,
|
||||||
|
(3, 3): 0,
|
||||||
|
(3, 4): 0,
|
||||||
|
(4, 0): 0,
|
||||||
|
(4, 1): 0,
|
||||||
|
(4, 2): 0,
|
||||||
|
(4, 3): 0,
|
||||||
|
(4, 4): 0}
|
||||||
|
"""
|
||||||
|
|
||||||
|
q_values_k_3_action_north: """
|
||||||
|
illegal 0.0000 illegal 0.0000 0.0000
|
||||||
|
illegal 0.0000 __________ 0.0000 0.0000
|
||||||
|
illegal 0.0000 illegal 0.0000 0.0000
|
||||||
|
illegal 0.0000 __________ __________ 0.0000
|
||||||
|
illegal 0.0000 0.0000 0.0000 0.0000
|
||||||
|
"""
|
||||||
|
|
||||||
|
q_values_k_3_action_east: """
|
||||||
|
illegal 0.0000 illegal 0.0000 0.0000
|
||||||
|
illegal 0.0000 __________ 0.0000 0.0000
|
||||||
|
illegal 0.0000 illegal 0.0000 0.0000
|
||||||
|
illegal 0.0000 __________ __________ 0.0000
|
||||||
|
illegal 0.0000 0.0000 0.0000 0.0000
|
||||||
|
"""
|
||||||
|
|
||||||
|
q_values_k_3_action_exit: """
|
||||||
|
-3.0000 illegal -1.0000 illegal illegal
|
||||||
|
-3.0000 illegal __________ illegal illegal
|
||||||
|
-3.0000 illegal -1.0000 illegal illegal
|
||||||
|
-3.0000 illegal __________ __________ illegal
|
||||||
|
-4.0000 illegal illegal illegal illegal
|
||||||
|
"""
|
||||||
|
|
||||||
|
q_values_k_3_action_south: """
|
||||||
|
illegal 0.0000 illegal 0.0000 0.0000
|
||||||
|
illegal 0.0000 __________ 0.0000 0.0000
|
||||||
|
illegal 0.0000 illegal 0.0000 0.0000
|
||||||
|
illegal 0.0000 __________ __________ 0.0000
|
||||||
|
illegal 0.0000 0.0000 0.0000 0.0000
|
||||||
|
"""
|
||||||
|
|
||||||
|
q_values_k_3_action_west: """
|
||||||
|
illegal 0.0000 illegal 0.0000 0.0000
|
||||||
|
illegal 0.0000 __________ 0.0000 0.0000
|
||||||
|
illegal 0.0000 illegal 0.0000 0.0000
|
||||||
|
illegal 0.0000 __________ __________ 0.0000
|
||||||
|
illegal 0.0000 0.0000 0.0000 0.0000
|
||||||
|
"""
|
||||||
|
|
||||||
|
weights_k_4: """
|
||||||
|
{'action=east': 0.0,
|
||||||
|
'action=exit': -1.0,
|
||||||
|
'action=north': 0,
|
||||||
|
'action=south': 0.0,
|
||||||
|
'action=west': 0,
|
||||||
|
'x=0': -1.0,
|
||||||
|
'x=1': 0.0,
|
||||||
|
'x=2': 0,
|
||||||
|
'x=3': 0.0,
|
||||||
|
'x=4': 0,
|
||||||
|
'y=0': -1.0,
|
||||||
|
'y=1': 0.0,
|
||||||
|
'y=2': 0,
|
||||||
|
'y=3': 0.0,
|
||||||
|
'y=4': 0,
|
||||||
|
(0, 0): -1.0,
|
||||||
|
(0, 1): 0,
|
||||||
|
(0, 2): 0,
|
||||||
|
(0, 3): 0,
|
||||||
|
(0, 4): 0,
|
||||||
|
(1, 0): 0.0,
|
||||||
|
(1, 1): 0,
|
||||||
|
(1, 2): 0,
|
||||||
|
(1, 3): 0,
|
||||||
|
(1, 4): 0,
|
||||||
|
(2, 0): 0,
|
||||||
|
(2, 2): 0,
|
||||||
|
(2, 4): 0,
|
||||||
|
(3, 0): 0.0,
|
||||||
|
(3, 2): 0,
|
||||||
|
(3, 3): 0,
|
||||||
|
(3, 4): 0,
|
||||||
|
(4, 0): 0,
|
||||||
|
(4, 1): 0,
|
||||||
|
(4, 2): 0,
|
||||||
|
(4, 3): 0,
|
||||||
|
(4, 4): 0}
|
||||||
|
"""
|
||||||
|
|
||||||
|
q_values_k_4_action_north: """
|
||||||
|
illegal 0.0000 illegal 0.0000 0.0000
|
||||||
|
illegal 0.0000 __________ 0.0000 0.0000
|
||||||
|
illegal 0.0000 illegal 0.0000 0.0000
|
||||||
|
illegal 0.0000 __________ __________ 0.0000
|
||||||
|
illegal 0.0000 0.0000 0.0000 0.0000
|
||||||
|
"""
|
||||||
|
|
||||||
|
q_values_k_4_action_east: """
|
||||||
|
illegal 0.0000 illegal 0.0000 0.0000
|
||||||
|
illegal 0.0000 __________ 0.0000 0.0000
|
||||||
|
illegal 0.0000 illegal 0.0000 0.0000
|
||||||
|
illegal 0.0000 __________ __________ 0.0000
|
||||||
|
illegal 0.0000 0.0000 0.0000 0.0000
|
||||||
|
"""
|
||||||
|
|
||||||
|
q_values_k_4_action_exit: """
|
||||||
|
-3.0000 illegal -1.0000 illegal illegal
|
||||||
|
-3.0000 illegal __________ illegal illegal
|
||||||
|
-3.0000 illegal -1.0000 illegal illegal
|
||||||
|
-3.0000 illegal __________ __________ illegal
|
||||||
|
-4.0000 illegal illegal illegal illegal
|
||||||
|
"""
|
||||||
|
|
||||||
|
q_values_k_4_action_south: """
|
||||||
|
illegal 0.0000 illegal 0.0000 0.0000
|
||||||
|
illegal 0.0000 __________ 0.0000 0.0000
|
||||||
|
illegal 0.0000 illegal 0.0000 0.0000
|
||||||
|
illegal 0.0000 __________ __________ 0.0000
|
||||||
|
illegal 0.0000 0.0000 0.0000 0.0000
|
||||||
|
"""
|
||||||
|
|
||||||
|
q_values_k_4_action_west: """
|
||||||
|
illegal 0.0000 illegal 0.0000 0.0000
|
||||||
|
illegal 0.0000 __________ 0.0000 0.0000
|
||||||
|
illegal 0.0000 illegal 0.0000 0.0000
|
||||||
|
illegal 0.0000 __________ __________ 0.0000
|
||||||
|
illegal 0.0000 0.0000 0.0000 0.0000
|
||||||
|
"""
|
||||||
|
|
||||||
|
weights_k_5: """
|
||||||
|
{'action=east': 0.0,
|
||||||
|
'action=exit': -1.0,
|
||||||
|
'action=north': 0,
|
||||||
|
'action=south': 0.0,
|
||||||
|
'action=west': 0.0,
|
||||||
|
'x=0': -1.0,
|
||||||
|
'x=1': 0.0,
|
||||||
|
'x=2': 0,
|
||||||
|
'x=3': 0.0,
|
||||||
|
'x=4': 0.0,
|
||||||
|
'y=0': -1.0,
|
||||||
|
'y=1': 0.0,
|
||||||
|
'y=2': 0,
|
||||||
|
'y=3': 0.0,
|
||||||
|
'y=4': 0.0,
|
||||||
|
(0, 0): -1.0,
|
||||||
|
(0, 1): 0,
|
||||||
|
(0, 2): 0,
|
||||||
|
(0, 3): 0,
|
||||||
|
(0, 4): 0,
|
||||||
|
(1, 0): 0.0,
|
||||||
|
(1, 1): 0,
|
||||||
|
(1, 2): 0,
|
||||||
|
(1, 3): 0,
|
||||||
|
(1, 4): 0,
|
||||||
|
(2, 0): 0,
|
||||||
|
(2, 2): 0,
|
||||||
|
(2, 4): 0,
|
||||||
|
(3, 0): 0.0,
|
||||||
|
(3, 2): 0,
|
||||||
|
(3, 3): 0,
|
||||||
|
(3, 4): 0,
|
||||||
|
(4, 0): 0,
|
||||||
|
(4, 1): 0.0,
|
||||||
|
(4, 2): 0,
|
||||||
|
(4, 3): 0,
|
||||||
|
(4, 4): 0}
|
||||||
|
"""
|
||||||
|
|
||||||
|
q_values_k_5_action_north: """
|
||||||
|
illegal 0.0000 illegal 0.0000 0.0000
|
||||||
|
illegal 0.0000 __________ 0.0000 0.0000
|
||||||
|
illegal 0.0000 illegal 0.0000 0.0000
|
||||||
|
illegal 0.0000 __________ __________ 0.0000
|
||||||
|
illegal 0.0000 0.0000 0.0000 0.0000
|
||||||
|
"""
|
||||||
|
|
||||||
|
q_values_k_5_action_east: """
|
||||||
|
illegal 0.0000 illegal 0.0000 0.0000
|
||||||
|
illegal 0.0000 __________ 0.0000 0.0000
|
||||||
|
illegal 0.0000 illegal 0.0000 0.0000
|
||||||
|
illegal 0.0000 __________ __________ 0.0000
|
||||||
|
illegal 0.0000 0.0000 0.0000 0.0000
|
||||||
|
"""
|
||||||
|
|
||||||
|
q_values_k_5_action_exit: """
|
||||||
|
-3.0000 illegal -1.0000 illegal illegal
|
||||||
|
-3.0000 illegal __________ illegal illegal
|
||||||
|
-3.0000 illegal -1.0000 illegal illegal
|
||||||
|
-3.0000 illegal __________ __________ illegal
|
||||||
|
-4.0000 illegal illegal illegal illegal
|
||||||
|
"""
|
||||||
|
|
||||||
|
q_values_k_5_action_south: """
|
||||||
|
illegal 0.0000 illegal 0.0000 0.0000
|
||||||
|
illegal 0.0000 __________ 0.0000 0.0000
|
||||||
|
illegal 0.0000 illegal 0.0000 0.0000
|
||||||
|
illegal 0.0000 __________ __________ 0.0000
|
||||||
|
illegal 0.0000 0.0000 0.0000 0.0000
|
||||||
|
"""
|
||||||
|
|
||||||
|
q_values_k_5_action_west: """
|
||||||
|
illegal 0.0000 illegal 0.0000 0.0000
|
||||||
|
illegal 0.0000 __________ 0.0000 0.0000
|
||||||
|
illegal 0.0000 illegal 0.0000 0.0000
|
||||||
|
illegal 0.0000 __________ __________ 0.0000
|
||||||
|
illegal 0.0000 0.0000 0.0000 0.0000
|
||||||
|
"""
|
||||||
|
|
||||||
|
weights_k_6: """
|
||||||
|
{'action=east': 0.0,
|
||||||
|
'action=exit': -1.7000000000000002,
|
||||||
|
'action=north': 0,
|
||||||
|
'action=south': 0.0,
|
||||||
|
'action=west': 0.0,
|
||||||
|
'x=0': -1.7000000000000002,
|
||||||
|
'x=1': 0.0,
|
||||||
|
'x=2': 0,
|
||||||
|
'x=3': 0.0,
|
||||||
|
'x=4': 0.0,
|
||||||
|
'y=0': -1.7000000000000002,
|
||||||
|
'y=1': 0.0,
|
||||||
|
'y=2': 0,
|
||||||
|
'y=3': 0.0,
|
||||||
|
'y=4': 0.0,
|
||||||
|
(0, 0): -1.0,
|
||||||
|
(0, 1): 0,
|
||||||
|
(0, 2): 0,
|
||||||
|
(0, 3): -0.7000000000000001,
|
||||||
|
(0, 4): 0,
|
||||||
|
(1, 0): 0.0,
|
||||||
|
(1, 1): 0,
|
||||||
|
(1, 2): 0,
|
||||||
|
(1, 3): 0,
|
||||||
|
(1, 4): 0,
|
||||||
|
(2, 0): 0,
|
||||||
|
(2, 2): 0,
|
||||||
|
(2, 4): 0,
|
||||||
|
(3, 0): 0.0,
|
||||||
|
(3, 2): 0,
|
||||||
|
(3, 3): 0,
|
||||||
|
(3, 4): 0,
|
||||||
|
(4, 0): 0,
|
||||||
|
(4, 1): 0.0,
|
||||||
|
(4, 2): 0,
|
||||||
|
(4, 3): 0,
|
||||||
|
(4, 4): 0}
|
||||||
|
"""
|
||||||
|
|
||||||
|
q_values_k_6_action_north: """
|
||||||
|
illegal 0.0000 illegal 0.0000 0.0000
|
||||||
|
illegal 0.0000 __________ 0.0000 0.0000
|
||||||
|
illegal 0.0000 illegal 0.0000 0.0000
|
||||||
|
illegal 0.0000 __________ __________ 0.0000
|
||||||
|
illegal 0.0000 0.0000 0.0000 0.0000
|
||||||
|
"""
|
||||||
|
|
||||||
|
q_values_k_6_action_east: """
|
||||||
|
illegal 0.0000 illegal 0.0000 0.0000
|
||||||
|
illegal 0.0000 __________ 0.0000 0.0000
|
||||||
|
illegal 0.0000 illegal 0.0000 0.0000
|
||||||
|
illegal 0.0000 __________ __________ 0.0000
|
||||||
|
illegal 0.0000 0.0000 0.0000 0.0000
|
||||||
|
"""
|
||||||
|
|
||||||
|
q_values_k_6_action_exit: """
|
||||||
|
-5.1000 illegal -1.7000 illegal illegal
|
||||||
|
-5.8000 illegal __________ illegal illegal
|
||||||
|
-5.1000 illegal -1.7000 illegal illegal
|
||||||
|
-5.1000 illegal __________ __________ illegal
|
||||||
|
-6.1000 illegal illegal illegal illegal
|
||||||
|
"""
|
||||||
|
|
||||||
|
q_values_k_6_action_south: """
|
||||||
|
illegal 0.0000 illegal 0.0000 0.0000
|
||||||
|
illegal 0.0000 __________ 0.0000 0.0000
|
||||||
|
illegal 0.0000 illegal 0.0000 0.0000
|
||||||
|
illegal 0.0000 __________ __________ 0.0000
|
||||||
|
illegal 0.0000 0.0000 0.0000 0.0000
|
||||||
|
"""
|
||||||
|
|
||||||
|
q_values_k_6_action_west: """
|
||||||
|
illegal 0.0000 illegal 0.0000 0.0000
|
||||||
|
illegal 0.0000 __________ 0.0000 0.0000
|
||||||
|
illegal 0.0000 illegal 0.0000 0.0000
|
||||||
|
illegal 0.0000 __________ __________ 0.0000
|
||||||
|
illegal 0.0000 0.0000 0.0000 0.0000
|
||||||
|
"""
|
||||||
|
|
||||||
|
weights_k_7: """
|
||||||
|
{'action=east': 0.0,
|
||||||
|
'action=exit': -1.4300000000000002,
|
||||||
|
'action=north': 0,
|
||||||
|
'action=south': 0.0,
|
||||||
|
'action=west': 0.0,
|
||||||
|
'x=0': -1.7000000000000002,
|
||||||
|
'x=1': 0.0,
|
||||||
|
'x=2': 0.27,
|
||||||
|
'x=3': 0.0,
|
||||||
|
'x=4': 0.0,
|
||||||
|
'y=0': -1.7000000000000002,
|
||||||
|
'y=1': 0.0,
|
||||||
|
'y=2': 0.27,
|
||||||
|
'y=3': 0.0,
|
||||||
|
'y=4': 0.0,
|
||||||
|
(0, 0): -1.0,
|
||||||
|
(0, 1): 0,
|
||||||
|
(0, 2): 0,
|
||||||
|
(0, 3): -0.7000000000000001,
|
||||||
|
(0, 4): 0,
|
||||||
|
(1, 0): 0.0,
|
||||||
|
(1, 1): 0,
|
||||||
|
(1, 2): 0,
|
||||||
|
(1, 3): 0,
|
||||||
|
(1, 4): 0,
|
||||||
|
(2, 0): 0,
|
||||||
|
(2, 2): 0.27,
|
||||||
|
(2, 4): 0,
|
||||||
|
(3, 0): 0.0,
|
||||||
|
(3, 2): 0,
|
||||||
|
(3, 3): 0,
|
||||||
|
(3, 4): 0,
|
||||||
|
(4, 0): 0,
|
||||||
|
(4, 1): 0.0,
|
||||||
|
(4, 2): 0,
|
||||||
|
(4, 3): 0,
|
||||||
|
(4, 4): 0}
|
||||||
|
"""
|
||||||
|
|
||||||
|
q_values_k_7_action_north: """
|
||||||
|
illegal 0.0000 illegal 0.0000 0.0000
|
||||||
|
illegal 0.0000 __________ 0.0000 0.0000
|
||||||
|
illegal 0.0000 illegal 0.0000 0.0000
|
||||||
|
illegal 0.0000 __________ __________ 0.0000
|
||||||
|
illegal 0.0000 0.5400 0.0000 0.0000
|
||||||
|
"""
|
||||||
|
|
||||||
|
q_values_k_7_action_east: """
|
||||||
|
illegal 0.0000 illegal 0.0000 0.0000
|
||||||
|
illegal 0.0000 __________ 0.0000 0.0000
|
||||||
|
illegal 0.0000 illegal 0.0000 0.0000
|
||||||
|
illegal 0.0000 __________ __________ 0.0000
|
||||||
|
illegal 0.0000 0.5400 0.0000 0.0000
|
||||||
|
"""
|
||||||
|
|
||||||
|
q_values_k_7_action_exit: """
|
||||||
|
-4.8300 illegal -0.8900 illegal illegal
|
||||||
|
-5.5300 illegal __________ illegal illegal
|
||||||
|
-4.8300 illegal -0.6200 illegal illegal
|
||||||
|
-4.8300 illegal __________ __________ illegal
|
||||||
|
-5.8300 illegal illegal illegal illegal
|
||||||
|
"""
|
||||||
|
|
||||||
|
q_values_k_7_action_south: """
|
||||||
|
illegal 0.0000 illegal 0.0000 0.0000
|
||||||
|
illegal 0.0000 __________ 0.0000 0.0000
|
||||||
|
illegal 0.0000 illegal 0.0000 0.0000
|
||||||
|
illegal 0.0000 __________ __________ 0.0000
|
||||||
|
illegal 0.0000 0.5400 0.0000 0.0000
|
||||||
|
"""
|
||||||
|
|
||||||
|
q_values_k_7_action_west: """
|
||||||
|
illegal 0.0000 illegal 0.0000 0.0000
|
||||||
|
illegal 0.0000 __________ 0.0000 0.0000
|
||||||
|
illegal 0.0000 illegal 0.0000 0.0000
|
||||||
|
illegal 0.0000 __________ __________ 0.0000
|
||||||
|
illegal 0.0000 0.5400 0.0000 0.0000
|
||||||
|
"""
|
||||||
|
|
||||||
|
weights_k_8: """
|
||||||
|
{'action=east': 0.0,
|
||||||
|
'action=exit': -1.947,
|
||||||
|
'action=north': 0,
|
||||||
|
'action=south': 0.0,
|
||||||
|
'action=west': 0.0,
|
||||||
|
'x=0': -2.217,
|
||||||
|
'x=1': 0.0,
|
||||||
|
'x=2': 0.27,
|
||||||
|
'x=3': 0.0,
|
||||||
|
'x=4': 0.0,
|
||||||
|
'y=0': -2.217,
|
||||||
|
'y=1': 0.0,
|
||||||
|
'y=2': 0.27,
|
||||||
|
'y=3': 0.0,
|
||||||
|
'y=4': 0.0,
|
||||||
|
(0, 0): -1.0,
|
||||||
|
(0, 1): -0.517,
|
||||||
|
(0, 2): 0,
|
||||||
|
(0, 3): -0.7000000000000001,
|
||||||
|
(0, 4): 0,
|
||||||
|
(1, 0): 0.0,
|
||||||
|
(1, 1): 0,
|
||||||
|
(1, 2): 0,
|
||||||
|
(1, 3): 0,
|
||||||
|
(1, 4): 0,
|
||||||
|
(2, 0): 0,
|
||||||
|
(2, 2): 0.27,
|
||||||
|
(2, 4): 0,
|
||||||
|
(3, 0): 0.0,
|
||||||
|
(3, 2): 0,
|
||||||
|
(3, 3): 0,
|
||||||
|
(3, 4): 0,
|
||||||
|
(4, 0): 0,
|
||||||
|
(4, 1): 0.0,
|
||||||
|
(4, 2): 0,
|
||||||
|
(4, 3): 0,
|
||||||
|
(4, 4): 0}
|
||||||
|
"""
|
||||||
|
|
||||||
|
q_values_k_8_action_north: """
|
||||||
|
illegal 0.0000 illegal 0.0000 0.0000
|
||||||
|
illegal 0.0000 __________ 0.0000 0.0000
|
||||||
|
illegal 0.0000 illegal 0.0000 0.0000
|
||||||
|
illegal 0.0000 __________ __________ 0.0000
|
||||||
|
illegal 0.0000 0.5400 0.0000 0.0000
|
||||||
|
"""
|
||||||
|
|
||||||
|
q_values_k_8_action_east: """
|
||||||
|
illegal 0.0000 illegal 0.0000 0.0000
|
||||||
|
illegal 0.0000 __________ 0.0000 0.0000
|
||||||
|
illegal 0.0000 illegal 0.0000 0.0000
|
||||||
|
illegal 0.0000 __________ __________ 0.0000
|
||||||
|
illegal 0.0000 0.5400 0.0000 0.0000
|
||||||
|
"""
|
||||||
|
|
||||||
|
q_values_k_8_action_exit: """
|
||||||
|
-6.3810 illegal -1.4070 illegal illegal
|
||||||
|
-7.0810 illegal __________ illegal illegal
|
||||||
|
-6.3810 illegal -1.1370 illegal illegal
|
||||||
|
-6.8980 illegal __________ __________ illegal
|
||||||
|
-7.3810 illegal illegal illegal illegal
|
||||||
|
"""
|
||||||
|
|
||||||
|
q_values_k_8_action_south: """
|
||||||
|
illegal 0.0000 illegal 0.0000 0.0000
|
||||||
|
illegal 0.0000 __________ 0.0000 0.0000
|
||||||
|
illegal 0.0000 illegal 0.0000 0.0000
|
||||||
|
illegal 0.0000 __________ __________ 0.0000
|
||||||
|
illegal 0.0000 0.5400 0.0000 0.0000
|
||||||
|
"""
|
||||||
|
|
||||||
|
q_values_k_8_action_west: """
|
||||||
|
illegal 0.0000 illegal 0.0000 0.0000
|
||||||
|
illegal 0.0000 __________ 0.0000 0.0000
|
||||||
|
illegal 0.0000 illegal 0.0000 0.0000
|
||||||
|
illegal 0.0000 __________ __________ 0.0000
|
||||||
|
illegal 0.0000 0.5400 0.0000 0.0000
|
||||||
|
"""
|
||||||
|
|
||||||
|
weights_k_9: """
|
||||||
|
{'action=east': 0.0,
|
||||||
|
'action=exit': -1.947,
|
||||||
|
'action=north': -0.62082,
|
||||||
|
'action=south': 0.0,
|
||||||
|
'action=west': 0.0,
|
||||||
|
'x=0': -2.217,
|
||||||
|
'x=1': -0.62082,
|
||||||
|
'x=2': 0.27,
|
||||||
|
'x=3': 0.0,
|
||||||
|
'x=4': 0.0,
|
||||||
|
'y=0': -2.217,
|
||||||
|
'y=1': -0.62082,
|
||||||
|
'y=2': 0.27,
|
||||||
|
'y=3': 0.0,
|
||||||
|
'y=4': 0.0,
|
||||||
|
(0, 0): -1.0,
|
||||||
|
(0, 1): -0.517,
|
||||||
|
(0, 2): 0,
|
||||||
|
(0, 3): -0.7000000000000001,
|
||||||
|
(0, 4): 0,
|
||||||
|
(1, 0): 0.0,
|
||||||
|
(1, 1): -0.62082,
|
||||||
|
(1, 2): 0,
|
||||||
|
(1, 3): 0,
|
||||||
|
(1, 4): 0,
|
||||||
|
(2, 0): 0,
|
||||||
|
(2, 2): 0.27,
|
||||||
|
(2, 4): 0,
|
||||||
|
(3, 0): 0.0,
|
||||||
|
(3, 2): 0,
|
||||||
|
(3, 3): 0,
|
||||||
|
(3, 4): 0,
|
||||||
|
(4, 0): 0,
|
||||||
|
(4, 1): 0.0,
|
||||||
|
(4, 2): 0,
|
||||||
|
(4, 3): 0,
|
||||||
|
(4, 4): 0}
|
||||||
|
"""
|
||||||
|
|
||||||
|
q_values_k_9_action_north: """
|
||||||
|
illegal -1.8625 illegal -0.6208 -0.6208
|
||||||
|
illegal -1.8625 __________ -0.6208 -0.6208
|
||||||
|
illegal -1.8625 illegal -0.6208 -0.6208
|
||||||
|
illegal -2.4833 __________ __________ -0.6208
|
||||||
|
illegal -1.8625 -0.0808 -0.6208 -0.6208
|
||||||
|
"""
|
||||||
|
|
||||||
|
q_values_k_9_action_east: """
|
||||||
|
illegal -1.2416 illegal 0.0000 0.0000
|
||||||
|
illegal -1.2416 __________ 0.0000 0.0000
|
||||||
|
illegal -1.2416 illegal 0.0000 0.0000
|
||||||
|
illegal -1.8625 __________ __________ 0.0000
|
||||||
|
illegal -1.2416 0.5400 0.0000 0.0000
|
||||||
|
"""
|
||||||
|
|
||||||
|
q_values_k_9_action_exit: """
|
||||||
|
-6.3810 illegal -1.4070 illegal illegal
|
||||||
|
-7.0810 illegal __________ illegal illegal
|
||||||
|
-6.3810 illegal -1.1370 illegal illegal
|
||||||
|
-6.8980 illegal __________ __________ illegal
|
||||||
|
-7.3810 illegal illegal illegal illegal
|
||||||
|
"""
|
||||||
|
|
||||||
|
q_values_k_9_action_south: """
|
||||||
|
illegal -1.2416 illegal 0.0000 0.0000
|
||||||
|
illegal -1.2416 __________ 0.0000 0.0000
|
||||||
|
illegal -1.2416 illegal 0.0000 0.0000
|
||||||
|
illegal -1.8625 __________ __________ 0.0000
|
||||||
|
illegal -1.2416 0.5400 0.0000 0.0000
|
||||||
|
"""
|
||||||
|
|
||||||
|
q_values_k_9_action_west: """
|
||||||
|
illegal -1.2416 illegal 0.0000 0.0000
|
||||||
|
illegal -1.2416 __________ 0.0000 0.0000
|
||||||
|
illegal -1.2416 illegal 0.0000 0.0000
|
||||||
|
illegal -1.8625 __________ __________ 0.0000
|
||||||
|
illegal -1.2416 0.5400 0.0000 0.0000
|
||||||
|
"""
|
||||||
|
|
||||||
|
weights_k_3000: """
|
||||||
|
{'action=east': 6.719916513522846,
|
||||||
|
'action=exit': -2.2444981376861555,
|
||||||
|
'action=north': 4.568574519923728,
|
||||||
|
'action=south': 3.761510351874819,
|
||||||
|
'action=west': 1.2828606322891556,
|
||||||
|
'x=0': -3.604063955849794,
|
||||||
|
'x=1': 0.6731476152061693,
|
||||||
|
'x=2': 4.000208353074704,
|
||||||
|
'x=3': 5.988311380073477,
|
||||||
|
'x=4': 7.0307604874198235,
|
||||||
|
'y=0': -3.604063955849794,
|
||||||
|
'y=1': 0.6731476152061693,
|
||||||
|
'y=2': 4.000208353074704,
|
||||||
|
'y=3': 5.988311380073477,
|
||||||
|
'y=4': 7.0307604874198235,
|
||||||
|
(0, 0): -0.7073688447583666,
|
||||||
|
(0, 1): -0.7542862401704076,
|
||||||
|
(0, 2): -0.7043014501203066,
|
||||||
|
(0, 3): -0.7433344649617668,
|
||||||
|
(0, 4): -0.6947729558389527,
|
||||||
|
(1, 0): 2.364273811399719,
|
||||||
|
(1, 1): -0.2695405704605499,
|
||||||
|
(1, 2): -0.7105979212702271,
|
||||||
|
(1, 3): -1.4866826750327933,
|
||||||
|
(1, 4): 0.7756949705700219,
|
||||||
|
(2, 0): 2.64064253491107,
|
||||||
|
(2, 2): -3.7381118310263166,
|
||||||
|
(2, 4): 5.097677649189953,
|
||||||
|
(3, 0): 2.505262939441149,
|
||||||
|
(3, 2): 0.27218788923837256,
|
||||||
|
(3, 3): 2.2611084206093195,
|
||||||
|
(3, 4): 0.9497521307846304,
|
||||||
|
(4, 0): 1.7330586015291545,
|
||||||
|
(4, 1): 0.980194046153168,
|
||||||
|
(4, 2): 0.78786289128181,
|
||||||
|
(4, 3): 1.493343270762865,
|
||||||
|
(4, 4): 2.0363016776928333}
|
||||||
|
"""
|
||||||
|
|
||||||
|
q_values_k_3000_action_north: """
|
||||||
|
illegal 6.6906 illegal 17.4949 20.6664
|
||||||
|
illegal 4.4282 __________ 18.8063 20.1234
|
||||||
|
illegal 5.2043 illegal 16.8174 19.4180
|
||||||
|
illegal 5.6453 __________ __________ 19.6103
|
||||||
|
illegal 8.2791 15.2096 19.0505 20.3632
|
||||||
|
"""
|
||||||
|
|
||||||
|
q_values_k_3000_action_east: """
|
||||||
|
illegal 8.8419 illegal 19.6463 22.8177
|
||||||
|
illegal 6.5795 __________ 20.9576 22.2748
|
||||||
|
illegal 7.3556 illegal 18.9687 21.5693
|
||||||
|
illegal 7.7967 __________ __________ 21.7616
|
||||||
|
illegal 10.4305 17.3610 21.2018 22.5145
|
||||||
|
"""
|
||||||
|
|
||||||
|
q_values_k_3000_action_exit: """
|
||||||
|
-10.1474 illegal 10.8536 illegal illegal
|
||||||
|
-10.1960 illegal __________ illegal illegal
|
||||||
|
-10.1569 illegal 2.0178 illegal illegal
|
||||||
|
-10.2069 illegal __________ __________ illegal
|
||||||
|
-10.1600 illegal illegal illegal illegal
|
||||||
|
"""
|
||||||
|
|
||||||
|
q_values_k_3000_action_south: """
|
||||||
|
illegal 5.8835 illegal 16.6879 19.8593
|
||||||
|
illegal 3.6211 __________ 17.9992 19.3164
|
||||||
|
illegal 4.3972 illegal 16.0103 18.6109
|
||||||
|
illegal 4.8383 __________ __________ 18.8032
|
||||||
|
illegal 7.4721 14.4026 18.2434 19.5561
|
||||||
|
"""
|
||||||
|
|
||||||
|
q_values_k_3000_action_west: """
|
||||||
|
illegal 3.4049 illegal 14.2092 17.3807
|
||||||
|
illegal 1.1425 __________ 15.5206 16.8377
|
||||||
|
illegal 1.9186 illegal 13.5317 16.1322
|
||||||
|
illegal 2.3596 __________ __________ 16.3246
|
||||||
|
illegal 4.9934 11.9239 15.7647 17.0774
|
||||||
|
"""
|
||||||
|
|
||||||
25
p3_rl/test_cases/q8/5-coord-extractor.test
Normal file
25
p3_rl/test_cases/q8/5-coord-extractor.test
Normal file
@@ -0,0 +1,25 @@
|
|||||||
|
class: "ApproximateQLearningTest"
|
||||||
|
|
||||||
|
# GridWorld specification
|
||||||
|
# _ is empty space
|
||||||
|
# numbers are terminal states with that value
|
||||||
|
# # is a wall
|
||||||
|
# S is a start state
|
||||||
|
#
|
||||||
|
grid: """
|
||||||
|
-10 _ 10 _ _
|
||||||
|
-10 _ # _ _
|
||||||
|
-10 _ 1 _ _
|
||||||
|
-10 _ # # _
|
||||||
|
-10 S _ _ _
|
||||||
|
"""
|
||||||
|
discount: "0.9"
|
||||||
|
noise: "0.2"
|
||||||
|
livingReward: "0.0"
|
||||||
|
epsilon: "0.2"
|
||||||
|
learningRate: "0.1"
|
||||||
|
numExperiences: "3000"
|
||||||
|
valueIterations: "100"
|
||||||
|
iterations: "10000"
|
||||||
|
extractor: "CoordinateExtractor"
|
||||||
|
|
||||||
2
p3_rl/test_cases/q8/CONFIG
Normal file
2
p3_rl/test_cases/q8/CONFIG
Normal file
@@ -0,0 +1,2 @@
|
|||||||
|
max_points: "3"
|
||||||
|
class: "PassAllTestsQuestion"
|
||||||
81
p3_rl/textDisplay.py
Normal file
81
p3_rl/textDisplay.py
Normal file
@@ -0,0 +1,81 @@
|
|||||||
|
# textDisplay.py
|
||||||
|
# --------------
|
||||||
|
# Licensing Information: You are free to use or extend these projects for
|
||||||
|
# educational purposes provided that (1) you do not distribute or publish
|
||||||
|
# solutions, (2) you retain this notice, and (3) you provide clear
|
||||||
|
# attribution to UC Berkeley, including a link to http://ai.berkeley.edu.
|
||||||
|
#
|
||||||
|
# Attribution Information: The Pacman AI projects were developed at UC Berkeley.
|
||||||
|
# The core projects and autograders were primarily created by John DeNero
|
||||||
|
# (denero@cs.berkeley.edu) and Dan Klein (klein@cs.berkeley.edu).
|
||||||
|
# Student side autograding was added by Brad Miller, Nick Hay, and
|
||||||
|
# Pieter Abbeel (pabbeel@cs.berkeley.edu).
|
||||||
|
|
||||||
|
|
||||||
|
import time
|
||||||
|
try:
|
||||||
|
import pacman
|
||||||
|
except:
|
||||||
|
pass
|
||||||
|
|
||||||
|
DRAW_EVERY = 1
|
||||||
|
SLEEP_TIME = 0 # This can be overwritten by __init__
|
||||||
|
DISPLAY_MOVES = False
|
||||||
|
QUIET = False # Supresses output
|
||||||
|
|
||||||
|
class NullGraphics:
|
||||||
|
def initialize(self, state, isBlue = False):
|
||||||
|
pass
|
||||||
|
|
||||||
|
def update(self, state):
|
||||||
|
pass
|
||||||
|
|
||||||
|
def checkNullDisplay(self):
|
||||||
|
return True
|
||||||
|
|
||||||
|
def pause(self):
|
||||||
|
time.sleep(SLEEP_TIME)
|
||||||
|
|
||||||
|
def draw(self, state):
|
||||||
|
print state
|
||||||
|
|
||||||
|
def updateDistributions(self, dist):
|
||||||
|
pass
|
||||||
|
|
||||||
|
def finish(self):
|
||||||
|
pass
|
||||||
|
|
||||||
|
class PacmanGraphics:
|
||||||
|
def __init__(self, speed=None):
|
||||||
|
if speed != None:
|
||||||
|
global SLEEP_TIME
|
||||||
|
SLEEP_TIME = speed
|
||||||
|
|
||||||
|
def initialize(self, state, isBlue = False):
|
||||||
|
self.draw(state)
|
||||||
|
self.pause()
|
||||||
|
self.turn = 0
|
||||||
|
self.agentCounter = 0
|
||||||
|
|
||||||
|
def update(self, state):
|
||||||
|
numAgents = len(state.agentStates)
|
||||||
|
self.agentCounter = (self.agentCounter + 1) % numAgents
|
||||||
|
if self.agentCounter == 0:
|
||||||
|
self.turn += 1
|
||||||
|
if DISPLAY_MOVES:
|
||||||
|
ghosts = [pacman.nearestPoint(state.getGhostPosition(i)) for i in range(1, numAgents)]
|
||||||
|
print "%4d) P: %-8s" % (self.turn, str(pacman.nearestPoint(state.getPacmanPosition()))),'| Score: %-5d' % state.score,'| Ghosts:', ghosts
|
||||||
|
if self.turn % DRAW_EVERY == 0:
|
||||||
|
self.draw(state)
|
||||||
|
self.pause()
|
||||||
|
if state._win or state._lose:
|
||||||
|
self.draw(state)
|
||||||
|
|
||||||
|
def pause(self):
|
||||||
|
time.sleep(SLEEP_TIME)
|
||||||
|
|
||||||
|
def draw(self, state):
|
||||||
|
print state
|
||||||
|
|
||||||
|
def finish(self):
|
||||||
|
pass
|
||||||
324
p3_rl/textGridworldDisplay.py
Normal file
324
p3_rl/textGridworldDisplay.py
Normal file
@@ -0,0 +1,324 @@
|
|||||||
|
# textGridworldDisplay.py
|
||||||
|
# -----------------------
|
||||||
|
# Licensing Information: You are free to use or extend these projects for
|
||||||
|
# educational purposes provided that (1) you do not distribute or publish
|
||||||
|
# solutions, (2) you retain this notice, and (3) you provide clear
|
||||||
|
# attribution to UC Berkeley, including a link to http://ai.berkeley.edu.
|
||||||
|
#
|
||||||
|
# Attribution Information: The Pacman AI projects were developed at UC Berkeley.
|
||||||
|
# The core projects and autograders were primarily created by John DeNero
|
||||||
|
# (denero@cs.berkeley.edu) and Dan Klein (klein@cs.berkeley.edu).
|
||||||
|
# Student side autograding was added by Brad Miller, Nick Hay, and
|
||||||
|
# Pieter Abbeel (pabbeel@cs.berkeley.edu).
|
||||||
|
|
||||||
|
|
||||||
|
import util
|
||||||
|
|
||||||
|
class TextGridworldDisplay:
|
||||||
|
|
||||||
|
def __init__(self, gridworld):
|
||||||
|
self.gridworld = gridworld
|
||||||
|
|
||||||
|
def start(self):
|
||||||
|
pass
|
||||||
|
|
||||||
|
def pause(self):
|
||||||
|
pass
|
||||||
|
|
||||||
|
def displayValues(self, agent, currentState = None, message = None):
|
||||||
|
if message != None:
|
||||||
|
print message
|
||||||
|
values = util.Counter()
|
||||||
|
policy = {}
|
||||||
|
states = self.gridworld.getStates()
|
||||||
|
for state in states:
|
||||||
|
values[state] = agent.getValue(state)
|
||||||
|
policy[state] = agent.getPolicy(state)
|
||||||
|
prettyPrintValues(self.gridworld, values, policy, currentState)
|
||||||
|
|
||||||
|
def displayNullValues(self, agent, currentState = None, message = None):
|
||||||
|
if message != None: print message
|
||||||
|
prettyPrintNullValues(self.gridworld, currentState)
|
||||||
|
|
||||||
|
def displayQValues(self, agent, currentState = None, message = None):
|
||||||
|
if message != None: print message
|
||||||
|
qValues = util.Counter()
|
||||||
|
states = self.gridworld.getStates()
|
||||||
|
for state in states:
|
||||||
|
for action in self.gridworld.getPossibleActions(state):
|
||||||
|
qValues[(state, action)] = agent.getQValue(state, action)
|
||||||
|
prettyPrintQValues(self.gridworld, qValues, currentState)
|
||||||
|
|
||||||
|
|
||||||
|
def prettyPrintValues(gridWorld, values, policy=None, currentState = None):
|
||||||
|
grid = gridWorld.grid
|
||||||
|
maxLen = 11
|
||||||
|
newRows = []
|
||||||
|
for y in range(grid.height):
|
||||||
|
newRow = []
|
||||||
|
for x in range(grid.width):
|
||||||
|
state = (x, y)
|
||||||
|
value = values[state]
|
||||||
|
action = None
|
||||||
|
if policy != None and state in policy:
|
||||||
|
action = policy[state]
|
||||||
|
actions = gridWorld.getPossibleActions(state)
|
||||||
|
if action not in actions and 'exit' in actions:
|
||||||
|
action = 'exit'
|
||||||
|
valString = None
|
||||||
|
if action == 'exit':
|
||||||
|
valString = border('%.2f' % value)
|
||||||
|
else:
|
||||||
|
valString = '\n\n%.2f\n\n' % value
|
||||||
|
valString += ' '*maxLen
|
||||||
|
if grid[x][y] == 'S':
|
||||||
|
valString = '\n\nS: %.2f\n\n' % value
|
||||||
|
valString += ' '*maxLen
|
||||||
|
if grid[x][y] == '#':
|
||||||
|
valString = '\n#####\n#####\n#####\n'
|
||||||
|
valString += ' '*maxLen
|
||||||
|
pieces = [valString]
|
||||||
|
text = ("\n".join(pieces)).split('\n')
|
||||||
|
if currentState == state:
|
||||||
|
l = len(text[1])
|
||||||
|
if l == 0:
|
||||||
|
text[1] = '*'
|
||||||
|
else:
|
||||||
|
text[1] = "|" + ' ' * int((l-1)/2-1) + '*' + ' ' * int((l)/2-1) + "|"
|
||||||
|
if action == 'east':
|
||||||
|
text[2] = ' ' + text[2] + ' >'
|
||||||
|
elif action == 'west':
|
||||||
|
text[2] = '< ' + text[2] + ' '
|
||||||
|
elif action == 'north':
|
||||||
|
text[0] = ' ' * int(maxLen/2) + '^' +' ' * int(maxLen/2)
|
||||||
|
elif action == 'south':
|
||||||
|
text[4] = ' ' * int(maxLen/2) + 'v' +' ' * int(maxLen/2)
|
||||||
|
newCell = "\n".join(text)
|
||||||
|
newRow.append(newCell)
|
||||||
|
newRows.append(newRow)
|
||||||
|
numCols = grid.width
|
||||||
|
for rowNum, row in enumerate(newRows):
|
||||||
|
row.insert(0,"\n\n"+str(rowNum))
|
||||||
|
newRows.reverse()
|
||||||
|
colLabels = [str(colNum) for colNum in range(numCols)]
|
||||||
|
colLabels.insert(0,' ')
|
||||||
|
finalRows = [colLabels] + newRows
|
||||||
|
print indent(finalRows,separateRows=True,delim='|', prefix='|',postfix='|', justify='center',hasHeader=True)
|
||||||
|
|
||||||
|
|
||||||
|
def prettyPrintNullValues(gridWorld, currentState = None):
|
||||||
|
grid = gridWorld.grid
|
||||||
|
maxLen = 11
|
||||||
|
newRows = []
|
||||||
|
for y in range(grid.height):
|
||||||
|
newRow = []
|
||||||
|
for x in range(grid.width):
|
||||||
|
state = (x, y)
|
||||||
|
|
||||||
|
# value = values[state]
|
||||||
|
|
||||||
|
action = None
|
||||||
|
# if policy != None and state in policy:
|
||||||
|
# action = policy[state]
|
||||||
|
#
|
||||||
|
actions = gridWorld.getPossibleActions(state)
|
||||||
|
|
||||||
|
if action not in actions and 'exit' in actions:
|
||||||
|
action = 'exit'
|
||||||
|
|
||||||
|
valString = None
|
||||||
|
# if action == 'exit':
|
||||||
|
# valString = border('%.2f' % value)
|
||||||
|
# else:
|
||||||
|
# valString = '\n\n%.2f\n\n' % value
|
||||||
|
# valString += ' '*maxLen
|
||||||
|
|
||||||
|
if grid[x][y] == 'S':
|
||||||
|
valString = '\n\nS\n\n'
|
||||||
|
valString += ' '*maxLen
|
||||||
|
elif grid[x][y] == '#':
|
||||||
|
valString = '\n#####\n#####\n#####\n'
|
||||||
|
valString += ' '*maxLen
|
||||||
|
elif type(grid[x][y]) == float or type(grid[x][y]) == int:
|
||||||
|
valString = border('%.2f' % float(grid[x][y]))
|
||||||
|
else: valString = border(' ')
|
||||||
|
pieces = [valString]
|
||||||
|
|
||||||
|
text = ("\n".join(pieces)).split('\n')
|
||||||
|
|
||||||
|
if currentState == state:
|
||||||
|
l = len(text[1])
|
||||||
|
if l == 0:
|
||||||
|
text[1] = '*'
|
||||||
|
else:
|
||||||
|
text[1] = "|" + ' ' * int((l-1)/2-1) + '*' + ' ' * int((l)/2-1) + "|"
|
||||||
|
|
||||||
|
if action == 'east':
|
||||||
|
text[2] = ' ' + text[2] + ' >'
|
||||||
|
elif action == 'west':
|
||||||
|
text[2] = '< ' + text[2] + ' '
|
||||||
|
elif action == 'north':
|
||||||
|
text[0] = ' ' * int(maxLen/2) + '^' +' ' * int(maxLen/2)
|
||||||
|
elif action == 'south':
|
||||||
|
text[4] = ' ' * int(maxLen/2) + 'v' +' ' * int(maxLen/2)
|
||||||
|
newCell = "\n".join(text)
|
||||||
|
newRow.append(newCell)
|
||||||
|
newRows.append(newRow)
|
||||||
|
numCols = grid.width
|
||||||
|
for rowNum, row in enumerate(newRows):
|
||||||
|
row.insert(0,"\n\n"+str(rowNum))
|
||||||
|
newRows.reverse()
|
||||||
|
colLabels = [str(colNum) for colNum in range(numCols)]
|
||||||
|
colLabels.insert(0,' ')
|
||||||
|
finalRows = [colLabels] + newRows
|
||||||
|
print indent(finalRows,separateRows=True,delim='|', prefix='|',postfix='|', justify='center',hasHeader=True)
|
||||||
|
|
||||||
|
def prettyPrintQValues(gridWorld, qValues, currentState=None):
|
||||||
|
grid = gridWorld.grid
|
||||||
|
maxLen = 11
|
||||||
|
newRows = []
|
||||||
|
for y in range(grid.height):
|
||||||
|
newRow = []
|
||||||
|
for x in range(grid.width):
|
||||||
|
state = (x, y)
|
||||||
|
actions = gridWorld.getPossibleActions(state)
|
||||||
|
if actions == None or len(actions) == 0:
|
||||||
|
actions = [None]
|
||||||
|
bestQ = max([qValues[(state, action)] for action in actions])
|
||||||
|
bestActions = [action for action in actions if qValues[(state, action)] == bestQ]
|
||||||
|
|
||||||
|
# display cell
|
||||||
|
qStrings = dict([(action, "%.2f" % qValues[(state, action)]) for action in actions])
|
||||||
|
northString = ('north' in qStrings and qStrings['north']) or ' '
|
||||||
|
southString = ('south' in qStrings and qStrings['south']) or ' '
|
||||||
|
eastString = ('east' in qStrings and qStrings['east']) or ' '
|
||||||
|
westString = ('west' in qStrings and qStrings['west']) or ' '
|
||||||
|
exitString = ('exit' in qStrings and qStrings['exit']) or ' '
|
||||||
|
|
||||||
|
eastLen = len(eastString)
|
||||||
|
westLen = len(westString)
|
||||||
|
if eastLen < westLen:
|
||||||
|
eastString = ' '*(westLen-eastLen)+eastString
|
||||||
|
if westLen < eastLen:
|
||||||
|
westString = westString+' '*(eastLen-westLen)
|
||||||
|
|
||||||
|
if 'north' in bestActions:
|
||||||
|
northString = '/'+northString+'\\'
|
||||||
|
if 'south' in bestActions:
|
||||||
|
southString = '\\'+southString+'/'
|
||||||
|
if 'east' in bestActions:
|
||||||
|
eastString = ''+eastString+'>'
|
||||||
|
else:
|
||||||
|
eastString = ''+eastString+' '
|
||||||
|
if 'west' in bestActions:
|
||||||
|
westString = '<'+westString+''
|
||||||
|
else:
|
||||||
|
westString = ' '+westString+''
|
||||||
|
if 'exit' in bestActions:
|
||||||
|
exitString = '[ '+exitString+' ]'
|
||||||
|
|
||||||
|
|
||||||
|
ewString = westString + " " + eastString
|
||||||
|
if state == currentState:
|
||||||
|
ewString = westString + " * " + eastString
|
||||||
|
if state == gridWorld.getStartState():
|
||||||
|
ewString = westString + " S " + eastString
|
||||||
|
if state == currentState and state == gridWorld.getStartState():
|
||||||
|
ewString = westString + " S:* " + eastString
|
||||||
|
|
||||||
|
text = [northString, "\n"+exitString, ewString, ' '*maxLen+"\n", southString]
|
||||||
|
|
||||||
|
if grid[x][y] == '#':
|
||||||
|
text = ['', '\n#####\n#####\n#####', '']
|
||||||
|
|
||||||
|
newCell = "\n".join(text)
|
||||||
|
newRow.append(newCell)
|
||||||
|
newRows.append(newRow)
|
||||||
|
numCols = grid.width
|
||||||
|
for rowNum, row in enumerate(newRows):
|
||||||
|
row.insert(0,"\n\n\n"+str(rowNum))
|
||||||
|
newRows.reverse()
|
||||||
|
colLabels = [str(colNum) for colNum in range(numCols)]
|
||||||
|
colLabels.insert(0,' ')
|
||||||
|
finalRows = [colLabels] + newRows
|
||||||
|
|
||||||
|
print indent(finalRows,separateRows=True,delim='|',prefix='|',postfix='|', justify='center',hasHeader=True)
|
||||||
|
|
||||||
|
def border(text):
|
||||||
|
length = len(text)
|
||||||
|
pieces = ['-' * (length+2), '|'+' ' * (length+2)+'|', ' | '+text+' | ', '|'+' ' * (length+2)+'|','-' * (length+2)]
|
||||||
|
return '\n'.join(pieces)
|
||||||
|
|
||||||
|
# INDENTING CODE
|
||||||
|
|
||||||
|
# Indenting code based on a post from George Sakkis
|
||||||
|
# (http://aspn.activestate.com/ASPN/Cookbook/Python/Recipe/267662)
|
||||||
|
|
||||||
|
import cStringIO,operator
|
||||||
|
|
||||||
|
def indent(rows, hasHeader=False, headerChar='-', delim=' | ', justify='left',
|
||||||
|
separateRows=False, prefix='', postfix='', wrapfunc=lambda x:x):
|
||||||
|
"""Indents a table by column.
|
||||||
|
- rows: A sequence of sequences of items, one sequence per row.
|
||||||
|
- hasHeader: True if the first row consists of the columns' names.
|
||||||
|
- headerChar: Character to be used for the row separator line
|
||||||
|
(if hasHeader==True or separateRows==True).
|
||||||
|
- delim: The column delimiter.
|
||||||
|
- justify: Determines how are data justified in their column.
|
||||||
|
Valid values are 'left','right' and 'center'.
|
||||||
|
- separateRows: True if rows are to be separated by a line
|
||||||
|
of 'headerChar's.
|
||||||
|
- prefix: A string prepended to each printed row.
|
||||||
|
- postfix: A string appended to each printed row.
|
||||||
|
- wrapfunc: A function f(text) for wrapping text; each element in
|
||||||
|
the table is first wrapped by this function."""
|
||||||
|
# closure for breaking logical rows to physical, using wrapfunc
|
||||||
|
def rowWrapper(row):
|
||||||
|
newRows = [wrapfunc(item).split('\n') for item in row]
|
||||||
|
return [[substr or '' for substr in item] for item in map(None,*newRows)]
|
||||||
|
# break each logical row into one or more physical ones
|
||||||
|
logicalRows = [rowWrapper(row) for row in rows]
|
||||||
|
# columns of physical rows
|
||||||
|
columns = map(None,*reduce(operator.add,logicalRows))
|
||||||
|
# get the maximum of each column by the string length of its items
|
||||||
|
maxWidths = [max([len(str(item)) for item in column]) for column in columns]
|
||||||
|
rowSeparator = headerChar * (len(prefix) + len(postfix) + sum(maxWidths) + \
|
||||||
|
len(delim)*(len(maxWidths)-1))
|
||||||
|
# select the appropriate justify method
|
||||||
|
justify = {'center':str.center, 'right':str.rjust, 'left':str.ljust}[justify.lower()]
|
||||||
|
output=cStringIO.StringIO()
|
||||||
|
if separateRows: print >> output, rowSeparator
|
||||||
|
for physicalRows in logicalRows:
|
||||||
|
for row in physicalRows:
|
||||||
|
print >> output, \
|
||||||
|
prefix \
|
||||||
|
+ delim.join([justify(str(item),width) for (item,width) in zip(row,maxWidths)]) \
|
||||||
|
+ postfix
|
||||||
|
if separateRows or hasHeader: print >> output, rowSeparator; hasHeader=False
|
||||||
|
return output.getvalue()
|
||||||
|
|
||||||
|
import math
|
||||||
|
def wrap_always(text, width):
|
||||||
|
"""A simple word-wrap function that wraps text on exactly width characters.
|
||||||
|
It doesn't split the text in words."""
|
||||||
|
return '\n'.join([ text[width*i:width*(i+1)] \
|
||||||
|
for i in xrange(int(math.ceil(1.*len(text)/width))) ])
|
||||||
|
|
||||||
|
|
||||||
|
# TEST OF DISPLAY CODE
|
||||||
|
|
||||||
|
if __name__ == '__main__':
|
||||||
|
import gridworld, util
|
||||||
|
|
||||||
|
grid = gridworld.getCliffGrid3()
|
||||||
|
print grid.getStates()
|
||||||
|
|
||||||
|
policy = dict([(state,'east') for state in grid.getStates()])
|
||||||
|
values = util.Counter(dict([(state,1000.23) for state in grid.getStates()]))
|
||||||
|
prettyPrintValues(grid, values, policy, currentState = (0,0))
|
||||||
|
|
||||||
|
stateCrossActions = [[(state, action) for action in grid.getPossibleActions(state)] for state in grid.getStates()]
|
||||||
|
qStates = reduce(lambda x,y: x+y, stateCrossActions, [])
|
||||||
|
qValues = util.Counter(dict([((state, action), 10.5) for state, action in qStates]))
|
||||||
|
qValues = util.Counter(dict([((state, action), 10.5) for state, action in reduce(lambda x,y: x+y, stateCrossActions, [])]))
|
||||||
|
prettyPrintQValues(grid, qValues, currentState = (0,0))
|
||||||
653
p3_rl/util.py
Normal file
653
p3_rl/util.py
Normal file
@@ -0,0 +1,653 @@
|
|||||||
|
# util.py
|
||||||
|
# -------
|
||||||
|
# Licensing Information: You are free to use or extend these projects for
|
||||||
|
# educational purposes provided that (1) you do not distribute or publish
|
||||||
|
# solutions, (2) you retain this notice, and (3) you provide clear
|
||||||
|
# attribution to UC Berkeley, including a link to http://ai.berkeley.edu.
|
||||||
|
#
|
||||||
|
# Attribution Information: The Pacman AI projects were developed at UC Berkeley.
|
||||||
|
# The core projects and autograders were primarily created by John DeNero
|
||||||
|
# (denero@cs.berkeley.edu) and Dan Klein (klein@cs.berkeley.edu).
|
||||||
|
# Student side autograding was added by Brad Miller, Nick Hay, and
|
||||||
|
# Pieter Abbeel (pabbeel@cs.berkeley.edu).
|
||||||
|
|
||||||
|
|
||||||
|
import sys
|
||||||
|
import inspect
|
||||||
|
import heapq, random
|
||||||
|
import cStringIO
|
||||||
|
|
||||||
|
|
||||||
|
class FixedRandom:
    """
    A random-number source seeded with a hard-coded, pre-captured generator
    state, so that every run draws the identical pseudo-random sequence
    (used to make autograded results reproducible).
    """
    def __init__(self):
        # A captured return value of random.getstate():
        # (version, internal Mersenne Twister state tuple, gauss_next).
        # Do not edit the numbers -- any change alters the sequence.
        fixedState = (3, (2147483648L, 507801126L, 683453281L, 310439348L, 2597246090L, \
            2209084787L, 2267831527L, 979920060L, 3098657677L, 37650879L, 807947081L, 3974896263L, \
            881243242L, 3100634921L, 1334775171L, 3965168385L, 746264660L, 4074750168L, 500078808L, \
            776561771L, 702988163L, 1636311725L, 2559226045L, 157578202L, 2498342920L, 2794591496L, \
            4130598723L, 496985844L, 2944563015L, 3731321600L, 3514814613L, 3362575829L, 3038768745L, \
            2206497038L, 1108748846L, 1317460727L, 3134077628L, 988312410L, 1674063516L, 746456451L, \
            3958482413L, 1857117812L, 708750586L, 1583423339L, 3466495450L, 1536929345L, 1137240525L, \
            3875025632L, 2466137587L, 1235845595L, 4214575620L, 3792516855L, 657994358L, 1241843248L, \
            1695651859L, 3678946666L, 1929922113L, 2351044952L, 2317810202L, 2039319015L, 460787996L, \
            3654096216L, 4068721415L, 1814163703L, 2904112444L, 1386111013L, 574629867L, 2654529343L, \
            3833135042L, 2725328455L, 552431551L, 4006991378L, 1331562057L, 3710134542L, 303171486L, \
            1203231078L, 2670768975L, 54570816L, 2679609001L, 578983064L, 1271454725L, 3230871056L, \
            2496832891L, 2944938195L, 1608828728L, 367886575L, 2544708204L, 103775539L, 1912402393L, \
            1098482180L, 2738577070L, 3091646463L, 1505274463L, 2079416566L, 659100352L, 839995305L, \
            1696257633L, 274389836L, 3973303017L, 671127655L, 1061109122L, 517486945L, 1379749962L, \
            3421383928L, 3116950429L, 2165882425L, 2346928266L, 2892678711L, 2936066049L, 1316407868L, \
            2873411858L, 4279682888L, 2744351923L, 3290373816L, 1014377279L, 955200944L, 4220990860L, \
            2386098930L, 1772997650L, 3757346974L, 1621616438L, 2877097197L, 442116595L, 2010480266L, \
            2867861469L, 2955352695L, 605335967L, 2222936009L, 2067554933L, 4129906358L, 1519608541L, \
            1195006590L, 1942991038L, 2736562236L, 279162408L, 1415982909L, 4099901426L, 1732201505L, \
            2934657937L, 860563237L, 2479235483L, 3081651097L, 2244720867L, 3112631622L, 1636991639L, \
            3860393305L, 2312061927L, 48780114L, 1149090394L, 2643246550L, 1764050647L, 3836789087L, \
            3474859076L, 4237194338L, 1735191073L, 2150369208L, 92164394L, 756974036L, 2314453957L, \
            323969533L, 4267621035L, 283649842L, 810004843L, 727855536L, 1757827251L, 3334960421L, \
            3261035106L, 38417393L, 2660980472L, 1256633965L, 2184045390L, 811213141L, 2857482069L, \
            2237770878L, 3891003138L, 2787806886L, 2435192790L, 2249324662L, 3507764896L, 995388363L, \
            856944153L, 619213904L, 3233967826L, 3703465555L, 3286531781L, 3863193356L, 2992340714L, \
            413696855L, 3865185632L, 1704163171L, 3043634452L, 2225424707L, 2199018022L, 3506117517L, \
            3311559776L, 3374443561L, 1207829628L, 668793165L, 1822020716L, 2082656160L, 1160606415L, \
            3034757648L, 741703672L, 3094328738L, 459332691L, 2702383376L, 1610239915L, 4162939394L, \
            557861574L, 3805706338L, 3832520705L, 1248934879L, 3250424034L, 892335058L, 74323433L, \
            3209751608L, 3213220797L, 3444035873L, 3743886725L, 1783837251L, 610968664L, 580745246L, \
            4041979504L, 201684874L, 2673219253L, 1377283008L, 3497299167L, 2344209394L, 2304982920L, \
            3081403782L, 2599256854L, 3184475235L, 3373055826L, 695186388L, 2423332338L, 222864327L, \
            1258227992L, 3627871647L, 3487724980L, 4027953808L, 3053320360L, 533627073L, 3026232514L, \
            2340271949L, 867277230L, 868513116L, 2158535651L, 2487822909L, 3428235761L, 3067196046L, \
            3435119657L, 1908441839L, 788668797L, 3367703138L, 3317763187L, 908264443L, 2252100381L, \
            764223334L, 4127108988L, 384641349L, 3377374722L, 1263833251L, 1958694944L, 3847832657L, \
            1253909612L, 1096494446L, 555725445L, 2277045895L, 3340096504L, 1383318686L, 4234428127L, \
            1072582179L, 94169494L, 1064509968L, 2681151917L, 2681864920L, 734708852L, 1338914021L, \
            1270409500L, 1789469116L, 4191988204L, 1716329784L, 2213764829L, 3712538840L, 919910444L, \
            1318414447L, 3383806712L, 3054941722L, 3378649942L, 1205735655L, 1268136494L, 2214009444L, \
            2532395133L, 3232230447L, 230294038L, 342599089L, 772808141L, 4096882234L, 3146662953L, \
            2784264306L, 1860954704L, 2675279609L, 2984212876L, 2466966981L, 2627986059L, 2985545332L, \
            2578042598L, 1458940786L, 2944243755L, 3959506256L, 1509151382L, 325761900L, 942251521L, \
            4184289782L, 2756231555L, 3297811774L, 1169708099L, 3280524138L, 3805245319L, 3227360276L, \
            3199632491L, 2235795585L, 2865407118L, 36763651L, 2441503575L, 3314890374L, 1755526087L, \
            17915536L, 1196948233L, 949343045L, 3815841867L, 489007833L, 2654997597L, 2834744136L, \
            417688687L, 2843220846L, 85621843L, 747339336L, 2043645709L, 3520444394L, 1825470818L, \
            647778910L, 275904777L, 1249389189L, 3640887431L, 4200779599L, 323384601L, 3446088641L, \
            4049835786L, 1718989062L, 3563787136L, 44099190L, 3281263107L, 22910812L, 1826109246L, \
            745118154L, 3392171319L, 1571490704L, 354891067L, 815955642L, 1453450421L, 940015623L, \
            796817754L, 1260148619L, 3898237757L, 176670141L, 1870249326L, 3317738680L, 448918002L, \
            4059166594L, 2003827551L, 987091377L, 224855998L, 3520570137L, 789522610L, 2604445123L, \
            454472869L, 475688926L, 2990723466L, 523362238L, 3897608102L, 806637149L, 2642229586L, \
            2928614432L, 1564415411L, 1691381054L, 3816907227L, 4082581003L, 1895544448L, 3728217394L, \
            3214813157L, 4054301607L, 1882632454L, 2873728645L, 3694943071L, 1297991732L, 2101682438L, \
            3952579552L, 678650400L, 1391722293L, 478833748L, 2976468591L, 158586606L, 2576499787L, \
            662690848L, 3799889765L, 3328894692L, 2474578497L, 2383901391L, 1718193504L, 3003184595L, \
            3630561213L, 1929441113L, 3848238627L, 1594310094L, 3040359840L, 3051803867L, 2462788790L, \
            954409915L, 802581771L, 681703307L, 545982392L, 2738993819L, 8025358L, 2827719383L, \
            770471093L, 3484895980L, 3111306320L, 3900000891L, 2116916652L, 397746721L, 2087689510L, \
            721433935L, 1396088885L, 2751612384L, 1998988613L, 2135074843L, 2521131298L, 707009172L, \
            2398321482L, 688041159L, 2264560137L, 482388305L, 207864885L, 3735036991L, 3490348331L, \
            1963642811L, 3260224305L, 3493564223L, 1939428454L, 1128799656L, 1366012432L, 2858822447L, \
            1428147157L, 2261125391L, 1611208390L, 1134826333L, 2374102525L, 3833625209L, 2266397263L, \
            3189115077L, 770080230L, 2674657172L, 4280146640L, 3604531615L, 4235071805L, 3436987249L, \
            509704467L, 2582695198L, 4256268040L, 3391197562L, 1460642842L, 1617931012L, 457825497L, \
            1031452907L, 1330422862L, 4125947620L, 2280712485L, 431892090L, 2387410588L, 2061126784L, \
            896457479L, 3480499461L, 2488196663L, 4021103792L, 1877063114L, 2744470201L, 1046140599L, \
            2129952955L, 3583049218L, 4217723693L, 2720341743L, 820661843L, 1079873609L, 3360954200L, \
            3652304997L, 3335838575L, 2178810636L, 1908053374L, 4026721976L, 1793145418L, 476541615L, \
            973420250L, 515553040L, 919292001L, 2601786155L, 1685119450L, 3030170809L, 1590676150L, \
            1665099167L, 651151584L, 2077190587L, 957892642L, 646336572L, 2743719258L, 866169074L, \
            851118829L, 4225766285L, 963748226L, 799549420L, 1955032629L, 799460000L, 2425744063L, \
            2441291571L, 1928963772L, 528930629L, 2591962884L, 3495142819L, 1896021824L, 901320159L, \
            3181820243L, 843061941L, 3338628510L, 3782438992L, 9515330L, 1705797226L, 953535929L, \
            764833876L, 3202464965L, 2970244591L, 519154982L, 3390617541L, 566616744L, 3438031503L, \
            1853838297L, 170608755L, 1393728434L, 676900116L, 3184965776L, 1843100290L, 78995357L, \
            2227939888L, 3460264600L, 1745705055L, 1474086965L, 572796246L, 4081303004L, 882828851L, \
            1295445825L, 137639900L, 3304579600L, 2722437017L, 4093422709L, 273203373L, 2666507854L, \
            3998836510L, 493829981L, 1623949669L, 3482036755L, 3390023939L, 833233937L, 1639668730L, \
            1499455075L, 249728260L, 1210694006L, 3836497489L, 1551488720L, 3253074267L, 3388238003L, \
            2372035079L, 3945715164L, 2029501215L, 3362012634L, 2007375355L, 4074709820L, 631485888L, \
            3135015769L, 4273087084L, 3648076204L, 2739943601L, 1374020358L, 1760722448L, 3773939706L, \
            1313027823L, 1895251226L, 4224465911L, 421382535L, 1141067370L, 3660034846L, 3393185650L, \
            1850995280L, 1451917312L, 3841455409L, 3926840308L, 1397397252L, 2572864479L, 2500171350L, \
            3119920613L, 531400869L, 1626487579L, 1099320497L, 407414753L, 2438623324L, 99073255L, \
            3175491512L, 656431560L, 1153671785L, 236307875L, 2824738046L, 2320621382L, 892174056L, \
            230984053L, 719791226L, 2718891946L, 624L), None)
        self.random = random.Random()
        # Restore the captured state so every draw is reproducible.
        self.random.setstate(fixedState)
|
||||||
|
|
||||||
|
"""
|
||||||
|
Data structures useful for implementing SearchAgents
|
||||||
|
"""
|
||||||
|
|
||||||
|
class Stack:
    "A container with a last-in-first-out (LIFO) queuing policy."

    def __init__(self):
        # Items live in a plain list; the top of the stack is the end.
        self.list = []

    def push(self, item):
        "Push 'item' onto the stack"
        self.list.append(item)

    def pop(self):
        "Pop the most recently pushed item from the stack"
        return self.list.pop()

    def isEmpty(self):
        "Returns true if the stack is empty"
        return not self.list
|
||||||
|
|
||||||
|
class Queue:
    "A container with a first-in-first-out (FIFO) queuing policy."

    def __init__(self):
        # Newest items are inserted at the front; the oldest sits at the end.
        self.list = []

    def push(self, item):
        "Enqueue the 'item' into the queue"
        self.list.insert(0, item)

    def pop(self):
        """
        Dequeue the earliest enqueued item still in the queue. This
        operation removes the item from the queue.
        """
        return self.list.pop()

    def isEmpty(self):
        "Returns true if the queue is empty"
        return not self.list
|
||||||
|
|
||||||
|
class PriorityQueue:
    """
    Implements a priority queue data structure. Each inserted item
    has a priority associated with it and the client is usually interested
    in quick retrieval of the lowest-priority item in the queue. This
    data structure allows O(1) access to the lowest-priority item.

    Note that this PriorityQueue does not allow you to change the priority
    of an item. However, you may insert the same item multiple times with
    different priorities.
    """
    def __init__(self):
        self.heap = []   # heap entries are (priority, insertion index, item)
        self.count = 0   # monotone insertion index; breaks priority ties FIFO

    def push(self, item, priority):
        "Add 'item' with the given priority; ties pop in insertion order."
        # The insertion index keeps heap comparisons from ever reaching the
        # item itself (items need not be comparable to one another).
        entry = (priority, self.count, item)
        heapq.heappush(self.heap, entry)
        self.count += 1

    def pop(self):
        "Remove and return the item with the lowest priority."
        (_, _, item) = heapq.heappop(self.heap)
        return item

    def isEmpty(self):
        "Returns true if the queue is empty"
        return len(self.heap) == 0
|
||||||
|
|
||||||
|
class PriorityQueueWithFunction(PriorityQueue):
    """
    Implements a priority queue with the same push/pop signature of the
    Queue and the Stack classes. This is designed for drop-in replacement for
    those two classes. The caller has to provide a priority function, which
    extracts each item's priority.
    """

    def __init__(self, priorityFunction):
        "priorityFunction (item) -> priority"
        PriorityQueue.__init__(self)               # set up the underlying heap
        self.priorityFunction = priorityFunction   # used by push() to rank items

    def push(self, item):
        "Adds an item to the queue with priority from the priority function"
        priority = self.priorityFunction(item)
        PriorityQueue.push(self, item, priority)
|
||||||
|
|
||||||
|
|
||||||
|
def manhattanDistance(xy1, xy2):
    "Returns the Manhattan distance between points xy1 and xy2"
    dx = abs(xy1[0] - xy2[0])
    dy = abs(xy1[1] - xy2[1])
    return dx + dy
|
||||||
|
|
||||||
|
"""
|
||||||
|
Data structures and functions useful for various course projects
|
||||||
|
|
||||||
|
The search project should not need anything below this line.
|
||||||
|
"""
|
||||||
|
|
||||||
|
class Counter(dict):
|
||||||
|
"""
|
||||||
|
A counter keeps track of counts for a set of keys.
|
||||||
|
|
||||||
|
The counter class is an extension of the standard python
|
||||||
|
dictionary type. It is specialized to have number values
|
||||||
|
(integers or floats), and includes a handful of additional
|
||||||
|
functions to ease the task of counting data. In particular,
|
||||||
|
all keys are defaulted to have value 0. Using a dictionary:
|
||||||
|
|
||||||
|
a = {}
|
||||||
|
print a['test']
|
||||||
|
|
||||||
|
would give an error, while the Counter class analogue:
|
||||||
|
|
||||||
|
>>> a = Counter()
|
||||||
|
>>> print a['test']
|
||||||
|
0
|
||||||
|
|
||||||
|
returns the default 0 value. Note that to reference a key
|
||||||
|
that you know is contained in the counter,
|
||||||
|
you can still use the dictionary syntax:
|
||||||
|
|
||||||
|
>>> a = Counter()
|
||||||
|
>>> a['test'] = 2
|
||||||
|
>>> print a['test']
|
||||||
|
2
|
||||||
|
|
||||||
|
This is very useful for counting things without initializing their counts,
|
||||||
|
see for example:
|
||||||
|
|
||||||
|
>>> a['blah'] += 1
|
||||||
|
>>> print a['blah']
|
||||||
|
1
|
||||||
|
|
||||||
|
The counter also includes additional functionality useful in implementing
|
||||||
|
the classifiers for this assignment. Two counters can be added,
|
||||||
|
subtracted or multiplied together. See below for details. They can
|
||||||
|
also be normalized and their total count and arg max can be extracted.
|
||||||
|
"""
|
||||||
|
def __getitem__(self, idx):
|
||||||
|
self.setdefault(idx, 0)
|
||||||
|
return dict.__getitem__(self, idx)
|
||||||
|
|
||||||
|
def incrementAll(self, keys, count):
|
||||||
|
"""
|
||||||
|
Increments all elements of keys by the same count.
|
||||||
|
|
||||||
|
>>> a = Counter()
|
||||||
|
>>> a.incrementAll(['one','two', 'three'], 1)
|
||||||
|
>>> a['one']
|
||||||
|
1
|
||||||
|
>>> a['two']
|
||||||
|
1
|
||||||
|
"""
|
||||||
|
for key in keys:
|
||||||
|
self[key] += count
|
||||||
|
|
||||||
|
def argMax(self):
|
||||||
|
"""
|
||||||
|
Returns the key with the highest value.
|
||||||
|
"""
|
||||||
|
if len(self.keys()) == 0: return None
|
||||||
|
all = self.items()
|
||||||
|
values = [x[1] for x in all]
|
||||||
|
maxIndex = values.index(max(values))
|
||||||
|
return all[maxIndex][0]
|
||||||
|
|
||||||
|
def sortedKeys(self):
|
||||||
|
"""
|
||||||
|
Returns a list of keys sorted by their values. Keys
|
||||||
|
with the highest values will appear first.
|
||||||
|
|
||||||
|
>>> a = Counter()
|
||||||
|
>>> a['first'] = -2
|
||||||
|
>>> a['second'] = 4
|
||||||
|
>>> a['third'] = 1
|
||||||
|
>>> a.sortedKeys()
|
||||||
|
['second', 'third', 'first']
|
||||||
|
"""
|
||||||
|
sortedItems = self.items()
|
||||||
|
compare = lambda x, y: sign(y[1] - x[1])
|
||||||
|
sortedItems.sort(cmp=compare)
|
||||||
|
return [x[0] for x in sortedItems]
|
||||||
|
|
||||||
|
def totalCount(self):
|
||||||
|
"""
|
||||||
|
Returns the sum of counts for all keys.
|
||||||
|
"""
|
||||||
|
return sum(self.values())
|
||||||
|
|
||||||
|
def normalize(self):
|
||||||
|
"""
|
||||||
|
Edits the counter such that the total count of all
|
||||||
|
keys sums to 1. The ratio of counts for all keys
|
||||||
|
will remain the same. Note that normalizing an empty
|
||||||
|
Counter will result in an error.
|
||||||
|
"""
|
||||||
|
total = float(self.totalCount())
|
||||||
|
if total == 0: return
|
||||||
|
for key in self.keys():
|
||||||
|
self[key] = self[key] / total
|
||||||
|
|
||||||
|
def divideAll(self, divisor):
|
||||||
|
"""
|
||||||
|
Divides all counts by divisor
|
||||||
|
"""
|
||||||
|
divisor = float(divisor)
|
||||||
|
for key in self:
|
||||||
|
self[key] /= divisor
|
||||||
|
|
||||||
|
def copy(self):
|
||||||
|
"""
|
||||||
|
Returns a copy of the counter
|
||||||
|
"""
|
||||||
|
return Counter(dict.copy(self))
|
||||||
|
|
||||||
|
def __mul__(self, y ):
|
||||||
|
"""
|
||||||
|
Multiplying two counters gives the dot product of their vectors where
|
||||||
|
each unique label is a vector element.
|
||||||
|
|
||||||
|
>>> a = Counter()
|
||||||
|
>>> b = Counter()
|
||||||
|
>>> a['first'] = -2
|
||||||
|
>>> a['second'] = 4
|
||||||
|
>>> b['first'] = 3
|
||||||
|
>>> b['second'] = 5
|
||||||
|
>>> a['third'] = 1.5
|
||||||
|
>>> a['fourth'] = 2.5
|
||||||
|
>>> a * b
|
||||||
|
14
|
||||||
|
"""
|
||||||
|
sum = 0
|
||||||
|
x = self
|
||||||
|
if len(x) > len(y):
|
||||||
|
x,y = y,x
|
||||||
|
for key in x:
|
||||||
|
if key not in y:
|
||||||
|
continue
|
||||||
|
sum += x[key] * y[key]
|
||||||
|
return sum
|
||||||
|
|
||||||
|
def __radd__(self, y):
|
||||||
|
"""
|
||||||
|
Adding another counter to a counter increments the current counter
|
||||||
|
by the values stored in the second counter.
|
||||||
|
|
||||||
|
>>> a = Counter()
|
||||||
|
>>> b = Counter()
|
||||||
|
>>> a['first'] = -2
|
||||||
|
>>> a['second'] = 4
|
||||||
|
>>> b['first'] = 3
|
||||||
|
>>> b['third'] = 1
|
||||||
|
>>> a += b
|
||||||
|
>>> a['first']
|
||||||
|
1
|
||||||
|
"""
|
||||||
|
for key, value in y.items():
|
||||||
|
self[key] += value
|
||||||
|
|
||||||
|
def __add__( self, y ):
|
||||||
|
"""
|
||||||
|
Adding two counters gives a counter with the union of all keys and
|
||||||
|
counts of the second added to counts of the first.
|
||||||
|
|
||||||
|
>>> a = Counter()
|
||||||
|
>>> b = Counter()
|
||||||
|
>>> a['first'] = -2
|
||||||
|
>>> a['second'] = 4
|
||||||
|
>>> b['first'] = 3
|
||||||
|
>>> b['third'] = 1
|
||||||
|
>>> (a + b)['first']
|
||||||
|
1
|
||||||
|
"""
|
||||||
|
addend = Counter()
|
||||||
|
for key in self:
|
||||||
|
if key in y:
|
||||||
|
addend[key] = self[key] + y[key]
|
||||||
|
else:
|
||||||
|
addend[key] = self[key]
|
||||||
|
for key in y:
|
||||||
|
if key in self:
|
||||||
|
continue
|
||||||
|
addend[key] = y[key]
|
||||||
|
return addend
|
||||||
|
|
||||||
|
def __sub__( self, y ):
|
||||||
|
"""
|
||||||
|
Subtracting a counter from another gives a counter with the union of all keys and
|
||||||
|
counts of the second subtracted from counts of the first.
|
||||||
|
|
||||||
|
>>> a = Counter()
|
||||||
|
>>> b = Counter()
|
||||||
|
>>> a['first'] = -2
|
||||||
|
>>> a['second'] = 4
|
||||||
|
>>> b['first'] = 3
|
||||||
|
>>> b['third'] = 1
|
||||||
|
>>> (a - b)['first']
|
||||||
|
-5
|
||||||
|
"""
|
||||||
|
addend = Counter()
|
||||||
|
for key in self:
|
||||||
|
if key in y:
|
||||||
|
addend[key] = self[key] - y[key]
|
||||||
|
else:
|
||||||
|
addend[key] = self[key]
|
||||||
|
for key in y:
|
||||||
|
if key in self:
|
||||||
|
continue
|
||||||
|
addend[key] = -1 * y[key]
|
||||||
|
return addend
|
||||||
|
|
||||||
|
def raiseNotDefined():
    # Report which unimplemented (student) method was called, then abort.
    fileName = inspect.stack()[1][1]   # caller's source file
    line = inspect.stack()[1][2]       # caller's line number
    method = inspect.stack()[1][3]     # caller's function name

    print "*** Method not implemented: %s at line %s of %s" % (method, line, fileName)
    sys.exit(1)
|
||||||
|
|
||||||
|
def normalize(vectorOrCounter):
    """
    normalize a vector or counter by dividing each value by the sum of all values
    """
    if type(vectorOrCounter) == type(Counter()):
        counter = vectorOrCounter
        total = float(counter.totalCount())
        if total == 0:
            return counter  # empty / zero-total counter returned untouched
        normalized = Counter()
        for key in counter.keys():
            normalized[key] = counter[key] / total
        return normalized
    # Plain sequence case: return a new list scaled to sum to 1.
    vector = vectorOrCounter
    total = float(sum(vector))
    if total == 0:
        return vector
    return [value / total for value in vector]
|
||||||
|
|
||||||
|
def nSample(distribution, values, n):
    """
    Draw n samples from the discrete distribution over `values`,
    consuming one sorted batch of uniform draws in a single CDF sweep.
    """
    if sum(distribution) != 1:
        distribution = normalize(distribution)
    # Sorting the uniform draws lets us walk the CDF exactly once.
    draws = sorted(random.random() for _ in range(n))
    samples = []
    samplePos, distPos, cdf = 0, 0, distribution[0]
    while samplePos < n:
        if draws[samplePos] < cdf:
            samples.append(values[distPos])
            samplePos += 1
        else:
            distPos += 1
            cdf += distribution[distPos]
    return samples
|
||||||
|
|
||||||
|
def sample(distribution, values = None):
    """
    Draw a single sample.  `distribution` may be a Counter (whose keys
    become the values), or a list of weights paired with `values`.
    """
    if type(distribution) == Counter:
        items = sorted(distribution.items())
        distribution = [weight for _, weight in items]
        values = [value for value, _ in items]
    if sum(distribution) != 1:
        distribution = normalize(distribution)
    # Walk the running total (CDF) until it passes one uniform draw.
    choice = random.random()
    i = 0
    total = distribution[0]
    while choice > total:
        i += 1
        total += distribution[i]
    return values[i]
|
||||||
|
|
||||||
|
def sampleFromCounter(ctr):
    "Draw one sample from a Counter, treating its counts as weights."
    keys = []
    weights = []
    for key, weight in sorted(ctr.items()):
        keys.append(key)
        weights.append(weight)
    return sample(weights, keys)
|
||||||
|
|
||||||
|
def getProbability(value, distribution, values):
    """
    Gives the probability of a value under a discrete distribution
    defined by (distributions, values).
    """
    # Sum the weight of every position whose outcome matches `value`
    # (0.0 start keeps the result a float even when nothing matches).
    return sum((prob for prob, val in zip(distribution, values) if val == value), 0.0)
|
||||||
|
|
||||||
|
def flipCoin(p):
    "Simulate one biased coin flip: True with probability p."
    return random.random() < p
|
||||||
|
|
||||||
|
def chooseFromDistribution(distribution):
    "Takes either a counter or a list of (prob, key) pairs and samples"
    if type(distribution) == dict or type(distribution) == Counter:
        return sample(distribution)
    # List form: walk the running probability total until it passes a draw.
    draw = random.random()
    cumulative = 0.0
    for prob, element in distribution:
        cumulative += prob
        if draw <= cumulative:
            return element
|
||||||
|
|
||||||
|
def nearestPoint(pos):
    """
    Finds the nearest grid point to a position (discretizes).
    """
    current_row, current_col = pos
    # Adding 0.5 before truncation rounds non-negative coordinates to the
    # nearest integer.
    return (int(current_row + 0.5), int(current_col + 0.5))
|
||||||
|
|
||||||
|
def sign(x):
    """
    Returns 1 or -1 depending on the sign of x (zero counts as positive).
    """
    return 1 if x >= 0 else -1
|
||||||
|
|
||||||
|
def arrayInvert(array):
    """
    Inverts a matrix stored as a list of lists.
    """
    # One output row per input row; cells are redistributed by column index
    # (assumes a square matrix, as the original did).
    result = [[] for _ in array]
    for row in array:
        for col_index, cell in enumerate(row):
            result[col_index].append(cell)
    return result
|
||||||
|
|
||||||
|
def matrixAsList(matrix, value = True):
    """
    Turns a matrix into a list of coordinates matching the specified value
    """
    rows, cols = len(matrix), len(matrix[0])
    # Row-major scan, collecting (row, col) wherever the cell matches.
    return [(row, col)
            for row in range(rows)
            for col in range(cols)
            if matrix[row][col] == value]
|
||||||
|
|
||||||
|
def lookup(name, namespace):
    """
    Get a method or class from any imported module from its name.
    Usage: lookup(functionName, globals())
    """
    dots = name.count('.')
    if dots > 0:
        # Dotted name: import the module prefix, then fetch the attribute.
        moduleName, objName = '.'.join(name.split('.')[:-1]), name.split('.')[-1]
        module = __import__(moduleName)
        return getattr(module, objName)
    else:
        # Bare name: search every module visible in the namespace, then the
        # namespace's own bindings.
        modules = [obj for obj in namespace.values() if str(type(obj)) == "<type 'module'>"]
        options = [getattr(module, name) for module in modules if name in dir(module)]
        options += [obj[1] for obj in namespace.items() if obj[0] == name ]
        if len(options) == 1: return options[0]
        # NOTE(review): the format string below is never filled in
        # ('%s' lacks a trailing '% name') -- confirm and fix.
        if len(options) > 1: raise Exception, 'Name conflict for %s'
        raise Exception, '%s not found as a method or class' % name
|
||||||
|
|
||||||
|
def pause():
    """
    Pauses the output stream awaiting user feedback.
    """
    print "<Press enter/return to continue>"
    raw_input()  # block until the user presses enter
|
||||||
|
|
||||||
|
|
||||||
|
# code to handle timeouts
|
||||||
|
#
|
||||||
|
# FIXME
|
||||||
|
# NOTE: TimeoutFuncton is NOT reentrant. Later timeouts will silently
|
||||||
|
# disable earlier timeouts. Could be solved by maintaining a global list
|
||||||
|
# of active time outs. Currently, questions which have test cases calling
|
||||||
|
# this have all student code so wrapped.
|
||||||
|
#
|
||||||
|
import signal
|
||||||
|
import time
|
||||||
|
# Raised by TimeoutFunction when the wrapped call exceeds its time budget.
class TimeoutFunctionException(Exception):
    """Exception to raise on a timeout"""
    pass
|
||||||
|
|
||||||
|
|
||||||
|
class TimeoutFunction:
    """
    Wraps a callable so that invoking it raises TimeoutFunctionException
    when it runs longer than the given number of seconds.
    NOT reentrant (see the module note above this class): a nested timeout
    silently disables an enclosing one.
    """
    def __init__(self, function, timeout):
        self.timeout = timeout     # seconds allowed per call
        self.function = function   # the wrapped callable

    def handle_timeout(self, signum, frame):
        # SIGALRM handler: abort the wrapped call.
        raise TimeoutFunctionException()

    def __call__(self, *args, **keyArgs):
        # If we have SIGALRM signal, use it to cause an exception if and
        # when this function runs too long.  Otherwise check the time taken
        # after the method has returned, and throw an exception then.
        if hasattr(signal, 'SIGALRM'):
            old = signal.signal(signal.SIGALRM, self.handle_timeout)
            signal.alarm(self.timeout)
            try:
                result = self.function(*args, **keyArgs)
            finally:
                # Restore the previous handler even if the call raised.
                signal.signal(signal.SIGALRM, old)
            signal.alarm(0)  # cancel any pending alarm
        else:
            # No SIGALRM (e.g. Windows): run to completion, then compare
            # the elapsed wall-clock time against the budget.
            startTime = time.time()
            result = self.function(*args, **keyArgs)
            timeElapsed = time.time() - startTime
            if timeElapsed >= self.timeout:
                self.handle_timeout(None, None)
        return result
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
|
# Saved stream handles and mute state used by mutePrint()/unmutePrint().
_ORIGINAL_STDOUT = None
_ORIGINAL_STDERR = None
_MUTED = False
|
||||||
|
|
||||||
|
class WritableNull:
    "A file-like sink that silently discards everything written to it."
    def write(self, string):
        pass
|
||||||
|
|
||||||
|
def mutePrint():
    """
    Silence all subsequent stdout output until unmutePrint() is called.
    """
    global _ORIGINAL_STDOUT, _ORIGINAL_STDERR, _MUTED
    if _MUTED:
        return  # already muted; don't overwrite the saved real stream
    _MUTED = True

    _ORIGINAL_STDOUT = sys.stdout  # remember the real stream for restoration
    #_ORIGINAL_STDERR = sys.stderr
    sys.stdout = WritableNull()
    #sys.stderr = WritableNull()
|
||||||
|
|
||||||
|
def unmutePrint():
    """
    Restore stdout after a previous mutePrint().
    """
    global _ORIGINAL_STDOUT, _ORIGINAL_STDERR, _MUTED
    if not _MUTED:
        return  # nothing to restore
    _MUTED = False

    sys.stdout = _ORIGINAL_STDOUT
    #sys.stderr = _ORIGINAL_STDERR
|
||||||
|
|
||||||
85
p3_rl/valueIterationAgents.py
Normal file
85
p3_rl/valueIterationAgents.py
Normal file
@@ -0,0 +1,85 @@
|
|||||||
|
# valueIterationAgents.py
|
||||||
|
# -----------------------
|
||||||
|
# Licensing Information: You are free to use or extend these projects for
|
||||||
|
# educational purposes provided that (1) you do not distribute or publish
|
||||||
|
# solutions, (2) you retain this notice, and (3) you provide clear
|
||||||
|
# attribution to UC Berkeley, including a link to http://ai.berkeley.edu.
|
||||||
|
#
|
||||||
|
# Attribution Information: The Pacman AI projects were developed at UC Berkeley.
|
||||||
|
# The core projects and autograders were primarily created by John DeNero
|
||||||
|
# (denero@cs.berkeley.edu) and Dan Klein (klein@cs.berkeley.edu).
|
||||||
|
# Student side autograding was added by Brad Miller, Nick Hay, and
|
||||||
|
# Pieter Abbeel (pabbeel@cs.berkeley.edu).
|
||||||
|
|
||||||
|
|
||||||
|
import mdp, util
|
||||||
|
|
||||||
|
from learningAgents import ValueEstimationAgent
|
||||||
|
|
||||||
|
class ValueIterationAgent(ValueEstimationAgent):
    """
    * Please read learningAgents.py before reading this.*

    A ValueIterationAgent takes a Markov decision process
    (see mdp.py) on initialization and runs value iteration
    for a given number of iterations using the supplied
    discount factor.
    """
    def __init__(self, mdp, discount = 0.9, iterations = 100):
        """
        Your value iteration agent should take an mdp on
        construction, run the indicated number of iterations
        and then act according to the resulting policy.

        Some useful mdp methods you will use:
            mdp.getStates()
            mdp.getPossibleActions(state)
            mdp.getTransitionStatesAndProbs(state, action)
            mdp.getReward(state, action, nextState)
            mdp.isTerminal(state)
        """
        self.mdp = mdp                  # the MDP to plan in
        self.discount = discount        # discount factor (often called gamma)
        self.iterations = iterations    # number of value-iteration sweeps to run
        self.values = util.Counter() # A Counter is a dict with default 0

        # Write value iteration code here
        "*** YOUR CODE HERE ***"


    def getValue(self, state):
        """
        Return the value of the state (computed in __init__).
        """
        return self.values[state]


    def computeQValueFromValues(self, state, action):
        """
        Compute the Q-value of action in state from the
        value function stored in self.values.
        """
        "*** YOUR CODE HERE ***"
        util.raiseNotDefined()

    def computeActionFromValues(self, state):
        """
        The policy is the best action in the given state
        according to the values currently stored in self.values.

        You may break ties any way you see fit.  Note that if
        there are no legal actions, which is the case at the
        terminal state, you should return None.
        """
        "*** YOUR CODE HERE ***"
        util.raiseNotDefined()

    def getPolicy(self, state):
        # Alias for computeActionFromValues (part of the agent interface).
        return self.computeActionFromValues(state)

    def getAction(self, state):
        "Returns the policy at the state (no exploration)."
        return self.computeActionFromValues(state)

    def getQValue(self, state, action):
        # Alias for computeQValueFromValues (part of the agent interface).
        return self.computeQValueFromValues(state, action)
|
||||||
Reference in New Issue
Block a user