# graphicsCrawlerDisplay.py
# -------------------------
# Licensing Information: You are free to use or extend these projects for
# educational purposes provided that (1) you do not distribute or publish
# solutions, (2) you retain this notice, and (3) you provide clear
# attribution to UC Berkeley, including a link to http://ai.berkeley.edu.
#
# Attribution Information: The Pacman AI projects were developed at UC Berkeley.
# The core projects and autograders were primarily created by John DeNero
# (denero@cs.berkeley.edu) and Dan Klein (klein@cs.berkeley.edu).
# Student side autograding was added by Brad Miller, Nick Hay, and
# Pieter Abbeel (pabbeel@cs.berkeley.edu).


# graphicsCrawlerDisplay.py
# -------------------------
# Licensing Information: Please do not distribute or publish solutions to this
# project. You are free to use and extend these projects for educational
# purposes. The Pacman AI projects were developed at UC Berkeley, primarily by
# John DeNero (denero@cs.berkeley.edu) and Dan Klein (klein@cs.berkeley.edu).
# Student side autograding was added by Brad Miller, Nick Hay, and Pieter
# Abbeel in Spring 2013.
# For more info, see http://inst.eecs.berkeley.edu/~cs188/pacman/pacman.html

import Tkinter
import qlearningAgents
import time
import threading
import sys
import crawler
#import pendulum
import math
from math import pi as PI

robotType = 'crawler'

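# The Application class builds the Tkinter control panel for the crawler
# Q-learning demo. The step delay is scaled directly, while epsilon, the
# discount, and the learning rate are driven through a base-2 sigmoid so the
# +/- buttons move them smoothly within (0, 1).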
class Application:

    def sigmoid(self, x):
        return 1.0 / (1.0 + 2.0 ** (-x))

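    # Scales the step delay multiplicatively (the "-" button passes .5, the
    # "+" button passes 2) and refreshes the label in the control panel.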
    def incrementSpeed(self, inc):
        self.tickTime *= inc
        # self.epsilon = min(1.0, self.epsilon)
        # self.epsilon = max(0.0,self.epsilon)
        # self.learner.setSpeed(self.epsilon)
        self.speed_label['text'] = 'Step Delay: %.5f' % (self.tickTime)

    def incrementEpsilon(self, inc):
        self.ep += inc
        self.epsilon = self.sigmoid(self.ep)
        self.learner.setEpsilon(self.epsilon)
        self.epsilon_label['text'] = 'Epsilon: %.3f' % (self.epsilon)

    def incrementGamma(self, inc):
        self.ga += inc
        self.gamma = self.sigmoid(self.ga)
        self.learner.setDiscount(self.gamma)
        self.gamma_label['text'] = 'Discount: %.3f' % (self.gamma)

    def incrementAlpha(self, inc):
        self.al += inc
        self.alpha = self.sigmoid(self.al)
        self.learner.setLearningRate(self.alpha)
        self.alpha_label['text'] = 'Learning Rate: %.3f' % (self.alpha)

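    # Lays out the control panel: one row of speed/epsilon controls, a second
    # row of discount/learning-rate controls, and the canvas the robot is
    # drawn on.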
    def __initGUI(self, win):
        ## Window ##
        self.win = win

        ## Initialize Frame ##
        win.grid()
        self.dec = -.5
        self.inc = .5
        self.tickTime = 0.1

        ## Speed Button + Label ##
        self.setupSpeedButtonAndLabel(win)

        ## Epsilon Button + Label ##
        self.setupEpsilonButtonAndLabel(win)

        ## Gamma Button + Label ##
        self.setUpGammaButtonAndLabel(win)

        ## Alpha Button + Label ##
        self.setupAlphaButtonAndLabel(win)

        ## Exit Button ##
        #self.exit_button = Tkinter.Button(win,text='Quit', command=self.exit)
        #self.exit_button.grid(row=0, column=9)

        ## Simulation Buttons ##
        # self.setupSimulationButtons(win)

        ## Canvas ##
        self.canvas = Tkinter.Canvas(root, height=200, width=1000)
        self.canvas.grid(row=2,columnspan=10)

    def setupAlphaButtonAndLabel(self, win):
        self.alpha_minus = Tkinter.Button(win,
                        text="-",command=(lambda: self.incrementAlpha(self.dec)))
        self.alpha_minus.grid(row=1, column=3, padx=10)

        self.alpha = self.sigmoid(self.al)
        self.alpha_label = Tkinter.Label(win, text='Learning Rate: %.3f' % (self.alpha))
        self.alpha_label.grid(row=1, column=4)

        self.alpha_plus = Tkinter.Button(win,
                        text="+",command=(lambda: self.incrementAlpha(self.inc)))
        self.alpha_plus.grid(row=1, column=5, padx=10)

    def setUpGammaButtonAndLabel(self, win):
        self.gamma_minus = Tkinter.Button(win,
                        text="-",command=(lambda: self.incrementGamma(self.dec)))
        self.gamma_minus.grid(row=1, column=0, padx=10)

        self.gamma = self.sigmoid(self.ga)
        self.gamma_label = Tkinter.Label(win, text='Discount: %.3f' % (self.gamma))
        self.gamma_label.grid(row=1, column=1)

        self.gamma_plus = Tkinter.Button(win,
                        text="+",command=(lambda: self.incrementGamma(self.inc)))
        self.gamma_plus.grid(row=1, column=2, padx=10)

    def setupEpsilonButtonAndLabel(self, win):
        self.epsilon_minus = Tkinter.Button(win,
                        text="-",command=(lambda: self.incrementEpsilon(self.dec)))
        self.epsilon_minus.grid(row=0, column=3)

        self.epsilon = self.sigmoid(self.ep)
        self.epsilon_label = Tkinter.Label(win, text='Epsilon: %.3f' % (self.epsilon))
        self.epsilon_label.grid(row=0, column=4)

        self.epsilon_plus = Tkinter.Button(win,
                        text="+",command=(lambda: self.incrementEpsilon(self.inc)))
        self.epsilon_plus.grid(row=0, column=5)

    def setupSpeedButtonAndLabel(self, win):
        self.speed_minus = Tkinter.Button(win,
                        text="-",command=(lambda: self.incrementSpeed(.5)))
        self.speed_minus.grid(row=0, column=0)

        self.speed_label = Tkinter.Label(win, text='Step Delay: %.5f' % (self.tickTime))
        self.speed_label.grid(row=0, column=1)

        self.speed_plus = Tkinter.Button(win,
                        text="+",command=(lambda: self.incrementSpeed(2)))
        self.speed_plus.grid(row=0, column=2)

    def skip5kSteps(self):
        self.stepsToSkip = 5000

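    # Builds the GUI, the robot environment, and the Q-learning agent, then
    # starts the simulation loop on a background thread so the Tkinter
    # mainloop stays responsive.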
    def __init__(self, win):

        self.ep = 0
        self.ga = 2
        self.al = 2
        self.stepCount = 0

        ## Init GUI ##
        self.__initGUI(win)

        # Init environment
        if robotType == 'crawler':
            self.robot = crawler.CrawlingRobot(self.canvas)
            self.robotEnvironment = crawler.CrawlingRobotEnvironment(self.robot)
        elif robotType == 'pendulum':
            self.robot = pendulum.PendulumRobot(self.canvas)
            self.robotEnvironment = \
                pendulum.PendulumRobotEnvironment(self.robot)
        else:
            raise Exception("Unknown RobotType")

        # Init Agent
        simulationFn = lambda agent: \
            simulation.SimulationEnvironment(self.robotEnvironment,agent)
        actionFn = lambda state: \
            self.robotEnvironment.getPossibleActions(state)
        self.learner = qlearningAgents.QLearningAgent(actionFn=actionFn)

        self.learner.setEpsilon(self.epsilon)
        self.learner.setLearningRate(self.alpha)
        self.learner.setDiscount(self.gamma)

        # Start GUI
        self.running = True
        self.stopped = False
        self.stepsToSkip = 0
        self.thread = threading.Thread(target=self.run)
        self.thread.start()

    def exit(self):
        self.running = False
        for i in range(5):
            if not self.stopped:
                time.sleep(0.1)
        try:
            self.win.destroy()
        except:
            pass
        sys.exit(0)

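    # Runs a single learning step: query the agent for an action in the
    # current state, execute it in the environment, and feed the resulting
    # transition (state, action, nextState, reward) back to the learner.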
    def step(self):

        self.stepCount += 1

        state = self.robotEnvironment.getCurrentState()
        actions = self.robotEnvironment.getPossibleActions(state)
        if len(actions) == 0:
            self.robotEnvironment.reset()
            state = self.robotEnvironment.getCurrentState()
            actions = self.robotEnvironment.getPossibleActions(state)
            print 'Reset!'
        action = self.learner.getAction(state)
        if action is None:
            raise Exception('None action returned: Code Not Complete')
        nextState, reward = self.robotEnvironment.doAction(action)
        self.learner.observeTransition(state, action, nextState, reward)

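    # Pendulum-only visualization: rasterizes the learned policy over a
    # 100x100 grid of (angle, velocity) states, coloring each cell by the
    # greedy action (blue = kickLeft, red = kickRight, white = doNothing).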
    def animatePolicy(self):
        if robotType != 'pendulum':
            raise Exception('Only the pendulum robot can animatePolicy')

        totWidth = self.canvas.winfo_reqwidth()
        totHeight = self.canvas.winfo_reqheight()

        length = 0.48 * min(totWidth, totHeight)
        x,y = totWidth-length-30, length+10

        angleMin, angleMax = self.robot.getMinAndMaxAngle()
        velMin, velMax = self.robot.getMinAndMaxAngleVelocity()

        if not 'animatePolicyBox' in dir(self):
            self.canvas.create_line(x,y,x+length,y)
            self.canvas.create_line(x+length,y,x+length,y-length)
            self.canvas.create_line(x+length,y-length,x,y-length)
            self.canvas.create_line(x,y-length,x,y)
            self.animatePolicyBox = 1
            self.canvas.create_text(x+length/2,y+10,text='angle')
            self.canvas.create_text(x-30,y-length/2,text='velocity')
            self.canvas.create_text(x-60,y-length/4,text='Blue = kickLeft')
            self.canvas.create_text(x-60,y-length/4+20,text='Red = kickRight')
            self.canvas.create_text(x-60,y-length/4+40,text='White = doNothing')

        angleDelta = (angleMax-angleMin) / 100
        velDelta = (velMax-velMin) / 100
        for i in range(100):
            angle = angleMin + i * angleDelta

            for j in range(100):
                vel = velMin + j * velDelta
                state = self.robotEnvironment.getState(angle,vel)
                maxQ, argMax = None, None
                if not self.learner.seenState(state):
                    argMax = 'unseen'
                else:
                    for action in ('kickLeft','kickRight','doNothing'):
                        qVal = self.learner.getQValue(state, action)
                        if maxQ is None or qVal > maxQ:
                            maxQ, argMax = qVal, action
                if argMax != 'unseen':
                    if argMax == 'kickLeft':
                        color = 'blue'
                    elif argMax == 'kickRight':
                        color = 'red'
                    elif argMax == 'doNothing':
                        color = 'white'
                    dx = length / 100.0
                    dy = length / 100.0
                    x0, y0 = x+i*dx, y-j*dy
                    self.canvas.create_rectangle(x0,y0,x0+dx,y0+dy,fill=color)

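    # Background simulation loop: sleeps at least minSleep per iteration and,
    # when the requested step delay is shorter than that, batches the extra
    # steps via stepsToSkip so the effective step rate still matches tickTime.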
    def run(self):
        self.stepCount = 0
        self.learner.startEpisode()
        while True:
            minSleep = .01
            tm = max(minSleep, self.tickTime)
            time.sleep(tm)
            self.stepsToSkip = int(tm / self.tickTime) - 1

            if not self.running:
                self.stopped = True
                return
            for i in range(self.stepsToSkip):
                self.step()
            self.stepsToSkip = 0
            self.step()
            # self.robot.draw()
        self.learner.stopEpisode()

    def start(self):
        self.win.mainloop()


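# Module entry point: build the Tk root window, create the Application, and
# schedule a periodic redraw of the robot every 10 ms via root.after.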
def run():
    global root
    root = Tkinter.Tk()
    root.title( 'Crawler GUI' )
    root.resizable( 0, 0 )
    # root.mainloop()

    app = Application(root)
    def update_gui():
        app.robot.draw(app.stepCount, app.tickTime)
        root.after(10, update_gui)
    update_gui()

    root.protocol( 'WM_DELETE_WINDOW', app.exit)
    try:
        app.start()
    except:
        app.exit()