# mdp.py
# ------
# Licensing Information: You are free to use or extend these projects for
# educational purposes provided that (1) you do not distribute or publish
# solutions, (2) you retain this notice, and (3) you provide clear
# attribution to UC Berkeley, including a link to http://ai.berkeley.edu.
#
# Attribution Information: The Pacman AI projects were developed at UC Berkeley.
# The core projects and autograders were primarily created by John DeNero
# (denero@cs.berkeley.edu) and Dan Klein (klein@cs.berkeley.edu).
# Student side autograding was added by Brad Miller, Nick Hay, and
# Pieter Abbeel (pabbeel@cs.berkeley.edu).


import random


class MarkovDecisionProcess:

    def getStates(self):
        """
        Return a list of all states in the MDP.
        Not generally possible for large MDPs.
        """
        raise NotImplementedError

    def getStartState(self):
        """
        Return the start state of the MDP.
        """
        raise NotImplementedError

    def getPossibleActions(self, state):
        """
        Return a list of possible actions from 'state'.
        """
        raise NotImplementedError

    def getTransitionStatesAndProbs(self, state, action):
        """
        Returns a list of (nextState, prob) pairs representing the states
        reachable from 'state' by taking 'action', along with their
        transition probabilities.

        Note that in Q-learning and reinforcement learning in general,
        we do not know these probabilities, nor do we directly model them.
        """
        raise NotImplementedError

    def getReward(self, state, action, nextState):
        """
        Get the reward for the (state, action, nextState) transition.

        Not available in reinforcement learning.
        """
        raise NotImplementedError

    def isTerminal(self, state):
        """
        Returns True if the current state is a terminal state. By convention,
        a terminal state has zero future rewards. Sometimes the terminal
        state(s) may have no possible actions. It is also common to think of
        the terminal state as having a self-loop action 'pass' with zero
        reward; the formulations are equivalent.
        """
        raise NotImplementedError
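

# ----------------------------------------------------------------------------
# Illustrative sketch (not part of the original Berkeley project): a minimal
# concrete subclass showing how the abstract interface above is meant to be
# implemented. The class name, the states 'A', 'B', 'TERMINAL', and all
# transition and reward numbers are invented here purely for demonstration.
class ExampleTwoStateMDP(MarkovDecisionProcess):
    """A toy MDP: from 'A', action 'go' reaches 'B' with probability 0.8 or
    stays at 'A' with probability 0.2; from 'B', action 'exit' reaches
    'TERMINAL' deterministically."""

    def getStates(self):
        return ['A', 'B', 'TERMINAL']

    def getStartState(self):
        return 'A'

    def getPossibleActions(self, state):
        if state == 'A':
            return ['go']
        if state == 'B':
            return ['exit']
        return []  # by convention, the terminal state has no actions

    def getTransitionStatesAndProbs(self, state, action):
        if state == 'A' and action == 'go':
            return [('B', 0.8), ('A', 0.2)]
        if state == 'B' and action == 'exit':
            return [('TERMINAL', 1.0)]
        return []

    def getReward(self, state, action, nextState):
        # Reward 10 for reaching the terminal state; step cost -1 otherwise.
        return 10 if nextState == 'TERMINAL' else -1

    def isTerminal(self, state):
        return state == 'TERMINAL'


if __name__ == '__main__':
    # Quick demonstration of the interface: compute the expected one-step
    # reward of an action (ignoring future value), then sample a successor
    # state using the transition distribution.
    m = ExampleTwoStateMDP()
    s, a = m.getStartState(), 'go'
    q = sum(p * m.getReward(s, a, ns)
            for ns, p in m.getTransitionStatesAndProbs(s, a))
    print('Expected one-step reward from %s via %r: %.2f' % (s, a, q))  # -1.00

    nextStates, probs = zip(*m.getTransitionStatesAndProbs(s, a))
    print('Sampled successor:', random.choices(nextStates, weights=probs)[0])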