;;; Definitions for Markov decision processes (MDPs).
;;; An MDP is defined by initial state, transition model, rewards, and
;;; distinguished terminal states. Model and rewards are hash tables
;;; indexed by state (after application of the hash-key function).
;;; The entries in the model are alists keyed by action; each action is
;;; associated with an action model: basically a list of transitions.
;;; Markov chains (i.e., stochastic processes with no distinct agent)
;;; can be defined by allowing only a no-op action in the MDP.

(defstruct mdp
  initial-state                             ;; The initial state for the problem
  (model (make-hash-table :test #'equal))   ;; Describes transition probabilities
  (rewards (make-hash-table :test #'equal)) ;; Rewards for each state
  (terminal-states nil)                     ;; List of terminal states
  (hash-key #'identity)                     ;; To convert states into hash keys
  name)                                     ;; String, identifies the problem

(defstruct (mdp-action-model (:type list))
  (transitions nil)
  (times-executed 0))

(defstruct (transition (:type list))
  destination
  probability
  (times-achieved 0))

(defun action-model (a s M)
  "Returns the action model for executing action a in state s according to model M."
  (cdr (assoc a (gethash s M) :test #'eq)))

(defun transitions (a s M)
  "Returns the transitions resulting from executing action a in state s
  according to model M."
  (mdp-action-model-transitions (action-model a s M)))

(defun actions (s M)
  "Returns the list of actions feasible in state s according to model M."
  (mapcar #'car (gethash s M)))
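
;;; A minimal usage sketch (not part of the original definitions): it builds a
;;; hypothetical two-state MDP in which action :go moves from state A to the
;;; terminal state B with probability 0.9 and stays in A with probability 0.1.
;;; The name *example-mdp*, the action :go, and the states A/B are illustrative
;;; assumptions, chosen only to show how the hash tables and list-typed structs
;;; above fit together.

(defparameter *example-mdp*
  (let ((m (make-mdp :initial-state 'a
                     :terminal-states '(b)
                     :name "example")))
    ;; Model entry for state A: an alist keyed by action, where each value is an
    ;; mdp-action-model whose transitions are (destination probability ...) lists.
    (setf (gethash 'a (mdp-model m))
          (list (cons :go
                      (make-mdp-action-model
                       :transitions (list (make-transition :destination 'b
                                                           :probability 0.9)
                                          (make-transition :destination 'a
                                                           :probability 0.1))))))
    ;; The terminal state B allows no actions (empty alist).
    (setf (gethash 'b (mdp-model m)) nil)
    ;; Rewards are keyed by state.
    (setf (gethash 'a (mdp-rewards m)) -0.04)
    (setf (gethash 'b (mdp-rewards m)) 1.0)
    m))

;;; (actions 'a (mdp-model *example-mdp*))         => (:GO)
;;; (transitions :go 'a (mdp-model *example-mdp*)) => ((B 0.9 0) (A 0.1 0))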