// MarkovDecisionProcess.java
package aima.core.probability.mdp;
import java.util.Set;
import aima.core.agent.Action;
/**
 * Artificial Intelligence A Modern Approach (3rd Edition): page 647.<br>
 * <br>
 * A sequential decision problem for a fully observable, stochastic environment
 * with a Markovian transition model and additive rewards is called a <b>Markov
 * decision process</b>, or <b>MDP</b>, and consists of a set of states (with an
 * initial state s<sub>0</sub>); a set ACTIONS(s) of actions in each state; a
 * transition model P(s' | s, a); and a reward function R(s).<br>
 * <br>
 * <b>Note:</b> Some definitions of MDPs allow the reward to depend on the
 * action and outcome too, so the reward function is R(s, a, s'). This
 * simplifies the description of some environments but does not change the
 * problem in any fundamental way.
 *
 * @param <S>
 *            the state type.
 * @param <A>
 *            the action type.
 *
 * @author Ciaran O'Reilly
 * @author Ravi Mohan
 *
 */
public interface MarkovDecisionProcess<S, A> {
    /**
     * Get the set of states associated with the Markov decision process.
     *
     * @return the set of states associated with the Markov decision process.
     */
    Set<S> states();

    /**
     * Get the initial state s<sub>0</sub> for this instance of a Markov
     * decision process.
     *
     * @return the initial state s<sub>0</sub>.
     */
    S getInitialState();

    /**
     * Get the set ACTIONS(s) of actions available in state s.
     *
     * @param s
     *            the state.
     * @return the set of actions available in state s.
     */
    Set<A> actions(S s);

    /**
     * Return the probability of going from state s using action a to s' based
     * on the underlying transition model P(s' | s, a).
     *
     * @param sDelta
     *            the state s' being transitioned to.
     * @param s
     *            the state s being transitioned from.
     * @param a
     *            the action used to move from state s to s'.
     * @return the probability of going from state s using action a to s'.
     */
    double transitionProbability(S sDelta, S s, A a);

    /**
     * Get the reward associated with being in state s.
     *
     * @param s
     *            the state whose reward is sought.
     * @return the reward associated with being in state s.
     */
    double reward(S s);
}