1   import interfaces.Controller;
2   import interfaces.AbstractRobot;
3   
4   import java.lang.*;
5   import java.util.*;
6   
7   /**
8   * A Q-learning light seeking controller which uses three light sensors
9   *
10  * @author Graham Ritchie
11  */
12  public class QLR3 extends Controller
13  {
14      /******************************************************************
15                    Variables, parameters & data structures
16      *******************************************************************/
17      
18      private AbstractRobot robot;    // this controller's robot
19      
20      private int STATES=4;           // no. of states
21      private int ACTIONS=5;          // no. of actions
22      private int LEEWAY=6;           // leeway for state decision
23      private int REWARD_LEEWAY=1;    // leeway for reward
24      private int SLEEP_TIME=500;
25      int STOP_THRESHOLD=1000;
26      
27      // sensor tyoe array
28      private int[] sensors={Controller.SENSOR_TYPE_LIGHT,Controller.SENSOR_TYPE_LIGHT,Controller.SENSOR_TYPE_LIGHT};
29      
30      // states
31      private int S1=0;
32      private int S2=1;
33      private int S3=2;
34      private int S4=3;
35      
36      // actions
37      private int A1=0;
38      private int A2=1;
39      private int A3=2;
40      private int A4=3;
41      private int A5=4;
42      
43      // variables
44      private int state, newState, prevLight, newLight, reward, action;
45      private boolean running;
46      
47      // the Q table
48      private double table[][]=new double[STATES][ACTIONS]; 
49      
50      
51      /******************************************************************
52                        Methods required by Controller
53      *******************************************************************/
54      
55      public void initController(AbstractRobot r)
56      {
57          robot=r;
58          
59          // initialise the Q table
60          initTable();
61      }
62      
63      public int[] getSensors()
64      {
65          return sensors;
66      }
67      
68      public void run()
69      {
70          // set running to true
71          running=true;
72          
73          // call main loop (will return when running=false)
74          begin();
75      }
76      
77      public void halt()
78      {
79          // set running to false, this will force run() to return, and therefore 
80          // stop the controller's thread
81          running=false;
82          
83          // also stop the robot
84          robot.stopMoving();
85      }
86      
87      public AbstractRobot getRobot()
88      {
89          return robot;
90      }
91      
92      /******************************************************************
93                                Other methods
94      *******************************************************************/
95      
96      /**
97      * Generates a random number between 0 and the limit
98      *
99      * @param limit
100     * @return the random number as an int
101     */
102     public int rand(int limit)
103     {
104         return (int) (Math.random()*(limit+1));
105     } 
106     
107     /**
108     * Initialises the Q-table entries to random numbers between 0 and 3
109     */
110     public void initTable()
111     {
112         for (int i=0;i<STATES;i++)
113         {
114             for (int j=0;j<ACTIONS;j++)
115             {
116                 int x=rand(3);
117                 table[i][j]=x;
118             }           
119         }
120     }
121     
122     /**
123     * Updates the utility table according to the Q learning equation
124     */
125     public void updateTable()
126     {
127         // main q learning update equation
128         table[state][action]=table[state][action]+reward;
129     }
130     
131     /**
132     * The main Q(s,a) function
133     *
134     * @return the current best action to perform (as an int)
135     */
136     public int Q(int STATE)
137     {
138         int highest=0;
139         
140         for (int i=0;i<ACTIONS;i++)
141         {
142             if(table[STATE][i]>table[STATE][highest])
143             {
144                 highest=i;
145             }
146         }
147         
148         return highest;
149     }
150     
151     /**
152     * Sets the system going
153     */
154     public void begin()
155     {
156         // establish initial state of robot
157         
158         if(robot.getSensor1() > robot.getSensor2() && robot.getSensor2() > robot.getSensor3()){newState=S1;}
159         if(robot.getSensor1() > robot.getSensor2() && robot.getSensor2() < robot.getSensor3()){newState=S2;}
160         if(robot.getSensor1() < robot.getSensor2() && robot.getSensor2() > robot.getSensor3()){newState=S3;}
161         if(robot.getSensor1() < robot.getSensor2() && robot.getSensor2() < robot.getSensor3()){newState=S4;}
162         
163         // get current light level
164         newLight=robot.getSensor2();
165         
166         // main loop
167         while (running)
168         {
169             // save current light level
170             prevLight=newLight;
171             
172             // establish current state of robot
173             state=newState;
174             
175             // choose action randomly 10% of thetime
176             if(rand(9)==0){action=rand(ACTIONS-1);}
177             
178             // and according to Q table the rest
179             else{action=Q(state);}
180             
181             // perform chosen action
182             if(action==A1){robot.forward();}
183             if(action==A2){robot.backward();}
184             if(action==A3){robot.right();}
185             if(action==A4){robot.left();}
186             if(action==A5){robot.stopMoving();}
187             
188             // sleep for a wee bit to allow the action to have some effect
189             try{sleep(SLEEP_TIME);}catch(Exception e){}
190             
191             // stop robot to begin loop again
192             robot.stopMoving();
193             
194             // determine new light level
195             newLight=robot.getSensor2();
196             
197             // and reward accordingly
198             if (newLight > prevLight+REWARD_LEEWAY){reward=1;robot.beep();}
199             else if (newLight < prevLight){reward=-1;}
200             else {reward=0;}
201             
202             // establish new state of robot
203             if(robot.getSensor1() > robot.getSensor2() && robot.getSensor2() > robot.getSensor3()){newState=S1;}
204             if(robot.getSensor1() > robot.getSensor2() && robot.getSensor2() < robot.getSensor3()){newState=S2;}
205             if(robot.getSensor1() < robot.getSensor2() && robot.getSensor2() > robot.getSensor3()){newState=S3;}
206             if(robot.getSensor1() < robot.getSensor2() && robot.getSensor2() < robot.getSensor3()){newState=S4;}
207             
208             // update table
209             updateTable();
210             
211             // check if stop threshold is met
212             if(robot.getSensor2()>STOP_THRESHOLD)
213             {
214                 // stop
215                 robot.beep();
216                 robot.beep();
217                 robot.beep();
218                 break;
219             }
220         }
221     }
222 }
223