1   import interfaces.Controller;
2   import interfaces.AbstractRobot;
3   
4   import java.lang.*;
5   import java.util.*;
6   
7   /**
8   * Another version of the Q-learner whose explore rate decays over time
9   *
10  * @author Graham Ritchie
11  */
12  public class QLR3decayExplore extends Controller
13  {
14      /******************************************************************
15                    Variables, parameters & data structures
16      *******************************************************************/
17      
18      private AbstractRobot robot;    // this controller's robot
19      
20      private int STATES=4;           // no. of states
21      private int ACTIONS=5;          // no. of actions
22      private int LEEWAY=6;           // leeway for state decision
23      private int REWARD_LEEWAY=1;    // leeway for reward
24      private int SLEEP_TIME=500;
25      int STOP_THRESHOLD=98;
26      
27      // sensor type array
28      private int[] sensors={Controller.SENSOR_TYPE_LIGHT,Controller.SENSOR_TYPE_LIGHT,Controller.SENSOR_TYPE_LIGHT};
29      
30      // states
31      private int S1=0;
32      private int S2=1;
33      private int S3=2;
34      private int S4=3;
35      
36      // actions
37      private int A1=0;
38      private int A2=1;
39      private int A3=2;
40      private int A4=3;
41      private int A5=4;
42      
43      // variables
44      private int state, newState, prevLight, newLight, reward, action;
45      private boolean running;
46      private boolean paused=false;   
47  
48      // the Q table
49      private double table[][]=new double[STATES][ACTIONS]; 
50      
51      // learning rate
52      private double learningRate=1.0;
53      
54      /******************************************************************
55                        Methods required by Controller
56      *******************************************************************/
57      
58      public void initController(AbstractRobot r)
59      {
60          robot=r;
61          
62          // initialise the Q table
63          initTable();
64          
65          // initialise sensor types
66          sensors[0]=Controller.SENSOR_TYPE_LIGHT;
67          sensors[1]=Controller.SENSOR_TYPE_LIGHT;
68          sensors[2]=Controller.SENSOR_TYPE_LIGHT;
69      }
70      
71      public int[] getSensors()
72      {
73          return sensors;
74      }
75      
76      public void run()
77      {
78          // set running to true
79          running=true;
80          
81          // call main loop (will return when running=false)
82          begin();
83      }
84      
85      public void halt()
86      {
87          // set running to false, this will force run() to return, and therefore 
88          // stop the controller's thread
89          running=false;
90          
91          // also stop the robot
92          robot.stopMoving();
93      }
94      
95      public AbstractRobot getRobot()
96      {
97          return robot;
98      }
99      
100     /******************************************************************
101                               Other methods
102     *******************************************************************/
103     
104     /**
105     * Generates a random number between 0 and the limit
106     *
107     * @param limit
108     * @return the random number as an int
109     */
110     public int rand(int limit)
111     {
112         return (int) (Math.random()*(limit+1));
113     } 
114     
115     /**
116     * Initialises the Q-table entries to 0.0
117     */
118     public void initTable()
119     {
120         for (int i=0;i<STATES;i++)
121         {
122             for (int j=0;j<ACTIONS;j++)
123             {
124                 int x=rand(3);
125                 table[i][j]=x;
126             }           
127         }
128     }
129     
130     /**
131     * Updates the utility table according to the Q learning equation
132     */
133     public void updateTable()
134     {
135         // main q learning update equation
136         table[state][action]=learningRate*(table[state][action]+reward);
137         
138         /*
139         // print the Q table, remember this can only happen in the simulator
140         System.out.println("\nQ table: ");
141         for (int i=0;i<STATES;i++)
142         {
143             for (int j=0;j<ACTIONS;j++)
144             {
145                 System.out.print(table[i][j]+" ");
146             }
147             System.out.println();
148         }
149         */
150     }
151     
152     /**
153     * The main Q(s,a) function
154     *
155     * @return the current best action to perform (as an int)
156     */
157     public int Q(int STATE)
158     {
159         int highest=0;
160         
161         for (int i=0;i<ACTIONS;i++)
162         {
163             if(table[STATE][i]>table[STATE][highest])
164             {
165                 highest=i;
166             }
167         }
168         
169         return highest;
170     }
171     
172     /**
173     * Sets the system going
174     */
175     public void begin()
176     {
177         int exploreRate=9;
178         
179         // establish initial state of robot
180         
181         if(robot.getSensor1() > robot.getSensor2() && robot.getSensor2() > robot.getSensor3()){newState=S1;}
182         if(robot.getSensor1() > robot.getSensor2() && robot.getSensor2() < robot.getSensor3()){newState=S2;}
183         if(robot.getSensor1() < robot.getSensor2() && robot.getSensor2() > robot.getSensor3()){newState=S3;}
184         if(robot.getSensor1() < robot.getSensor2() && robot.getSensor2() < robot.getSensor3()){newState=S4;}
185         
186         // get current light level
187         newLight=robot.getSensor2();
188         
189         // the tolerance of the robot to light, used to help move 
190         // the robot towards light
191         // int tolerance=0;
192         
193         // main loop
194         while (running)
195         {
196             // save current light level
197             prevLight=newLight;
198             
199             // establish current state of robot
200             state=newState;
201             
202             // choose action randomly according to explore rate
203             if(rand(exploreRate)==0)
204             {
205                 action=rand(ACTIONS-1);
206                 
207                 // increment explore rate (this means we will explore less)
208                 exploreRate++;
209             }
210             else // and according to Q table the rest
211             {
212                 action=Q(state);
213             }
214             
215             // perform chosen action
216             if(action==A1){robot.forward();}
217             if(action==A2){robot.backward();}
218             if(action==A3){robot.right();}
219             if(action==A4){robot.left();}
220             if(action==A5){robot.stopMoving();}
221             
222             // sleep for a wee bit to allow the action to have some effect
223             try{sleep(SLEEP_TIME);}catch(Exception e){}
224             
225             // stop robot to begin loop again
226             robot.stopMoving();
227             
228             // determine new light level
229             newLight=robot.getSensor2();
230             
231             // and reward accordingly
232             if (newLight > prevLight+REWARD_LEEWAY){reward=1;robot.beep();}
233             else if (newLight < prevLight){reward=-1;}
234             else {reward=0;}
235             
236             // establish new state of robot
237             if(robot.getSensor1() > robot.getSensor2() && robot.getSensor2() > robot.getSensor3()){newState=S1;}
238             if(robot.getSensor1() > robot.getSensor2() && robot.getSensor2() < robot.getSensor3()){newState=S2;}
239             if(robot.getSensor1() < robot.getSensor2() && robot.getSensor2() > robot.getSensor3()){newState=S3;}
240             if(robot.getSensor1() < robot.getSensor2() && robot.getSensor2() < robot.getSensor3()){newState=S4;}
241             
242             // update table
243             updateTable();
244             
245             // decay learning rate
246             //learningRate-=0.02;
247             
248             // check if stop threshold is met
249             if(robot.getSensor2()>STOP_THRESHOLD)
250             {
251                 // stop
252                 robot.beep();
253                 robot.beep();
254                 robot.beep();
255                 learningRate=0.0;
256             }
257         }
258     }
259 }
260