1   import interfaces.Controller;
2   import interfaces.AbstractRobot;
3   
4   import java.lang.*;
5   import java.util.*;
6   
7   /**
8   * An 'inverse' version of the Q-learner - instead of being rewarded for seeking 
9   * light the robot gets rewarded for getting further away from it
10  *
11  * @author Graham Ritchie
12  */
13  public class InverseQLR3 extends Controller
14  {
15      /******************************************************************
16                    Variables, parameters & data structures
17      *******************************************************************/
18      
19      private AbstractRobot robot;    // this controller's robot
20      
21      private int STATES=4;           // no. of states
22      private int ACTIONS=5;          // no. of actions
23      private int LEEWAY=6;           // leeway for state decision
24      private int REWARD_LEEWAY=1;    // leeway for reward
25      private int SLEEP_TIME=500;
26      int STOP_THRESHOLD=1000;
27      
28      // sensor type array
29      private int[] sensors={Controller.SENSOR_TYPE_LIGHT,Controller.SENSOR_TYPE_LIGHT,Controller.SENSOR_TYPE_LIGHT};
30      
31      // states
32      private int S1=0;
33      private int S2=1;
34      private int S3=2;
35      private int S4=3;
36      
37      // actions
38      private int A1=0;
39      private int A2=1;
40      private int A3=2;
41      private int A4=3;
42      private int A5=4;
43      
44      // variables
45      private int state, newState, prevLight, newLight, reward, action;
46      private boolean running;
47      
48      // the Q table
49      private double table[][]=new double[STATES][ACTIONS]; 
50      
51      
52      /******************************************************************
53                        Methods required by Controller
54      *******************************************************************/
55      
56      public void initController(AbstractRobot r)
57      {
58          robot=r;
59          
60          // initialise the Q table
61          initTable();
62      }
63      
64      public int[] getSensors()
65      {
66          return sensors;
67      }
68      
69      public void run()
70      {
71          // set running to true
72          running=true;
73          
74          // call main loop (will return when running=false)
75          begin();
76      }
77      
78      public void halt()
79      {
80          // set running to false, this will force run() to return, and therefore 
81          // stop the controller's thread
82          running=false;
83          
84          // also stop the robot
85          robot.stopMoving();
86      }
87      
88      public AbstractRobot getRobot()
89      {
90          return robot;
91      }
92      
93      /******************************************************************
94                                Other methods
95      *******************************************************************/
96      
97      /**
98      * Generates a random number between 0 and the limit
99      *
100     * @param limit
101     * @return the random number as an int
102     */
103     public int rand(int limit)
104     {
105         return (int) (Math.random()*(limit+1));
106     } 
107     
108     /**
109     * Initialises the Q-table entries to random numbers between 0 and 3
110     */
111     public void initTable()
112     {
113         for (int i=0;i<STATES;i++)
114         {
115             for (int j=0;j<ACTIONS;j++)
116             {
117                 int x=rand(3);
118                 table[i][j]=x;
119             }           
120         }
121     }
122     
123     /**
124     * Updates the utility table according to the Q learning equation
125     */
126     public void updateTable()
127     {
128         // main q learning update equation
129         table[state][action]=table[state][action]+reward;
130     }
131     
132     /**
133     * The main Q(s,a) function
134     *
135     * @return the current best action to perform (as an int)
136     */
137     public int Q(int STATE)
138     {
139         int highest=0;
140         
141         for (int i=0;i<ACTIONS;i++)
142         {
143             if(table[STATE][i]>table[STATE][highest])
144             {
145                 highest=i;
146             }
147         }
148         
149         return highest;
150     }
151     
152     /**
153     * Sets the system going
154     */
155     public void begin()
156     {
157         int exploreRate=9;
158         
159         // establish initial state of robot
160         
161         if(robot.getSensor1() > robot.getSensor2() && robot.getSensor2() > robot.getSensor3()){newState=S1;}
162         if(robot.getSensor1() > robot.getSensor2() && robot.getSensor2() < robot.getSensor3()){newState=S2;}
163         if(robot.getSensor1() < robot.getSensor2() && robot.getSensor2() > robot.getSensor3()){newState=S3;}
164         if(robot.getSensor1() < robot.getSensor2() && robot.getSensor2() < robot.getSensor3()){newState=S4;}
165         
166         // get current light level
167         newLight=robot.getSensor2();
168         
169         // the tolerance of the robot to light, used to help move 
170         // the robot towards light
171         // int tolerance=0;
172         
173         // main loop
174         while (running)
175         {
176 
177             // save current light level
178             prevLight=newLight;
179             
180             // establish current state of robot
181             state=newState;
182             
183             // choose action randomly 10% of thetime
184             //if(rand(9)==0){action=rand(ACTIONS-1);}
185             if(rand(exploreRate)==0)
186             {
187                 action=rand(ACTIONS-1);
188                 // System.out.println("Exploring... rate: "+exploreRate);
189                 exploreRate++;
190             }
191             // and according to Q table the rest
192             else{action=Q(state);}
193             
194             // perform chosen action
195             if(action==A1){robot.forward();}
196             if(action==A2){robot.backward();}
197             if(action==A3){robot.right();}
198             if(action==A4){robot.left();}
199             if(action==A5){robot.stopMoving();}
200             
201             // sleep for a wee bit to allow the action to have some effect
202             try{sleep(SLEEP_TIME);}catch(Exception e){}
203             
204             // stop robot to begin loop again
205             robot.stopMoving();
206             
207             // determine new light level
208             newLight=robot.getSensor2();
209             
210             // and reward accordingly
211             if (newLight > prevLight+REWARD_LEEWAY){reward=-1;robot.beep();}
212             else if (newLight < prevLight){reward=1;}
213             else {reward=0;}
214             
215             // establish new state of robot
216             if(robot.getSensor1() > robot.getSensor2() && robot.getSensor2() > robot.getSensor3()){newState=S1;}
217             if(robot.getSensor1() > robot.getSensor2() && robot.getSensor2() < robot.getSensor3()){newState=S2;}
218             if(robot.getSensor1() < robot.getSensor2() && robot.getSensor2() > robot.getSensor3()){newState=S3;}
219             if(robot.getSensor1() < robot.getSensor2() && robot.getSensor2() < robot.getSensor3()){newState=S4;}
220             
221             // update table
222             updateTable();
223             
224             // check if stop threshold is met
225             if(robot.getSensor2()>STOP_THRESHOLD)
226             {
227                 // stop
228                 robot.beep();
229                 robot.beep();
230                 robot.beep();
231                 break;
232             }
233         }
234     }
235 }
236