1 import interfaces.Controller;
2 import interfaces.AbstractRobot;
3
4 import java.lang.*;
5 import java.util.*;
6
7
12 public class QLR3decayExplore extends Controller
13 {
14
17
18 private AbstractRobot robot;
20 private int STATES=4; private int ACTIONS=5; private int LEEWAY=6; private int REWARD_LEEWAY=1; private int SLEEP_TIME=500;
25 int STOP_THRESHOLD=98;
26
27 private int[] sensors={Controller.SENSOR_TYPE_LIGHT,Controller.SENSOR_TYPE_LIGHT,Controller.SENSOR_TYPE_LIGHT};
29
30 private int S1=0;
32 private int S2=1;
33 private int S3=2;
34 private int S4=3;
35
36 private int A1=0;
38 private int A2=1;
39 private int A3=2;
40 private int A4=3;
41 private int A5=4;
42
43 private int state, newState, prevLight, newLight, reward, action;
45 private boolean running;
46 private boolean paused=false;
47
48 private double table[][]=new double[STATES][ACTIONS];
50
51 private double learningRate=1.0;
53
54
57
58 public void initController(AbstractRobot r)
59 {
60 robot=r;
61
62 initTable();
64
65 sensors[0]=Controller.SENSOR_TYPE_LIGHT;
67 sensors[1]=Controller.SENSOR_TYPE_LIGHT;
68 sensors[2]=Controller.SENSOR_TYPE_LIGHT;
69 }
70
71 public int[] getSensors()
72 {
73 return sensors;
74 }
75
76 public void run()
77 {
78 running=true;
80
81 begin();
83 }
84
85 public void halt()
86 {
87 running=false;
90
91 robot.stopMoving();
93 }
94
95 public AbstractRobot getRobot()
96 {
97 return robot;
98 }
99
100
103
104
110 public int rand(int limit)
111 {
112 return (int) (Math.random()*(limit+1));
113 }
114
115
118 public void initTable()
119 {
120 for (int i=0;i<STATES;i++)
121 {
122 for (int j=0;j<ACTIONS;j++)
123 {
124 int x=rand(3);
125 table[i][j]=x;
126 }
127 }
128 }
129
130
133 public void updateTable()
134 {
135 table[state][action]=learningRate*(table[state][action]+reward);
137
138
150 }
151
152
157 public int Q(int STATE)
158 {
159 int highest=0;
160
161 for (int i=0;i<ACTIONS;i++)
162 {
163 if(table[STATE][i]>table[STATE][highest])
164 {
165 highest=i;
166 }
167 }
168
169 return highest;
170 }
171
172
175 public void begin()
176 {
177 int exploreRate=9;
178
179
181 if(robot.getSensor1() > robot.getSensor2() && robot.getSensor2() > robot.getSensor3()){newState=S1;}
182 if(robot.getSensor1() > robot.getSensor2() && robot.getSensor2() < robot.getSensor3()){newState=S2;}
183 if(robot.getSensor1() < robot.getSensor2() && robot.getSensor2() > robot.getSensor3()){newState=S3;}
184 if(robot.getSensor1() < robot.getSensor2() && robot.getSensor2() < robot.getSensor3()){newState=S4;}
185
186 newLight=robot.getSensor2();
188
189
193 while (running)
195 {
196 prevLight=newLight;
198
199 state=newState;
201
202 if(rand(exploreRate)==0)
204 {
205 action=rand(ACTIONS-1);
206
207 exploreRate++;
209 }
210 else {
212 action=Q(state);
213 }
214
215 if(action==A1){robot.forward();}
217 if(action==A2){robot.backward();}
218 if(action==A3){robot.right();}
219 if(action==A4){robot.left();}
220 if(action==A5){robot.stopMoving();}
221
222 try{sleep(SLEEP_TIME);}catch(Exception e){}
224
225 robot.stopMoving();
227
228 newLight=robot.getSensor2();
230
231 if (newLight > prevLight+REWARD_LEEWAY){reward=1;robot.beep();}
233 else if (newLight < prevLight){reward=-1;}
234 else {reward=0;}
235
236 if(robot.getSensor1() > robot.getSensor2() && robot.getSensor2() > robot.getSensor3()){newState=S1;}
238 if(robot.getSensor1() > robot.getSensor2() && robot.getSensor2() < robot.getSensor3()){newState=S2;}
239 if(robot.getSensor1() < robot.getSensor2() && robot.getSensor2() > robot.getSensor3()){newState=S3;}
240 if(robot.getSensor1() < robot.getSensor2() && robot.getSensor2() < robot.getSensor3()){newState=S4;}
241
242 updateTable();
244
245
248 if(robot.getSensor2()>STOP_THRESHOLD)
250 {
251 robot.beep();
253 robot.beep();
254 robot.beep();
255 learningRate=0.0;
256 }
257 }
258 }
259 }
260