import interfaces.Controller;
import interfaces.AbstractRobot;
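// Q-learning controller that learns to drive the robot towards brighter light.
// The world is reduced to four states, derived from the relative readings of the three
// light sensors, and five actions (forward, backward, right, left, stop). Rewards are
// based on the change in the reading of sensor 2 after each action.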
public class QLR3new extends Controller
{
private AbstractRobot robot;
private static final int STATES=4; // number of discrete states
private static final int ACTIONS=5; // number of available actions
private static final int REWARD_LEEWAY=1; // minimum rise in light level that earns a positive reward
private static final int SLEEP_TIME=500; // how long (ms) each action runs before re-sensing
private static final int STOP_THRESHOLD=98; // light level at which learning is switched off
private int[] sensors={Controller.SENSOR_TYPE_LIGHT,Controller.SENSOR_TYPE_LIGHT,Controller.SENSOR_TYPE_LIGHT};
// State indices (relative ordering of the three light sensors).
private static final int S1=0; // sensor1 > sensor2 > sensor3
private static final int S2=1; // sensor1 > sensor2 < sensor3
private static final int S3=2; // sensor1 < sensor2 > sensor3
private static final int S4=3; // sensor1 < sensor2 < sensor3
// Action indices.
private static final int A1=0; // forward
private static final int A2=1; // backward
private static final int A3=2; // right
private static final int A4=3; // left
private static final int A5=4; // stop
private int state, newState, prevLight, newLight, reward, action;
private boolean running;
private boolean paused=false;
private double[][] table=new double[STATES][ACTIONS]; // Q-table of expected values for each state/action pair
private double learningRate=1.0; // scales the reward applied on each update; 0 stops learning
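// Stores the robot handle, seeds the Q-table with random values and declares the three light sensors.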
public void initController(AbstractRobot r)
{
robot=r;
initTable();
sensors[0]=Controller.SENSOR_TYPE_LIGHT;
sensors[1]=Controller.SENSOR_TYPE_LIGHT;
sensors[2]=Controller.SENSOR_TYPE_LIGHT;
}
public int[] getSensors()
{
return sensors;
}
public void run()
{
running=true;
begin();
}
public void halt()
{
running=false;
robot.stopMoving();
}
public AbstractRobot getRobot()
{
return robot;
}
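// Returns a random integer uniformly distributed between 0 and limit (inclusive).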
public int rand(int limit)
{
return (int) (Math.random()*(limit+1));
}
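// Fills the Q-table with random integers in 0..3 so the initial action preferences are arbitrary.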
public void initTable()
{
for (int i=0;i<STATES;i++)
{
for (int j=0;j<ACTIONS;j++)
{
int x=rand(3);
table[i][j]=x;
}
}
}
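// Updates the Q-value of the last state/action pair with the reward just received,
// scaled by the learning rate.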
public void updateTable()
{
table[state][action]+=learningRate*reward; // accumulate the reward, scaled by the learning rate
}
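// Greedy policy: returns the index of the highest-valued action for the given state
// (ties go to the lower index).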
public int Q(int STATE)
{
int highest=0;
for (int i=0;i<ACTIONS;i++)
{
if(table[STATE][i]>table[STATE][highest])
{
highest=i;
}
}
return highest;
}
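// Main learning loop: classify the state, choose an action (occasionally at random, with the
// chance of exploring shrinking over time), run it for SLEEP_TIME ms, reward any rise in the
// light reading, and update the Q-table. Once the reading passes STOP_THRESHOLD the learning
// rate is set to zero and the table is left unchanged from then on.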
public void begin()
{
int exploreRate=9; // a random action is chosen with probability 1/(exploreRate+1)
// Classify the starting state from the relative ordering of the three light sensors
// (ties leave newState unchanged).
if(robot.getSensor1() > robot.getSensor2() && robot.getSensor2() > robot.getSensor3()){newState=S1;}
if(robot.getSensor1() > robot.getSensor2() && robot.getSensor2() < robot.getSensor3()){newState=S2;}
if(robot.getSensor1() < robot.getSensor2() && robot.getSensor2() > robot.getSensor3()){newState=S3;}
if(robot.getSensor1() < robot.getSensor2() && robot.getSensor2() < robot.getSensor3()){newState=S4;}
newLight=robot.getSensor2(); // light reading used for the reward signal
while (running)
{
while (!paused && running) {
prevLight=newLight;
state=newState;
if(rand(exploreRate)==0)
{
// Explore: try a random action and make exploration less likely next time.
action=rand(ACTIONS-1);
exploreRate++;
}
else{action=Q(state);} // Exploit: take the best-known action for this state.
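// Execute the chosen action.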
if(action==A1){robot.forward();}
if(action==A2){robot.backward();}
if(action==A3){robot.right();}
if(action==A4){robot.left();}
if(action==A5){robot.stopMoving();}
try{Thread.sleep(SLEEP_TIME);}catch(InterruptedException e){} // let the action run before measuring its effect
robot.stopMoving();
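// Measure the new light level: a rise of more than REWARD_LEEWAY earns +1 (and a beep),
// a drop earns -1, anything in between is neutral.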
newLight=robot.getSensor2();
if (newLight > prevLight+REWARD_LEEWAY){reward=1;robot.beep();}
else if (newLight < prevLight){reward=-1;}
else {reward=0;}
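// Re-classify the state from the latest sensor readings (ties leave newState unchanged).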
if(robot.getSensor1() > robot.getSensor2() && robot.getSensor2() > robot.getSensor3()){newState=S1;}
if(robot.getSensor1() > robot.getSensor2() && robot.getSensor2() < robot.getSensor3()){newState=S2;}
if(robot.getSensor1() < robot.getSensor2() && robot.getSensor2() > robot.getSensor3()){newState=S3;}
if(robot.getSensor1() < robot.getSensor2() && robot.getSensor2() < robot.getSensor3()){newState=S4;}
updateTable();
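// Once sensor 2 exceeds STOP_THRESHOLD, beep three times and set the learning rate to zero
// so further updates leave the table unchanged.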
if(robot.getSensor2()>STOP_THRESHOLD)
{
robot.beep();
robot.beep();
robot.beep();
learningRate=0.0;
}
}
try{Thread.sleep(SLEEP_TIME);}catch(InterruptedException e){} // idle while the controller is paused
}
}
public void pause() {
paused=true;
}
public void unpause(){
paused=false;
}
}