Code: // SARSA.c : implementation file #include "Main.h" void getLightState(); void move(); void chooseActionPrime(); void updateTemporalDifference(); void updateState(); void findLightReward (); void run();
// Learning state shared by the routines in this file.
// Every object is initialised in its declaration: C does not allow
// assignment statements outside a function body.

int state = 0;                    // combined state index: lightState + lightStatePrevious * 10
int statePrime = 0;
double reward = 0;                // set by findLightReward() from lightValue and maxLight
double discountRate = 0.25;       // weight on the Q-value term in the temporal-difference computation
double learningRate = 0.1;        // step size applied to td when q[] is updated
double lastTD = 0;                // td from the previous step, saved by updateState()
double td = 0;                    // latest temporal-difference value
int action = 0;
int actionPrime = 0;              // action code read by move(): 0 forward, 1 left, 2 right
int stateOfLightPrevious = 0;
int indexOfQ = 0;
int indexOfPreviousBestMove = 0;  // q[] index carried over from the previous step
int indexOfBestMove = 0;          // q[] index selected by chooseActionPrime()
double q[6400];                   // action-value table, indexed as action * 100 + state; zero-initialised
int frontBumpSensor = 0;
int steps = 0;
int reset = 0;
int lightState = 0;               // light-level bucket 0..9, set by getLightState()
int lightStatePrevious = 0;       // supplies the tens digit of `state`
double maxLight = 0;              // divisor used by findLightReward() to normalise the reward
double lightValue; lightValue = 0; int void main () { run(); } void getLightState(){ //range is 0-500
lightValue = getAnalogInput(2); if (lightValue>= 0 && lightValue <50){ lightState = 0;
}else if (lightValue>= 50 && lightValue <100){ lightState = 1; }else if (lightValue>= 100 && lightValue < 150){ lightState = 2; }else if (lightValue>= 150 && lightValue < 200){ lightState = 3;
}else if (lightValue>= 200 && lightValue < 250){ lightState = 4; }else if (lightValue>= 250 && lightValue < 300){ lightState = 5; }else if (lightValue>= 300 && lightValue < 350){ lightState = 6; }else if (lightValue>= 350 && lightValue < 400){ lightState = 7; }else if (lightValue>= 400 && lightValue < 450){ lightState = 8; }else if (lightValue>= 450 && lightValue < 500){ lightState = 9; }
void move(){ if(actionPrime == 0){ // Forward SetMotor ( 1 , 127 ) ; SetMotor ( 10 , 127 ) ; }
if(actionPrime == 1){ // Left SetMotor ( 1 , 127 ) ; SetMotor ( 10 , -127 ) ; } if(actionPrime == 2){ // Right SetMotor ( 1 , -127 ) ; SetMotor ( 10 , -127 ) ; } } void chooseActionPrime(){ getLightState(); state = (lightState) + (lightStatePrevious * 10);
// actions : left right forward stay int = 1; for (int i = 0; i < 4; i++){ if (q[(i*100)+state]> q[indexOfBestMove]){ indexOfBestMove = ((i*100)+state); } } } void updateTemporalDifference(){ findLightReward(); td = learningRate * (reward+(discountRate*(q[indexOfBestMove] - q[indexOfPreviousBestMove]))); q[indexOfBestMove] += learningRate*td; } void updateState(){ stateOfLightPrevious = findLightState(); indexOfPreviousBestMove = indexOfBesMove; indexOfBestMove = 0; lastTD = td; } void findLightReward (){ reward=(lightValue/maxLight); } }
// Performs one learning step: act, choose the next action from the new
// sensor reading, update the Q-table, then roll state forward.
void run(void)
{
    move();  // implementation complete
    chooseActionPrime();
    updateTemporalDifference();
    updateState();
}