// rlabstr-3d.cpp

/**
 * @file 
 * @author  Michal Malý <mmmaly+rlabstr@gmail.com>
 * @version 2.0
 *
 * @section LICENSE
 *
 * @section DESCRIPTION
 *
 * The source code for Reinforcement Learning with Abstraction
 */

#include<algorithm>
#include<cmath>
#include<cstdlib>
#include<fstream>
#include<iomanip>
#include<iostream>
#include<sstream>
#include<string>
#include<vector>

#include<assert.h>
#include<sys/times.h>
#include<unistd.h>


using namespace std;


int include_threshold = 0;/**<Adaptive threshold (in time steps) for including states and edges from the old model. While it is 0, generate_cnf(false) includes nothing from the old model.*/

/* Originally described as: beginning at 10 time steps, increasing 1.5-times at each unsuccessful attempt and decreasing 2-times at each success.
   NOTE(review): this description looks stale -- the variable is initialised to 0 above. TODO: update this.
*/


int actions;/**<Number of actions.*/
int states;/**<Number of states of the model that is currently being searched for.*/
int observations;/**<Number of observations.*/


int offset_obs;/**<For the variables of the resulting CNF formula for SAT solver, this describes the offset -- beginning of the block of variables for observation predicates (see obs()).*/
int offset_tr;/**<For the variables of the resulting CNF formula for SAT solver, this describes the offset -- beginning of the block of variables for transition predicates (see tr()).*/
int offset_pos;/**<For the variables of the resulting CNF formula for SAT solver, this describes the offset -- beginning of the block of variables for position predicates (see pos()).*/


vector<int> data_observation;/**<Data remembered by the agent -- "experience" of what was observed at given time.*/
vector<int> data_action;/**<Data remembered by the agent -- "experience" of what was done at given time -- which action was executed.*/

/** Prints the command line syntax to the standard error output and terminates the program with exit code 1.*/
void help()
{
	    cerr << "Syntax: " << "run-environment" << " <maze-file>" << endl
	         << "The <maze-file> specifies the maze." << endl;
	    exit(1);
}

int xsize;/**<Horizontal size of the environment.*/
int ysize;/**<Vertical size of the environment.*/
int zsize;/**<Height of the environment.*/

vector<vector<vector<int> > > maze;/**<Maze data -- maze[x][y][z] is 0/1 if there is a free space/a wall.*/
vector<vector<vector<int> > > maze_visited;/**<Time of agent's most recent visit of the cell maze_visited[x][y][z], -1 if it was never visited. Recorded for visualisation and debugging purpose.*/

/** Returns the observation the environment gives at the cell [x,y,z].
 *
 * The observation is a sum of weighted maze values of the six neighbouring cells;
 * the weight 2^a belongs to the neighbour in the direction of the action number a,
 * so with 0/1 maze values the a-th bit is set exactly when a wall is in that direction.
 * The cell must lie strictly inside the maze (asserted), so that all neighbours exist.
 */
int get_observation(int x, int y, int z)
{
  assert(x>=1);
  assert(x<xsize-1);
  assert(y>=1);
  assert(y<ysize-1);
  assert(z>=1);
  assert(z<zsize-1);

  int walls = maze[x+1][y][z];   // action 0: +x
  walls += 2*maze[x][y+1][z];    // action 1: +y
  walls += 4*maze[x-1][y][z];    // action 2: -x
  walls += 8*maze[x][y-1][z];    // action 3: -y
  walls += 16*maze[x][y][z+1];   // action 4: +z
  walls += 32*maze[x][y][z-1];   // action 5: -z

  return walls;
}

/** Writes the remembered history into the file "history.txt" (overwriting it).
 *
 * The format is: initial observation, then for each of the first time_step steps
 * the executed action followed by the resulting observation; every number is followed by a space.
 */
void writehistory(int time_step)
{
  ofstream historyout("history.txt");

  historyout << data_observation[0] << " ";

  int step = 0;
  while(step < time_step)
  {
    historyout << data_action[step] << " ";
    historyout << data_observation[step+1] << " ";
    ++step;
  }
}


/** Returns the number of the CNF variable meaning "observation o is observed in the state s".*/
int obs(int o,int s)
{
	const int block_start = offset_obs + o*states; // first variable belonging to the observation o
	return block_start + s;
}
/** Returns the number of the CNF variable meaning "the action a leads from the state s1 to the state s2".*/
int tr(int s1,int a,int s2)
{
	const int source_and_action = s1*actions + a; // index of the pair (s1,a)
	return offset_tr + source_and_action*states + s2;
}
/** Returns the number of the CNF variable meaning "the agent is in the state s at the time t".*/
int pos(int t, int s)
{
	const int block_start = offset_pos + t*states; // first variable belonging to the time t
	return block_start + s;
}

/** Looks up, in the remembered history, the observation made at the time t. The index is not range-checked.*/
int observation(int t)
{
    const vector<int> &remembered = data_observation;
    return remembered[t];
}
/** Looks up, in the remembered history, the action executed at the time t. The index is not range-checked.*/
int what_action(int t)
{
	const vector<int> &remembered = data_action;
	return remembered[t];
}

/** Builds the vector<int> {x1, x2, x3} -- used as a clause with three literals.*/
vector<int> tuple(int x1, int x2, int x3)
{
    const int literals[] = { x1, x2, x3 };
    return vector<int>(literals, literals + 3);
}

/** Builds the vector<int> {x1, x2} -- used as a clause with two literals.*/
vector<int> tuple(int x1, int x2)
{
    vector<int> literals(2, x1);
    literals[1] = x2;
    return literals;
}

/** Builds the one-element vector<int> {x1} -- used as a unit clause.*/
vector<int> tuple(int x1)
{
    return vector<int>(1, x1);
}

/**
 * \defgroup ModelOfTheWorld Agent's model of the world and derived data.
 * @{
 */

vector<vector<int> > transition_state_action;/**<Describes the transition function, i.e. in some state, what happens when we execute some action. Indexed [state][action]; read_sat_solution() stores a negative number for transitions that were not executed yet.*/
vector<vector<bool> > transition_state_action_verified;/**<Has the agent executed that action in his experience?*/
vector<vector<int> > transition_state_action_visited;/**<Last time we visited this edge (executed that action), -1 if never.*/
vector<int> state_visited;/**<Last time we visited this state, -1 if never.*/
vector<int> state_statistics;/**<Number of times we visited the state.*/
bool all_world_explored = false; /**<Agent indicates whether we have explored everything.*/

/**@}*/

int count_old_states = 0;/**<Count of the states used from the old model to produce a new model.*/
int count_old_edges= 0;/**<Count of the edges used from the old model to produce a new model.*/

	
/** Generates a CNF file with the clauses according to the recorded observation and action history.
*/
void generate_cnf(bool include_old = false)
{
	
//int akcii,states,observations,time_step;
//int offset_obs,offset_tr,offset_pos;

int time_step;

vector<int> data_observation;
vector<int> data_action;


	if(states<0 || states > 10000)
	{
	    cerr << "Wrong number of states!" << endl << endl;
	    help();
	}
	
  
	ifstream historyin("history.txt");
	if(!historyin)
	{
	    cerr << "Could not open file "  << endl << endl;
	    help();
	}

	ofstream cnfout("hist.cnf.test");
	if(!cnfout)
	{
	    cerr << "Could not open file "  << endl << endl;
	    help();
	}
	
//#include"params.h"

	vector<vector<int> > clauses;

	data_observation.resize(1);
	historyin >> data_observation[0];
	
	for(time_step=0;time_step<10000;time_step++)
	{
	    int a,p;
	    historyin >> a >> p;
	    if(!historyin)
		break;
	    data_action.push_back(a);
	    data_observation.push_back(p);
	}

	cerr << "time_step=" << time_step << endl;

	//int offset_obs,offset_tr,offset_pos;

	offset_obs = 1;
	offset_tr = offset_obs + observations*states;
	offset_pos = offset_tr + states*actions*states;
	
	int variables_count = offset_pos + (time_step+1) * states -1;
	

	for(int s=0;s<states;s++)
	{
		for(int o=0;o<observations;o++)
			for(int o2=0;o2<observations;o2++)	
				if(o!=o2)
					clauses.push_back(tuple(-obs(o,s),-obs(o2,s)));
	}

	for(int s=0;s<states;s++)
	{
		vector<int> vars;
		for(int o=0;o<observations;o++)
			vars.push_back(obs(o,s));
		clauses.push_back(vars);
	}

	
	for(int s=0;s<states;s++)
		for(int s2=0;s2<states;s2++)
			for(int s3=0;s3<states;s3++)
			{
				for(int a=0;a<actions;a++)
					if(s2!=s3)
						clauses.push_back(tuple(-tr(s,a,s2),-tr(s,a,s3)));
			}
	
	for(int s=0;s<states;s++)
		for(int a=0;a<actions;a++)
		{
			vector<int> vars;
			for(int s2=0;s2<states;s2++)
				vars.push_back(tr(s,a,s2));
			clauses.push_back(vars);
		}

	for(int t=0;t<time_step;t++)
		for(int s=0;s<states;s++)
			for(int s2=0;s2<states;s2++)
				if(s!=s2)
					clauses.push_back(tuple(-pos(t,s),-pos(t,s2)));
	
	for(int t=0;t<=time_step;t++)
	{
		vector<int> vars;
		for(int s=0;s<states;s++)
			vars.push_back(pos(t,s));
		clauses.push_back(vars);
	}
	
	clauses.push_back(tuple(pos(0,0)));
				
	for(int t=0;t<=time_step;t++)
		for(int s=0;s<states;s++)
		{
			clauses.push_back(tuple(obs(observation(t),s),-pos(t,s)));
		}
		
	for(int t=0;t<time_step;t++)
		for(int s=0;s<states;s++)
			for(int s2=0;s2<states;s2++)
			{
				clauses.push_back(tuple(tr(s,what_action(t),s2),-pos(t,s),-pos(t+1,s2)));
			}

/* Here we add an additional assumption, that pairs of actions up-down and left-right are opposite, i.e. if the agent gets by action from state s1 to a different (!) state s2, he also gets from s2 to s1 by action 2. The same with actions 1 and 3.

        tr(s1,a,s2) <=> tr(s2,a',s1)    for (a,a') \in {(0,2),(1,3)} and s1!=s2
        x => y AND y=> x

        not x OR y
        not y OR x
*/

        for(int s1=0;s1<states;s1++)
                for(int s2=0;s2<states;s2++)
                        {
                                if(s1!=s2)
                                {
                                        clauses.push_back(tuple(-tr(s1,0,s2),tr(s2,2,s1)));
                                        clauses.push_back(tuple(-tr(s2,2,s1),tr(s1,0,s2)));
                                
                                        clauses.push_back(tuple(-tr(s1,1,s2),tr(s2,3,s1)));
                                        clauses.push_back(tuple(-tr(s2,3,s1),tr(s1,1,s2)));     
                                }
                        }

/* Second assumption: moving against a wall does nothing

 */

	for(int a=0;a<actions;a++)
		for(int s=0;s<states;s++)
		{
			vector<int> not_wall;
			for(int o=0;o<observations;o++)
			{
				if((o & (1 << a)) == 0)
					not_wall.push_back(obs(o,s));				
			}

			not_wall.push_back(tr(s,a,s));
			clauses.push_back(not_wall);
		}


/*Third assumption: moving into free space does something*/

        for(int a=0;a<actions;a++)
                for(int s=0;s<states;s++)
                {
                        vector<int> is_wall;
                        for(int o=0;o<observations;o++)
                        {
                                if((o & (1 << a)) != 0)
                                        is_wall.push_back(obs(o,s));
                        }
                       
                        is_wall.push_back(-tr(s,a,s));
                        clauses.push_back(is_wall);
                }
               

	//const int include_threshold = max(time_step/4,20);
	//const int include_threshold = 10;

	count_old_states = 0;
	count_old_edges= 0;

	if(include_old)
	{
		for(unsigned int s=0;s<transition_state_action.size();s++)
		{
			count_old_states++;
			for(int a=0;a<actions;a++)
				if(transition_state_action_verified[s][a])
				{
					clauses.push_back(tuple(tr(s,a,transition_state_action[s][a])));
					count_old_edges++;
				}
		}
	}
	else {
		//include subgraph of model containing states visited more than include_threshold timesteps ago
		
		if(include_threshold > 0)
		{
			for(unsigned int s=0;s<transition_state_action.size();s++)
			{
				if(state_visited[s]==-1 || state_visited[s] > time_step-include_threshold)
					continue;

				count_old_states++;
				for(int a=0;a<actions;a++)
					if(transition_state_action_verified[s][a] && transition_state_action_verified[s][a]<=(time_step-include_threshold) && state_visited[transition_state_action[s][a]]!=-1  && state_visited[transition_state_action[s][a]] <= time_step-include_threshold)
					{
						clauses.push_back(tuple(tr(s,a,transition_state_action[s][a])));
						count_old_edges++;
					}
			}	
		}
		cerr << "Used " << count_old_states << " old states and " << count_old_edges << " old edges above threshold of " << include_threshold << "." << endl;
	}
	

	cnfout << "p cnf " << variables_count << " " << clauses.size() << endl;
	
        for(unsigned int i=0;i<clauses.size();i++)
        {
            for(unsigned int j=0;j<clauses[i].size();j++)
            {
                if(j)
                    cnfout << " ";
                cnfout << clauses[i][j];
            }
            cnfout << " 0" << endl;
        }


}

const int xdirection[]={1,0,-1,0,0,0};/**<How x-position of the agent changes when an action is executed (indexed by the action number).*/
const int ydirection[]={0,1,0,-1,0,0};/**<How y-position of the agent changes when an action is executed (indexed by the action number).*/
const int zdirection[]={0,0,0,0 ,1,-1};/**<How z-position of the agent changes when an action is executed (indexed by the action number).*/


//vector<int> state_x;/**<Computed x position of the state, based on the model. Used for visualisation.*/
//vector<int> state_y;/**<Computed x position of the state, based on the model. Used for visualisation.*/

//const int scale = 4; /**For visualisation of the model graph, how far should be the nodes which are one step one from each other.*/
/**Computes position of the state, and goes recursively to other states. Position of the other state is computed according to the action which lead to it --  uses direction of the action for computing the position*/
/*void compute_state_position(int state, int x, int y)
{
	if(state_x[state]!=-1)
		return;
	
	state_x[state]=x;
	state_y[state]=y;

	if(state_visited[state]==-1)
		return;//do not continue if this state was not visited yet 
	
	for(int a=0;a<actions;a++)
		compute_state_position(transition_state_action[state][a],x+xdirection[a]*scale,y-ydirection[a]*scale);
}*/

/*Computes positions of states for nice visualisation of model graph. Begins at 0th state and spreads recursively. */
/*void compute_positions_of_states()
{
	state_x.clear();
	state_x.resize(states,-1);
	state_y.clear();
	state_y.resize(states,-1);
	
	compute_state_position(0,0,0);

}*/


/** Reads and interprets a solution from the output of the SAT-solver.
*/
int read_sat_solution(int time_step)
{
vector<int> data_observation;
vector<int> data_action;

  
	ifstream historyin("history.txt");
	if(!historyin)
	{
	    cerr << "Could not open file "  << endl << endl;
	    help();
	}

	ifstream solutionin("history.out");
	if(!solutionin)
	{
	    cerr << "Could not open file " << endl << endl;
	    help();
	}
	
	stringstream filename;
	filename << "history_" <<  setfill('0') << setw(3) << time_step <<  ".dot";

	ofstream graphout(filename.str().c_str());
	if(!graphout)
	{
	    cerr << "Could not open file " << endl << endl;
	    help();
	}	

//#include"params.h"

	data_observation.resize(1);
	historyin >> data_observation[0];
	
	for(time_step=0;time_step<10000;time_step++)
	{
	    int a,p;
	    historyin >> a >> p;
	    if(!historyin)
		break;
	    data_action.push_back(a);
	    data_observation.push_back(p);
	}

	cerr << "time=" << time_step << endl;	
	

//	solutionin >> actions >> states >> observations >> time_step;
//
	vector<vector<int> > clauses;


	offset_obs = 1;
	offset_tr = offset_obs + observations*states;
	offset_pos = offset_tr + states*actions*states;


	string sat;
	solutionin >> sat;
	
	if(sat!="SAT")
	{             
	    cerr << "Input file does not begin with 'SAT '. Unsatisfiable CNF formula specified to SAT solver?";
	    exit(-1);
	}

	vector<bool> val(1);

	for(int i=0;;i++)
	{
		int num;
		solutionin >> num;
		if(num==0)
		    break;
		val.push_back( (num>0) );
	}
	

	graphout << "digraph G" << endl;
	graphout <<  "{" << endl;

	vector<int> observation_in_state(states);


	for(int s=0;s<states;s++)
	{
		bool found=false;
		for(int o=0;o<observations;o++)
			if(val[obs(o,s)])
			{
//				graphout << s << "[label=\"" << o <<"\"];"<< endl;
				observation_in_state[s]=o;
				if(found)
				{
				    cerr << "ERROR: two observation for the state " << s << endl;
				    exit(-1);
				    //return -1;
				}
				found=true;
			}
		if(!found)
		{
		    cerr << "ERROR:no observation for a state " << s << endl;
		    cerr << "the respective variables are : ";
		    for(int o=0;o<observations;o++)
			cerr << obs(o,s) << ",";
		    cerr << endl;

		}
		
		    
	}

//	int transition_state_action[states][actions];
	//bool transition_state_action_verified[states][actions];


	transition_state_action.resize(states,vector<int>(actions));
	transition_state_action_verified.resize(states,vector<bool>(actions));
	transition_state_action_visited.resize(states,vector<int>(actions));
	state_visited.assign(states,-1);//reset stats
	state_statistics.assign(states,0);//reset stats

	for(int s=0;s<states;s++)
		for(int a=0;a<actions;a++)
		{
			for(int s2=0;s2<states;s2++)
			{
				if(val[tr(s,a,s2)])
				{
					transition_state_action[s][a]=s2;
//					graphout << s << " -> " << s2 << "[label=\"" << a << "\"];" << endl;
				}
				
			}
			transition_state_action_verified[s][a]=false;
			transition_state_action_visited[s][a]=-1;
		}
//	graphout << "}" << endl;
	
	int curr_state = 0;
	
	
	for(int t=0;t<time_step;t++)
	{
	      assert(observation_in_state[curr_state] == data_observation[t]);
	      state_statistics[curr_state]++;
	      state_visited[curr_state]=t;
      
	      transition_state_action_verified[curr_state][data_action[t]] = true;//been there, done that
	      transition_state_action_visited[curr_state][data_action[t]] = t;
	      curr_state = transition_state_action[curr_state][data_action[t]];
	      
               /*Here we also use the assumption that pairs of actions are opposite and reversible*/
//                transition_state_action_verified[curr_state][(data_action[t]+2)%actions] = true;


//			      cerr << "DEBUG:" << "a=" << data_action[t] << "tr=" << transition_state_action[s][a] << " t="<<t<< endl;


	}
	assert(observation_in_state[curr_state] == data_observation[time_step]);
	int final_state = curr_state;

	//compute_positions_of_states();
	
/*
	for(int s=0;s<states;s++)
	{
		graphout << s << "[label=\"" << observation_in_state[s] <<"\"];"<< endl;	
	}
*/

	int unknown_states = 0;
	for(int s=0;s<states;s++)
		for(int a=0;a<actions;a++)
		      if(!transition_state_action_verified[s][a])
		      {
			    //graphout << "Unknown_" << s << "_" << a <<  "[label=\"?\", shape=none];"<< endl;
			    unknown_states++;
			    transition_state_action[s][a]= -unknown_states;
		      }
	
	if(unknown_states == 0)
		all_world_explored = true;

/*
	for(int s=0;s<states;s++)
		for(int a=0;a<actions;a++)
		{
			if(transition_state_action_verified[s][a])
			{
			    graphout << s << " -> " << transition_state_action[s][a] << "[label=\"" << a << "\"];" << endl;
			}   
		        else
			    graphout << s << " -> " << "Unknown_" << s << "_" << a << "[label=\"" << a << "\"];" << endl;		
		}
	
	graphout << "}" << endl;
*/
	

	double epsilon = 1e-3; /**<Stopping criterion for difference.*/
	double delta; /**<Actual difference.*/

	vector<double> V(states);

	for(int s=0;s<states;s++)
		V[s]=0;
	
	vector<int> bestaction(states);
	
	double gamma=0.9;
	do {
	    delta = 0;
		for(int s=0;s<states;s++)
		{
		    double vOldValue = V[s];
		    
		    int maxaction = -1;
		    double maxvalue = -999;
		    
		    for(int a=0;a<actions;a++)
		    {
			int s2 = transition_state_action[s][a];
			double Value;
			if(s2<0)
			    Value=100;
			else
			    Value=V[s2];
			
			if(Value>maxvalue)
			{
			    maxvalue = Value;
			    maxaction = a;
			}
		    }
		    
		    int nextstate=transition_state_action[s][maxaction];
		    bestaction[s]=maxaction;
		    
		    V[s] = ((nextstate<0)?1:0) + gamma * maxvalue;
		    
		    delta=max(delta,fabs(V[s] - vOldValue));
		}
	cerr << "delta=" << delta << endl;
} while (delta > epsilon);	


	for(int s=0;s<states;s++)
	{
		graphout << s << "[label=\"" << observation_in_state[s] << " (V="<< V[s] << ", vis=" << state_statistics[s] << ")\"" ;
		if(s==final_state)
			graphout << "fillcolor=lightgray, style=filled";
		
		//graphout << ", pos=\"" << state_x[s] << "," << state_y[s] << "!\"";
		graphout << "];"<< endl;	
	}

      for(int s=0;s<states;s++)
		for(int a=0;a<actions;a++)
		      if(!transition_state_action_verified[s][a])
			    graphout << "Unknown_" << s << "_" << a <<  "[label=\"?\", shape=none];"<< endl;

	for(int s=0;s<states;s++)
		for(int a=0;a<actions;a++)
		{
			if(transition_state_action_verified[s][a])
			{
			    graphout << s << " -> " << transition_state_action[s][a] << "[label=\"" << a << "\"";
			    
		    
			}   
		        else
			    graphout << s << " -> " << "Unknown_" << s << "_" << a << "[label=\"" << a << "\"";
			
			if(a==bestaction[s])
				graphout << ",style=bold";
			else	graphout << ",style=dashed";
			
			graphout << "];" << endl;

		}
	
	graphout << "}" << endl;
	
	//cout << "final_state=" << final_state << " best action now=" << bestaction[final_state] << endl;
	return bestaction[final_state];

	
}

/** Checks the result of SAT solver: returns the first word of the file "history.out", or "INDET" when no word can be read. */
string check_solution()
{
	string verdict;
	ifstream solutioncheck("history.out");
	solutioncheck >> verdict;

	if(verdict.empty())
		return "INDET";
	return verdict;
}

int clk_tck = sysconf(_SC_CLK_TCK);/**<Number of system timer ticks per second, obtained from the system via sysconf(). Used by real_time() for converting the tick counts to seconds.*/
/** Reports, in seconds, the user CPU time consumed by this process together with its waited-for children
 * (the result should not depend on the process priority or on the system load). Cross-checked using shell "time" command.*/
double real_time() {
	tms usage;
	times(&usage);

	const clock_t user_ticks = usage.tms_utime+usage.tms_cutime;
	return double(user_ticks)/clk_tck;
}

/**Returns the difference -- CPU time elapsed -- between two consecutive calls of the function*/
double real_time_diff()
{
	static double oldtime = 0;
	double newtime = real_time();
	double diff = newtime-oldtime;
	oldtime=newtime;
	return diff;
}

int visited_cells = 0;/**<Number of really visited cells. Recorded by the environment for debugging purpose.*/


/** Writes the maze, layer by layer, into the files "history_NNN.maze" (NNN is the zero-padded time) and "current.maze".
 *
 * Each cell is one character: 'A' for the agent's position, 'x' for a wall, '.' for a visited free
 * cell and a space for a free cell that was not visited. Each row is one line and every layer is
 * followed by two empty lines.
 */
void writemaze(int time, int xpos, int ypos, int zpos)
{
	stringstream filename;
	filename << "history_" << setfill('0') << setw(3) <<  time  << ".maze";

	ofstream mazefile(filename.str().c_str());
	ofstream commonfile("current.maze");

	for(int layer=0;layer<zsize;layer++)
	{
		for(int row=0;row<ysize;row++)
		{
			for(int column=0;column<xsize;column++)
			{
				char mark = ' ';

				if(column==xpos && row==ypos && layer==zpos)
					mark = 'A';//the agent
				else if(maze[column][row][layer]==1)
					mark = 'x';//wall
				else if(maze_visited[column][row][layer]!=-1)
					mark = '.';//visited cell

				mazefile << mark;
				commonfile << mark;
			}
			mazefile << endl;
			commonfile << endl;
		}
		mazefile << endl << endl;
		commonfile << endl << endl;
	}
}

double maxtimeSAT = 0;/**<Maximum time of a successful run of the SAT solver seen so far (maintained by update_timelimit()). Used for deriving the time limit for the SAT solver.*/
double timelimit = 1;/**<Adaptive time limit for running the SAT solver; changed only through write_timelimit().*/


/** Sets the time limit for running the SAT solver and stores it, rounded up, in the file "timelimit.txt".
*/
void write_timelimit(double newtimelimit)
{
	timelimit = newtimelimit;

	const double rounded_up = ceil(timelimit);
	ofstream timelimitfile("timelimit.txt");
	timelimitfile << rounded_up;
}

/** Updates the time limit after a successful run of the SAT solver: when the run was the longest one
 * so far, the time limit is raised to 1.5-times its duration (it is never lowered here).
*/
void update_timelimit(double lasttimeSAT)
{
	if(!(maxtimeSAT < lasttimeSAT))
		return;//not a new maximum

	maxtimeSAT = lasttimeSAT;

	const double wanted_limit = maxtimeSAT*1.5;
	if(timelimit < wanted_limit)
		write_timelimit(wanted_limit);
}

/** Compares the number of states of the model with the number of really visited cells.
 *
 * Returns "" unless the last SAT solver run reported "SAT". Otherwise returns "<" for fewer states
 * than visited cells, "=" for the same number, "+1" for one state more and "> (fail)" for more than that.
 * Usually the number of states in the model is slightly above -- the agent can in some situations derive
 * that there must be states which were not visited yet, from the observations in their neighbouring states.
*/
string marking(int states, int visited)
{
	if(check_solution() !=  "SAT")
		return "";

	const int surplus = states - visited;

	if(surplus < 0)
		return "<";
	if(surplus == 0)
		return "=";
	if(surplus == 1)
		return "+1";
	return "> (fail)";
}


/** Runs one attempt to find a model with the current number of states and logs it as one row of the statistics.
 *
 * Generates the CNF formula, runs the external SAT solver and measures the CPU time spent.
 *
 * @param stats       Stream for the statistics row.
 * @param time_step   Current time of the environment (logged only).
 * @param include_old Passed to generate_cnf() and logged.
 * @param lasttime    Receives the CPU time elapsed since the previous measurement.
 * @return The verdict of the SAT solver as reported by check_solution().
 */
static string run_sat_attempt(ostream &stats, int time_step, bool include_old, double &lasttime)
{
	stats << time_step << "\t" << states << "\t";

	generate_cnf(include_old);
	stats << include_old << "\t" << count_old_states << "\t" << count_old_edges << "\t";

	system("minisat_timelimit hist.cnf.test history.out");

	const string result = check_solution();
	const double total_cpu_time = real_time();
	lasttime = real_time_diff();

	stats << result << "\t" << total_cpu_time << "\t" << lasttime << "\t" << visited_cells << "\t" << timelimit  << "\t" << marking(states,visited_cells) <<endl;

	return result;
}

/** Runs the environment and the agent.

    The agent is run in an environment read from file specified as the command line argument.
    The file contains the three sizes of the maze, the cells (x changes fastest, then y, then z)
    and the start position of the agent. Some debugging output is generated.

    In every time step the smallest number of states is searched for which the SAT solver finds
    a model consistent with the history: first with the verified edges of the old model included,
    then from scratch; the number of states is increased when both attempts fail.
*/
int main(int argc, char **argv)
{
#include"params3d.h"


	if(argc!=2)
	{
	    help();
	}

	ifstream mazein(argv[1]);
	if(!mazein)
	{
	    cerr << "Could not open file " << argv[1] << endl << endl;
	    help();
	}

	write_timelimit(2);//initial timelimit

	mazein >> xsize >> ysize >> zsize;
	// get_observation() needs a cell with neighbours on all sides, so every size must be at least 3.
	if(!mazein || xsize<3 || ysize<3 || zsize<3)
	{
	    cerr << "Invalid maze dimensions in file " << argv[1] << endl << endl;
	    help();
	}

	maze.resize(xsize, vector<vector<int> > (ysize,vector<int>(zsize)));
	maze_visited.resize(xsize, vector<vector<int> > (ysize,vector<int>(zsize,-1)));

	// The z loop must run over zsize layers (it was bounded by ysize before).
	for(int z=0;z<zsize;z++)
		for(int y=0;y<ysize;y++)
			for(int x=0;x<xsize;x++)
			{
			      mazein >> maze[x][y][z];
			}

	int xpos;///< x-position of the agent in the environment.
	int ypos;///< y-position of the agent in the environment.
	int zpos;///< z-position of the agent in the environment.

	mazein >> xpos >> ypos >> zpos;
	if(!mazein)
	{
	    cerr << "Could not read the maze and the start position from file " << argv[1] << endl << endl;
	    help();
	}

	cerr << "Agent will be run in a maze " << xsize << "x" << ysize << "x" << zsize << " from the position [" << xpos << "," << ypos << "," << zpos << "]" << endl;

	ofstream stats("statistics.log");
	stats << "time\tstates\tuse_old\t#old st.\t#old edg.\tresult\treal time\tdelta\tvisited\ttimelimit" << endl;

	data_observation.push_back(get_observation(xpos,ypos,zpos));
	cerr << "obs=" << get_observation(xpos,ypos,zpos) << endl;

	/** Virtual time of the environment counted in time steps. */
	int time_step;
	int number_of_states = 1;
	for(time_step=0;time_step<10000 && all_world_explored != true;time_step++)
	{
		if(maze_visited[xpos][ypos][zpos]==-1)
			visited_cells++;//new cell

		maze_visited[xpos][ypos][zpos]=time_step;
		writehistory(time_step);

		cerr << "running with environment time=" << time_step << endl;

		writemaze(time_step,xpos,ypos,zpos);

		cerr << "number_of_states=" << number_of_states << endl;

		// Increase the number of states until a model consistent with the history is found.
		for(states=number_of_states;;states++)
		{
			double lasttime=0;
			cerr << "trying to find a solution with " << states << " states." << endl;

			// First attempt: keep the verified edges of the old model.
			if(run_sat_attempt(stats,time_step,true,lasttime) == "SAT")
			{
				update_timelimit(lasttime);

				if(timelimit>10*lasttime)
					write_timelimit(10*lasttime);

				cout << "successfull use of old model with "<< states-number_of_states<<" states added." << endl;
				break;
			}

			// Second attempt: a new model from scratch.
			const string result = run_sat_attempt(stats,time_step,false,lasttime);
			if(result ==  "SAT")
			{
				if(timelimit>1.5*lasttime)
					write_timelimit(1.5*lasttime);

				cout << "generated a new model from scratch." << endl;
				break;
			}

			include_threshold = 0;//the threshold-based inclusion stays switched off
			if(result=="INDET")
				write_timelimit(timelimit*3);//the solver ran out of time
		}

		number_of_states=states;

		/**The action the agent wishes to execute -- result of reinforcement learning computation on the model of the world.*/
		int action = read_sat_solution(time_step);

		// If a file "action.txt" exists and begins with a number, that number overrides the computed action.
		ifstream actionfile("action.txt");
		actionfile >> action;

		assert(action>=0 && action < actions);

		data_action.push_back(action);

		int newxpos = xpos + xdirection[action];
		int newypos = ypos + ydirection[action];
		int newzpos = zpos + zdirection[action];

		// Moving against a wall leaves the agent where it is.
		if(maze[newxpos][newypos][newzpos])
		{
			newxpos = xpos;
			newypos = ypos;
			newzpos = zpos;
		}

		xpos  = newxpos;
		ypos  = newypos;
		zpos  = newzpos;

		data_observation.push_back(get_observation(xpos,ypos,zpos));
		cerr << "obs=" << get_observation(xpos,ypos,zpos) << endl;
		cerr << "environment time=" << time_step << endl;
	}

	return 0;
}