Fix: The output file now correctly displays the evolution of the RMSE

jespb · Nov 8, 2019 · 339904b · 339904b
1 parent 89acbb9
commit 339904b
Show file tree

Hide file tree

Showing 7 changed files with 208 additions and 149 deletions.
diff --git a/.gitignore b/.gitignore
@@ -0,0 +1,5 @@
+# Don't track compiled python files
+*pyc
+
+# Don't track the results folder
+results/
diff --git a/Main_STGP.py b/Main_STGP.py
@@ -3,95 +3,75 @@
 from stgp.STGP import STGP
 from sys import argv
 from stgp.Constants import *
+import os
 
 import time
 
+# 
+# By using this file, you are agreeing to this product's EULA
 #
-# $ python Main_STGP.py [-dsdir dir] [-d datasets] [-r]
+# This product can be obtained in https://github.com/jespb/Python-STGP
 #
-# [-dsdir dir] 
-# 	- States the dataset directory. 
-# 	- By default "datasets/" is used 
-# 	- Use "-dsdir ./" for the root directory
-# [-d datasets] 
-# 	- this flag expects a set of csv dataset names separated by ";" (e.g., a.csv;b.csv)
-# [-r] 
-# 	- States the this is a regression problem. 
-# 	- By default the STGP tries to classify samples as 0 or 1
+# Copyright ©2019 J. E. Batista
 #
 
 timestamp = time.strftime("%Y%m%d_%H%M")
 
-ds_dir = "datasets/"
-
-datasets = ["heart.csv"] #example dataset
-output = "Classification"
-
-if "-dsdir" in argv:
-	ds_dir = argv[argv.index("-dsdir")+1]
-if "-d" in argv:
-	datasets = argv[argv.index("-d")+1].split(";")
-if "-r" in argv:
-	output = "Regression"
-if "-runs" in argv:
-	RUNS = int(argv[argv.index("-runs")+1])
-
 def callstgp():
-	#for dataset in datasets:
-	#	openFile("out_"+dataset)
-	#	writeToFile(dataset+"\n")
-	#	for i in range(RUNS):
-	#		print(i,"# run with the ", dataset,"dataset")
-	#		p = pandas.read_csv(ds_dir+dataset)
-	#		stgp = STGP(p,output)
-	#		writeToFile(str(i)+",")
-	#		writeToFile(str(stgp.getCurrentGeneration())+",")
-	#		writeToFile(str(stgp.getTrainingAccuracy())+",")
-	#		writeToFile(str(stgp.getTestAccuracy())+",")
-	#		writeToFile(str(stgp.getTrainingRMSE())+",")
-	#		writeToFile(str(stgp.getTestRMSE())+",")
-	#	closeFile()
+	try:
+		os.makedirs(OUTPUT_DIR)
+	except:
+		pass
 
-	for dataset in datasets:#["trio_brasil.csv","trio_congo.csv","trio_mocambique.csv","trio_combo.csv"]:#["mcd3.csv","mcd10.csv","brasil.csv","movl.csv","heart.csv","vowel.csv","wav.csv","yeast.csv","seg.csv"]:
-		openFile("stgp_"+timestamp + "_"+dataset)
+	for dataset in DATASETS:
+		openFile(OUTPUT_DIR+"tmp_stgp_"+timestamp + "_"+dataset)
 		writeToFile(dataset+"\n")
 		toWrite=[]
 		for i in range(RUNS):
 			print(i,"# run with the", dataset,"dataset")
-			p = pandas.read_csv(ds_dir+dataset)
-			stgp = STGP(p,output)
+			p = pandas.read_csv(DATASETS_DIR+dataset)
+			stgp = STGP(p)
 
 			writeToFile(",")
 			for i in range(MAX_GENERATION):
 				writeToFile(str(i)+",")
 
-			fitness = stgp.getFitnessOverTime()
+			accuracy = stgp.getAccuracyOverTime()
+			rmse = stgp.getRmseOverTime()
 			size = stgp.getSizeOverTime()
-			toWrite.append([fitness[0],fitness[1],size,str(stgp.getBestIndividual())])
+			toWrite.append([accuracy[0],accuracy[1],rmse[0],rmse[1],size,str(stgp.getBestIndividual())])
+
+			writeToFile("\nTraining-Accuracy,")
+			for val in accuracy[0]:
+				writeToFile(str(val)+",")
+
+			writeToFile("\nTest-Accuracy,")
+			for val in accuracy[1]:
+				writeToFile(str(val)+",")
 
-			writeToFile("\nTraining,")
-			for val in fitness[0]:
+			writeToFile("\nTraining-RMSE,")
+			for val in rmse[0]:
 				writeToFile(str(val)+",")
 
-			writeToFile("\nTest,")
-			for val in fitness[1]:
+			writeToFile("\nTest-RMSE,")
+			for val in rmse[1]:
 				writeToFile(str(val)+",")
 
 			writeToFile("\nSize,")
-			for val in fitness[0]:
+			for val in size:
 				writeToFile(str(val)+",")
 
 			writeToFile("\n"+str(stgp.getBestIndividual())+"\n")
 
 		closeFile()
 
-		openFile("stgp_"+timestamp + "_"+dataset) 
+		openFile(OUTPUT_DIR+"stgp_"+timestamp + "_"+dataset) 
 		writeToFile("Attribute,Run,")
 		for i in range(MAX_GENERATION):
 			writeToFile(str(i)+",")
 		writeToFile("\n")
 
-		attributes= ["Training","Test","Size","Dimensions","Final_Model"]
+		attributes= ["Training-Accuracy","Test-Accuracy","Training-RMSE","Test-RMSE","Size","Dimensions","Final_Model"]
 		for ai in range(len(toWrite[0])-1):
 			for i in range(len(toWrite)):
 				writeToFile("\n"+attributes[ai]+","+str(i)+",")
@@ -106,5 +86,6 @@ def callstgp():
 
 
 		closeFile()
+		os.remove(OUTPUT_DIR+"tmp_stgp_"+timestamp + "_"+dataset)
 
 callstgp()
diff --git a/README.txt b/README.txt
@@ -0,0 +1,62 @@
+By using this file, you are agreeing to this product's EULA
+This product can be obtained in https://github.com/jespb/Python-STGP
+Copyright ©2019 J. E. Batista
+
+
+This implementation of STGP uses the following command and flags:
+
+$ python Main_STGP.py
+
+	[-d datasets] 
+		- This flag expects a set of csv dataset names separated by ";" (e.g., a.csv;b.csv)
+		- By default, the heart.csv dataset is used		
+
+	[-dontshuffle]
+		- By using this flag, the dataset will not be shuffled;
+		- By default, the dataset is shuffled.
+
+	[-dsdir dataset_dir] 
+		- States the dataset directory. 
+		- By default "datasets/" is used 
+		- Use "-dsdir ./" for the root directory	
+
+	[-es elite_size]
+		- This flag expects an integer with the elite size;
+		- By default, the elite has size 1.
+
+	[-md max_depth]
+		- This flag expects an integer with the maximum initial depth for the trees;
+		- By default, this value is set to 6.		
+
+	[-mg max_generation]
+		- This flag expects an integer with the maximum number of generations;
+		- By default, this value is set to 100.
+
+	[-odir output_dir] 
+		- States the output directory. 
+		- By default "results/" is used 
+		- Use "-odir ./" for the root directory
+
+	[-op operators]
+		- This flag expects a set of operators separated by ";"
+		- Allowed operators: +;-;*;/
+		- By default, the used operators are the sum, subtraction, multiplication and protected division.		
+	[-ps population_size]
+		- This flag expects an integer with the size of the population;
+		- By default, this value is set to 200.
+
+	[-r] 
+		- States that this is a regression problem.
+		- By default the STGP tries to classify samples as 0 or 1
+
+	[-runs number_of_runs] 
+		- This flag expects an integer with the number of runs to be made;
+		- By default, this value is set to 30
+
+	[-tf train_fraction]
+		- This flag expects a float [0;1] with the fraction of the dataset to be used in training;
+		- By default, this value is set to 0.70
+
+	[-ts tournament_size]
+		- This flag expects an integer with the tournament size;
+		- By default, this value is set to 10.
diff --git a/stgp/Constants.py b/stgp/Constants.py
@@ -1,3 +1,5 @@
+from sys import argv
+
 # 
 # By using this file, you are agreeing to this product's EULA
 #
@@ -9,16 +11,51 @@
 OPERATORS = ["+","-","*","/"]
 MAX_DEPTH = 6 # max depth of the initial trees and the trees used for mutation
 POPULATION_SIZE = 200
-MAX_GENERATION = 10
+MAX_GENERATION = 100
 TRAIN_FRACTION = 0.70
 TOURNAMENT_SIZE = 10
 ELITISM_SIZE = 1
 SHUFFLE = True
 LIMIT_DEPTH=15
-RUNS = 10
-
+RUNS = 30
+DATASETS_DIR = "datasets/"
+OUTPUT_DIR = "results/"
+DATASETS = ["heart.csv"]
+OUTPUT = "Classification"
 out = None
 
+if "-dsdir" in argv:
+	DATASETS_DIR = argv[argv.index("-dsdir")+1]
+if "-odir" in argv:
+	OUTPUT_DIR = argv[argv.index("-odir")+1]
+if "-d" in argv:
+	DATASETS = argv[argv.index("-d")+1].split(";")
+if "-r" in argv:
+	OUTPUT = "Regression"
+if "-runs" in argv:
+	RUNS = int(argv[argv.index("-runs")+1])
+if "-op" in argv:
+	OPERATORS = argv[argv.index("-op")+1].split(";")
+if "-md" in argv:
+	MAX_DEPTH = int(argv[argv.index("-md")+1])
+if "-ps" in argv:
+	POPULATION_SIZE = int(argv[argv.index("-ps")+1])
+if "-mg" in argv:
+	MAX_GENERATION = int(argv[argv.index("-mg")+1])
+if "-tf" in argv:
+	TRAIN_FRACTION = float(argv[argv.index("-tf")+1]) # read the value that follows the "-tf" flag tested above
+if "-ts" in argv:
+	TOURNAMENT_SIZE = int(argv[argv.index("-ts")+1])
+if "-es" in argv:
+	ELITISM_SIZE = int(argv[argv.index("-es")+1])
+if "-dontshuffle" in argv:
+	SHUFFLE = False
+if "-ms" in argv:
+	MUTATION_STEP = float(argv[argv.index("-ms")+1])
+
+
+
+
 def openFile(name):
 	global out
 	out = open(name,"w")