Skip to content

Commit

Permalink
Fix: The output file now correctly displays the evolution of the RMSE
Browse files Browse the repository at this point in the history
  • Loading branch information
jespb committed Nov 8, 2019
1 parent 89acbb9 commit 339904b
Show file tree
Hide file tree
Showing 7 changed files with 208 additions and 149 deletions.
5 changes: 5 additions & 0 deletions .gitignore
Original file line number Diff line number Diff line change
@@ -0,0 +1,5 @@
# Don't track compiled python files
*pyc

# Don't track the results folder
results/
83 changes: 32 additions & 51 deletions Main_STGP.py
Original file line number Diff line number Diff line change
Expand Up @@ -3,95 +3,75 @@
from stgp.STGP import STGP
from sys import argv
from stgp.Constants import *
import os

import time

#
# By using this file, you are agreeing to this product's EULA
#
# $ python Main_STGP.py [-dsdir dir] [-d datasets] [-r]
# This product can be obtained in https://github.com/jespb/Python-STGP
#
# [-dsdir dir]
# - States the dataset directory.
# - By default "datasets/" is used
# - Use "-dsdir ./" for the root directory
# [-d datasets]
# - this flag expects a set of csv dataset names separated by ";" (e.g., a.csv;b.csv)
# [-r]
# - States that this is a regression problem.
# - By default the STGP tries to classify samples as 0 or 1
# Copyright ©2019 J. E. Batista
#

timestamp = time.strftime("%Y%m%d_%H%M")

ds_dir = "datasets/"

datasets = ["heart.csv"] #example dataset
output = "Classification"

if "-dsdir" in argv:
ds_dir = argv[argv.index("-dsdir")+1]
if "-d" in argv:
datasets = argv[argv.index("-d")+1].split(";")
if "-r" in argv:
output = "Regression"
if "-runs" in argv:
RUNS = int(argv[argv.index("-runs")+1])

def callstgp():
#for dataset in datasets:
# openFile("out_"+dataset)
# writeToFile(dataset+"\n")
# for i in range(RUNS):
# print(i,"# run with the ", dataset,"dataset")
# p = pandas.read_csv(ds_dir+dataset)
# stgp = STGP(p,output)
# writeToFile(str(i)+",")
# writeToFile(str(stgp.getCurrentGeneration())+",")
# writeToFile(str(stgp.getTrainingAccuracy())+",")
# writeToFile(str(stgp.getTestAccuracy())+",")
# writeToFile(str(stgp.getTrainingRMSE())+",")
# writeToFile(str(stgp.getTestRMSE())+",")
# closeFile()
try:
os.makedirs(OUTPUT_DIR)
except:
pass

for dataset in datasets:#["trio_brasil.csv","trio_congo.csv","trio_mocambique.csv","trio_combo.csv"]:#["mcd3.csv","mcd10.csv","brasil.csv","movl.csv","heart.csv","vowel.csv","wav.csv","yeast.csv","seg.csv"]:
openFile("stgp_"+timestamp + "_"+dataset)
for dataset in DATASETS:
openFile(OUTPUT_DIR+"tmp_stgp_"+timestamp + "_"+dataset)
writeToFile(dataset+"\n")
toWrite=[]
for i in range(RUNS):
print(i,"# run with the", dataset,"dataset")
p = pandas.read_csv(ds_dir+dataset)
stgp = STGP(p,output)
p = pandas.read_csv(DATASETS_DIR+dataset)
stgp = STGP(p)

writeToFile(",")
for i in range(MAX_GENERATION):
writeToFile(str(i)+",")

fitness = stgp.getFitnessOverTime()
accuracy = stgp.getAccuracyOverTime()
rmse = stgp.getRmseOverTime()
size = stgp.getSizeOverTime()
toWrite.append([fitness[0],fitness[1],size,str(stgp.getBestIndividual())])
toWrite.append([accuracy[0],accuracy[1],rmse[0],rmse[1],size,str(stgp.getBestIndividual())])

writeToFile("\nTraining-Accuracy,")
for val in accuracy[0]:
writeToFile(str(val)+",")

writeToFile("\nTest-Accuracy,")
for val in accuracy[1]:
writeToFile(str(val)+",")

writeToFile("\nTraining,")
for val in fitness[0]:
writeToFile("\nTraining-RMSE,")
for val in rmse[0]:
writeToFile(str(val)+",")

writeToFile("\nTest,")
for val in fitness[1]:
writeToFile("\nTest-RMSE,")
for val in rmse[1]:
writeToFile(str(val)+",")

writeToFile("\nSize,")
for val in fitness[0]:
for val in size:
writeToFile(str(val)+",")

writeToFile("\n"+str(stgp.getBestIndividual())+"\n")

closeFile()

openFile("stgp_"+timestamp + "_"+dataset)
openFile(OUTPUT_DIR+"stgp_"+timestamp + "_"+dataset)
writeToFile("Attribute,Run,")
for i in range(MAX_GENERATION):
writeToFile(str(i)+",")
writeToFile("\n")

attributes= ["Training","Test","Size","Dimensions","Final_Model"]
attributes= ["Training-Accuracy","Test-Accuracy","Training-RMSE","Test-RMSE","Size","Dimensions","Final_Model"]
for ai in range(len(toWrite[0])-1):
for i in range(len(toWrite)):
writeToFile("\n"+attributes[ai]+","+str(i)+",")
Expand All @@ -106,5 +86,6 @@ def callstgp():


closeFile()
os.remove(OUTPUT_DIR+"tmp_stgp_"+timestamp + "_"+dataset)

callstgp()
62 changes: 62 additions & 0 deletions README.txt
Original file line number Diff line number Diff line change
@@ -0,0 +1,62 @@
By using this file, you are agreeing to this product's EULA
This product can be obtained in https://github.com/jespb/Python-STGP
Copyright ©2019 J. E. Batista


This implementation of STGP uses the following command and flags:

$ python Main_STGP.py

[-d datasets]
- This flag expects a set of csv dataset names separated by ";" (e.g., a.csv;b.csv)
- By default, the heart.csv dataset is used

[-dontshuffle]
- By using this flag, the dataset will not be shuffled;
- By default, the dataset is shuffled.

[-dsdir dataset_dir]
- States the dataset directory.
- By default "datasets/" is used
- Use "-dsdir ./" for the root directory

[-es elite_size]
- This flag expects an integer with the elite size;
- By default, the elite has size 1.

[-md max_depth]
- This flag expects an integer with the maximum initial depth for the trees;
- By default, this value is set to 6.

[-mg max_generation]
- This flag expects an integer with the maximum number of generations;
- By default, this value is set to 100.

[-odir output_dir]
- States the output directory.
- By default "results/" is used
- Use "-odir ./" for the root directory

[-op operators]
- This flag expects a set of operators separated by ";"
- Allowed operators: +;-;*;/
- By default, the used operators are the sum, subtraction, multiplication and protected division.
[-ps population_size]
- This flag expects an integer with the size of the population;
- By default, this value is set to 200.

[-r]
- States that this is a regression problem.
- By default the STGP tries to classify samples as 0 or 1

[-runs number_of_runs]
- This flag expects an integer with the number of runs to be made;
- By default, this value is set to 30

[-tf train_fraction]
- This flag expects a float [0;1] with the fraction of the dataset to be used in training;
- By default, this value is set to 0.70

[-ts tournament_size]
- This flag expects an integer with the tournament size;
- By default, this value is set to 10.
43 changes: 40 additions & 3 deletions stgp/Constants.py
Original file line number Diff line number Diff line change
@@ -1,3 +1,5 @@
from sys import argv

#
# By using this file, you are agreeing to this product's EULA
#
Expand All @@ -9,16 +11,51 @@
OPERATORS = ["+","-","*","/"]
MAX_DEPTH = 6 # max depth of the initial trees and the trees used for mutation
POPULATION_SIZE = 200
MAX_GENERATION = 10
MAX_GENERATION = 100
TRAIN_FRACTION = 0.70
TOURNAMENT_SIZE = 10
ELITISM_SIZE = 1
SHUFFLE = True
LIMIT_DEPTH=15
RUNS = 10

RUNS = 30
DATASETS_DIR = "datasets/"
OUTPUT_DIR = "results/"
DATASETS = ["heart.csv"]
OUTPUT = "Classification"
out = None

# Command-line overrides for the default settings assigned earlier in this
# module. A flag that takes a value reads it from the argument that
# immediately follows the flag; boolean flags only need to be present.
if "-dsdir" in argv:
    DATASETS_DIR = argv[argv.index("-dsdir") + 1]
if "-odir" in argv:
    OUTPUT_DIR = argv[argv.index("-odir") + 1]
if "-d" in argv:
    # Several datasets may be given at once, separated by ";".
    DATASETS = argv[argv.index("-d") + 1].split(";")
if "-r" in argv:
    OUTPUT = "Regression"
if "-runs" in argv:
    RUNS = int(argv[argv.index("-runs") + 1])
if "-op" in argv:
    # Operators are also passed as a ";"-separated list.
    OPERATORS = argv[argv.index("-op") + 1].split(";")
if "-md" in argv:
    MAX_DEPTH = int(argv[argv.index("-md") + 1])
if "-ps" in argv:
    POPULATION_SIZE = int(argv[argv.index("-ps") + 1])
if "-mg" in argv:
    MAX_GENERATION = int(argv[argv.index("-mg") + 1])
if "-tf" in argv:
    # The value must be looked up with the same flag that was tested above;
    # indexing "-train" raised ValueError whenever "-tf" was supplied.
    TRAIN_FRACTION = float(argv[argv.index("-tf") + 1])
if "-ts" in argv:
    TOURNAMENT_SIZE = int(argv[argv.index("-ts") + 1])
if "-es" in argv:
    ELITISM_SIZE = int(argv[argv.index("-es") + 1])
if "-dontshuffle" in argv:
    SHUFFLE = False
if "-ms" in argv:
    MUTATION_STEP = float(argv[argv.index("-ms") + 1])




def openFile(name):
    """Open the file `name` for writing and keep the handle in the module-level `out`.

    The file is opened in "w" mode, so an existing file with the same name is
    truncated.
    """
    global out
    handle = open(name, "w")
    out = handle
Expand Down
Loading

0 comments on commit 339904b

Please sign in to comment.