Skip to content

Commit

Permalink
Revamped to match the new Python-M3GP implementation
Browse files Browse the repository at this point in the history
  • Loading branch information
João Batista committed May 31, 2021
1 parent c883fd0 commit 2c2202a
Show file tree
Hide file tree
Showing 10 changed files with 1,030 additions and 370 deletions.
96 changes: 50 additions & 46 deletions stgp/Constants.py → Arguments.py
Original file line number Diff line number Diff line change
Expand Up @@ -5,95 +5,99 @@
#
# This product can be obtained in https://github.com/jespb/Python-STGP
#
# Copyright ©2019 J. E. Batista
# Copyright ©2019-2021 J. E. Batista
#


# Operators to be used by the models
# Only these operators are available. To add more, edit m3gp.Node.calculate(self, sample)
# NOTE(review): the comment names the m3gp package; confirm the matching module in this project.
OPERATORS = ["+","-","*","/"]
# NOTE(review): MAX_DEPTH and POPULATION_SIZE are assigned again a few lines below;
# the later assignments (6 and 500) are the values that take effect.
MAX_DEPTH = 6 # max depth of the initial trees and the trees used for mutation
POPULATION_SIZE = 200

# Initial Maximum depth
MAX_DEPTH = 6

# Number of models in the population
POPULATION_SIZE = 500

# Maximum number of iterations
MAX_GENERATION = 100

# Fraction of the dataset to be used as training (used by Main_STGP_standalone.py)
TRAIN_FRACTION = 0.70
# NOTE(review): TOURNAMENT_SIZE is reassigned to 5 just below; this value is overwritten.
TOURNAMENT_SIZE = 10

# Number of individuals to be used in the tournament
TOURNAMENT_SIZE = 5

# Number of best individuals to be automatically moved to the next generation
ELITISM_SIZE = 1

# Shuffle the dataset
# NOTE(review): no use of SHUFFLE is visible in Main_STGP_standalone.py; confirm it is still read.
SHUFFLE = True
# NOTE(review): LIMIT_DEPTH is reassigned to 17 just below; this value is overwritten.
LIMIT_DEPTH=15

# Maximum depth limit (passed to the model after ELITISM_SIZE by Main_STGP_standalone.py)
LIMIT_DEPTH=17

# Number of runs (used by Main_STGP_standalone.py)
RUNS = 30

# Verbose: when True, Main_STGP_standalone.py prints progress messages
VERBOSE = True

# Number of CPU Threads to be used
THREADS = 1




# Prefix prepended to each dataset file name when it is opened
DATASETS_DIR = "datasets/"
# Prefix prepended to each result file name when it is written
OUTPUT_DIR = "results/"

# Dataset file names to process (the "-d" flag takes a ";"-separated list)
DATASETS = ["heart.csv"]
# "Classification" by default; the "-r" flag switches it to "Regression"
OUTPUT = "Classification"
# File handle shared by openFile / writeToFile / closeFile
out = None
# NOTE(review): second assignment of THREADS with the same value as above.
THREADS = 1




# Command-line overrides.
# Each flag found in argv replaces the matching default defined above.
# Flags that take a value read it from the argv entry right after the flag;
# list values ("-d", "-op") are split on ";".

if "-dsdir" in argv:
    DATASETS_DIR = argv[argv.index("-dsdir")+1]

if "-odir" in argv:
    OUTPUT_DIR = argv[argv.index("-odir")+1]

if "-d" in argv:
    DATASETS = argv[argv.index("-d")+1].split(";")

# "-r" takes no value: it only switches the output mode
if "-r" in argv:
    OUTPUT = "Regression"

if "-runs" in argv:
    RUNS = int(argv[argv.index("-runs")+1])

if "-op" in argv:
    OPERATORS = argv[argv.index("-op")+1].split(";")

if "-md" in argv:
    MAX_DEPTH = int(argv[argv.index("-md")+1])

if "-ps" in argv:
    POPULATION_SIZE = int(argv[argv.index("-ps")+1])

if "-mg" in argv:
    MAX_GENERATION = int(argv[argv.index("-mg")+1])

if "-tf" in argv:
    # The value follows "-tf" itself. Looking up "-train" here raised
    # ValueError whenever "-tf" was given without a "-train" entry.
    TRAIN_FRACTION = float(argv[argv.index("-tf")+1])

if "-ts" in argv:
    TOURNAMENT_SIZE = int(argv[argv.index("-ts")+1])

if "-es" in argv:
    ELITISM_SIZE = int(argv[argv.index("-es")+1])

# "-dontshuffle" and "-s" take no value: they only turn a default off
if "-dontshuffle" in argv:
    SHUFFLE = False

if "-s" in argv:
    VERBOSE = False

if "-ms" in argv:
    MUTATION_STEP = float(argv[argv.index("-ms")+1])

if "-t" in argv:
    THREADS = int(argv[argv.index("-t")+1])




def openFile(name):
    """Open *name* for writing and keep the handle in the module-level ``out``."""
    global out
    out = open(name,"w")


def writeToFile(msg):
    """Write *msg* to the file currently held in ``out``."""
    out.write(msg)


def closeFile():
    """Close the file currently held in ``out``."""
    out.close()

# Module-level slot for the terminals; None until setTerminals is called
terminals = None


def setTerminals(l):
    """Store *l* in the module-level ``terminals``."""
    global terminals
    terminals = l


def getTerminals():
    """Return the object last stored by setTerminals (None if never set)."""
    return terminals

# Module-level slot for the training set; None until setTrainingSet is called
trainingSet = None


def setTrainingSet(ds):
    """Store *ds* in the module-level ``trainingSet``."""
    global trainingSet
    trainingSet = ds


def getTrainingSet():
    """Return the object last stored by setTrainingSet (None if never set)."""
    return trainingSet

# Module-level slot for the test set; None until setTestSet is called
testSet = None


def setTestSet(ds):
    """Store *ds* in the module-level ``testSet``."""
    global testSet
    testSet = ds


def getTestSet():
    """Return the object last stored by setTestSet (None if never set)."""
    return testSet
44 changes: 44 additions & 0 deletions Main_STGP_example.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,44 @@
import pandas

from stgp.STGP import STGP

from sklearn.model_selection import train_test_split

from sklearn.metrics import accuracy_score

import warnings

# Ignore FutureWarning entries whose message begins with the text below
# and whose originating module name matches "sklearn".
warnings.filterwarnings("ignore", category=FutureWarning,
message="From version 0.21, test_size will always complement",
module="sklearn")

#
# By using this file, you are agreeing to this product's EULA
#
# This product can be obtained in https://github.com/jespb/Python-STGP
#
# Copyright ©2019-2021 J. E. Batista
#



filename = "heart.csv"

# Open the dataset; the last column is used as the class label
ds = pandas.read_csv("datasets/" + filename)
class_header = ds.columns[-1]

# Split the dataset: 70% for training, stratified on the class column,
# with a fixed seed so the split is reproducible
Tr_X, Te_X, Tr_Y, Te_Y = train_test_split(
    ds.drop(columns=[class_header]),
    ds[class_header],
    train_size=0.7,
    random_state=42,
    stratify=ds[class_header],
)

# Train a model
model = STGP()
model.fit(Tr_X, Tr_Y)

# Predict test results with the model trained above
# (the undefined name `m3gp` used here previously raised NameError)
pred = model.predict(Te_X)

# Obtain test accuracy; accuracy_score takes (y_true, y_pred)
print( accuracy_score(Te_Y, pred) )

145 changes: 145 additions & 0 deletions Main_STGP_standalone.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,145 @@
import pandas

from stgp.STGP import STGP
from sys import argv
from Arguments import *
import os

from sklearn.model_selection import train_test_split

import numpy as np

import warnings

# Ignore FutureWarning entries whose message begins with the text below
# and whose originating module name matches "sklearn".
warnings.filterwarnings("ignore", category=FutureWarning,
message="From version 0.21, test_size will always complement",
module="sklearn")


#
# By using this file, you are agreeing to this product's EULA
#
# This product can be obtained in https://github.com/jespb/Python-STGP
#
# Copyright ©2019-2021 J. E. Batista
#




def openAndSplitDatasets(which,seed):
    """Read one dataset and split it into training and test partitions.

    Args:
        which: file name of the CSV dataset, looked up inside DATASETS_DIR.
        seed: value passed as ``random_state`` to make the split reproducible.

    Returns:
        The result of ``train_test_split`` on (features, class column);
        the caller unpacks it as ``Tr_X, Te_X, Tr_Y, Te_Y``.
    """
    if VERBOSE:
        print( "> Opening: ", which )

    # Open dataset. os.path.join also works when DATASETS_DIR was given
    # without a trailing path separator (e.g. "-dsdir datasets").
    ds = pandas.read_csv(os.path.join(DATASETS_DIR, which))

    # The last column is used as the class label
    class_header = ds.columns[-1]

    return train_test_split(
        ds.drop(columns=[class_header]),
        ds[class_header],
        train_size=TRAIN_FRACTION,
        random_state=seed,
        stratify=ds[class_header],
    )


def run(r,dataset):
    """Train one STGP model on *dataset* and collect its per-generation results.

    Args:
        r: run identifier; also passed as the seed for the train/test split.
        dataset: file name of the dataset to open.

    Returns:
        A tuple ``(tr_acc, te_acc, tr_rmse, te_rmse, size, times, model_str)``:
        the first and second entries of the model's accuracy and RMSE
        histories, its size history, its generation times and the string
        form of its best individual.
    """
    if VERBOSE:
        print("> Starting run:")
        print(" > ID:", r)
        print(" > Dataset:", dataset)
        print()

    Tr_X, Te_X, Tr_Y, Te_Y = openAndSplitDatasets(dataset,r)

    # Train a model, handing it the test partition as well
    model = STGP(
        OPERATORS, MAX_DEPTH, POPULATION_SIZE, MAX_GENERATION,
        TOURNAMENT_SIZE, ELITISM_SIZE, LIMIT_DEPTH, THREADS, VERBOSE,
    )
    model.fit(Tr_X, Tr_Y, Te_X, Te_Y)

    # Obtain training results (getters called in the same order as before)
    acc_history = model.getAccuracyOverTime()
    rmse_history = model.getRMSEOverTime()
    size = model.getSizeOverTime()
    model_str = str(model.getBestIndividual())
    times = model.getGenerationTimes()

    # Index 0 is reported as training, index 1 as test
    tr_acc, te_acc = acc_history[0], acc_history[1]
    tr_rmse, te_rmse = rmse_history[0], rmse_history[1]

    if VERBOSE:
        print("> Ending run:")
        print(" > ID:", r)
        print(" > Dataset:", dataset)
        print(" > Final model:", model_str)
        print(" > Training accuracy:", tr_acc[-1])
        print(" > Test accuracy:", te_acc[-1])
        print()

    return (tr_acc, te_acc, tr_rmse, te_rmse, size, times, model_str)


def callm3gp():
    """Run the algorithm RUNS times per dataset and write one result file each.

    For every name in DATASETS the results go to ``stgp_<dataset>`` inside
    OUTPUT_DIR. A dataset whose result file already exists is skipped with a
    message. Each file holds one line per attribute and run (values over the
    generations), the final model of each run, and the parameters used.
    """
    # Create the output directory if needed. An existing directory is fine;
    # an empty OUTPUT_DIR means the current directory, so nothing is created.
    if OUTPUT_DIR:
        os.makedirs(OUTPUT_DIR, exist_ok=True)

    # Order matches the tuple returned by run(); the last entry is a string,
    # the others are sequences of per-generation values.
    attributes = ["Training-Accuracy","Test-Accuracy",
                  "Training-RMSE", "Test-RMSE",
                  "Size", "Time",
                  "Final_Model"]

    for dataset in DATASETS:
        outputFilename = os.path.join(OUTPUT_DIR, "stgp_" + dataset)
        if os.path.exists(outputFilename):
            print("Filename: " + outputFilename +" already exists.")
            continue

        # Run the algorithm several times
        results = [run(r, dataset) for r in range(RUNS)]

        # The context manager closes the file even if a write fails
        with open(outputFilename, "w") as file:
            # Write output header
            file.write("Attribute,Run,")
            file.write("".join(str(i) + "," for i in range(MAX_GENERATION)))
            file.write("\n")

            # Write attributes with value over time
            for ai, attribute in enumerate(attributes[:-1]):
                for i, result in enumerate(results):
                    file.write("\n" + attribute + "," + str(i) + ",")
                    file.write(",".join(str(val) for val in result[ai]))
                file.write("\n")

            # Write the final models
            for i, result in enumerate(results):
                file.write("\n" + attributes[-1] + "," + str(i) + ",")
                file.write(result[-1])
            file.write("\n")

            # Write some parameters
            file.write("\n\nParameters")
            file.write("\nOperators,"+str(OPERATORS))
            file.write("\nMax Initial Depth,"+str(MAX_DEPTH))
            file.write("\nPopulation Size,"+str(POPULATION_SIZE))
            file.write("\nMax Generation,"+str(MAX_GENERATION))
            file.write("\nTournament Size,"+str(TOURNAMENT_SIZE))
            file.write("\nElitism Size,"+str(ELITISM_SIZE))
            file.write("\nDepth Limit,"+str(LIMIT_DEPTH))
            file.write("\nThreads,"+str(THREADS))


# Script entry point: run the experiments only when executed directly
if __name__ == "__main__":
    callm3gp()
Loading

0 comments on commit 2c2202a

Please sign in to comment.