Update: includes adding random_states and changing the name from STGP to StdGP
João Batista committed Oct 3, 2022
1 parent 26f15b9 commit ba5713e
Showing 9 changed files with 1,302 additions and 16 deletions.
22 changes: 19 additions & 3 deletions Arguments.py
@@ -3,15 +3,17 @@
#
# By using this file, you are agreeing to this product's EULA
#
# This product can be obtained in https://github.com/jespb/Python-STGP
# This product can be obtained in https://github.com/jespb/Python-StdGP
#
# Copyright ©2019-2021 J. E. Batista
# Copyright ©2019-2022 J. E. Batista
#


# Operators to be used by the models
# Only these operators are available. To add more, edit m3gp.Node.calculate(self, sample)
OPERATORS = ["+","-","*","/"]

#OPERATORS = [("+",2),("-",2),("*",2),("/",2),("log2",1), ("max", 3)] # Example
OPERATORS = [("+",2),("-",2),("*",2),("/",2)] # Default

# Initial Maximum depth
MAX_DEPTH = 6
@@ -46,6 +48,14 @@
# Number of CPU Threads to be used
THREADS = 1

# Random state
RANDOM_STATE = 42

# Models wrapped by the StdGP models
MODEL_NAME = ["SimpleThresholdClassifier"][0]

# Fitness used by the StdGP models
FITNESS_TYPE = ["Accuracy", "MSE", "WAF", "2FOLD"][0]



@@ -72,6 +82,9 @@

if "-op" in argv:
OPERATORS = argv[argv.index("-op")+1].split(";")
for i in range(len(OPERATORS)):
OPERATORS[i] = OPERATORS[i].split(",")
OPERATORS[i][1] = int(OPERATORS[i][1])

if "-md" in argv:
MAX_DEPTH = int(argv[argv.index("-md")+1])
@@ -100,4 +113,7 @@
if "-t" in argv:
THREADS = int(argv[argv.index("-t")+1])

if "-rs" in argv:
RANDOM_STATE = int(argv[argv.index("-rs")+1])
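
For reference, a minimal sketch of what the "-op" parsing above produces (editorial illustration, not part of the commit; the flag value "+,2;-,2;log2,1" is hypothetical):

# Each operator is given as "name,arity"; operators are separated by ";".
value = "+,2;-,2;log2,1"
operators = value.split(";")
for i in range(len(operators)):
    operators[i] = operators[i].split(",")
    operators[i][1] = int(operators[i][1])
print(operators)   # [['+', 2], ['-', 2], ['log2', 1]]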


40 changes: 40 additions & 0 deletions Main_StdGP_classification_example.py
@@ -0,0 +1,40 @@
import pandas

from stdgp.StdGP import StdGP

from sklearn.model_selection import train_test_split

from sklearn.metrics import accuracy_score



#
# By using this file, you are agreeing to this product's EULA
#
# This product can be obtained in https://github.com/jespb/Python-StdGP
#
# Copyright ©2019-2022 J. E. Batista
#



filename= "heart.csv"

# Open the dataset
ds = pandas.read_csv("datasets/"+filename)
class_header = ds.columns[-1]

# Split the dataset
Tr_X, Te_X, Tr_Y, Te_Y = train_test_split(ds.drop(columns=[class_header]), ds[class_header],
train_size=0.7, random_state = 42, stratify = ds[class_header])

# Train a model
model = StdGP()
model.fit(Tr_X, Tr_Y)

# Predict test results
pred = model.predict(Te_X)

# Obtain test accuracy
print( accuracy_score(pred, Te_Y) )
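
Beyond predict(), the fitted model exposes the accessors that Main_StdGP_standalone.py (further down) relies on; a minimal sketch, assuming the same StdGP API applies here:

# Print the evolved expression of the best individual; the accessor and the
# use of str() mirror Main_StdGP_standalone.py and are assumptions here.
print( str(model.getBestIndividual()) )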

157 changes: 157 additions & 0 deletions Main_StdGP_standalone.py
@@ -0,0 +1,157 @@
import pandas

from stdgp.StdGP import StdGP
from sys import argv
from Arguments import *
import os

from sklearn.model_selection import train_test_split

import numpy as np




#
# By using this file, you are agreeing to this product's EULA
#
# This product can be obtained in https://github.com/jespb/Python-StdGP
#
# Copyright ©2019-2022 J. E. Batista
#




def openAndSplitDatasets(which,seed):
if VERBOSE:
print( "> Opening: ", which )

# Open dataset
ds = pandas.read_csv(DATASETS_DIR+which)

# Read header
class_header = ds.columns[-1]

return train_test_split(ds.drop(columns=[class_header]), ds[class_header],
train_size=TRAIN_FRACTION, random_state=seed,
stratify = ds[class_header])


def run(r,dataset):
if VERBOSE:
print("> Starting run:")
print(" > ID:", r)
print(" > Dataset: "+dataset)
print()

Tr_X, Te_X, Tr_Y, Te_Y = openAndSplitDatasets(dataset,r)

# Train a model
model = StdGP(OPERATORS, MAX_DEPTH, POPULATION_SIZE, MAX_GENERATION, TOURNAMENT_SIZE,
ELITISM_SIZE, LIMIT_DEPTH, THREADS, r, VERBOSE, MODEL_NAME, FITNESS_TYPE)
model.fit(Tr_X, Tr_Y, Te_X, Te_Y)


# Obtain training results
accuracy = model.getAccuracyOverTime()
waf = model.getWaFOverTime()
kappa = model.getKappaOverTime()
mse = model.getMSEOverTime()
size = model.getSizeOverTime()
model_str = str(model.getBestIndividual())
times = model.getGenerationTimes()

tr_acc = accuracy[0]
te_acc = accuracy[1]
tr_waf = waf[0]
te_waf = waf[1]
tr_kappa = kappa[0]
te_kappa = kappa[1]
tr_mse = mse[0]
te_mse = mse[1]

if VERBOSE:
print("> Ending run:")
print(" > ID:", r)
print(" > Dataset:", dataset)
print(" > Final model:", model_str)
print(" > Training accuracy:", tr_acc[-1])
print(" > Test accuracy:", te_acc[-1])
print()

return (tr_acc,te_acc,
tr_waf,te_waf,
tr_kappa,te_kappa,
tr_mse,te_mse,
size,
times,
model_str)


def call_StdGP():
try:
os.makedirs(OUTPUT_DIR)
except:
pass

for dataset in DATASETS:
outputFilename = OUTPUT_DIR+"StdGP_"+ dataset
if not os.path.exists(outputFilename):
results = []

# Run the algorithm several times
for r in range(RUNS):
results.append(run(r,dataset))

# Write output header
file = open(outputFilename , "w")
file.write("Attribute,Run,")
for i in range(MAX_GENERATION):
file.write(str(i)+",")
file.write("\n")

attributes= ["Training-Accuracy","Test-Accuracy",
"Training-WaF", "Test-WaF",
"Training-Kappa", "Test-Kappa",
"Training-MSE", "Test-MSE",
"Size",
"Time",
"Final_Model"]

# Write attributes with value over time
for ai in range(len(attributes)-1):
for i in range(RUNS):
file.write("\n"+attributes[ai]+","+str(i)+",")
file.write( ",".join([str(val) for val in results[i][ai]]))
file.write("\n")

# Write the final models
for i in range(len(results)):
file.write("\n"+attributes[-1]+","+str(i)+",")
file.write(results[i][-1])
file.write("\n")

# Write some parameters
file.write("\n\nParameters")
file.write("\nOperators,"+str(OPERATORS))
file.write("\nMax Initial Depth,"+str(MAX_DEPTH))
file.write("\nPopulation Size,"+str(POPULATION_SIZE))
file.write("\nMax Generation,"+str(MAX_GENERATION))
file.write("\nTournament Size,"+str(TOURNAMENT_SIZE))
file.write("\nElitism Size,"+str(ELITISM_SIZE))
file.write("\nDepth Limit,"+str(LIMIT_DEPTH))
file.write("\nWrapped Model,"+MODEL_NAME)
file.write("\nFitness Type,"+FITNESS_TYPE)
file.write("\nThreads,"+str(THREADS))
file.write("\nRandom State,"+str(list(range(RUNS))))
file.write("\nDataset,"+dataset)


file.close()
else:
print("Filename: " + outputFilename +" already exists.")


if __name__ == '__main__':
call_StdGP()
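
For programmatic use of the per-run results, a rough sketch (editorial addition, not part of the commit; it assumes this file is importable as Main_StdGP_standalone and that heart.csv is available in DATASETS_DIR):

from Main_StdGP_standalone import run

# run() returns the per-generation series in the same order as the
# `attributes` list in call_StdGP(), so index 1 is the "Test-Accuracy"
# series and its last entry is the final test accuracy of that run.
results = [run(r, "heart.csv") for r in range(2)]
print([res[1][-1] for res in results])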
26 changes: 13 additions & 13 deletions README.txt
@@ -1,17 +1,17 @@
This is an easy-to-use, scikit-learn inspired version of the STGP algorithm.
This is an easy-to-use, scikit-learn inspired version of the Standard Genetic Programming (StdGP) algorithm.


By using this file, you are agreeing to this product's EULA
This product can be obtained in https://github.com/jespb/Python-STGP
Copyright ©2019-2021 J. E. Batista
This product can be obtained in https://github.com/jespb/Python-StdGP
Copyright ©2019-2022 J. E. Batista


This file describes the command and flags used in the stand-alone version of this implementation and explains how to import, use, and edit this implementation.


This implementation of STGP can be used in a stand-alone fashion using the following command and flags:
This implementation of StdGP can be used in a stand-alone fashion using the following command and flags:

$ python Main_STGP_standalone.py
$ python Main_StdGP_standalone.py

[-d datasets]
- This flag expects a set of csv dataset names separated by ";" (e.g., a.csv;b.csv)
@@ -77,11 +77,11 @@ How to import this implementation to your project:
- import the STGP class using "from stgp.STGP import STGP".

How to use this implementation:
$ from stgp.STGP import STGP
$ model = STGP()
$ from stdgp.StdGP import StdGP
$ model = StdGP()
$ model.fit( training_x, training_y)

Arguments for STGP():
Arguments for StdGP():
operators -> Operators used by the individual (default: ["+","-","*","/"] )
max_depth -> Max initial depths of the individuals (default: 6)
population_size -> Population size (default: 500)
@@ -99,25 +99,25 @@ Arguments for model.fit():


Useful methods:
$ model = STGP() -> starts the model;
$ model = StdGP() -> starts the model;
$ model.fit(X, Y) -> fits the model to the dataset;
$ model.predict(dataset) -> Returns a list with the predictions for the given dataset.




How to edit this implementation:
Fitness Function ( stgp.Individual ):
Fitness Function ( stdgp.Individual ):
- Change the getFitness() method to use your own fitness function;
- This implementation assumes that a higher fitness is always better. To change this, edit the __gt__ method in this class;
- You may use the getTrainingPredictions() and getTrainingSet() to obtain the model's predictions and the training set;
- You can also explore the logic behind the standard fitness function;
- Warning: STGP evaluates every model in every run; as such, I do not recommend complex fitness functions. You should invest in fast evaluation methods to train a population.
- Warning: StdGP evaluates every model in every run; as such, I do not recommend complex fitness functions. You should invest in fast evaluation methods to train a population.

Classification method ( stgp.Individual ):
Classification method ( stdgp.Individual ):
- Change the trainModel() method to use your own classifier;
- Assuming it is a scikit-learn implementation, you may only need to change the first few lines of this method;
- Warning: STGP evaluates every model in every run; as such, I do not recommend complex classification models. You should invest in fast classification methods to train a population and then use a more complex method (if you wish) on the final model.
- Warning: StdGP evaluates every model in every run; as such, I do not recommend complex classification models. You should invest in fast classification methods to train a population and then use a more complex method (if you wish) on the final model.


Reference:
