New search space def #131

Merged: 7 commits, May 15, 2024
2 changes: 1 addition & 1 deletion tpot2/__init__.py
@@ -15,7 +15,7 @@
from . import objectives
from . import selectors
from . import tpot_estimator

from . import old_config_utils

from .tpot_estimator import TPOTClassifier, TPOTRegressor, TPOTEstimator, TPOTEstimatorSteadyState

3 changes: 2 additions & 1 deletion tpot2/builtin_modules/__init__.py
@@ -5,4 +5,5 @@
from .arithmetictransformer import AddTransformer, mul_neg_1_Transformer, MulTransformer, SafeReciprocalTransformer, EQTransformer, NETransformer, GETransformer, GTTransformer, LETransformer, LTTransformer, MinTransformer, MaxTransformer, ZeroTransformer, OneTransformer, NTransformer
from .passthrough import Passthrough
from .imputer import ColumnSimpleImputer
from .selector_wrappers import RFE_ExtraTreesClassifier, SelectFromModel_ExtraTreesClassifier, RFE_ExtraTreesRegressor, SelectFromModel_ExtraTreesRegressor
from .estimatortransformer import EstimatorTransformer
121 changes: 121 additions & 0 deletions tpot2/builtin_modules/estimatortransformer.py
@@ -0,0 +1,121 @@
from sklearn.base import BaseEstimator, TransformerMixin
from sklearn.exceptions import NotFittedError
from sklearn.model_selection import cross_val_predict
from sklearn.utils.validation import check_is_fitted
from sklearn.utils.metaestimators import available_if
import numpy as np

class EstimatorTransformer(BaseEstimator, TransformerMixin):
def __init__(self, estimator, method='auto', passthrough=False, cross_val_predict_cv=0):
self.estimator = estimator
self.method = method
self.passthrough = passthrough
self.cross_val_predict_cv = cross_val_predict_cv

    def fit(self, X, y=None):
        # fit the wrapped estimator, then return self per the sklearn convention
        self.estimator.fit(X, y)
        return self

def transform(self, X):
if self.method == 'auto':
if hasattr(self.estimator, 'predict_proba'):
method = 'predict_proba'
elif hasattr(self.estimator, 'decision_function'):
method = 'decision_function'
elif hasattr(self.estimator, 'predict'):
method = 'predict'
else:
                raise ValueError('Estimator has no predict_proba, decision_function, or predict method')
else:
method = self.method

        output = getattr(self.estimator, method)(X)
        output = np.asarray(output)

if len(output.shape) == 1:
output = output.reshape(-1,1)

if self.passthrough:
return np.hstack((output, X))
else:
return output



def fit_transform(self, X, y=None):
self.estimator.fit(X,y)

if self.method == 'auto':
if hasattr(self.estimator, 'predict_proba'):
method = 'predict_proba'
elif hasattr(self.estimator, 'decision_function'):
method = 'decision_function'
elif hasattr(self.estimator, 'predict'):
method = 'predict'
else:
                raise ValueError('Estimator has no predict_proba, decision_function, or predict method')
else:
method = self.method

        if self.cross_val_predict_cv > 0:
            # use out-of-fold predictions as features; pass the resolved method
            # so predict_proba/decision_function are honored here as well
            output = cross_val_predict(self.estimator, X, y=y,
                                       cv=self.cross_val_predict_cv, method=method)
        else:
            output = getattr(self.estimator, method)(X)

        # reshape 1-d outputs (e.g. from predict) into a single feature column
        output = np.asarray(output)
        if len(output.shape) == 1:
            output = output.reshape(-1, 1)
if self.passthrough:
return np.hstack((output, X))
else:
return output

    def _estimator_has(attr):
        '''Check if we can delegate a method to the wrapped estimator.'''
        return lambda self: (self.estimator is not None and
                             hasattr(self.estimator, attr)
                             )

@available_if(_estimator_has('predict'))
def predict(self, X, **predict_params):
check_is_fitted(self.estimator)
#X = check_array(X)

preds = self.estimator.predict(X,**predict_params)
return preds

@available_if(_estimator_has('predict_proba'))
def predict_proba(self, X, **predict_params):
check_is_fitted(self.estimator)
#X = check_array(X)
return self.estimator.predict_proba(X,**predict_params)

@available_if(_estimator_has('decision_function'))
def decision_function(self, X, **predict_params):
check_is_fitted(self.estimator)
#X = check_array(X)
return self.estimator.decision_function(X,**predict_params)

    def __sklearn_is_fitted__(self):
        """
        Check fitted status and return a boolean.
        """
        # check_is_fitted raises NotFittedError instead of returning a bool,
        # so translate the outcome into True/False.
        try:
            check_is_fitted(self.estimator)
            return True
        except NotFittedError:
            return False


# @property
# def _estimator_type(self):
# return self.estimator._estimator_type



    @property
    def classes_(self):
        """Class labels. Only exists if the wrapped estimator is a classifier."""
        return self.estimator.classes_
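
A minimal usage sketch (not part of the diff): EstimatorTransformer turns a model's out-of-fold predictions into features for a downstream estimator, the classic stacking pattern. The import path comes from the builtin_modules __init__.py change above; the data and models are illustrative.

import numpy as np
from sklearn.datasets import make_classification
from sklearn.linear_model import LogisticRegression
from sklearn.pipeline import make_pipeline
from tpot2.builtin_modules import EstimatorTransformer

X, y = make_classification(n_samples=200, n_features=10, n_informative=5,
                           n_classes=3, random_state=0)

# The inner model's 5-fold out-of-fold predict_proba becomes 3 new feature
# columns; passthrough=True appends the original 10 features as well.
stacker = make_pipeline(
    EstimatorTransformer(LogisticRegression(max_iter=1000),
                         passthrough=True, cross_val_predict_cv=5),
    LogisticRegression(max_iter=1000),
)
stacker.fit(X, y)
print(stacker.predict(X[:5]))
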
2 changes: 1 addition & 1 deletion tpot2/config/classifiers.py
@@ -445,7 +445,7 @@ def HistGradientBoostingClassifier_hyperparameter_parser(params):


if params['early_stop'] == 'off':
final_params['n_iter_no_change'] = 0
# final_params['n_iter_no_change'] = 0
final_params['validation_fraction'] = None
final_params['early_stopping'] = False
elif params['early_stop'] == 'valid':
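
A plausible reason for commenting out this line (an inference, not stated in the PR): recent scikit-learn releases validate n_iter_no_change as an integer >= 1, so forcing it to 0 raises an error, while early_stopping=False already disables early stopping on its own. A minimal sketch of the resulting configuration:

from sklearn.ensemble import HistGradientBoostingClassifier

# early stopping fully off; n_iter_no_change stays at its default and is ignored
clf = HistGradientBoostingClassifier(early_stopping=False, validation_fraction=None)
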
18 changes: 13 additions & 5 deletions tpot2/config/get_configspace.py
@@ -27,6 +27,7 @@
from tpot2.builtin_modules import AddTransformer, mul_neg_1_Transformer, MulTransformer, SafeReciprocalTransformer, EQTransformer, NETransformer, GETransformer, GTTransformer, LETransformer, LTTransformer, MinTransformer, MaxTransformer, ZeroTransformer, OneTransformer, NTransformer
from tpot2.builtin_modules.genetic_encoders import DominantEncoder, RecessiveEncoder, HeterosisEncoder, UnderDominanceEncoder, OverDominanceEncoder
from tpot2.builtin_modules import ZeroCount, ColumnOneHotEncoder
from tpot2.builtin_modules import Passthrough
from sklearn.linear_model import SGDClassifier, LogisticRegression, SGDRegressor, Ridge, Lasso, ElasticNet, Lars, LassoLars, LassoLarsCV, RidgeCV, ElasticNetCV, PassiveAggressiveClassifier, ARDRegression
from sklearn.ensemble import BaggingClassifier, RandomForestClassifier, ExtraTreesClassifier, GradientBoostingClassifier, ExtraTreesRegressor, ExtraTreesClassifier, AdaBoostRegressor, AdaBoostClassifier, GradientBoostingRegressor,RandomForestRegressor, BaggingRegressor, ExtraTreesRegressor, HistGradientBoostingClassifier, HistGradientBoostingRegressor
from sklearn.neural_network import MLPClassifier, MLPRegressor
@@ -53,6 +54,7 @@
PowerTransformer, QuantileTransformer,ARDRegression, QuadraticDiscriminantAnalysis, PassiveAggressiveClassifier, LinearDiscriminantAnalysis,
DominantEncoder, RecessiveEncoder, HeterosisEncoder, UnderDominanceEncoder, OverDominanceEncoder,
GaussianProcessClassifier, BaggingClassifier,LGBMRegressor,
Passthrough,
]


@@ -147,6 +149,8 @@ def get_configspace(name, n_classes=3, n_samples=100, n_features=100, random_state=None):
case "OverDominanceEncoder":
return {}

case "Passthrough":
return {}

#classifiers.py
case "LinearDiscriminantAnalysis":
@@ -335,7 +339,7 @@ def get_configspace(name, n_classes=3, n_samples=100, n_features=100, random_state=None):

space = {

'n': Float("n", bounds=(-1e3, 1e3)),
'n': Float("n", bounds=(-1e2, 1e2)),
}
)

@@ -389,19 +393,23 @@ def get_configspace(name, n_classes=3, n_samples=100, n_features=100, random_state=None):
raise ValueError(f"Could not find configspace for {name}")


def get_search_space(name, n_classes=3, n_samples=100, n_features=100, random_state=None):
def get_search_space(name, n_classes=3, n_samples=100, n_features=100, random_state=None, return_choice_pipeline=True):


#if list of names, return a list of EstimatorNodes
if isinstance(name, list) or isinstance(name, np.ndarray):
search_spaces = [get_search_space(n, n_classes=n_classes, n_samples=n_samples, n_features=n_features, random_state=random_state) for n in name]
search_spaces = [get_search_space(n, n_classes=n_classes, n_samples=n_samples, n_features=n_features, random_state=random_state, return_choice_pipeline=False) for n in name]
#remove Nones
search_spaces = [s for s in search_spaces if s is not None]
return ChoicePipeline(search_spaces=search_spaces)

if return_choice_pipeline:
return ChoicePipeline(search_spaces=np.hstack(search_spaces))
else:
return np.hstack(search_spaces)

if name in GROUPNAMES:
name_list = GROUPNAMES[name]
return get_search_space(name_list, n_classes=n_classes, n_samples=n_samples, n_features=n_features, random_state=random_state)
return get_search_space(name_list, n_classes=n_classes, n_samples=n_samples, n_features=n_features, random_state=random_state, return_choice_pipeline=return_choice_pipeline)

return get_node(name, n_classes=n_classes, n_samples=n_samples, n_features=n_features, random_state=random_state)

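
A quick sketch of the new return_choice_pipeline flag (the estimator names are assumed to be registered in get_configspace; generate/export follow the API used in the tests below):

import tpot2

# Default: a list of names is wrapped into a single ChoicePipeline.
choice = tpot2.config.get_search_space(
    ["LogisticRegression", "RandomForestClassifier"], n_classes=3)
est = choice.generate().export_pipeline()

# New: return the flat array of EstimatorNodes instead, so callers can
# compose them into larger pipelines themselves.
nodes = tpot2.config.get_search_space(
    ["LogisticRegression", "RandomForestClassifier"],
    n_classes=3, return_choice_pipeline=False)
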
4 changes: 2 additions & 2 deletions tpot2/config/regressors.py
@@ -491,8 +491,8 @@ def HistGradientBoostingRegressor_hyperparameter_parser(params):


if params['early_stop'] == 'off':
final_params['n_iter_no_change'] = 0
final_params['validation_fraction'] = None
# final_params['n_iter_no_change'] = 0
# final_params['validation_fraction'] = None
final_params['early_stopping'] = False
elif params['early_stop'] == 'valid':
final_params['n_iter_no_change'] = params['n_iter_no_change']
4 changes: 2 additions & 2 deletions tpot2/config/tests/test_get_configspace.py
@@ -20,7 +20,7 @@ def test_loop_through_all_hyperparameters():
estnode_gen = tpot2.config.get_search_space(class_name, n_classes=n_classes, n_samples=n_samples, n_features=n_features, random_state=random_state)

#generate 25 random hyperparameters and make sure they are all valid
for i in range(100):
for i in range(25):
estnode = estnode_gen.generate()
est = estnode.export_pipeline()

@@ -37,6 +37,6 @@ def test_loop_through_groupnames():
estnode_gen = tpot2.config.get_search_space(class_name, n_classes=n_classes, n_samples=n_samples, n_features=n_features, random_state=random_state)

#generate 25 random hyperparameters and make sure they are all valid
for i in range(100):
for i in range(25):
estnode = estnode_gen.generate()
est = estnode.export_pipeline()
1 change: 1 addition & 0 deletions tpot2/old_config_utils/__init__.py
@@ -0,0 +1 @@
from .old_config_utils import convert_config_dict_to_list, convert_config_dict_to_choicepipeline, convert_config_dict_to_graphpipeline
140 changes: 140 additions & 0 deletions tpot2/old_config_utils/old_config_utils.py
@@ -0,0 +1,140 @@
from ConfigSpace import ConfigurationSpace, Integer, Float, Categorical, Normal
from ConfigSpace import EqualsCondition, OrConjunction, NotEqualsCondition, InCondition
from ..search_spaces.nodes.estimator_node import NONE_SPECIAL_STRING, TRUE_SPECIAL_STRING, FALSE_SPECIAL_STRING
from ..search_spaces.nodes import EstimatorNode
from ..search_spaces.pipelines import WrapperPipeline, ChoicePipeline, GraphPipeline
import ConfigSpace
import sklearn
from functools import partial
import inspect
import numpy as np
import warnings

def load_get_module_from_string(module_string):
module_name, class_name = module_string.rsplit('.', 1)
module = __import__(module_name, fromlist=[class_name])
return getattr(module, class_name)


def hyperparameter_parser(hdict, function_params_conversion_dict):
d = hdict.copy()
d.update(function_params_conversion_dict)
return d



def get_node_space(module_string, params):
method = load_get_module_from_string(module_string)
config_space = ConfigurationSpace()
sub_space = None
sub_space_name = None

function_params_conversion_dict = {}

if params is None:
return EstimatorNode(method=method, space=config_space)

for param_name, param in params.items():
        if param is None:
            config_space.add_hyperparameter(Categorical(param_name, [NONE_SPECIAL_STRING]))
            continue  # skip the remaining checks so the hyperparameter is not added twice

if isinstance(param, range):
param = list(param)

if isinstance(param, list) or isinstance(param, np.ndarray):
            if len(param) == 1: # a single-value list becomes a constant
                p = param[0]
if p is None:
p = NONE_SPECIAL_STRING
elif type(p) == bool:
p = TRUE_SPECIAL_STRING if p else FALSE_SPECIAL_STRING

config_space.add_hyperparameter(ConfigSpace.hyperparameters.Constant(param_name, p))
else:
config_space.add_hyperparameter(Categorical(param_name, param))
# if all(isinstance(i, int) for i in param):
# config_space.add_hyperparameter(Integer(param_name, (min(param), max(param))))
# elif all(isinstance(i, float) for i in param):
# config_space.add_hyperparameter(Float(param_name, (min(param), max(param))))
# else:
# config_space.add_hyperparameter(Categorical(param_name, param))
elif isinstance(param, dict): #TPOT1 config dicts have dictionaries for values of hyperparameters that are either a function or an estimator
if len(param) > 1:
raise ValueError(f"Multiple items in dictionary entry for {param_name}")

key = list(param.keys())[0]

innermethod = load_get_module_from_string(key)

if inspect.isclass(innermethod) and issubclass(innermethod, sklearn.base.BaseEstimator): #is an estimator
if sub_space is None:
sub_space_name = param_name
sub_space = get_node_space(key, param[key])
else:
raise ValueError("Only multiple hyperparameters are estimators. Only one parameter ")

else: #assume the key is a function and ignore the value
function_params_conversion_dict[param_name] = innermethod

else:
# config_space.add_hyperparameter(Categorical(param_name, param))
config_space.add_hyperparameter(ConfigSpace.hyperparameters.Constant(param_name, param))

    parser = None
    if len(function_params_conversion_dict) > 0:
        # bind by keyword so the hyperparameter dict remains the parser's first argument
        parser = partial(hyperparameter_parser, function_params_conversion_dict=function_params_conversion_dict)


if sub_space is None:

if parser is not None:
return EstimatorNode(method=method, space=config_space, hyperparameter_parser=parser)
else:
return EstimatorNode(method=method, space=config_space)


else:
if parser is not None:
return WrapperPipeline(method=method, space=config_space, estimator_search_space=sub_space, wrapped_param_name=sub_space_name, hyperparameter_parser=parser)
else:
return WrapperPipeline(method=method, space=config_space, estimator_search_space=sub_space, wrapped_param_name=sub_space_name)


def convert_config_dict_to_list(config_dict):
search_spaces = []
for key, value in config_dict.items():
search_spaces.append(get_node_space(key, value))
return search_spaces


def convert_config_dict_to_choicepipeline(config_dict):
search_spaces = []
for key, value in config_dict.items():
search_spaces.append(get_node_space(key, value))
return ChoicePipeline(search_spaces)

# Note: inner estimators are not wrapped as transformers, so they do not pass through their inputs the way TPOT1 estimators did
def convert_config_dict_to_graphpipeline(config_dict):
root_search_spaces = []
inner_search_spaces = []

for key, value in config_dict.items():
#if root
if issubclass(load_get_module_from_string(key), sklearn.base.ClassifierMixin) or issubclass(load_get_module_from_string(key), sklearn.base.RegressorMixin):
root_search_spaces.append(get_node_space(key, value))
else:
inner_search_spaces.append(get_node_space(key, value))

if len(root_search_spaces) == 0:
Warning("No classifiers or regressors found, allowing any estimator to be the root node")
root_search_spaces = inner_search_spaces

    # merge root and inner search spaces: inner nodes may use any estimator
    inner_search_spaces = np.concatenate([root_search_spaces, inner_search_spaces])

    root_space = ChoicePipeline(root_search_spaces)
    inner_space = ChoicePipeline(inner_search_spaces)

final_space = GraphPipeline(root_search_space=root_space, inner_search_space=inner_space)
return final_space
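
A hedged end-to-end sketch of the new converters: the dict below mimics TPOT1's config format (module path strings mapping to hyperparameter dicts) with illustrative keys and values, and generate()/export_pipeline() follow the API used in the tests above.

from tpot2.old_config_utils import (
    convert_config_dict_to_choicepipeline,
    convert_config_dict_to_graphpipeline,
)

tpot1_config = {
    "sklearn.linear_model.LogisticRegression": {
        "C": [1e-2, 1e-1, 1.0, 10.0],
        "dual": [False],
    },
    "sklearn.preprocessing.StandardScaler": {},
}

# pick-one-estimator search space
choice_space = convert_config_dict_to_choicepipeline(tpot1_config)

# graph search space: classifiers/regressors become root candidates,
# everything else becomes an inner node
graph_space = convert_config_dict_to_graphpipeline(tpot1_config)
pipeline = graph_space.generate().export_pipeline()
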