Merge pull request #130 from perib/new_search_space_def
fix num_nodes test, add update checker, setup.py change
perib authored Apr 24, 2024
2 parents bb8a919 + 47b7fd0 commit 8de41fb
Showing 11 changed files with 105 additions and 42 deletions.
2 changes: 0 additions & 2 deletions setup.py
@@ -42,12 +42,10 @@ def calculate_version():
'lightgbm>=3.3.3',
'optuna>=3.0.5',
'baikal>=0.4.2',
'jupyter>=1.0.0',
'networkx>=3.0',
'dask>=2024.4.2',
'distributed>=2024.4.2',
'dask-expr>=1.0.12',
'dask-ml>=2023.4.20',
'dask-jobqueue>=0.8.5',
'func_timeout>=4.3.5',
'configspace>=0.7.1',
4 changes: 4 additions & 0 deletions tpot2/__init__.py
@@ -18,3 +18,7 @@


from .tpot_estimator import TPOTClassifier, TPOTRegressor, TPOTEstimator, TPOTEstimatorSteadyState

from update_checker import update_check
from ._version import __version__
update_check("tpot2",__version__)
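
For reference, update_check contacts PyPI and prints an upgrade notice when a release newer than the given version exists. A minimal standalone sketch of the same call (assuming only that the update_checker package is installed):

from update_checker import update_check

# Prints a notice if PyPI hosts a release newer than the stated version;
# otherwise it stays silent, so importing tpot2 remains quiet when up to date.
update_check("tpot2", "0.1.7-alpha")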
2 changes: 1 addition & 1 deletion tpot2/_version.py
@@ -1 +1 @@
__version__ = '0.1.5-alpha'
__version__ = '0.1.7-alpha'
18 changes: 10 additions & 8 deletions tpot2/config/classifiers.py
@@ -406,7 +406,6 @@ def get_HistGradientBoostingClassifier_ConfigurationSpace(n_features, random_state):
validation_fraction_cond = EqualsCondition(validation_fraction, early_stop, "valid")

space = {
'loss': Categorical("loss", ['log_loss', 'exponential']),
'learning_rate': Float("learning_rate", bounds=(1e-3, 1), log=True),
'min_samples_leaf': Integer("min_samples_leaf", bounds=(1, 200)),
'max_features': Float("max_features", bounds=(0.1,1.0)),
@@ -432,7 +431,6 @@ def HistGradientBoostingClassifier_hyperparameter_parser(params):
def HistGradientBoostingClassifier_hyperparameter_parser(params):

final_params = {
'loss': params['loss'],
'learning_rate': params['learning_rate'],
'min_samples_leaf': params['min_samples_leaf'],
'max_features': params['max_features'],
@@ -447,7 +445,7 @@ def HistGradientBoostingClassifier_hyperparameter_parser(params):


if params['early_stop'] == 'off':
final_params['n_iter_no_change'] = None
final_params['n_iter_no_change'] = 0
final_params['validation_fraction'] = None
final_params['early_stopping'] = False
elif params['early_stop'] == 'valid':
@@ -477,12 +475,14 @@ def get_MLPClassifier_ConfigurationSpace(random_state):

n_hidden_layers = Integer("n_hidden_layers", bounds=(1, 3))
n_nodes_per_layer = Integer("n_nodes_per_layer", bounds=(16, 512))
activation = Categorical("activation", ['tanh', 'relu'])
alpha = Float("alpha", bounds=(1e-7, 1e-1), log=True)
learning_rate = Float("learning_rate", bounds=(1e-4, 1e-1), log=True)
activation = Categorical("activation", ["identity", "logistic",'tanh', 'relu'])
alpha = Float("alpha", bounds=(1e-4, 1e-1), log=True)
early_stopping = Categorical("early_stopping", [True,False])

cs.add_hyperparameters([n_hidden_layers, n_nodes_per_layer, activation, alpha, learning_rate, early_stopping])
learning_rate_init = Float("learning_rate_init", bounds=(1e-4, 1e-1), log=True)
learning_rate = Categorical("learning_rate", ['constant', 'invscaling', 'adaptive'])

cs.add_hyperparameters([n_hidden_layers, n_nodes_per_layer, activation, alpha, learning_rate, early_stopping, learning_rate_init])

return cs

@@ -492,8 +492,10 @@ def MLPClassifier_hyperparameter_parser(params):
'hidden_layer_sizes' : [params['n_nodes_per_layer']]*params['n_hidden_layers'],
'activation': params['activation'],
'alpha': params['alpha'],
'learning_rate': params['learning_rate'],
'early_stopping': params['early_stopping'],

'learning_rate_init': params['learning_rate_init'],
'learning_rate': params['learning_rate'],
}

if 'random_state' in params:
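
Note the split above: in sklearn's MLPClassifier, learning_rate names the schedule ('constant', 'invscaling', 'adaptive') while learning_rate_init is the numeric step size, which is why the single float hyperparameter became two. A hedged sketch of how a sampled configuration (hypothetical values) maps onto the estimator:

from sklearn.neural_network import MLPClassifier

# Hypothetical sample from the configuration space above
params = {"n_hidden_layers": 2, "n_nodes_per_layer": 64, "activation": "relu",
          "alpha": 1e-3, "early_stopping": True,
          "learning_rate_init": 1e-3, "learning_rate": "adaptive"}

clf = MLPClassifier(
    hidden_layer_sizes=[params["n_nodes_per_layer"]] * params["n_hidden_layers"],  # [64, 64]
    activation=params["activation"],
    alpha=params["alpha"],
    early_stopping=params["early_stopping"],
    learning_rate_init=params["learning_rate_init"],
    learning_rate=params["learning_rate"],  # schedule name, not a float
)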
14 changes: 10 additions & 4 deletions tpot2/config/get_configspace.py
@@ -104,12 +104,15 @@


# not including "PassiveAggressiveClassifier" in classifiers since it is mainly for larger than memory datasets/online use cases

# TODO need to subclass "GaussianProcessClassifier" and 'GaussianProcessRegressor'. These require n_features as a parameter for the kernel, but n_features may be different depending on selection functions or transformations previously in the pipeline.
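
One hedged sketch of the subclassing this TODO describes (hypothetical, not part of this commit): construct the kernel inside fit, where the post-transformation feature count is finally known:

from sklearn.gaussian_process import GaussianProcessClassifier
from sklearn.gaussian_process.kernels import RBF

class LateKernelGPC(GaussianProcessClassifier):
    # Hypothetical wrapper: defer kernel construction to fit time so the
    # anisotropic length scales match however many features survive
    # upstream selectors/transformers.
    def fit(self, X, y):
        self.kernel = RBF(length_scale=[1.0] * X.shape[1])
        return super().fit(X, y)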

GROUPNAMES = {
"selectors": ["SelectFwe", "SelectPercentile", "VarianceThreshold",],
"selectors_classification": ["SelectFwe", "SelectPercentile", "VarianceThreshold", "RFE_classification", "SelectFromModel_classification"],
"selectors_regression": ["SelectFwe", "SelectPercentile", "VarianceThreshold", "RFE_regression", "SelectFromModel_regression"],
"classifiers" : ["LGBMRegressor", "BaggingClassifier", "GaussianProcessClassifier", 'AdaBoostClassifier', 'BernoulliNB', 'DecisionTreeClassifier', 'ExtraTreesClassifier', 'GaussianNB', 'HistGradientBoostingClassifier', 'KNeighborsClassifier','LinearDiscriminantAnalysis', 'LogisticRegression', "LinearSVC", "SVC", 'MLPClassifier', 'MultinomialNB', "QuadraticDiscriminantAnalysis", 'RandomForestClassifier', 'SGDClassifier', 'XGBClassifier'],
"regressors" : ['AdaBoostRegressor', "ARDRegression", 'DecisionTreeRegressor', 'ExtraTreesRegressor', 'GaussianProcessRegressor', 'HistGradientBoostingRegressor', 'KNeighborsRegressor', 'LinearSVR', "MLPRegressor", 'RandomForestRegressor', 'SGDRegressor', 'SVR', 'XGBRegressor'],
"classifiers" : ["LGBMRegressor", "BaggingClassifier", 'AdaBoostClassifier', 'BernoulliNB', 'DecisionTreeClassifier', 'ExtraTreesClassifier', 'GaussianNB', 'HistGradientBoostingClassifier', 'KNeighborsClassifier','LinearDiscriminantAnalysis', 'LogisticRegression', "LinearSVC", "SVC", 'MLPClassifier', 'MultinomialNB', "QuadraticDiscriminantAnalysis", 'RandomForestClassifier', 'SGDClassifier', 'XGBClassifier'],
"regressors" : ['AdaBoostRegressor', "ARDRegression", 'DecisionTreeRegressor', 'ExtraTreesRegressor', 'HistGradientBoostingRegressor', 'KNeighborsRegressor', 'LinearSVR', "MLPRegressor", 'RandomForestRegressor', 'SGDRegressor', 'SVR', 'XGBRegressor'],


"transformers": ["Binarizer", "PCA", "ZeroCount", "ColumnOneHotEncoder", "FastICA", "FeatureAgglomeration", "Nystroem", "RBFSampler", "QuantileTransformer", "PowerTransformer"],
@@ -263,7 +266,7 @@ def get_configspace(name, n_classes=3, n_samples=100, n_features=100, random_state=None):
case "FastICA":
return transformers.get_FastICA_configspace(n_features=n_features, random_state=random_state)
case "FeatureAgglomeration":
return transformers.get_FeatureAgglomeration_configspace(n_features=n_features,)
return transformers.get_FeatureAgglomeration_configspace(n_samples=n_samples)
case "Nystroem":
return transformers.get_Nystroem_configspace(n_features=n_features, random_state=random_state)
case "RBFSampler":
@@ -435,9 +438,12 @@ def get_node(name, n_classes=3, n_samples=100, n_features=100, random_state=None):
if name == "HistGradientBoostingClassifier":
configspace = get_configspace(name, n_classes=n_classes, n_samples=n_samples, random_state=random_state)
return EstimatorNode(STRING_TO_CLASS[name], configspace, hyperparameter_parser=classifiers.HistGradientBoostingClassifier_hyperparameter_parser)
if name == "GradientBoostingRegressor" or name == "HistGradientBoostingRegressor":
if name == "GradientBoostingRegressor":
configspace = get_configspace(name, n_classes=n_classes, n_samples=n_samples, random_state=random_state)
return EstimatorNode(STRING_TO_CLASS[name], configspace, hyperparameter_parser=regressors.GradientBoostingRegressor_hyperparameter_parser)
if name == "HistGradientBoostingRegressor":
configspace = get_configspace(name, n_classes=n_classes, n_samples=n_samples, random_state=random_state)
return EstimatorNode(STRING_TO_CLASS[name], configspace, hyperparameter_parser=regressors.HistGradientBoostingRegressor_hyperparameter_parser)
if name == "MLPClassifier":
configspace = get_configspace(name, n_classes=n_classes, n_samples=n_samples, random_state=random_state)
return EstimatorNode(STRING_TO_CLASS[name], configspace, hyperparameter_parser=classifiers.MLPClassifier_hyperparameter_parser)
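
For orientation, get_node pairs each estimator class with its ConfigSpace and, where sampled values do not map one-to-one onto constructor arguments, a hyperparameter parser. A usage sketch (module path assumed from this file's location):

from tpot2.config.get_configspace import get_node

# EstimatorNode bundling MLPClassifier, its ConfigSpace, and the parser
# that reshapes sampled values into valid constructor kwargs.
node = get_node("MLPClassifier", n_classes=3, n_samples=100, n_features=20, random_state=1)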
53 changes: 44 additions & 9 deletions tpot2/config/regressors.py
@@ -412,6 +412,36 @@ def get_GradientBoostingRegressor_ConfigurationSpace(n_features, random_state):
cs.add_conditions([validation_fraction_cond, n_iter_no_change_cond])
return cs

def GradientBoostingRegressor_hyperparameter_parser(params):

final_params = {
'loss': params['loss'],
'learning_rate': params['learning_rate'],
'min_samples_leaf': params['min_samples_leaf'],
'min_samples_split': params['min_samples_split'],
'max_features': params['max_features'],
'max_leaf_nodes': params['max_leaf_nodes'],
'max_depth': params['max_depth'],
'tol': params['tol'],
'subsample': params['subsample']
}

if 'random_state' in params:
final_params['random_state'] = params['random_state']

if params['early_stop'] == 'off':
final_params['n_iter_no_change'] = None
final_params['validation_fraction'] = None
elif params['early_stop'] == 'valid':
final_params['n_iter_no_change'] = params['n_iter_no_change']
final_params['validation_fraction'] = params['validation_fraction']
elif params['early_stop'] == 'train':
final_params['n_iter_no_change'] = params['n_iter_no_change']
final_params['validation_fraction'] = None


return final_params

#only difference is l2_regularization
def get_HistGradientBoostingRegressor_ConfigurationSpace(n_features, random_state):
early_stop = Categorical("early_stop", ["off", "valid", "train"])
@@ -443,39 +473,40 @@ def get_HistGradientBoostingRegressor_ConfigurationSpace(n_features, random_state):

return cs

def GradientBoostingRegressor_hyperparameter_parser(params):

def HistGradientBoostingRegressor_hyperparameter_parser(params):

final_params = {
'loss': params['loss'],
'learning_rate': params['learning_rate'],
'min_samples_leaf': params['min_samples_leaf'],
'max_features': params['max_features'],
'max_leaf_nodes': params['max_leaf_nodes'],
'max_depth': params['max_depth'],
'tol': params['tol'],
'l2_regularization': params['l2_regularization']
}

if "l2_regularization" in params:
final_params['l2_regularization'] = params['l2_regularization']

if 'random_state' in params:
final_params['random_state'] = params['random_state']


if params['early_stop'] == 'off':
final_params['n_iter_no_change'] = None
final_params['n_iter_no_change'] = 0
final_params['validation_fraction'] = None
final_params['early_stopping'] = False
elif params['early_stop'] == 'valid':
final_params['n_iter_no_change'] = params['n_iter_no_change']
final_params['validation_fraction'] = params['validation_fraction']
final_params['early_stopping'] = True
elif params['early_stop'] == 'train':
final_params['n_iter_no_change'] = params['n_iter_no_change']
final_params['validation_fraction'] = None
final_params['early_stopping'] = True


return final_params
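
The early_stop switch thus expands into sklearn's three early-stopping arguments; for instance, the 'train' branch monitors the training loss, which sklearn selects via validation_fraction=None:

from sklearn.ensemble import HistGradientBoostingRegressor

# "train" mode as produced by the parser above: early stopping is on,
# but no validation split is held out, so the training loss is monitored.
reg = HistGradientBoostingRegressor(early_stopping=True,
                                    n_iter_no_change=10,
                                    validation_fraction=None)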



###

def get_MLPRegressor_ConfigurationSpace(random_state):
@@ -495,7 +526,10 @@ def get_MLPRegressor_ConfigurationSpace(random_state):
learning_rate = Float("learning_rate", bounds=(1e-4, 1e-1), log=True)
early_stopping = Categorical("early_stopping", [True,False])

cs.add_hyperparameters([n_hidden_layers, n_nodes_per_layer, activation, alpha, learning_rate, early_stopping])
learning_rate_init = Float("learning_rate_init", bounds=(1e-4, 1e-1), log=True)
learning_rate = Categorical("learning_rate", ['constant', 'invscaling', 'adaptive'])

cs.add_hyperparameters([n_hidden_layers, n_nodes_per_layer, activation, alpha, learning_rate, early_stopping, learning_rate_init])

return cs

@@ -505,8 +539,9 @@ def MLPRegressor_hyperparameter_parser(params):
'hidden_layer_sizes' : [params['n_nodes_per_layer']]*params['n_hidden_layers'],
'activation': params['activation'],
'alpha': params['alpha'],
'learning_rate': params['learning_rate'],
'early_stopping': params['early_stopping'],
'learning_rate_init': params['learning_rate_init'],
'learning_rate': params['learning_rate'],
}

if 'random_state' in params:
4 changes: 2 additions & 2 deletions tpot2/config/transformers.py
@@ -44,11 +44,11 @@ def get_FastICA_configspace(n_features=100, random_state=None):

)

def get_FeatureAgglomeration_configspace(n_features=100):
def get_FeatureAgglomeration_configspace(n_samples):

linkage = Categorical('linkage', ['ward', 'complete', 'average'])
metric = Categorical('metric', ['euclidean', 'l1', 'l2', 'manhattan', 'cosine'])
n_clusters = Integer('n_clusters', bounds=(2, 400))
n_clusters = Integer('n_clusters', bounds=(2, min(n_samples,400)))
pooling_func = Categorical('pooling_func', ['mean', 'median', 'max'])

metric_condition = NotEqualsCondition(metric, linkage, 'ward')
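
As context for the NotEqualsCondition above: ward linkage supports only euclidean distances, so metric is sampled only when linkage is not 'ward'. A minimal sketch of such a conditional space with the ConfigSpace package (n_samples=100 assumed for illustration):

from ConfigSpace import Categorical, ConfigurationSpace, Integer
from ConfigSpace.conditions import NotEqualsCondition

cs = ConfigurationSpace()
linkage = Categorical("linkage", ["ward", "complete", "average"])
metric = Categorical("metric", ["euclidean", "l1", "l2", "manhattan", "cosine"])
n_clusters = Integer("n_clusters", bounds=(2, min(100, 400)))  # mirrors min(n_samples, 400)
cs.add_hyperparameters([linkage, metric, n_clusters])
cs.add_conditions([NotEqualsCondition(metric, linkage, "ward")])
print(cs.sample_configuration())  # metric present only when linkage != "ward"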
2 changes: 1 addition & 1 deletion tpot2/objectives/number_of_nodes.py
@@ -4,7 +4,7 @@

def number_of_nodes_objective(est):
if isinstance(est, GraphPipeline):
return sum(node["instance"] for node in est.graph.nodes)
return sum(number_of_nodes_objective(est.graph.nodes[node]["instance"]) for node in est.graph.nodes)
if isinstance(est, Pipeline):
return sum(number_of_nodes_objective(estimator) for _,estimator in est.steps)
if isinstance(est, sklearn.pipeline.FeatureUnion):
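
The corrected objective recurses into each graph node's "instance" rather than summing the estimator objects themselves, which is what the old line attempted. A small illustration with a plain sklearn pipeline, assuming the elided base case returns 1 for a bare estimator:

from sklearn.linear_model import LogisticRegression
from sklearn.pipeline import Pipeline
from sklearn.preprocessing import StandardScaler

pipe = Pipeline([("scale", StandardScaler()),
                 ("clf", LogisticRegression())])
# number_of_nodes_objective(pipe) == 2: one per step; a step that is itself
# a Pipeline, FeatureUnion, or GraphPipeline is counted recursively.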
1 change: 0 additions & 1 deletion tpot2/objectives/tests/test_complexity_objective.py
@@ -1 +0,0 @@
from ..complexity import BernoulliNB_Complexity, GaussianNB_Complexity, MultinomialNB_Complexity
6 changes: 5 additions & 1 deletion tpot2/search_spaces/pipelines/graph.py
@@ -750,7 +750,11 @@ def generate(self, rng=None):
self.cross_val_predict_cv, self.method, self.memory, self.use_label_encoder, rng=rng)
# if user specified limit, grab a random number between that limit

n_nodes = min(rng.integers(1, self.max_size), 5)
if self.max_size is None or self.max_size == np.inf:
n_nodes = rng.integers(1, 5)
else:
n_nodes = min(rng.integers(1, self.max_size), 5)

starting_ops = []
if self.inner_search_space is not None:
starting_ops.append(ind._mutate_insert_inner_node)
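
The new guard is needed because numpy's Generator.integers requires a finite high bound; a quick sketch of the failure mode being avoided:

import numpy as np

rng = np.random.default_rng(0)
rng.integers(1, 5)         # fine: an int drawn from [1, 5)
# rng.integers(1, np.inf)  # raises, since high must be a finite integer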
41 changes: 28 additions & 13 deletions tpot2/tpot_estimator/templates/tpottemplates.py
@@ -30,6 +30,7 @@ def __init__( self,
memory_limit = "4GB",
client = None,
random_state=None,
allow_inner_regressors=True,
**tpotestimator_kwargs,
):
"""
@@ -58,6 +59,7 @@ def __init__( self,
self.memory_limit = memory_limit
self.client = client
self.random_state = random_state
self.allow_inner_regressors = allow_inner_regressors
self.tpotestimator_kwargs = tpotestimator_kwargs

self.initialized = False
@@ -71,13 +73,18 @@ def fit(self, X, y):
"n_features":X.shape[1],
"random_state":self.random_state}

search_space = tpot2.search_spaces.pipelines.GraphPipeline(
root_search_space= tpot2.config.get_search_space("regressors", **get_search_space_params),
leaf_search_space = None,
inner_search_space = tpot2.config.get_search_space(["selectors","transformers","regressors","scalers"],**get_search_space_params),
max_size = 10,
)

if self.allow_inner_regressors:
search_space = tpot2.search_spaces.pipelines.GraphPipeline(
root_search_space= tpot2.config.get_search_space("regressors", **get_search_space_params),
leaf_search_space = None,
inner_search_space = tpot2.config.get_search_space(["selectors","transformers","regressors","scalers"],**get_search_space_params),
)
else:
search_space = tpot2.search_spaces.pipelines.GraphPipeline(
root_search_space= tpot2.config.get_search_space("regressors", **get_search_space_params),
leaf_search_space = None,
inner_search_space = tpot2.config.get_search_space(["selectors","transformers","scalers"],**get_search_space_params),
)

super(TPOTRegressor,self).__init__(
search_space=search_space,
@@ -134,6 +141,7 @@ def __init__( self,
memory_limit = "4GB",
client = None,
random_state=None,
allow_inner_classifiers=True,
**tpotestimator_kwargs,

):
@@ -164,6 +172,7 @@
self.client = client
self.random_state = random_state
self.tpotestimator_kwargs = tpotestimator_kwargs
self.allow_inner_classifiers = allow_inner_classifiers

self.initialized = False

@@ -176,12 +185,18 @@ def fit(self, X, y):
"n_features":X.shape[1],
"random_state":self.random_state}

search_space = tpot2.search_spaces.pipelines.GraphPipeline(
root_search_space= tpot2.config.get_search_space("classifiers", **get_search_space_params),
leaf_search_space = None,
inner_search_space = tpot2.config.get_search_space(["selectors","transformers","classifiers", "scalers"], **get_search_space_params),
max_size = 10,
)
if self.allow_inner_classifiers:
search_space = tpot2.search_spaces.pipelines.GraphPipeline(
root_search_space= tpot2.config.get_search_space("classifiers", **get_search_space_params),
leaf_search_space = None,
inner_search_space = tpot2.config.get_search_space(["selectors","transformers","classifiers", "scalers"], **get_search_space_params),
)
else:
search_space = tpot2.search_spaces.pipelines.GraphPipeline(
root_search_space= tpot2.config.get_search_space("classifiers", **get_search_space_params),
leaf_search_space = None,
inner_search_space = tpot2.config.get_search_space(["selectors","transformers","scalers"], **get_search_space_params),
)


super(TPOTClassifier,self).__init__(
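
From the user's side, the new flags control whether classifiers/regressors may appear as inner nodes of the evolved graph (i.e., stacked models) or only at the root. A hedged usage sketch:

import tpot2

# Inner nodes restricted to selectors, transformers, and scalers;
# only the root of the evolved graph is a classifier.
est = tpot2.TPOTClassifier(allow_inner_classifiers=False, random_state=42)
# est.fit(X_train, y_train)  # X_train / y_train assumed to exist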
