New search space def #131

Merged: 7 commits, May 15, 2024
2 changes: 1 addition & 1 deletion tpot2/__init__.py
@@ -15,7 +15,7 @@
from . import objectives
from . import selectors
from . import tpot_estimator

from . import old_config_utils

from .tpot_estimator import TPOTClassifier, TPOTRegressor, TPOTEstimator, TPOTEstimatorSteadyState

3 changes: 2 additions & 1 deletion tpot2/builtin_modules/__init__.py
@@ -5,4 +5,5 @@
from .arithmetictransformer import AddTransformer, mul_neg_1_Transformer, MulTransformer, SafeReciprocalTransformer, EQTransformer, NETransformer, GETransformer, GTTransformer, LETransformer, LTTransformer, MinTransformer, MaxTransformer, ZeroTransformer, OneTransformer, NTransformer
from .passthrough import Passthrough
from .imputer import ColumnSimpleImputer
from .selector_wrappers import RFE_ExtraTreesClassifier, SelectFromModel_ExtraTreesClassifier, RFE_ExtraTreesRegressor, SelectFromModel_ExtraTreesRegressor
from .estimatortransformer import EstimatorTransformer
121 changes: 121 additions & 0 deletions tpot2/builtin_modules/estimatortransformer.py
@@ -0,0 +1,121 @@
from sklearn.base import BaseEstimator, TransformerMixin
from sklearn.exceptions import NotFittedError
from sklearn.model_selection import cross_val_predict
from sklearn.utils.validation import check_is_fitted
from sklearn.utils.metaestimators import available_if
import numpy as np

class EstimatorTransformer(BaseEstimator, TransformerMixin):
def __init__(self, estimator, method='auto', passthrough=False, cross_val_predict_cv=0):
self.estimator = estimator
self.method = method
self.passthrough = passthrough
self.cross_val_predict_cv = cross_val_predict_cv

    def fit(self, X, y=None):
        # fit the wrapped estimator, then return self per the sklearn convention
        self.estimator.fit(X, y)
        return self

def transform(self, X):
if self.method == 'auto':
if hasattr(self.estimator, 'predict_proba'):
method = 'predict_proba'
elif hasattr(self.estimator, 'decision_function'):
method = 'decision_function'
elif hasattr(self.estimator, 'predict'):
method = 'predict'
else:
                raise ValueError('Estimator has no predict_proba, decision_function, or predict method')
else:
method = self.method

        output = getattr(self.estimator, method)(X)
        output = np.asarray(output)

if len(output.shape) == 1:
output = output.reshape(-1,1)

if self.passthrough:
return np.hstack((output, X))
else:
return output



def fit_transform(self, X, y=None):
self.estimator.fit(X,y)

if self.method == 'auto':
if hasattr(self.estimator, 'predict_proba'):
method = 'predict_proba'
elif hasattr(self.estimator, 'decision_function'):
method = 'decision_function'
elif hasattr(self.estimator, 'predict'):
method = 'predict'
else:
                raise ValueError('Estimator has no predict_proba, decision_function, or predict method')
else:
method = self.method

        if self.cross_val_predict_cv > 0:
            # use out-of-fold predictions as features; pass the resolved method
            # so predict_proba/decision_function are honored here as well
            output = cross_val_predict(self.estimator, X, y=y,
                                       cv=self.cross_val_predict_cv, method=method)
        else:
            output = getattr(self.estimator, method)(X)

        # reshape 1-d outputs (e.g. from predict) into a single feature column
        output = np.asarray(output)
        if len(output.shape) == 1:
            output = output.reshape(-1, 1)
if self.passthrough:
return np.hstack((output, X))
else:
return output

    def _estimator_has(attr):
        '''Check if we can delegate a method to the wrapped estimator.'''
        return lambda self: (self.estimator is not None and
                             hasattr(self.estimator, attr)
                             )

@available_if(_estimator_has('predict'))
def predict(self, X, **predict_params):
check_is_fitted(self.estimator)
#X = check_array(X)

preds = self.estimator.predict(X,**predict_params)
return preds

@available_if(_estimator_has('predict_proba'))
def predict_proba(self, X, **predict_params):
check_is_fitted(self.estimator)
#X = check_array(X)
return self.estimator.predict_proba(X,**predict_params)

@available_if(_estimator_has('decision_function'))
def decision_function(self, X, **predict_params):
check_is_fitted(self.estimator)
#X = check_array(X)
return self.estimator.decision_function(X,**predict_params)

    def __sklearn_is_fitted__(self):
        """
        Check fitted status and return a boolean.
        """
        # check_is_fitted raises NotFittedError instead of returning a bool,
        # so translate the outcome into True/False.
        try:
            check_is_fitted(self.estimator)
            return True
        except NotFittedError:
            return False


# @property
# def _estimator_type(self):
# return self.estimator._estimator_type



    @property
    def classes_(self):
        """Class labels. Only exists if the wrapped estimator is a classifier."""
        return self.estimator.classes_
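
A minimal usage sketch (not part of the diff): EstimatorTransformer turns a model's out-of-fold predictions into features for a downstream estimator, the classic stacking pattern. The import path comes from the builtin_modules __init__.py change above; the data and models are illustrative.

import numpy as np
from sklearn.datasets import make_classification
from sklearn.linear_model import LogisticRegression
from sklearn.pipeline import make_pipeline
from tpot2.builtin_modules import EstimatorTransformer

X, y = make_classification(n_samples=200, n_features=10, n_informative=5,
                           n_classes=3, random_state=0)

# The inner model's 5-fold out-of-fold predict_proba becomes 3 new feature
# columns; passthrough=True appends the original 10 features as well.
stacker = make_pipeline(
    EstimatorTransformer(LogisticRegression(max_iter=1000),
                         passthrough=True, cross_val_predict_cv=5),
    LogisticRegression(max_iter=1000),
)
stacker.fit(X, y)
print(stacker.predict(X[:5]))
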
2 changes: 1 addition & 1 deletion tpot2/config/classifiers.py
@@ -445,7 +445,7 @@ def HistGradientBoostingClassifier_hyperparameter_parser(params):


if params['early_stop'] == 'off':
final_params['n_iter_no_change'] = 0
# final_params['n_iter_no_change'] = 0
final_params['validation_fraction'] = None
final_params['early_stopping'] = False
elif params['early_stop'] == 'valid':
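
A plausible reason for commenting out this line (an inference, not stated in the PR): recent scikit-learn releases validate n_iter_no_change as an integer >= 1, so forcing it to 0 raises an error, while early_stopping=False already disables early stopping on its own. A minimal sketch of the resulting configuration:

from sklearn.ensemble import HistGradientBoostingClassifier

# early stopping fully off; n_iter_no_change stays at its default and is ignored
clf = HistGradientBoostingClassifier(early_stopping=False, validation_fraction=None)
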
18 changes: 13 additions & 5 deletions tpot2/config/get_configspace.py
@@ -27,6 +27,7 @@
from tpot2.builtin_modules import AddTransformer, mul_neg_1_Transformer, MulTransformer, SafeReciprocalTransformer, EQTransformer, NETransformer, GETransformer, GTTransformer, LETransformer, LTTransformer, MinTransformer, MaxTransformer, ZeroTransformer, OneTransformer, NTransformer
from tpot2.builtin_modules.genetic_encoders import DominantEncoder, RecessiveEncoder, HeterosisEncoder, UnderDominanceEncoder, OverDominanceEncoder
from tpot2.builtin_modules import ZeroCount, ColumnOneHotEncoder
from tpot2.builtin_modules import Passthrough
from sklearn.linear_model import SGDClassifier, LogisticRegression, SGDRegressor, Ridge, Lasso, ElasticNet, Lars, LassoLars, LassoLarsCV, RidgeCV, ElasticNetCV, PassiveAggressiveClassifier, ARDRegression
from sklearn.ensemble import BaggingClassifier, RandomForestClassifier, ExtraTreesClassifier, GradientBoostingClassifier, ExtraTreesRegressor, ExtraTreesClassifier, AdaBoostRegressor, AdaBoostClassifier, GradientBoostingRegressor,RandomForestRegressor, BaggingRegressor, ExtraTreesRegressor, HistGradientBoostingClassifier, HistGradientBoostingRegressor
from sklearn.neural_network import MLPClassifier, MLPRegressor
@@ -53,6 +54,7 @@
PowerTransformer, QuantileTransformer,ARDRegression, QuadraticDiscriminantAnalysis, PassiveAggressiveClassifier, LinearDiscriminantAnalysis,
DominantEncoder, RecessiveEncoder, HeterosisEncoder, UnderDominanceEncoder, OverDominanceEncoder,
GaussianProcessClassifier, BaggingClassifier,LGBMRegressor,
Passthrough,
]


@@ -147,6 +149,8 @@ def get_configspace(name, n_classes=3, n_samples=100, n_features=100, random_state=None):
case "OverDominanceEncoder":
return {}

case "Passthrough":
return {}

#classifiers.py
case "LinearDiscriminantAnalysis":
@@ -335,7 +339,7 @@ def get_configspace(name, n_classes=3, n_samples=100, n_features=100, random_state=None):

space = {

'n': Float("n", bounds=(-1e3, 1e3)),
'n': Float("n", bounds=(-1e2, 1e2)),
}
)

@@ -389,19 +393,23 @@ def get_configspace(name, n_classes=3, n_samples=100, n_features=100, random_state=None):
raise ValueError(f"Could not find configspace for {name}")


def get_search_space(name, n_classes=3, n_samples=100, n_features=100, random_state=None):
def get_search_space(name, n_classes=3, n_samples=100, n_features=100, random_state=None, return_choice_pipeline=True):


#if list of names, return a list of EstimatorNodes
if isinstance(name, list) or isinstance(name, np.ndarray):
search_spaces = [get_search_space(n, n_classes=n_classes, n_samples=n_samples, n_features=n_features, random_state=random_state) for n in name]
search_spaces = [get_search_space(n, n_classes=n_classes, n_samples=n_samples, n_features=n_features, random_state=random_state, return_choice_pipeline=False) for n in name]
#remove Nones
search_spaces = [s for s in search_spaces if s is not None]
return ChoicePipeline(search_spaces=search_spaces)

if return_choice_pipeline:
return ChoicePipeline(search_spaces=np.hstack(search_spaces))
else:
return np.hstack(search_spaces)

if name in GROUPNAMES:
name_list = GROUPNAMES[name]
return get_search_space(name_list, n_classes=n_classes, n_samples=n_samples, n_features=n_features, random_state=random_state)
return get_search_space(name_list, n_classes=n_classes, n_samples=n_samples, n_features=n_features, random_state=random_state, return_choice_pipeline=return_choice_pipeline)

return get_node(name, n_classes=n_classes, n_samples=n_samples, n_features=n_features, random_state=random_state)

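
A quick sketch of the new return_choice_pipeline flag (the estimator names are assumed to be registered in get_configspace; generate/export follow the API used in the tests below):

import tpot2

# Default: a list of names is wrapped into a single ChoicePipeline.
choice = tpot2.config.get_search_space(
    ["LogisticRegression", "RandomForestClassifier"], n_classes=3)
est = choice.generate().export_pipeline()

# New: return the flat array of EstimatorNodes instead, so callers can
# compose them into larger pipelines themselves.
nodes = tpot2.config.get_search_space(
    ["LogisticRegression", "RandomForestClassifier"],
    n_classes=3, return_choice_pipeline=False)
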
4 changes: 2 additions & 2 deletions tpot2/config/regressors.py
@@ -491,8 +491,8 @@ def HistGradientBoostingRegressor_hyperparameter_parser(params):


if params['early_stop'] == 'off':
final_params['n_iter_no_change'] = 0
final_params['validation_fraction'] = None
# final_params['n_iter_no_change'] = 0
# final_params['validation_fraction'] = None
final_params['early_stopping'] = False
elif params['early_stop'] == 'valid':
final_params['n_iter_no_change'] = params['n_iter_no_change']
4 changes: 2 additions & 2 deletions tpot2/config/tests/test_get_configspace.py
@@ -20,7 +20,7 @@ def test_loop_through_all_hyperparameters():
estnode_gen = tpot2.config.get_search_space(class_name, n_classes=n_classes, n_samples=n_samples, n_features=n_features, random_state=random_state)

#generate 25 random hyperparameters and make sure they are all valid
for i in range(100):
for i in range(25):
estnode = estnode_gen.generate()
est = estnode.export_pipeline()

@@ -37,6 +37,6 @@ def test_loop_through_groupnames():
estnode_gen = tpot2.config.get_search_space(class_name, n_classes=n_classes, n_samples=n_samples, n_features=n_features, random_state=random_state)

#generate 25 random hyperparameters and make sure they are all valid
for i in range(100):
for i in range(25):
estnode = estnode_gen.generate()
est = estnode.export_pipeline()
1 change: 1 addition & 0 deletions tpot2/old_config_utils/__init__.py
@@ -0,0 +1 @@
from .old_config_utils import convert_config_dict_to_list, convert_config_dict_to_choicepipeline, convert_config_dict_to_graphpipeline
140 changes: 140 additions & 0 deletions tpot2/old_config_utils/old_config_utils.py
@@ -0,0 +1,140 @@
from ConfigSpace import ConfigurationSpace, Integer, Float, Categorical, Normal
from ConfigSpace import EqualsCondition, OrConjunction, NotEqualsCondition, InCondition
from ..search_spaces.nodes.estimator_node import NONE_SPECIAL_STRING, TRUE_SPECIAL_STRING, FALSE_SPECIAL_STRING
from ..search_spaces.nodes import EstimatorNode
from ..search_spaces.pipelines import WrapperPipeline, ChoicePipeline, GraphPipeline
import ConfigSpace
import sklearn
from functools import partial
import inspect
import numpy as np
import warnings

def load_get_module_from_string(module_string):
module_name, class_name = module_string.rsplit('.', 1)
module = __import__(module_name, fromlist=[class_name])
return getattr(module, class_name)


def hyperparameter_parser(hdict, function_params_conversion_dict):
d = hdict.copy()
d.update(function_params_conversion_dict)
return d



def get_node_space(module_string, params):
method = load_get_module_from_string(module_string)
config_space = ConfigurationSpace()
sub_space = None
sub_space_name = None

function_params_conversion_dict = {}

if params is None:
return EstimatorNode(method=method, space=config_space)

for param_name, param in params.items():
        if param is None:
            config_space.add_hyperparameter(Categorical(param_name, [NONE_SPECIAL_STRING]))
            continue  # skip the remaining checks so the hyperparameter is not added twice

if isinstance(param, range):
param = list(param)

if isinstance(param, list) or isinstance(param, np.ndarray):
            if len(param) == 1: # a single-value list becomes a constant
                p = param[0]
if p is None:
p = NONE_SPECIAL_STRING
elif type(p) == bool:
p = TRUE_SPECIAL_STRING if p else FALSE_SPECIAL_STRING

config_space.add_hyperparameter(ConfigSpace.hyperparameters.Constant(param_name, p))
else:
config_space.add_hyperparameter(Categorical(param_name, param))
# if all(isinstance(i, int) for i in param):
# config_space.add_hyperparameter(Integer(param_name, (min(param), max(param))))
# elif all(isinstance(i, float) for i in param):
# config_space.add_hyperparameter(Float(param_name, (min(param), max(param))))
# else:
# config_space.add_hyperparameter(Categorical(param_name, param))
elif isinstance(param, dict): #TPOT1 config dicts have dictionaries for values of hyperparameters that are either a function or an estimator
if len(param) > 1:
raise ValueError(f"Multiple items in dictionary entry for {param_name}")

key = list(param.keys())[0]

innermethod = load_get_module_from_string(key)

if inspect.isclass(innermethod) and issubclass(innermethod, sklearn.base.BaseEstimator): #is an estimator
if sub_space is None:
sub_space_name = param_name
sub_space = get_node_space(key, param[key])
else:
raise ValueError("Only multiple hyperparameters are estimators. Only one parameter ")

else: #assume the key is a function and ignore the value
function_params_conversion_dict[param_name] = innermethod

else:
# config_space.add_hyperparameter(Categorical(param_name, param))
config_space.add_hyperparameter(ConfigSpace.hyperparameters.Constant(param_name, param))

    parser = None
    if len(function_params_conversion_dict) > 0:
        # bind by keyword so the hyperparameter dict remains the parser's first argument
        parser = partial(hyperparameter_parser, function_params_conversion_dict=function_params_conversion_dict)


if sub_space is None:

if parser is not None:
return EstimatorNode(method=method, space=config_space, hyperparameter_parser=parser)
else:
return EstimatorNode(method=method, space=config_space)


else:
if parser is not None:
return WrapperPipeline(method=method, space=config_space, estimator_search_space=sub_space, wrapped_param_name=sub_space_name, hyperparameter_parser=parser)
else:
return WrapperPipeline(method=method, space=config_space, estimator_search_space=sub_space, wrapped_param_name=sub_space_name)


def convert_config_dict_to_list(config_dict):
search_spaces = []
for key, value in config_dict.items():
search_spaces.append(get_node_space(key, value))
return search_spaces


def convert_config_dict_to_choicepipeline(config_dict):
search_spaces = []
for key, value in config_dict.items():
search_spaces.append(get_node_space(key, value))
return ChoicePipeline(search_spaces)

# Note: inner estimators are not wrapped as transformers, so they do not pass through their inputs the way TPOT1 estimators did
def convert_config_dict_to_graphpipeline(config_dict):
root_search_spaces = []
inner_search_spaces = []

for key, value in config_dict.items():
#if root
if issubclass(load_get_module_from_string(key), sklearn.base.ClassifierMixin) or issubclass(load_get_module_from_string(key), sklearn.base.RegressorMixin):
root_search_spaces.append(get_node_space(key, value))
else:
inner_search_spaces.append(get_node_space(key, value))

if len(root_search_spaces) == 0:
Warning("No classifiers or regressors found, allowing any estimator to be the root node")
root_search_spaces = inner_search_spaces

    # merge root and inner search spaces: inner nodes may use any estimator
    inner_search_spaces = np.concatenate([root_search_spaces, inner_search_spaces])

    root_space = ChoicePipeline(root_search_spaces)
    inner_space = ChoicePipeline(inner_search_spaces)

final_space = GraphPipeline(root_search_space=root_space, inner_search_space=inner_space)
return final_space
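
A hedged end-to-end sketch of the new converters: the dict below mimics TPOT1's config format (module path strings mapping to hyperparameter dicts) with illustrative keys and values, and generate()/export_pipeline() follow the API used in the tests above.

from tpot2.old_config_utils import (
    convert_config_dict_to_choicepipeline,
    convert_config_dict_to_graphpipeline,
)

tpot1_config = {
    "sklearn.linear_model.LogisticRegression": {
        "C": [1e-2, 1e-1, 1.0, 10.0],
        "dual": [False],
    },
    "sklearn.preprocessing.StandardScaler": {},
}

# pick-one-estimator search space
choice_space = convert_config_dict_to_choicepipeline(tpot1_config)

# graph search space: classifiers/regressors become root candidates,
# everything else becomes an inner node
graph_space = convert_config_dict_to_graphpipeline(tpot1_config)
pipeline = graph_space.generate().export_pipeline()
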