From 04886adf3c5c305344524bec4b3af71bb0a0869c Mon Sep 17 00:00:00 2001 From: perib Date: Tue, 19 Mar 2024 15:31:40 -0700 Subject: [PATCH 01/75] new search space working with est --- Tutorial/2_Search_Spaces.ipynb | 832 +++++++++++ setup.py | 1 + tpot2/__init__.py | 4 +- tpot2/config/__init__.py | 22 +- tpot2/config/classifiers.py | 430 +++--- tpot2/config/get_configspace.py | 186 +++ tpot2/config/hyperparametersuggestor.py | 194 --- tpot2/config/imputers.py | 2 + tpot2/config/mdr_configs.py | 80 +- tpot2/config/selectors.py | 128 +- tpot2/config/transformers.py | 157 +-- tpot2/evolvers/__init__.py | 2 +- tpot2/evolvers/base_evolver.py | 26 +- tpot2/evolvers/steady_state_evolver.py | 16 +- .../individual.py | 14 +- tpot2/individual_representations/__init__.py | 5 - .../graph_pipeline_individual/__init__.py | 4 - .../graph_utils/__init__.py | 1 - .../graph_pipeline_individual/individual.py | 1222 ----------------- .../optuna_optimize.py | 228 --- .../graph_pipeline_individual/templates.py | 75 - .../subset_selector/__init__.py | 1 - .../subset_selector/subsetselector.py | 57 - tpot2/population.py | 120 +- tpot2/search_spaces/__init__.py | 4 + tpot2/search_spaces/base.py | 34 + tpot2/search_spaces/nodes/__init__.py | 2 + tpot2/search_spaces/nodes/estimator_node.py | 55 + .../nodes/estimator_node_simple.py | 64 + .../nodes/genetic_feature_selection.py | 178 +++ tpot2/search_spaces/pipelines/__init__.py | 6 + tpot2/search_spaces/pipelines/choice.py | 52 + .../search_spaces/pipelines/dynamic_linear.py | 97 ++ .../pipelines/genetic_sample_weight.py | 1 + tpot2/search_spaces/pipelines/graph.py | 645 +++++++++ .../pipelines}/graph_utils.py | 8 +- .../pipelines/hierarchical_individual.py | 1 + tpot2/search_spaces/pipelines/sequential.py | 62 + tpot2/search_spaces/pipelines/tree.py | 50 + tpot2/search_spaces/pipelines/wrapper.py | 84 ++ .../templates}/__init__.py | 0 tpot2/search_spaces/templates/autoqtl.py | 0 tpot2/search_spaces/templates/stc.py | 0 
tpot2/selectors/lexicase_selection.py | 4 +- .../max_weighted_average_selector.py | 2 +- tpot2/selectors/nsgaii.py | 2 +- tpot2/selectors/random_selector.py | 4 +- tpot2/selectors/tournament_selection.py | 4 +- .../tournament_selection_dominated.py | 4 +- tpot2/tpot_estimator/estimator.py | 133 +- .../tpot_estimator/steady_state_estimator.py | 4 +- 51 files changed, 2803 insertions(+), 2504 deletions(-) create mode 100644 Tutorial/2_Search_Spaces.ipynb create mode 100644 tpot2/config/get_configspace.py delete mode 100644 tpot2/config/hyperparametersuggestor.py create mode 100644 tpot2/config/imputers.py rename tpot2/{individual_representations => }/individual.py (82%) delete mode 100644 tpot2/individual_representations/__init__.py delete mode 100644 tpot2/individual_representations/graph_pipeline_individual/__init__.py delete mode 100644 tpot2/individual_representations/graph_pipeline_individual/graph_utils/__init__.py delete mode 100644 tpot2/individual_representations/graph_pipeline_individual/individual.py delete mode 100644 tpot2/individual_representations/graph_pipeline_individual/optuna_optimize.py delete mode 100644 tpot2/individual_representations/graph_pipeline_individual/templates.py delete mode 100644 tpot2/individual_representations/subset_selector/__init__.py delete mode 100644 tpot2/individual_representations/subset_selector/subsetselector.py create mode 100644 tpot2/search_spaces/__init__.py create mode 100644 tpot2/search_spaces/base.py create mode 100644 tpot2/search_spaces/nodes/__init__.py create mode 100644 tpot2/search_spaces/nodes/estimator_node.py create mode 100644 tpot2/search_spaces/nodes/estimator_node_simple.py create mode 100644 tpot2/search_spaces/nodes/genetic_feature_selection.py create mode 100644 tpot2/search_spaces/pipelines/__init__.py create mode 100644 tpot2/search_spaces/pipelines/choice.py create mode 100644 tpot2/search_spaces/pipelines/dynamic_linear.py create mode 100644 
tpot2/search_spaces/pipelines/genetic_sample_weight.py create mode 100644 tpot2/search_spaces/pipelines/graph.py rename tpot2/{individual_representations/graph_pipeline_individual/graph_utils => search_spaces/pipelines}/graph_utils.py (93%) create mode 100644 tpot2/search_spaces/pipelines/hierarchical_individual.py create mode 100644 tpot2/search_spaces/pipelines/sequential.py create mode 100644 tpot2/search_spaces/pipelines/tree.py create mode 100644 tpot2/search_spaces/pipelines/wrapper.py rename tpot2/{individual_representations/graph_pipeline_individual/test => search_spaces/templates}/__init__.py (100%) create mode 100644 tpot2/search_spaces/templates/autoqtl.py create mode 100644 tpot2/search_spaces/templates/stc.py diff --git a/Tutorial/2_Search_Spaces.ipynb b/Tutorial/2_Search_Spaces.ipynb new file mode 100644 index 00000000..853ca61f --- /dev/null +++ b/Tutorial/2_Search_Spaces.ipynb @@ -0,0 +1,832 @@ +{ + "cells": [ + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "Everything can be done with the TPOTEstimator class. All other classes (TPOTRegressor, TPOTClassifier, TPOTSymbolicClassifier, TPOTSymbolicRegression, TPOTGeneticFeatureSetSelector, etc.) are actually just different default settings for TPOTEstimator.\n", + "\n", + "\n", + "By Default, TPOT will generate pipelines with a default set of classifiers or regressors as roots (this depends on whether classification is set to true or false). All other nodes are selected from a default list of selectors and transformers. Note: This differs from the TPOT1 behavior where by default classifiers and regressors can appear in locations other than the root. You can modify the the search space for leaves, inner nodes, and roots (final classifiers) separately through built in options or custom configuration dictionaries.\n", + "\n", + "In this tutorial we will walk through using the built in configurations, creating custom configurations, and using nested configurations." 
+ ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "# ConfigSpace\n", + "\n", + "Hyperparameter search spaces are defined using the [ConfigSpace package found here](https://github.com/automl/ConfigSpace). More information on how to set up a hyperparameter space can be found in their [documentation here](https://automl.github.io/ConfigSpace/main/guide.html)." + ] + }, + { + "cell_type": "code", + "execution_count": 1, + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "sampled hyperparameters\n", + "{'metric': 'minkowski', 'n_jobs': 1, 'n_neighbors': 6, 'p': 2, 'weights': 'distance'}\n" + ] + } + ], + "source": [ + "from ConfigSpace import ConfigurationSpace\n", + "from ConfigSpace import ConfigurationSpace, Integer, Float, Categorical, Normal\n", + "from sklearn.neighbors import KNeighborsClassifier\n", + "\n", + "knn_configspace = ConfigurationSpace(\n", + " space = {\n", + "\n", + " 'n_neighbors': Integer(\"n_neighbors\", bounds=(1, 10)),\n", + " 'weights': Categorical(\"weights\", ['uniform', 'distance']),\n", + " 'p': Integer(\"p\", bounds=(1, 3)),\n", + " 'metric': Categorical(\"metric\", ['euclidean', 'minkowski']),\n", + " 'n_jobs': 1,\n", + " }\n", + ")\n", + "\n", + "hyperparameters = dict(knn_configspace.sample_configuration())\n", + "print(\"sampled hyperparameters\")\n", + "print(hyperparameters)\n", + "\n", + "knn = KNeighborsClassifier(**hyperparameters)" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "# TPOT Search spaces\n", + "\n", + "TPOT allows you to define both hyperparameter search spaces for individual methods as well as pipeline structure search spaces. For example, TPOT can create linear pipelines, trees, or graphs. \n", + "\n", + "TPOT search spaces are found in the `search_spaces` module. There are two primary kinds of search spaces, node and pipeline. Node search spaces specify the search space of a single sklearn `BaseEstimator`. 
Pipeline search spaces define the possible structures for a group of node search spaces. These take in node search spaces and produce a pipeline using nodes from that search space. Since sklearn Pipelines are also `BaseEstimator`, pipeline search spaces are also technically node search spaces. Meaning that pipeline search spaces can take in other pipeline search spaces in order to define more complex structures. The primary differentiating factor between node and pipeline search spaces is that pipeline search spaces must take in another search space as input to feed its individual nodes. Therefore, all search spaces eventually end in a node search space at the lowest level. Note that parameters for pipeline search spaces can differ, some take in only a single search space, some take in a list, or some take in multiple defined parameters.\n", + "\n", + "search spaces can be found in tpot2.search_spaces.nodes and tpot2.search_spaces.pipelines\n", + "\n", + "### node search spaces\n", + "found in tpot2.search_spaces.nodes\n", + "\n", + "\n", + "EstimatorNode, GeneticFeatureSelector\n", + "| Name | Info |\n", + "| :--- | :----: |\n", + "| EstimatorNode | Takes in a ConfigSpace along with the class of the method. This node will optimize the hyperparameters for a single method. |\n", + "| GeneticFeatureSelector | Uses evolution to optimize a set of features, exports a basic sklearn Selector that simply selects the features chosen by the node. |\n", + "\n", + "\n", + "\n", + "\n", + "### pipeline search spaces\n", + "\n", + "found in tpot2.search_spaces.pipelines\n", + "\n", + "WrapperPipeline - This search space is for wrapping a sklearn estimator with a method that takes another estimator and hyperparameters as arguments.\n", + " For example, this can be used with sklearn.ensemble.BaggingClassifier or sklearn.ensemble.AdaBoostClassifier.\n", + "\n", + "\n", + "| Name | Info |\n", + "| :--- | :----: |\n", + "| ChoicePipeline | Takes in a list of search spaces. 
Will select one node from the search space. |\n", + "| SequentialPipeline | Takes in a list of search spaces. Will produce a pipeline of fixed length. Each step in the pipeline will correspond to the search space provided in the same index. |\n", + "| DynamicLinearPipeline | Takes in a single search space. Will produce a linear pipeline of variable length. Each step in the pipeline will be pulled from the search space provided. |\n", + "| TreePipeline | Generates a pipeline of variable length. Pipeline will have a tree structure similar to TPOT1. |\n", + "| GraphPipeline | Generates a directed acyclic graph of variable size. Search spaces for root, leaf, and inner nodes can be defined separately if desired. |\n", + "| WrapperPipeline | This search space is for wrapping a sklearn estimator with a method that takes another estimator and hyperparameters as arguments. For example, this can be used with sklearn.ensemble.BaggingClassifier or sklearn.ensemble.AdaBoostClassifier. |\n" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "# Estimator node example" + ] + }, + { + "cell_type": "code", + "execution_count": 2, + "metadata": {}, + "outputs": [], + "source": [ + "import tpot2\n", + "from ConfigSpace import ConfigurationSpace\n", + "from ConfigSpace import ConfigurationSpace, Integer, Float, Categorical, Normal\n", + "from sklearn.neighbors import KNeighborsClassifier\n", + "\n", + "knn_configspace = ConfigurationSpace(\n", + " space = {\n", + "\n", + " 'n_neighbors': Integer(\"n_neighbors\", bounds=(1, 10)),\n", + " 'weights': Categorical(\"weights\", ['uniform', 'distance']),\n", + " 'p': Integer(\"p\", bounds=(1, 3)),\n", + " 'metric': Categorical(\"metric\", ['euclidean', 'minkowski']),\n", + " 'n_jobs': 1,\n", + " }\n", + ")\n", + "\n", + "\n", + "knn_node = tpot2.search_spaces.nodes.EstimatorNode(\n", + " method = KNeighborsClassifier,\n", + " space = knn_configspace,\n", + ")" + ] + }, + { + "cell_type": "markdown", + "metadata": 
{}, + "source": [ + "You can generate an individual with the generate() function. This individual samples from the search space and provides mutation and crossover functions to modify the current sample." + ] + }, + { + "cell_type": "code", + "execution_count": 3, + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "sampled hyperparameters\n", + "{'metric': 'minkowski', 'n_jobs': 1, 'n_neighbors': 4, 'p': 3, 'weights': 'distance'}\n", + "mutated hyperparameters\n", + "{'metric': 'euclidean', 'n_jobs': 1, 'n_neighbors': 8, 'p': 2, 'weights': 'uniform'}\n" + ] + } + ], + "source": [ + "knn_individual = knn_node.generate()\n", + "\n", + "print(\"sampled hyperparameters\")\n", + "print(knn_individual.hyperparameters)\n", + "knn_individual.mutate() # mutate the individual\n", + "print(\"mutated hyperparameters\")\n", + "print(knn_individual.hyperparameters)" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "In TPOT2, crossover only modifies the individual calling the crossover function; the second individual remains the same." + ] + }, + { + "cell_type": "code", + "execution_count": 4, + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "original hyperparameters for individual 1\n", + "{'metric': 'euclidean', 'n_jobs': 1, 'n_neighbors': 6, 'p': 3, 'weights': 'distance'}\n", + "original hyperparameters for individual 2\n", + "{'metric': 'minkowski', 'n_jobs': 1, 'n_neighbors': 5, 'p': 2, 'weights': 'uniform'}\n", + "\n", + "post crossover hyperparameters for individual 1\n", + "{'metric': 'minkowski', 'n_jobs': 1, 'n_neighbors': 5, 'p': 3, 'weights': 'distance'}\n", + "post crossover hyperparameters for individual 2\n", + "{'metric': 'minkowski', 'n_jobs': 1, 'n_neighbors': 5, 'p': 2, 'weights': 'uniform'}\n" + ] + } + ], + "source": [ + "knn_individual1 = knn_node.generate()\n", + "knn_individual2 = knn_node.generate()\n", + "\n", + 
"print(\"original hyperparameters for individual 1\")\n", + "print(knn_individual1.hyperparameters)\n", + "\n", + "print(\"original hyperparameters for individual 2\")\n", + "print(knn_individual2.hyperparameters)\n", + "\n", + "print()\n", + "\n", + "knn_individual1.crossover(knn_individual2) # crossover the individuals\n", + "print(\"post crossover hyperparameters for individual 1\")\n", + "print(knn_individual1.hyperparameters)\n", + "print(\"post crossover hyperparameters for individual 2\")\n", + "print(knn_individual2.hyperparameters)\n", + "\n" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "All search spaces have an export_pipeline function that returns an sklearn `BaseEstimator`" + ] + }, + { + "cell_type": "code", + "execution_count": 5, + "metadata": {}, + "outputs": [ + { + "data": { + "text/html": [ + "
KNeighborsClassifier(n_jobs=1, p=3, weights='distance')
In a Jupyter environment, please rerun this cell to show the HTML representation or trust the notebook.
On GitHub, the HTML representation is unable to render, please try loading this page with nbviewer.org.
" + ], + "text/plain": [ + "KNeighborsClassifier(n_jobs=1, p=3, weights='distance')" + ] + }, + "execution_count": 5, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "knn_individual1.export_pipeline()" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "# Pipeline Search Spaces" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "## choice search space\n", + "\n", + "The simplest pipeline search space is the ChoicePipeline. This takes in a list of search spaces and simply selects and samples from one. In this example, we will construct a search space that takes in several options for a classifier." + ] + }, + { + "cell_type": "code", + "execution_count": 6, + "metadata": {}, + "outputs": [], + "source": [ + "import tpot2\n", + "from ConfigSpace import ConfigurationSpace\n", + "from ConfigSpace import ConfigurationSpace, Integer, Float, Categorical, Normal\n", + "from sklearn.neighbors import KNeighborsClassifier\n", + "from sklearn.linear_model import LogisticRegression\n", + "from sklearn.tree import DecisionTreeClassifier\n", + "\n", + "knn_configspace = ConfigurationSpace(\n", + " space = {\n", + "\n", + " 'n_neighbors': Integer(\"n_neighbors\", bounds=(1, 10)),\n", + " 'weights': Categorical(\"weights\", ['uniform', 'distance']),\n", + " 'p': Integer(\"p\", bounds=(1, 3)),\n", + " 'metric': Categorical(\"metric\", ['euclidean', 'minkowski']),\n", + " 'n_jobs': 1,\n", + " }\n", + ")\n", + "\n", + "lr_configspace = ConfigurationSpace(\n", + " space = {\n", + " 'solver': Categorical(\"solver\", ['newton-cg', 'lbfgs', 'liblinear', 'sag', 'saga']),\n", + " 'penalty': Categorical(\"penalty\", ['l1', 'l2']),\n", + " 'dual': Categorical(\"dual\", [True, False]),\n", + " 'C': Float(\"C\", bounds=(1e-4, 1e4), log=True),\n", + " 'class_weight': Categorical(\"class_weight\", ['balanced']),\n", + " 'n_jobs': 1,\n", + " 'max_iter': 1000,\n", + " }\n", + " )\n", + "\n", + "dt_configspace = 
ConfigurationSpace(\n", + " space = {\n", + " 'criterion': Categorical(\"criterion\", ['gini', 'entropy']),\n", + " 'max_depth': Integer(\"max_depth\", bounds=(1, 11)),\n", + " 'min_samples_split': Integer(\"min_samples_split\", bounds=(2, 21)),\n", + " 'min_samples_leaf': Integer(\"min_samples_leaf\", bounds=(1, 21)),\n", + " 'max_features': Categorical(\"max_features\", ['sqrt', 'log2']),\n", + " 'min_weight_fraction_leaf': 0.0,\n", + " }\n", + " )\n", + "\n", + "knn_node = tpot2.search_spaces.nodes.EstimatorNode(\n", + " method = KNeighborsClassifier,\n", + " space = knn_configspace,\n", + ")\n", + "\n", + "lr_node = tpot2.search_spaces.nodes.EstimatorNode(\n", + " method = LogisticRegression,\n", + " space = lr_configspace,\n", + ")\n", + "\n", + "dt_node = tpot2.search_spaces.nodes.EstimatorNode(\n", + " method = DecisionTreeClassifier,\n", + " space = dt_configspace,\n", + ")\n", + "\n", + "classifier_node = tpot2.search_spaces.pipelines.ChoicePipeline(\n", + " choice_list = [\n", + " knn_node,\n", + " lr_node,\n", + " dt_node,\n", + " ]\n", + ")\n", + "\n", + "\n", + "# tpot2.search_spaces.pipelines.ChoicePipeline(\n", + "# choice_list = [\n", + "# tpot2.search_spaces.nodes.EstimatorNode(\n", + "# method = KNeighborsClassifier,\n", + "# space = knn_configspace,\n", + "# ),\n", + "# tpot2.search_spaces.nodes.EstimatorNode(\n", + "# method = LogisticRegression,\n", + "# space = lr_configspace,\n", + "# ),\n", + "# tpot2.search_spaces.nodes.EstimatorNode(\n", + "# method = DecisionTreeClassifier,\n", + "# space = dt_configspace,\n", + "# ),\n", + "# ]\n", + "# )" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "Search space objects provided by pipeline search spaces work the same as with node search spaces. Note that crossover only works when both individuals have sampled the same method. 
" + ] + }, + { + "cell_type": "code", + "execution_count": 7, + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "sampled pipeline\n" + ] + }, + { + "data": { + "text/html": [ + "
KNeighborsClassifier(metric='euclidean', n_jobs=1, n_neighbors=1, p=3)
In a Jupyter environment, please rerun this cell to show the HTML representation or trust the notebook.
On GitHub, the HTML representation is unable to render, please try loading this page with nbviewer.org.
" + ], + "text/plain": [ + "KNeighborsClassifier(metric='euclidean', n_jobs=1, n_neighbors=1, p=3)" + ] + }, + "execution_count": 7, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "classifier_individual = classifier_node.generate()\n", + "\n", + "print(\"sampled pipeline\")\n", + "classifier_individual.export_pipeline()" + ] + }, + { + "cell_type": "code", + "execution_count": 8, + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "mutated pipeline\n" + ] + }, + { + "data": { + "text/html": [ + "
KNeighborsClassifier(metric='euclidean', n_jobs=1, n_neighbors=7, p=1)
In a Jupyter environment, please rerun this cell to show the HTML representation or trust the notebook.
On GitHub, the HTML representation is unable to render, please try loading this page with nbviewer.org.
" + ], + "text/plain": [ + "KNeighborsClassifier(metric='euclidean', n_jobs=1, n_neighbors=7, p=1)" + ] + }, + "execution_count": 8, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "print(\"mutated pipeline\")\n", + "classifier_individual.mutate()\n", + "classifier_individual.export_pipeline()" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "TPOT2 also comes with predefined search spaces. The helper function `tpot2.config.get_search_space` takes in a string or a list of strings, and returns either an EstimatorNode or a ChoicePipeline, respectively. \n", + "\n", + "Strings can correspond to individual methods. There are also special strings that return predefined lists of methods. \n", + "\n", + "Special strings are \"selectors\", \"classifiers\", \"transformers\"\n", + "\n", + "EstimatorNode, GeneticFeatureSelector\n", + "| Special String | Included methods |\n", + "| :--- | :----: |\n", + "| \"selectors\" | \"SelectFwe\", \"SelectPercentile\", \"VarianceThreshold\", \"RFE\", \"SelectFromModel\" |\n", + "| \"classifiers\" | \"LogisticRegression\", \"KNeighborsClassifier\", \"DecisionTreeClassifier\", \"SVC\", \"LinearSVC\", \"RandomForestClassifier\", \"GradientBoostingClassifier\", \"XGBClassifier\", \"LGBMClassifier\", \"ExtraTreesClassifier\", \"SGDClassifier\", \"MLPClassifier\", \"BernoulliNB\", \"MultinomialNB\" |\n", + "| \"transformers\" | \"Binarizer\", \"Normalizer\", \"PCA\", \"ZeroCount\", \"OneHotEncoder\", \"FastICA\", \"FeatureAgglomeration\", \"Nystroem\", \"RBFSampler\" |" + ] + }, + { + "cell_type": "code", + "execution_count": 9, + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "sampled pipeline 1\n" + ] + }, + { + "data": { + "text/html": [ + "
DecisionTreeClassifier(criterion='entropy', max_depth=4, max_features='sqrt',\n",
+       "                       min_samples_leaf=7, min_samples_split=5)
In a Jupyter environment, please rerun this cell to show the HTML representation or trust the notebook.
On GitHub, the HTML representation is unable to render, please try loading this page with nbviewer.org.
" + ], + "text/plain": [ + "DecisionTreeClassifier(criterion='entropy', max_depth=4, max_features='sqrt',\n", + " min_samples_leaf=7, min_samples_split=5)" + ] + }, + "execution_count": 9, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "#same pipeline search space as before.\n", + "classifier_choice = tpot2.config.get_search_space([\"KNeighborsClassifier\", \"LogisticRegression\", \"DecisionTreeClassifier\"])\n", + "\n", + "print(\"sampled pipeline 1\")\n", + "classifier_choice.generate().export_pipeline()" + ] + }, + { + "cell_type": "code", + "execution_count": 10, + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "sampled pipeline 2\n" + ] + }, + { + "data": { + "text/html": [ + "
LogisticRegression(C=0.22118566188988883, class_weight='balanced',\n",
+       "                   max_iter=1000, n_jobs=1, solver='sag')
In a Jupyter environment, please rerun this cell to show the HTML representation or trust the notebook.
On GitHub, the HTML representation is unable to render, please try loading this page with nbviewer.org.
" + ], + "text/plain": [ + "LogisticRegression(C=0.22118566188988883, class_weight='balanced',\n", + " max_iter=1000, n_jobs=1, solver='sag')" + ] + }, + "execution_count": 10, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "print(\"sampled pipeline 2\")\n", + "classifier_choice.generate().export_pipeline()" + ] + }, + { + "cell_type": "code", + "execution_count": 11, + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "sampled pipeline 1\n" + ] + }, + { + "data": { + "text/html": [ + "
KNeighborsClassifier(metric='euclidean', n_jobs=1, n_neighbors=89)
In a Jupyter environment, please rerun this cell to show the HTML representation or trust the notebook.
On GitHub, the HTML representation is unable to render, please try loading this page with nbviewer.org.
" + ], + "text/plain": [ + "KNeighborsClassifier(metric='euclidean', n_jobs=1, n_neighbors=89)" + ] + }, + "execution_count": 11, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "#search space for all classifiers\n", + "classifier_choice = tpot2.config.get_search_space(\"classifiers\")\n", + "\n", + "print(\"sampled pipeline 1\")\n", + "classifier_choice.generate().export_pipeline()" + ] + }, + { + "cell_type": "code", + "execution_count": 12, + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "sampled pipeline 2\n" + ] + }, + { + "data": { + "text/html": [ + "
GradientBoostingClassifier(learning_rate=0.5981565344248039, max_depth=6,\n",
+       "                           max_features=0.14704006316550916,\n",
+       "                           min_samples_leaf=18, min_samples_split=14,\n",
+       "                           subsample=0.36853097212587516)
In a Jupyter environment, please rerun this cell to show the HTML representation or trust the notebook.
On GitHub, the HTML representation is unable to render, please try loading this page with nbviewer.org.
" + ], + "text/plain": [ + "GradientBoostingClassifier(learning_rate=0.5981565344248039, max_depth=6,\n", + " max_features=0.14704006316550916,\n", + " min_samples_leaf=18, min_samples_split=14,\n", + " subsample=0.36853097212587516)" + ] + }, + "execution_count": 12, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "print(\"sampled pipeline 2\")\n", + "classifier_choice.generate().export_pipeline()" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "# Sequential Example\n", + "\n", + "SequentialPipelines are of fixed length and sample from a predefined distribution for each step. Here is an example of the form Selector-Transformer-Classifier" + ] + }, + { + "cell_type": "code", + "execution_count": 13, + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "sampled pipeline\n" + ] + }, + { + "data": { + "text/html": [ + "
Pipeline(steps=[('selectfwe', SelectFwe(alpha=0.026228617618654658)),\n",
+       "                ('zerocount', ZeroCount()),\n",
+       "                ('bernoullinb', BernoulliNB(alpha=0.04656547221901433))])
In a Jupyter environment, please rerun this cell to show the HTML representation or trust the notebook.
On GitHub, the HTML representation is unable to render, please try loading this page with nbviewer.org.
" + ], + "text/plain": [ + "Pipeline(steps=[('selectfwe', SelectFwe(alpha=0.026228617618654658)),\n", + " ('zerocount', ZeroCount()),\n", + " ('bernoullinb', BernoulliNB(alpha=0.04656547221901433))])" + ] + }, + "execution_count": 13, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "stc_pipeline = tpot2.search_spaces.pipelines.SequentialPipeline([\n", + " tpot2.config.get_search_space(\"selectors\"),\n", + " tpot2.config.get_search_space(\"transformers\"),\n", + " tpot2.config.get_search_space(\"classifiers\"),\n", + "])\n", + "\n", + "\n", + "print(\"sampled pipeline\")\n", + "stc_pipeline.generate().export_pipeline()" + ] + }, + { + "cell_type": "code", + "execution_count": 14, + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "sampled pipeline\n" + ] + }, + { + "data": { + "text/html": [ + "
Pipeline(steps=[('selectfwe', SelectFwe(alpha=0.0005298121736972592)),\n",
+       "                ('normalizer', Normalizer()),\n",
+       "                ('mlpclassifier',\n",
+       "                 MLPClassifier(alpha=0.00120637383824527,\n",
+       "                               learning_rate_init=0.001497725714419087))])
In a Jupyter environment, please rerun this cell to show the HTML representation or trust the notebook.
On GitHub, the HTML representation is unable to render, please try loading this page with nbviewer.org.
" + ], + "text/plain": [ + "Pipeline(steps=[('selectfwe', SelectFwe(alpha=0.0005298121736972592)),\n", + " ('normalizer', Normalizer()),\n", + " ('mlpclassifier',\n", + " MLPClassifier(alpha=0.00120637383824527,\n", + " learning_rate_init=0.001497725714419087))])" + ] + }, + "execution_count": 14, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "print(\"sampled pipeline\")\n", + "stc_pipeline.generate().export_pipeline()" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "# Optimize Search Space with TPOTEstimator\n", + "\n", + "Once you have constructed a search space, you can use TPOTEstimator to optimize a pipeline within that space." + ] + }, + { + "cell_type": "code", + "execution_count": 15, + "metadata": {}, + "outputs": [ + { + "name": "stderr", + "output_type": "stream", + "text": [ + "Generation: 100%|██████████| 5/5 [00:48<00:00, 9.63s/it]\n" + ] + }, + { + "data": { + "text/html": [ + "
TPOTEstimator(classification=True, generations=5, max_eval_time_seconds=300,\n",
+       "              scorers=['roc_auc'], scorers_weights=[1],\n",
+       "              search_space=<tpot2.search_spaces.pipelines.graph.GraphPipeline object at 0x71f059a54400>,\n",
+       "              verbose=2)
In a Jupyter environment, please rerun this cell to show the HTML representation or trust the notebook.
On GitHub, the HTML representation is unable to render, please try loading this page with nbviewer.org.
" + ], + "text/plain": [ + "TPOTEstimator(classification=True, generations=5, max_eval_time_seconds=300,\n", + " scorers=['roc_auc'], scorers_weights=[1],\n", + " search_space=,\n", + " verbose=2)" + ] + }, + "execution_count": 15, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "import tpot2\n", + "import numpy as np\n", + "import sklearn\n", + "import sklearn.datasets\n", + "\n", + "# create dummy dataset\n", + "X, y = sklearn.datasets.make_classification(n_samples=2000, n_features=20, n_classes=2)\n", + "\n", + "# train test split\n", + "X_train, X_test, y_train, y_test = sklearn.model_selection.train_test_split(X, y, test_size=0.5)\n", + "\n", + "\n", + "#define the search space\n", + "graph_search_space = tpot2.search_spaces.pipelines.GraphPipeline(\n", + " root_search_space= tpot2.config.get_search_space(\"classifiers\"),\n", + " leaf_search_space = tpot2.config.get_search_space(\"selectors\"), \n", + " inner_search_space = tpot2.config.get_search_space([\"transformers\",\"classifiers\"]),\n", + " max_size = 10,\n", + ")\n", + "\n", + "est = tpot2.TPOTEstimator(\n", + " scorers = [\"roc_auc\"],\n", + " scorers_weights = [1],\n", + " classification = True,\n", + " cv = 5,\n", + " search_space = graph_search_space,\n", + " generations = 5,\n", + " max_eval_time_seconds = 60*5,\n", + " verbose = 2,\n", + ")\n", + "\n", + "est.fit(X_train, y_train)" + ] + }, + { + "cell_type": "code", + "execution_count": 16, + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "auroc score 0.9569231877561475\n" + ] + } + ], + "source": [ + "# score the model\n", + "\n", + "auroc_scorer = sklearn.metrics.get_scorer(\"roc_auc\")\n", + "auroc_score = auroc_scorer(est, X_test, y_test)\n", + "\n", + "print(\"auroc score\", auroc_score)" + ] + }, + { + "cell_type": "code", + "execution_count": 17, + "metadata": {}, + "outputs": [ + { + "data": { + "image/png": "", + "text/plain": [ + "
" + ] + }, + "metadata": {}, + "output_type": "display_data" + } + ], + "source": [ + "#plot the best pipeline\n", + "est.fitted_pipeline_.plot()" + ] + } + ], + "metadata": { + "kernelspec": { + "display_name": "tpot2env", + "language": "python", + "name": "python3" + }, + "language_info": { + "codemirror_mode": { + "name": "ipython", + "version": 3 + }, + "file_extension": ".py", + "mimetype": "text/x-python", + "name": "python", + "nbconvert_exporter": "python", + "pygments_lexer": "ipython3", + "version": "3.10.13" + } + }, + "nbformat": 4, + "nbformat_minor": 2 +} diff --git a/setup.py b/setup.py index 19f0f322..f0977acd 100644 --- a/setup.py +++ b/setup.py @@ -48,6 +48,7 @@ def calculate_version(): 'dask-ml>=2022.5.27', 'dask-jobqueue>=0.8.1', 'func_timeout>=4.3.5', + 'configspace>=0.7.1', ], extras_require={ 'skrebate': ['skrebate>=0.3.4'], diff --git a/tpot2/__init__.py b/tpot2/__init__.py index ddb8357a..aab11834 100644 --- a/tpot2/__init__.py +++ b/tpot2/__init__.py @@ -2,13 +2,15 @@ #TODO: are all the imports in the init files done correctly? #TODO clean up import organization +from .individual import BaseIndividual + from .graphsklearn import GraphPipeline from .population import Population from . import builtin_modules from . import utils from . import config -from . import individual_representations +from . import search_spaces from . import evolvers from . import objectives from . import selectors diff --git a/tpot2/config/__init__.py b/tpot2/config/__init__.py index e019b78e..7ee03ace 100644 --- a/tpot2/config/__init__.py +++ b/tpot2/config/__init__.py @@ -1,21 +1 @@ -#TODO: make configuration dictionaries optinally based on strings? 
-from .classifiers import make_classifier_config_dictionary -from .transformers import make_transformer_config_dictionary -from .regressors import make_regressor_config_dictionary -from .selectors import make_selector_config_dictionary -from .special_configs import make_arithmetic_transformer_config_dictionary, make_FSS_config_dictionary, make_passthrough_config_dictionary -from .autoqtl_builtins import make_FeatureEncodingFrequencySelector_config_dictionary, make_genetic_encoders_config_dictionary -from .hyperparametersuggestor import * - -try: - from .classifiers_sklearnex import make_sklearnex_classifier_config_dictionary - from .regressors_sklearnex import make_sklearnex_regressor_config_dictionary -except ModuleNotFoundError: #if optional packages are not installed - pass - -try: - from .mdr_configs import make_skrebate_config_dictionary, make_MDR_config_dictionary, make_ContinuousMDR_config_dictionary -except: #if optional packages are not installed - pass - -from .classifiers import * \ No newline at end of file +from .get_configspace import get_search_space \ No newline at end of file diff --git a/tpot2/config/classifiers.py b/tpot2/config/classifiers.py index 06ed2507..5816b6bb 100644 --- a/tpot2/config/classifiers.py +++ b/tpot2/config/classifiers.py @@ -1,270 +1,222 @@ -from sklearn.linear_model import SGDClassifier -from sklearn.ensemble import RandomForestClassifier, ExtraTreesClassifier, GradientBoostingClassifier -from sklearn.neural_network import MLPClassifier -from sklearn.tree import DecisionTreeClassifier -from xgboost import XGBClassifier -from sklearn.neighbors import KNeighborsClassifier -from sklearn.svm import SVC -from sklearn.linear_model import LogisticRegression -from lightgbm import LGBMClassifier -from sklearn.svm import LinearSVC - -from functools import partial -#import GaussianNB - -from sklearn.naive_bayes import GaussianNB, BernoulliNB, MultinomialNB - -import numpy as np - - - -def params_LogisticRegression(trial, 
random_state=None, name=None): - params = {} - params['solver'] = trial.suggest_categorical(name=f'solver_{name}', - choices=[f'newton-cg', 'lbfgs', 'liblinear', 'sag', 'saga']) - params['dual'] = False - params['penalty'] = 'l2' - params['C'] = trial.suggest_float(f'C_{name}', 1e-4, 1e4, log=True) - params['l1_ratio'] = None - if params['solver'] == 'liblinear': - params['penalty'] = trial.suggest_categorical(name=f'penalty_{name}', choices=['l1', 'l2']) - if params['penalty'] == 'l2': - params['dual'] = trial.suggest_categorical(name=f'dual_{name}', choices=[True, False]) - else: - params['penalty'] = 'l1' - - params['class_weight'] = trial.suggest_categorical(name=f'class_weight_{name}', choices=['balanced']) - param_grid = {'solver': params['solver'], - 'penalty': params['penalty'], - 'dual': params['dual'], - 'multi_class': 'auto', - 'l1_ratio': params['l1_ratio'], - 'C': params['C'], - 'n_jobs': 1, - 'max_iter': 1000, - 'random_state': random_state - } - return param_grid - - -def params_KNeighborsClassifier(trial, name=None, n_samples=10): - return { - 'n_neighbors': trial.suggest_int(f'n_neighbors_{name}', 1, n_samples, log=True ), - 'weights': trial.suggest_categorical(f'weights_{name}', ['uniform', 'distance']), - 'p': trial.suggest_int('p', 1, 3), - 'metric': str(trial.suggest_categorical(f'metric_{name}', ['euclidean', 'minkowski'])), - 'n_jobs': 1, - } +from ConfigSpace import ConfigurationSpace +from ConfigSpace import ConfigurationSpace, Integer, Float, Categorical, Normal + + +def get_LogisticRegression_ConfigurationSpace(): + return ConfigurationSpace( + space = { + 'solver': Categorical("solver", ['newton-cg', 'lbfgs', 'liblinear', 'sag', 'saga']), + 'penalty': Categorical("penalty", ['l1', 'l2']), + 'dual': Categorical("dual", [True, False]), + 'C': Float("C", bounds=(1e-4, 1e4), log=True), + 'class_weight': Categorical("class_weight", ['balanced']), + 'n_jobs': 1, + 'max_iter': 1000, + } + ) -def params_DecisionTreeClassifier(trial, 
random_state=None, name=None): - return { - 'criterion': trial.suggest_categorical(f'criterion_{name}', ['gini', 'entropy']), - 'max_depth': trial.suggest_int(f'max_depth_{name}', 1, 11), - # 'max_depth_factor' : trial.suggest_float(f'max_depth_factor_{name}', 0, 2, step=0.1), - 'min_samples_split': trial.suggest_int(f'min_samples_split_{name}', 2, 21), - 'min_samples_leaf': trial.suggest_int(f'min_samples_leaf_{name}', 1, 21), - 'min_weight_fraction_leaf': 0.0, - 'max_features': trial.suggest_categorical(f'max_features_{name}', [ 'sqrt', 'log2']), - 'max_leaf_nodes': None, - 'random_state': random_state - } +def get_KNeighborsClassifier_ConfigurationSpace(n_samples=10): + return ConfigurationSpace( + space = { -def params_SVC(trial, random_state=None, name=None): - return { - 'kernel': trial.suggest_categorical(name=f'kernel_{name}', choices=['poly', 'rbf', 'linear', 'sigmoid']), - 'C': trial.suggest_float(f'C_{name}', 1e-4, 25, log=True), - #'gamma': trial.suggest_categorical(name='fgamma_{name}', choices=['scale', 'auto']), - 'degree': trial.suggest_int(f'degree_{name}', 1, 4), - 'class_weight': trial.suggest_categorical(name=f'class_weight_{name}', choices=[None, 'balanced']), - #'coef0': trial.suggest_float(f'coef0_{name}', 0, 10, step=0.1), - 'max_iter': 3000, - 'tol': 0.005, - 'probability': True, - 'random_state': random_state - } + 'n_neighbors': Integer("n_neighbors", bounds=(1, max(50,n_samples))), + 'weights': Categorical("weights", ['uniform', 'distance']), + 'p': Integer("p", bounds=(1, 3)), + 'metric': Categorical("metric", ['euclidean', 'minkowski']), + 'n_jobs': 1, + } + ) -def params_LinearSVC(trial, random_state=None, name=None): +def get_DecisionTreeClassifier_ConfigurationSpace(): + return ConfigurationSpace( + space = { + 'criterion': Categorical("criterion", ['gini', 'entropy']), + 'max_depth': Integer("max_depth", bounds=(1, 11)), + 'min_samples_split': Integer("min_samples_split", bounds=(2, 21)), + 'min_samples_leaf': 
Integer("min_samples_leaf", bounds=(1, 21)), + 'max_features': Categorical("max_features", ['sqrt', 'log2']), + 'min_weight_fraction_leaf': 0.0, + } + ) - penalty = trial.suggest_categorical(name=f'penalty_{name}', choices=['l1', 'l2']) - if penalty == 'l1': - loss = 'squared_hinge' - else: - loss = trial.suggest_categorical(name=f'loss_{name}', choices=['hinge', 'squared_hinge']) - if loss == 'hinge' and penalty == 'l2': - dual = True - else: - dual = trial.suggest_categorical(name=f'dual_{name}', choices=[True, False]) - - return { - 'penalty': penalty, - 'loss': loss, - 'dual': dual, - 'C': trial.suggest_float(f'C_{name}', 1e-4, 25, log=True), - 'random_state': random_state - } +def get_SVC_ConfigurationSpace(): + return ConfigurationSpace( + space = { + 'kernel': Categorical("kernel", ['poly', 'rbf', 'linear', 'sigmoid']), + 'C': Float("C", bounds=(1e-4, 25), log=True), + 'degree': Integer("degree", bounds=(1, 4)), + #'class_weight': Categorical("class_weight", [None, 'balanced']), #TODO add class_weight. configspace doesn't allow None as a value. + 'max_iter': 3000, + 'tol': Float("tol", bounds=(0.001, 0.01)), + 'probability': Categorical("probability", [True]), # configspace doesn't allow bools as a default value? 
but does allow them as a value inside a Categorical + } + ) + +def get_LinearSVC_ConfigurationSpace(random_state=None,): + space = { + 'penalty': Categorical("penalty", ['l1', 'l2']), + 'loss': Categorical("loss", ['hinge', 'squared_hinge']), + 'dual': Categorical("dual", [True, False]), + 'C': Float("C", bounds=(1e-4, 25), log=True), + } + + if random_state is not None: #This is required because configspace doesn't allow None as a value + space['random_state'] = random_state + + return ConfigurationSpace( + space = space + ) -def params_RandomForestClassifier(trial, random_state=None, name=None): - params = { - 'n_estimators': 100, - 'criterion': trial.suggest_categorical(name=f'criterion_{name}', choices=['gini', 'entropy']), - #'max_features': trial.suggest_categorical('max_features_{name}', ['auto', 'sqrt', 'log2']), - 'bootstrap': trial.suggest_categorical(name=f'bootstrap_{name}', choices=[True, False]), - 'min_samples_split': trial.suggest_int(f'min_samples_split_{name}', 2, 20), - 'min_samples_leaf': trial.suggest_int(f'min_samples_leaf_{name}', 1, 20), - 'n_jobs': 1, - 'random_state': random_state - } - return params -def params_GradientBoostingClassifier(trial, random_state=None, n_classes=None, name=None): +def get_RandomForestClassifier_ConfigurationSpace(random_state=None): + space = { + 'criterion': Categorical("criterion", ['gini', 'entropy']), + 'min_samples_split': Integer("min_samples_split", bounds=(2, 20)), + 'min_samples_leaf': Integer("min_samples_leaf", bounds=(1, 20)), + 'bootstrap': Categorical("bootstrap", [True, False]), + } + + if random_state is not None: #This is required because configspace doesn't allow None as a value + space['random_state'] = random_state + + return ConfigurationSpace( + space = space + ) + +def get_GradientBoostingClassifier_ConfigurationSpace(n_classes=None): + if n_classes is not None and n_classes > 2: loss = 'log_loss' else: - loss = trial.suggest_categorical(name=f'loss_{name}', choices=['log_loss', 
'exponential']) - - params = { - 'n_estimators': 100, - 'loss': loss, - 'learning_rate': trial.suggest_float(f'learning_rate_{name}', 1e-3, 1, log=True), - 'min_samples_leaf': trial.suggest_int(f'min_samples_leaf_{name}', 1, 20), - 'min_samples_split': trial.suggest_int(f'min_samples_split_{name}', 2, 20), - 'subsample': trial.suggest_float(f'subsample_{name}', 0.1, 1.0), - 'max_features': trial.suggest_float(f'max_features_{name}', 0.1, 1.0), - 'max_depth': trial.suggest_int(f'max_depth_{name}', 1, 10), - 'tol': 1e-4, - 'random_state': random_state - } - return params - - -def params_XGBClassifier(trial, random_state=None, name=None): - return { - 'learning_rate': trial.suggest_float(f'learning_rate_{name}', 1e-3, 1, log=True), - 'subsample': trial.suggest_float(f'subsample_{name}', 0.1, 1.0), - 'min_child_weight': trial.suggest_int(f'min_child_weight_{name}', 1, 21), - #'booster': trial.suggest_categorical(name='booster_{name}', choices=['gbtree', 'dart']), - 'n_estimators': 100, - 'max_depth': trial.suggest_int(f'max_depth_{name}', 1, 11), - 'n_jobs': 1, - #'use_label_encoder' : True, - 'random_state': random_state - } + loss = Categorical("loss", ['log_loss', 'exponential']) + + return ConfigurationSpace( + space = { + 'n_estimators': 100, + 'loss': loss, + 'learning_rate': Float("learning_rate", bounds=(1e-3, 1), log=True), + 'min_samples_leaf': Integer("min_samples_leaf", bounds=(1, 20)), + 'min_samples_split': Integer("min_samples_split", bounds=(2, 20)), + 'subsample': Float("subsample", bounds=(0.1, 1.0)), + 'max_features': Float("max_features", bounds=(0.1, 1.0)), + 'max_depth': Integer("max_depth", bounds=(1, 10)), + } + ) -def params_LGBMClassifier(trial, random_state=None, name=None): - params = { - 'objective': 'binary', - 'metric': 'binary_logloss', - 'boosting_type': trial.suggest_categorical(name=f'boosting_type_{name}', choices=['gbdt', 'dart', 'goss']), - 'num_leaves': trial.suggest_int(f'num_leaves_{name}', 2, 256), - 'max_depth': 
trial.suggest_int(f'max_depth_{name}', 1, 10), - 'n_estimators': trial.suggest_int(f'n_estimators_{name}', 10, 100), # 200-6000 by 200 - 'deterministic': True, - 'force_row_wise': True, - 'n_jobs': 1, - 'random_state': random_state +def get_XGBClassifier_ConfigurationSpace(random_state=None,): + + space = { + 'learning_rate': Float("learning_rate", bounds=(1e-3, 1), log=True), + 'subsample': Float("subsample", bounds=(0.1, 1.0)), + 'min_child_weight': Integer("min_child_weight", bounds=(1, 21)), + 'max_depth': Integer("max_depth", bounds=(1, 11)), + } - } - if 2 ** params['max_depth'] > params['num_leaves']: - params['num_leaves'] = 2 ** params['max_depth'] - return params - - -def params_ExtraTreesClassifier(trial, random_state=None, name=None): - params = { - 'n_estimators': 100, - 'criterion': trial.suggest_categorical(name=f'criterion_{name}', choices=["gini", "entropy"]), - 'max_features': trial.suggest_float('max_features', 0.05, 1.00), - 'min_samples_split': trial.suggest_int(f'min_samples_split_{name}', 2, 21,step=1), - 'min_samples_leaf': trial.suggest_int(f'min_samples_leaf_{name}', 1, 21, step=1), - 'bootstrap': trial.suggest_categorical(f'bootstrap_{name}', [True, False]), - 'n_jobs': 1, - 'random_state': random_state - } - return params - -def params_SGDClassifier(trial, random_state=None, name=None): - params = { - 'loss': trial.suggest_categorical(f'loss_{name}', ['log_loss', 'modified_huber',]), - 'penalty': 'elasticnet', - 'alpha': trial.suggest_float(f'alpha_{name}', 1e-5, 0.01, log=True), - 'learning_rate': trial.suggest_categorical(f'learning_rate_{name}', ['invscaling', 'constant']), - 'fit_intercept': True, - 'l1_ratio': trial.suggest_float(f'l1_ratio_{name}', 0.0, 1.0), - 'eta0': trial.suggest_float(f'eta0_{name}', 0.01, 1.0), - 'power_t': trial.suggest_float(f'power_t_{name}', 1e-5, 100.0, log=True), - 'n_jobs': 1, - 'random_state': random_state - } + if random_state is not None: #This is required because configspace doesn't allow None as a 
value + space['random_state'] = random_state + + return ConfigurationSpace( + space = space + ) + +def get_LGBMClassifier_ConfigurationSpace(random_state=None,): + + space = { + 'objective': 'binary', + 'metric': 'binary_logloss', + 'boosting_type': Categorical("boosting_type", ['gbdt', 'dart', 'goss']), + 'num_leaves': Integer("num_leaves", bounds=(2, 256)), + 'max_depth': Integer("max_depth", bounds=(1, 10)), + 'n_estimators': Integer("n_estimators", bounds=(10, 100)), + 'n_jobs': 1, + } - return params + if random_state is not None: #This is required because configspace doesn't allow None as a value + space['random_state'] = random_state -def params_MLPClassifier_tpot(trial, random_state=None, name=None): - params = { - 'alpha': trial.suggest_float(f'alpha_{name}', 1e-4, 1e-1, log=True), - 'learning_rate_init': trial.suggest_float(f'learning_rate_init_{name}', 1e-3, 1., log=True), - 'random_state': random_state - } + return ConfigurationSpace( + space=space + ) - return params -def params_MLPClassifier_large(trial, name=None): - n_layers = trial.suggest_int(f'n_layers_{name}', 2, 3) - layers = [] - for i in range(n_layers): - layers.append(trial.suggest_int(f'n_neurons_{i}_{name}', 4, 128)) +def get_ExtraTreesClassifier_ConfigurationSpace(random_state=None): + space = { + 'n_estimators': Integer("n_estimators", bounds=(10, 500)), + 'criterion': Categorical("criterion", ["gini", "entropy"]), + 'max_features': Float("max_features", bounds=(0.05, 1.00)), + 'min_samples_split': Integer("min_samples_split", bounds=(2, 21)), + 'min_samples_leaf': Integer("min_samples_leaf", bounds=(1, 21)), + 'bootstrap': Categorical("bootstrap", [True, False]), + 'n_jobs': 1, + } + + if random_state is not None: #This is required because configspace doesn't allow None as a value + space['random_state'] = random_state + + return ConfigurationSpace( + space = space + ) + + + +def get_SGDClassifier_ConfigurationSpace(random_state=None): + + space = { + 'loss': Categorical("loss", 
['log_loss', 'modified_huber']), + 'penalty': 'elasticnet', + 'alpha': Float("alpha", bounds=(1e-5, 0.01), log=True), + 'learning_rate': Categorical("learning_rate", ['invscaling', 'constant']), + 'l1_ratio': Float("l1_ratio", bounds=(0.0, 1.0)), + 'eta0': Float("eta0", bounds=(0.01, 1.0)), + 'power_t': Float("power_t", bounds=(1e-5, 100.0), log=True), + 'n_jobs': 1, + 'fit_intercept': Categorical("fit_intercept", [True]), + } + + if random_state is not None: #This is required because configspace doesn't allow None as a value + space['random_state'] = random_state - params = { - 'activation': trial.suggest_categorical(name=f'activation_{name}', choices=['identity', 'logistic', 'tanh', 'relu']), - 'solver': trial.suggest_categorical(name=f'solver_{name}', choices=['lbfgs', 'sgd', 'adam']), - 'alpha': trial.suggest_float(f'alpha_{name}', 0.0001, 1.0, log=True), - 'hidden_layer_sizes': tuple(layers), - 'max_iter' : 10000 - } + return ConfigurationSpace( + space = space + ) - return params -def params_BernoulliNB(trial, name=None): - params = { - 'alpha': trial.suggest_float(f'alpha_{name}', 1e-3, 100, log=True), - 'fit_prior': trial.suggest_categorical(f'fit_prior_{name}', [True, False]), +def get_MLPClassifier_ConfigurationSpace(random_state=None): + space = { + 'alpha': Float("alpha", bounds=(1e-4, 1e-1), log=True), + 'learning_rate_init': Float("learning_rate_init", bounds=(1e-3, 1.), log=True), } - return params + + if random_state is not None: #This is required because configspace doesn't allow None as a value + space['random_state'] = random_state + + return ConfigurationSpace( + space = space + ) + + +def get_BernoulliNB_ConfigurationSpace(): + return ConfigurationSpace( + space = { + 'alpha': Float("alpha", bounds=(1e-3, 100), log=True), + 'fit_prior': Categorical("fit_prior", [True, False]), + } + ) -def params_MultinomialNB(trial, name=None): - params = { - 'alpha': trial.suggest_float(f'alpha_{name}', 1e-3, 100, log=True), - 'fit_prior': 
trial.suggest_categorical(f'fit_prior_{name}', [True, False]), - } - return params - - -def make_classifier_config_dictionary(random_state=None, n_samples=10, n_classes=None): - n_samples = min(n_samples,100) #TODO optimize this - - return { - LogisticRegression: partial(params_LogisticRegression, random_state=random_state), - DecisionTreeClassifier: partial(params_DecisionTreeClassifier, random_state=random_state), - KNeighborsClassifier: partial(params_KNeighborsClassifier,n_samples=n_samples), - GradientBoostingClassifier: partial(params_GradientBoostingClassifier, random_state=random_state, n_classes=n_classes), - ExtraTreesClassifier: partial(params_ExtraTreesClassifier, random_state=random_state), - RandomForestClassifier: partial(params_RandomForestClassifier, random_state=random_state), - SGDClassifier: partial(params_SGDClassifier, random_state=random_state), - GaussianNB: {}, - BernoulliNB: params_BernoulliNB, - MultinomialNB: params_MultinomialNB, - XGBClassifier: partial(params_XGBClassifier, random_state=random_state), - #LinearSVC: partial(params_LinearSVC, random_state=random_state), - SVC: partial(params_SVC, random_state=random_state), - #: params_LGBMClassifier, # logistic regression and SVM/SVC are just special cases of this one? remove? 
- MLPClassifier: partial(params_MLPClassifier_tpot, random_state=random_state), +def get_MultinomialNB_ConfigurationSpace(): + return ConfigurationSpace( + space = { + 'alpha': Float("alpha", bounds=(1e-3, 100), log=True), + 'fit_prior': Categorical("fit_prior", [True, False]), } + ) diff --git a/tpot2/config/get_configspace.py b/tpot2/config/get_configspace.py new file mode 100644 index 00000000..7a9e552e --- /dev/null +++ b/tpot2/config/get_configspace.py @@ -0,0 +1,186 @@ +from ..search_spaces.nodes import EstimatorNode +from ..search_spaces.pipelines import ChoicePipeline + +from .classifiers import * +from .transformers import * +from .regressors import * +from .selectors import * + + +from sklearn.linear_model import SGDClassifier +from sklearn.ensemble import RandomForestClassifier, ExtraTreesClassifier, GradientBoostingClassifier +from sklearn.neural_network import MLPClassifier +from sklearn.tree import DecisionTreeClassifier +from xgboost import XGBClassifier +from sklearn.neighbors import KNeighborsClassifier +from sklearn.svm import SVC +from sklearn.linear_model import LogisticRegression +from lightgbm import LGBMClassifier +from sklearn.svm import LinearSVC +from sklearn.naive_bayes import GaussianNB, BernoulliNB, MultinomialNB +from sklearn.ensemble import ExtraTreesRegressor, ExtraTreesClassifier + + +from tpot2.builtin_modules import ZeroCount, OneHotEncoder, ColumnOneHotEncoder +from sklearn.preprocessing import Binarizer +from sklearn.decomposition import FastICA +from sklearn.cluster import FeatureAgglomeration +from sklearn.preprocessing import MaxAbsScaler +from sklearn.preprocessing import MinMaxScaler +from sklearn.preprocessing import Normalizer +from sklearn.kernel_approximation import Nystroem +from sklearn.decomposition import PCA +from sklearn.preprocessing import PolynomialFeatures +from sklearn.kernel_approximation import RBFSampler +from sklearn.preprocessing import RobustScaler +from sklearn.preprocessing import StandardScaler + + 
+from sklearn.feature_selection import SelectFwe +from sklearn.feature_selection import SelectPercentile +from sklearn.feature_selection import VarianceThreshold +from sklearn.feature_selection import RFE +from sklearn.feature_selection import SelectFromModel + +import sklearn.feature_selection + + +from sklearn.feature_selection import f_classif +from sklearn.feature_selection import f_regression + + + +from tpot2.builtin_modules import RFE_ExtraTreesClassifier, SelectFromModel_ExtraTreesClassifier, RFE_ExtraTreesRegressor, SelectFromModel_ExtraTreesRegressor + +STRING_TO_CLASS = { + #classifiers + "LogisticRegression": LogisticRegression, + "KNeighborsClassifier": KNeighborsClassifier, + "DecisionTreeClassifier": DecisionTreeClassifier, + "SVC": SVC, + "LinearSVC": LinearSVC, + "RandomForestClassifier": RandomForestClassifier, + "GradientBoostingClassifier": GradientBoostingClassifier, + "XGBClassifier": XGBClassifier, + "LGBMClassifier": LGBMClassifier, + "ExtraTreesClassifier": ExtraTreesClassifier, + "SGDClassifier": SGDClassifier, + "MLPClassifier": MLPClassifier, + "BernoulliNB": BernoulliNB, + "MultinomialNB": MultinomialNB, + + #transformers + "Binarizer": Binarizer, + "Normalizer": Normalizer, + "PCA": PCA, + "ZeroCount": ZeroCount, + "OneHotEncoder": ColumnOneHotEncoder, + "FastICA": FastICA, + "FeatureAgglomeration": FeatureAgglomeration, + "Nystroem": Nystroem, + "RBFSampler": RBFSampler, + + #selectors + "SelectFwe": SelectFwe, + "SelectPercentile": SelectPercentile, + "VarianceThreshold": VarianceThreshold, + "RFE": RFE, + "SelectFromModel": SelectFromModel, +} + + + + +def get_configspace(name, n_classes=3, n_samples=100, random_state=None): + match name: + #classifiers.py + case "LogisticRegression": + return get_LogisticRegression_ConfigurationSpace() + case "KNeighborsClassifier": + return get_KNeighborsClassifier_ConfigurationSpace(n_samples=n_samples) + case "DecisionTreeClassifier": + return get_DecisionTreeClassifier_ConfigurationSpace() + 
case "SVC": + return get_SVC_ConfigurationSpace() + case "LinearSVC": + return get_LinearSVC_ConfigurationSpace() + case "RandomForestClassifier": + return get_RandomForestClassifier_ConfigurationSpace(random_state=random_state) + case "GradientBoostingClassifier": + return get_GradientBoostingClassifier_ConfigurationSpace(n_classes=n_classes) + case "XGBClassifier": + return get_XGBClassifier_ConfigurationSpace(random_state=random_state) + case "LGBMClassifier": + return get_LGBMClassifier_ConfigurationSpace(random_state=random_state) + case "ExtraTreesClassifier": + return get_ExtraTreesClassifier_ConfigurationSpace(random_state=random_state) + case "SGDClassifier": + return get_SGDClassifier_ConfigurationSpace(random_state=random_state) + case "MLPClassifier": + return get_MLPClassifier_ConfigurationSpace(random_state=random_state) + case "BernoulliNB": + return get_BernoulliNB_ConfigurationSpace() + case "MultinomialNB": + return get_MultinomialNB_ConfigurationSpace() + + #transformers.py + case "Binarizer": + return Binarizer_configspace + case "Normalizer": + return Normalizer_configspace + case "PCA": + return PCA_configspace + case "ZeroCount": + return ZeroCount_configspace + case "OneHotEncoder": + return OneHotEncoder_configspace + case "FastICA": + return get_FastICA_configspace() + case "FeatureAgglomeration": + return get_FeatureAgglomeration_configspace() + case "Nystroem": + return get_Nystroem_configspace() + case "RBFSampler": + return get_RBFSampler_configspace() + + #selectors.py + case "SelectFwe": + return SelectFwe_configspace + case "SelectPercentile": + return SelectPercentile_configspace + case "VarianceThreshold": + return VarianceThreshold_configspace + case "RFE": + return RFE_configspace_part + case "SelectFromModel": + return SelectFromModel_configspace_part + + +def check_for_special(name): + match name: + case "selectors": + return ["SelectFwe", "SelectPercentile", "VarianceThreshold",] + case "classifiers": + return 
["LogisticRegression", "KNeighborsClassifier", "DecisionTreeClassifier", "SVC", "RandomForestClassifier", "GradientBoostingClassifier", "XGBClassifier", "ExtraTreesClassifier", "SGDClassifier", "MLPClassifier", "BernoulliNB", "MultinomialNB"] + case "transformers": + return ["Binarizer", "Normalizer", "PCA", "ZeroCount", "OneHotEncoder", "FastICA", "FeatureAgglomeration", "Nystroem", "RBFSampler"] + + return name + + +def get_search_space(name, n_classes=3, n_samples=100, random_state=None): + name = check_for_special(name) + + #if list of names, return a list of EstimatorNodes + if isinstance(name, list) or isinstance(name, np.ndarray): + search_spaces = [get_search_space(n, n_classes=n_classes, n_samples=n_samples, random_state=random_state) for n in name] + return ChoicePipeline(choice_list=search_spaces) + else: + return get_estimatornode(name, n_classes=n_classes, n_samples=n_samples, random_state=random_state) + + +def get_estimatornode(name, n_classes=3, n_samples=100, random_state=None): + configspace = get_configspace(name, n_classes=n_classes, n_samples=n_samples, random_state=random_state) + + + return EstimatorNode(STRING_TO_CLASS[name], configspace) diff --git a/tpot2/config/hyperparametersuggestor.py b/tpot2/config/hyperparametersuggestor.py deleted file mode 100644 index 1d3ad1f0..00000000 --- a/tpot2/config/hyperparametersuggestor.py +++ /dev/null @@ -1,194 +0,0 @@ -# import random -# from scipy.stats import loguniform, logser #TODO: remove this dependency? 
-import numpy as np - -#function that selects selects items from a list with each having independent probability p of being selected -def select(items, p, rng_=None): - rng = np.random.default_rng(rng_) - - selected = [item for item in items if rng.random() < p] - #if selected is empty, select one item at random - if not selected: - return [rng.choice(items)] - return selected - - -class Trial(): - - def __init__(self, rng_=None, old_params=None, alpha=1, hyperparameter_probability=1): - self.rng = np.random.default_rng(rng_) - - self._params = dict() - - self.old_params = old_params - self.alpha = alpha - self.hyperparameter_probability = hyperparameter_probability - - if old_params is not None and len(old_params) > 0: - self.params_to_update = select(list(old_params.keys()), self.hyperparameter_probability, rng_=self.rng) - else: - self.params_to_update = None - - - #Replicating the API found in optuna: https://optuna.readthedocs.io/en/stable/reference/generated/optuna.trial.Trial.html - #copy-pasted some code - def suggest_categorical(self, name, choices): - if self.params_to_update == None or name in self.params_to_update or name not in self.old_params: #If this parameter is selected to be changed - choice = self.suggest_categorical_(name, choices) - else: #if this parameter is not selected to be changed - choice = self.old_params[name] - if choice not in choices: #if the old value is not in the choices, then we need to choose a value for it - choice = self.suggest_categorical_(name, choices) - - self._params[name] = choice - return choice - - def suggest_float(self, - name: str, - low: float, - high: float, - *, - step = None, - log = False, - ): - if self.params_to_update == None or name in self.params_to_update or name not in self.old_params: #If this parameter is selected to be changed - choice = self.suggest_float_(name, low=low, high=high, step=step, log=log) - if self.old_params is not None and name in self.old_params: - choice = self.alpha*choice + 
(1-self.alpha)*self.old_params[name] - else: #if this parameter is not selected to be changed - choice = self.old_params[name] - - self._params[name] = choice - return choice - - - - def suggest_discrete_uniform(self, name, low, high, q): - if self.params_to_update == None or name in self.params_to_update or name not in self.old_params: - choice = self.suggest_discrete_uniform_(name, low=low, high=high, q=q) - if self.old_params is not None and name in self.old_params: - choice = self.alpha*choice + (1-self.alpha)*self.old_params[name] - else: - choice = self.old_params[name] - - self._params[name] = choice - return choice - - - - def suggest_int(self, name, low, high, step=1, log=False): - if self.params_to_update == None or name in self.params_to_update or name not in self.old_params: - choice = self.suggest_int_(name, low=low, high=high, step=step, log=log) - if self.old_params is not None and name in self.old_params: - choice = int(self.alpha*choice + (1-self.alpha)*self.old_params[name]) - else: - choice = self.old_params[name] - - self._params[name] = choice - return choice - - - def suggest_uniform(self, name, low, high): - if self.params_to_update == None or name in self.params_to_update or name not in self.old_params: - choice = self.suggest_uniform_(name, low=low, high=high) - if self.old_params is not None and name in self.old_params: - choice = self.alpha*choice + (1-self.alpha)*self.old_params[name] - else: - choice = self.old_params[name] - - self._params[name] = choice - return choice - - - -#################################### - #Replicating the API found in optuna: https://optuna.readthedocs.io/en/stable/reference/generated/optuna.trial.Trial.html - #copy-pasted some code - def suggest_categorical_(self, name, choices): - - choice = self.rng.choice(choices) - return choice - - def suggest_float_(self, - name: str, - low: float, - high: float, - *, - step = None, - log = False, - ): - - if log and step is not None: - raise ValueError("The parameter 
`step` is not supported when `log` is true.") - - if low > high: - raise ValueError( - "The `low` value must be smaller than or equal to the `high` value " - "(low={}, high={}).".format(low, high) - ) - - if log and low <= 0.0: - raise ValueError( - "The `low` value must be larger than 0 for a log distribution " - "(low={}, high={}).".format(low, high) - ) - - if step is not None and step <= 0: - raise ValueError( - "The `step` value must be non-zero positive value, " "but step={}.".format(step) - ) - - #TODO check this produces correct output - if log: - value = self.rng.uniform(np.log(low),np.log(high)) - choice = np.e**value - return choice - - else: - if step is not None: - choice = self.rng.choice(np.arange(low,high,step)) - return choice - else: - choice = self.rng.uniform(low,high) - return choice - - - def suggest_discrete_uniform_(self, name, low, high, q): - choice = self.suggest_float(name, low, high, step=q) - return choice - - - def suggest_int_(self, name, low, high, step=1, log=False): - if low == high: #TODO check that this matches optuna's behaviour - return low - - if log and step >1: - raise ValueError("The parameter `step`>1 is not supported when `log` is true.") - - if low > high: - raise ValueError( - "The `low` value must be smaller than or equal to the `high` value " - "(low={}, high={}).".format(low, high) - ) - - if log and low <= 0.0: - raise ValueError( - "The `low` value must be larger than 0 for a log distribution " - "(low={}, high={}).".format(low, high) - ) - - if step is not None and step <= 0: - raise ValueError( - "The `step` value must be non-zero positive value, " "but step={}.".format(step) - ) - - if log: - value = self.rng.uniform(np.log(low),np.log(high)) - choice = int(np.e**value) - return choice - else: - choice = self.rng.choice(list(range(low,high,step))) - return choice - - def suggest_uniform_(self, name, low, high): - return self.suggest_float(name, low, high) \ No newline at end of file diff --git 
a/tpot2/config/imputers.py b/tpot2/config/imputers.py new file mode 100644 index 00000000..89bcb60d --- /dev/null +++ b/tpot2/config/imputers.py @@ -0,0 +1,2 @@ +from ConfigSpace import ConfigurationSpace +from ConfigSpace import ConfigurationSpace, Integer, Float, Categorical, Normal \ No newline at end of file diff --git a/tpot2/config/mdr_configs.py b/tpot2/config/mdr_configs.py index 1fe7cc7a..4f872bd6 100644 --- a/tpot2/config/mdr_configs.py +++ b/tpot2/config/mdr_configs.py @@ -1,60 +1,52 @@ from mdr import MDR, ContinuousMDR from skrebate import ReliefF, SURF, SURFstar, MultiSURF from functools import partial +from ConfigSpace import ConfigurationSpace +from ConfigSpace import ConfigurationSpace, Integer, Float, Categorical, Normal -#MDR -def params_MDR(trial, name=None): - return { - 'tie_break': trial.suggest_categorical(name=f'tie_break_{name}', choices=[0,1]), - 'default_label': trial.suggest_categorical(name=f'default_label_{name}', choices=[0,1]), - } -def params_ContinuousMDR(trial, name=None): - return { - 'tie_break': trial.suggest_categorical(name=f'tie_break_{name}', choices=[0,1]), - 'default_label': trial.suggest_categorical(name=f'default_label_{name}', choices=[0,1]), - } - -#skrebate -def params_skrebate_ReliefF(trial, name=None, n_features=10): - return { - 'n_features_to_select': trial.suggest_int(f'n_features_to_select_{name}', 1, n_features, log=True), - 'n_neighbors': trial.suggest_int(f'n_neighbors_{name}', 2, 500, log=True), +#MDR +MDR_configspace = ConfigurationSpace( + space = { + 'tie_break': Categorical('tie_break', [0,1]), + 'default_label': Categorical('default_label', [0,1]), } +) -def params_skrebate_SURF(trial, name=None, n_features=10): - return { - 'n_features_to_select': trial.suggest_int(f'n_features_to_select_{name}', 1, n_features, log=True), +MDR_configspace = ConfigurationSpace( + space = { + 'tie_break': Categorical('tie_break', [0,1]), + 'default_label': Categorical('default_label', [0,1]), } +) -def 
params_skrebate_SURFstar(trial, name=None, n_features=10): - return { - 'n_features_to_select': trial.suggest_int(f'n_features_to_select_{name}', 1, n_features, log=True), - } -def params_skrebate_MultiSURF(trial, name=None, n_features=10): - return { - 'n_features_to_select': trial.suggest_int(f'n_features_to_select_{name}', 1, n_features, log=True), +skrebate_ReliefF_configspace = ConfigurationSpace( + space = { + 'n_features_to_select': Integer('n_features_to_select', bounds=(1, 10), log=True), + 'n_neighbors': Integer('n_neighbors', bounds=(1,500), log=True), } +) +def make_skrebate_SURF_config_space(n_features=10): + return ConfigurationSpace( + space = { + 'n_features_to_select': Integer('n_features_to_select', bounds=(1, n_features), log=True), + } +) -def make_skrebate_config_dictionary(n_features=10): - return { - ReliefF : partial(params_skrebate_ReliefF, n_features=n_features), - SURF : partial(params_skrebate_SURF, n_features=n_features), - SURFstar : partial(params_skrebate_SURFstar, n_features=n_features), - MultiSURF: partial(params_skrebate_MultiSURF,n_features=n_features), - } - - -def make_MDR_config_dictionary(): - return { - MDR : params_MDR - } -def make_ContinuousMDR_config_dictionary(): - return { - ContinuousMDR : params_ContinuousMDR - } \ No newline at end of file +def make_skrebate_SURFstar_config_space(n_features=10): + return ConfigurationSpace( + space = { + 'n_features_to_select': Integer('n_features_to_select', bounds=(1, n_features), log=True), + } +) +def make_skrebate_MultiSURF_config_space(n_features=10): + return ConfigurationSpace( + space = { + 'n_features_to_select': Integer('n_features_to_select', bounds=(1, n_features), log=True), + } +) diff --git a/tpot2/config/selectors.py b/tpot2/config/selectors.py index 42589d83..9dc1ebe9 100644 --- a/tpot2/config/selectors.py +++ b/tpot2/config/selectors.py @@ -1,113 +1,41 @@ #TODO: how to best support transformers/selectors that take other transformers with their own 
hyperparameters? import numpy as np -from sklearn.feature_selection import SelectFwe -from sklearn.feature_selection import SelectPercentile -from sklearn.feature_selection import VarianceThreshold -from sklearn.feature_selection import RFE -from sklearn.feature_selection import SelectFromModel -import sklearn.feature_selection -from functools import partial -from sklearn.ensemble import ExtraTreesRegressor, ExtraTreesClassifier -from tpot2.builtin_modules import RFE_ExtraTreesClassifier, SelectFromModel_ExtraTreesClassifier, RFE_ExtraTreesRegressor, SelectFromModel_ExtraTreesRegressor +import sklearn -from .classifiers import params_ExtraTreesClassifier -from .regressors import params_ExtraTreesRegressor +from ConfigSpace import ConfigurationSpace +from ConfigSpace import ConfigurationSpace, Integer, Float, Categorical, Normal -def params_sklearn_feature_selection_SelectFwe(trial, name=None): - return { - 'alpha': trial.suggest_float(f'alpha_{name}', 1e-4, 0.05, log=True), - 'score_func' : sklearn.feature_selection.f_classif, +SelectFwe_configspace = ConfigurationSpace( + space = { + 'alpha': Float('alpha', bounds=(1e-4, 0.05), log=True), } +) -def params_sklearn_feature_selection_SelectPercentile(trial, name=None): - return { - 'percentile': trial.suggest_float(f'percentile_{name}', 1, 100.0), - 'score_func' : sklearn.feature_selection.f_classif, - } -def params_sklearn_feature_selection_VarianceThreshold(trial, name=None): - return { - 'threshold': trial.suggest_float(f'threshold_{name}', 1e-4, .2, log=True) +SelectPercentile_configspace = ConfigurationSpace( + space = { + 'percentile': Float('percentile', bounds=(1, 100.0)), } +) - -#TODO add more estimator options? How will that interact with optuna? 
-def params_sklearn_feature_selection_RFE(trial, random_state=None, name=None, classifier=True): - - if classifier: - estimator = ExtraTreesClassifier(**params_ExtraTreesClassifier(trial, random_state=random_state, name=f"RFE_{name}")) - else: - estimator = ExtraTreesRegressor(**params_ExtraTreesRegressor(trial, random_state=random_state, name=f"RFE_{name}")) - - params = { - 'step': trial.suggest_float(f'step_{name}', 1e-4, 1.0, log=False), - 'estimator' : estimator, - } - - return params - - -def params_sklearn_feature_selection_SelectFromModel(trial, random_state=None, name=None, classifier=True): - - if classifier: - estimator = ExtraTreesClassifier(**params_ExtraTreesClassifier(trial, random_state=random_state, name=f"SFM_{name}")) - else: - estimator = ExtraTreesRegressor(**params_ExtraTreesRegressor(trial, random_state=random_state, name=f"SFM_{name}")) - - params = { - 'threshold': trial.suggest_float(f'threshold_{name}', 1e-4, 1.0, log=True), - 'estimator' : estimator, - } - - return params - - - -def params_sklearn_feature_selection_RFE_wrapped(trial, random_state=None, name=None, classifier=True): - - params = { - 'step': trial.suggest_float(f'step_{name}', 1e-4, 1.0, log=False), - } - - if classifier: - estimator_params = params_ExtraTreesClassifier(trial, random_state=random_state, name=f"RFE_{name}") - else: - estimator_params = params_ExtraTreesRegressor(trial, random_state=random_state, name=f"RFE_{name}") - - params.update(estimator_params) - - return params - - -def params_sklearn_feature_selection_SelectFromModel_wrapped(trial, random_state=None, name=None, classifier=True): - - params = { - 'threshold': trial.suggest_float(f'threshold_{name}', 1e-4, 1.0, log=True), - } - - if classifier: - estimator_params = params_ExtraTreesClassifier(trial, random_state=random_state, name=f"SFM_{name}") - else: - estimator_params = params_ExtraTreesRegressor(trial, random_state=random_state, name=f"SFM_{name}") - - params.update(estimator_params) - - return 
params - +VarianceThreshold_configspace = ConfigurationSpace( + space = { + 'threshold': Float('threshold', bounds=(1e-4, .2), log=True), + } +) -def make_selector_config_dictionary(random_state=None, classifier=True): - if classifier: - params = {RFE_ExtraTreesClassifier : partial(params_sklearn_feature_selection_RFE_wrapped, random_state=random_state, classifier=classifier), - SelectFromModel_ExtraTreesClassifier : partial(params_sklearn_feature_selection_SelectFromModel_wrapped, random_state=random_state, classifier=classifier), - } - else: - params = {RFE_ExtraTreesRegressor : partial(params_sklearn_feature_selection_RFE_wrapped, random_state=random_state, classifier=classifier), - SelectFromModel_ExtraTreesRegressor : partial(params_sklearn_feature_selection_SelectFromModel_wrapped, random_state=random_state, classifier=classifier), - } - params.update({ SelectFwe: params_sklearn_feature_selection_SelectFwe, - SelectPercentile: params_sklearn_feature_selection_SelectPercentile, - VarianceThreshold: params_sklearn_feature_selection_VarianceThreshold,}) +# Note the RFE_configspace_part and SelectFromModel_configspace_part are not complete, they both require the estimator to be set. +# These are indended to be used with the Wrapped search space. 
+RFE_configspace_part = ConfigurationSpace( + space = { + 'step': Float('step', bounds=(1e-4, 1.0)), + } +) - return params \ No newline at end of file +SelectFromModel_configspace_part = ConfigurationSpace( + space = { + 'threshold': Float('threshold', bounds=(1e-4, 1.0), log=True), + } +) diff --git a/tpot2/config/transformers.py b/tpot2/config/transformers.py index fe869411..fca4932c 100644 --- a/tpot2/config/transformers.py +++ b/tpot2/config/transformers.py @@ -1,103 +1,78 @@ -from functools import partial -import numpy as np - -from tpot2.builtin_modules import ZeroCount, OneHotEncoder, ColumnOneHotEncoder -from sklearn.preprocessing import Binarizer -from sklearn.decomposition import FastICA -from sklearn.cluster import FeatureAgglomeration -from sklearn.preprocessing import MaxAbsScaler -from sklearn.preprocessing import MinMaxScaler -from sklearn.preprocessing import Normalizer -from sklearn.kernel_approximation import Nystroem -from sklearn.decomposition import PCA -from sklearn.preprocessing import PolynomialFeatures -from sklearn.kernel_approximation import RBFSampler -from sklearn.preprocessing import RobustScaler -from sklearn.preprocessing import StandardScaler - - -def params_sklearn_preprocessing_Binarizer(trial, name=None): - return { - 'threshold': trial.suggest_float(f'threshold_{name}', 0.0, 1.0), +from ConfigSpace import ConfigurationSpace +from ConfigSpace import ConfigurationSpace, Integer, Float, Categorical, Normal + + +Binarizer_configspace = ConfigurationSpace( + space = { + 'threshold': Float('threshold', bounds=(0.0, 1.0)), } +) + +Normalizer_configspace = ConfigurationSpace( + space={'norm': Categorical('norm', ['l1', 'l2', 'max'])} +) + +PCA_configspace = ConfigurationSpace( + space={'n_components': Float('n_components', bounds=(0.001, 0.999))} +) + +ZeroCount_configspace = ConfigurationSpace() + +OneHotEncoder_configspace = ConfigurationSpace() #TODO include the parameter for max unique values -def 
params_sklearn_decomposition_FastICA(trial, random_state=None, name=None, n_features=100): - return { - 'n_components': trial.suggest_int(f'n_components_{name}', 1, n_features), # number of components wrt number of features - 'algorithm': trial.suggest_categorical(f'algorithm_{name}', ['parallel', 'deflation']), +def get_FastICA_configspace(n_features=100, random_state=None): + + space = { + 'n_components': Integer('n_components', bounds=(1, n_features)), + 'algorithm': Categorical('algorithm', ['parallel', 'deflation']), 'whiten':'unit-variance', - 'random_state': random_state } - -def params_sklearn_cluster_FeatureAgglomeration(trial, name=None, n_features=100): - - linkage = trial.suggest_categorical(f'linkage_{name}', ['ward', 'complete', 'average']) - if linkage == 'ward': - metric = 'euclidean' - else: - metric = trial.suggest_categorical(f'metric_{name}', ['euclidean', 'l1', 'l2', 'manhattan', 'cosine']) - return { - 'linkage': linkage, - 'metric': metric, - 'n_clusters': trial.suggest_int(f'n_clusters_{name}', 2, n_features-1), #TODO perhaps a percentage of n_features + + if random_state is not None: #This is required because configspace doesn't allow None as a value + space['random_state'] = random_state + + return ConfigurationSpace( + space = space + + ) + +def get_FeatureAgglomeration_configspace(n_features=100): + return ConfigurationSpace( + space = { + 'linkage': Categorical('linkage', ['ward', 'complete', 'average']), + 'metric': Categorical('metric', ['euclidean', 'l1', 'l2', 'manhattan', 'cosine']), + 'n_clusters': Integer('n_clusters', bounds=(2, n_features-1)), + } + ) + +def get_Nystroem_configspace(n_features=100, random_state=None,): + + space = { + 'gamma': Float('gamma', bounds=(0.0, 1.0)), + 'kernel': Categorical('kernel', ['rbf', 'cosine', 'chi2', 'laplacian', 'polynomial', 'poly', 'linear', 'additive_chi2', 'sigmoid']), + 'n_components': Integer('n_components', bounds=(1, n_features)), } -def 
params_sklearn_preprocessing_Normalizer(trial, name=None): - return { - 'norm': trial.suggest_categorical(f'norm_{name}', ['l1', 'l2', 'max']), - } -def params_sklearn_kernel_approximation_Nystroem(trial, random_state=None, name=None, n_features=100): - return { - 'gamma': trial.suggest_float(f'gamma_{name}', 0.0, 1.0), - 'kernel': trial.suggest_categorical(f'kernel_{name}', ['rbf', 'cosine', 'chi2', 'laplacian', 'polynomial', 'poly', 'linear', 'additive_chi2', 'sigmoid']), - 'n_components': trial.suggest_int(f'n_components_{name}', 1, n_features), - 'random_state': random_state - } + if random_state is not None: #This is required because configspace doesn't allow None as a value + space['random_state'] = random_state + + return ConfigurationSpace( + space = space -def params_sklearn_decomposition_PCA(trial, random_state=None, name=None, n_features=100): - # keep the number of components required to explain 'variance_explained' of the variance - variance_explained = 1.0 - trial.suggest_float(f'n_components_{name}', 0.001, 0.5, log=True) #values closer to 1 are more likely + ) - return { - 'n_components': variance_explained, - 'random_state': random_state - } +def get_RBFSampler_configspace(n_features=100, random_state=None): -def params_sklearn_kernel_approximation_RBFSampler(trial, random_state=None, name=None, n_features=100): - return { - 'n_components': trial.suggest_int(f'n_components_{name}', 1, n_features), - 'gamma': trial.suggest_float(f'gamma_{name}', 0.0, 1.0), - 'random_state': random_state + space = { + 'gamma': Float('gamma', bounds=(0.0, 1.0)), + 'n_components': Integer('n_components', bounds=(1, n_features)), } -def params_tpot_builtins_ZeroCount(trial, name=None): - - return {} - -def params_tpot_builtins_OneHotEncoder(trial, name=None): - - return {} - -def make_transformer_config_dictionary(random_state=None, n_features=10): - #n_features = min(n_features,100) #TODO optimize this - return { - Binarizer: params_sklearn_preprocessing_Binarizer, - 
FastICA: partial(params_sklearn_decomposition_FastICA, random_state=random_state, n_features=n_features), - FeatureAgglomeration: partial(params_sklearn_cluster_FeatureAgglomeration,n_features=n_features), - MaxAbsScaler: {}, - MinMaxScaler: {}, - Normalizer: params_sklearn_preprocessing_Normalizer, - Nystroem: partial(params_sklearn_kernel_approximation_Nystroem, random_state=random_state, n_features=n_features), - PCA: partial(params_sklearn_decomposition_PCA, random_state=random_state, n_features=n_features), - PolynomialFeatures: { - 'degree': 2, - 'include_bias': False, - 'interaction_only': False, - }, - RBFSampler: partial(params_sklearn_kernel_approximation_RBFSampler, random_state=random_state, n_features=n_features), - RobustScaler: {}, - StandardScaler: {}, - ZeroCount: params_tpot_builtins_ZeroCount, - ColumnOneHotEncoder: params_tpot_builtins_OneHotEncoder, - } + if random_state is not None: #This is required because configspace doesn't allow None as a value + space['random_state'] = random_state + + return ConfigurationSpace( + space = space + + ) diff --git a/tpot2/evolvers/__init__.py b/tpot2/evolvers/__init__.py index cf130f80..1d6af1a9 100644 --- a/tpot2/evolvers/__init__.py +++ b/tpot2/evolvers/__init__.py @@ -1,2 +1,2 @@ from .base_evolver import * -from .steady_state_evolver import * \ No newline at end of file +#from .steady_state_evolver import * \ No newline at end of file diff --git a/tpot2/evolvers/base_evolver.py b/tpot2/evolvers/base_evolver.py index 9959f9ab..b8f64cbe 100644 --- a/tpot2/evolvers/base_evolver.py +++ b/tpot2/evolvers/base_evolver.py @@ -4,7 +4,7 @@ import tpot2 import typing import tqdm -from tpot2.individual_representations.individual import BaseIndividual +from tpot2 import BaseIndividual import time import numpy as np import copy @@ -20,13 +20,13 @@ import math from tpot2.utils.utils import get_thresholds, beta_interpolation, remove_items, equalize_list -def ind_mutate(ind, rng_): - rng = np.random.default_rng(rng_) - 
return ind.mutate(rng_=rng) +def ind_mutate(ind, rng): + rng = np.random.default_rng(rng) + return ind.mutate(rng=rng) -def ind_crossover(ind1, ind2, rng_): - rng = np.random.default_rng(rng_) - return ind1.crossover(ind2, rng_=rng) +def ind_crossover(ind1, ind2, rng): + rng = np.random.default_rng(rng) + return ind1.crossover(ind2, rng=rng) class BaseEvolver(): def __init__( self, @@ -87,7 +87,7 @@ def __init__( self, verbose = 0, periodic_checkpoint_folder = None, callback = None, - rng_=None, + rng=None, ) -> None: """ @@ -196,7 +196,7 @@ def __init__( self, If provided, training will resume from this checkpoint. callback : tpot2.CallBackInterface, default=None Callback object. Not implemented - rng_ : Numpy.Random.Generator, None, default=None + rng : Numpy.Random.Generator, None, default=None An object for reproducability of experiments. This value will be passed to numpy.random.default_rng() to create an instnce of the genrator to pass to other classes - Numpy.Random.Generator @@ -205,7 +205,7 @@ def __init__( self, Will be used to create Generator for 'numpy.random.default_rng()' where a fresh, unpredictable entropy will be pulled from the OS """ - self.rng = np.random.default_rng(rng_) + self.rng = np.random.default_rng(rng) if threshold_evaluation_early_stop is not None or selection_evaluation_early_stop is not None: if evaluation_early_stop_steps is None: @@ -521,7 +521,7 @@ def step(self,): columns_names=self.objective_names, n_survivors=n_survivors, inplace=True, - rng_=self.rng,) + rng=self.rng,) self.generate_offspring() self.evaluate_population() @@ -529,7 +529,7 @@ def step(self,): self.generation += 1 def generate_offspring(self, ): #your EA Algorithm goes here - parents = self.population.parent_select(selector=self.parent_selector, weights=self.objective_function_weights, columns_names=self.objective_names, k=self.cur_population_size, n_parents=2, rng_=self.rng) + parents = self.population.parent_select(selector=self.parent_selector, 
weights=self.objective_function_weights, columns_names=self.objective_names, k=self.cur_population_size, n_parents=2, rng=self.rng) p = np.array([self.crossover_probability, self.mutate_then_crossover_probability, self.crossover_then_mutate_probability, self.mutate_probability]) p = p / p.sum() var_op_list = self.rng.choice(["crossover", "mutate_then_crossover", "crossover_then_mutate", "mutate"], size=self.cur_population_size, p=p) @@ -538,7 +538,7 @@ def generate_offspring(self, ): #your EA Algorithm goes here if op == "mutate": parents[i] = parents[i][0] #mutations take a single individual - offspring = self.population.create_offspring2(parents, var_op_list, self.mutation_functions, self.mutation_function_weights, self.crossover_functions, self.crossover_function_weights, add_to_population=True, keep_repeats=False, mutate_until_unique=True, rng_=self.rng) + offspring = self.population.create_offspring2(parents, var_op_list, self.mutation_functions, self.mutation_function_weights, self.crossover_functions, self.crossover_function_weights, add_to_population=True, keep_repeats=False, mutate_until_unique=True, rng=self.rng) self.population.update_column(offspring, column_names="Generation", data=self.generation, ) diff --git a/tpot2/evolvers/steady_state_evolver.py b/tpot2/evolvers/steady_state_evolver.py index a45e4059..22a064fb 100644 --- a/tpot2/evolvers/steady_state_evolver.py +++ b/tpot2/evolvers/steady_state_evolver.py @@ -67,10 +67,10 @@ def __init__( self, periodic_checkpoint_folder = None, callback = None, - rng_=None + rng=None ) -> None: - self.rng = np.random.default_rng(rng_) + self.rng = np.random.default_rng(rng) self.max_evaluated_individuals = max_evaluated_individuals self.individuals_until_end_budget = individuals_until_end_budget @@ -176,7 +176,7 @@ def __init__( self, if self.population is None: self.population = tpot2.Population(column_names=init_names) initial_population = [next(self.individual_generator) for _ in 
range(self.initial_population_size)] - self.population.add_to_population(initial_population, rng_=self.rng) + self.population.add_to_population(initial_population, rng=self.rng) def optimize(self): @@ -404,13 +404,13 @@ def optimize(self): if len(cur_evaluated_population) > self.population_size: scores = evaluated[self.objective_names].to_numpy() weighted_scores = scores * self.objective_function_weights - new_population_index = np.ravel(self.survival_selector(weighted_scores, k=self.population_size, rng_=self.rng)) #TODO make it clear that we are concatenating scores... + new_population_index = np.ravel(self.survival_selector(weighted_scores, k=self.population_size, rng=self.rng)) #TODO make it clear that we are concatenating scores... #set new population try: cur_evaluated_population = np.array(cur_evaluated_population)[new_population_index] cur_evaluated_population = np.concatenate([cur_evaluated_population, unevaluated["Individual"].to_numpy()]) - self.population.set_population(cur_evaluated_population, rng_=self.rng) + self.population.set_population(cur_evaluated_population, rng=self.rng) except Exception as e: print("Exception in survival selection") print(e) @@ -447,11 +447,11 @@ def optimize(self): parents = [] for op in var_ops: if op == "mutate": - parents.extend(np.array(cur_evaluated_population)[self.parent_selector(weighted_scores, k=1, n_parents=1, rng_=self.rng)]) + parents.extend(np.array(cur_evaluated_population)[self.parent_selector(weighted_scores, k=1, n_parents=1, rng=self.rng)]) else: - parents.extend(np.array(cur_evaluated_population)[self.parent_selector(weighted_scores, k=1, n_parents=2, rng_=self.rng)]) + parents.extend(np.array(cur_evaluated_population)[self.parent_selector(weighted_scores, k=1, n_parents=2, rng=self.rng)]) - _offspring = self.population.create_offspring(parents, var_ops, rng_=self.rng, n_jobs=1, add_to_population=True) + _offspring = self.population.create_offspring(parents, var_ops, rng=self.rng, n_jobs=1, 
add_to_population=True) # If we don't have enough evaluated individuals to use as parents for variation, we create new individuals randomly # This can happen if the individuals in the initial population are invalid diff --git a/tpot2/individual_representations/individual.py b/tpot2/individual.py similarity index 82% rename from tpot2/individual_representations/individual.py rename to tpot2/individual.py index be61fdcb..db6807c3 100644 --- a/tpot2/individual_representations/individual.py +++ b/tpot2/individual.py @@ -13,8 +13,8 @@ def __init__(self) -> None: self.mutation_list = [] self.crossover_list = [] - def mutate(self, rng_=None): - rng = np.random.default_rng(rng_) + def mutate(self, rng=None): + rng = np.random.default_rng(rng) mutation_list_copy = self.mutation_list.copy() rng.shuffle(mutation_list_copy) for func in mutation_list_copy: @@ -22,8 +22,8 @@ def mutate(self, rng_=None): return True return False - def crossover(self, ind2, rng_=None): - rng = np.random.default_rng(rng_) + def crossover(self, ind2, rng=None): + rng = np.random.default_rng(rng) crossover_list_copy = self.crossover_list.copy() rng.shuffle(crossover_list_copy) for func in crossover_list_copy: @@ -32,10 +32,10 @@ def crossover(self, ind2, rng_=None): return False # a guided change of an individual when given an objective function - def optimize(self, objective_function, rng_=None , steps=5): - rng = np.random.default_rng(rng_) + def optimize(self, objective_function, rng=None , steps=5): + rng = np.random.default_rng(rng) for _ in range(steps): - self.mutate(rng_=rng) + self.mutate(rng=rng) #Return a hashable unique to this individual setup #For use when evaluating whether or not an individual is 'the same' and another individual diff --git a/tpot2/individual_representations/__init__.py b/tpot2/individual_representations/__init__.py deleted file mode 100644 index 77457504..00000000 --- a/tpot2/individual_representations/__init__.py +++ /dev/null @@ -1,5 +0,0 @@ -from .individual 
import BaseIndividual -from .subset_selector import SubsetSelector -from .graph_pipeline_individual import GraphIndividual - -from . import graph_pipeline_individual \ No newline at end of file diff --git a/tpot2/individual_representations/graph_pipeline_individual/__init__.py b/tpot2/individual_representations/graph_pipeline_individual/__init__.py deleted file mode 100644 index 3710b0c3..00000000 --- a/tpot2/individual_representations/graph_pipeline_individual/__init__.py +++ /dev/null @@ -1,4 +0,0 @@ -from .graph_utils import * -from .individual import * -from .templates import * -from .optuna_optimize import * diff --git a/tpot2/individual_representations/graph_pipeline_individual/graph_utils/__init__.py b/tpot2/individual_representations/graph_pipeline_individual/graph_utils/__init__.py deleted file mode 100644 index 758924a0..00000000 --- a/tpot2/individual_representations/graph_pipeline_individual/graph_utils/__init__.py +++ /dev/null @@ -1 +0,0 @@ -from .graph_utils import * \ No newline at end of file diff --git a/tpot2/individual_representations/graph_pipeline_individual/individual.py b/tpot2/individual_representations/graph_pipeline_individual/individual.py deleted file mode 100644 index f890e80f..00000000 --- a/tpot2/individual_representations/graph_pipeline_individual/individual.py +++ /dev/null @@ -1,1222 +0,0 @@ -import numpy as np -from tpot2 import config -import networkx as nx -from abc import abstractmethod -import matplotlib.pyplot as plt -import sklearn -import tpot2 -import sklearn.pipeline -from typing import Generator -import optuna -from itertools import combinations -from .graph_utils import graph_utils -import itertools -import baikal -import copy -from .. 
import BaseIndividual - -class NodeLabel(): - def __init__(self, *, - - #intialized, but may change later - method_class = None, #transformer or baseestimator - hyperparameters=None, - label=None, - ): - - #intializable, but may change later - self.method_class = method_class #transformer or baseestimator - self.hyperparameters = hyperparameters - self.label = label - self._params = None - - - -from functools import partial -#@https://stackoverflow.com/questions/20530455/isomorphic-comparison-of-networkx-graph-objects-instead-of-the-default-address - -class GraphKey(): - ''' - A class that can be used as a key for a graph. - - Parameters - ---------- - graph : (nx.Graph) - The graph to use as a key. Node Attributes are used for the hash. - matched_label : (str) - The node attribute to consider for the hash. - ''' - - def __init__(self, graph, matched_label='label') -> None:#['hyperparameters', 'method_class']) -> None: - - - self.graph = graph - self.matched_label = matched_label - self.node_match = partial(node_match, matched_labels=[matched_label]) - self.key = int(nx.weisfeiler_lehman_graph_hash(self.graph, node_attr=self.matched_label),16) #hash(tuple(sorted([val for (node, val) in self.graph.degree()]))) - - - #If hash is different, node is definitely different - # https://arxiv.org/pdf/2002.06653.pdf - def __hash__(self) -> int: - - return self.key - - #If hash is same, use __eq__ to know if they are actually different - def __eq__(self, other): - return nx.is_isomorphic(self.graph, other.graph, node_match=self.node_match) - -def node_match(n1,n2, matched_labels): - return all( [ n1[m] == n2[m] for m in matched_labels]) - - -class GraphIndividual(BaseIndividual): - ''' - An individual that contains a template for a graph sklearn pipeline. - - Parameters - ---------- - root_config_dict : {dict with format {method class: param_function}} - A dictionary of methods and functions that return a dictionary of hyperparameters. 
- Used to create the root node of the graph. - inner_config_dict : {dict with format {method class: param_function}} - A dictionary of methods and functions that return a dictionary of hyperparameters. - Used to create the inner nodes of the graph. If None, uses root_config_dict. - leaf_config_dict : {dict with format {method class: param_function}} - A dictionary of methods and functions that return a dictionary of hyperparameters. - Used to create the leaf nodes of the graph. If not None, then all leafs must be created from this dictionary. - Otherwise leaves will be created from inner_config_dict. - initial_graph : (nx.DiGraph or list): - A graph to initialize the individual with. - If a list, it will initialize a linear graph with the methods in the list in the sequence provided. - If the items in the list are dictionaries, nodes will be itialized with those dictionaries. - Strings in the list correspond to the default configuration files. They can be 'Selector', 'Regressor', 'Transformer', 'Classifier'. - max_depth : (int) - The maximum depth of the graph as measured by the shortest distance from the root. - max_size : (int) - The maximum number of nodes in the graph. - max_children : (int) - The maximum number of children a node can have. - name : (str) - The name of the individual. - crossover_same_depth : (bool) - If true, then crossover will only occur between nodes of the same depth as measured by the shortest distance from the root. - crossover_same_recursive_depth : (bool) - If the graph is recursive, then crossover will only occur between graphs of the same recursive depth as measured by the shortest distance from the root. 
- ''' - def __init__( - self, - root_config_dict, - inner_config_dict=None, - leaf_config_dict=None, - initial_graph = None, - max_size = np.inf, - linear_pipeline = False, - name=None, - crossover_same_depth = False, - crossover_same_recursive_depth = True, - - hyperparameter_probability = 1, - hyper_node_probability = 0, - hyperparameter_alpha = 1, - - unique_subset_values = None, - initial_subset_values = None, - rng_=None, - ): - - self.__debug = False - - rng = np.random.default_rng(rng_) - - self.root_config_dict = root_config_dict - self.inner_config_dict = inner_config_dict - self.leaf_config_dict = leaf_config_dict - - - self.max_size = max_size - self.name = name - - self.crossover_same_depth = crossover_same_depth - self.crossover_same_recursive_depth = crossover_same_recursive_depth - - self.unique_subset_values = unique_subset_values - self.initial_subset_values = initial_subset_values - - self.hyperparameter_probability = hyperparameter_probability - self.hyper_node_probability = hyper_node_probability - self.hyperparameter_alpha = hyperparameter_alpha - - if self.unique_subset_values is not None: - self.row_subset_selector = tpot2.representations.SubsetSelector(rng_=rng, values=unique_subset_values, initial_set=initial_subset_values,k=20) - - if isinstance(initial_graph, nx.DiGraph): - self.graph = initial_graph - self.root = list(nx.topological_sort(self.graph))[0] - - if self.leaf_config_dict is not None and len(self.graph.nodes) == 1: - first_leaf = create_node(self.leaf_config_dict, rng_=rng) - self.graph.add_edge(self.root,first_leaf) - - elif isinstance(initial_graph, list): - node_list = [] - for item in initial_graph: - if isinstance(item, dict): - node_list.append(create_node(item, rng_=rng)) - elif isinstance(item, str): - if item == 'Selector': - from tpot2.config import selector_config_dictionary - node_list.append(create_node(selector_config_dictionary, rng_=rng)) - elif item == 'Regressor': - from tpot2.config import 
regressor_config_dictionary - node_list.append(create_node(regressor_config_dictionary, rng_=rng)) - elif item == 'Transformer': - from tpot2.config import transformer_config_dictionary - node_list.append(create_node(transformer_config_dictionary, rng_=rng)) - elif item == 'Classifier': - from tpot2.config import classifier_config_dictionary - node_list.append(create_node(classifier_config_dictionary, rng_=rng)) - - self.graph = nx.DiGraph() - for child, parent in zip(node_list, node_list[1:]): - self.graph.add_edge(parent, child) - - self.root = node_list[-1] - - else: - self.graph = nx.DiGraph() - - self.root = create_node(self.root_config_dict, rng_=rng) - self.graph.add_node(self.root) - - if self.leaf_config_dict is not None: - first_leaf = create_node(self.leaf_config_dict, rng_=rng) - self.graph.add_edge(self.root,first_leaf) - - - - self.initialize_all_nodes(rng_=rng) - - #self.root =list(nx.topological_sort(self.graph))[0] - - - self.mutate_methods_list = [self._mutate_hyperparameters, - self._mutate_replace_node, - self._mutate_remove_node, - ] - - self.crossover_methods_list = [ - self._crossover_swap_branch, - ] - - - if self.inner_config_dict is not None: - self.mutate_methods_list.append(self._mutate_insert_inner_node) - self.crossover_methods_list.append(self._crossover_take_branch) #this is the only crossover method that can create inner nodes - if not linear_pipeline: - self.mutate_methods_list.append(self._mutate_insert_bypass_node) - self.mutate_methods_list.append(self._mutate_remove_edge) - self.mutate_methods_list.append(self._mutate_add_edge) - - if not linear_pipeline and (self.leaf_config_dict is not None or self.inner_config_dict is not None): - self.mutate_methods_list.append(self._mutate_insert_leaf) - - - - - if self.unique_subset_values is not None: - self.crossover_methods_list.append(self._crossover_row_subsets) - self.mutate_methods_list.append(self._mutate_row_subsets ) - - self.optimize_methods_list = [ 
#self._optimize_optuna_single_method_full_pipeline, - self._optimize_optuna_all_methods_full_pipeline] - - self.key = None - - def select_config_dict(self, node): - #check if the node is root, leaf, or inner - if len(list(self.graph.predecessors(node))) == 0: #root - return self.root_config_dict - elif self.leaf_config_dict is not None and len(list(self.graph.successors(node))) == 0: #leaf - return self.leaf_config_dict - else: #inner - return self.inner_config_dict - - - def initialize_all_nodes(self, rng_=None): - rng = np.random.default_rng(rng_) - for node in self.graph: - if isinstance(node,GraphIndividual): - continue - if node.method_class is None: - node.method_class = rng.choice(list(self.select_config_dict(node).keys())) - if node.hyperparameters is None: - get_hyperparameter(self.select_config_dict(node)[node.method_class], nodelabel=node, alpha=self.hyperparameter_alpha, hyperparameter_probability=self.hyperparameter_probability) - - - def fix_noncompliant_leafs(self, rng_=None): - rng = np.random.default_rng(rng_) - leafs = [node for node in self.graph.nodes if len(list(self.graph.successors(node)))==0] - compliant_leafs = [] - noncompliant_leafs = [] - for leaf in leafs: - if leaf.method_class in self.leaf_config_dict: - compliant_leafs.append(leaf) - else: - noncompliant_leafs.append(leaf) - - #find all good leafs. 
If no good leaves exist, create a new one - if len(compliant_leafs) == 0: - first_leaf = NodeLabel(config_dict=self.leaf_config_dict) - first_leaf.method_class = rng.choice(list(first_leaf.config_dict.keys())) #TODO: check when there is no new method - first_leaf.hyperparameters = first_leaf.config_dict[first_leaf.method_class](config.hyperparametersuggestor) - get_hyperparameter(self.select_config_dict(first_leaf)[first_leaf.method_class], nodelabel=first_leaf, alpha=self.hyperparameter_alpha, hyperparameter_probability=self.hyperparameter_probability) - compliant_leafs.append(first_leaf) - - #connect bad leaves to good leaves (making them internal nodes) - if len(noncompliant_leafs) > 0: - for node in noncompliant_leafs: - self.graph.add_edge(node, rng.choice(compliant_leafs)) - - - - - def _merge_duplicated_nodes(self): - - graph_changed = False - merged = False - while(not merged): - node_list = list(self.graph.nodes) - merged = True - for node, other_node in itertools.product(node_list, node_list): - if node is other_node or isinstance(node,GraphIndividual) or isinstance(other_node,GraphIndividual): - continue - - #If nodes are same class/hyperparameters - if node.method_class == other_node.method_class and node.hyperparameters == other_node.hyperparameters: - node_children = set(self.graph.successors(node)) - other_node_children = set(self.graph.successors(other_node)) - #if nodes have identical children, they can be merged - if node_children == other_node_children: - for other_node_parent in list(self.graph.predecessors(other_node)): - if other_node_parent not in self.graph.predecessors(node): - self.graph.add_edge(other_node_parent,node) - - self.graph.remove_node(other_node) - merged=False - graph_changed = True - break - - return graph_changed - - #returns a flattened pipeline - def flatten_pipeline(self,depth=0): - flattened_full_graph = self.graph.copy() - remove_list = [] - for node in flattened_full_graph: - if isinstance(node,GraphIndividual): - 
flattened = node.flatten_pipeline(depth+1) - roots = graph_utils.get_roots(flattened) - leaves = graph_utils.get_leaves(flattened) - - n1_s = flattened_full_graph.successors(node) - n1_p = flattened_full_graph.predecessors(node) - - remove_list.append(node) - - - flattened_full_graph = nx.compose(flattened_full_graph, flattened) - - - flattened_full_graph.add_edges_from([ (n2, n) for n in n1_s for n2 in leaves]) - flattened_full_graph.add_edges_from([ (n, n2) for n in n1_p for n2 in roots]) - else: - flattened_full_graph.nodes[node]['recursive depth'] = depth - - - for node in remove_list: - flattened_full_graph.remove_node(node) - - if self.unique_subset_values is not None: - for node in flattened_full_graph: - if "subset_values" not in flattened_full_graph.nodes[node]: - flattened_full_graph.nodes[node]["subset_values"] = list(self.row_subset_selector.subsets) - else: - #intersection - flattened_full_graph.nodes[node]["subset_values"] = list(set(flattened_full_graph.nodes[node]["subset_values"]) & set(self.row_subset_selector.subsets)) - - return flattened_full_graph - - def get_num_nodes(self,): - num_nodes = 0 - - for node in self.graph.nodes: - if isinstance(node, GraphIndividual): - num_nodes+= node.get_num_nodes() - else: - num_nodes += 1 - - return num_nodes - - - def export_nested_pipeline(self, **graph_pipeline_args): - - flattened_full_graph = self.graph.copy() - remove_list = [] - for node in list(flattened_full_graph.nodes): - if isinstance(node,GraphIndividual): - gp = node.export_pipeline(**graph_pipeline_args) - - n1_s = flattened_full_graph.successors(node) - n1_p = flattened_full_graph.predecessors(node) - - remove_list.append(node) - - flattened_full_graph.add_node(gp) - - - flattened_full_graph.add_edges_from([ (gp, n) for n in n1_s]) - flattened_full_graph.add_edges_from([ (n, gp) for n in n1_p]) - - - for node in remove_list: - flattened_full_graph.remove_node(node) - - estimator_graph = flattened_full_graph - - #mapping = 
{node:node.method_class(**node.hyperparameters) for node in estimator_graph} - label_remapping = {} - label_to_instance = {} - - for node in estimator_graph: - found_unique_label = False - i=1 - while not found_unique_label: - print(type(node)) - if type(node) is tpot2.GraphPipeline: - label = "GraphPipeline_{0}".format( i) - else: - label = "{0}_{1}".format(node.method_class.__name__, i) - if label not in label_to_instance: - found_unique_label = True - else: - i+=1 - - - if type(node) is tpot2.GraphPipeline: - label_remapping[node] = label - label_to_instance[label] = node - else: - label_remapping[node] = label - label_to_instance[label] = node.method_class(**node.hyperparameters) - - estimator_graph = nx.relabel_nodes(estimator_graph, label_remapping) - - for label, instance in label_to_instance.items(): - estimator_graph.nodes[label]["instance"] = instance - - return tpot2.GraphPipeline(graph=estimator_graph, **graph_pipeline_args) - - def export_pipeline(self, **graph_pipeline_args): - estimator_graph = self.flatten_pipeline() - - #mapping = {node:node.method_class(**node.hyperparameters) for node in estimator_graph} - label_remapping = {} - label_to_instance = {} - - for node in estimator_graph: - found_unique_label = False - i=1 - while not found_unique_label: - label = "{0}_{1}".format(node.method_class.__name__, i) - if label not in label_to_instance: - found_unique_label = True - else: - i+=1 - - label_remapping[node] = label - label_to_instance[label] = node.method_class(**node.hyperparameters) - - estimator_graph = nx.relabel_nodes(estimator_graph, label_remapping) - - for label, instance in label_to_instance.items(): - estimator_graph.nodes[label]["instance"] = instance - - return tpot2.GraphPipeline(graph=estimator_graph, **graph_pipeline_args) - - def export_baikal(self,): - graph = self.flatten_pipeline() - toposorted = list(nx.topological_sort(graph)) - toposorted.reverse() - node_outputs = {} - - X = baikal.Input('X') - y = baikal.Input('Target') 
- - for i in range(len(toposorted)): - node = toposorted[i] - if len(list(graph.successors(node))) == 0: #If this node had no inputs use X - this_inputs = X - else: #in node has inputs, get those - this_inputs = [node_outputs[child] for child in graph.successors(node)] - - this_output = baikal.make_step(node.method_class, class_name=node.method_class.__name__)(**node.hyperparameters)(this_inputs,y) - node_outputs[node] = this_output - - if i == len(toposorted)-1: #last method doesn't need transformed. - return baikal.Model(inputs=X, outputs=this_output, targets=y) - - - def plot(self): - G = self.flatten_pipeline().reverse() #self.graph.reverse() - #TODO clean this up - try: - pos = nx.planar_layout(G) # positions for all nodes - except: - pos = nx.shell_layout(G) - # nodes - options = {'edgecolors': 'tab:gray', 'node_size': 800, 'alpha': 0.9} - nodelist = list(G.nodes) - node_color = [plt.cm.Set1(G.nodes[n]['recursive depth']) for n in G] - - fig, ax = plt.subplots() - - nx.draw(G, pos, nodelist=nodelist, node_color=node_color, ax=ax, **options) - - - '''edgelist = [] - for n in n1.node_set: - for child in n.children: - edgelist.append((n,child))''' - - # edges - #nx.draw_networkx_edges(G, pos, width=3.0, arrows=True) - '''nx.draw_networkx_edges( - G, - pos, - edgelist=[edgelist], - width=8, - alpha=0.5, - edge_color='tab:red', - )''' - - - - # some math labels - labels = {} - for i, n in enumerate(G.nodes): - labels[n] = n.method_class.__name__ + "\n" + str(n.hyperparameters) - - - nx.draw_networkx_labels(G, pos, labels,ax=ax, font_size=7, font_color='black') - - plt.tight_layout() - plt.axis('off') - plt.show() - - - ############# - - #TODO currently does not correctly return false when adding a leaf causes a duplicate node that is later merged - def mutate(self, rng_=None): - rng = np.random.default_rng(rng_) - self.key = None - graph = self.select_graphindividual(rng_=rng) - return graph._mutate(rng_=rng) - - def _mutate(self, rng_=None): - rng = 
np.random.default_rng(rng_) - rng.shuffle(self.mutate_methods_list) - for mutate_method in self.mutate_methods_list: - if mutate_method(rng_=rng): - self._merge_duplicated_nodes() - - if self.__debug: - print(mutate_method) - - if self.root not in self.graph.nodes: - print('lost root something went wrong with ', mutate_method) - - if len(self.graph.predecessors(self.root)) > 0: - print('root has parents ', mutate_method) - - if any([n in nx.ancestors(self.graph,n) for n in self.graph.nodes]): - print('a node is connecting to itself...') - - if self.__debug: - try: - nx.find_cycle(self.graph) - print('something went wrong with ', mutate_method) - except: - pass - - return True - - return False - - def _mutate_row_subsets(self, rng_=None): - rng = np.random.default_rng(rng_) - if self.unique_subset_values is not None: - self.row_subset_selector.mutate(rng_=rng) - - - def _mutate_hyperparameters(self, rng_=None): - ''' - Mutates the hyperparameters for a randomly chosen node in the graph. - ''' - rng = np.random.default_rng(rng_) - sorted_nodes_list = list(self.graph.nodes) - rng.shuffle(sorted_nodes_list) - completed_one = False - for node in sorted_nodes_list: - if isinstance(node,GraphIndividual): - continue - if isinstance(self.select_config_dict(node)[node.method_class], dict): - continue - - if not completed_one: - _,_, completed_one = get_hyperparameter(self.select_config_dict(node)[node.method_class], rng_=rng, nodelabel=node, alpha=self.hyperparameter_alpha, hyperparameter_probability=self.hyperparameter_probability) - else: - if self.hyper_node_probability > rng.random(): - get_hyperparameter(self.select_config_dict(node)[node.method_class], rng_=rng, nodelabel=node, alpha=self.hyperparameter_alpha, hyperparameter_probability=self.hyperparameter_probability) - - return completed_one - - - - - def _mutate_replace_node(self, rng_=None): - ''' - Replaces the method in a randomly chosen node by a method from the available methods for that node. 
- - ''' - rng = np.random.default_rng(rng_) - sorted_nodes_list = list(self.graph.nodes) - rng.shuffle(sorted_nodes_list) - for node in sorted_nodes_list: - if isinstance(node,GraphIndividual): - continue - new_node = create_node(self.select_config_dict(node), rng_=rng) - #check if new node and old node are the same - #TODO: add attempts? - if node.method_class != new_node.method_class or node.hyperparameters != new_node.hyperparameters: - nx.relabel_nodes(self.graph, {new_node:node}, copy=False) - return True - - return False - - - def _mutate_remove_node(self, rng_=None): - ''' - Removes a randomly chosen node and connects its parents to its children. - If the node is the only leaf for an inner node and 'leaf_config_dict' is not none, we do not remove it. - ''' - rng = np.random.default_rng(rng_) - nodes_list = list(self.graph.nodes) - nodes_list.remove(self.root) - leaves = graph_utils.get_leaves(self.graph) - - while len(nodes_list) > 0: - node = rng.choice(nodes_list) - nodes_list.remove(node) - - if self.leaf_config_dict is not None and len(list(nx.descendants(self.graph,node))) == 0 : #if the node is a leaf - if len(leaves) <= 1: - continue #dont remove the last leaf - leaf_parents = self.graph.predecessors(node) - - # if any of the parents of the node has one one child, continue - if any([len(list(self.graph.successors(lp))) < 2 for lp in leaf_parents]): #dont remove a leaf if it is the only input into another node. - continue - - graph_utils.remove_and_stitch(self.graph, node) - graph_utils.remove_nodes_disconnected_from_node(self.graph, self.root) - return True - - else: - graph_utils.remove_and_stitch(self.graph, node) - graph_utils.remove_nodes_disconnected_from_node(self.graph, self.root) - return True - - return False - - def _mutate_remove_edge(self, rng_=None): - ''' - Deletes an edge as long as deleting that edge does not make the graph disconnected. 
- ''' - rng = np.random.default_rng(rng_) - sorted_nodes_list = list(self.graph.nodes) - rng.shuffle(sorted_nodes_list) - for child_node in sorted_nodes_list: - parents = list(self.graph.predecessors(child_node)) - if len(parents) > 1: # if it has more than one parent, you can remove an edge (if this is the only child of a node, it will become a leaf) - - for parent_node in parents: - # if removing the egde will make the parent_node a leaf node, skip - if self.leaf_config_dict is not None and len(list(self.graph.successors(parent_node))) < 2: - continue - - self.graph.remove_edge(parent_node, child_node) - return True - return False - - def _mutate_add_edge(self, rng_=None): - ''' - Randomly add an edge from a node to another node that is not an ancestor of the first node. - ''' - rng = np.random.default_rng(rng_) - sorted_nodes_list = list(self.graph.nodes) - rng.shuffle(sorted_nodes_list) - for child_node in sorted_nodes_list: - for parent_node in sorted_nodes_list: - if self.leaf_config_dict is not None: - if len(list(self.graph.successors(parent_node))) == 0: - continue - - # skip if - # - parent and child are the same node - # - edge already exists - # - child is an ancestor of parent - if (child_node is not parent_node) and not self.graph.has_edge(parent_node,child_node) and (child_node not in nx.ancestors(self.graph, parent_node)): - self.graph.add_edge(parent_node,child_node) - return True - - return False - - - def _mutate_insert_leaf(self, rng_=None): - rng = np.random.default_rng(rng_) - if self.max_size > self.graph.number_of_nodes(): - sorted_nodes_list = list(self.graph.nodes) - rng.shuffle(sorted_nodes_list) #TODO: sort by number of children and/or parents? 
bias model one way or another - for node in sorted_nodes_list: - #if leafs are protected, check if node is a leaf - #if node is a leaf, skip because we don't want to add node on top of node - if (self.leaf_config_dict is not None #if leafs are protected - and len(list(self.graph.successors(node))) == 0 #if node is leaf - and len(list(self.graph.predecessors(node))) > 0 #except if node is root, in which case we want to add a leaf even if it happens to be a leaf too - ): - - continue - - #If node *is* the root or is not a leaf, add leaf node. (dont want to add leaf on top of leaf) - if self.leaf_config_dict is not None: - new_node = create_node(self.leaf_config_dict, rng_=rng) - else: - new_node = create_node(self.inner_config_dict, rng_=rng) - - self.graph.add_node(new_node) - self.graph.add_edge(node, new_node) - return True - - return False - - def _mutate_insert_bypass_node(self, rng_=None): - rng = np.random.default_rng(rng_) - if self.max_size > self.graph.number_of_nodes(): - sorted_nodes_list = list(self.graph.nodes) - sorted_nodes_list2 = list(self.graph.nodes) - rng.shuffle(sorted_nodes_list) #TODO: sort by number of children and/or parents? 
bias model one way or another - rng.shuffle(sorted_nodes_list2) - for node in sorted_nodes_list: - for child_node in sorted_nodes_list2: - if child_node is not node and child_node not in nx.ancestors(self.graph, node): - if self.leaf_config_dict is not None: - #If if we are protecting leafs, dont add connection into a leaf - if len(list(nx.descendants(self.graph,node))) ==0 : - continue - - new_node = create_node(config_dict = self.inner_config_dict, rng_=rng) - - self.graph.add_node(new_node) - self.graph.add_edges_from([(node, new_node), (new_node, child_node)]) - return True - - return False - - - def _mutate_insert_inner_node(self, rng_=None): - rng = np.random.default_rng(rng_) - if self.max_size > self.graph.number_of_nodes(): - sorted_nodes_list = list(self.graph.nodes) - sorted_nodes_list2 = list(self.graph.nodes) - rng.shuffle(sorted_nodes_list) #TODO: sort by number of children and/or parents? bias model one way or another - rng.shuffle(sorted_nodes_list2) - for node in sorted_nodes_list: - #loop through children of node - for child_node in list(self.graph.successors(node)): - - if child_node is not node and child_node not in nx.ancestors(self.graph, node): - if self.leaf_config_dict is not None: - #If if we are protecting leafs, dont add connection into a leaf - if len(list(nx.descendants(self.graph,node))) ==0 : - continue - - new_node = create_node(config_dict = self.inner_config_dict, rng_=rng) - - self.graph.add_node(new_node) - self.graph.add_edges_from([(node, new_node), (new_node, child_node)]) - self.graph.remove_edge(node, child_node) - return True - - return False - - ###################################################### - # Crossover - - def get_graphs(self): - graphs = [self] - self.graph.graph['depth'] = 0 - self.graph.graph['recursive depth'] = 0 - for node in self.graph.nodes: - if isinstance(node, GraphIndividual): - node.graph.graph['depth'] = nx.shortest_path_length(self.graph, self.root, node) - graphs = graphs + 
node._get_graphs(depth=1) - - return graphs - - - def _get_graphs(self, depth=1): - graphs = [self] - self.graph.graph['recursive depth'] = depth - for node in self.graph.nodes: - if isinstance(node, GraphIndividual): - node.graph.graph['depth'] = nx.shortest_path_length(self.graph, self.root, node) - graphs = graphs + node._get_graphs(depth=depth+1) - - return graphs - - - def select_graphindividual(self, rng_=None): - rng = np.random.default_rng(rng_) - graphs = self.get_graphs() - weights = [g.graph.number_of_nodes() for g in graphs] - w_sum = sum(weights) - weights = [w / w_sum for w in weights] # generate probabilities based on sum of weights - return rng.choice(graphs, p=weights) - - - def select_graph_same_recursive_depth(self,ind1,ind2,rng_=None): - rng = np.random.default_rng(rng_) - - graphs1 = ind1.get_graphs() - weights1 = [g.graph.number_of_nodes() for g in graphs1] - w1_sum = sum(weights1) - weights1 = [w / w1_sum for w in weights1] - - graphs2 = ind2.get_graphs() - weights2 = [g.graph.number_of_nodes() for g in graphs2] - w2_sum = sum(weights2) - weights2 = [w / w2_sum for w in weights2] - - g1_sorted_graphs = random_weighted_sort(graphs1, weights1, rng) - g2_sorted_graphs = random_weighted_sort(graphs2, weights2, rng) - - for g1, g2 in zip(g1_sorted_graphs, g2_sorted_graphs): - if g1.graph.graph['depth'] == g2.graph.graph['depth'] and g1.graph.graph['recursive depth'] == g2.graph.graph['recursive depth']: - return g1, g2 - - return ind1,ind2 - - def crossover(self, ind2, rng_=None): - ''' - self is the first individual, ind2 is the second individual - If crossover_same_depth, it will select graphindividuals at the same recursive depth. - Otherwise, it will select graphindividuals randomly from the entire graph and its subgraphs. - - This does not impact graphs without subgraphs. And it does not impacts nodes that are not graphindividuals. 
Cros - ''' - - rng = np.random.default_rng(rng_) - - self.key = None - ind2.key = None - if self.crossover_same_recursive_depth: - # selects graphs from the same recursive depth and same depth from the root - g1, g2 = self.select_graph_same_recursive_depth(self, ind2, rng_=rng) - - - else: - g1 = self.select_graphindividual(rng_=rng) - g2 = ind2.select_graphindividual(rng_=rng) - - return g1._crossover(g2, rng_=rng) - - def _crossover(self, Graph, rng_=None): - rng = np.random.default_rng(rng_) - - rng.shuffle(self.crossover_methods_list) - for crossover_method in self.crossover_methods_list: - if crossover_method(Graph, rng_=rng): - self._merge_duplicated_nodes() - return True - - if self.__debug: - try: - nx.find_cycle(self.graph) - print('something went wrong with ', crossover_method) - except: - pass - - return False - - - def _crossover_row_subsets(self, G2, rng_=None): - rng = np.random.default_rng(rng_) - if self.unique_subset_values is not None and G2.unique_subset_values is not None: - self.row_subset_selector.crossover(G2.row_subset_selector, rng_=rng) - - - def _crossover_swap_node(self, G2, rng_=None): - ''' - Swaps randomly chosen node from Parent1 with a randomly chosen node from Parent2. 
- ''' - rng = np.random.default_rng(rng_) - - if self.crossover_same_depth: - pair_gen = graph_utils.select_nodes_same_depth(self.graph, self.root, G2.graph, G2.root, rng_=rng) - else: - pair_gen = graph_utils.select_nodes_randomly(self.graph, G2.graph, rng_=rng) - - for node1, node2 in pair_gen: - if not (node1 is self.root or node2 is G2.root): #TODO: allow root - - n1_s = self.graph.successors(node1) - n1_p = self.graph.predecessors(node1) - - n2_s = G2.graph.successors(node2) - n2_p = G2.graph.predecessors(node2) - - self.graph.remove_node(node1) - G2.graph.remove_node(node2) - - self.graph.add_node(node2) - - self.graph.add_edges_from([ (node2, n) for n in n1_s]) - G2.graph.add_edges_from([ (node1, n) for n in n2_s]) - - self.graph.add_edges_from([ (n, node2) for n in n1_p]) - G2.graph.add_edges_from([ (n, node1) for n in n2_p]) - - return True - return False - - - - def _crossover_swap_branch(self, G2, rng_=None): - ''' - swaps a branch from parent1 with a branch from parent2. does not modify parent2 - ''' - rng = np.random.default_rng(rng_) - - if self.crossover_same_depth: - pair_gen = graph_utils.select_nodes_same_depth(self.graph, self.root, G2.graph, G2.root, rng_=rng) - else: - pair_gen = graph_utils.select_nodes_randomly(self.graph, G2.graph, rng_=rng) - - for node1, node2 in pair_gen: - #TODO: if root is in inner_config_dict, then do use it? 
- if node1 is self.root or node2 is G2.root: #dont want to add root as inner node - continue - - #check if node1 is a leaf and leafs are protected, don't add an input to the leave - if self.leaf_config_dict is not None: #if we are protecting leaves, - node1_is_leaf = len(list(self.graph.successors(node1))) == 0 - node2_is_leaf = len(list(G2.graph.successors(node2))) == 0 - #if not ((node1_is_leaf and node1_is_leaf) or (not node1_is_leaf and not node2_is_leaf)): #if node1 is a leaf - if (node1_is_leaf and (not node2_is_leaf)) or ( (not node1_is_leaf) and node2_is_leaf): - #only continue if node1 and node2 are both leaves or both not leaves - continue - - temp_graph_1 = self.graph.copy() - temp_graph_1.remove_node(node1) - graph_utils.remove_nodes_disconnected_from_node(temp_graph_1, self.root) - - #isolating the branch - branch2 = G2.graph.copy() - n2_descendants = nx.descendants(branch2,node2) - for n in list(branch2.nodes): - if n not in n2_descendants and n is not node2: #removes all nodes not in the branch - branch2.remove_node(n) - - branch2 = copy.deepcopy(branch2) - branch2_root = graph_utils.get_roots(branch2)[0] - temp_graph_1.add_edges_from(branch2.edges) - for p in list(self.graph.predecessors(node1)): - temp_graph_1.add_edge(p,branch2_root) - - if temp_graph_1.number_of_nodes() > self.max_size: - continue - - self.graph = temp_graph_1 - - return True - return False - - #TODO: Currently returns true even if hyperparameters are blank - def _crossover_hyperparameters(self, G2, rng_=None): - ''' - Swaps the hyperparamters of one randomly chosen node in Parent1 with the hyperparameters of randnomly chosen node in Parent2. 
- ''' - rng = np.random.default_rng(rng_) - - if self.crossover_same_depth: - pair_gen = graph_utils.select_nodes_same_depth(self.graph, self.root, G2.graph, G2.root, rng_=rng) - else: - pair_gen = graph_utils.select_nodes_randomly(self.graph, G2.graph, rng_=rng) - - for node1, node2 in pair_gen: - if isinstance(node1,GraphIndividual) or isinstance(node2,GraphIndividual): - continue - - if node1.method_class == node2.method_class: - tmp = node1.hyperparameters - node1.hyperparameters = node2.hyperparameters - node2.hyperparameters = tmp - return True - - return False - - #not including the nodes, just their children - #Finds leaves attached to nodes and swaps them - def _crossover_swap_leaf_at_node(self, G2, rng_=None): - rng = np.random.default_rng(rng_) - - if self.crossover_same_depth: - pair_gen = graph_utils.select_nodes_same_depth(self.graph, self.root, G2.graph, G2.root, rng_=rng) - else: - pair_gen = graph_utils.select_nodes_randomly(self.graph, G2.graph, rng_=rng) - - success = False - for node1, node2 in pair_gen: - # if leaves are protected node1 and node2 must both be leaves or both be inner nodes - if self.leaf_config_dict is not None and not (len(list(self.graph.successors(node1)))==0 ^ len(list(G2.graph.successors(node2)))==0): - continue - #self_leafs = [c for c in nx.descendants(self.graph,node1) if len(list(self.graph.successors(c)))==0 and c is not node1] - node_leafs = [c for c in nx.descendants(G2.graph,node2) if len(list(G2.graph.successors(c)))==0 and c is not node2] - - # if len(self_leafs) >0: - # for c in self_leafs: - # if random.choice([True,False]): - # self.graph.remove_node(c) - # G2.graph.add_edge(node2, c) - # success = True - - if len(node_leafs) >0: - for c in node_leafs: - if rng.choice([True,False]): - G2.graph.remove_node(c) - self.graph.add_edge(node1, c) - success = True - - return success - - - def _crossover_take_branch(self, G2, rng_=None): - ''' - Takes a subgraph from Parent2 and add it to a randomly chosen node in 
Parent1. - ''' - rng = np.random.default_rng(rng_) - - if self.crossover_same_depth: - pair_gen = graph_utils.select_nodes_same_depth(self.graph, self.root, G2.graph, G2.root, rng_=rng) - else: - pair_gen = graph_utils.select_nodes_randomly(self.graph, G2.graph, rng_=rng) - - for node1, node2 in pair_gen: - #TODO: if root is in inner_config_dict, then do use it? - if node2 is G2.root: #dont want to add root as inner node - continue - - - #check if node1 is a leaf and leafs are protected, don't add an input to the leave - if self.leaf_config_dict is not None and len(list(self.graph.successors(node1))) == 0: - continue - - #icheck if node2 is graph individual - # if isinstance(node2,GraphIndividual): - # if not ((isinstance(node2,GraphIndividual) and ("Recursive" in self.inner_config_dict or "Recursive" in self.leaf_config_dict))): - # continue - - #isolating the branch - branch2 = G2.graph.copy() - n2_descendants = nx.descendants(branch2,node2) - for n in list(branch2.nodes): - if n not in n2_descendants and n is not node2: #removes all nodes not in the branch - branch2.remove_node(n) - - #if node1 plus node2 branch has more than max_children, skip - if branch2.number_of_nodes() + self.graph.number_of_nodes() > self.max_size: - continue - - branch2 = copy.deepcopy(branch2) - branch2_root = graph_utils.get_roots(branch2)[0] - self.graph.add_edges_from(branch2.edges) - self.graph.add_edge(node1,branch2_root) - - return True - return False - - #TODO: swap all leaf nodes - def _crossover_swap_all_leafs(self, G2, rng_=None): - pass - - - #TODO: currently ignores ensembles, make it include nodes inside of ensembles - def optimize(self, rng_, objective_function, steps=5): - rng = np.random.default_rng(rng_) - rng.shuffle(self.optimize_methods_list) #select an optimization method - for optimize_method in self.optimize_methods_list: - if optimize_method(rng, objective_function, steps=steps): - return True - - #optimize the hyperparameters of one method to improve the entire 
pipeline - def _optimize_optuna_single_method_full_pipeline(self, rng_, objective_function, steps=5): - rng = np.random.default_rng(rng_) - nodes_list = list(self.graph.nodes) - rng.shuffle(nodes_list) #TODO: sort by number of children and/or parents? bias model one way or another - for node in nodes_list: - if not isinstance(node, NodeLabel) or isinstance(self.select_config_dict(node)[node.method_class],dict): - continue - else: - study = optuna.create_study() - - def objective(trial): - params = self.select_config_dict(node)[node.method_class](trial) - node.hyperparameters = params - - trial.set_user_attr('params', params) - try: - return objective_function(self) - except: - return np.NAN - - study.optimize(objective, n_trials=steps) - node.hyperparameters = study.best_trial.user_attrs['params'] - return True - - - #optimize the hyperparameters of all methods simultaneously to improve the entire pipeline - def _optimize_optuna_all_methods_full_pipeline(self, rng_, objective_function, steps=5): - nodes_list = list(self.graph.nodes) - study = optuna.create_study() - nodes_to_optimize = [] - for node in nodes_list: - if not isinstance(node, NodeLabel) or isinstance(self.select_config_dict(node)[node.method_class],dict): - continue - else: - nodes_to_optimize.append(node) - - def objective(trial): - param_list = [] - for i, node in enumerate(nodes_to_optimize): - params = self.select_config_dict(node)[node.method_class](trial, name=f'node_{i}') - node.hyperparameters = params - param_list.append(params) - - trial.set_user_attr('params', param_list) - - try: - return objective_function(self) - except: - return np.NAN - - study.optimize(objective, n_trials=steps) - best_params = study.best_trial.user_attrs['params'] - - for node, params in zip(nodes_to_optimize,best_params): - node.hyperparameters = params - - return True - - - def _cached_transform(cache_nunber=0): - #use a cache for models at each CV fold? - #cache just transformations at each fold? 
- #TODO how to separate full model? - pass - - def __str__(self): - return self.export_pipeline().__str__() - - def unique_id(self) -> GraphKey: - if self.key is None: - g = self.flatten_pipeline() - for n in g.nodes: - if "subset_values" in g.nodes[n]: - g.nodes[n]['label'] = {n.method_class: n.hyperparameters, "subset_values":g.nodes[n]["subset_values"]} - else: - g.nodes[n]['label'] = {n.method_class: n.hyperparameters} - - g.nodes[n]['method_class'] = n.method_class #TODO making this transformation doesn't feel very clean? - g.nodes[n]['hyperparameters'] = n.hyperparameters - - g = nx.convert_node_labels_to_integers(g) - self.key = GraphKey(graph=g) - - return self.key - - def full_node_list(self): - node_list = list(self.graph.nodes) - for node in node_list: - if isinstance(node, GraphIndividual): - node_list.pop(node_list.index(node)) - node_list.extend(node.graph.nodes) - return node_list - - - - -def create_node(config_dict, rng_=None): - ''' - Takes a config_dict and returns a node with a random method_class and hyperparameters - ''' - rng = np.random.default_rng(rng_) - method_class = rng.choice(list(config_dict.keys())) - #if method_class == GraphIndividual or method_class == 'Recursive': - if method_class == 'Recursive': - node = GraphIndividual(**config_dict[method_class]) - else: - hyperparameters, params, _ = get_hyperparameter(config_dict[method_class], rng_=rng, nodelabel=None) - - node = NodeLabel( - method_class=method_class, - hyperparameters=hyperparameters - ) - node._params = params - - return node - - -def random_weighted_sort(l,weights, rng_=None): - rng = np.random.default_rng(rng_) - sorted_l = [] - indeces = {i: weights[i] for i in range(len(l))} - while len(indeces) > 0: - keys = list(indeces.keys()) - p = np.array([indeces[k] for k in keys]) - p = p / p.sum() - next_item = rng.choice(list(indeces.keys()), p=p) - indeces.pop(next_item) - sorted_l.append(l[next_item]) - - return sorted_l - - -def get_hyperparameter(config_func, rng_, 
nodelabel=None, alpha=1, hyperparameter_probability=1): - rng = np.random.default_rng(rng_) - changed = False - if isinstance(config_func, dict): - return config_func, None, changed - - if nodelabel is not None: - trial = config.hyperparametersuggestor.Trial(rng_=rng, old_params=nodelabel._params, alpha=alpha, hyperparameter_probability=hyperparameter_probability) - new_params = config_func(trial) - changed = trial._params != nodelabel._params - nodelabel._params = trial._params - nodelabel.hyperparameters = new_params - else: - trial = config.hyperparametersuggestor.Trial(rng_=rng, old_params=None, alpha=alpha, hyperparameter_probability=hyperparameter_probability) - new_params = config_func(trial) - - return new_params, trial._params, changed \ No newline at end of file diff --git a/tpot2/individual_representations/graph_pipeline_individual/optuna_optimize.py b/tpot2/individual_representations/graph_pipeline_individual/optuna_optimize.py deleted file mode 100644 index 0928b986..00000000 --- a/tpot2/individual_representations/graph_pipeline_individual/optuna_optimize.py +++ /dev/null @@ -1,228 +0,0 @@ -from tpot2.individual_representations.graph_pipeline_individual.individual import * -import optuna -import numpy as np -import copy -import dask -import traceback -import functools - -# labels all nodes in the graph with a unique ID. -# This allows use to identify exact nodes in the copies on the graph. -# This is necessary since copies of the graph use different NodeLabel object instances as keys, making it hard to identify which are the same nodes. 
-def label_nodes_in_graphindividual(graphindividual): - nodes_list = graphindividual.full_node_list() - for i, node in enumerate(nodes_list): - if not isinstance(node, NodeLabel): - continue - else: - node.label = f'node_{i}' - - -def optuna_optimize_full_graph(graphindividual, objective_function, objective_function_weights, steps=5, relabel=True, verbose=0, max_eval_time_seconds=60*5, max_time_seconds=60*20, n_returned_models='all', study=None, **objective_kwargs): - if relabel: - label_nodes_in_graphindividual(graphindividual) - - graphindividual = copy.deepcopy(graphindividual) - nodes_list = graphindividual.full_node_list() - - - nodes_to_optimize = [] - for node in nodes_list: - if not isinstance(node, NodeLabel) or isinstance(graphindividual.select_config_dict(node)[node.method_class],dict): - continue - else: - nodes_to_optimize.append(node) - - def objective(trial): - param_dict = dict() - graphindividual.key = None - for node in nodes_to_optimize: - params = graphindividual.select_config_dict(node)[node.method_class](trial, name=node.label) - node.hyperparameters = params - param_dict[node.label] = params - - trial.set_user_attr('params', param_dict) - - try: - scores = tpot2.objective_nan_wrapper(graphindividual, objective_function, verbose=verbose,timeout=max_eval_time_seconds,**objective_kwargs)#objective_function(graphindividual) - trial.set_user_attr('scores', list(scores)) - if scores[0] != "INVALID" and scores[0] != "TIMEOUT": - scores = np.array(scores) * objective_function_weights - scores = list(scores) - - except Exception as e: - print(e) - print(traceback.format_exc()) - scores = ['INVALID'] - trial.set_user_attr('scores', scores) - return scores - - study.optimize(objective, n_trials=steps, timeout=max_time_seconds) - - return study - -def graph_objective(trial, graphindividual, objective_function, objective_function_weights, verbose=0, max_eval_time_seconds=60*5, **objective_kwargs): - - graphindividual = copy.deepcopy(graphindividual) - 
nodes_list = graphindividual.full_node_list() - - - nodes_to_optimize = [] - for node in nodes_list: - if not isinstance(node, NodeLabel) or isinstance(graphindividual.select_config_dict(node)[node.method_class],dict): - continue - else: - nodes_to_optimize.append(node) - - param_dict = dict() - graphindividual.key = None - for node in nodes_to_optimize: - params = graphindividual.select_config_dict(node)[node.method_class](trial, name=node.label) - node.hyperparameters = params - param_dict[node.label] = params - - trial.set_user_attr('params', param_dict) - - try: - scores = tpot2.objective_nan_wrapper(graphindividual, objective_function, verbose=verbose,timeout=max_eval_time_seconds,**objective_kwargs)#objective_function(graphindividual) - trial.set_user_attr('scores', list(scores)) - if scores[0] != "INVALID" and scores[0] != "TIMEOUT": - scores = np.array(scores) * objective_function_weights - scores = list(scores) - - except Exception as e: - print(e) - print(traceback.format_exc()) - scores = ['INVALID'] - trial.set_user_attr('scores', scores) - - return scores - - -def simple_parallel_optuna(individuals, objective_function, objective_function_weights, client, storage, steps=5, verbose=0, max_eval_time_seconds=60*5, max_time_seconds=60*20, **objective_kwargs): - num_workers = len(client.scheduler_info()['workers']) - worker_per_individual = max(1,int(np.floor(num_workers/len(individuals)))) - remainder = num_workers%len(individuals) - - print(len(individuals)) - - directions = np.repeat('maximize',len(objective_function_weights)) - timeout = max(max_time_seconds/len(individuals), max_eval_time_seconds*2) - - studies = [] - for i, ind in enumerate(individuals): - label_nodes_in_graphindividual(ind) - print(ind) - - #study = optuna.create_study(directions=directions, storage=f"{storage}", load_if_exists=False) - backend_storage = optuna.storages.InMemoryStorage() - study = optuna.create_study(directions=directions, storage=backend_storage, 
load_if_exists=False) - studies.append(study) - - objective = functools.partial(graph_objective, graphindividual=ind, objective_function=objective_function, objective_function_weights=objective_function_weights, verbose=verbose, max_eval_time_seconds=max_eval_time_seconds, **objective_kwargs) - study.optimize(objective, n_trials=steps, timeout=timeout, n_jobs=num_workers) - - all_graphs = [] - all_scores = [] - for study, ind in zip(studies,individuals): - graphs, scores = get_all_individuals_from_study(study, ind) - all_graphs.extend(graphs) - all_scores.extend(scores) - - return all_graphs, all_scores - - - - -def simple_parallel_optuna_old(individuals, objective_function, objective_function_weights, client, storage, steps=5, verbose=0, max_eval_time_seconds=60*5, max_time_seconds=60*20, **objective_kwargs): - num_workers = len(client.scheduler_info()['workers']) - worker_per_individual = max(1,int(np.floor(num_workers/len(individuals)))) - remainder = num_workers%len(individuals) - - print(worker_per_individual) - print(remainder) - - directions = np.repeat('maximize',len(objective_function_weights)) - - - - futures = [] - studies = [] - for i, ind in enumerate(individuals): - label_nodes_in_graphindividual(ind) - #study = optuna.create_study(directions=directions, storage=f"{storage}", load_if_exists=False) - backend_storage = optuna.storages.InMemoryStorage() - dask_storage = optuna.integration.DaskStorage(storage=backend_storage, client=client) - study = optuna.create_study(directions=directions, storage=dask_storage, load_if_exists=False) - studies.append(study) - if i == 0: - n_futures = worker_per_individual + remainder - else: - n_futures = worker_per_individual - - trials_per_thread = int(np.ceil(steps/n_futures)) - - objective = functools.partial(graph_objective, graphindividual=ind, objective_function=objective_function, objective_function_weights=objective_function_weights, verbose=verbose, max_eval_time_seconds=max_eval_time_seconds) - for _ in 
range(n_futures): - #future = client.submit(study.optimize, objective, n_trials=trials_per_thread, pure=False, timeout=max_time_seconds,) - future = client.submit(submit_helper, study=study, objective=objective, n_trials=trials_per_thread, timeout=max_time_seconds, pure=False, **objective_kwargs) - futures.append(future) - #futures.append(client.submit(optuna_optimize_full_graph, graphindividual=ind, objective_function=objective_function, objective_function_weights=objective_function_weights, steps=trials_per_thread, verbose=verbose, max_eval_time_seconds=max_eval_time_seconds, max_time_seconds=max_time_seconds, study=study, relabel=False, pure=False, **objective_kwargs)) - - print(len(individuals)) - print(len(futures)) - dask.distributed.wait(futures) - - all_graphs = [] - all_scores = [] - for study, ind in zip(studies,individuals): - graphs, scores = get_all_individuals_from_study(study, ind) - all_graphs.extend(graphs) - all_scores.extend(scores) - - return all_graphs, all_scores - -def submit_helper(study, objective, n_trials, timeout, **kwargs): - objective = functools.partial(objective, **kwargs) - study.optimize(objective, n_trials=n_trials,timeout=timeout) - -def get_all_individuals_from_study(study, graphindividual, n_returned_models='all'): - all_graphs = [] - all_scores = [] - - if n_returned_models == 'pareto': - trials_list = study.best_trials - else: - trials_list = study.trials - - for trial in trials_list: - if not 'scores' in trial.user_attrs: - continue - - params = trial.user_attrs['params'] - scores = trial.user_attrs['scores'] - - newgraphindividual = copy.deepcopy(graphindividual) - newgraphindividual.key = None - try: - for node in newgraphindividual.full_node_list(): - if not isinstance(node, tpot2.NodeLabel): - continue - else: - if node.label in params: - node.hyperparameters = params[node.label] - - all_graphs.append(newgraphindividual) - all_scores.append(scores) - except Exception as e: - print('failed to create graphindividual from 
trial') - print(e) - print(traceback.format_exc()) - print(params) - print(newgraphindividual) - print(newgraphindividual.graph.nodes) - for node in newgraphindividual.full_node_list(): - print(node.label) - - - return all_graphs, all_scores \ No newline at end of file diff --git a/tpot2/individual_representations/graph_pipeline_individual/templates.py b/tpot2/individual_representations/graph_pipeline_individual/templates.py deleted file mode 100644 index 9b383141..00000000 --- a/tpot2/individual_representations/graph_pipeline_individual/templates.py +++ /dev/null @@ -1,75 +0,0 @@ - -import numpy as np -import tpot2 -import networkx as nx -from tpot2.individual_representations.graph_pipeline_individual import GraphIndividual - -from tpot2.individual_representations.graph_pipeline_individual.individual import create_node - - -# will randomly generate individuals (no predefined order) -def estimator_graph_individual_generator( - root_config_dict, - inner_config_dict=None, - leaf_config_dict=None, - max_size = np.inf, - linear_pipeline = False, - hyperparameter_probability = 1, - hyper_node_probability = 0, - hyperparameter_alpha = 1, - rng_=None, - **kwargs, - ) : - - rng = np.random.default_rng(rng_) - - while True: - - # if user specified limit, grab a random number between that limit - if max_size is not np.inf: - n_nodes = rng.integers(1,max_size+1) - # else, grab random number between 1,11 (theaksaini) - else: - n_nodes = rng.integers(1,11) - - graph = nx.DiGraph() - root = create_node(config_dict=root_config_dict, rng_=rng) # grab random root model method - graph.add_node(root) - - ind = GraphIndividual( rng_=rng, - inner_config_dict=inner_config_dict, - leaf_config_dict=leaf_config_dict, - root_config_dict=root_config_dict, - initial_graph = graph, - - max_size = max_size, - linear_pipeline = linear_pipeline, - hyperparameter_probability = hyperparameter_probability, - hyper_node_probability = hyper_node_probability, - hyperparameter_alpha = 
hyperparameter_alpha, - - **kwargs, - ) - - starting_ops = [] - if inner_config_dict is not None: - starting_ops.append(ind._mutate_insert_inner_node) - if leaf_config_dict is not None or inner_config_dict is not None: - starting_ops.append(ind._mutate_insert_leaf) - n_nodes -= 1 - - if len(starting_ops) > 0: - for _ in range(n_nodes-1): - func = rng.choice(starting_ops) - func(rng_=rng) - - yield ind - - -class BaggingCompositeGraphSklearn(): - def __init__(self) -> None: - pass - -class BoostingCompositeGraphSklearn(): - def __init__(self) -> None: - pass diff --git a/tpot2/individual_representations/subset_selector/__init__.py b/tpot2/individual_representations/subset_selector/__init__.py deleted file mode 100644 index e856439c..00000000 --- a/tpot2/individual_representations/subset_selector/__init__.py +++ /dev/null @@ -1 +0,0 @@ -from .subsetselector import SubsetSelector \ No newline at end of file diff --git a/tpot2/individual_representations/subset_selector/subsetselector.py b/tpot2/individual_representations/subset_selector/subsetselector.py deleted file mode 100644 index 5dc1d8af..00000000 --- a/tpot2/individual_representations/subset_selector/subsetselector.py +++ /dev/null @@ -1,57 +0,0 @@ -from numpy import iterable -import tpot2 -import numpy as np -from .. 
import BaseIndividual - -class SubsetSelector(BaseIndividual): - def __init__( self, - values, - rng_=None, - initial_set = None, - k=1, #step size for shuffling - ): - - rng = np.random.default_rng(rng_) - - if isinstance(values, int): - self.values = set(range(0,values)) - else: - self.values = set(values) - - - if initial_set is None: - self.subsets = set(rng.choices(values, k=k)) - else: - self.subsets = set(initial_set) - - self.k = k - - self.mutation_list = [self._mutate_add, self._mutate_remove] - self.crossover_list = [self._crossover_swap] - - def _mutate_add(self, rng_=None): - rng = np.random.default_rng(rng_) - not_included = list(self.values.difference(self.subsets)) - if len(not_included) > 1: - self.subsets.update(rng.choice(not_included, k=min(self.k, len(not_included)))) - return True - else: - return False - - def _mutate_remove(self, rng_=None): - rng = np.random.default_rng(rng_) - if len(self.subsets) > 1: - self.subsets = self.subsets - set(rng.choice(list(self.subsets), k=min(self.k, len(self.subsets)-1) )) - - def _crossover_swap(self, ss2, rng_=None): - rng = np.random.default_rng(rng_) - diffs = self.subsets.symmetric_difference(ss2.subsets) - - if len(diffs) == 0: - return False - for v in diffs: - self.subsets.discard(v) - ss2.subsets.discard(v) - rng.choice([self.subsets, ss2.subsets]).add(v) - - return True diff --git a/tpot2/population.py b/tpot2/population.py index a3a0c54c..5c102134 100644 --- a/tpot2/population.py +++ b/tpot2/population.py @@ -3,7 +3,7 @@ import copy import typing import tpot2 -from tpot2.individual_representations.individual import BaseIndividual +from tpot2 import BaseIndividual from traitlets import Bool import collections import pandas as pd @@ -12,32 +12,32 @@ import pickle import dask -def mutate(individual, rng_=None): - rng = np.random.default_rng(rng_) +def mutate(individual, rng=None): + rng = np.random.default_rng(rng) if isinstance(individual, collections.abc.Iterable): for ind in individual: - 
ind.mutate(rng_=rng) + ind.mutate(rng=rng) else: - individual.mutate(rng_=rng) + individual.mutate(rng=rng) return individual -def crossover(parents, rng_=None): - rng = np.random.default_rng(rng_) - parents[0].crossover(parents[1], rng_=rng) +def crossover(parents, rng=None): + rng = np.random.default_rng(rng) + parents[0].crossover(parents[1], rng=rng) return parents[0] -def mutate_and_crossover(parents, rng_=None): - rng = np.random.default_rng(rng_) - parents[0].crossover(parents[1], rng_=rng) - parents[0].mutate(rng_=rng) - parents[1].mutate(rng_=rng) +def mutate_and_crossover(parents, rng=None): + rng = np.random.default_rng(rng) + parents[0].crossover(parents[1], rng=rng) + parents[0].mutate(rng=rng) + parents[1].mutate(rng=rng) return parents -def crossover_and_mutate(parents, rng_=None): - rng = np.random.default_rng(rng_) +def crossover_and_mutate(parents, rng=None): + rng = np.random.default_rng(rng) for p in parents: - p.mutate(rng_=rng) - parents[0].crossover(parents[1], rng_=rng) + p.mutate(rng=rng) + parents[0].crossover(parents[1], rng=rng) return parents[0] @@ -90,19 +90,19 @@ def __init__( self, self.callback=callback self.population = [] - def survival_select(self, selector, weights, columns_names, n_survivors, rng_=None, inplace=True): - rng = np.random.default_rng(rng_) + def survival_select(self, selector, weights, columns_names, n_survivors, rng=None, inplace=True): + rng = np.random.default_rng(rng) weighted_scores = self.get_column(self.population, column_names=columns_names) * weights - new_population_index = np.ravel(selector(weighted_scores, k=n_survivors, rng_=rng)) #TODO make it clear that we are concatenating scores... + new_population_index = np.ravel(selector(weighted_scores, k=n_survivors, rng=rng)) #TODO make it clear that we are concatenating scores... 
new_population = np.array(self.population)[new_population_index] if inplace: - self.set_population(new_population, rng_=rng) + self.set_population(new_population, rng=rng) return new_population - def parent_select(self, selector, weights, columns_names, k, n_parents, rng_=None): - rng = np.random.default_rng(rng_) + def parent_select(self, selector, weights, columns_names, k, n_parents, rng=None): + rng = np.random.default_rng(rng) weighted_scores = self.get_column(self.population, column_names=columns_names) * weights - parents_index = selector(weighted_scores, k=k, n_parents=n_parents, rng_=rng) + parents_index = selector(weighted_scores, k=k, n_parents=n_parents, rng=rng) parents = np.array(self.population)[parents_index] return parents @@ -136,7 +136,7 @@ def remove_invalid_from_population(self, column_names, invalid_value = "INVALID" # returns a list of individuals added to the live population #TODO make keep repeats allow for previously evaluated individuals, #but make sure that the live population only includes one of each, no repeats - def add_to_population(self, individuals: typing.List[BaseIndividual], rng_=None, keep_repeats=False, mutate_until_unique=True): + def add_to_population(self, individuals: typing.List[BaseIndividual], rng=None, keep_repeats=False, mutate_until_unique=True): ''' Add individuals to the live population. Add individuals to the evaluated_individuals if they are not already there. @@ -149,7 +149,7 @@ def add_to_population(self, individuals: typing.List[BaseIndividual], rng_=None, If False, only add individuals that have not yet been added to geneology. 
''' - rng = np.random.default_rng(rng_) + rng = np.random.default_rng(rng) if not isinstance(individuals, collections.abc.Iterable): individuals = [individuals] @@ -172,7 +172,7 @@ def add_to_population(self, individuals: typing.List[BaseIndividual], rng_=None, elif mutate_until_unique: #If its old and we don't want repeats, we can optionally mutate it until it is unique for _ in range(20): individual = copy.deepcopy(individual) - individual.mutate(rng_=rng) + individual.mutate(rng=rng) key = individual.unique_id() if key not in self.evaluated_individuals.index: self.evaluated_individuals.loc[key] = np.nan @@ -252,17 +252,17 @@ def get_unevaluated_individuals(self, column_names, individual_list=None): # return self.evaluated_individuals[~self.evaluated_individuals[column_names_to_check].isin(invalid_values).any(axis=1)] #the live population empied and is set to new_population - def set_population(self, new_population, rng_=None, keep_repeats=True): + def set_population(self, new_population, rng=None, keep_repeats=True): ''' sets population to new population for selection? ''' - rng = np.random.default_rng(rng_) + rng = np.random.default_rng(rng) self.population = [] - self.add_to_population(new_population, rng_=rng, keep_repeats=keep_repeats) + self.add_to_population(new_population, rng=rng, keep_repeats=keep_repeats) #TODO should we just generate one offspring per crossover? - def create_offspring(self, parents_list, var_op_list, rng_=None, add_to_population=True, keep_repeats=False, mutate_until_unique=True, n_jobs=1): + def create_offspring(self, parents_list, var_op_list, rng=None, add_to_population=True, keep_repeats=False, mutate_until_unique=True, n_jobs=1): ''' parents_list: a list of lists of parents. var_op_list: a list of var_ops to apply to each list of parents. Should be the same length as parents_list. 
@@ -280,9 +280,9 @@ def create_offspring(self, parents_list, var_op_list, rng_=None, add_to_populati - "mutate_and_crossover" : mutate_and_crossover - "cross_and_mutate" : cross_and_mutate ''' - rng = np.random.default_rng(rng_) + rng = np.random.default_rng(rng) new_offspring = [] - all_offspring = parallel_create_offspring(parents_list, var_op_list, rng_=rng, n_jobs=n_jobs) + all_offspring = parallel_create_offspring(parents_list, var_op_list, rng=rng, n_jobs=n_jobs) for parents, offspring, var_op in zip(parents_list, all_offspring, var_op_list): @@ -295,7 +295,7 @@ def create_offspring(self, parents_list, var_op_list, rng_=None, add_to_populati # offspring = offspring[0] if add_to_population: - added = self.add_to_population(offspring, rng_=rng, keep_repeats=keep_repeats, mutate_until_unique=mutate_until_unique) + added = self.add_to_population(offspring, rng=rng, keep_repeats=keep_repeats, mutate_until_unique=mutate_until_unique) if len(added) > 0: for new_child in added: parent_keys = [parent.unique_id() for parent in parents] @@ -320,9 +320,9 @@ def create_offspring(self, parents_list, var_op_list, rng_=None, add_to_populati #TODO should we just generate one offspring per crossover? 
- def create_offspring2(self, parents_list, var_op_list, mutation_functions,mutation_function_weights, crossover_functions,crossover_function_weights, rng_=None, add_to_population=True, keep_repeats=False, mutate_until_unique=True): + def create_offspring2(self, parents_list, var_op_list, mutation_functions,mutation_function_weights, crossover_functions,crossover_function_weights, rng=None, add_to_population=True, keep_repeats=False, mutate_until_unique=True): - rng = np.random.default_rng(rng_) + rng = np.random.default_rng(rng) new_offspring = [] all_offspring = [] @@ -332,29 +332,29 @@ def create_offspring2(self, parents_list, var_op_list, mutation_functions,mutati #TODO put this loop in population class if var_op == "mutate": mutation_op = rng.choice(mutation_functions, p=mutation_function_weights) - all_offspring.append(copy_and_mutate(parents[0], mutation_op, rng_=rng)) + all_offspring.append(copy_and_mutate(parents[0], mutation_op, rng=rng)) chosen_ops.append(mutation_op.__name__) elif var_op == "crossover": crossover_op = rng.choice(crossover_functions, p=crossover_function_weights) - all_offspring.append(copy_and_crossover(parents, crossover_op, rng_=rng)) + all_offspring.append(copy_and_crossover(parents, crossover_op, rng=rng)) chosen_ops.append(crossover_op.__name__) elif var_op == "mutate_then_crossover": mutation_op1 = rng.choice(mutation_functions, p=mutation_function_weights) mutation_op2 = rng.choice(mutation_functions, p=mutation_function_weights) crossover_op = rng.choice(crossover_functions, p=crossover_function_weights) - p1 = copy_and_mutate(parents[0], mutation_op1, rng_=rng) - p2 = copy_and_mutate(parents[1], mutation_op2, rng_=rng) - crossover_op(p1,p2,rng_=rng) + p1 = copy_and_mutate(parents[0], mutation_op1, rng=rng) + p2 = copy_and_mutate(parents[1], mutation_op2, rng=rng) + crossover_op(p1,p2,rng=rng) all_offspring.append(p1) chosen_ops.append(f"{mutation_op1.__name__} , {mutation_op2.__name__} , {crossover_op.__name__}") elif var_op == 
"crossover_then_mutate": crossover_op = rng.choice(crossover_functions, p=crossover_function_weights) - child = copy_and_crossover(parents, crossover_op, rng_=rng) + child = copy_and_crossover(parents, crossover_op, rng=rng) mutation_op = rng.choice(mutation_functions, p=mutation_function_weights) - mutation_op(child, rng_=rng) + mutation_op(child, rng=rng) all_offspring.append(child) chosen_ops.append(f"{crossover_op.__name__} , {mutation_op.__name__}") @@ -370,7 +370,7 @@ def create_offspring2(self, parents_list, var_op_list, mutation_functions,mutati # offspring = offspring[0] if add_to_population: - added = self.add_to_population(offspring, rng_=rng, keep_repeats=keep_repeats, mutate_until_unique=mutate_until_unique) + added = self.add_to_population(offspring, rng=rng, keep_repeats=keep_repeats, mutate_until_unique=mutate_until_unique) if len(added) > 0: for new_child in added: parent_keys = [parent.unique_id() for parent in parents] @@ -395,56 +395,56 @@ def create_offspring2(self, parents_list, var_op_list, mutation_functions,mutati def get_id(individual): return individual.unique_id() -def parallel_create_offspring(parents_list, var_op_list, rng_=None, n_jobs=1): - rng = np.random.default_rng(rng_) +def parallel_create_offspring(parents_list, var_op_list, rng=None, n_jobs=1): + rng = np.random.default_rng(rng) if n_jobs == 1: - return nonparallel_create_offpring(parents_list, var_op_list, rng_=rng) + return nonparallel_create_offpring(parents_list, var_op_list, rng=rng) else: delayed_offspring = [] for parents, var_op in zip(parents_list,var_op_list): #TODO put this loop in population class if var_op in built_in_var_ops_dict: var_op = built_in_var_ops_dict[var_op] - delayed_offspring.append(dask.delayed(copy_and_change)(parents, var_op, rng_=rng)) + delayed_offspring.append(dask.delayed(copy_and_change)(parents, var_op, rng=rng)) offspring = dask.compute(*delayed_offspring, num_workers=n_jobs, threads_per_worker=1) return offspring -def 
nonparallel_create_offpring(parents_list, var_op_list, rng_=None, n_jobs=1): - rng = np.random.default_rng(rng_) +def nonparallel_create_offpring(parents_list, var_op_list, rng=None, n_jobs=1): + rng = np.random.default_rng(rng) offspring = [] for parents, var_op in zip(parents_list,var_op_list): #TODO put this loop in population class if var_op in built_in_var_ops_dict: var_op = built_in_var_ops_dict[var_op] - offspring.append(copy_and_change(parents, var_op, rng_=rng)) + offspring.append(copy_and_change(parents, var_op, rng=rng)) return offspring -def copy_and_change(parents, var_op, rng_=None): - rng = np.random.default_rng(rng_) +def copy_and_change(parents, var_op, rng=None): + rng = np.random.default_rng(rng) offspring = copy.deepcopy(parents) - offspring = var_op(offspring, rng_=rng) + offspring = var_op(offspring, rng=rng) if isinstance(offspring, collections.abc.Iterable): offspring = offspring[0] return offspring -def copy_and_mutate(parents, var_op, rng_=None): - rng = np.random.default_rng(rng_) +def copy_and_mutate(parents, var_op, rng=None): + rng = np.random.default_rng(rng) offspring = copy.deepcopy(parents) - var_op(offspring, rng_=rng) + var_op(offspring, rng=rng) if isinstance(offspring, collections.abc.Iterable): offspring = offspring[0] return offspring -def copy_and_crossover(parents, var_op, rng_=None): - rng = np.random.default_rng(rng_) +def copy_and_crossover(parents, var_op, rng=None): + rng = np.random.default_rng(rng) offspring = copy.deepcopy(parents) - var_op(offspring[0],offspring[1], rng_=rng) + var_op(offspring[0],offspring[1], rng=rng) return offspring[0] def parallel_get_id(n_jobs, individual_list): diff --git a/tpot2/search_spaces/__init__.py b/tpot2/search_spaces/__init__.py new file mode 100644 index 00000000..e7478460 --- /dev/null +++ b/tpot2/search_spaces/__init__.py @@ -0,0 +1,4 @@ +from .base import * +from . import nodes +from . import pipelines +from . 
import templates \ No newline at end of file diff --git a/tpot2/search_spaces/base.py b/tpot2/search_spaces/base.py new file mode 100644 index 00000000..2c91beb6 --- /dev/null +++ b/tpot2/search_spaces/base.py @@ -0,0 +1,34 @@ +import tpot2 +import numpy as np +import pandas as pd +import sklearn +from tpot2 import config +from typing import Generator, List, Tuple, Union +import random +from sklearn.base import BaseEstimator + + +class SklearnIndividual(tpot2.BaseIndividual): + + def __init__(self,) -> None: + super().__init__() + + def mutate(self, rng=None): + return + + def crossover(self, other, rng=None): + return + + def export_pipeline(self) -> BaseEstimator: + return + + def unique_id(self): + return + + +class SklearnIndividualGenerator(): + def __init__(self,): + pass + + def generate(self, rng=None) -> SklearnIndividual: + pass \ No newline at end of file diff --git a/tpot2/search_spaces/nodes/__init__.py b/tpot2/search_spaces/nodes/__init__.py new file mode 100644 index 00000000..35cebf87 --- /dev/null +++ b/tpot2/search_spaces/nodes/__init__.py @@ -0,0 +1,2 @@ +from .estimator_node import * +from .genetic_feature_selection import * \ No newline at end of file diff --git a/tpot2/search_spaces/nodes/estimator_node.py b/tpot2/search_spaces/nodes/estimator_node.py new file mode 100644 index 00000000..e44dc4f1 --- /dev/null +++ b/tpot2/search_spaces/nodes/estimator_node.py @@ -0,0 +1,55 @@ +# try https://automl.github.io/ConfigSpace/main/api/hyperparameters.html +import tpot2 +import numpy as np +import pandas as pd +import sklearn +from tpot2 import config +from typing import Generator, List, Tuple, Union +import random +from ..base import SklearnIndividual, SklearnIndividualGenerator +from ConfigSpace import ConfigurationSpace + +class EstimatorNodeIndividual(SklearnIndividual): + def __init__(self, method: type, + space: ConfigurationSpace, + rng=None) -> None: + super().__init__() + self.method = method + self.space = space + + rng = 
np.random.default_rng(rng) + self.space.seed(rng.integers(0, 2**32)) + self.hyperparameters = self.space.sample_configuration().get_dictionary() + + def mutate(self, rng=None): + rng = np.random.default_rng(rng) + self.space.seed(rng.integers(0, 2**32)) + self.hyperparameters = self.space.sample_configuration().get_dictionary() + + return True + + def crossover(self, other, rng=None): + rng = np.random.default_rng(rng) + if self.method != other.method: + return False + + #loop through hyperparameters, randomly swap items in self.hyperparameters with items in other.hyperparameters + for hyperparameter in self.space: + if rng.choice([True, False]): + if hyperparameter in other.hyperparameters: + self.hyperparameters[hyperparameter] = other.hyperparameters[hyperparameter] + + def export_pipeline(self, **kwargs): + return self.method(**self.hyperparameters) + + def unique_id(self): + #return a dictionary of the method and the hyperparameters + return (self.method, self.hyperparameters) + +class EstimatorNode(SklearnIndividualGenerator): + def __init__(self, method, space): + self.method = method + self.space = space + + def generate(self, rng=None): + return EstimatorNodeIndividual(self.method, self.space) \ No newline at end of file diff --git a/tpot2/search_spaces/nodes/estimator_node_simple.py b/tpot2/search_spaces/nodes/estimator_node_simple.py new file mode 100644 index 00000000..0d876f0b --- /dev/null +++ b/tpot2/search_spaces/nodes/estimator_node_simple.py @@ -0,0 +1,64 @@ +# try https://automl.github.io/ConfigSpace/main/api/hyperparameters.html +import tpot2 +import numpy as np +import pandas as pd +import sklearn +from tpot2 import config +from typing import Generator, List, Tuple, Union +import random +from ..base import SklearnIndividual, SklearnIndividualGenerator + +class EstimatorNodeIndividual(SklearnIndividual): + def __init__(self, method, space ) -> None: + super().__init__() + self.method = method + self.space = space + + 
self._mutate_hyperparameters() + + def mutate(self, rng=None): + rng = np.random.default_rng(rng) + return self._mutate_hyperparameters(rng) + + def _mutate_hyperparameters(self, rng=None): + rng = np.random.default_rng(rng) + self.hyperparameters = {} + #sample new hyperparameters from the space + for hyperparameter in self.space: + hyperparameter_space = self.space[hyperparameter] + if isinstance(hyperparameter_space, list): + hp = rng.choice(hyperparameter_space) + elif isinstance(hyperparameter_space, tuple): + hp = rng.uniform(hyperparameter_space[0], hyperparameter_space[1]) + else: + hp = hyperparameter_space + + self.hyperparameters[hyperparameter] = hp + + return True + + def crossover(self, other, rng=None): + rng = np.random.default_rng(rng) + if self.method != other.method: + return False + + #loop through hyperparameters, randomly swap items in self.hyperparameters with items in other.hyperparameters + for hyperparameter in self.space: + if rng.choice([True, False]): + if hyperparameter in other.hyperparameters: + self.hyperparameters[hyperparameter] = other.hyperparameters[hyperparameter] + + def export_pipeline(self, **kwargs): + return self.method(**self.hyperparameters) + + def unique_id(self): + #return a dictionary of the method and the hyperparameters + return (self.method, self.hyperparameters) + +class EstimatorNode(SklearnIndividualGenerator): + def __init__(self, method, space): + self.method = method + self.space = space + + def generate(self, rng=None): + return EstimatorNodeIndividual(self.method, self.space) \ No newline at end of file diff --git a/tpot2/search_spaces/nodes/genetic_feature_selection.py b/tpot2/search_spaces/nodes/genetic_feature_selection.py new file mode 100644 index 00000000..54761123 --- /dev/null +++ b/tpot2/search_spaces/nodes/genetic_feature_selection.py @@ -0,0 +1,178 @@ +from numpy import iterable +import tpot2 +import numpy as np +import sklearn +import sklearn.datasets +import numpy as np + +import pandas as pd 
+import os, os.path +from sklearn.base import BaseEstimator +from sklearn.feature_selection._base import SelectorMixin + +from ..base import SklearnIndividual, SklearnIndividualGenerator + +class MaskSelector(BaseEstimator, SelectorMixin): + """Select predefined feature subsets.""" + + def __init__(self, mask): + self.mask = mask + + def fit(self, X, y=None): + return self + + def _get_support_mask(self): + return np.array(self.mask) + + +class GeneticFeatureSelectorIndividual(SklearnIndividual): + def __init__( self, + mask, + start_p=0.2, + mutation_rate = 0.5, + crossover_rate = 0.5, + mutation_rate_rate = 0, + crossover_rate_rate = 0, + rng=None, + ): + + self.start_p = start_p + self.mutation_rate = mutation_rate + self.crossover_rate = crossover_rate + self.mutation_rate_rate = mutation_rate_rate + self.crossover_rate_rate = crossover_rate_rate + + rng = np.random.default_rng(rng) + + if isinstance(mask, int): + #list of random bollean values + self.mask = rng.choice([True, False], size=mask, p=[self.start_p,1-self.start_p]) + else: + self.mask = mask + + self.mutation_list = [self._mutate_add, self._mutate_remove] + self.crossover_list = [self._crossover_swap] + + + def mutate(self, rng=None): + rng = np.random.default_rng(rng) + + if rng.uniform() < self.mutation_rate_rate: + self.mutation_rate = self.mutation_rate * rng.uniform(0.5, 2) + self.mutation_rate = min(self.mutation_rate, 2) + self.mutation_rate = max(self.mutation_rate, 1/len(self.mask)) + + return rng.choice(self.mutation_list)(rng) + + def crossover(self, other, rng=None): + rng = np.random.default_rng(rng) + + if rng.uniform() < self.crossover_rate_rate: + self.crossover_rate = self.crossover_rate * rng.uniform(0.5, 2) + self.crossover_rate = min(self.crossover_rate, .6) + self.crossover_rate = max(self.crossover_rate, 1/len(self.mask)) + + return rng.choice(self.crossover_list)(other, rng) + + + # def _mutate_add(self, rng=None): + # rng = np.random.default_rng(rng) + + # add_mask = 
rng.choice([True, False], size=self.mask.shape, p=[self.mutation_rate,1-self.mutation_rate]) + # self.mask = np.logical_or(self.mask, add_mask) + # return True + + # def _mutate_remove(self, rng=None): + # rng = np.random.default_rng(rng) + + # add_mask = rng.choice([False, True], size=self.mask.shape, p=[self.mutation_rate,1-self.mutation_rate]) + # self.mask = np.logical_and(self.mask, add_mask) + # return True + + def _mutate_add(self, rng=None): + rng = np.random.default_rng(rng) + + num_pos = np.sum(self.mask) + num_neg = len(self.mask) - num_pos + + if num_neg == 0: + return False + + to_add = int(self.mutation_rate * num_pos) + to_add = max(to_add, 1) + + p = to_add / num_neg + p = min(p, 1) + + add_mask = rng.choice([True, False], size=self.mask.shape, p=[p,1-p]) + if sum(np.logical_or(self.mask, add_mask)) == 0: + pass + self.mask = np.logical_or(self.mask, add_mask) + return True + + def _mutate_remove(self, rng=None): + rng = np.random.default_rng(rng) + + num_pos = np.sum(self.mask) + if num_pos == 1: + return False + + num_neg = len(self.mask) - num_pos + + to_remove = int(self.mutation_rate * num_pos) + to_remove = max(to_remove, 1) + + p = to_remove / num_pos + p = min(p, .5) + + remove_mask = rng.choice([True, False], size=self.mask.shape, p=[p,1-p]) + self.mask = np.logical_and(self.mask, remove_mask) + + + if sum(self.mask) == 0: + index = rng.choice(len(self.mask)) + self.mask[index] = True + + return True + + def _crossover_swap(self, ss2, rng=None): + rng = np.random.default_rng(rng) + mask = rng.choice([True, False], size=self.mask.shape, p=[self.crossover_rate,1-self.crossover_rate]) + + self.mask = np.where(mask, self.mask, ss2.mask) + + def export_pipeline(self): + return MaskSelector(mask=self.mask) + + + def unique_id(self): + return self.mask + + +class GeneticFeatureSelectorNode(SklearnIndividualGenerator): + def __init__(self, + mask, + start_p=0.2, + mutation_rate = 0.5, + crossover_rate = 0.5, + mutation_rate_rate = 0, + 
crossover_rate_rate = 0, + rng=None,): + + self.mask = mask + self.start_p = start_p + self.mutation_rate = mutation_rate + self.crossover_rate = crossover_rate + self.mutation_rate_rate = mutation_rate_rate + self.crossover_rate_rate = crossover_rate_rate + self.rng = rng + + def generate(self, rng=None) -> SklearnIndividual: + return GeneticFeatureSelectorIndividual( mask=self.mask, + start_p=self.start_p, + mutation_rate=self.mutation_rate, + crossover_rate=self.crossover_rate, + mutation_rate_rate=self.mutation_rate_rate, + crossover_rate_rate=self.crossover_rate_rate, + rng=self.rng + ) \ No newline at end of file diff --git a/tpot2/search_spaces/pipelines/__init__.py b/tpot2/search_spaces/pipelines/__init__.py new file mode 100644 index 00000000..ec90eb0e --- /dev/null +++ b/tpot2/search_spaces/pipelines/__init__.py @@ -0,0 +1,6 @@ +from .choice import * +from .dynamic_linear import * +from .sequential import * +from .graph import * +from .tree import * +from .wrapper import * \ No newline at end of file diff --git a/tpot2/search_spaces/pipelines/choice.py b/tpot2/search_spaces/pipelines/choice.py new file mode 100644 index 00000000..cdf0c6d6 --- /dev/null +++ b/tpot2/search_spaces/pipelines/choice.py @@ -0,0 +1,52 @@ +import tpot2 +import numpy as np +import pandas as pd +import sklearn +from tpot2 import config +from typing import Generator, List, Tuple, Union +import random +from ..base import SklearnIndividual, SklearnIndividualGenerator + +class ChoicePipelineIndividual(SklearnIndividual): + def __init__(self, choice_list : List[SklearnIndividualGenerator], rng=None) -> None: + super().__init__() + + self.choice_list = choice_list + self.node = np.random.default_rng(rng).choice(self.choice_list).generate() + + + def mutate(self, rng=None): + rng = np.random.default_rng(rng) + if rng.choice([True, False]): + return self._mutate_select_new_node(rng) + else: + return self._mutate_node(rng) + + def _mutate_select_new_node(self, rng=None): + self.node = 
random.choice(self.choice_list).generate() + return True + + def _mutate_node(self, rng=None): + return self.node.mutate(rng) + + def crossover(self, other, rng=None): + return self.node.crossover(other.node, rng) + + def export_pipeline(self): + return self.node.export_pipeline() + + def unique_id(self): + return self.node.unique_id() + + +class ChoicePipeline(SklearnIndividualGenerator): + def __init__(self, choice_list : List[SklearnIndividualGenerator] ) -> None: + self.choice_list = choice_list + + """ + Takes in a list of search spaces. Will select one node from the search space. + + """ + + def generate(self, rng=None): + return ChoicePipelineIndividual(self.choice_list) \ No newline at end of file diff --git a/tpot2/search_spaces/pipelines/dynamic_linear.py b/tpot2/search_spaces/pipelines/dynamic_linear.py new file mode 100644 index 00000000..7408fe8b --- /dev/null +++ b/tpot2/search_spaces/pipelines/dynamic_linear.py @@ -0,0 +1,97 @@ +import tpot2 +import numpy as np +import pandas as pd +import sklearn +from tpot2 import config +from typing import Generator, List, Tuple, Union +import random +from ..base import SklearnIndividual, SklearnIndividualGenerator + +import copy + +class DynamicLinearPipelineIndividual(SklearnIndividual): + # takes in a single search space. + # will produce a pipeline of variable length. Each step in the pipeline will be pulled from the search space provided. 
+ + def __init__(self, search_space : SklearnIndividualGenerator, min_length: int, max_length: int ) -> None: + super().__init__() + + rng = np.random.default_rng() + + self.search_space = search_space + self.min_length = min_length + self.max_length = max_length + + self.pipeline = self._generate_pipeline(rng) + + def _generate_pipeline(self, rng=None): + rng = np.random.default_rng() + pipeline = [] + length = rng.integers(self.min_length, self.max_length) + for _ in range(length): + pipeline.append(self.search_space.generate(rng)) + return pipeline + + + def mutate(self, rng=None): + rng = np.random.default_rng() + options = [] + if len(self.pipeline) > self.min_length: + options.append(self._mutate_remove_node) + if len(self.pipeline) < self.max_length: + options.append(self._mutate_add_node) + options.append(self._mutate_step) + + return rng.choice(options)(rng) + + def _mutate_add_node(self, rng=None): + rng = np.random.default_rng() + new_node = self.search_space.generate(rng) + idx = rng.integers(len(self.pipeline)) + self.pipeline.insert(idx, new_node) + + def _mutate_remove_node(self, rng=None): + rng = np.random.default_rng() + idx = rng.integers(len(self.pipeline)) + self.pipeline.pop(idx) + + def _mutate_step(self, rng=None): + #choose a random step in the pipeline and mutate it + rng = np.random.default_rng() + step = rng.choice(self.pipeline) + return step.mutate(rng) + + + def crossover(self, other, rng=None): + rng = np.random.default_rng() + + if len(self.pipeline) < 2 or len(other.pipeline) < 2: + return False + + idx = rng.integers(1,len(self.pipeline)) + idx2 = rng.integers(1,len(other.pipeline)) + self.pipeline[idx:] = copy.deepcopy(other.pipeline[idx2:]) + + return True + + def export_pipeline(self, **graph_pipeline_args): + return [step.export_pipeline(**graph_pipeline_args) for step in self.pipeline] + + def unique_id(self): + return tuple([step.unique_id() for step in self.pipeline]) + + +class 
DynamicLinearPipeline(SklearnIndividualGenerator): + def __init__(self, search_space : SklearnIndividualGenerator, min_length: int, max_length: int ) -> None: + self.search_space = search_space + self.min_length = min_length + self.max_length = max_length + + """ + Takes in a single search space. Will produce a linear pipeline of variable length. Each step in the pipeline will be pulled from the search space provided. + + + """ + + def generate(self, rng=None): + return DynamicLinearPipelineIndividual(self.search_space, self.min_length, self.max_length) \ No newline at end of file diff --git a/tpot2/search_spaces/pipelines/genetic_sample_weight.py b/tpot2/search_spaces/pipelines/genetic_sample_weight.py new file mode 100644 index 00000000..db731a85 --- /dev/null +++ b/tpot2/search_spaces/pipelines/genetic_sample_weight.py @@ -0,0 +1 @@ +from ..base import SklearnIndividual, SklearnIndividualGenerator \ No newline at end of file diff --git a/tpot2/search_spaces/pipelines/graph.py b/tpot2/search_spaces/pipelines/graph.py new file mode 100644 index 00000000..9332a011 --- /dev/null +++ b/tpot2/search_spaces/pipelines/graph.py @@ -0,0 +1,645 @@ +import tpot2 +import numpy as np +import pandas as pd +import sklearn +from tpot2 import config +from typing import Generator, List, Tuple, Union +import random +from ..base import SklearnIndividual, SklearnIndividualGenerator +import networkx as nx +import copy +import matplotlib.pyplot as plt +import itertools +from .graph_utils import * +from ..nodes.estimator_node import EstimatorNodeIndividual + + +class GraphPipelineIndividual(SklearnIndividual): + def __init__(self, + root_search_space : SklearnIndividualGenerator, + leaf_search_space : SklearnIndividualGenerator = None, + inner_search_space : SklearnIndividualGenerator =None, + max_size: int = 10, + crossover_same_depth=False, + rng=None) -> None: + """ + Generates a tree shaped pipeline individual. 
Can be used to export a sklearn Pipeline that uses feature unions to merge branches of the pipeline. + + """ + super().__init__() + + self.__debug = False + + rng = np.random.default_rng(rng) + + self.root_search_space = root_search_space + self.leaf_search_space = leaf_search_space + self.inner_search_space = inner_search_space + self.max_size = max_size + self.crossover_same_depth = crossover_same_depth + + self.root = self.root_search_space.generate(rng) + self.graph = nx.DiGraph() + self.graph.add_node(self.root) + + if self.leaf_search_space is not None: + self.leaf = self.leaf_search_space.generate(rng) + self.graph.add_node(self.leaf) + self.graph.add_edge(self.root, self.leaf) + + self.mutate_methods_list = [self._mutate_insert_leaf, self._mutate_insert_inner_node, self._mutate_remove_node, self._mutate_node] + self.crossover_methods_list = [self._crossover_swap_branch, self._crossover_swap_node, self._crossover_take_branch] #TODO self._crossover_nodes, + + self.merge_duplicated_nodes_toggle = True + + + def mutate(self, rng=None): + rng = np.random.default_rng(rng) + self.key = None + + rng.shuffle(self.mutate_methods_list) + for mutate_method in self.mutate_methods_list: + if mutate_method(rng=rng): + + if self.merge_duplicated_nodes_toggle: + self._merge_duplicated_nodes() + + if self.__debug: + print(mutate_method) + + if self.root not in self.graph.nodes: + print('lost root something went wrong with ', mutate_method) + + if len(self.graph.predecessors(self.root)) > 0: + print('root has parents ', mutate_method) + + if any([n in nx.ancestors(self.graph,n) for n in self.graph.nodes]): + print('a node is connecting to itself...') + + if self.__debug: + try: + nx.find_cycle(self.graph) + print('something went wrong with ', mutate_method) + except: + pass + + return True + + return False + + + + + def _mutate_insert_leaf(self, rng=None): + rng = np.random.default_rng(rng) + if self.max_size > self.graph.number_of_nodes(): + sorted_nodes_list = 
list(self.graph.nodes) + rng.shuffle(sorted_nodes_list) #TODO: sort by number of children and/or parents? bias model one way or another + for node in sorted_nodes_list: + #if leafs are protected, check if node is a leaf + #if node is a leaf, skip because we don't want to add node on top of node + if (self.leaf_search_space is not None #if leafs are protected + and len(list(self.graph.successors(node))) == 0 #if node is leaf + and len(list(self.graph.predecessors(node))) > 0 #except if node is root, in which case we want to add a leaf even if it happens to be a leaf too + ): + + continue + + #If node *is* the root or is not a leaf, add leaf node. (dont want to add leaf on top of leaf) + if self.leaf_search_space is not None: + new_node = self.leaf_search_space.generate(rng) + else: + new_node = self.inner_search_space.generate(rng) + + self.graph.add_node(new_node) + self.graph.add_edge(node, new_node) + return True + + return False + + def _mutate_insert_inner_node(self, rng=None): + rng = np.random.default_rng(rng) + if self.max_size > self.graph.number_of_nodes(): + sorted_nodes_list = list(self.graph.nodes) + sorted_nodes_list2 = list(self.graph.nodes) + rng.shuffle(sorted_nodes_list) #TODO: sort by number of children and/or parents? 
bias model one way or another + rng.shuffle(sorted_nodes_list2) + for node in sorted_nodes_list: + #loop through children of node + for child_node in list(self.graph.successors(node)): + + if child_node is not node and child_node not in nx.ancestors(self.graph, node): + if self.leaf_search_space is not None: + #If if we are protecting leafs, dont add connection into a leaf + if len(list(nx.descendants(self.graph,node))) ==0 : + continue + + new_node = self.inner_search_space.generate(rng) + + self.graph.add_node(new_node) + self.graph.add_edges_from([(node, new_node), (new_node, child_node)]) + self.graph.remove_edge(node, child_node) + return True + + return False + + + def _mutate_remove_node(self, rng=None): + ''' + Removes a randomly chosen node and connects its parents to its children. + If the node is the only leaf for an inner node and 'leaf_search_space' is not none, we do not remove it. + ''' + rng = np.random.default_rng(rng) + nodes_list = list(self.graph.nodes) + nodes_list.remove(self.root) + leaves = get_leaves(self.graph) + + while len(nodes_list) > 0: + node = rng.choice(nodes_list) + nodes_list.remove(node) + + if self.leaf_search_space is not None and len(list(nx.descendants(self.graph,node))) == 0 : #if the node is a leaf + if len(leaves) <= 1: + continue #dont remove the last leaf + leaf_parents = self.graph.predecessors(node) + + # if any of the parents of the node has one one child, continue + if any([len(list(self.graph.successors(lp))) < 2 for lp in leaf_parents]): #dont remove a leaf if it is the only input into another node. + continue + + remove_and_stitch(self.graph, node) + remove_nodes_disconnected_from_node(self.graph, self.root) + return True + + else: + remove_and_stitch(self.graph, node) + remove_nodes_disconnected_from_node(self.graph, self.root) + return True + + return False + + + + def _mutate_node(self, rng=None): + ''' + Mutates the hyperparameters for a randomly chosen node in the graph. 
+ ''' + rng = np.random.default_rng(rng) + sorted_nodes_list = list(self.graph.nodes) + rng.shuffle(sorted_nodes_list) + completed_one = False + for node in sorted_nodes_list: + if node.mutate(rng): + return True + return False + + def _mutate_remove_edge(self, rng=None): + ''' + Deletes an edge as long as deleting that edge does not make the graph disconnected. + ''' + rng = np.random.default_rng(rng) + sorted_nodes_list = list(self.graph.nodes) + rng.shuffle(sorted_nodes_list) + for child_node in sorted_nodes_list: + parents = list(self.graph.predecessors(child_node)) + if len(parents) > 1: # if it has more than one parent, you can remove an edge (if this is the only child of a node, it will become a leaf) + + for parent_node in parents: + # if removing the egde will make the parent_node a leaf node, skip + if self.leaf_search_space is not None and len(list(self.graph.successors(parent_node))) < 2: + continue + + self.graph.remove_edge(parent_node, child_node) + return True + return False + + def _mutate_add_edge(self, rng=None): + ''' + Randomly add an edge from a node to another node that is not an ancestor of the first node. 
+ ''' + rng = np.random.default_rng(rng) + sorted_nodes_list = list(self.graph.nodes) + rng.shuffle(sorted_nodes_list) + for child_node in sorted_nodes_list: + for parent_node in sorted_nodes_list: + if self.leaf_search_space is not None: + if len(list(self.graph.successors(parent_node))) == 0: + continue + + # skip if + # - parent and child are the same node + # - edge already exists + # - child is an ancestor of parent + if (child_node is not parent_node) and not self.graph.has_edge(parent_node,child_node) and (child_node not in nx.ancestors(self.graph, parent_node)): + self.graph.add_edge(parent_node,child_node) + return True + + return False + + def _mutate_insert_bypass_node(self, rng=None): + rng = np.random.default_rng(rng) + if self.max_size > self.graph.number_of_nodes(): + sorted_nodes_list = list(self.graph.nodes) + sorted_nodes_list2 = list(self.graph.nodes) + rng.shuffle(sorted_nodes_list) #TODO: sort by number of children and/or parents? bias model one way or another + rng.shuffle(sorted_nodes_list2) + for node in sorted_nodes_list: + for child_node in sorted_nodes_list2: + if child_node is not node and child_node not in nx.ancestors(self.graph, node): + if self.leaf_search_space is not None: + #If if we are protecting leafs, dont add connection into a leaf + if len(list(nx.descendants(self.graph,node))) ==0 : + continue + + new_node = self.inner_search_space.generate(rng) + + self.graph.add_node(new_node) + self.graph.add_edges_from([(node, new_node), (new_node, child_node)]) + return True + + return False + + + def crossover(self, ind2, rng=None): + ''' + self is the first individual, ind2 is the second individual + If crossover_same_depth, it will select graphindividuals at the same recursive depth. + Otherwise, it will select graphindividuals randomly from the entire graph and its subgraphs. + + This does not impact graphs without subgraphs. And it does not impacts nodes that are not graphindividuals. 
Cros + ''' + + rng = np.random.default_rng(rng) + + rng.shuffle(self.crossover_methods_list) + + finished = False + + for crossover_method in self.crossover_methods_list: + if crossover_method(ind2, rng=rng): + self._merge_duplicated_nodes() + finished = True + break + + if self.__debug: + try: + nx.find_cycle(self.graph) + print('something went wrong with ', crossover_method) + except: + pass + + return finished + + + def _crossover_swap_branch(self, G2, rng=None): + ''' + swaps a branch from parent1 with a branch from parent2. does not modify parent2 + ''' + rng = np.random.default_rng(rng) + + if self.crossover_same_depth: + pair_gen = select_nodes_same_depth(self.graph, self.root, G2.graph, G2.root, rng=rng) + else: + pair_gen = select_nodes_randomly(self.graph, G2.graph, rng=rng) + + for node1, node2 in pair_gen: + #TODO: if root is in inner_config_dict, then do use it? + if node1 is self.root or node2 is G2.root: #dont want to add root as inner node + continue + + #check if node1 is a leaf and leafs are protected, don't add an input to the leave + if self.leaf_search_space is not None: #if we are protecting leaves, + node1_is_leaf = len(list(self.graph.successors(node1))) == 0 + node2_is_leaf = len(list(G2.graph.successors(node2))) == 0 + #if not ((node1_is_leaf and node1_is_leaf) or (not node1_is_leaf and not node2_is_leaf)): #if node1 is a leaf + if (node1_is_leaf and (not node2_is_leaf)) or ( (not node1_is_leaf) and node2_is_leaf): + #only continue if node1 and node2 are both leaves or both not leaves + continue + + temp_graph_1 = self.graph.copy() + temp_graph_1.remove_node(node1) + remove_nodes_disconnected_from_node(temp_graph_1, self.root) + + #isolating the branch + branch2 = G2.graph.copy() + n2_descendants = nx.descendants(branch2,node2) + for n in list(branch2.nodes): + if n not in n2_descendants and n is not node2: #removes all nodes not in the branch + branch2.remove_node(n) + + branch2 = copy.deepcopy(branch2) + branch2_root = 
get_roots(branch2)[0] + temp_graph_1.add_edges_from(branch2.edges) + for p in list(self.graph.predecessors(node1)): + temp_graph_1.add_edge(p,branch2_root) + + if temp_graph_1.number_of_nodes() > self.max_size: + continue + + self.graph = temp_graph_1 + + return True + return False + + + def _crossover_take_branch(self, G2, rng=None): + ''' + Takes a subgraph from Parent2 and add it to a randomly chosen node in Parent1. + ''' + rng = np.random.default_rng(rng) + + if self.crossover_same_depth: + pair_gen = select_nodes_same_depth(self.graph, self.root, G2.graph, G2.root, rng=rng) + else: + pair_gen = select_nodes_randomly(self.graph, G2.graph, rng=rng) + + for node1, node2 in pair_gen: + #TODO: if root is in inner_config_dict, then do use it? + if node2 is G2.root: #dont want to add root as inner node + continue + + + #check if node1 is a leaf and leafs are protected, don't add an input to the leave + if self.leaf_search_space is not None and len(list(self.graph.successors(node1))) == 0: + continue + + #icheck if node2 is graph individual + # if isinstance(node2,GraphIndividual): + # if not ((isinstance(node2,GraphIndividual) and ("Recursive" in self.inner_config_dict or "Recursive" in self.leaf_search_space))): + # continue + + #isolating the branch + branch2 = G2.graph.copy() + n2_descendants = nx.descendants(branch2,node2) + for n in list(branch2.nodes): + if n not in n2_descendants and n is not node2: #removes all nodes not in the branch + branch2.remove_node(n) + + #if node1 plus node2 branch has more than max_children, skip + if branch2.number_of_nodes() + self.graph.number_of_nodes() > self.max_size: + continue + + branch2 = copy.deepcopy(branch2) + branch2_root = get_roots(branch2)[0] + self.graph.add_edges_from(branch2.edges) + self.graph.add_edge(node1,branch2_root) + + return True + return False + + + + def _crossover_nodes(self, G2, rng=None): + ''' + Swaps the hyperparamters of one randomly chosen node in Parent1 with the hyperparameters of randnomly 
chosen node in Parent2. + ''' + rng = np.random.default_rng(rng) + + if self.crossover_same_depth: + pair_gen = select_nodes_same_depth(self.graph, self.root, G2.graph, G2.root, rng=rng) + else: + pair_gen = select_nodes_randomly(self.graph, G2.graph, rng=rng) + + for node1, node2 in pair_gen: + + #if both nodes are leaves + if len(list(self.graph.successors(node1)))==0 and len(list(G2.graph.successors(node2)))==0: + + try: + if node1.crossover(node2): + return True + except: + pass + + #if both nodes are inner nodes + if len(list(self.graph.successors(node1)))>0 and len(list(G2.graph.successors(node2)))>0: + if len(list(self.graph.predecessors(node1)))>0 and len(list(G2.graph.predecessors(node2))): + if node1.crossover(node2): + return True + + #if both nodes are root nodes + if node1 is self.root and node2 is G2.root: + if node1.crossover(node2): + return True + + + return False + + #not including the nodes, just their children + #Finds leaves attached to nodes and swaps them + def _crossover_swap_leaf_at_node(self, G2, rng=None): + rng = np.random.default_rng(rng) + + if self.crossover_same_depth: + pair_gen = select_nodes_same_depth(self.graph, self.root, G2.graph, G2.root, rng=rng) + else: + pair_gen = select_nodes_randomly(self.graph, G2.graph, rng=rng) + + success = False + for node1, node2 in pair_gen: + # if leaves are protected node1 and node2 must both be leaves or both be inner nodes + if self.leaf_search_space is not None and not (len(list(self.graph.successors(node1)))==0 ^ len(list(G2.graph.successors(node2)))==0): + continue + #self_leafs = [c for c in nx.descendants(self.graph,node1) if len(list(self.graph.successors(c)))==0 and c is not node1] + node_leafs = [c for c in nx.descendants(G2.graph,node2) if len(list(G2.graph.successors(c)))==0 and c is not node2] + + # if len(self_leafs) >0: + # for c in self_leafs: + # if random.choice([True,False]): + # self.graph.remove_node(c) + # G2.graph.add_edge(node2, c) + # success = True + + if 
len(node_leafs) >0: + for c in node_leafs: + if rng.choice([True,False]): + G2.graph.remove_node(c) + self.graph.add_edge(node1, c) + success = True + + return success + + + + + def _crossover_swap_node(self, G2, rng=None): + ''' + Swaps randomly chosen node from Parent1 with a randomly chosen node from Parent2. + ''' + rng = np.random.default_rng(rng) + + if self.crossover_same_depth: + pair_gen = select_nodes_same_depth(self.graph, self.root, G2.graph, G2.root, rng=rng) + else: + pair_gen = select_nodes_randomly(self.graph, G2.graph, rng=rng) + + for node1, node2 in pair_gen: + if node1 is self.root or node2 is G2.root: #TODO: allow root + continue + + #if leaves are protected + if self.leaf_search_space is not None: + #if one node is a leaf, the other must be a leaf + if not((len(list(self.graph.successors(node1)))==0) ^ (len(list(G2.graph.successors(node2)))==0)): + continue #only continue if both are leaves, or both are not leaves + + + n1_s = self.graph.successors(node1) + n1_p = self.graph.predecessors(node1) + + n2_s = G2.graph.successors(node2) + n2_p = G2.graph.predecessors(node2) + + self.graph.remove_node(node1) + G2.graph.remove_node(node2) + + self.graph.add_node(node2) + + self.graph.add_edges_from([ (node2, n) for n in n1_s]) + G2.graph.add_edges_from([ (node1, n) for n in n2_s]) + + self.graph.add_edges_from([ (n, node2) for n in n1_p]) + G2.graph.add_edges_from([ (n, node1) for n in n2_p]) + + return True + + return False + + + def _merge_duplicated_nodes(self): + + graph_changed = False + merged = False + while(not merged): + node_list = list(self.graph.nodes) + merged = True + for node, other_node in itertools.product(node_list, node_list): + if node is other_node or (not isinstance(node, EstimatorNodeIndividual)) or (not isinstance(other_node, EstimatorNodeIndividual)): #TODO make this account for other types of nodes. 
maybe add a __eq__ method to the nodes + continue + + #If nodes are same class/hyperparameters + if node.method == other_node.method and node.hyperparameters == other_node.hyperparameters: + node_children = set(self.graph.successors(node)) + other_node_children = set(self.graph.successors(other_node)) + #if nodes have identical children, they can be merged + if node_children == other_node_children: + for other_node_parent in list(self.graph.predecessors(other_node)): + if other_node_parent not in self.graph.predecessors(node): + self.graph.add_edge(other_node_parent,node) + + self.graph.remove_node(other_node) + merged=False + graph_changed = True + break + + return graph_changed + + + def export_pipeline(self, **graph_pipeline_args): + estimator_graph = self.graph.copy() + + #mapping = {node:node.method_class(**node.hyperparameters) for node in estimator_graph} + label_remapping = {} + label_to_instance = {} + + for node in estimator_graph: + this_pipeline_node = node.export_pipeline() + found_unique_label = False + i=1 + while not found_unique_label: + label = "{0}_{1}".format(this_pipeline_node.__class__.__name__, i) + if label not in label_to_instance: + found_unique_label = True + else: + i+=1 + + label_remapping[node] = label + label_to_instance[label] = this_pipeline_node + + estimator_graph = nx.relabel_nodes(estimator_graph, label_remapping) + + for label, instance in label_to_instance.items(): + estimator_graph.nodes[label]["instance"] = instance + + return tpot2.GraphPipeline(graph=estimator_graph, **graph_pipeline_args) + + + def plot(self): + G = self.graph.reverse() + #TODO clean this up + try: + pos = nx.planar_layout(G) # positions for all nodes + except: + pos = nx.shell_layout(G) + # nodes + options = {'edgecolors': 'tab:gray', 'node_size': 800, 'alpha': 0.9} + nodelist = list(G.nodes) + node_color = [plt.cm.Set1(G.nodes[n]['recursive depth']) for n in G] + + fig, ax = plt.subplots() + + nx.draw(G, pos, nodelist=nodelist, node_color=node_color, 
ax=ax, **options) + + + '''edgelist = [] + for n in n1.node_set: + for child in n.children: + edgelist.append((n,child))''' + + # edges + #nx.draw_networkx_edges(G, pos, width=3.0, arrows=True) + '''nx.draw_networkx_edges( + G, + pos, + edgelist=[edgelist], + width=8, + alpha=0.5, + edge_color='tab:red', + )''' + + + + # some math labels + labels = {} + for i, n in enumerate(G.nodes): + labels[n] = n.method_class.__name__ + "\n" + str(n.hyperparameters) + + + nx.draw_networkx_labels(G, pos, labels,ax=ax, font_size=7, font_color='black') + + plt.tight_layout() + plt.axis('off') + plt.show() + + + + + + + def unique_id(self): + return + + +class GraphPipeline(SklearnIndividualGenerator): + def __init__(self, root_search_space : SklearnIndividualGenerator, + leaf_search_space : SklearnIndividualGenerator = None, + inner_search_space : SklearnIndividualGenerator =None, + max_size: int = 10, + crossover_same_depth=False, + rng=None) -> None: + + """ + Generates a directed acyclic graph of variable size. Search spaces for root, leaf, and inner nodes can be defined separately if desired. 
+ + + """ + + + self.search_space = root_search_space + self.leaf_search_space = leaf_search_space + self.inner_search_space = inner_search_space + self.max_size = max_size + self.crossover_same_depth = crossover_same_depth + + def generate(self, rng=None): + return GraphPipelineIndividual(self.search_space, self.leaf_search_space, self.inner_search_space, self.max_size, self.crossover_same_depth, rng=rng) \ No newline at end of file diff --git a/tpot2/individual_representations/graph_pipeline_individual/graph_utils/graph_utils.py b/tpot2/search_spaces/pipelines/graph_utils.py similarity index 93% rename from tpot2/individual_representations/graph_pipeline_individual/graph_utils/graph_utils.py rename to tpot2/search_spaces/pipelines/graph_utils.py index 1956d49d..2470a07b 100644 --- a/tpot2/individual_representations/graph_pipeline_individual/graph_utils/graph_utils.py +++ b/tpot2/search_spaces/pipelines/graph_utils.py @@ -55,8 +55,8 @@ def invert_dictionary(d): return inv_map -def select_nodes_same_depth(g1, node1, g2, node2, rng_=None): - rng = np.random.default_rng(rng_) +def select_nodes_same_depth(g1, node1, g2, node2, rng=None): + rng = np.random.default_rng(rng) g1_nodes = nx.shortest_path_length(g1, source=node1) g2_nodes = nx.shortest_path_length(g2, source=node2) @@ -86,8 +86,8 @@ def select_nodes_same_depth(g1, node1, g2, node2, rng_=None): for p in possible_pairs: yield p[0], p[1] -def select_nodes_randomly(g1, g2, rng_=None): - rng = np.random.default_rng(rng_) +def select_nodes_randomly(g1, g2, rng=None): + rng = np.random.default_rng(rng) sorted_self_nodes_list = list(g1.nodes) rng.shuffle(sorted_self_nodes_list) diff --git a/tpot2/search_spaces/pipelines/hierarchical_individual.py b/tpot2/search_spaces/pipelines/hierarchical_individual.py new file mode 100644 index 00000000..db731a85 --- /dev/null +++ b/tpot2/search_spaces/pipelines/hierarchical_individual.py @@ -0,0 +1 @@ +from ..base import SklearnIndividual, SklearnIndividualGenerator \ No 
newline at end of file diff --git a/tpot2/search_spaces/pipelines/sequential.py b/tpot2/search_spaces/pipelines/sequential.py new file mode 100644 index 00000000..4459a284 --- /dev/null +++ b/tpot2/search_spaces/pipelines/sequential.py @@ -0,0 +1,62 @@ +import tpot2 +import numpy as np +import pandas as pd +import sklearn +from tpot2 import config +from typing import Generator, List, Tuple, Union +import random +from ..base import SklearnIndividual, SklearnIndividualGenerator + +class SequentialPipelineIndividual(SklearnIndividual): + # takes in a list of search spaces. each space is a list of SklearnIndividualGenerators. + # will produce a pipeline of Sequential length. Each step in the pipeline will correspond to the the search space provided in the same index. + + def __init__(self, search_spaces : List[SklearnIndividualGenerator] ) -> None: + super().__init__() + self.search_spaces = search_spaces + self.pipeline = self._generate_pipeline() + + def _generate_pipeline(self, rng=None): + pipeline = [] + for space in self.search_spaces: + pipeline.append(space.generate(rng)) + return pipeline + + def mutate(self, rng=None): + rng = np.random.default_rng() + step = rng.choice(self.pipeline) + return step.mutate(rng) + + + def crossover(self, other, rng=None): + #swap a random step in the pipeline with the corresponding step in the other pipeline + + if len(self.pipeline) != len(other.pipeline): + return False + + if len(self.pipeline) < 2: + return False + + rng = np.random.default_rng() + idx = rng.integers(1,len(self.pipeline)) + + self.pipeline[idx], other.pipeline[idx] = other.pipeline[idx], self.pipeline[idx] + return True + + def export_pipeline(self): + return sklearn.pipeline.make_pipeline(*[step.export_pipeline() for step in self.pipeline]) + + def unique_id(self): + return tuple([step.unique_id() for step in self.pipeline]) + + +class SequentialPipeline(SklearnIndividualGenerator): + def __init__(self, search_spaces : List[SklearnIndividualGenerator] ) -> 
None: + """ + Takes in a list of search spaces. will produce a pipeline of Sequential length. Each step in the pipeline will correspond to the the search space provided in the same index. + """ + + self.search_spaces = search_spaces + + def generate(self, rng=None): + return SequentialPipelineIndividual(self.search_spaces) \ No newline at end of file diff --git a/tpot2/search_spaces/pipelines/tree.py b/tpot2/search_spaces/pipelines/tree.py new file mode 100644 index 00000000..de4c2aef --- /dev/null +++ b/tpot2/search_spaces/pipelines/tree.py @@ -0,0 +1,50 @@ +import tpot2 +import numpy as np +import pandas as pd +import sklearn +from tpot2 import config +from typing import Generator, List, Tuple, Union +import random +from ..base import SklearnIndividual, SklearnIndividualGenerator +import networkx as nx +import copy +import matplotlib.pyplot as plt + +from .graph import GraphPipelineIndividual, GraphPipeline + + +from .graph_utils import * + +class TreePipelineIndividual(GraphPipelineIndividual): + def __init__(self, + **kwargs) -> None: + super().__init__(**kwargs) + + self.crossover_methods_list = [self._crossover_swap_branch, self._crossover_swap_node, self._crossover_nodes] + self.mutate_methods_list = [self._mutate_insert_leaf, self._mutate_insert_inner_node, self._mutate_remove_node, self._mutate_node] + + + +class TreePipeline(SklearnIndividualGenerator): + def __init__(self, root_search_space : SklearnIndividualGenerator, + leaf_search_space : SklearnIndividualGenerator = None, + inner_search_space : SklearnIndividualGenerator =None, + min_size: int = 2, + max_size: int = 10, + crossover_same_depth=False, + rng=None) -> None: + + """ + Generates a pipeline of variable length. Pipeline will have a tree structure similar to TPOT1. 
+ + """ + + self.search_space = root_search_space + self.leaf_search_space = leaf_search_space + self.inner_search_space = inner_search_space + self.min_size = min_size + self.max_size = max_size + self.crossover_same_depth = crossover_same_depth + + def generate(self, rng=None): + return TreePipelineIndividual(self.search_space, self.leaf_search_space, self.inner_search_space, self.min_size, self.max_size, self.crossover_same_depth, rng=rng) \ No newline at end of file diff --git a/tpot2/search_spaces/pipelines/wrapper.py b/tpot2/search_spaces/pipelines/wrapper.py new file mode 100644 index 00000000..3521d8dd --- /dev/null +++ b/tpot2/search_spaces/pipelines/wrapper.py @@ -0,0 +1,84 @@ + +import numpy as np +import pandas as pd +import sklearn +from tpot2 import config +from typing import Generator, List, Tuple, Union +import random +from ..base import SklearnIndividual, SklearnIndividualGenerator +from ConfigSpace import ConfigurationSpace + + +class WrapperPipelineIndividual(SklearnIndividual): + def __init__(self, + nodegen: SklearnIndividualGenerator, + method: type, + space: ConfigurationSpace, + rng=None) -> None: + + + + super().__init__() + + self.nodegen = nodegen + self.node = np.random.default_rng(rng).choice(self.nodegen).generate() + + + self.method = method + self.space = space + rng = np.random.default_rng(rng) + self.space.seed(rng.integers(0, 2**32)) + self.hyperparameters = self.space.sample_configuration().get_dictionary() + + + + + def mutate(self, rng=None): + rng = np.random.default_rng(rng) + if rng.choice([True, False]): + return self._mutate_hyperparameters(rng) + else: + return self._mutate_node(rng) + + def _mutate_hyperparameters(self, rng=None): + rng = np.random.default_rng(rng) + self.space.seed(rng.integers(0, 2**32)) + self.hyperparameters = self.space.sample_configuration().get_dictionary() + return True + + def _mutate_node(self, rng=None): + return self.node.mutate(rng) + + def crossover(self, other, rng=None): + return 
self.node.crossover(other.node, rng) + + def export_pipeline(self): + + est = self.node.export_pipeline() + wrapped_est = self.method(est, **self.hyperparameters) + return wrapped_est + + + def unique_id(self): + return self.node.unique_id() + + +class WrapperPipeline(SklearnIndividualGenerator): + def __init__(self, nodegen: SklearnIndividualGenerator, + method: type, + space: ConfigurationSpace, + ) -> None: + + """ + This search space is for wrapping a sklearn estimator with a method that takes another estimator and hyperparameters as arguments. + For example, this can be used with sklearn.ensemble.BaggingClassifier or sklearn.ensemble.AdaBoostClassifier. + + """ + + + self.nodegen = nodegen + self.method = method + self.space = space + + def generate(self, rng=None): + return WrapperPipelineIndividual(self.nodegen, self.method, self.space, rng) \ No newline at end of file diff --git a/tpot2/individual_representations/graph_pipeline_individual/test/__init__.py b/tpot2/search_spaces/templates/__init__.py similarity index 100% rename from tpot2/individual_representations/graph_pipeline_individual/test/__init__.py rename to tpot2/search_spaces/templates/__init__.py diff --git a/tpot2/search_spaces/templates/autoqtl.py b/tpot2/search_spaces/templates/autoqtl.py new file mode 100644 index 00000000..e69de29b diff --git a/tpot2/search_spaces/templates/stc.py b/tpot2/search_spaces/templates/stc.py new file mode 100644 index 00000000..e69de29b diff --git a/tpot2/selectors/lexicase_selection.py b/tpot2/selectors/lexicase_selection.py index 0afe1f34..cf3be98f 100644 --- a/tpot2/selectors/lexicase_selection.py +++ b/tpot2/selectors/lexicase_selection.py @@ -1,6 +1,6 @@ import numpy as np -def lexicase_selection(scores, k, rng_=None, n_parents=1,): +def lexicase_selection(scores, k, rng=None, n_parents=1,): """Select the best individual according to Lexicase Selection, *k* times. The returned list contains the indices of the chosen *individuals*. 
:param scores: The score matrix, where rows the individulas and the columns are the corresponds to scores on different objectives. @@ -8,7 +8,7 @@ def lexicase_selection(scores, k, rng_=None, n_parents=1,): This function uses the :func:`~random.choice` function from the python base :mod:`random` module. """ - rng = np.random.default_rng(rng_) + rng = np.random.default_rng(rng) chosen =[] for i in range(k*n_parents): candidates = list(range(len(scores))) diff --git a/tpot2/selectors/max_weighted_average_selector.py b/tpot2/selectors/max_weighted_average_selector.py index d142bafd..61848723 100644 --- a/tpot2/selectors/max_weighted_average_selector.py +++ b/tpot2/selectors/max_weighted_average_selector.py @@ -1,6 +1,6 @@ import numpy as np -def max_weighted_average_selector(scores,k, rng_=None, n_parents=1,): +def max_weighted_average_selector(scores,k, rng=None, n_parents=1,): ave_scores = [np.nanmean(s ) for s in scores ] #TODO make this more efficient chosen = np.argsort(ave_scores)[::-1][0:k] #TODO check this behavior with nans return np.reshape(chosen, (k, n_parents)) \ No newline at end of file diff --git a/tpot2/selectors/nsgaii.py b/tpot2/selectors/nsgaii.py index bb7bf76d..d708267f 100644 --- a/tpot2/selectors/nsgaii.py +++ b/tpot2/selectors/nsgaii.py @@ -87,7 +87,7 @@ def crowding_distance(matrix): -def survival_select_NSGA2(scores, k, rng_=None): +def survival_select_NSGA2(scores, k, rng=None): pareto_fronts = nondominated_sorting(scores) diff --git a/tpot2/selectors/random_selector.py b/tpot2/selectors/random_selector.py index 54b37978..7812396d 100644 --- a/tpot2/selectors/random_selector.py +++ b/tpot2/selectors/random_selector.py @@ -1,6 +1,6 @@ import numpy as np -def random_selector(scores, k, rng_=None, n_parents=1, ): - rng = np.random.default_rng(rng_) +def random_selector(scores, k, rng=None, n_parents=1, ): + rng = np.random.default_rng(rng) chosen = rng.choice(list(range(0,len(scores))), size=k*n_parents) return np.reshape(chosen, (k, 
n_parents)) \ No newline at end of file diff --git a/tpot2/selectors/tournament_selection.py b/tpot2/selectors/tournament_selection.py index a715a9dd..74a31742 100644 --- a/tpot2/selectors/tournament_selection.py +++ b/tpot2/selectors/tournament_selection.py @@ -1,6 +1,6 @@ import numpy as np -def tournament_selection(scores, k, rng_=None, n_parents=1, tournament_size=2, score_index=0): +def tournament_selection(scores, k, rng=None, n_parents=1, tournament_size=2, score_index=0): """Select the best individual among *tournsize* randomly chosen individuals, *k* times. The returned list contains the indices of the chosen *individuals*. :param scores: The score matrix, where rows the individulas and the columns are the corresponds to scores on different objectives. @@ -12,7 +12,7 @@ def tournament_selection(scores, k, rng_=None, n_parents=1, tournament_size=2, s :mod:`random` module. """ - rng = np.random.default_rng(rng_) + rng = np.random.default_rng(rng) if isinstance(score_index,int): key=lambda x:x[1][score_index] diff --git a/tpot2/selectors/tournament_selection_dominated.py b/tpot2/selectors/tournament_selection_dominated.py index 74556894..90ec371e 100644 --- a/tpot2/selectors/tournament_selection_dominated.py +++ b/tpot2/selectors/tournament_selection_dominated.py @@ -3,7 +3,7 @@ from.nsgaii import nondominated_sorting, crowding_distance, dominates #based on deap -def tournament_selection_dominated(scores, k, rng_=None, n_parents=2): +def tournament_selection_dominated(scores, k, rng=None, n_parents=2): """Select the best individual among *tournsize* randomly chosen individuals, *k* times. The returned list contains the indices of the chosen *individuals*. :param scores: The score matrix, where rows the individulas and the columns are the corresponds to scores on different objectives. @@ -15,7 +15,7 @@ def tournament_selection_dominated(scores, k, rng_=None, n_parents=2): :mod:`random` module. 
""" - rng = np.random.default_rng(rng_) + rng = np.random.default_rng(rng) pareto_fronts = nondominated_sorting(scores) # chosen = list(itertools.chain.from_iterable(fronts)) diff --git a/tpot2/tpot_estimator/estimator.py b/tpot2/tpot_estimator/estimator.py index 060539c7..50dfa6e0 100644 --- a/tpot2/tpot_estimator/estimator.py +++ b/tpot2/tpot_estimator/estimator.py @@ -38,14 +38,9 @@ def __init__(self, scorers, objective_function_names = None, bigger_is_better = True, - hyperparameter_probability = 1, - hyper_node_probability = 0, - hyperparameter_alpha = 1, - max_size = np.inf, - linear_pipeline = False, - root_config_dict= 'Auto', - inner_config_dict=["selectors", "transformers"], - leaf_config_dict= None, + search_space = None, + + cross_val_predict_cv = 0, categorical_features = None, subsets = None, @@ -101,7 +96,6 @@ def __init__(self, scorers, #debugging and logging parameters warm_start = False, - subset_column = None, periodic_checkpoint_folder = None, callback = None, @@ -148,69 +142,6 @@ def __init__(self, scorers, If True, the objective function is maximized. If False, the objective function is minimized. Use negative weights to reverse the direction. - max_size : int, default=np.inf - The maximum number of nodes of the pipelines to be generated. - - linear_pipeline : bool, default=False - If True, the pipelines generated will be linear. If False, the pipelines generated will be directed acyclic graphs. - - root_config_dict : dict, default='auto' - The configuration dictionary to use for the root node of the model. - If 'auto', will use "classifiers" if classification=True, else "regressors". - - 'selectors' : A selection of sklearn Selector methods. - - 'classifiers' : A selection of sklearn Classifier methods. - - 'regressors' : A selection of sklearn Regressor methods. - - 'transformers' : A selection of sklearn Transformer methods. 
- - 'arithmetic_transformer' : A selection of sklearn Arithmetic Transformer methods that replicate symbolic classification/regression operators. - - 'passthrough' : A node that just passes though the input. Useful for passing through raw inputs into inner nodes. - - 'feature_set_selector' : A selector that pulls out specific subsets of columns from the data. Only well defined as a leaf. - Subsets are set with the subsets parameter. - - 'skrebate' : Includes ReliefF, SURF, SURFstar, MultiSURF. - - 'MDR' : Includes MDR. - - 'ContinuousMDR' : Includes ContinuousMDR. - - 'genetic encoders' : Includes Genetic Encoder methods as used in AutoQTL. - - 'FeatureEncodingFrequencySelector': Includes FeatureEncodingFrequencySelector method as used in AutoQTL. - - list : a list of strings out of the above options to include the corresponding methods in the configuration dictionary. - - inner_config_dict : dict, default=["selectors", "transformers"] - The configuration dictionary to use for the inner nodes of the model generation. - Default ["selectors", "transformers"] - - 'selectors' : A selection of sklearn Selector methods. - - 'classifiers' : A selection of sklearn Classifier methods. - - 'regressors' : A selection of sklearn Regressor methods. - - 'transformers' : A selection of sklearn Transformer methods. - - 'arithmetic_transformer' : A selection of sklearn Arithmetic Transformer methods that replicate symbolic classification/regression operators. - - 'passthrough' : A node that just passes though the input. Useful for passing through raw inputs into inner nodes. - - 'feature_set_selector' : A selector that pulls out specific subsets of columns from the data. Only well defined as a leaf. - Subsets are set with the subsets parameter. - - 'skrebate' : Includes ReliefF, SURF, SURFstar, MultiSURF. - - 'MDR' : Includes MDR. - - 'ContinuousMDR' : Includes ContinuousMDR. - - 'genetic encoders' : Includes Genetic Encoder methods as used in AutoQTL. 
- - 'FeatureEncodingFrequencySelector': Includes FeatureEncodingFrequencySelector method as used in AutoQTL. - - list : a list of strings out of the above options to include the corresponding methods in the configuration dictionary. - - None : If None and max_depth>1, the root_config_dict will be used for the inner nodes as well. - - leaf_config_dict : dict, default=None - The configuration dictionary to use for the leaf node of the model. If set, leaf nodes must be from this dictionary. - Otherwise leaf nodes will be generated from the root_config_dict. - Default None - - 'selectors' : A selection of sklearn Selector methods. - - 'classifiers' : A selection of sklearn Classifier methods. - - 'regressors' : A selection of sklearn Regressor methods. - - 'transformers' : A selection of sklearn Transformer methods. - - 'arithmetic_transformer' : A selection of sklearn Arithmetic Transformer methods that replicate symbolic classification/regression operators. - - 'passthrough' : A node that just passes though the input. Useful for passing through raw inputs into inner nodes. - - 'feature_set_selector' : A selector that pulls out specific subsets of columns from the data. Only well defined as a leaf. - Subsets are set with the subsets parameter. - - 'skrebate' : Includes ReliefF, SURF, SURFstar, MultiSURF. - - 'MDR' : Includes MDR. - - 'ContinuousMDR' : Includes ContinuousMDR. - - 'genetic encoders' : Includes Genetic Encoder methods as used in AutoQTL. - - 'FeatureEncodingFrequencySelector': Includes FeatureEncodingFrequencySelector method as used in AutoQTL. - - list : a list of strings out of the above options to include the corresponding methods in the configuration dictionary. - - None : If None, a leaf will not be required (i.e. the pipeline can be a single root node). Leaf nodes will be generated from the inner_config_dict. - cross_val_predict_cv : int, default=0 Number of folds to use for the cross_val_predict function for inner classifiers and regressors. 
Estimators will still be fit on the full dataset, but the following node will get the outputs from cross_val_predict. @@ -378,9 +309,6 @@ def __init__(self, scorers, warm_start : bool, default=False If True, will use the continue the evolutionary algorithm from the last generation of the previous run. - subset_column : str or int, default=None - EXPERIMENTAL The column to use for the subset selection. Must also pass in unique_subset_values to GraphIndividual to function. - periodic_checkpoint_folder : str, default=None Folder to save the population to periodically. If None, no periodic saving will be done. If provided, training will resume from this checkpoint. @@ -441,14 +369,9 @@ def __init__(self, scorers, self.other_objective_functions_weights = other_objective_functions_weights self.objective_function_names = objective_function_names self.bigger_is_better = bigger_is_better - self.hyperparameter_probability = hyperparameter_probability - self.hyper_node_probability = hyper_node_probability - self.hyperparameter_alpha = hyperparameter_alpha - self.max_size = max_size - self.linear_pipeline = linear_pipeline - self.root_config_dict= root_config_dict - self.inner_config_dict= inner_config_dict - self.leaf_config_dict= leaf_config_dict + + self.search_space = search_space + self.cross_val_predict_cv = cross_val_predict_cv self.categorical_features = categorical_features self.subsets = subsets @@ -487,7 +410,6 @@ def __init__(self, scorers, self.selection_evaluation_early_stop = selection_evaluation_early_stop self.selection_evaluation_scaling = selection_evaluation_scaling self.warm_start = warm_start - self.subset_column = subset_column self.verbose = verbose self.periodic_checkpoint_folder = periodic_checkpoint_folder self.callback = callback @@ -501,7 +423,7 @@ def __init__(self, scorers, self.optuna_optimize_pareto_front_timeout = optuna_optimize_pareto_front_timeout self.optuna_storage = optuna_storage - # create random number generator based on rng_seed + # 
create random number generator based on rngseed self.rng = np.random.default_rng(random_state) # save random state passed to us for other functions that use random_state self.random_state = random_state @@ -675,17 +597,6 @@ def fit(self, X, y): else: self.feature_names = None - if self.root_config_dict == 'Auto': - if self.classification: - n_classes = len(np.unique(y)) - root_config_dict = get_configuration_dictionary("classifiers", n_samples, n_features, self.classification, self.random_state, self.cv_gen, subsets=self.subsets, feature_names=self.feature_names, n_classes=n_classes) - else: - root_config_dict = get_configuration_dictionary("regressors", n_samples, n_features, self.classification, self.random_state, self.cv_gen, subsets=self.subsets, feature_names=self.feature_names) - else: - root_config_dict = get_configuration_dictionary(self.root_config_dict, n_samples, n_features, self.classification, self.random_state, self.cv_gen, subsets=self.subsets,feature_names=self.feature_names) - - inner_config_dict = get_configuration_dictionary(self.inner_config_dict, n_samples, n_features, self.classification, self.random_state, self.cv_gen, subsets=self.subsets, feature_names=self.feature_names) - leaf_config_dict = get_configuration_dictionary(self.leaf_config_dict, n_samples, n_features, self.classification, self.random_state, self.cv_gen, subsets=self.subsets, feature_names=self.feature_names) def objective_function(pipeline_individual, @@ -697,7 +608,6 @@ def objective_function(pipeline_individual, other_objective_functions=self.other_objective_functions, memory=self.memory, cross_val_predict_cv=self.cross_val_predict_cv, - subset_column=self.subset_column, **kwargs): return objective_function_generator( pipeline_individual, @@ -709,21 +619,10 @@ def objective_function(pipeline_individual, other_objective_functions=other_objective_functions, memory=memory, cross_val_predict_cv=cross_val_predict_cv, - subset_column=subset_column, **kwargs, ) - 
self.individual_generator_instance = tpot2.individual_representations.graph_pipeline_individual.estimator_graph_individual_generator( - inner_config_dict=inner_config_dict, - root_config_dict=root_config_dict, - leaf_config_dict=leaf_config_dict, - max_size = self.max_size, - linear_pipeline=self.linear_pipeline, - hyperparameter_probability=self.hyperparameter_probability, - hyper_node_probability=self.hyper_node_probability, - hyperparameter_alpha=self.hyperparameter_alpha, - rng_=self.rng, - ) + if self.threshold_evaluation_early_stop is not None or self.selection_evaluation_early_stop is not None: evaluation_early_stop_steps = self.cv @@ -737,9 +636,14 @@ def objective_function(pipeline_individual, X_future = X y_future = y + def ind_generator(rng): + rng = np.random.default_rng(rng) + while True: + yield self.search_space.generate(rng) + #If warm start and we have an evolver instance, use the existing one if not(self.warm_start and self._evolver_instance is not None): - self._evolver_instance = self._evolver( individual_generator=self.individual_generator_instance, + self._evolver_instance = self._evolver( individual_generator=ind_generator(self.rng), objective_functions= [objective_function], objective_function_weights = self.objective_function_weights, objective_names=self.objective_names, @@ -781,7 +685,7 @@ def objective_function(pipeline_individual, mutate_then_crossover_probability= self.mutate_then_crossover_probability, crossover_then_mutate_probability= self.crossover_then_mutate_probability, - rng_=self.rng, + rng=self.rng, ) @@ -829,7 +733,7 @@ def objective_function(pipeline_individual, other_objective_functions=self.other_objective_functions, memory=self.memory, cross_val_predict_cv=self.cross_val_predict_cv, - subset_column=self.subset_column, + **kwargs: objective_function_generator( ind, X, @@ -840,7 +744,6 @@ def objective_function(pipeline_individual, other_objective_functions=other_objective_functions, memory=memory, 
cross_val_predict_cv=cross_val_predict_cv, - subset_column=subset_column, **kwargs, )] @@ -882,7 +785,6 @@ def objective_function(pipeline_individual, other_objective_functions=self.other_objective_functions, memory=self.memory, cross_val_predict_cv=self.cross_val_predict_cv, - subset_column=self.subset_column, **kwargs: val_objective_function_generator( ind, X, @@ -893,7 +795,6 @@ def objective_function(pipeline_individual, other_objective_functions=other_objective_functions, memory=memory, cross_val_predict_cv=cross_val_predict_cv, - subset_column=subset_column, **kwargs, )] @@ -920,7 +821,7 @@ def objective_function(pipeline_individual, self.selected_best_score = self.evaluated_individuals.loc[best_idx] - best_individual_pipeline = best_individual.export_pipeline(memory=self.memory, cross_val_predict_cv=self.cross_val_predict_cv, subset_column=self.subset_column) + best_individual_pipeline = best_individual.export_pipeline(memory=self.memory, cross_val_predict_cv=self.cross_val_predict_cv) if self.preprocessing: self.fitted_pipeline_ = sklearn.pipeline.make_pipeline(sklearn.base.clone(self._preprocessing_pipeline), best_individual_pipeline ) diff --git a/tpot2/tpot_estimator/steady_state_estimator.py b/tpot2/tpot_estimator/steady_state_estimator.py index 240b3a86..777c8cad 100644 --- a/tpot2/tpot_estimator/steady_state_estimator.py +++ b/tpot2/tpot_estimator/steady_state_estimator.py @@ -483,7 +483,7 @@ def __init__(self, scorers= [], self.optuna_optimize_pareto_front_timeout = optuna_optimize_pareto_front_timeout self.optuna_storage = optuna_storage - # create random number generator based on rng_seed + # create random number generator based on rngseed self.rng = np.random.default_rng(random_state) # save random state passed to us for other functions that use random_state self.random_state = random_state @@ -759,7 +759,7 @@ def objective_function(pipeline_individual, max_evaluated_individuals = self.max_evaluated_individuals, - rng_=self.rng, + rng=self.rng, ) 
From da1749e2bacc86d3eb1f688b173e01f8d8ad3a51 Mon Sep 17 00:00:00 2001 From: perib Date: Tue, 19 Mar 2024 19:49:34 -0700 Subject: [PATCH 02/75] edits --- tpot2/config/autoqtl_builtins.py | 32 +- tpot2/config/classifiers.py | 138 +++-- tpot2/config/classifiers_sklearnex.py | 128 ++-- tpot2/config/mdr_configs.py | 13 +- tpot2/config/regressors.py | 565 ++++++++---------- tpot2/config/regressors_sklearnex.py | 148 +++-- tpot2/config/special_configs.py | 89 +-- tpot2/config/transformers.py | 8 +- tpot2/search_spaces/nodes/estimator_node.py | 17 +- .../nodes/fss_node.py} | 0 10 files changed, 590 insertions(+), 548 deletions(-) rename tpot2/{config/all_single_modules.py => search_spaces/nodes/fss_node.py} (100%) diff --git a/tpot2/config/autoqtl_builtins.py b/tpot2/config/autoqtl_builtins.py index d3cc8dfc..b317fe70 100644 --- a/tpot2/config/autoqtl_builtins.py +++ b/tpot2/config/autoqtl_builtins.py @@ -3,22 +3,24 @@ import sklearn import numpy as np -def params_FeatureEncodingFrequencySelector(trial, name=None): - return { - 'threshold': trial.suggest_float(f'threshold_{name}', 0, .35) - } +from ConfigSpace import ConfigurationSpace +from ConfigSpace import ConfigurationSpace, Integer, Float, Categorical, Normal +def get_FeatureEncodingFrequencySelector_ConfigurationSpace(): + return ConfigurationSpace( + space = { + 'threshold': Float("threshold", bounds=(0, .35)) + } + ) +def get_encoder_ConfigurationSpace(): + return ConfigurationSpace( + space = {} + ) +# genetic_encoders.DominantEncoder : {}, +# genetic_encoders.RecessiveEncoder : {}, +# genetic_encoders.HeterosisEncoder : {}, +# genetic_encoders.UnderDominanceEncoder : {}, +# genetic_encoders.OverDominanceEncoder : {}, -def make_FeatureEncodingFrequencySelector_config_dictionary(): - return {feature_encoding_frequency_selector.FeatureEncodingFrequencySelector: params_FeatureEncodingFrequencySelector} - -def make_genetic_encoders_config_dictionary(): - return { - genetic_encoders.DominantEncoder : {}, - 
genetic_encoders.RecessiveEncoder : {}, - genetic_encoders.HeterosisEncoder : {}, - genetic_encoders.UnderDominanceEncoder : {}, - genetic_encoders.OverDominanceEncoder : {}, - } diff --git a/tpot2/config/classifiers.py b/tpot2/config/classifiers.py index 5816b6bb..9649e463 100644 --- a/tpot2/config/classifiers.py +++ b/tpot2/config/classifiers.py @@ -2,17 +2,26 @@ from ConfigSpace import ConfigurationSpace, Integer, Float, Categorical, Normal -def get_LogisticRegression_ConfigurationSpace(): +#TODO Conditional search space to prevent invalid combinations of hyperparameters +def get_LogisticRegression_ConfigurationSpace(random_state=None): + + space = { + 'solver': Categorical('solver', ['saga','liblinear']), + 'penalty': Categorical("penalty", ['elasticnet','l1', 'l2']), #TODO workaround to support None option? + 'dual': Categorical("dual", [True, False]), + 'C': Float("C", bounds=(1e-4, 1e4), log=True), + + #TODO workaround for including None as a value for class_weight + 'class_weight': Categorical("class_weight", ['balanced']), + 'n_jobs': 1, + 'max_iter': 1000, + } + + if random_state is not None: #This is required because configspace doesn't allow None as a value + space['random_state'] = random_state + return ConfigurationSpace( - space = { - 'solver': Categorical("solver", ['newton-cg', 'lbfgs', 'liblinear', 'sag', 'saga']), - 'penalty': Categorical("penalty", ['l1', 'l2']), - 'dual': Categorical("dual", [True, False]), - 'C': Float("C", bounds=(1e-4, 1e4), log=True), - 'class_weight': Categorical("class_weight", ['balanced']), - 'n_jobs': 1, - 'max_iter': 1000, - } + space = space ) @@ -21,7 +30,7 @@ def get_KNeighborsClassifier_ConfigurationSpace(n_samples=10): space = { - 'n_neighbors': Integer("n_neighbors", bounds=(1, max(50,n_samples))), + 'n_neighbors': Integer("n_neighbors", bounds=(1, min(100,n_samples)), log=True), 'weights': Categorical("weights", ['uniform', 'distance']), 'p': Integer("p", bounds=(1, 3)), 'metric': Categorical("metric", 
['euclidean', 'minkowski']), @@ -30,33 +39,48 @@ def get_KNeighborsClassifier_ConfigurationSpace(n_samples=10): ) -def get_DecisionTreeClassifier_ConfigurationSpace(): +def get_DecisionTreeClassifier_ConfigurationSpace(random_state=None): + + space = { + 'criterion': Categorical("criterion", ['gini', 'entropy']), + 'max_depth': Integer("max_depth", bounds=(1, 20)), + 'min_samples_split': Integer("min_samples_split", bounds=(2, 21)), + 'min_samples_leaf': Integer("min_samples_leaf", bounds=(1, 21)), + 'max_features': Categorical("max_features", ['sqrt', 'log2']), + 'min_weight_fraction_leaf': 0.0, + } + + + if random_state is not None: #This is required because configspace doesn't allow None as a value + space['random_state'] = random_state + return ConfigurationSpace( - space = { - 'criterion': Categorical("criterion", ['gini', 'entropy']), - 'max_depth': Integer("max_depth", bounds=(1, 11)), - 'min_samples_split': Integer("min_samples_split", bounds=(2, 21)), - 'min_samples_leaf': Integer("min_samples_leaf", bounds=(1, 21)), - 'max_features': Categorical("max_features", ['sqrt', 'log2']), - 'min_weight_fraction_leaf': 0.0, - } + space = space ) -def get_SVC_ConfigurationSpace(): - return ConfigurationSpace( - space = { - 'kernel': Categorical("kernel", ['poly', 'rbf', 'linear', 'sigmoid']), - 'C': Float("C", bounds=(1e-4, 25), log=True), - 'degree': Integer("degree", bounds=(1, 4)), +def get_SVC_ConfigurationSpace(random_state=None): - #'class_weight': Categorical("class_weight", [None, 'balanced']), #TODO add class_weight. configspace doesn't allow None as a value. - 'max_iter': 3000, - 'tol': Float("tol", bounds=(0.001, 0.01)), - 'probability': Categorical("probability", [True]), # configspace doesn't allow bools as a default value? 
but does allow them as a value inside a Categorical - } + space = { + 'kernel': Categorical("kernel", ['poly', 'rbf', 'linear', 'sigmoid']), + 'C': Float("C", bounds=(1e-4, 25), log=True), + 'degree': Integer("degree", bounds=(1, 4)), + + #'class_weight': Categorical("class_weight", [None, 'balanced']), #TODO add class_weight. configspace doesn't allow None as a value. + 'max_iter': 3000, + 'tol': 0.001, + 'probability': Categorical("probability", [True]), # configspace doesn't allow bools as a default value? but does allow them as a value inside a Categorical + } + + + if random_state is not None: #This is required because configspace doesn't allow None as a value + space['random_state'] = random_state + + return ConfigurationSpace( + space = space ) +#TODO Conditional search spaces def get_LinearSVC_ConfigurationSpace(random_state=None,): space = { 'penalty': Categorical("penalty", ['l1', 'l2']), @@ -77,6 +101,7 @@ def get_LinearSVC_ConfigurationSpace(random_state=None,): def get_RandomForestClassifier_ConfigurationSpace(random_state=None): space = { + 'n_estimators': 100, 'criterion': Categorical("criterion", ['gini', 'entropy']), 'min_samples_split': Integer("min_samples_split", bounds=(2, 20)), 'min_samples_leaf': Integer("min_samples_leaf", bounds=(1, 20)), @@ -90,34 +115,42 @@ def get_RandomForestClassifier_ConfigurationSpace(random_state=None): space = space ) -def get_GradientBoostingClassifier_ConfigurationSpace(n_classes=None): +def get_GradientBoostingClassifier_ConfigurationSpace(random_state=None, n_classes=None): if n_classes is not None and n_classes > 2: loss = 'log_loss' else: loss = Categorical("loss", ['log_loss', 'exponential']) + + space = { + 'n_estimators': 100, + 'loss': loss, + 'learning_rate': Float("learning_rate", bounds=(1e-3, 1), log=True), + 'min_samples_leaf': Integer("min_samples_leaf", bounds=(1, 20)), + 'min_samples_split': Integer("min_samples_split", bounds=(2, 20)), + 'subsample': Float("subsample", bounds=(0.1, 1.0)), + 
'max_features': Float("max_features", bounds=(0.1, 1.0)), + 'max_depth': Integer("max_depth", bounds=(1, 10)), + 'tol': 1e-4, + } + + if random_state is not None: #This is required because configspace doesn't allow None as a value + space['random_state'] = random_state return ConfigurationSpace( - space = { - 'n_estimators': 100, - 'loss': loss, - 'learning_rate': Float("learning_rate", bounds=(1e-3, 1), log=True), - 'min_samples_leaf': Integer("min_samples_leaf", bounds=(1, 20)), - 'min_samples_split': Integer("min_samples_split", bounds=(2, 20)), - 'subsample': Float("subsample", bounds=(0.1, 1.0)), - 'max_features': Float("max_features", bounds=(0.1, 1.0)), - 'max_depth': Integer("max_depth", bounds=(1, 10)), - } + space = space ) def get_XGBClassifier_ConfigurationSpace(random_state=None,): space = { + 'n_estimators': 100, 'learning_rate': Float("learning_rate", bounds=(1e-3, 1), log=True), 'subsample': Float("subsample", bounds=(0.1, 1.0)), 'min_child_weight': Integer("min_child_weight", bounds=(1, 21)), 'max_depth': Integer("max_depth", bounds=(1, 11)), + 'n_jobs': 1, } if random_state is not None: #This is required because configspace doesn't allow None as a value @@ -149,7 +182,7 @@ def get_LGBMClassifier_ConfigurationSpace(random_state=None,): def get_ExtraTreesClassifier_ConfigurationSpace(random_state=None): space = { - 'n_estimators': Integer("n_estimators", bounds=(10, 500)), + 'n_estimators': 100, 'criterion': Categorical("criterion", ["gini", "entropy"]), 'max_features': Float("max_features", bounds=(0.05, 1.00)), 'min_samples_split': Integer("min_samples_split", bounds=(2, 21)), @@ -207,7 +240,7 @@ def get_MLPClassifier_ConfigurationSpace(random_state=None): def get_BernoulliNB_ConfigurationSpace(): return ConfigurationSpace( space = { - 'alpha': Float("alpha", bounds=(1e-3, 100), log=True), + 'alpha': Float("alpha", bounds=(1e-2, 100), log=True), 'fit_prior': Categorical("fit_prior", [True, False]), } ) @@ -220,3 +253,20 @@ def 
get_MultinomialNB_ConfigurationSpace(): 'fit_prior': Categorical("fit_prior", [True, False]), } ) + + + +def get_AdaBoostClassifier_ConfigurationSpace(random_state=None): + space = { + 'n_estimators': Integer("n_estimators", bounds=(50, 500)), + 'learning_rate': Float("learning_rate", bounds=(0.01, 2), log=True), + 'algorithm': Categorical("algorithm", ['SAMME', 'SAMME.R']), + 'max_depth': Integer("max_depth", bounds=(1, 10)), + } + + if random_state is not None: #This is required because configspace doesn't allow None as a value + space['random_state'] = random_state + + return ConfigurationSpace( + space = space + ) \ No newline at end of file diff --git a/tpot2/config/classifiers_sklearnex.py b/tpot2/config/classifiers_sklearnex.py index 16983332..939df92f 100644 --- a/tpot2/config/classifiers_sklearnex.py +++ b/tpot2/config/classifiers_sklearnex.py @@ -5,76 +5,86 @@ from sklearnex.linear_model import LogisticRegression import numpy as np - +from ConfigSpace import ConfigurationSpace +from ConfigSpace import ConfigurationSpace, Integer, Float, Categorical, Normal from functools import partial -def params_RandomForestClassifier(trial, random_state=None, name=None): - return { - 'n_estimators': 100, - 'bootstrap': trial.suggest_categorical(name=f'bootstrap_{name}', choices=[True, False]), - 'min_samples_split': trial.suggest_int(f'min_samples_split_{name}', 2, 20), - 'min_samples_leaf': trial.suggest_int(f'min_samples_leaf_{name}', 1, 20), - 'n_jobs': 1, - 'random_state': random_state - } +def get_RandomForestClassifier_ConfigurationSpace(random_state=None): + space = { + 'n_estimators': 100, #TODO make this a higher number? learned? 
+ 'bootstrap': Categorical("bootstrap", [True, False]), + 'min_samples_split': Integer("min_samples_split", bounds=(2, 20)), + 'min_samples_leaf': Integer("min_samples_leaf", bounds=(1, 20)), + 'n_jobs': 1, + + } + + if random_state is not None: #This is required because configspace doesn't allow None as a value + space['random_state'] = random_state + + return ConfigurationSpace( + space = space + ) -def params_KNeighborsClassifier(trial, name=None, n_samples=10): - n_neighbors_max = max(n_samples, 100) - return { - 'n_neighbors': trial.suggest_int(f'n_neighbors_{name}', 1, n_neighbors_max, log=True ), - 'weights': trial.suggest_categorical(f'weights_{name}', ['uniform', 'distance']), - } +def get_KNeighborsClassifier_ConfigurationSpace(n_samples=10): + return ConfigurationSpace( + space = { + 'n_neighbors': Integer("n_neighbors", bounds=(1, max(n_samples, 100)), log=True), + 'weights': Categorical("weights", ['uniform', 'distance']), + } + ) -def params_LogisticRegression(trial, random_state=None, name=None): - params = {} - params['dual'] = False - params['penalty'] = 'l2' - params['solver'] = trial.suggest_categorical(name=f'solver_{name}', choices=['liblinear', 'sag', 'saga']), - if params['solver'] == 'liblinear': - params['penalty'] = trial.suggest_categorical(name=f'penalty_{name}', choices=['l1', 'l2']) - if params['penalty'] == 'l2': - params['dual'] = trial.suggest_categorical(name=f'dual_{name}', choices=[True, False]) - else: - params['penalty'] = 'l1' - return { - 'solver': params['solver'], - 'penalty': params['penalty'], - 'dual': params['dual'], - 'C': trial.suggest_float(f'C_{name}', 1e-4, 1e4, log=True), + +#TODO add conditionals +def get_LogisticRegression_ConfigurationSpace(random_state=None): + space = { + 'solver': Categorical("solver", ['liblinear', 'sag', 'saga']), + 'penalty': Categorical("penalty", ['l1', 'l2']), + 'dual': Categorical("dual", [True, False]), + 'C': Float("C", bounds=(1e-4, 1e4), log=True), 'max_iter': 1000, - 
'random_state': random_state } -def params_SVC(trial, random_state=None, name=None): - return { - 'kernel': trial.suggest_categorical(name=f'kernel_{name}', choices=['poly', 'rbf', 'linear', 'sigmoid']), - 'C': trial.suggest_float(f'C_{name}', 1e-4, 25, log=True), - 'degree': trial.suggest_int(f'degree_{name}', 1, 4), - 'class_weight': trial.suggest_categorical(name=f'class_weight_{name}', choices=[None, 'balanced']), + if random_state is not None: #This is required because configspace doesn't allow None as a value + space['random_state'] = random_state + + return ConfigurationSpace( + space = space + ) + +def get_SVC_ConfigurationSpace(random_state=None): + space = { + 'kernel': Categorical("kernel", ['poly', 'rbf', 'linear', 'sigmoid']), + 'C': Float("C", bounds=(1e-4, 25), log=True), + 'degree': Integer("degree", bounds=(1, 4)), 'max_iter': 3000, - 'tol': 0.005, - 'probability': True, - 'random_state': random_state + 'tol': 0.001, + 'probability': Categorical("probability", [True]), # configspace doesn't allow bools as a default value? 
but does allow them as a value inside a Categorical } -def params_NuSVC(trial, random_state=None, name=None): - return { - 'nu': trial.suggest_float(f'subsample_{name}', 0.05, 1.0), - 'kernel': trial.suggest_categorical(name=f'kernel_{name}', choices=['poly', 'rbf', 'linear', 'sigmoid']), - 'C': trial.suggest_float(f'C_{name}', 1e-4, 25, log=True), - 'degree': trial.suggest_int(f'degree_{name}', 1, 4), - 'class_weight': trial.suggest_categorical(name=f'class_weight_{name}', choices=[None, 'balanced']), + if random_state is not None: #This is required because configspace doesn't allow None as a value + space['random_state'] = random_state + + return ConfigurationSpace( + space = space + ) + +def get_NuSVC_ConfigurationSpace(random_state=None): + space = { + 'nu': Float("nu", bounds=(0.05, 1.0)), + 'kernel': Categorical("kernel", ['poly', 'rbf', 'linear', 'sigmoid']), + 'C': Float("C", bounds=(1e-4, 25), log=True), + 'degree': Integer("degree", bounds=(1, 4)), + #TODO work around for None value? + #'class_weight': Categorical("class_weight", [None, 'balanced']), 'max_iter': 3000, 'tol': 0.005, - 'probability': True, - 'random_state': random_state + 'probability': Categorical("probability", [True]), # configspace doesn't allow bools as a default value? 
but does allow them as a value inside a Categorical } -def make_sklearnex_classifier_config_dictionary(random_state=None, n_samples=10, n_classes=None): - return { - RandomForestClassifier: partial(params_RandomForestClassifier, random_state=random_state), - KNeighborsClassifier: partial(params_KNeighborsClassifier, n_samples=n_samples), - LogisticRegression: partial(params_LogisticRegression, random_state=random_state), - SVC: partial(params_SVC, random_state=random_state), - NuSVC: partial(params_NuSVC, random_state=random_state), - } \ No newline at end of file + if random_state is not None: #This is required because configspace doesn't allow None as a value + space['random_state'] = random_state + + return ConfigurationSpace( + space = space + ) \ No newline at end of file diff --git a/tpot2/config/mdr_configs.py b/tpot2/config/mdr_configs.py index 4f872bd6..bbd7d487 100644 --- a/tpot2/config/mdr_configs.py +++ b/tpot2/config/mdr_configs.py @@ -22,12 +22,13 @@ ) -skrebate_ReliefF_configspace = ConfigurationSpace( - space = { - 'n_features_to_select': Integer('n_features_to_select', bounds=(1, 10), log=True), - 'n_neighbors': Integer('n_neighbors', bounds=(1,500), log=True), - } -) +def get_skrebate_SURF_config_space(n_features=10): + return ConfigurationSpace( + space = { + 'n_features_to_select': Integer('n_features_to_select', bounds=(1, n_features), log=True), + 'n_neighbors': Integer('n_neighbors', bounds=(2,500), log=True), + } + ) def make_skrebate_SURF_config_space(n_features=10): diff --git a/tpot2/config/regressors.py b/tpot2/config/regressors.py index ad7aa182..845f9ff1 100644 --- a/tpot2/config/regressors.py +++ b/tpot2/config/regressors.py @@ -22,375 +22,326 @@ from functools import partial +from ConfigSpace import ConfigurationSpace +from ConfigSpace import ConfigurationSpace, Integer, Float, Categorical, Normal #TODO: fill in remaining #TODO check for places were we could use log scaling -def params_RandomForestRegressor(trial, random_state=None, 
name=None): - return { +def get_RandomForestRegressor_ConfigurationSpace(random_state=None): + space = { 'n_estimators': 100, - 'max_features': trial.suggest_float(f'max_features_{name}', 0.05, 1.0), - 'bootstrap': trial.suggest_categorical(name=f'bootstrap_{name}', choices=[True, False]), - 'min_samples_split': trial.suggest_int(f'min_samples_split_{name}', 2, 21), - 'min_samples_leaf': trial.suggest_int(f'min_samples_leaf_{name}', 1, 21), - 'random_state': random_state - } + 'max_features': Float("max_features", bounds=(0.05, 1.0)), + 'bootstrap': Categorical("bootstrap", [True, False]), + 'min_samples_split': Integer("min_samples_split", bounds=(2, 21)), + 'min_samples_leaf': Integer("min_samples_leaf", bounds=(1, 21)), + } + + if random_state is not None: #This is required because configspace doesn't allow None as a value + space['random_state'] = random_state + + return ConfigurationSpace( + space = space + ) + + +def get_SGDRegressor_ConfigurationSpace(random_state=None): + space = { + 'loss': Categorical("loss", ['squared_error', 'huber', 'epsilon_insensitive', 'squared_epsilon_insensitive']), + 'penalty': 'elasticnet', + 'alpha': Float("alpha", bounds=(1e-5, 0.01), log=True), + 'learning_rate': Categorical("learning_rate", ['invscaling', 'constant']), + 'l1_ratio': Float("l1_ratio", bounds=(0.0, 1.0)), + 'eta0': Float("eta0", bounds=(0.01, 1.0)), + 'power_t': Float("power_t", bounds=(1e-5, 100.0), log=True), + 'fit_intercept': Categorical("fit_intercept", [True]), + } + + if random_state is not None: #This is required because configspace doesn't allow None as a value + space['random_state'] = random_state + return ConfigurationSpace( + space = space + ) -# SGDRegressor parameters -def params_SGDRegressor(trial, random_state=None, name=None): - params = { - 'loss': trial.suggest_categorical(f'loss_{name}', ['huber', 'squared_error', 'epsilon_insensitive', 'squared_epsilon_insensitive']), - 'penalty': 'elasticnet', - 'alpha': 
trial.suggest_float(f'alpha_{name}', 1e-5, 0.01, log=True), - 'learning_rate': trial.suggest_categorical(f'learning_rate_{name}', ['invscaling', 'constant']), - 'fit_intercept':True, - 'l1_ratio': trial.suggest_float(f'l1_ratio_{name}', 0.0, 1.0), - 'eta0': trial.suggest_float(f'eta0_{name}', 0.01, 1.0), - 'power_t': trial.suggest_float(f'power_t_{name}', 1e-5, 100.0, log=True), - 'random_state': random_state +def get_Ridge_ConfigurationSpace(random_state=None): + space = { + 'alpha': Float("alpha", bounds=(0.0, 1.0)), + 'fit_intercept': Categorical("fit_intercept", [True]), + 'tol': Float("tol", bounds=(1e-5, 1e-1), log=True), + 'solver': Categorical("solver", ['auto', 'svd', 'cholesky', 'lsqr', 'sparse_cg', 'sag', 'saga']), } - return params - -# Ridge parameters -def params_Ridge(trial, random_state=None, name=None): - params = { - 'alpha': trial.suggest_float(f'alpha_{name}', 0.0, 1.0), - 'fit_intercept': True, - - - #'max_iter': trial.suggest_int(f'max_iter_{name}', 100, 1000), - 'tol': trial.suggest_float(f'tol_{name}', 1e-5, 1e-1, log=True), - 'solver': trial.suggest_categorical(f'solver_{name}', ['auto', 'svd', 'cholesky', 'lsqr', 'sparse_cg', 'sag', 'saga']), - 'random_state': random_state + + if random_state is not None: #This is required because configspace doesn't allow None as a value + space['random_state'] = random_state + + return ConfigurationSpace( + space = space + ) + +def get_Lasso_ConfigurationSpace(random_state=None): + space = { + 'alpha': Float("alpha", bounds=(0.0, 1.0)), + 'fit_intercept': Categorical("fit_intercept", [True]), + 'tol': 0.0001, } - return params + + if random_state is not None: #This is required because configspace doesn't allow None as a value + space['random_state'] = random_state + + return ConfigurationSpace( + space = space + ) + +def get_ElasticNet_ConfigurationSpace(random_state=None): + space = { + 'alpha': Float("alpha", bounds=(0.0, 1.0)), + 'l1_ratio': Float("l1_ratio", bounds=(0.0, 1.0)), + } + + if 
random_state is not None: #This is required because configspace doesn't allow None as a value + space['random_state'] = random_state + + return ConfigurationSpace( + space = space + ) -# Lasso parameters -def params_Lasso(trial, random_state=None, name=None): - params = { - 'alpha': trial.suggest_float(f'alpha_{name}', 0.0, 1.0), - 'fit_intercept': True, - # 'normalize': trial.suggest_categorical(f'normalize_{name}', [True, False]), - 'precompute': trial.suggest_categorical(f'precompute_{name}', [True, False, 'auto']), +def get_Lars_ConfigurationSpace(random_state=None): + space = { + } + + if random_state is not None: #This is required because configspace doesn't allow None as a value + space['random_state'] = random_state - #'max_iter': trial.suggest_int(f'max_iter_{name}', 100, 1000), - 'tol': trial.suggest_float(f'tol_{name}', 1e-5, 1e-1, log=True), + return ConfigurationSpace( + space = space + ) - 'positive': trial.suggest_categorical(f'positive_{name}', [True, False]), - 'selection': trial.suggest_categorical(f'selection_{name}', ['cyclic', 'random']), - 'random_state': random_state - } - return params - -# ElasticNet parameters -def params_ElasticNet(trial, random_state=None, name=None): - params = { - 'alpha': 1 - trial.suggest_float(f'alpha_{name}', 0.0, 1.0, log=True), - 'l1_ratio': 1- trial.suggest_float(f'l1_ratio_{name}',0.0, 1.0), - 'random_state': random_state +def get_OthogonalMatchingPursuit_ConfigurationSpace(): + return ConfigurationSpace( + space = { } - return params - -# Lars parameters -def params_Lars(trial, random_state=None, name=None): - params = { - 'fit_intercept': True, - 'verbose': trial.suggest_categorical(f'verbose_{name}', [True, False]), - 'normalize': trial.suggest_categorical(f'normalize_{name}', [True, False]), - - # 'precompute': trial.suggest_categorical(f'precompute_{name}', ['auto_{name}', True, False]), - 'n_nonzero_coefs': trial.suggest_int(f'n_nonzero_coefs_{name}', 1, 100), - 'eps': trial.suggest_float(f'eps_{name}', 
1e-5, 1e-1, log=True), - 'copy_X': trial.suggest_categorical(f'copy_X_{name}', [True, False]), - 'fit_path': trial.suggest_categorical(f'fit_path_{name}', [True, False]), - # 'positive': trial.suggest_categorical(f'positive_{name}', [True, False]), - 'random_state': random_state - } - return params - -# OrthogonalMatchingPursuit parameters -def params_OrthogonalMatchingPursuit(trial, name=None): - params = { - 'n_nonzero_coefs': trial.suggest_int(f'n_nonzero_coefs_{name}', 1, 100), - 'tol': trial.suggest_float(f'tol_{name}', 1e-5, 1e-1, log=True), - 'fit_intercept': True, - 'normalize': trial.suggest_categorical(f'normalize_{name}', [True, False]), - 'precompute': trial.suggest_categorical(f'precompute_{name}', ['auto', True, False]), - } - return params - -# BayesianRidge parameters -def params_BayesianRidge(trial, name=None): - params = { - 'n_iter': trial.suggest_int(f'n_iter_{name}', 100, 1000), - 'tol': trial.suggest_float(f'tol_{name}', 1e-5, 1e-1, log=True), - 'alpha_1': trial.suggest_float(f'alpha_1_{name}', 1e-6, 1e-1, log=True), - 'alpha_2': trial.suggest_float(f'alpha_2_{name}', 1e-6, 1e-1, log=True), - 'lambda_1': trial.suggest_float(f'lambda_1_{name}', 1e-6, 1e-1, log=True), - 'lambda_2': trial.suggest_float(f'lambda_2_{name}', 1e-6, 1e-1, log=True), - 'compute_score': trial.suggest_categorical(f'compute_score_{name}', [True, False]), - 'fit_intercept': True, - 'normalize': trial.suggest_categorical(f'normalize_{name}', [True, False]), - 'copy_X': trial.suggest_categorical(f'copy_X_{name}', [True, False]), - } - return params - -# LassoLars parameters -def params_LassoLars(trial, random_state=None, name=None): - params = { - 'alpha': trial.suggest_float(f'alpha_{name}', 0.0, 1.0), - # 'fit_intercept': True, - # 'normalize': trial.suggest_categorical(f'normalize_{name}', [True, False]), - # 'precompute': trial.suggest_categorical(f'precompute_{name}', ['auto_{name}', True, False]), - #'max_iter': trial.suggest_int(f'max_iter_{name}', 100, 1000), - 
'eps': trial.suggest_float(f'eps_{name}', 1e-5, 1e-1, log=True), - # 'copy_X': trial.suggest_categorical(f'copy_X_{name}', [True, False]), - # 'positive': trial.suggest_categorical(f'positive_{name}', [True, False]), - 'random_state': random_state - } - return params + ) + +def get_BayesianRidge_ConfigurationSpace(): + return ConfigurationSpace( + space = { + 'tol': 0.0001, + 'alpha_1': Float("alpha_1", bounds=(1e-6, 1e-1), log=True), + 'alpha_2': Float("alpha_2", bounds=(1e-6, 1e-1), log=True), + 'lambda_1': Float("lambda_1", bounds=(1e-6, 1e-1), log=True), + 'lambda_2': Float("lambda_2", bounds=(1e-6, 1e-1), log=True), + } + ) -# LassoLars parameters -def params_LassoLarsCV(trial, cv, name=None): - params = { - 'normalize': trial.suggest_categorical(f'normalize_{name}', [True, False]), - 'cv': cv, - } - return params - -# BaggingRegressor parameters -def params_BaggingRegressor(trial, random_state=None, name=None): - params = { - 'n_estimators': trial.suggest_int(f'n_estimators_{name}', 10, 100), - 'max_samples': trial.suggest_float(f'max_samples_{name}', 0.05, 1.00), - 'max_features': trial.suggest_float(f'max_features_{name}', 0.05, 1.00), - 'bootstrap': trial.suggest_categorical(f'bootstrap_{name}', [True, False]), - 'bootstrap_features': trial.suggest_categorical(f'bootstrap_features_{name}', [True, False]), - 'random_state': random_state - } - return params - -# ARDRegression parameters -def params_ARDRegression(trial, name=None): - params = { - 'n_iter': trial.suggest_int(f'n_iter_{name}', 100, 1000), - 'tol': trial.suggest_float(f'tol_{name}', 1e-5, 1e-1, log=True), - 'alpha_1': trial.suggest_float(f'alpha_1_{name}', 1e-6, 1e-1, log=True), - 'alpha_2': trial.suggest_float(f'alpha_2_{name}', 1e-6, 1e-1, log=True), - 'lambda_1': trial.suggest_float(f'lambda_1_{name}', 1e-6, 1e-1, log=True), - 'lambda_2': trial.suggest_float(f'lambda_2_{name}', 1e-6, 1e-1, log=True), - 'compute_score': trial.suggest_categorical(f'compute_score_{name}', [True, False]), - 
'threshold_lambda': trial.suggest_int(f'threshold_lambda_{name}', 100, 1000), - 'fit_intercept': True, - 'normalize': trial.suggest_categorical(f'normalize_{name}', [True, False]), - 'copy_X': trial.suggest_categorical(f'copy_X_{name}', [True, False]), + +def get_LassoLars_ConfigurationSpace(random_state=None): + space = { + 'alpha': Float("alpha", bounds=(0.0, 1.0)), + 'eps': Float("eps", bounds=(1e-5, 1e-1), log=True), } - return params + if random_state is not None: #This is required because configspace doesn't allow None as a value + space['random_state'] = random_state + return ConfigurationSpace( + space = space + ) -# TheilSenRegressor parameters -def params_TheilSenRegressor(trial, random_state=None, name=None): - params = { - 'n_subsamples': trial.suggest_int(f'n_subsamples_{name}', 10, 100), - 'max_subpopulation': trial.suggest_int(f'max_subpopulation_{name}', 100, 1000), - 'fit_intercept': True, - 'copy_X': trial.suggest_categorical(f'copy_X_{name}', [True, False]), - 'verbose': trial.suggest_categorical(f'verbose_{name}', [True, False]), - 'random_state': random_state - } - return params +def get_LassoLarsCV_ConfigurationSpace(cv): + return ConfigurationSpace( + space = { + 'cv': cv, + } + ) -# SVR parameters -def params_SVR(trial, name=None): - params = { - 'kernel': trial.suggest_categorical(name=f'kernel_{name}', choices=['poly', 'rbf', 'linear', 'sigmoid']), - 'C': trial.suggest_float(f'C_{name}', 1e-4, 25, log=True), - 'degree': trial.suggest_int(f'degree_{name}', 1, 4), - 'max_iter': 3000, - 'tol': 0.005, - } - return params - -# Perceptron parameters -def params_Perceptron(trial, random_state=None, name=None): - params = { - 'penalty': trial.suggest_categorical(f'penalty_{name}', [None, 'l2', 'l1', 'elasticnet']), - 'alpha': trial.suggest_float(f'alpha_{name}', 1e-5, 1e-1, log=True), - 'l1_ratio': trial.suggest_float(f'l1_ratio_{name}', 0.0, 1.0), - 'fit_intercept': True, - #'max_iter': trial.suggest_int(f'max_iter_{name}', 100, 1000), - 'tol': 
trial.suggest_float(f'tol_{name}', 1e-5, 1e-1, log=True), - 'shuffle': trial.suggest_categorical(f'shuffle_{name}', [True, False]), - 'verbose': trial.suggest_categorical(f'verbose_{name}', [0, 1, 2, 3, 4, 5]), - 'eta0': trial.suggest_float(f'eta0_{name}', 1e-5, 1e-1, log=True), - 'learning_rate': trial.suggest_categorical(f'learning_rate_{name}', ['constant', 'optimal', 'invscaling']), - 'early_stopping': trial.suggest_categorical(f'early_stopping_{name}', [True, False]), - 'validation_fraction': trial.suggest_float(f'validation_fraction_{name}', 0.05, 1.00), - 'n_iter_no_change': trial.suggest_int(f'n_iter_no_change_{name}', 1, 100), - 'class_weight': trial.suggest_categorical(f'class_weight_{name}', [None, 'balanced']), - 'warm_start': trial.suggest_categorical(f'warm_start_{name}', [True, False]), - 'average': trial.suggest_categorical(f'average_{name}', [True, False]), - 'random_state': random_state +def get_BaggingRegressor_ConfigurationSpace(random_state=None): + space = { + 'max_samples': Float("max_samples", bounds=(0.05, 1.00)), + 'max_features': Float("max_features", bounds=(0.05, 1.00)), + 'bootstrap': Categorical("bootstrap", [True, False]), + 'bootstrap_features': Categorical("bootstrap_features", [True, False]), } - return params -def params_MLPRegressor(trial, random_state=None, name=None): - params = { - 'alpha': trial.suggest_float(f'alpha_{name}', 1e-4, 1e-1, log=True), - 'learning_rate_init': trial.suggest_float(f'learning_rate_init_{name}', 1e-3, 1., log=True), - 'random_state': random_state - } + if random_state is not None: #This is required because configspace doesn't allow None as a value + space['random_state'] = random_state - return params + return ConfigurationSpace( + space = space + ) +def get_ARDRegression_ConfigurationSpace(): + return ConfigurationSpace( + space = { -#GradientBoostingRegressor parameters -def params_GradientBoostingRegressor(trial, random_state=None, name=None): - loss = trial.suggest_categorical(f'loss_{name}', 
['ls', 'lad', 'huber', 'quantile']) + 'alpha_1': Float("alpha_1", bounds=(1e-6, 1e-1), log=True), + 'alpha_2': Float("alpha_2", bounds=(1e-6, 1e-1), log=True), + 'lambda_1': Float("lambda_1", bounds=(1e-6, 1e-1), log=True), + 'lambda_2': Float("lambda_2", bounds=(1e-6, 1e-1), log=True), + 'threshold_lambda': Integer("threshold_lambda", bounds=(100, 1000)), - params = { - - 'n_estimators': 100, - 'loss': loss, - 'learning_rate': trial.suggest_float(f'learning_rate_{name}', 1e-4, 1, log=True), - 'max_depth': trial.suggest_int(f'max_depth_{name}', 1, 11), - 'min_samples_split': trial.suggest_int(f'min_samples_split_{name}', 2, 21), - 'min_samples_leaf': trial.suggest_int(f'min_samples_leaf_{name}', 1, 21), - 'subsample': 1-trial.suggest_float(f'subsample_{name}', 0.05, 1.00, log=True), - 'max_features': 1-trial.suggest_float(f'max_features_{name}', 0.05, 1.00, log=True), - 'random_state': random_state + } + ) +def get_TheilSenRegressor_ConfigurationSpace(random_state=None): + space = { + 'n_subsamples': Integer("n_subsamples", bounds=(10, 100)), + 'max_subpopulation': Integer("max_subpopulation", bounds=(100, 1000)), } - if loss == 'quantile' or loss == 'huber': - alpha = trial.suggest_float(f'alpha_{name}', 0.05, 0.95) - params['alpha'] = alpha + if random_state is not None: #This is required because configspace doesn't allow None as a value + space['random_state'] = random_state - return params + return ConfigurationSpace( + space = space + ) +def get_SVR_ConfigurationSpace(): + return ConfigurationSpace( + space = { + 'kernel': Categorical("kernel", ['poly', 'rbf', 'linear', 'sigmoid']), + 'C': Float("C", bounds=(1e-4, 25), log=True), + 'degree': Integer("degree", bounds=(1, 4)), + 'max_iter': 3000, + 'tol': 0.005, + } + ) -def params_DecisionTreeRegressor(trial, random_state=None, name=None): - params = { - 'max_depth': trial.suggest_int(f'max_depth_{name}', 1,11), - 'min_samples_split': trial.suggest_int(f'min_samples_split_{name}', 2, 21), - 'min_samples_leaf': 
trial.suggest_int(f'min_samples_leaf_{name}', 1, 21), - # 'criterion': trial.suggest_categorical(f'criterion_{name}', ['squared_error', 'friedman_mse', 'absolute_error', 'poisson']), - # 'splitter': trial.suggest_categorical(f'splitter_{name}', ['best', 'random']), - #'max_features': trial.suggest_categorical(f'max_features_{name}', [None, 'auto', 'sqrt', 'log2']), - #'ccp_alpha': trial.suggest_float(f'ccp_alpha_{name}', 1e-1, 10.0), - 'random_state': random_state +def get_Perceptron_ConfigurationSpace(random_state=None): + space = { + 'penalty': Categorical("penalty", [None, 'l2', 'l1', 'elasticnet']), + 'alpha': Float("alpha", bounds=(1e-5, 1e-1), log=True), + 'l1_ratio': Float("l1_ratio", bounds=(0.0, 1.0)), + 'learning_rate': Categorical("learning_rate", ['constant', 'optimal', 'invscaling']), + 'validation_fraction': Float("validation_fraction", bounds=(0.05, 1.00)), } - return params - -def params_KNeighborsRegressor(trial, name=None, n_samples=100): - params = { - 'n_neighbors': trial.suggest_int(f'n_neighbors_{name}', 1, n_samples, log=True ), - 'weights': trial.suggest_categorical(f'weights_{name}', ['uniform', 'distance']), - 'p': trial.suggest_int(f'p_{name}', 1, 3), - 'metric': trial.suggest_categorical(f'metric_{name}', ['minkowski', 'euclidean', 'manhattan']), - } - return params + if random_state is not None: #This is required because configspace doesn't allow None as a value + space['random_state'] = random_state -def params_LinearSVR(trial, random_state=None, name=None): - params = { - 'epsilon': trial.suggest_float(f'epsilon_{name}', 1e-4, 1.0, log=True), - 'C': trial.suggest_float(f'C_{name}', 1e-4,25.0, log=True), - 'dual': trial.suggest_categorical(f'dual_{name}', [True,False]), - 'loss': trial.suggest_categorical(f'loss_{name}', ['epsilon_insensitive', 'squared_epsilon_insensitive']), - 'random_state': random_state + return ConfigurationSpace( + space = space + ) +def get_MLPRegressor_ConfigurationSpace(random_state=None): + space = { + 
'alpha': Float("alpha", bounds=(1e-4, 1e-1), log=True), + 'learning_rate_init': Float("learning_rate_init", bounds=(1e-3, 1.), log=True), } - return params + if random_state is not None: #This is required because configspace doesn't allow None as a value + space['random_state'] = random_state -# XGBRegressor parameters -def params_XGBRegressor(trial, random_state=None, name=None): - return { - 'learning_rate': trial.suggest_float(f'learning_rate_{name}', 1e-3, 1, log=True), - 'subsample': trial.suggest_float(f'subsample_{name}', 0.05, 1.0), - 'min_child_weight': trial.suggest_int(f'min_child_weight_{name}', 1, 21), - #'booster': trial.suggest_categorical(name='booster_{name}', choices=['gbtree', 'dart']), - 'n_estimators': 100, - 'max_depth': trial.suggest_int(f'max_depth_{name}', 1, 11), - 'nthread': 1, - 'verbosity': 0, - 'objective': 'reg:squarederror', - 'random_state': random_state - } + return ConfigurationSpace( + space = space + ) -def params_AdaBoostRegressor(trial, random_state=None, name=None): - params = { +def get_GradientBoostingRegressor_ConfigurationSpace(random_state=None): + space = { 'n_estimators': 100, - 'learning_rate': trial.suggest_float(f'learning_rate_{name}', 1e-3, 1.0, log=True), - 'loss': trial.suggest_categorical(f'loss_{name}', ['linear', 'square', 'exponential']), - 'random_state': random_state + 'loss': Categorical("loss", ['ls', 'lad', 'huber', 'quantile']), + 'learning_rate': Float("learning_rate", bounds=(1e-4, 1), log=True), + 'max_depth': Integer("max_depth", bounds=(1, 11)), + 'min_samples_split': Integer("min_samples_split", bounds=(2, 21)), + 'min_samples_leaf': Integer("min_samples_leaf", bounds=(1, 21)), + 'subsample': Float("subsample", bounds=(0.05, 1.00)), + 'max_features': Float("max_features", bounds=(0.05, 1.00)), + } + +def get_DecisionTreeRegressor_ConfigurationSpace(random_state=None): + space = { + 'max_depth': Integer("max_depth", bounds=(1, 11)), + 'min_samples_split': Integer("min_samples_split", bounds=(2, 
21)), + 'min_samples_leaf': Integer("min_samples_leaf", bounds=(1, 21)), } - return params -# ExtraTreesRegressor parameters -def params_ExtraTreesRegressor(trial, random_state=None, name=None): - params = { - 'n_estimators': 100, - 'max_features': trial.suggest_float(f'max_features_{name}', 0.05, 1.0), - 'min_samples_split': trial.suggest_int(f'min_samples_split_{name}', 2, 21), - 'min_samples_leaf': trial.suggest_int(f'min_samples_leaf_{name}', 1, 21), - 'bootstrap': trial.suggest_categorical(f'bootstrap_{name}', [True, False]), + return ConfigurationSpace( + space = space + ) - #'criterion': trial.suggest_categorical(f'criterion_{name}', ['squared_error', 'poisson', 'absolute_error', 'friedman_mse']), - #'max_depth': trial.suggest_int(f'max_depth_{name}', 1, 10), +def get_KNeighborsRegressor_ConfigurationSpace(n_samples=100): + return ConfigurationSpace( + space = { + 'n_neighbors': Integer("n_neighbors", bounds=(1, n_samples)), + 'weights': Categorical("weights", ['uniform', 'distance']), + 'p': Integer("p", bounds=(1, 3)), + 'metric': Categorical("metric", ['minkowski', 'euclidean', 'manhattan']), + } + ) + +def get_LinearSVR_ConfigurationSpace(random_state=None): + space = { + 'epsilon': Float("epsilon", bounds=(1e-4, 1.0), log=True), + 'C': Float("C", bounds=(1e-4, 25.0), log=True), + 'dual': Categorical("dual", [True, False]), + 'loss': Categorical("loss", ['epsilon_insensitive', 'squared_epsilon_insensitive']), + } - #'min_weight_fraction_leaf': trial.suggest_float(f'min_weight_fraction_leaf_{name}', 0.0, 0.5), - # 'max_features': trial.suggest_categorical(f'max_features_{name}', [None, 'auto', 'sqrt', 'log2']), - #'max_leaf_nodes': trial.suggest_int(f'max_leaf_nodes_{name}', 2, 100), - #'min_impurity_decrease': trial.suggest_float(f'min_impurity_decrease_{name}', 1e-5, 1e-1, log=True), - # 'min_impurity_split': trial.suggest_float(f'min_impurity_split_{name}', 1e-5, 1e-1, log=True), + if random_state is not None: #This is required because configspace 
doesn't allow None as a value + space['random_state'] = random_state - #if bootstrap is True - #'oob_score': trial.suggest_categorical(f'oob_score_{name}', [True, False]), + return ConfigurationSpace( + space = space + ) - #'ccp_alpha': trial.suggest_float(f'ccp_alpha_{name}', 1e-5, 1e-1, log=True), - # 'max_samples': trial.suggest_float(f'max_samples_{name}', 0.05, 1.00), - 'random_state': random_state +def get_XGBRegressor_ConfigurationSpace(random_state=None): + space = { + 'learning_rate': Float("learning_rate", bounds=(1e-3, 1), log=True), + 'subsample': Float("subsample", bounds=(0.05, 1.0)), + 'min_child_weight': Integer("min_child_weight", bounds=(1, 21)), + 'n_estimators': 100, + 'max_depth': Integer("max_depth", bounds=(1, 11)), + 'nthread': 1, + 'verbosity': 0, + 'objective': 'reg:squarederror', } - return params + if random_state is not None: #This is required because configspace doesn't allow None as a value + space['random_state'] = random_state + return ConfigurationSpace( + space = space + ) -def make_regressor_config_dictionary(random_state=None, cv=None, n_samples=10): - n_samples = min(n_samples,100) #TODO optimize this +def get_AdaBoostRegressor_ConfigurationSpace(random_state=None): - regressor_config_dictionary = { - #ElasticNet: params_ElasticNet, - ElasticNetCV: { - 'l1_ratio': [.1, .5, .7, .9, .95, .99, 1], - 'cv': cv, - }, - ExtraTreesRegressor: partial(params_ExtraTreesRegressor, random_state=random_state), - GradientBoostingRegressor: partial(params_GradientBoostingRegressor, random_state=random_state), - AdaBoostRegressor: partial(params_AdaBoostRegressor, random_state=random_state), - DecisionTreeRegressor: partial(params_DecisionTreeRegressor, random_state=random_state), - KNeighborsRegressor: partial(params_KNeighborsRegressor,n_samples=n_samples), - LassoLarsCV: partial(params_LassoLarsCV, cv=cv), - SVR: params_SVR, - RandomForestRegressor: partial(params_RandomForestRegressor, random_state=random_state), - RidgeCV: {'cv': cv}, - 
XGBRegressor: partial(params_XGBRegressor, random_state=random_state), - SGDRegressor: partial(params_SGDRegressor, random_state= random_state), + space = { + 'n_estimators': Integer("n_estimators", bounds=(50, 100)), + 'learning_rate': Float("learning_rate", bounds=(1e-3, 1.0), log=True), + 'loss': Categorical("loss", ['linear', 'square', 'exponential']), + } + + + if random_state is not None: #This is required because configspace doesn't allow None as a value + space['random_state'] = random_state + return ConfigurationSpace( + space = space + ) + +def get_ExtraTreesRegressor_ConfigurationSpace(random_state=None): + space = { + 'n_estimators': 100, + 'max_features': Float("max_features", bounds=(0.05, 1.0)), + 'min_samples_split': Integer("min_samples_split", bounds=(2, 21)), + 'min_samples_leaf': Integer("min_samples_leaf", bounds=(1, 21)), + 'bootstrap': Categorical("bootstrap", [True, False]), } - return regressor_config_dictionary \ No newline at end of file + if random_state is not None: #This is required because configspace doesn't allow None as a value + space['random_state'] = random_state + + return ConfigurationSpace( + space = space + ) \ No newline at end of file diff --git a/tpot2/config/regressors_sklearnex.py b/tpot2/config/regressors_sklearnex.py index 279d2dba..298407cb 100644 --- a/tpot2/config/regressors_sklearnex.py +++ b/tpot2/config/regressors_sklearnex.py @@ -13,63 +13,100 @@ from functools import partial +from ConfigSpace import ConfigurationSpace +from ConfigSpace import ConfigurationSpace, Integer, Float, Categorical, Normal -def params_RandomForestRegressor(trial, random_state=None, name=None): - return { + + +def get_RandomForestRegressor_ConfigurationSpace(random_state=None): + space = { 'n_estimators': 100, - 'max_features': trial.suggest_float(f'max_features_{name}', 0.05, 1.0), - 'bootstrap': trial.suggest_categorical(name=f'bootstrap_{name}', choices=[True, False]), - 'min_samples_split': 
trial.suggest_int(f'min_samples_split_{name}', 2, 21), - 'min_samples_leaf': trial.suggest_int(f'min_samples_leaf_{name}', 1, 21), - 'random_state': random_state + 'max_features': Float("max_features", bounds=(0.05, 1.0)), + 'bootstrap': Categorical("bootstrap", [True, False]), + 'min_samples_split': Integer("min_samples_split", bounds=(2, 21)), + 'min_samples_leaf': Integer("min_samples_leaf", bounds=(1, 21)), } + + if random_state is not None: #This is required because configspace doesn't allow None as a value + space['random_state'] = random_state + + return ConfigurationSpace( + space = space + ) -def params_KNeighborsRegressor(trial, name=None, n_samples=100): - n_neighbors_max = max(n_samples, 100) - return { - 'n_neighbors': trial.suggest_int(f'n_neighbors_{name}', 1, n_neighbors_max), - 'weights': trial.suggest_categorical(f'weights_{name}', ['uniform', 'distance']), + +def get_KNeighborsRegressor_ConfigurationSpace(n_samples=100): + return ConfigurationSpace( + space = { + 'n_neighbors': Integer("n_neighbors", bounds=(1, max(n_samples, 100))), + 'weights': Categorical("weights", ['uniform', 'distance']), } + ) -def params_LinearRegression(trial, name=None): - return {} +LinearRegression_configspace = ConfigurationSpace() -def params_Ridge(trial, random_state=None, name=None): - return { - 'alpha': trial.suggest_float(f'alpha_{name}', 0.0, 1.0), - 'fit_intercept': True, - 'tol': trial.suggest_float(f'tol_{name}', 1e-5, 1e-1, log=True), - 'random_state': random_state + + +def get_Ridge_ConfigurationSpace(random_state=None): + space = { + 'alpha': Float("alpha", bounds=(0.0, 1.0)), + 'fit_intercept': Categorical("fit_intercept", [True]), + 'tol': Float("tol", bounds=(1e-5, 1e-1)), } -def params_Lasso(trial, random_state=None, name=None): - return { - 'alpha': trial.suggest_float(f'alpha_{name}', 0.0, 1.0), - 'fit_intercept': True, - 'precompute': trial.suggest_categorical(f'precompute_{name}', [True, False, 'auto']), - 'tol': 
trial.suggest_float(f'tol_{name}', 1e-5, 1e-1, log=True), - 'positive': trial.suggest_categorical(f'positive_{name}', [True, False]), - 'selection': trial.suggest_categorical(f'selection_{name}', ['cyclic', 'random']), - 'random_state': random_state + if random_state is not None: #This is required because configspace doesn't allow None as a value + space['random_state'] = random_state + + return ConfigurationSpace( + space = space + ) + +def get_Lasso_ConfigurationSpace(random_state=None): + space = { + 'alpha': Float("alpha", bounds=(0.0, 1.0)), + 'fit_intercept': Categorical("fit_intercept", [True]), + 'precompute': Categorical("precompute", [True, False, 'auto']), + 'tol': 0.001, + 'positive': Categorical("positive", [True, False]), + 'selection': Categorical("selection", ['cyclic', 'random']), } -def params_ElasticNet(trial, random_state=None, name=None): - params = { - 'alpha': 1 - trial.suggest_float(f'alpha_{name}', 0.0, 1.0, log=True), - 'l1_ratio': 1- trial.suggest_float(f'l1_ratio_{name}',0.0, 1.0), - 'random_state': random_state - } - return params + if random_state is not None: #This is required because configspace doesn't allow None as a value + space['random_state'] = random_state -def params_SVR(trial, name=None): - params = { - 'kernel': trial.suggest_categorical(name=f'kernel_{name}', choices=['poly', 'rbf', 'linear', 'sigmoid']), - 'C': trial.suggest_float(f'C_{name}', 1e-4, 25, log=True), - 'degree': trial.suggest_int(f'degree_{name}', 1, 4), + return ConfigurationSpace( + space = space + ) + +def get_ElasticNet_ConfigurationSpace(random_state=None): + space = { + 'alpha': Float("alpha", bounds=(0.0, 1.0)), + 'l1_ratio': Float("l1_ratio", bounds=(0.0, 1.0)), + } + + if random_state is not None: #This is required because configspace doesn't allow None as a value + space['random_state'] = random_state + + return ConfigurationSpace( + space = space + ) + + +def get_SVR_ConfigurationSpace(random_state=None): + space = { + 'kernel': 
Categorical("kernel", ['poly', 'rbf', 'linear', 'sigmoid']), + 'C': Float("C", bounds=(1e-4, 25), log=True), + 'degree': Integer("degree", bounds=(1, 4)), 'max_iter': 3000, - 'tol': 0.005, + 'tol': 0.001, } - return params + + if random_state is not None: #This is required because configspace doesn't allow None as a value + space['random_state'] = random_state + + return ConfigurationSpace( + space = space + ) def params_NuSVR(trial, name=None): return { @@ -81,14 +118,19 @@ def params_NuSVR(trial, name=None): 'tol': 0.005, } -def make_sklearnex_regressor_config_dictionary(random_state=None, n_samples=10): - return { - RandomForestRegressor: partial(params_RandomForestRegressor, random_state=random_state), - KNeighborsRegressor: params_KNeighborsRegressor, - LinearRegression: params_LinearRegression, - Ridge: partial(params_Ridge, random_state=random_state), - Lasso: partial(params_Lasso, random_state=random_state), - ElasticNet: partial(params_ElasticNet, random_state=random_state), - SVR: params_SVR, - NuSVR: params_NuSVR, +def get_NuSVR_ConfigurationSpace(random_state=None): + space = { + 'nu': Float("nu", bounds=(0.05, 1.0)), + 'kernel': Categorical("kernel", ['poly', 'rbf', 'linear', 'sigmoid']), + 'C': Float("C", bounds=(1e-4, 25), log=True), + 'degree': Integer("degree", bounds=(1, 4)), + 'max_iter': 3000, + 'tol': 0.005, } + + if random_state is not None: #This is required because configspace doesn't allow None as a value + space['random_state'] = random_state + + return ConfigurationSpace( + space = space + ) \ No newline at end of file diff --git a/tpot2/config/special_configs.py b/tpot2/config/special_configs.py index a6745b6f..cdecfe7b 100644 --- a/tpot2/config/special_configs.py +++ b/tpot2/config/special_configs.py @@ -4,60 +4,43 @@ import numpy as np from tpot2.builtin_modules import AddTransformer, mul_neg_1_Transformer, MulTransformer, SafeReciprocalTransformer, EQTransformer, NETransformer, GETransformer, GTTransformer, LETransformer, 
LTTransformer, MinTransformer, MaxTransformer, ZeroTransformer, OneTransformer, NTransformer -# ArithmeticTransformer -def params_arthmetic_operator(trial, name=None): - return { - 'function': trial.suggest_categorical(f'function_{name}', ["add", "mul_neg_1", "mul", "safe_reciprocal", "eq","ne","ge","gt","le","lt", "min","max","0","1"]), - } - -def make_arithmetic_transformer_config_dictionary(): - return { - AddTransformer: {}, - mul_neg_1_Transformer: {}, - MulTransformer: {}, - SafeReciprocalTransformer: {}, - EQTransformer: {}, - NETransformer: {}, - GETransformer: {}, - GTTransformer: {}, - LETransformer: {}, - LTTransformer: {}, - MinTransformer: {}, - MaxTransformer: {}, - } - +from ConfigSpace import ConfigurationSpace +from ConfigSpace import ConfigurationSpace, Integer, Float, Categorical, Normal +def get_ArithmeticTransformer_ConfigurationSpace(): + return ConfigurationSpace( + space = { + 'function': Categorical("function", ["add", "mul_neg_1", "mul", "safe_reciprocal", "eq","ne","ge","gt","le","lt", "min","max","0","1"]), + } + ) -def params_feature_set_selector(trial, name=None, names_list = None, subset_dict=None): - """Create a dictionary of parameters for FeatureSetSelector. - Parameters - ---------- - trial: optuna.trial.Trial - A trial corresponds to the evaluation of a objective function. - name: string - Used for compatibility in when calling multiple optuna of multiple parameters at once. - names_list: list of string - List of names of the feature set selector. To more easily keep track of what the subsets represent. - Included to prevent repeat calls to list(subset_dict.keys()) which may be slow and/or have different orderings - subset_dict: dictionary - A dictionary of subsets. The keys are the names of the subsets and the values are the subsets. - - Returns - ------- - params: dictionary - A dictionary of parameters for FeatureSetSelector. 
- """ +# def make_arithmetic_transformer_config_dictionary(): +# return { +# AddTransformer: {}, +# mul_neg_1_Transformer: {}, +# MulTransformer: {}, +# SafeReciprocalTransformer: {}, +# EQTransformer: {}, +# NETransformer: {}, +# GETransformer: {}, +# GTTransformer: {}, +# LETransformer: {}, +# LTTransformer: {}, +# MinTransformer: {}, +# MaxTransformer: {}, +# } - subset_name = trial.suggest_categorical(f'subset_name_{name}', names_list) - params = {'name': subset_name, - 'sel_subset': subset_dict[subset_name], - } +def get_FeatureSetSelector_ConfigurationSpace(names_list = None, subset_dict=None): + return ConfigurationSpace( + space = { + 'name': Categorical("name", names_list), + } + ) - return params def make_FSS_config_dictionary(subsets=None, n_features=None, feature_names=None): """Create the search space of parameters for FeatureSetSelector. @@ -95,14 +78,8 @@ def make_FSS_config_dictionary(subsets=None, n_features=None, feature_names=None names_list = list(subset_dict.keys()) - return {FeatureSetSelector: partial(params_feature_set_selector, names_list = names_list, subset_dict=subset_dict)} - - - -from tpot2.builtin_modules import Passthrough - -def params_passthrough(trial, name=None): - return {} + return ConfigurationSpace({ + 'name': Categorical("name", names_list), + 'subset_dict': Categorical("subset", subset_dict), + }) -def make_passthrough_config_dictionary(): - return {Passthrough: params_passthrough} \ No newline at end of file diff --git a/tpot2/config/transformers.py b/tpot2/config/transformers.py index fca4932c..f74d5e18 100644 --- a/tpot2/config/transformers.py +++ b/tpot2/config/transformers.py @@ -13,12 +13,12 @@ ) PCA_configspace = ConfigurationSpace( - space={'n_components': Float('n_components', bounds=(0.001, 0.999))} + space={'n_components': Float('n_components', bounds=(0.5, 0.999))} ) -ZeroCount_configspace = ConfigurationSpace() +ZeroCount_configspace = {} -OneHotEncoder_configspace = ConfigurationSpace() #TODO include the 
parameter for max unique values +OneHotEncoder_configspace = {} #TODO include the parameter for max unique values def get_FastICA_configspace(n_features=100, random_state=None): @@ -35,7 +35,7 @@ def get_FastICA_configspace(n_features=100, random_state=None): space = space ) - +#TODO conditional parameters def get_FeatureAgglomeration_configspace(n_features=100): return ConfigurationSpace( space = { diff --git a/tpot2/search_spaces/nodes/estimator_node.py b/tpot2/search_spaces/nodes/estimator_node.py index e44dc4f1..6bea7615 100644 --- a/tpot2/search_spaces/nodes/estimator_node.py +++ b/tpot2/search_spaces/nodes/estimator_node.py @@ -11,17 +11,23 @@ class EstimatorNodeIndividual(SklearnIndividual): def __init__(self, method: type, - space: ConfigurationSpace, + space: ConfigurationSpace|dict, #TODO If a dict is passed, hyperparameters are fixed and not learned. Is this confusing? Should we make a second node type? rng=None) -> None: super().__init__() self.method = method self.space = space - rng = np.random.default_rng(rng) - self.space.seed(rng.integers(0, 2**32)) - self.hyperparameters = self.space.sample_configuration().get_dictionary() + if isinstance(space, dict): + self.space = space + else: + rng = np.random.default_rng(rng) + self.space.seed(rng.integers(0, 2**32)) + self.hyperparameters = self.space.sample_configuration().get_dictionary() def mutate(self, rng=None): + if isinstance(self.space, dict): + return False + rng = np.random.default_rng(rng) self.space.seed(rng.integers(0, 2**32)) self.hyperparameters = self.space.sample_configuration().get_dictionary() @@ -29,6 +35,9 @@ def mutate(self, rng=None): return True def crossover(self, other, rng=None): + if isinstance(self.space, dict): + return False + rng = np.random.default_rng(rng) if self.method != other.method: return False diff --git a/tpot2/config/all_single_modules.py b/tpot2/search_spaces/nodes/fss_node.py similarity index 100% rename from tpot2/config/all_single_modules.py rename to 
tpot2/search_spaces/nodes/fss_node.py From 5b5b22254c08bc7a6526a2897863c20a3fcd598e Mon Sep 17 00:00:00 2001 From: perib Date: Fri, 22 Mar 2024 15:10:31 -0700 Subject: [PATCH 03/75] edits --- Tutorial/2_Search_Spaces.ipynb | 178 +++++++++-------- Tutorial/5_GraphPipeline.ipynb | 4 +- .../Example_Search_Spaces/imputation.ipynb | 85 ++++++++ tpot2/config/classifiers.py | 21 +- tpot2/config/get_configspace.py | 188 ++++++++++-------- tpot2/config/imputers.py | 9 +- tpot2/config/mdr_configs.py | 3 - tpot2/graphsklearn.py | 92 ++------- .../nodes/estimator_node_custom_sampler.py | 52 +++++ tpot2/tpot_estimator/estimator.py | 26 +-- tpot2/tpot_estimator/estimator_utils.py | 9 +- .../templates/tpot_autoimputer.py | 0 12 files changed, 388 insertions(+), 279 deletions(-) create mode 100644 Tutorial/Example_Search_Spaces/imputation.ipynb create mode 100644 tpot2/search_spaces/nodes/estimator_node_custom_sampler.py create mode 100644 tpot2/tpot_estimator/templates/tpot_autoimputer.py diff --git a/Tutorial/2_Search_Spaces.ipynb b/Tutorial/2_Search_Spaces.ipynb index 853ca61f..e21d6c5c 100644 --- a/Tutorial/2_Search_Spaces.ipynb +++ b/Tutorial/2_Search_Spaces.ipynb @@ -31,7 +31,7 @@ "output_type": "stream", "text": [ "sampled hyperparameters\n", - "{'metric': 'minkowski', 'n_jobs': 1, 'n_neighbors': 6, 'p': 2, 'weights': 'distance'}\n" + "{'metric': 'minkowski', 'n_jobs': 1, 'n_neighbors': 1, 'p': 1, 'weights': 'distance'}\n" ] } ], @@ -78,7 +78,7 @@ "| Name | Info |\n", "| :--- | :----: |\n", "| EstimatorNode | Takes in a ConfigSpace along with the class of the method. This node will optimize the hyperparameters for a single method. |\n", - "| GeneticFeatureSelector | Uses evolution to optimize a set of features, exports a basic sklearn Selector that simply selects the features chosen by the node. |\n", + "| GeneticFeatureSelectorNode | Uses evolution to optimize a set of features, exports a basic sklearn Selector that simply selects the features chosen by the node. 
|\n", "\n", "\n", "\n", @@ -94,7 +94,7 @@ "| Name | Info |\n", "| :--- | :----: |\n", "| ChoicePipeline | Takes in a list of search spaces. Will select one node from the search space. |\n", - "| SquentialPipeline | Takes in a list of search spaces. will produce a pipeline of Squential length. Each step in the pipeline will correspond to the the search space provided in the same index. |\n", + "| SequentialPipeline | Takes in a list of search spaces. will produce a pipeline of Sequential length. Each step in the pipeline will correspond to the the search space provided in the same index. |\n", "| DynamicLinearPipeline | Takes in a single search space. Will produce a linear pipeline of variable length. Each step in the pipeline will be pulled from the search space provided. |\n", "| TreePipeline |Generates a pipeline of variable length. Pipeline will have a tree structure similar to TPOT1. |\n", "| GraphPipeline | Generates a directed acyclic graph of variable size. Search spaces for root, leaf, and inner nodes can be defined separately if desired. 
|\n", @@ -154,9 +154,9 @@ "output_type": "stream", "text": [ "sampled hyperparameters\n", - "{'metric': 'minkowski', 'n_jobs': 1, 'n_neighbors': 4, 'p': 3, 'weights': 'distance'}\n", + "{'metric': 'minkowski', 'n_jobs': 1, 'n_neighbors': 8, 'p': 1, 'weights': 'uniform'}\n", "mutated hyperparameters\n", - "{'metric': 'euclidean', 'n_jobs': 1, 'n_neighbors': 8, 'p': 2, 'weights': 'uniform'}\n" + "{'metric': 'minkowski', 'n_jobs': 1, 'n_neighbors': 5, 'p': 2, 'weights': 'distance'}\n" ] } ], @@ -187,14 +187,14 @@ "output_type": "stream", "text": [ "original hyperparameters for individual 1\n", - "{'metric': 'euclidean', 'n_jobs': 1, 'n_neighbors': 6, 'p': 3, 'weights': 'distance'}\n", + "{'metric': 'euclidean', 'n_jobs': 1, 'n_neighbors': 9, 'p': 1, 'weights': 'uniform'}\n", "original hyperparameters for individual 2\n", - "{'metric': 'minkowski', 'n_jobs': 1, 'n_neighbors': 5, 'p': 2, 'weights': 'uniform'}\n", + "{'metric': 'minkowski', 'n_jobs': 1, 'n_neighbors': 3, 'p': 3, 'weights': 'distance'}\n", "\n", "post crossover hyperparameters for individual 1\n", - "{'metric': 'minkowski', 'n_jobs': 1, 'n_neighbors': 5, 'p': 3, 'weights': 'distance'}\n", + "{'metric': 'minkowski', 'n_jobs': 1, 'n_neighbors': 9, 'p': 3, 'weights': 'uniform'}\n", "post crossover hyperparameters for individual 2\n", - "{'metric': 'minkowski', 'n_jobs': 1, 'n_neighbors': 5, 'p': 2, 'weights': 'uniform'}\n" + "{'metric': 'minkowski', 'n_jobs': 1, 'n_neighbors': 3, 'p': 3, 'weights': 'distance'}\n" ] } ], @@ -233,10 +233,10 @@ { "data": { "text/html": [ - "
KNeighborsClassifier(n_jobs=1, p=3, weights='distance')
In a Jupyter environment, please rerun this cell to show the HTML representation or trust the notebook.
On GitHub, the HTML representation is unable to render, please try loading this page with nbviewer.org.
" + "
KNeighborsClassifier(n_jobs=1, n_neighbors=9, p=3)
In a Jupyter environment, please rerun this cell to show the HTML representation or trust the notebook.
On GitHub, the HTML representation is unable to render, please try loading this page with nbviewer.org.
" ], "text/plain": [ - "KNeighborsClassifier(n_jobs=1, p=3, weights='distance')" + "KNeighborsClassifier(n_jobs=1, n_neighbors=9, p=3)" ] }, "execution_count": 5, @@ -268,7 +268,18 @@ "cell_type": "code", "execution_count": 6, "metadata": {}, - "outputs": [], + "outputs": [ + { + "data": { + "text/plain": [ + "" + ] + }, + "execution_count": 6, + "metadata": {}, + "output_type": "execute_result" + } + ], "source": [ "import tpot2\n", "from ConfigSpace import ConfigurationSpace\n", @@ -335,22 +346,22 @@ ")\n", "\n", "\n", - "# tpot2.search_spaces.pipelines.ChoicePipeline(\n", - "# choice_list = [\n", - "# tpot2.search_spaces.nodes.EstimatorNode(\n", - "# method = KNeighborsClassifier,\n", - "# space = knn_configspace,\n", - "# ),\n", - "# tpot2.search_spaces.nodes.EstimatorNode(\n", - "# method = LogisticRegression,\n", - "# space = lr_configspace,\n", - "# ),\n", - "# tpot2.search_spaces.nodes.EstimatorNode(\n", - "# method = DecisionTreeClassifier,\n", - "# space = dt_configspace,\n", - "# ),\n", - "# ]\n", - "# )" + "tpot2.search_spaces.pipelines.ChoicePipeline(\n", + " choice_list = [\n", + " tpot2.search_spaces.nodes.EstimatorNode(\n", + " method = KNeighborsClassifier,\n", + " space = knn_configspace,\n", + " ),\n", + " tpot2.search_spaces.nodes.EstimatorNode(\n", + " method = LogisticRegression,\n", + " space = lr_configspace,\n", + " ),\n", + " tpot2.search_spaces.nodes.EstimatorNode(\n", + " method = DecisionTreeClassifier,\n", + " space = dt_configspace,\n", + " ),\n", + " ]\n", + ")" ] }, { @@ -375,10 +386,13 @@ { "data": { "text/html": [ - "
KNeighborsClassifier(metric='euclidean', n_jobs=1, n_neighbors=1, p=3)
In a Jupyter environment, please rerun this cell to show the HTML representation or trust the notebook.
On GitHub, the HTML representation is unable to render, please try loading this page with nbviewer.org.
" + "
LogisticRegression(C=0.4989834645092814, class_weight='balanced', dual=True,\n",
+       "                   max_iter=1000, n_jobs=1)
In a Jupyter environment, please rerun this cell to show the HTML representation or trust the notebook.
On GitHub, the HTML representation is unable to render, please try loading this page with nbviewer.org.
" ], "text/plain": [ - "KNeighborsClassifier(metric='euclidean', n_jobs=1, n_neighbors=1, p=3)" + "LogisticRegression(C=0.4989834645092814, class_weight='balanced', dual=True,\n", + " max_iter=1000, n_jobs=1)" ] }, "execution_count": 7, @@ -408,10 +422,13 @@ { "data": { "text/html": [ - "
KNeighborsClassifier(metric='euclidean', n_jobs=1, n_neighbors=7, p=1)
In a Jupyter environment, please rerun this cell to show the HTML representation or trust the notebook.
On GitHub, the HTML representation is unable to render, please try loading this page with nbviewer.org.
" + "
DecisionTreeClassifier(max_depth=9, max_features='log2', min_samples_leaf=12,\n",
+       "                       min_samples_split=4)
In a Jupyter environment, please rerun this cell to show the HTML representation or trust the notebook.
On GitHub, the HTML representation is unable to render, please try loading this page with nbviewer.org.
" ], "text/plain": [ - "KNeighborsClassifier(metric='euclidean', n_jobs=1, n_neighbors=7, p=1)" + "DecisionTreeClassifier(max_depth=9, max_features='log2', min_samples_leaf=12,\n", + " min_samples_split=4)" ] }, "execution_count": 8, @@ -449,27 +466,16 @@ "metadata": {}, "outputs": [ { - "name": "stdout", - "output_type": "stream", - "text": [ - "sampled pipeline 1\n" + "ename": "TypeError", + "evalue": "unhashable type: 'list'", + "output_type": "error", + "traceback": [ + "\u001b[0;31m---------------------------------------------------------------------------\u001b[0m", + "\u001b[0;31mTypeError\u001b[0m Traceback (most recent call last)", + "Cell \u001b[0;32mIn[9], line 2\u001b[0m\n\u001b[1;32m 1\u001b[0m \u001b[38;5;66;03m#same pipeline search space as before.\u001b[39;00m\n\u001b[0;32m----> 2\u001b[0m classifier_choice \u001b[38;5;241m=\u001b[39m \u001b[43mtpot2\u001b[49m\u001b[38;5;241;43m.\u001b[39;49m\u001b[43mconfig\u001b[49m\u001b[38;5;241;43m.\u001b[39;49m\u001b[43mget_search_space\u001b[49m\u001b[43m(\u001b[49m\u001b[43m[\u001b[49m\u001b[38;5;124;43m\"\u001b[39;49m\u001b[38;5;124;43mKNeighborsClassifier\u001b[39;49m\u001b[38;5;124;43m\"\u001b[39;49m\u001b[43m,\u001b[49m\u001b[43m \u001b[49m\u001b[38;5;124;43m\"\u001b[39;49m\u001b[38;5;124;43mLogisticRegression\u001b[39;49m\u001b[38;5;124;43m\"\u001b[39;49m\u001b[43m,\u001b[49m\u001b[43m \u001b[49m\u001b[38;5;124;43m\"\u001b[39;49m\u001b[38;5;124;43mDecisionTreeClassifier\u001b[39;49m\u001b[38;5;124;43m\"\u001b[39;49m\u001b[43m]\u001b[49m\u001b[43m)\u001b[49m\n\u001b[1;32m 4\u001b[0m \u001b[38;5;28mprint\u001b[39m(\u001b[38;5;124m\"\u001b[39m\u001b[38;5;124msampled pipeline 1\u001b[39m\u001b[38;5;124m\"\u001b[39m)\n\u001b[1;32m 5\u001b[0m classifier_choice\u001b[38;5;241m.\u001b[39mgenerate()\u001b[38;5;241m.\u001b[39mexport_pipeline()\n", + "File \u001b[0;32m~/common/Projects/TPOT_Dev/tpot2/tpot2/config/get_configspace.py:169\u001b[0m, in \u001b[0;36mget_search_space\u001b[0;34m(name, n_classes, 
n_samples, random_state)\u001b[0m\n\u001b[1;32m 168\u001b[0m \u001b[38;5;28;01mdef\u001b[39;00m \u001b[38;5;21mget_search_space\u001b[39m(name, n_classes\u001b[38;5;241m=\u001b[39m\u001b[38;5;241m3\u001b[39m, n_samples\u001b[38;5;241m=\u001b[39m\u001b[38;5;241m100\u001b[39m, random_state\u001b[38;5;241m=\u001b[39m\u001b[38;5;28;01mNone\u001b[39;00m):\n\u001b[0;32m--> 169\u001b[0m name \u001b[38;5;241m=\u001b[39m \u001b[43mGROUPNAMES\u001b[49m\u001b[43m[\u001b[49m\u001b[43mname\u001b[49m\u001b[43m]\u001b[49m\n\u001b[1;32m 171\u001b[0m \u001b[38;5;28;01mif\u001b[39;00m name \u001b[38;5;129;01mis\u001b[39;00m \u001b[38;5;28;01mNone\u001b[39;00m:\n\u001b[1;32m 172\u001b[0m \u001b[38;5;28;01mreturn\u001b[39;00m \u001b[38;5;28;01mNone\u001b[39;00m\n", + "\u001b[0;31mTypeError\u001b[0m: unhashable type: 'list'" ] - }, - { - "data": { - "text/html": [ - "
DecisionTreeClassifier(criterion='entropy', max_depth=4, max_features='sqrt',\n",
-       "                       min_samples_leaf=7, min_samples_split=5)
In a Jupyter environment, please rerun this cell to show the HTML representation or trust the notebook.
On GitHub, the HTML representation is unable to render, please try loading this page with nbviewer.org.
" - ], - "text/plain": [ - "DecisionTreeClassifier(criterion='entropy', max_depth=4, max_features='sqrt',\n", - " min_samples_leaf=7, min_samples_split=5)" - ] - }, - "execution_count": 9, - "metadata": {}, - "output_type": "execute_result" } ], "source": [ @@ -482,7 +488,7 @@ }, { "cell_type": "code", - "execution_count": 10, + "execution_count": null, "metadata": {}, "outputs": [ { @@ -495,13 +501,10 @@ { "data": { "text/html": [ - "
LogisticRegression(C=0.22118566188988883, class_weight='balanced',\n",
-       "                   max_iter=1000, n_jobs=1, solver='sag')
In a Jupyter environment, please rerun this cell to show the HTML representation or trust the notebook.
On GitHub, the HTML representation is unable to render, please try loading this page with nbviewer.org.
" + "
KNeighborsClassifier(metric='euclidean', n_jobs=1, n_neighbors=96)
In a Jupyter environment, please rerun this cell to show the HTML representation or trust the notebook.
On GitHub, the HTML representation is unable to render, please try loading this page with nbviewer.org.
" ], "text/plain": [ - "LogisticRegression(C=0.22118566188988883, class_weight='balanced',\n", - " max_iter=1000, n_jobs=1, solver='sag')" + "KNeighborsClassifier(metric='euclidean', n_jobs=1, n_neighbors=96)" ] }, "execution_count": 10, @@ -516,28 +519,23 @@ }, { "cell_type": "code", - "execution_count": 11, + "execution_count": null, "metadata": {}, "outputs": [ { - "name": "stdout", - "output_type": "stream", - "text": [ - "sampled pipeline 1\n" + "ename": "KeyError", + "evalue": "'AdaBoostClassifier'", + "output_type": "error", + "traceback": [ + "\u001b[0;31m---------------------------------------------------------------------------\u001b[0m", + "\u001b[0;31mKeyError\u001b[0m Traceback (most recent call last)", + "Cell \u001b[0;32mIn[11], line 2\u001b[0m\n\u001b[1;32m 1\u001b[0m \u001b[38;5;66;03m#search space for all classifiers\u001b[39;00m\n\u001b[0;32m----> 2\u001b[0m classifier_choice \u001b[38;5;241m=\u001b[39m \u001b[43mtpot2\u001b[49m\u001b[38;5;241;43m.\u001b[39;49m\u001b[43mconfig\u001b[49m\u001b[38;5;241;43m.\u001b[39;49m\u001b[43mget_search_space\u001b[49m\u001b[43m(\u001b[49m\u001b[38;5;124;43m\"\u001b[39;49m\u001b[38;5;124;43mclassifiers\u001b[39;49m\u001b[38;5;124;43m\"\u001b[39;49m\u001b[43m)\u001b[49m\n\u001b[1;32m 4\u001b[0m \u001b[38;5;28mprint\u001b[39m(\u001b[38;5;124m\"\u001b[39m\u001b[38;5;124msampled pipeline 1\u001b[39m\u001b[38;5;124m\"\u001b[39m)\n\u001b[1;32m 5\u001b[0m classifier_choice\u001b[38;5;241m.\u001b[39mgenerate()\u001b[38;5;241m.\u001b[39mexport_pipeline()\n", + "File \u001b[0;32m~/common/Projects/TPOT_Dev/tpot2/tpot2/config/get_configspace.py:180\u001b[0m, in \u001b[0;36mget_search_space\u001b[0;34m(name, n_classes, n_samples, random_state)\u001b[0m\n\u001b[1;32m 178\u001b[0m \u001b[38;5;66;03m#if list of names, return a list of EstimatorNodes\u001b[39;00m\n\u001b[1;32m 179\u001b[0m \u001b[38;5;28;01mif\u001b[39;00m \u001b[38;5;28misinstance\u001b[39m(name, \u001b[38;5;28mlist\u001b[39m) 
\u001b[38;5;129;01mor\u001b[39;00m \u001b[38;5;28misinstance\u001b[39m(name, np\u001b[38;5;241m.\u001b[39mndarray):\n\u001b[0;32m--> 180\u001b[0m search_spaces \u001b[38;5;241m=\u001b[39m [get_search_space(n, n_classes\u001b[38;5;241m=\u001b[39mn_classes, n_samples\u001b[38;5;241m=\u001b[39mn_samples, random_state\u001b[38;5;241m=\u001b[39mrandom_state) \u001b[38;5;28;01mfor\u001b[39;00m n \u001b[38;5;129;01min\u001b[39;00m name]\n\u001b[1;32m 181\u001b[0m \u001b[38;5;28;01mreturn\u001b[39;00m ChoicePipeline(choice_list\u001b[38;5;241m=\u001b[39msearch_spaces)\n\u001b[1;32m 182\u001b[0m \u001b[38;5;28;01melse\u001b[39;00m:\n", + "File \u001b[0;32m~/common/Projects/TPOT_Dev/tpot2/tpot2/config/get_configspace.py:180\u001b[0m, in \u001b[0;36m\u001b[0;34m(.0)\u001b[0m\n\u001b[1;32m 178\u001b[0m \u001b[38;5;66;03m#if list of names, return a list of EstimatorNodes\u001b[39;00m\n\u001b[1;32m 179\u001b[0m \u001b[38;5;28;01mif\u001b[39;00m \u001b[38;5;28misinstance\u001b[39m(name, \u001b[38;5;28mlist\u001b[39m) \u001b[38;5;129;01mor\u001b[39;00m \u001b[38;5;28misinstance\u001b[39m(name, np\u001b[38;5;241m.\u001b[39mndarray):\n\u001b[0;32m--> 180\u001b[0m search_spaces \u001b[38;5;241m=\u001b[39m [\u001b[43mget_search_space\u001b[49m\u001b[43m(\u001b[49m\u001b[43mn\u001b[49m\u001b[43m,\u001b[49m\u001b[43m \u001b[49m\u001b[43mn_classes\u001b[49m\u001b[38;5;241;43m=\u001b[39;49m\u001b[43mn_classes\u001b[49m\u001b[43m,\u001b[49m\u001b[43m \u001b[49m\u001b[43mn_samples\u001b[49m\u001b[38;5;241;43m=\u001b[39;49m\u001b[43mn_samples\u001b[49m\u001b[43m,\u001b[49m\u001b[43m \u001b[49m\u001b[43mrandom_state\u001b[49m\u001b[38;5;241;43m=\u001b[39;49m\u001b[43mrandom_state\u001b[49m\u001b[43m)\u001b[49m \u001b[38;5;28;01mfor\u001b[39;00m n \u001b[38;5;129;01min\u001b[39;00m name]\n\u001b[1;32m 181\u001b[0m \u001b[38;5;28;01mreturn\u001b[39;00m ChoicePipeline(choice_list\u001b[38;5;241m=\u001b[39msearch_spaces)\n\u001b[1;32m 182\u001b[0m \u001b[38;5;28;01melse\u001b[39;00m:\n", + "File 
\u001b[0;32m~/common/Projects/TPOT_Dev/tpot2/tpot2/config/get_configspace.py:183\u001b[0m, in \u001b[0;36mget_search_space\u001b[0;34m(name, n_classes, n_samples, random_state)\u001b[0m\n\u001b[1;32m 181\u001b[0m \u001b[38;5;28;01mreturn\u001b[39;00m ChoicePipeline(choice_list\u001b[38;5;241m=\u001b[39msearch_spaces)\n\u001b[1;32m 182\u001b[0m \u001b[38;5;28;01melse\u001b[39;00m:\n\u001b[0;32m--> 183\u001b[0m \u001b[38;5;28;01mreturn\u001b[39;00m \u001b[43mget_estimatornode\u001b[49m\u001b[43m(\u001b[49m\u001b[43mname\u001b[49m\u001b[43m,\u001b[49m\u001b[43m \u001b[49m\u001b[43mn_classes\u001b[49m\u001b[38;5;241;43m=\u001b[39;49m\u001b[43mn_classes\u001b[49m\u001b[43m,\u001b[49m\u001b[43m \u001b[49m\u001b[43mn_samples\u001b[49m\u001b[38;5;241;43m=\u001b[39;49m\u001b[43mn_samples\u001b[49m\u001b[43m,\u001b[49m\u001b[43m \u001b[49m\u001b[43mrandom_state\u001b[49m\u001b[38;5;241;43m=\u001b[39;49m\u001b[43mrandom_state\u001b[49m\u001b[43m)\u001b[49m\n", + "File \u001b[0;32m~/common/Projects/TPOT_Dev/tpot2/tpot2/config/get_configspace.py:190\u001b[0m, in \u001b[0;36mget_estimatornode\u001b[0;34m(name, n_classes, n_samples, random_state)\u001b[0m\n\u001b[1;32m 186\u001b[0m \u001b[38;5;28;01mdef\u001b[39;00m \u001b[38;5;21mget_estimatornode\u001b[39m(name, n_classes\u001b[38;5;241m=\u001b[39m\u001b[38;5;241m3\u001b[39m, n_samples\u001b[38;5;241m=\u001b[39m\u001b[38;5;241m100\u001b[39m, random_state\u001b[38;5;241m=\u001b[39m\u001b[38;5;28;01mNone\u001b[39;00m):\n\u001b[1;32m 187\u001b[0m configspace \u001b[38;5;241m=\u001b[39m get_configspace(name, n_classes\u001b[38;5;241m=\u001b[39mn_classes, n_samples\u001b[38;5;241m=\u001b[39mn_samples, random_state\u001b[38;5;241m=\u001b[39mrandom_state)\n\u001b[0;32m--> 190\u001b[0m \u001b[38;5;28;01mreturn\u001b[39;00m EstimatorNode(\u001b[43mSTRING_TO_CLASS\u001b[49m\u001b[43m[\u001b[49m\u001b[43mname\u001b[49m\u001b[43m]\u001b[49m, configspace)\n", + "\u001b[0;31mKeyError\u001b[0m: 'AdaBoostClassifier'" ] - }, - { - "data": { - 
"text/html": [ - "
KNeighborsClassifier(metric='euclidean', n_jobs=1, n_neighbors=89)
In a Jupyter environment, please rerun this cell to show the HTML representation or trust the notebook.
On GitHub, the HTML representation is unable to render, please try loading this page with nbviewer.org.
" - ], - "text/plain": [ - "KNeighborsClassifier(metric='euclidean', n_jobs=1, n_neighbors=89)" - ] - }, - "execution_count": 11, - "metadata": {}, - "output_type": "execute_result" } ], "source": [ @@ -550,7 +548,7 @@ }, { "cell_type": "code", - "execution_count": 12, + "execution_count": null, "metadata": {}, "outputs": [ { @@ -599,7 +597,7 @@ }, { "cell_type": "code", - "execution_count": 13, + "execution_count": null, "metadata": {}, "outputs": [ { @@ -631,9 +629,23 @@ ], "source": [ "stc_pipeline = tpot2.search_spaces.pipelines.SequentialPipeline([\n", - " tpot2.config.get_search_space(\"selectors\"),\n", + " tpot2.config.get_search_space(\"selectors\"), \n", " tpot2.config.get_search_space(\"transformers\"),\n", " tpot2.config.get_search_space(\"classifiers\"),\n", + " \n", + "])\n", + "\n", + "stc_pipeline = tpot2.search_spaces.pipelines.SequentialPipeline([\n", + " tpot2.config.get_search_space(\"preprocessors1\"), \n", + " tpot2.config.get_search_space(\"imputation\"), \n", + " tpot2.config.get_search_space(\"selectors\"), \n", + " tpot2.search_spaces.pipelines.GraphPipeline(\n", + " root_search_space= tpot2.config.get_search_space(\"classifiers\"),\n", + " leaf_search_space = tpot2.config.get_search_space(\"selectors\"), \n", + " inner_search_space = tpot2.config.get_search_space([\"transformers\",\"classifiers\"]),\n", + " max_size = 10,\n", + " )\n", + " \n", "])\n", "\n", "\n", @@ -643,7 +655,7 @@ }, { "cell_type": "code", - "execution_count": 14, + "execution_count": null, "metadata": {}, "outputs": [ { @@ -696,7 +708,7 @@ }, { "cell_type": "code", - "execution_count": 15, + "execution_count": null, "metadata": {}, "outputs": [ { @@ -766,7 +778,7 @@ }, { "cell_type": "code", - "execution_count": 16, + "execution_count": null, "metadata": {}, "outputs": [ { @@ -788,7 +800,7 @@ }, { "cell_type": "code", - "execution_count": 17, + "execution_count": null, "metadata": {}, "outputs": [ { diff --git a/Tutorial/5_GraphPipeline.ipynb 
b/Tutorial/5_GraphPipeline.ipynb index 47d48c9d..320c68f1 100644 --- a/Tutorial/5_GraphPipeline.ipynb +++ b/Tutorial/5_GraphPipeline.ipynb @@ -19,7 +19,7 @@ "outputs": [ { "data": { - "image/png": "", + "image/png": "iVBORw0KGgoAAAANSUhEUgAAAnYAAAHWCAYAAAD6oMSKAAAAOXRFWHRTb2Z0d2FyZQBNYXRwbG90bGliIHZlcnNpb24zLjguMCwgaHR0cHM6Ly9tYXRwbG90bGliLm9yZy81sbWrAAAACXBIWXMAAA9hAAAPYQGoP6dpAABmK0lEQVR4nO3deUDUdf4/8Occ3MitgCBqhErMcIYYeK6m5l1pHm2gZpnbVq5Zfls7bc1q09wOtVKTdreyX7tubGa6pnmhiYAMjAfgAQhCIDcMxxy/P7Spj0d5DLxnhufjr3rBzDwHi8/T93s+n4/MZDKZQEREREQ2Ty46ABERERFZBosdERERkZ1gsSMiIiKyEyx2RERERHaCxY6IiIjITrDYEREREdkJFjsiIiIiO8FiR0RERGQnWOyIiIiI7ASLHREREZGdYLEjIiIishMsdkRERER2gsWOiIiIyE6w2BERERHZCRY7IiIiIjvBYkdERERkJ1jsiIiIiOwEix0RERGRnWCxIyIiIrITLHZEREREdoLFjoiIiMhOsNgRERER2QkWOyIiIiI7wWJHREREZCdY7IiIiIjsBIsdERERkZ1gsSMiIiKyEyx2RERERHZCKToAEZElGQwGVFdXo6KiAhUVFagsL0erTgejwQC5QgEnFxd0DwiAv78//P394ePjA4VCITo2EZFFyEwmk0l0CCKiW1VTU4OcnBzkZmWhpakJJr0e7jodPKur4aDXQ24ywSiToV2pRJ2PDxpdXCBTKuHs5gZ1bCyioqLg7e0t+m0QEd0SFjsismllZWVI378fZwoK4NDcjJDiEgRWV8OzqQkOBsM1H9euUKDOzQ3nfXxQHNIL7a6u6BsWhqQhQxAYGNiJ74CIyHJY7IjIJun1ehw4cAAZBw7AvaoKtxcVI7iqCgqj8YafyyCX45yfHwp7h6DRzw/xSUlISkqCUslPqxCRbWGxIyKbU15ejq1paag5V4oBBQUIKy2F3AK/yowyGQqCgnAiLAw+wUEYN2kSAgICLJCYiKhzsNgRkU0pKirCls2b4Vp2HnHHj8Ojudnir1Hv6orM8HA09+yJe6c/gN69e1v8NYiIOgKLHRHZjKKiIvzrs8/gW1SMgceOQXkT267XSy+X44eIO1AdEoL7Z85kuSMim8Dr2BGRTSgvL8eWzZvhU1SMQVpth5Y6AFAajbgrTwuf4mJs2fwFysvLO/T1iIgsgcWOiKyeXq/H1rQ0uJadR8KxYxb5PN31kJtMSNAeg8v5MnyTlga9Xt8pr0tEdLNY7IjI6h04cAA150oRd/x4h6/UXU5pNCLu2HFUl5YiPT29U1+biOhGsdgRkVUrKytDxoEDGFBQ0CEnSlwPz+Zm9M8vwOH9+3H+/HkhGYiIrgeLHRFZtfT9++FeVYWw0lKhOfqVlsK9qgoH9u8XmoOI6New2BGR1aqpqcGZggLcXlTcaZ+ruxa5yYTQomKcyc9HTU2N0CxERNfCYkdEVisnJwcOzc0IrqoSHQUA0KuqCsrmZmg0GtFRiIiuisWOiKySwWBAblYWQopLbuo2YR1BYTSid0kJNJmZMPzKfWiJiERhsSOim+Ln53fLzzFu3DjodLqrfq26uhrffvstAqurAQAVra14+uSJX32+8P37MCk7C+OyMjFfq0V9B1yeJPBCNVqamlB9Kdf1WLduHTZv3mzxLEREl+OdJ4jopvj5+aGqA7dI8/LykDBwII7ED7zuS5wMPHQQhwfdBQBYfPIkQl1dsKBXyC3lMJhMUMhk5n9vVyjw9bC
hGDdtGlQq1S09NxGRpXHFjogsZseOHYiOjoZKpcKiRYvw098b165di379+mHkyJGYMWMG3nvvPQBAnz590NjYiMbGRowdOxZqtRpqtRrbt2/HihUr0NLSgvsyj+DlU4U419KC+45mAwDajUYsO1WICVmZmJiVhe1XKZhxHh4ob20FAFS1tWHBsWO472g2ZmhycOrSZVPO6Jpx39FsTMs5ihWnT5uf/52iIrxYWICU3Fy8dvo0zup0mJ2Xi3uzs/FozlEYKitRUVGB1atXo3///oiMjMSCBQsAAJ9//jnCw8MRFRWFyZMnAwBefvll83vOysrCwIEDERkZieTkZLS0tJh/Fi+//DJiYmIQHx/Py6oQ0U1Rig5ARPZBp9PhkUcewZ49exASEoJJkyZhy5YtSEhIwMqVK5GZmQmlUonY2FgMHjxY8tjt27fD19cX3377LUwmExoaGlBTVYW0f/0LaTGxAIBzlwoQAHxeXo4GvQFpMbGQy2So07dLns9gMuFAbQ3u9w8AACw/fRqPh/SCyr0bNA0NeO30aWxQqbD89Gks6NULd/v6YdXZs5LnyG9qxidqNRzlcszOy8Xy28MQ5OyMbVWV+Gr3bvRNSsKyZctQUlICNzc31NXVXXyt5cuRlpaGsLAw8+yXUlJSsH79eiQkJGDBggVYs2YNFi1aBAAICQlBdnY2XnzxRaxfvx4vvPDCrf2hEFGXwxU7IrKIkydPon///ujTpw/kcjlmzZqFffv2ISMjAyNHjoSnpyfc3NwwYcKEKx6rVquxb98+PPvsszh06BA8PDzQqtNBdpXXAYBDdbWYHhAA+aUtUk+lAwCgQa/HpOws3PXDIdS0t2OIt7f5+/9cUIBJ2Vl4vrAAle1tAABtYyNG+fgCAMZ37y55jZG+PnCUy9Go1yOrvh4Ljh/DpOwsvF9cjNrGRrS1tGDgwIH4/e9/j08//RQODhczJCUlYf78+Vi/fj0u/6RLXV0dWltbkZCQAAB46KGHsG/fPvPXf1rhi4uLw9nLiiYR0fXgih0RdQiTyQSZTHZFubnax3r79euH7OxsbN26FU899RSSk5Phdqko3YhuSiXSYmKhMxgwJy8Pn54vQ3LPIADAlugYyWflrsh72b87yxXmf/ZzcDSvHAJAzm190aDXY+vWrfj+++/x73//G6tWrcKRI0ewdu1aHDp0CP/9738RExODY8eOXfO9//Qz+omTkxMAQKFQ8KxbIropXLEjIovo378/8vPzUVRUBKPRiM8//xxDhgxBfHw8du3ahfr6ejQ3N+Obb7654rFlZWVwc3NDcnIynnrqKRw9ehRyhQJymQyGqxTBRC8vbC4vh/HS1y7finVRKLD0ttuwsbQUepMJAz098Xn5xc+sGU0mnGxqAgDc4e6OXZfObv22qvKq78tdqYSPgwO+v/R97UYjiuvqIZPLUVJSgpEjR2LVqlU4e/YsDAYDTp8+jbvuugvLly+Ho6MjLly4YH4uLy8vODk5ISMjAwDw6aefYsiQITf0cyYi+jVcsSOim1JTU4Pg4GDzv7/99tv48MMPMXnyZOj1eowePRpTpkyBTCbDwoULceeddyIkJAQxMTHw8PCQPFdubi4WL14MhUIBFxcXbNiwAZrsbAwOC8OErEwkeHlhXtDPrzU9IBCnm3WYkJ0FBWT4Y0gIxlx2+RV1t27o5+qG7VVVeOG2ULxYWIjPz5+H3mTClB7+6O/mhj/3vQ2LT57EunMliPfwhLtCgatZ2b8/XiwsxMqzZ2GACSPi7sRQR0c8+OCDaGhogMlkwiuvvAKFQoHFixejsLAQJpMJU6dOlfyMAGDTpk1YsGABWlpaEB0dbT7pgojIEni5EyLqcE1NTXBzc4NOp8PQoUOxceNGqNXqX33Md999h5Pbt+Pug4c6LJfOYICzXA6ZTIb1586hqr0N/9f3tt983P/uGoT+Y8Zg5MiRHZaNiOhmcMWOiDrc888/j927d6OlpQXJycm/WeoAwN/
fH5kuLmhXKODQQZ830zQ0YPmZ0zCaTPB3csJf+/X7zce0KxRodHGBv79/h2QiIroVLHZE1OHefvvtG36Mv78/ZEol6tzc4Fdf3wGpgAQvL8lJEdejzs0NMqWSxY6IrBJPniAiq+Tj4wNnNzec9/ERHUXivO/FXD5WlouICGCxIyIrpVAooI6NRXFILxjk1vGryiCXo6hXL0TGxUFxjRMtiIhEso7flkREVxEVFYV2V1ecu+yMV0upb6hH2fnz+LHyR7Tr9b/5/SV+ftC7uiIyMrJD8hAR3SoWOyKyWt7e3ugbFobC3iEw/srFhW9Gu16PxsZGACbo9XpUV1ebr4t3NUaZDKd6h6Bvv37wvnRHCyIia8NiR0RWLWnIEDT6+aEgKKhDX8dg0KP+V07SyA8KQqOfH5Iuu88tEZE1YbEjIqsWGBiI+KQknAgLQ72rq+RrJgCtbW0wGo03/LwOSiUcHZ0ks+bmJrS0tl7xvXWurjjZLwwDBw9GYGDgDb8WEVFnYbEjIquXlJQE7+AgZIaHQ3/pRIpmnQ7l58/jwoUqlFdUQNfScsPP6+XlBZlM+muwtrZWsiWrl8uReUc4fIKCkJiYeGtvhIiog7HYEZHVUyqVGD9pEpp79sTBAf3x44ULqK2tgQk/FTATGm7iWndKheKK25sZjQbU1dVd/GeZDD9E3AFdYE+MmzQJSiUv/UlE1o3FjohsQltbG3JPnsBJV1ccvTMOhssvN3KTJ1e4ubrCyclZMtPpmtHU1oaDqghUh4Tg3ukPICAg4GajExF1Gt4rlois3q5duzBx4kQ0NzcjJCQE06ZMQc/mZoRnZsL10kqdm6sbPD09b+r5DQYDfqyshMl08bN6TR4eOHlnPEy39cX9M2eid+/eFnsvREQdicWOiKze0KFDsW/fPvO/9+jRA5PGj0eQtzfCTpxAz/x8+Hh4wvWykytuRLNOh+q6WpT164eCAQNQWl0NXXs7/v73v0Nm4UutEBF1FH5ghIis3uWfg/vxxx/x8SefIDExEa3x8SgPDsYd5RXoW1sLxU2cIWuQy/Fj797Q+sejwsUFBzIykJ6eDoPBgAkTJmDGjBmWeitERB2KK3ZEZPUKCwsxYsQInDt37oqvBQQEICkxEfHR0XBsaUHvkhIEXqiGZ1MTHAyGaz5nu0KBOjc3nPf1QVGvXtC7uiKwVy+8unw58vPzzd/n7e0NrVbLy5wQkU1gsSMiq2cymTB27Fjs2LHjql9XKpUoLy9HXl4eNJmZaGlqgkmvh7tOB4/qGjjq9ZCbjDDK5GhTKlHv441GFxfIlEo4u7khMi4OkZGR8Pb2xhdffIHp06dLnn/ChAlIS0vjliwRWT0WOyKyehs3bsTDDz98za/feeedyMjIAHDxRIjq6mpUVFSgoqICleXlaGtpgUGvh0KphKOzM7oHBMDf3x/+/v7w8fGB4rIzbKdPn44vvvjiigxz5syx/JsjIrIgFjsismpFRUVQq9VoaGgwz7p3745Ro0bhyy+/hJeXF7788ksMHTrUYq9ZVVUFlUqFiooK88zDwwO5ubkICQmx2OsQEVkaix0RWS2j0YjRo0fju+++k8zT0tIwceJE6HQ6ODs7d8gWaVpaGiZPniyZjRo1Cjt27OCWLBFZLV6gmIis1rp1664odbNnz8bEiRMBAC4uLh1WsiZNmoSUlBTJbOfOnVi3bl2HvB4RkSVwxY6IrFJhYSGioqLQ3NxsngUHByMvL++mL0R8o2pra6FSqVBaWmqeubq6QqPRIDQ0tFMyEBHdCK7YEZHVMRgMmDNnjqTUAcCGDRs6rdQBgJeXFzZu3CiZNTc3Y86cOTD8yqVUiIhEYbEjIquzevVq7N+/XzJ77LHHMHr06E7PMnr0aMyfP18y27dvH/72t791ehYiot/CrVgisirHjx9HTEwMWltbzbO+fftCo9HA3d1dSKaGhgZERUXhzJkz5pmTkxOys7MRHh4uJBMR0dVwxY6IrIZer0dKSoq
k1MlkMmzatElYqQOAbt264eOPP5bMWltbkZKSAr1eLygVEdGVWOyIyGq88cYb5gsN/2ThwoUWvUbdzRo2bBgWLlwomWVkZODNN98UE4iI6Cq4FUtEViEnJwfx8fFob283z/r374/s7Gy4uLgITPYznU6H6Ohoyb1kHRwckJGRgaioKIHJiIgu4oodEQnX1taG5ORkSamTy+VITU21mlIHXLxuXmpqKuTyn391tre3IyUlBW1tbQKTERFdxGJHRMItW7YMGo1GMluyZAkSEhIEJbq2QYMG4dlnn5XMcnJy8OqrrwpKRET0M27FEpFQhw8fRmJiouS6cGq1GhkZGXBychKY7NpaW1tx5513Ii8vzzxTKBQ4ePAg4uPjBSYjoq6OxY6IhNHpdIiNjcWJEyfMM6VSiYyMDERHR4sLdh2ys7MxcOBAyVmx4eHhyMrKgrOzs8BkRNSVcSuWiIR54YUXJKUOAF588UWrL3UAEBMTgxdeeEEyO378+BUzIqLOxBU7IhJi3759GDZsGH75KyguLg4HDx6Eg4ODwGTXr729HXfddRcyMzPNM5lMhr1792Lw4MECkxFRV8ViR0SdrrGxEVFRUTh9+rR55uTkhMzMTERERAhMduO0Wi1iY2MlZ8WGhoYiJycHbm5uApMRUVfErVgi6nRLliyRlDoAePXVV22u1AFAREQE/vKXv0hmp06dwpIlSwQlIqKujCt2RNSpdu7cibvvvlsyS0xMxN69e6FQKASlujUGgwFDhw5Fenq6ZL5z506MHDlSUCoi6opY7Iio09TV1UGtVqOkpMQ8c3FxQU5ODsLCwgQmu3UFBQWIioqCTqczz0JCQqDRaODp6SkwGRF1JdyKJaJOs2jRIkmpA4A333zT5ksdAISFheGNN96QzIqLi7Fo0SJBiYioK+KKHRF1iq+//hoTJ06UzEaMGIGdO3dKbtFly4xGI0aNGoXdu3dL5l9//TXGjx8vKBURdSUsdkTU4S5cuACVSoXy8nLzrFu3btBoNOjTp4+4YB3g7NmzUKvVaGxsNM8CAgKg1Wrh4+MjMBkRdQX28ddkIrJqTzzxhKTUAcCqVavsrtQBQJ8+ffD2229LZuXl5XjiiScEJSKiroQrdkTUob788ktMmzZNMrvnnnuwdetWyGQyQak6lslkwvjx47Ft2zbJ/Msvv8T9998vKBURdQUsdkTUYX788UdERESgqqrKPPPy8oJWq0XPnj0FJut4paWlUKlUqK2tNc/8/Pyg1WrRo0cPccGIyK5xK5aIOoTJZML8+fMlpQ4A3nvvPbsvdQAQFBSEd999VzKrqqrCY489Bv59mog6CosdEXWIf/7zn/jPf/4jmd17772YNWuWmEACPPjgg7j33nslsy1btuDTTz8VlIiI7B23YonI4rgN+bNrbUfn5eUhKChIYDIiskdcsSMiizKZTJg3b56k1AHAunXrulypA4AePXpg7dq1klltbS0eeeQRbskSkcWx2BGRRW3YsAHffvutZDZr1qwufTbo1KlTMXPmTMls27Zt2LBhg6BERGSvuBVLRBZztYvzBgYGIi8vr8tfnLe6uhoRERGS6/m5u7sjNzfXLq/nR0RicMWOiCzCaDRi7ty5klIHAOvXr+/ypQ4AfHx8sH79esmssbERc+fOhdFoFJSKiOwNix0RWcT7779/xT1SH374YYwbN05QIuszfvx4zJ07VzLbvXs31qxZIygREdkbbsUS0S3Lz89HdHQ0dDqdeRYSEoLc3Fx4eHgITGZ96urqoFarUVJSYp65uLggJycHYWFhApMRkT3gih0R3RKDwYDZs2dLSh0AbNy4kaXuKjw9PbFx40bJTKfTYfbs2TAYDIJSEZG9YLEjoluycuVKHDx4UDJ7/PHHMXLkSEGJrN+oUaPwhz/8QTJLT0/HqlWrBCUiInvBrVgiumlarRaxsbFoa2szz0JDQ5GTkwM3NzeByaxfY2MjoqOjcerUKfPM0dERWVlZiIiIEJi
MiGwZV+yI6Ka0t7cjOTlZUupkMhlSU1NZ6q6Du7s7Nm3aBJlMZp61tbUhJSUF7e3tApMRkS1jsSOim7JixQpkZWVJZk8//TSSkpIEJbI9gwcPxqJFiySzzMxMvP7664ISEZGt41YsEd2wrKwsJCQkQK/Xm2fh4eHIysqCs7OzwGS2R6fTITY2FidOnDDPlEolDh8+jJiYGIHJiMgWccWOiG5Ia2srUlJSJKVOoVAgNTWVpe4muLi4IDU1FQqFwjzT6/VISUlBa2urwGREZItY7Ijohrz88svIy8uTzJ577jnEx8cLSmT7Bg4ciP/7v/+TzHJzc/HKK68ISkREtopbsUR03Q4dOoSkpCTJLbCioqJw+PBhODo6Ckxm+9ra2hAfHw+NRmOeyeVypKenIyEhQWAyIrIlLHZEdF2am5sRExOD/Px888zBwQFHjhxBZGSkwGT2IycnB/Hx8ZKzYvv164fs7Gy4uroKTEZEtoJbsUR0XZYuXSopdcDFbVmWOsuJiorCSy+9JJnl5+dj6dKlghIRka3hih0R/aY9e/Zg+PDhktnAgQNx4MABKJVKMaHslF6vR2JiIjIyMswzmUyG3bt3Y9iwYQKTEZEtYLEjol/V0NCAqKgonDlzxjxzdnZGdnY2BgwYIDCZ/Tp+/DhiYmIkZ8X27dsXGo0G7u7uApMRkbXjViwR/apnnnlGUuoAYPny5Sx1HSg8PByvvfaaZHbmzBk888wzghIRka3gih0RXdP27dsxduxYyWzIkCHYvXu35LprZHkGgwHDhw/H/v37JfPt27dj9OjRglIRkbVjsSOiq6qtrYVKpUJpaal55urqCo1Gg9DQUIHJuo5Tp04hMjISzc3N5llwcDByc3Ph5eUlLhgRWS1uxRLRVS1cuFBS6gDgrbfeYqnrRKGhofjrX/8qmZ07dw4LFy4UE4iIrB5X7IjoCmlpaZg8ebJkNmrUKOzYsQMymUxQqq7JaDRizJgx2Llzp2T+1VdfYdKkSYJSEZG1YrEjIomqqiqoVCpUVFSYZx4eHsjNzUVISIjAZF1XcXEx1Go16uvrzTN/f39otVr4+voKTEZE1oZbsUQk8fjjj0tKHQCsXr2apU6gkJAQrF69WjKrqKjA448/LiYQEVktrtgRkdnmzZsxY8YMyWzChAlIS0vjFqxgJpMJkyZNwtdffy2Zb968GQ888ICgVERkbVjsiAgAUF5ejoiICFRXV5tn3t7e0Gq1CAwMFJiMfnL+/HlERESgpqbGPPP19UVeXh4CAgIEJiMia8GtWCKCyWTC/PnzJaUOANasWcNSZ0UCAwPx/vvvS2YXLlzA/Pnzwb+jExHAYkdEAD755BOkpaVJZlOnTsX06dMFJaJrmTFjBqZOnSqZpaWl4e9//7ugRERkTbgVS9TFlZSUQK1Wo66uzjzr0aMH8vLy0L17d4HJ6FoqKysRERGByspK88zT0xN5eXkIDg4WmIyIROOKHVEXZjKZMG/ePEmpA4APPviApc6Kde/eHR9++KFkVldXh4cffphbskRdHIsdURf24YcfYseOHZLZQw89hClTpogJRNdtypQp+P3vfy+Z7dix44rCR0RdC7diibqo06dPIzIyEk1NTeZZz549kZeXB29vb4HJ6HrV1NRApVKhrKzMPHNzc4NGo8Ftt90mMBkRicIVO6IuyGg0Ys6cOZJSBwAbNmxgqbMh3t7e2LBhg2TW1NSEuXPnwmg0CkpFRCKx2BF1Qe+88w727t0rmT3yyCMYO3asoER0s8aOHYtHHnlEMtuzZw/effddQYmISCRuxRJ1MSdPnkR0dDRaWlrMsz59+kCj0aBbt24Ck9HNamhogFqtRlFRkXnm7OyMo0ePon///gKTEVFn44odURei1+uRkpIiKXUAsHHjRpY6G9atWzd8/PHHkllLSwtmz54NvV4vKBURicBiR9SFvPXWW/jhhx8ksyeffBIjRowQlIgsZcSIEXjiiScks0OHDuGtt94SlIi
IROBWLFEXkZubi7i4OLS3t5tnYWFhOHr0KFxdXQUmI0tpbm5GdHQ0CgoKzDNHR0ccOXIEarVaYDIi6ixcsSPqAtra2pCSkiIpdXK5HJs2bWKpsyOurq7YtGkT5PKff7X/9Gff1tYmMBkRdRYWO6IuYPny5cjOzpbMFi9ejMTEREGJqKMkJiZi8eLFkll2djZee+01QYmIqDNxK5bIzmVmZiIhIQEGg8E8i4iIwJEjR+Ds7CwwGXWUlpYW3HnnndBqteaZQqHADz/8gLi4OIHJiKijccWOyI61tLQgOTlZUuoUCgVSU1NZ6uyYs7MzUlNToVAozDODwXDVM6KJyL6w2BHZsZdeegnHjh2TzJ5//nmu2nQBcXFxeP755yUzrVaLl156SVAiIuoM3IolslPp6ekYPHgwfvm/eExMDH744Qc4ODgITEadpb29HQkJCZLPV8pkMuzfv5+frySyUyx2RHaoqakJ0dHRKCwsNM942YuuKTc3F3feeafkrNjbb78dR48ehZubm8BkRNQRuBVLZIeee+45SakDgGXLlrHUdUFqtRqvvPKKZFZYWIjnnntOUCIi6khcsSOyM7t27cLIkSMls0GDBmH//v2SD9NT16HX6zFkyBAcOnRIMt+1axfvOkJkZ1jsiOxIfX09IiMjJTeDd3FxwdGjR9GvXz+ByUi0kydPIjo6WnJWbO/evaHRaODh4SEwGRFZErdiiezI008/LSl1ALBixQqWOkL//v3x+uuvS2ZFRUVXXMyYiGwbV+yI7MS2bdswbtw4yWzYsGHYtWuX5BZT1HUZjUb87ne/w549eyTzb775Bvfcc4+gVERkSSx2RHagpqYGKpUKZWVl5pm7uzs0Gg369u0rMBlZmzNnzkCtVqOpqck869mzJ/Ly8uDt7S0wGRFZAv8aT2QHnnzySUmpA4CVK1ey1NEV+vbti5UrV0pmZWVlePLJJwUlIiJL4oodkY3bsmUL7rvvPslszJgx2LZtG2QymaBUZM1MJhPGjh2LHTt2SOb//ve/ce+99wpKRUSWwGJHZMMqKysRERGByspK88zT0xN5eXkIDg4WmIys3blz56BSqVBXV2eede/eHVqtFt27dxeYjIhuBbdiiWyUyWTCggULJKUOAN555x2WOvpNwcHBeOeddySzyspKLFiwAPz7PpHt4oodkY367LPPMGvWLMls8uTJ2LJlC7dg6bqYTCZMmTIFaWlpkvmnn36KmTNnCkpFRLeCxY7IBpWVlUGlUqGmpsY88/X1hVarhb+/v8BkZGvKy8uhUqlw4cIF88zb2xtarRaBgYECkxHRzeBWLJGNMZlMePTRRyWlDgDWrl3LUkc3LCAgAGvWrJHMampq8Mgjj3BLlsgGsdgR2ZiPP/4YW7dulcymT5+OadOmCUpEtu6BBx7A9OnTJbOtW7di06ZNYgIR0U3jViyRDSkqKoJarUZDQ4N55u/vD61WC19fX4HJyNZduHABERERqKioMM+6deuGvLw8hISECExGRDeCK3ZENsJoNOLhhx+WlDoA+Oijj1jq6Jb5+vriww8/lMwaGhrw8MMPw2g0CkpFRDeKxY7IRqxbtw7fffedZDZ79mxMnDhRUCKyN5MmTUJKSopktnPnTqxbt05QIiK6UdyKJbIBhYWFiIqKQnNzs3kWHByMvLw8eHp6CkxG9qa2thZqtRrnzp0zz1xdXaHRaBAaGiowGRFdD67YEVk5g8GAOXPmSEodAGzYsIGljizOy8sLGzZskMyam5sxe/ZsGAwGQamI6Hqx2BFZudWrV2P//v2S2WOPPYbRo0cLSkT2bvTo0Xjssccks/379+Nvf/uboEREdL24FUtkxY4fP46YmBi0traaZ3379oVGo4G7u7vAZGTvGhsbERkZiTNnzphnTk5OyM7ORnh4uMBkRPRruGJHZKX0ej1SUlIkpU4mk2HTpk0sddTh3N3d8fHHH0tuT9fa2oqUlBTo9XqByYjo17DYEVmpN954Axk
ZGZLZwoULMXToUEGJqKsZNmwYnnrqKcksIyMDb7zxhqBERPRbuBVLZIVycnIQHx+P9vZ286x///7Izs6Gi4uLwGTU1eh0OsTExODkyZPmmYODAzIyMhAVFSUwGRFdDVfsiKxMW1sbkpOTJaVOLpcjNTWVpY46nYuLC1JTUyGX/3y4aG9vR3JyMtra2gQmI6KrYbEjsjLLli2DRqORzJYsWYKEhARBiairS0hIwJIlSyQzjUaDV199VVAiIroWbsUSWZHDhw8jMTFRcr0wtVqNjIwMODk5CUxGXV1rayvi4+ORm5trnikUChw8eBDx8fECkxHRL7HYEVkJnU6H2NhYnDhxwjxTKpXIyMhAdHS0uGBEl2RnZ2PgwIGSs2LDw8ORmZnJjwkQWQluxRJZiRdeeEFS6gDgxRdfZKkjqxETE4MXXnhBMjt+/PgVMyIShyt2RFZg3759GDZsGH75v2NcXBwOHjwIBwcHgcmIpNrb23HXXXchMzPTPJPJZNi7dy8GDx4sMBkRASx2RMI1NjYiKioKp0+fNs+cnJyQmZmJiIgIgcmIrk6r1SI2NlZyVmxoaChycnLg5uYmMBkRcSuWSLAlS5ZISh0AvPrqqyx1ZLUiIiLwl7/8RTI7derUFWfOElHn44odkUA7d+7E3XffLZklJiZi7969UCgUglIR/TaDwYChQ4ciPT1dMt+5cydGjhwpKBURsdgRCVJXVwe1Wo2SkhLzzMXFBTk5OQgLCxOYjOj6FBQUICoqCjqdzjwLCQmBRqOBp6enwGREXRe3YokEWbRokaTUAcCbb77JUkc2Iyws7Ir7xhYXF2PRokWCEhERV+yIBPj6668xceJEyWzEiBHYuXOn5NZNRNbOaDRi1KhR2L17t2T+9ddfY/z48YJSEXVdLHZEnezChQtQqVQoLy83z7p16waNRoM+ffqIC0Z0k86ePQu1Wo3GxkbzLCAgAFqtFj4+PgKTEXU9XBog6mRPPPGEpNQBwKpVq1jqyGb16dMHb7/9tmRWXl6OJ554QlAioq6LK3ZEnejLL7/EtGnTJLN77rkHW7duhUwmE5SK6NaZTCaMHz8e27Ztk8y//PJL3H///YJSEXU9LHZEneTHH39EREQEqqqqzDMvLy9otVr07NlTYDIiyygtLYVKpUJtba155ufnB61Wix49eogLRtSFcCuWqBOYTCbMnz9fUuoA4L333mOpI7sRFBSEd999VzKrqqrCY489Bq4hEHUOFjuiTvDPf/4T//nPfySze++9F7NmzRITiKiDPPjgg7j33nslsy1btuDTTz8VlIioa+FWLFEH4/YUdTXX+thBXl4egoKCBCYjsn9csSPqQCaTCfPmzZOUOgD44IMPWOrIbvXo0QNr166VzGpra/HII49wS5aog7HYEXWgDRs24Ntvv5XMZs2ahfvuu09QIqLOMXXqVMycOVMy27ZtGzZs2CAoEVHXwK1Yog5ytYu2BgYGIi8vjxdtpS6huroaERERkus2uru7Izc3l9dtJOogXLEj6gBGoxFz5syRlDoAWL9+PUsddRk+Pj5Yv369ZNbY2Ii5c+fCaDQKSkVk31jsiDrA+++/j++//14ye/jhhzFu3DgxgYgEGT9+PObOnSuZ7d69G2vWrBGUiMi+cSuWyMLy8/MRHR0NnU5nnoWEhCA3NxceHh4CkxGJUVdXB7VajZKSEvPMxcUFOTk5CAsLE5iMyP5wxY7IggwGA2bPni0pdQCwceNGljrqsjw9PbFx40bJTKfTYfbs2TAYDIJSEdknFjsiC1q5ciUOHjwomT3++OMYOXKkoERE1mHUqFH4wx/+IJmlp6dj1apVghIR2SduxRJZiFarRWxsLNra2syz0NBQ5OTkwM3NTWAyIuvQ2NiI6OhonDp1yjxzdHREVlYWIiIiBCYjsh9csSOygPb2diQnJ0tKnUwmQ2pqKksd0SXu7u7YtGkTZDKZedbW1oaUlBS0t7cLTEZkP1jsiCxgxYoVyMr
KksyefvppJCUlCUpEZJ0GDx6MRYsWSWaZmZl4/fXXBSUisi/ciiW6RVlZWUhISIBerzfPwsPDkZWVBWdnZ4HJiKyTTqdDbGwsTpw4YZ4plUocPnwYMTExApMR2T6u2BHdgtbWVqSkpEhKnUKhQGpqKksd0TW4uLggNTUVCoXCPNPr9UhJSUFra6vAZES2j8WO6Ba8/PLLyMvLk8yee+45xMfHC0pEZBsGDhyI//u//5PMcnNz8corrwhKRGQfuBVLdJMOHTqEpKQkya2RoqKicPjwYTg6OgpMRmQb2traEB8fD41GY57J5XKkp6cjISFBYDIi28ViR3QTmpubERMTg/z8fPPMwcEBR44cQWRkpMBkRLYlJycH8fHxkrNi+/fvj+zsbLi4uAhMRmSbuBVLdBOWLl0qKXXAxW1ZljqiGxMVFYWXXnpJMjt58iSWLl0qKBGRbeOKHdEN2rNnD4YPHy6ZDRw4EAcOHIBSqRQTisiG6fV6JCYmIiMjwzyTyWTYvXs3hg0bJjAZke1hsSO6AQ0NDYiKisKZM2fMM2dnZ2RnZ2PAgAECkxHZtuPHjyMmJkZyVmzfvn2h0Wjg7u4uMBmRbeFWLNENeOaZZySlDgCWL1/OUkd0i8LDw/Haa69JZmfOnMEzzzwjKBGRbeKKHdF12r59O8aOHSuZDRkyBLt375Zcj4uIbo7BYMDw4cOxf/9+yXz79u0YPXq0oFREtoXFjug61NbWQqVSobS01DxzdXWFRqNBaGiowGRE9uXUqVOIjIxEc3OzeRYcHIzc3Fx4eXmJC0ZkI7gVS3QdFi5cKCl1APDWW2+x1BFZWGhoKP76179KZufOncPChQvFBCKyMVyxI/oNaWlpmDx5smQ2atQo7NixAzKZTFAqIvtlNBoxZswY7Ny5UzL/6quvMGnSJBgMBn78gegaWOyIfkVVVRVUKhUqKirMMw8PD+Tm5iIkJERgMiL7VlxcDLVajfr6evOsR48euP/++/H3v/8d3t7e+PTTTzF48GCBKYmsD4sd0a+YPn06vvjiC8ls48aNmDNnjqBERF3Hxx9/jLlz517z69HR0cjOzu7ERETWj8WO6Bo2b96MGTNmSGYTJkxAWloat2CJOoHJZML48eOxbdu2q35dJpNBp9PByckJBoMB1dXVqKioQEVFBSrLy9Gq08FoMECuUMDJxQXdAwLg7+8Pf39/+Pj4cDuX7BKLHdFVlJeXIyIiAtXV1eaZt7c3tFotAgMDBSYj6jrKysrwu9/9DidPnrzm92RmZqK+vh65WVloaWqCSa+Hu04Hz+pqOOj1kJtMMMpkaFcqUefjg0YXF8iUSji7uUEdG4uoqCh4e3t34rsi6li8/xHRZUwmE+bPny8pdQCwZs0aljqiTrRo0aJrlrqAgAAMTkzEt2lpcG1vR0hxCQKrq+HZ1AQHg+Gaz9muUKDOzQ3nfXxw9MIFZBw4gL5hYUgaMoT/f5NdYLEjuswnn3yCtLQ0yWzq1KmYPn26oEREXVNVVdUVM4VCgcTERCTFx8OvsREDjmQitKEBCqPxup7TwWCAX309/OrrcUdxMc75+aHwwgX8s7AQ8UlJSEpK4j2fyaZxK5boF0pKSqBWq1FXV2ee9ejRA3l5eejevbvAZERdz86dOzFx4kS0tLQAuPj/4qTx4xHk7Y2wEyfQMz8f7i6u8PL0vKXXMcpkKAgKwomwMPgEB2HcpEkICAiwxFsg6nQsdkSXmEwmjB07Fjt27JDMt2zZgilTpogJRdTFFRYWYsmSJThy5AgemDIFgc3NCM/MhOuly6AoFEr49+hhkdeqd3VFZng4mnv2xL3TH0Dv3r0t8rxEnYnFjuiSDz74AI899phk9tBDD+GTTz4RlIiIAKCoqAifffIJPE+dQv+DB6H4xWfoLFnsAEAvl+OHiDtQHRKC+2fOZLkjm8NiRwTg9OnTiIyMRFNTk3nWs2dP5OXl8Yw5IoHKy8vx+SefwOvMWQzSatHS1HTpoxI
XD12enl5wc3W16GsaZTIcVEWgtk9fzEh+iNuyZFN4r1jq8oxGI+bMmSMpdQCwYcMGljoigfR6PbampcG17DwSjh2DwmSCm6srAgMC4O3tgx49/C1e6gBAbjIhQXsMLufL8E1aGvR6vcVfg6ijsNhRl/fOO+9g7969ktkjjzyCsWPHCkpERABw4MAB1JwrRdzx41D+4qxXmUwGF2dnKDvwAsNKoxFxx46jurQU6enpHfY6RJbGYkdd2smTJ/Hcc89JZn369MHKlSsFJSIi4OLFiTMOHMCAggJ4NDcLyeDZ3Iz++QU4vH8/zp8/LyQD0Y1isaMuS6/XIyUlxXwphZ9s3LgR3bp1E5SKiAAgff9+uFdVIay0VGiOfqWlcK+qwoH9+4XmILpeLHbUZb311lv44YcfJLMnn3wSI0aMEJSIiACgpqYGZwoKcHtRMeSCz++Tm0wILSrGmfx81NTUCM1CdD1Y7KhLys3NxYsvviiZhYWFYcWKFYISEdFPcnJy4NDcjOCr3HlChF5VVVA2N0Oj0YiOQvSbWOyoy2lra0NKSgra29vNM7lcjk2bNsG1A86wI6LrZzAYkJuVhZDikuu+TVhHUxiN6F1SAk1mJgy/ch9aImvAYkddzvLly5GdnS2ZLV68GImJiYISEVk3Pz+/W36OcePGQafTXfPrb775JgCguroaFefP44Nd3/3q84Xv34dJ2VkYl5WJ+Vot6jv4kiSBF6rR0tSE6urqX/2+devWYfPmzTf1GitXrkR4eDgiIyNx7733ov7S3TWIbgQvUExdSmZmJhISEiR/646IiMCRI0fg7OwsMBmR9fLz80NVB2+L/vQaeXl5+Ob//T9M/H6P5BInlxt46CAOD7oLALD45EmEurpgQa+QW8pgMJmgkMmu+rV2hQJfDxuKcdOmQaVS3dLrXMv333+PQYMGwdnZGc8//zzkcjmWLVvWIa9F9osrdtRltLS0IDk5WVLqFAoFUlNTWeqIbtCOHTsQHR0NlUqFRYsW4ac1grVr16Jfv34YOXIkZsyYgffeew/AxcsINTY2orGxEWPHjoVarYZarcb27duxdOlS1NbWIjo6GkuXLkVreTkeyMoEALQbjVh2qhATsjIxMSsL269SMOM8PFDe2goAqGprw4Jjx3Df0WzM0OTg1KVLpZzRNeO+o9mYlnMUK06fxn1HL67av1NUhBcLC5CSm4vXTp/GWZ0Os/NycW92Nubk5eLHtjYAwD+Li7Hy3XcxadIkLFiwAADw+eefIzw8HFFRUZg8eTIA4OWXXza/56ysLAwcOBCRkZFITk42n4Hfp08fvPzyy4iJiUF8fLz5UirDhw83/y6KjY1FqeAzgsk2sdhRl/HSSy/h2LFjktnzzz+PuLg4QYmIbJNOp8MjjzyC//znP9BoNMjPz8eWLVtQWlqKlStXIiMjA2lpaVd85AEAtm/fDl9fX+Tm5kKj0eCuu+7C8uXL4eXlhaNHj2LmAw+gW22t+fs/Ly9Hg96AtJhY/Dc2FoO8PCXPZzCZcKC2BsN9fAEAy0+fxuMhvfDv6Bj8ue9teO30afN8Qa9e+H9R0XCSSw99+U3N+CgiAi+EhuLlU4VYfnsYtsTE4IGAALxXXAQAeL+kGMsnTsJrr76K119//eJzLl+OtLQ05OTkXPWe0ikpKXj33Xeh0Wjg5uaGNWvWmL8WEhKC7Oxs3HPPPVi/fv0Vj920aRNGjx59PX8cRBIsdtQlpKen469//atkFhMTg6VLlwpKRGS7Tp48if79+6NPnz6Qy+WYNWsW9u3bh4yMDIwcORKenp5wc3PDhAkTrnisWq3Gvn378Oyzz+LQoUPw8PCQfL1Vp4PyF6vqh+pqMT0gAPJLW6SeSgcAQINej0nZWbjrh0OoaW/HkEu3/ztUV4s/FxRgUnYWni8sQGX7xRU3bWMjRl0qf+O7d5e85khfHzjK5WjU65FVX48Fx49hUnYW3i8uRkXrxcdHunfDh7t3Y8+ePXBwuJg
hKSkJ8+fPx/r163H5p5rq6urQ2tqKhIQEAMBDDz2Effv2mb/+0wpfXFwczp49K3ns6tWrAQDTp0+/5p8B0bUoRQcg6mhNTU1ISUmR/OJ1dHTEJ598Yv4FTUQ3z2QyQSaTXVFurvYR7n79+iE7Oxtbt27FU089heTkZPzxj380f91oMFzXteu6KZVIi4mFzmDAnLw8fHq+DMk9gwAAW6JjrvlZOQC4/Nmd5T/fmszPwRFpMbFXPObDiAh85uqCffn5GDp0KI4cOYK1a9fi0KFD+O9//4uYmBjJjsDVfhayX2RycnICcPHjIL/8eMh///tf/P3vf8eePXt+82dAdDVcsSO799xzz6GwsFAyW7ZsWYd9AJrI3vXv3x/5+fkoKiqC0WjE559/jiFDhiA+Ph67du1CfX09mpub8c0331zx2LKyMri5uSE5ORlPPfUUjh49CuDngiNXKGD8RQFK9PLC5vJyGC8VpTp9u+T5XBQKLL3tNmwsLYXeZMJAT098Xn7xM2tGkwknm5oAAHe4u2PXpTNav62qvOr7clcq4ePggO8vfV+70YjC5mYYTSacb21FRM8gPDhjBs6ePQuDwYDTp0+bt5IdHR1x4cIF83N5eXnByckJGRkZAIBPP/0UQ4YM+dWfa2ZmJhYvXoyvvvoK7u7uv/q9RNfCFTuya7t27cK7774rmQ0aNAiLFy8WlIjI9tTU1CA4ONj872+//TY+/PBDTJ48GXq9HqNHj8aUKVMgk8mwcOFC3HnnnQgJCUFMTMwVW625ublYvHgxFAoFXFxcsGHDBgAXP4+mVqsRHBSEcT4+5u+fHhCI0806TMjOggIy/DEkBGMuu/yKuls39HN1w/aqKrxwWyheLCzE5+fPQ28yYUoPf/R3c8Of+96GxSdPYt25EsR7eMJdocDVrOzfHy8WFmLl2bMwwIR5QcHo7eyMxSdPovJUIfDdTrzyyitQKBRYvHgxCgsLYTKZMHXqVMnPCLj4ObkFCxagpaUF0dHR5pMurmXJkiWor683b2EnJSXh/fff/40/HSIpXu6E7FZ9fT0iIyNRVFRknrm4uODo0aPo16+fwGRE9qupqQlubm7Q6XQYOnQoNm7cCLVafd2P/+6773By+3bcffCQRXPpDAY4y+WQyWRYf+4cqtrb8H99b7uh5/jfXYPQf8wYjBw50qLZiCyJK3Zkt55++mlJqQOAFStWsNQRdaDnn38eu3fvNl9e6EZKHQD4+/sj08UF7QoFHCx4lwdNQwOWnzkNo8kEfycn/PUGfw+0KxRodHGBv7+/xTIRdQQWO7JL27Ztu+ISAsOGDcMTTzwhKBFR1/D222/f0uP9/f0hUypR5+YGPwveeSHBy+uqJ0Vcrzo3N8iUShY7sno8eYLsTk1NDebNmyeZubu74+OPP4Zczv/kiayZj48PnN3ccP4Xn7OzBud9L+bysbJcRJfjUY7szpNPPomysjLJbOXKlejbt6+gRER0vRQKBdSxsSgO6QWDlfxFzCCXo6hXL0TGxUFxjZMuiKyFdfxfQ2QhW7ZswT/+8Q/JbMyYMXjkkUcEJSKiGxUVFYV2V1ecu+zsV1FK/Pygd3VFZGSk6ChEv4nFjuxGZWUl5s+fL5l5enpi/fr1kguDEpF18/b2Rt+wMBT2DpFc004Eo0yGU71D0LdfP3hfursFkTVjsSO7YDKZsGDBAlRWSi88+s4771xxbSkisn5JQ4ag0c8PBUFBQnPkBwWh0c8PSYMHC81BdL1Y7MgufP755/jXv/4lmU2ePBkPPfSQoEREdCsCAwMRn5SEE2FhqHd17bDXMRgN0LXozHe2+KU6V1ec7BeGgYMHIzAwsMMyEFkSL1BMNq+srAwqlQo1NTXmma+vL7RaLS9NQGTD9Ho9UjduhOHYcQzJzobSaLTo8+taWi793jABkMHDwwNubm6QAdDL5dgbGwOH8HAkz50LpZJXByPbwBU7smkmkwmPPvqopNQBwNq1a1nqiGycUqnE+Em
T0NyzJ36IuMPin7draGjAxVIHACbU19ehsrISurY2/BBxB3SBPTFu0iSWOrIpLHZk0z7++GNs3bpVMps+fTqmTZsmKBERWVJAQADunf4AqkNCcFAVAb0FL4FytZOqWk1G7OvfD6c8PBCTMBABAQEWez2izsCtWLJZRUVFUKvVl/7WfZG/vz+0Wi18fX0FJiMiSysqKsKWzV/AtawMccePw6O5+Zafs76hAY2NP//+aPLwwIm4O1Hm6oIvtmzB+fPn8cUXX+Dee++95dci6iwsdmSTjEYjRo8eje+++04yT0tLw8SJEwWlIqKOVF5ejq1paag5V4oBBQUIKy2F/BYOYc06HWpra2CUyVDWrx8KBgxAaXU10r75Bj/++CMAIC4uDkeOHLHUWyDqcPzgANmkdevWXVHqZs+ezVJHZMcCAgKQMncuDhw4gAxnJ5wLDEBoUTF6VVVBcRMnVsgcHVDRuzdKbr8dVe7uOJCRgfT0dBgMBvP3+FnJRZKJrhdX7MjmFBYWIioqCs2/2IoJDg5GXl4ePD09BSYjos5SVlaG9AMHcCY/H8rmZvQuKUHghWp4NjXB4RfF7HLtCgXq3Nxw3tcHZ4ODUdXWhvwzZ3AgPR3l5eWS7w0JCcGuXbsQGhra0W+HyGJY7MimGAwGDB8+HPv375fMt2/fjtGjRwtKRUSi1NTUQKPRQJOZiZamJpj0erjrdPCoroGjXg+5yQijTI42pRL1Pt5odHGBTKmEs5sb1LGxmDlz5hWF7ieJiYnYt28f5FZyz1qi68FiRzZl5cqVWLx4sWT22GOPYe3atYISEZE1MBgMqK6uRkVFBSoqKlBZXo62lhYY9HoolEo4Ojuje0AA/P394e/vDx8fHygUCowYMQLff//9NZ931apV+NOf/tR5b4ToFrHYkc04fvw4YmJi0Nraap717dsXGo0G7u7uApMRka3KysrC9OnTUVZWhpkzZ+J///sfiouLzV93cnLC0aNHMWDAAIEpia4fix3ZBL1ej8TERGRkZJhnMpkM33//PYYOHSowGRHZA4PBAIVCgT179mD48OGSrw0cOBAHDhzghYrJJvCDA2QT3njjDUmpA4CFCxey1BGRRSgUCgDAsGHD8NRTT0m+dvjwYbz55psiYhHdMK7YkdXLyclBfHw82tvbzbP+/fsjOzsbLi4uApMRkT1qbm5GTEwM8vPzzTMHBwccOXIEkZGRApMR/Tau2JFVa2trQ3JysqTUyeVypKamstQRUYdwdXVFamqq5GzY9vZ2JCcno62tTWAyot/GYkdWbdmyZdBoNJLZkiVLkJCQICgREXUFgwYNwrPPPiuZ5eTk4C9/+YugRETXh1uxZLUOHz6MxMREyVXg1Wo1MjIy4OTkJDAZEXUFra2tuPPOO5GXl2eeKRQKHDx4EPHx8QKTEV0bix1ZJZ1Oh9jYWJw4ccI8UyqVyMjIQHR0tLhgRNSlZGVlISEhAXq93jwLDw9HVlYWnJ2dBSYjujpuxZJVeuGFFySlDgBefPFFljoi6lSxsbF44YUXJLPjx49fMSOyFlyxI6uzb98+DBs2DL/8TzMuLg4HDx6Eg4ODwGRE1BW1t7fjrrvuQmZmpnkmk8mwb98+JCUlCUxGdCUWO7IqjY2NiIqKwunTp80zJycnZGZmIiIiQmAyIurKtFotYmNjJWfFhoaGIicnB25ubgKTEUlxK5asypIlSySlDgBeffVVljoiEioiIgKvvvqqZHbq1CksWbJEUCKiq+OKHVmNnTt34u6775bMEhMTsXfvXvNV4YmIRDEYDBgyZAgOHjwome/cuRMjR44UlIpIisWOrEJdXR3UajVKSkrMMxcXF+Tk5CAsLExgMiKinxUUFCAqKgo6nc48CwkJQW5uLjw8PAQmI7qIW7FkFRYtWiQpdQDw5ptvstQRkVUJCwvDG2+8IZkVFxdj0aJFghIRSXHFjoT7+uuvMXHiRMlsxIgR2Llzp+SWPkRE1sBoNGLUqFHYvXu
3ZP71119j/PjxglIRXcRiR0JduHABKpUK5eXl5lm3bt2g0WjQp08fccGIiH7F2bNnoVar0djYaJ4FBgYiLy8PPj4+ApNRV8flEBLqiSeekJQ6AFi1ahVLHRFZtT59+mDVqlWS2fnz5/HEE08ISkR0EVfsSJgvv/wS06ZNk8zuuecebN26FTKZTFAqIqLrYzKZMG7cOHz77beS+Zdffon7779fUCrq6ljsSIgff/wRERERqKqqMs+8vLyg1WrRs2dPgcmIiK5faWkpVCoVamtrzTM/Pz9otVr06NFDXDDqsrgVS53OZDJh/vz5klIHAO+99x5LHRHZlKCgILz77ruSWVVVFRYsWACum5AILHbU6f75z3/iP//5j2R23333YdasWWICERHdggcffBBTpkyRzP7973/j008/FROIujRuxVKn4rYFEdmjiooKqFQqfryEhOOKHXUak8mEefPmSUodAHzwwQcsdURk0/z9/bF27VrJrLa2FvPmzeOWLHUqFjvqNOvXr7/i7LFZs2bhvvvuE5SIiMhypk6dipkzZ0pm27Ztw8aNGwUloq6IW7HUKXgxTyLqCqqrqxEREXHFRddzc3PRu3dvgcmoq+CKHXU4o9GIOXPmSEodcHEFj6WOiOyJj48PPvroI8msoaEBc+fOhdFoFJSKuhIWO+pw77//Pr7//nvJ7OGHH8a4cePEBCIi6kATJkzAnDlzJLNdu3ZhzZo1ghJRV8KtWOpQ+fn5iI6Ohk6nM89CQkKQm5sLDw8PgcmIiDpOXV0d1Go1SkpKzDNXV1ccPXoUYWFhApORveOKHXUYg8GA2bNnS0odAGzcuJGljojsmqen5xUnTTQ3N2P27NkwGAyCUlFXwGJHHWblypU4ePCgZPb4449j5MiRghIREXWeUaNG4Q9/+INklp6ejrfffltQIuoKuBVLHUKr1SI2NhZtbW3mWWhoKHJycuDm5iYwGRFR52lsbERUVBROnz5tnjk5OSErKwt33HGHwGRkr7hiRxbX3t6O5ORkSamTyWRITU1lqSOiLsXd3R2bNm2CTCYzz1pbW5GSkoL29naBychesdiRxa1YsQJZWVmS2dNPP42kpCRBiYiIxBkyZAj+9Kc/SWZHjhzB66+/LigR2TNuxZJFZWVlISEhAXq93jwLDw9HVlYWnJ2dBSYjIhJHp9MhNjYWJ06cMM+USiUyMjIQHR0tLhjZHa7YkcX8tL3wy1KnUCiQmprKUkdEXZqLiwtSU1Mhl/982NXr9UhOTkZra6vAZGRvWOzIYl5++WXk5eVJZs899xzi4+MFJSIish4DBw7Ec889J5nl5uZi2bJlghKRPeJWLFnEoUOHkJSUJLllTlRUFA4fPgxHR0eByYiIrEdbWxvi4+Oh0WjMM7lcjvT0dCQkJAhMRvaCxY5uWXNzM2JiYpCfn2+eOTg44MiRI4iMjBSYjIjI+uTk5CA+Pl5yVmz//v2RnZ0NFxcXgcnIHnArlm7Z0qVLJaUOuLgty1JHRHSlqKgovPjii5LZyZMnsXTpUkGJyJ5wxY5uyZ49ezB8+HDJbODAgThw4ACUSqWYUEREVk6v1yMxMREZGRnmmUwmw/fff4+hQ4cKTEa2jsWOblpDQwOioqJw5swZ88zZ2RnZ2dkYMGCAwGRERNbv+PHjiImJkZwV27dvX2g0Gri7uwtMRraMW7F005555hlJqQOA5cuXs9QREV2H8PBwLF++XDI7c+YMnn32WUGJyB5wxY5uyvbt2zF27FjJbMiQIdi9ezcUCoWgVEREtsVgMGDYsGE4cOCAZL5jxw7cfffdglKRLWOxoxtWW1sLlUqF0tJS88zV1RUajQahoaECkxER2Z7CwkJERUWhubnZPAsODkZeXh48PT0FJiNbxK1YumELFy6UlDoAeOutt1jqiIhuwu23344333xTMjt37hwWLlwoJhDZNK7Y0Q1JS0vD5MmTJbNRo0Zhx44dkMlkglIREdk2o9GI0aNH47vvvpP
M09LSMHHiREGpyBax2NF1q6qqgkqlQkVFhXnm4eGB3NxchISECExGRGT7iouLoVKp0NDQYJ75+/tDq9XC19dXYDKyJdyKpev2+OOPS0odAKxevZqljojIAkJCQrB69WrJrKKiAn/84x/FBCKbxBU7ui6bN2/GjBkzJLMJEyYgLS2NW7BERBZiMpkwYcIEfPPNN5L5F198gWnTpglKRbaExY5+U3l5OSIiIlBdXW2eeXt7Q6vVIjAwUGAyIiL7U1ZWBpVKhZqaGvPM19cXWq0W/v7+ApORLeBWLP0qk8mE+fPnS0odAKxZs4aljoioA/Ts2RPvvfeeZHbhwgXMnz8fXIuh38JiR7/qk08+QVpammQ2depUTJ8+XVAiIiL7N3PmTNx///2S2VdffYV//OMfghKRreBWLF1TSUkJ1Go16urqzLMePXogLy8P3bt3F5iMiMj+VVZWIiIiApWVleaZp6cn8vLyEBwcLDAZWTOu2NFVmUwmzJs3T1LqAOCDDz5gqSMi6gTdu3fHunXrJLO6ujrMmzePW7J0TSx2dFUffvghduzYIZk99NBDmDJliphARERd0H333YcHH3xQMtu+fTs++ugjQYnI2nErlq5w+vRpREZGoqmpyTzr2bMn8vLy4O3tLTAZEVHXU1NTA5VKhbKyMvPM3d0dGo0Gffv2FZiMrBFX7EjCaDRizpw5klIHABs2bGCpIyISwNvbG+vXr5fMGhsbMWfOHBiNRkGpyFqx2JHEO++8g71790pmjzzyCMaOHSsoERER3XPPPZg3b55ktmfPnisui0LErVgyO3nyJKKjo9HS0mKe9enTBxqNBt26dROYjIiI6uvroVarUVxcbJ65uLjg6NGj6Nevn8BkZE24YkcAAL1ej5SUFEmpA4CNGzey1BERWQEPDw98/PHHkplOp0NKSgoMBoOgVGRtWOwIAPDWW2/hhx9+kMyefPJJjBgxQlAiIiK63O9+9zv88Y9/lMwOHTqEt956S1AisjbciiXk5uYiLi4O7e3t5llYWBiOHj0KV1dXgcmIiOhyTU1NiI6ORmFhoXnm6OiIzMxMqFQqgcnIGnDFrotra2tDSkqKpNTJ5XJs2rSJpY6IyAq5ublh06ZNkMlk5llbWxuSk5Mlv8upa2Kx6+KWL1+O7OxsyWzx4sVITEwUlIiIiH5LUlISFi9eLJllZ2fjtddeE5SIrAW3YruwzMxMJCQkSD50GxERgSNHjsDZ2VlgMiIi+i0tLS2Ii4vDsWPHzDOlUokffvgBsbGxApORSFyx66JaWlqQnJwsKXVKpRKpqaksdURENsDZ2RmpqalQKBTmmV6vR3JyMlpbWwUmI5FY7LqoF198UfK3PABYunQp4uLiBCUiIqIbdeedd+LPf/6zZKbVavHSSy8JSkSicSu2C0pPT8fgwYPxyz/6mJgY/PDDD3BwcBCYjIiIblRbWxsSEhJw9OhR80wul2P//v246667xAUjIVjsuhieJk9EZH942Sr6Cbdiu5jnnntOUuoAYNmyZSx1REQ2TK1W45VXXpHMCgoKrtimJfvHFbsuZNeuXRg5cqRkNmjQIOzfv1/y4VsiIrI9er0egwcPvuIuQrt378bw4cPFhKJOx2LXRdTX1yMyMhJFRUXmGW8eTURkX06ePIno6GjJfb/79OkDjUbD+353EdyK7SKefvppSakDgBUrVrDUERHZkf79+2PFihWS2dmzZ6+4mDHZL67YdQHbtm3DuHHjJLNhw4Zh165dkMvZ7YmI7InRaMSIESOwd+9eyXzbtm0YO3asoFTUWVjs7FxNTQ1UKhXKysrMM3d3d2g0GvTt21dgMiIi6iinT59GZGQkmpqazLOgoCDk5ubC29tbYDLqaFyusXNPPvmkpNQBwMqVK1nqiIjs2G233Ya33npLMistLcVTTz0lKBF1Fq7Y2bEtW7bgvvvuk8zGjBmDbdu2QSaTCUpFRESdwWQyYcyYMfjf//4nmW/ZsgVTpkwRE4o
6HIudnaqsrERERAQqKyvNM09PT+Tl5SE4OFhgMiIi6iwlJSVQqVSor683z3r06AGtVgs/Pz+ByaijcCvWDplMJixYsEBS6gDgnXfeYakjIupCevXqhXfeeUcy+/HHH7FgwQJwXcc+ccXODn322WeYNWuWZDZ58mRs2bKFW7BERF2MyWTC5MmT8d///lcy/+yzzzBjxgxBqaijsNjZmbKyMqhUKtTU1Jhnvr6+0Gq18Pf3F5iMiIhEKS8vR0REBKqrq80zHx8f5OXlITAwUGAysjRuxdoRk8mERx99VFLqAGDt2rUsdUREXVhAQADWrFkjmVVXV+PRRx/llqydYbGzIx9//DG2bt0qmU2fPh3Tpk0TlIiIiKzF9OnT8cADD0hmX3/9NVJTUwUloo7ArVg7UVRUBLVajYaGBvPM398fWq0Wvr6+ApMREZG1qKqqQkREBH788UfzzMPDA3l5eejVq5fAZGQpXLGzA0ajEQ8//LCk1AHARx99xFJHRERmfn5++OijjySz+vp6zJ07l1uydoLFzg6sW7cO3333nWQ2e/ZsTJw4UVAiIiKyVpMmTUJycrJktnPnTqxbt05QIrIkbsXauMLCQkRFRaG5udk8Cw4ORl5eHjw9PQUmIyIia1VbWwuVSoXS0lLzzM3NDTk5OQgNDRWYjG4VV+xsmMFgwJw5cySlDgA2bNjAUkdERNfk5eWFDRs2SGZNTU2YM2cOjEajoFRkCSx2Nmz16tXYv3+/ZPbYY49h9OjRghIREZGtGDNmDObPny+Z7du3D3/7298EJSJL4FasjTp+/DhiYmLQ2tpqnvXt2xcajQbu7u4CkxERka1oaGhAZGQkzp49a545OTnh6NGjGDBggLhgdNO4YmeD9Ho9UlJSJKVOJpNh06ZNLHVERHTdunXrhk2bNklmra2tSElJgV6vFxOKbgmLnQ164403kJGRIZktXLgQQ4cOFZSIiIhs1bBhw/DUU09JZocPH8abb74pKBHdCm7F2picnBzEx8ejvb3dPOvfvz+ys7Ph4uIiMBkREdmq5uZmxMTEID8/3zxzcHDAkSNHEBkZKTAZ3Siu2NmQtrY2JCcnS0qdXC5HamoqSx0REd00V1dXpKamQi7/uRa0t7cjOTkZbW1tApPRjWKxsyHLli2DRqORzJYsWYKEhARBiYiIyF4MGjQIzz77rGSWk5ODv/zlL4IS0c3gVqyNOHz4MBITE2EwGMwztVqNjIwMODk5CUxGRET2orW1FXfeeSfy8vLMM4VCgYMHDyI+Pl5gMrpeLHY2QKfTITY2FidOnDDPlEolMjIyEB0dLS4YERHZnaysLCQkJEjOig0PD0dWVhacnZ0FJqPrwa1YG/DCCy9ISh0AvPjiiyx1RERkcbGxsXj++ecls+PHj+OFF14QlIhuBFfsrNy+ffswbNgw/PKPKS4uDgcPHoSDg4PAZEREZK/a29sxaNAgZGVlmWcymQz79u1DUlKSwGT0W1jsrFhjYyOioqJw+vRp88zJyQmZmZmIiIgQmIyIiOydVqtFbGys5KzY0NBQ5OTkwM3NTWAy+jXcirViS5YskZQ6AHj11VdZ6oiIqMNFRETg1VdflcxOnTqFJUuWCEpE14MrdlZq586duPvuuyWzxMRE7N27FwqFQlAqIiLqSgwGA4YMGYKDBw9K5jt37sTIkSMFpaJfw2Jnherq6qBWq1FSUmKeubi4ICcnB2FhYQKTERFRV1NQUICoqCjodDrzLCQkBLm5ufDw8BCYjK6GW7FWaNGiRZJSBwBvvvkmSx0REXW6sLAwvPHGG5JZcXExFi1aJCgR/Rqu2FmZr7/+GhMnTpTMRowYgZ07d0pu9UJERNRZjEYjRo0ahd27d0vmX3/9NcaPHy8oFV0Ni50VuXDhAlQqFcrLy82zbt26QaPRoE+fPuKCERFRl3f27Fmo1Wo0NjaaZ4GBgcjLy4OPj4/AZPRLXAKyIk888YSk1AHAqlWrWOqIiEi4Pn36YNW
qVZLZ+fPn8cQTTwhKRFfDFTsr8eWXX2LatGmS2T333IOtW7dCJpMJSkVERPQzk8mEcePG4dtvv5XMv/zyS9x///2CUtEvsdhZgR9//BERERGoqqoyz7y9vZGXl4eePXsKTEZERCRVWloKlUqF2tpa88zPzw9arRY9evQQF4wAcCtWOJPJhPnz50tKHQC8++67LHVERGR1goKC8O6770pmVVVVWLBgAbhWJB6LnWD//Oc/8Z///Ecyu++++zBr1iwxgYiIiH7Dgw8+iClTpkhm//73v/Hpp5+KCURm3IoVqLS0FBEREairqzPPuJxNRES2oKKiAiqVSrLj5OXlBa1Wyx0ngbhiJ4jJZMK8efMkpQ4APvjgA5Y6IiKyev7+/li7dq1kVltbi3nz5nFLViAWO0HWr19/xVlFs2bNwn333ScoERER0Y2ZOnUqZs6cKZlt27YNGzduFJSIuBUrAC/ySERE9qK6uhoRERFXXFw/NzcXvXv3Fpisa+KKXSczGo2YM2eOpNQBF1fwWOqIiMjW+Pj44KOPPpLMGhoaMHfuXBiNRkGpui4Wu072/vvv4/vvv5fMHn74YYwbN05MICIiols0YcIEzJkzRzLbtWsX1qxZIyhR18Wt2E6Un5+P6Oho6HQ68ywkJAS5ubnw8PAQmIyIiOjW1NXVQa1Wo6SkxDxzdXXF0aNHERYWJjBZ18IVu05iMBgwe/ZsSakDgI0bN7LUERGRzfP09LzipInm5mbMnj0bBoNBUKquh8Wuk6xcuRIHDx6UzB5//HGMHDlSUCIiIiLLGjVqFP7whz9IZunp6Xj77bcFJep6uBXbCbRaLWJjY9HW1maehYaGIicnB25ubgKTERERWVZjYyOioqJw+vRp88zJyQlZWVm44447BCbrGrhi18Ha29uRnJwsKXUymQypqaksdUREZHfc3d2xadMmyGQy86y1tRUpKSlob28XmKxrYLHrYCtWrEBWVpZk9vTTTyMpKUlQIiIioo41ZMgQ/OlPf5LMjhw5gtdff11Qoq6DW7EdKCsrCwkJCdDr9eZZeHg4srKy4OzsLDAZERFRx9LpdIiNjcWJEyfMM6VSiYyMDERHR4sLZue4YtdBflp2/mWpUygUSE1NZakjIiK75+LigtTUVMjlP1cNvV6P5ORktLa2Ckxm31jsOsjLL7+MvLw8yey5555DfHy8oERERESda+DAgXjuuecks9zcXCxbtkxQIvvHrdgOcOjQISQlJUlupRIVFYXDhw/D0dFRYDIiIqLO1dbWhvj4eGg0GvNMLpcjPT0dCQkJApPZJxY7C2tubkZMTAzy8/PNMwcHBxw5cgSRkZECkxEREYmRk5OD+Ph4yVmx/fv3R3Z2NlxcXAQmsz/cirWwpUuXSkodcHFblqWOiIi6qqioKLz00kuS2cmTJ7F06VJBiewXV+wsaM+ePRg+fLhkNnDgQBw4cABKpVJMKCIiIiug1+uRmJiIjIwM80wmk+H777/H0KFDBSazLyx2FtLQ0ICoqCicOXPGPHN2dkZ2djYGDBggMBkREZF1OH78OGJiYiRnxfbt2xcajQbu7u4Ck9kPbsVayDPPPCMpdQCwfPlyljoiIqJLwsPDsXz5csnszJkzeOaZZwQlsj9csbOA7du3Y+zYsZLZkCFDsHv3bigUCkGpiIiIrI/BYMDw4cOxf/9+yXz79u0YPXq0oFT2g8XuFtXW1kKlUqG0tNQ8c3V1hUajQWhoqMBkRERE1unUqVOIjIxEc3OzeRYcHIzc3Fx4eXmJC2YHuBV7ixYuXCgpdQDw1ltvsdQRERFdQ2hoKP76179KZufOnbvi/rJ047hidwvS0tIwefJkyWzUqFHYsWMHZDKZoFRERETWz2g0YvTo0fjuu+8k86+++gqTJk0SlMr2sdjdpKqqKqhUKlRUVJhnHh4eyM3NRUhIiMBkREREtqG4uBgqlQoNDQ3mmb+/P7RaLXx9fQUms13cir1Jjz/+uKTUAcDq1at
Z6oiIiK5TSEgIVq9eLZlVVFTg8ccfFxPIDnDF7iZs3rwZM2bMkMwmTJiAtLQ0bsESERHdAJPJhIkTJ2Lr1q2S+ebNm/HAAw8ISmW7WOxuUHl5OSIiIlBdXW2eeXt7Q6vVIjAwUGAyIiIi23T+/HlERESgpqbGPPP19YVWq4W/v7/AZLaHW7E3wGQyYf78+ZJSBwBr1qxhqSMiIrpJgYGBeP/99yWzCxcu4NFHHwXXn24Mi90N+OSTT5CWliaZTZ06FdOnTxeUiIiIyD7MmDED999/v2SWlpaGv//974IS2SZuxV6nkpISqNVq1NXVmWc9evRAXl4eunfvLjAZERGRfaisrERERAQqKyvNM09PT+Tl5SE4OFhgMtvBFbvrYDKZMG/ePEmpA4APPviApY6IiMhCunfvjg8++EAyq6urw8MPP8wt2evUJVbsDAYDqqurUVFRgYqKClSWl6NVp4PRYIBcoYCTiwu6BwTA398f/v7+8PHxkdzj9YMPPsBjjz0mec6HHnoIn3zySWe/FSIiIrv30EMP4R//+Idk9sEHH+DRRx+VzG71+G6P7LrY1dTUICcnB7lZWWhpaoJJr4e7TgfP6mo46PWQm0wwymRoVypR5+ODRhcXyJRKOLu5QR0bi6ioKNTU1CAyMhJNTU3m5+3Zsyfy8vLg7e0t8N0RERHZp5qaGqhUKpSVlZlnbm5uyM3NRd++fS1yfLfXY7hdFruysjKk79+PMwUFcGhuRkhxCQKrq+HZ1AQHg+Gaj2tXKFDn5obzPj4oDumFdldXnCkpwZavvkJ5ebn5+7Zt24axY8d2xlshIiLqkrZt24Zx48ZJZuPHj0fKQw/hbGHhLR/f+4aFIWnIELu7qoVdFTu9Xo8DBw4g48ABuFdV4faiYgRXVUFhNN7wcxnkchR2c8fJoCBUubvjQEYG0tPTMXfuXHz44YcdkJ6IiIh+6ZFHHsH69euhUCiQmJiIpPh49GxrQ3jZ+Vs6vp/z80Nh7xA0+vkhPikJSUlJUCqVHfAOOp/dFLvy8nJsTUtDzblSDCgoQFhpKeS38Nb0ej0qKythkAFl/fqhYMAAVDU1YdGzz+K2226zYHIiIiK6mvr6egwbNgx3xsQgyNsbYSdOICi/AP5+frdcxIwyGQqCgnAiLAw+wUEYN2kSAgICLJRcHLsodkVFRdiyeTNcy84j7vhxeDQ339LzmQBUVVWhvb3NPGv28MDZwYPRGtwL905/AL17977F1ERERPRrioqK8Pknn8ChuBjhmZlwra8HADg6OMLXzw+WuIlnvasrMsPD0dyzp10c322+2BUVFeFfn30G36JiDDx2DMqbWJa9XGNjI+ob6iUzNzd3uHl54YeIO1AdEoL7Z860+T98IiIia/XL4/uAQwfR0tAg+bpHNw+4u7tb5LX0crndHN9t+jp25eXl2LJ5M3yKijFIq7VIqWvX61F/2X88SoUSHt26QWk04q48LXyKi7Fl8xeSEyqIiIjIMi4/vnu7uUOhkG691jc0oF2vt8jr2dPx3WaLnV6vx9a0NLiWnUfCsWO39Hm6n5gA1NbWXPqnn8jg5eUFmezigq/cZEKC9hhczpfhm7Q06C30HxURERFd/fguk8ng7eUFSDZfTaitrYGlth3t5fhus8XuwIEDqDlXirjjxy2yUgcAbW1taG9vl8zc3d3g6OgomSmNRsQdO47q0lKkp6db5LWJiIjo2sd3R0dHuLu5Sb63vb0d7W1tlz/FTbOH47tNFruysjJkHDiAAQUFt3yixK9RKh3QrZvHVb/m2dyM/vkFOLx/P86fP99hGYiIiLqK3zq+d/PoBqXSQTKz9IkCtn58t8lil75/P9yrqhBWWmrR53V0dISrqxsAGRyUDvDx8fnVM276lZbCvaoKB/bvt2gOIiKirui3ju8yyODj4wMHpQMAGVxdr9xVswRbPr7b3NX4ampqcKagADFFxRb5XN0vyQB4eXrCy9Pzur5fbjIhtKgYR319UVN
TY7e3JyEiIupo13t8VyoU6N69e4dmseXju82t2OXk5MChuRnBVVWiowAAelVVQdncDI1GIzoKERGRzeLx3TJsqtgZDAbkZmUhpLjkpm4j0hEURiN6l5RAk5kJw6/cp46IiIiujsd3y7GpYlddXY2WpiYEVldb9Hl/r9Egv6npph8feOFirmoL5yIiIrJHn332GSIjI/G73/0O//73v1FVVdUhx/dbZYvHd5v6jF1FRQVMej28GhuFZTCZTDABkMt+Pq3Cs6kJJr0eFRUVHb7vT0REZMsqKirw+9//HsZLK3O7d+/GPffcgyGxsfAUeHy/Gls8vttcsXPX6a64bl2TwYAnjx9HRVsrAGBJ39vQZjTib8VFMJmAAW5u+Gv//vjs/Hl8WVGOVqMRKvdueC0sTFLQAOD76mq8X1KMVqMRke7dsOz22yGXyTDw0EFM7eGPQ3W1eDOsH/q4uJgfIzMY4NrUhJMnT8LV1bXjfxBEREQ2Kj093VzqftLa2gp5VRVqKirg7u4OFxeX37wP7OXH/mn+AajV6/HUpduBvVtcBC+lAx7q2RPvFRdhW1UV5JBhWoA/knsGXVdWB4MB7jodKioqoFKpbvi9imBTxa6yvByeV1kO3V9TAy8HJTaoVDCZTChrbcVDuRp8GhmFACcn1F666PA9fn6YGRgIAHi+oAC7qqsxytfX/DzV7e34uLQU/1BHwkkuxyunCvFNVSVGuLmjVq9HqNGA3wcEAA31+PGye8k6lpXhX198galTp3bgT4CIiMj++Pv5wbu+Hnp9O2pra1BfX48ePXpcsfjyS5cf+8+3tuLRY1pzsdtRVYUNKjV2V1/A4bo6bImOgaNcbu4E18ujugaVNnSLMZsqdq06HVyucouPfm6ueO1MPd48cwZ3+/qiur0dg7y8EODkBADwcrh4McMTTU1YXVSERoMedXo9gp2dJcXuaH09TjY3YVrO0YuvZzTC39EJdXoDnGQy3HXZFa9/SdnWBudfrOIRERHR9XF2coLyFxckNhoNaGhogKfH1W8SAFx57I/x8ICnUonC5iYAMngolejh6Ij1tXW43z8AjvKLpxX81Amul6Nej5aWlpt6XyLYVLEzGgxXvbZNXxdX/Cc6Bt9XV+Mvp09hco8eV338nwsK8GHEHbjd1Q0flJSg9bKlYBOAEd4+WNGvn2RWXl4Op1/5WwMAyEwmKBWKG35PREREXZ1SLofssmOy7DeOu5cf+6f08MdYv+749tLlUsb6WeYzcXKTEQYbum+sTZ0VK1coYLzKH3RFaytcFQrc6++PlJ5BON7YhEO1dShvvbjv/tOyq85ogJ+DI1qNRmy/cOV1cqK7dcMPdbU4f+lxNe3tqGhthZeX12/u9ZtkMuht6HRoIiIia6E3GmGS/1xJlEolunXr9quPueLY39SIMX5+2HHhArZXXcAYPz8AQKKXF/5VUY62S8XxRrdijTI5FErbWQeznaQAnFxc0H6VH25+czPeOHMacpkMznI5XgsLw2g/Xzx6TAuTCQh3d8Ob/frjD71CcN/Rowh2dsKAq2yr+jo64uXbb8cfjh2D3mSEUibHX8LCEODuDplcjsCAwGtmK/Hywshhw/D+hx9a9D0TERHZk71792Ls2LGSWUtrK/SOjnBydIJ7t25wuo7bhF3t2N/D0RHul3bPelx6juE+PtA2NmLK0WwoZTJM8w/AQz17XnfeNqUSjs7ON/AOxZKZTBa+L1cH+u6773By+3bcffCQ6ChX+N9dg9B/zBiMHDlSdBQiIiKrVV1djZ49e6L10u4YAMybNw9R3TwwJiNDYLKrs7Xju01txfr7+6PRxQXtVvZZtnaFAo0uLvD39xcdhYiIyKr5+Pjgv//9L8aMGYPf//73SE9Px1NPPYXmbu48vluATW3F+vv7Q6ZUos7NDX719b/9gE5S5+YGmVJpU3/wREREotx99924++67zf9eWVnJ47uF2NSKnY+PD5zd3HDex0d0FInzvhdz+Vh
ZLiIiIlvA47vl2FSxUygUUMfGojikFwxy64hukMtR1KsXIuPioLCyJWQiIiJbwOO75VjHT+8GREVFod3VFecuncYsWomfH/SuroiMjBQdhYiIyGbx+G4ZNlfsvL290TcsDIW9Q656TbvOZJTJcKp3CPr26wdvb2+hWYiIiGwZj++WYXPFDgCShgxBo58fCoKu7ya+HSU/KAiNfn5IGjxYaA4iIiJ7wOP7rbPJYhcYGIj4pCScCAtDvaurkAx1rq442S8MAwcPRmDgtS9cTERERNeHx/dbZ5PFDgCSkpLgHRyEzPBw6Dv5g5Z6uRyZd4TDJygIiYmJnfraRERE9ozH91tjs8VOqVRi/KRJaO7ZEz9E3NFp+/FGmQw/RNwBXWBPjJs0CUobun8cERGRtePx/dbYbLEDgICAANw7/QFUh4TgoCqiw5u9Xi7HQVUEqkNCcO/0BxAQENChr0dERNQV8fh+82zqXrHXUlRUhC2bv4BrWRnijh+HR3OzxV+jztUVmXeEQxfYE/dOfwC9e/e2+GsQERHRz3h8v3F2UewAoLy8HFvT0lBzrhQDCgoQVloKuQXemlEmQ35QEE72C4NPUBDGTZpk002eiIjIlvD4fmPsptgBgF6vx4EDB5Bx4ADcq6oQWlSMXlVVUBiNN/xcBrkcJX5+ONU7BI1+fhg4eDASExNtds+diIjIVvH4fv3sqtj9pKysDOkHDuBMfj6Uzc3oXVKCwAvV8GxqgoPBcM3HtSsUqHNzw3lfHxT16gW9qyv69uuHJBs95ZmIiMie8Pj+2+yy2P2kpqYGGo0GmsxMtDQ1waTXw12ng0d1DRz1eshNRhhlcrQplaj38UajiwtkSiWc3dwQGReHyMhIm7viNBERkb3j8f3a7LrY/cRgMKC6uhoVFRWoqKhAZXk52lpaYNDroVAq4ejsjO4BAfD394e/vz98fHxs6oa/REREXRGP71fqEsWOiIiIqCuw6evYEREREdHPWOyIiIiI7ASLHREREZGdYLEjIiIishMsdkRERER2gsWOiIiIyE6w2BERERHZCRY7IiIiIjvBYkdERERkJ1jsiIiIiOwEix0RERGRnWCxIyIiIrITLHZEREREdoLFjoiIiMhOsNgRERER2QkWOyIiIiI7wWJHREREZCdY7IiIiIjsBIsdERERkZ1gsSMiIiKyEyx2RERERHaCxY6IiIjITrDYEREREdkJFjsiIiIiO8FiR0RERGQnWOyIiIiI7ASLHREREZGd+P+Q+TvHpsIeXwAAAABJRU5ErkJggg==", "text/plain": [ "
" ] @@ -107,7 +107,7 @@ "name": "python", "nbconvert_exporter": "python", "pygments_lexer": "ipython3", - "version": "3.10.11" + "version": "3.10.13" }, "orig_nbformat": 4, "vscode": { diff --git a/Tutorial/Example_Search_Spaces/imputation.ipynb b/Tutorial/Example_Search_Spaces/imputation.ipynb new file mode 100644 index 00000000..07532532 --- /dev/null +++ b/Tutorial/Example_Search_Spaces/imputation.ipynb @@ -0,0 +1,85 @@ +{ + "cells": [ + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "from ConfigSpace import ConfigurationSpace\n", + "from ConfigSpace import ConfigurationSpace, Integer, Float, Categorical, Normal\n", + "\n", + "simple_imputer = ConfigurationSpace(\n", + " space = {\n", + " 'strategy' : Categorical('strategy', [['mean','median',], ['most_frequent'] ]),\n", + " 'add_indicator' : Categorical('add_indicator', [True, False]), \n", + " }\n", + ")\n", + "\n", + "simple_imputer.sample_configuration()" + ] + }, + { + "cell_type": "code", + "execution_count": 11, + "metadata": {}, + "outputs": [ + { + "data": { + "text/plain": [ + "Configuration(values={\n", + " '2': 2,\n", + " 'a': 2,\n", + "})" + ] + }, + "execution_count": 11, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "from ConfigSpace import ConfigurationSpace, EqualsCondition\n", + "import ConfigSpace\n", + "\n", + "cs = ConfigurationSpace({\n", + "\n", + " \"1\": [1,2,3],\n", + " \"2\": ConfigSpace.Constant(\"2\", 2),\n", + "\n", + " \"a\": [1, 2, 3],\n", + "\n", + "})\n", + "\n", + "cond = EqualsCondition(cs['1'], cs['a'], 1)\n", + "cond2 = EqualsCondition(cs['2'], cs['a'], 2)\n", + "\n", + "cs.add_condition(cond)\n", + "cs.add_condition(cond2)\n", + "\n", + "cs.sample_configuration()" + ] + } + ], + "metadata": { + "kernelspec": { + "display_name": "tpot2env", + "language": "python", + "name": "python3" + }, + "language_info": { + "codemirror_mode": { + "name": "ipython", + "version": 3 + }, + 
"file_extension": ".py", + "mimetype": "text/x-python", + "name": "python", + "nbconvert_exporter": "python", + "pygments_lexer": "ipython3", + "version": "3.10.13" + } + }, + "nbformat": 4, + "nbformat_minor": 2 +} diff --git a/tpot2/config/classifiers.py b/tpot2/config/classifiers.py index 9649e463..14649f61 100644 --- a/tpot2/config/classifiers.py +++ b/tpot2/config/classifiers.py @@ -39,14 +39,14 @@ def get_KNeighborsClassifier_ConfigurationSpace(n_samples=10): ) -def get_DecisionTreeClassifier_ConfigurationSpace(random_state=None): +def get_DecisionTreeClassifier_ConfigurationSpace(random_state=None, n_featues=20): space = { 'criterion': Categorical("criterion", ['gini', 'entropy']), - 'max_depth': Integer("max_depth", bounds=(1, 20)), - 'min_samples_split': Integer("min_samples_split", bounds=(2, 21)), - 'min_samples_leaf': Integer("min_samples_leaf", bounds=(1, 21)), - 'max_features': Categorical("max_features", ['sqrt', 'log2']), + 'max_depth': Integer("max_depth", bounds=(1, 2*n_featues)), + 'min_samples_split': Integer("min_samples_split", bounds=(2, 20)), + 'min_samples_leaf': Integer("min_samples_leaf", bounds=(1, 20)), + 'max_features': Categorical("max_features", [1.0, 'sqrt', 'log2']), 'min_weight_fraction_leaf': 0.0, } @@ -126,11 +126,15 @@ def get_GradientBoostingClassifier_ConfigurationSpace(random_state=None, n_class 'n_estimators': 100, 'loss': loss, 'learning_rate': Float("learning_rate", bounds=(1e-3, 1), log=True), - 'min_samples_leaf': Integer("min_samples_leaf", bounds=(1, 20)), + 'min_samples_leaf': Integer("min_samples_leaf", bounds=(1, 200)), 'min_samples_split': Integer("min_samples_split", bounds=(2, 20)), 'subsample': Float("subsample", bounds=(0.1, 1.0)), 'max_features': Float("max_features", bounds=(0.1, 1.0)), 'max_depth': Integer("max_depth", bounds=(1, 10)), + + #TODO include max leaf nodes? + #TODO validation fraction + n_iter_no_change? 
maybe as conditional + 'tol': 1e-4, } @@ -185,8 +189,8 @@ def get_ExtraTreesClassifier_ConfigurationSpace(random_state=None): 'n_estimators': 100, 'criterion': Categorical("criterion", ["gini", "entropy"]), 'max_features': Float("max_features", bounds=(0.05, 1.00)), - 'min_samples_split': Integer("min_samples_split", bounds=(2, 21)), - 'min_samples_leaf': Integer("min_samples_leaf", bounds=(1, 21)), + 'min_samples_split': Integer("min_samples_split", bounds=(2, 20)), + 'min_samples_leaf': Integer("min_samples_leaf", bounds=(1, 20)), 'bootstrap': Categorical("bootstrap", [True, False]), 'n_jobs': 1, } @@ -236,6 +240,7 @@ def get_MLPClassifier_ConfigurationSpace(random_state=None): space = space ) +GaussianNB_ConfigurationSpace = {} def get_BernoulliNB_ConfigurationSpace(): return ConfigurationSpace( diff --git a/tpot2/config/get_configspace.py b/tpot2/config/get_configspace.py index 7a9e552e..0710c29e 100644 --- a/tpot2/config/get_configspace.py +++ b/tpot2/config/get_configspace.py @@ -1,11 +1,16 @@ from ..search_spaces.nodes import EstimatorNode -from ..search_spaces.pipelines import ChoicePipeline +from ..search_spaces.pipelines import ChoicePipeline, WrapperPipeline -from .classifiers import * -from .transformers import * -from .regressors import * -from .selectors import * +from . import classifiers +from . import transformers +from . import selectors +from . import regressors +from . import autoqtl_builtins +from . import imputers +from . import mdr_configs +from . 
import special_configs +import numpy as np from sklearn.linear_model import SGDClassifier from sklearn.ensemble import RandomForestClassifier, ExtraTreesClassifier, GradientBoostingClassifier @@ -49,138 +54,157 @@ from sklearn.feature_selection import f_regression +from sklearn.linear_model import SGDRegressor +from sklearn.linear_model import LinearRegression +from sklearn.linear_model import Ridge +from sklearn.linear_model import Lasso +from sklearn.linear_model import ElasticNet +from sklearn.linear_model import Lars +from sklearn.linear_model import LassoLars, LassoLarsCV +from sklearn.linear_model import RidgeCV + + +from sklearn.svm import SVR +from sklearn.svm import LinearSVR + +from sklearn.ensemble import AdaBoostRegressor, GradientBoostingRegressor,RandomForestRegressor +from sklearn.ensemble import BaggingRegressor +from sklearn.ensemble import ExtraTreesRegressor +from sklearn.tree import DecisionTreeRegressor +from sklearn.neighbors import KNeighborsRegressor +from sklearn.linear_model import ElasticNetCV + +from xgboost import XGBRegressor + from tpot2.builtin_modules import RFE_ExtraTreesClassifier, SelectFromModel_ExtraTreesClassifier, RFE_ExtraTreesRegressor, SelectFromModel_ExtraTreesRegressor + +all_methods = [SGDClassifier, RandomForestClassifier, ExtraTreesClassifier, GradientBoostingClassifier, MLPClassifier, DecisionTreeClassifier, XGBClassifier, KNeighborsClassifier, SVC, LogisticRegression, LGBMClassifier, LinearSVC, GaussianNB, BernoulliNB, MultinomialNB, ExtraTreesRegressor, RandomForestRegressor, GradientBoostingRegressor, BaggingRegressor, DecisionTreeRegressor, KNeighborsRegressor, XGBRegressor, RFE_ExtraTreesClassifier, SelectFromModel_ExtraTreesClassifier, RFE_ExtraTreesRegressor, SelectFromModel_ExtraTreesRegressor, ZeroCount, OneHotEncoder, ColumnOneHotEncoder, Binarizer, FastICA, FeatureAgglomeration, MaxAbsScaler, MinMaxScaler, Normalizer, Nystroem, PCA, PolynomialFeatures, RBFSampler, RobustScaler, StandardScaler, SelectFwe, 
SelectPercentile, VarianceThreshold, RFE, SelectFromModel, f_classif, f_regression, SGDRegressor, LinearRegression, Ridge, Lasso, ElasticNet, Lars, LassoLars, LassoLarsCV, RidgeCV, SVR, LinearSVR, AdaBoostRegressor, GradientBoostingRegressor, RandomForestRegressor, BaggingRegressor, ExtraTreesRegressor, DecisionTreeRegressor, KNeighborsRegressor, ElasticNetCV, + ] + STRING_TO_CLASS = { - #classifiers - "LogisticRegression": LogisticRegression, - "KNeighborsClassifier": KNeighborsClassifier, - "DecisionTreeClassifier": DecisionTreeClassifier, - "SVC": SVC, - "LinearSVC": LinearSVC, - "RandomForestClassifier": RandomForestClassifier, - "GradientBoostingClassifier": GradientBoostingClassifier, - "XGBClassifier": XGBClassifier, - "LGBMClassifier": LGBMClassifier, - "ExtraTreesClassifier": ExtraTreesClassifier, - "SGDClassifier": SGDClassifier, - "MLPClassifier": MLPClassifier, - "BernoulliNB": BernoulliNB, - "MultinomialNB": MultinomialNB, - - #transformers - "Binarizer": Binarizer, - "Normalizer": Normalizer, - "PCA": PCA, - "ZeroCount": ZeroCount, - "OneHotEncoder": ColumnOneHotEncoder, - "FastICA": FastICA, - "FeatureAgglomeration": FeatureAgglomeration, - "Nystroem": Nystroem, - "RBFSampler": RBFSampler, - - #selectors - "SelectFwe": SelectFwe, - "SelectPercentile": SelectPercentile, - "VarianceThreshold": VarianceThreshold, - "RFE": RFE, - "SelectFromModel": SelectFromModel, + t.__name__: t for t in all_methods } +GROUPNAMES = { + "selectors": ["SelectFwe", "SelectPercentile", "VarianceThreshold",], + "selectors_classification": ["SelectFwe", "SelectPercentile", "VarianceThreshold", "RFE_classification", "SelectFromModel_classification"], + "selectors_regression": ["SelectFwe", "SelectPercentile", "VarianceThreshold", "RFE_regression", "SelectFromModel_regression"], + "classifiers" : ["BernoulliNB", "DecisionTreeClassifier", "ExtraTreesClassifier", "GaussianNB", "GradientBoostingClassifier", "KNeighborsClassifier", "LinearDiscriminantAnalysis", "LinearSVC", 
"QuadraticDiscriminantAnalysis", "PassiveAggressiveClassifier", "LogisticRegression", "MLPClassifier", "MultinomialNB", "PassiveAggressiveClassifier", "Perceptron", "QuadraticDiscriminantAnalysis", "RandomForestClassifier", "RidgeClassifier", "SGDClassifier", "SVC", "XGBClassifier", "LGBMClassifier"], + "transformers": ["Binarizer", "Normalizer", "PCA", "ZeroCount", "OneHotEncoder", "FastICA", "FeatureAgglomeration", "Nystroem", "RBFSampler"], +} + def get_configspace(name, n_classes=3, n_samples=100, random_state=None): match name: #classifiers.py case "LogisticRegression": - return get_LogisticRegression_ConfigurationSpace() + return classifiers.get_LogisticRegression_ConfigurationSpace() case "KNeighborsClassifier": - return get_KNeighborsClassifier_ConfigurationSpace(n_samples=n_samples) + return classifiers.get_KNeighborsClassifier_ConfigurationSpace(n_samples=n_samples) case "DecisionTreeClassifier": - return get_DecisionTreeClassifier_ConfigurationSpace() + return classifiers.get_DecisionTreeClassifier_ConfigurationSpace() case "SVC": - return get_SVC_ConfigurationSpace() + return classifiers.get_SVC_ConfigurationSpace() case "LinearSVC": - return get_LinearSVC_ConfigurationSpace() + return classifiers.get_LinearSVC_ConfigurationSpace() case "RandomForestClassifier": - return get_RandomForestClassifier_ConfigurationSpace(random_state=random_state) + return classifiers.get_RandomForestClassifier_ConfigurationSpace(random_state=random_state) case "GradientBoostingClassifier": - return get_GradientBoostingClassifier_ConfigurationSpace(n_classes=n_classes) + return classifiers.get_GradientBoostingClassifier_ConfigurationSpace(n_classes=n_classes) case "XGBClassifier": - return get_XGBClassifier_ConfigurationSpace(random_state=random_state) + return classifiers.get_XGBClassifier_ConfigurationSpace(random_state=random_state) case "LGBMClassifier": - return get_LGBMClassifier_ConfigurationSpace(random_state=random_state) + return 
classifiers.get_LGBMClassifier_ConfigurationSpace(random_state=random_state) case "ExtraTreesClassifier": - return get_ExtraTreesClassifier_ConfigurationSpace(random_state=random_state) + return classifiers.get_ExtraTreesClassifier_ConfigurationSpace(random_state=random_state) case "SGDClassifier": - return get_SGDClassifier_ConfigurationSpace(random_state=random_state) + return classifiers.get_SGDClassifier_ConfigurationSpace(random_state=random_state) case "MLPClassifier": - return get_MLPClassifier_ConfigurationSpace(random_state=random_state) + return classifiers.get_MLPClassifier_ConfigurationSpace(random_state=random_state) case "BernoulliNB": - return get_BernoulliNB_ConfigurationSpace() + return classifiers.get_BernoulliNB_ConfigurationSpace() case "MultinomialNB": - return get_MultinomialNB_ConfigurationSpace() + return classifiers.get_MultinomialNB_ConfigurationSpace() #transformers.py case "Binarizer": - return Binarizer_configspace + return transformers.Binarizer_configspace case "Normalizer": - return Normalizer_configspace + return transformers.Normalizer_configspace case "PCA": - return PCA_configspace + return transformers.PCA_configspace case "ZeroCount": - return ZeroCount_configspace + return transformers.ZeroCount_configspace case "OneHotEncoder": - return OneHotEncoder_configspace + return transformers.OneHotEncoder_configspace case "FastICA": - return get_FastICA_configspace() + return transformers.get_FastICA_configspace() case "FeatureAgglomeration": - return get_FeatureAgglomeration_configspace() + return transformers.get_FeatureAgglomeration_configspace() case "Nystroem": - return get_Nystroem_configspace() + return transformers.get_Nystroem_configspace() case "RBFSampler": - return get_RBFSampler_configspace() + return transformers.get_RBFSampler_configspace() #selectors.py case "SelectFwe": - return SelectFwe_configspace + return selectors.SelectFwe_configspace case "SelectPercentile": - return SelectPercentile_configspace + return 
selectors.SelectPercentile_configspace case "VarianceThreshold": - return VarianceThreshold_configspace + return selectors.VarianceThreshold_configspace case "RFE": - return RFE_configspace_part + return selectors.RFE_configspace_part case "SelectFromModel": - return SelectFromModel_configspace_part + return selectors.SelectFromModel_configspace_part + + return None -def check_for_special(name): - match name: - case "selectors": - return ["SelectFwe", "SelectPercentile", "VarianceThreshold",] - case "classifiers": - return ["LogisticRegression", "KNeighborsClassifier", "DecisionTreeClassifier", "SVC", "RandomForestClassifier", "GradientBoostingClassifier", "XGBClassifier", "ExtraTreesClassifier", "SGDClassifier", "MLPClassifier", "BernoulliNB", "MultinomialNB"] - case "transformers": - return ["Binarizer", "Normalizer", "PCA", "ZeroCount", "OneHotEncoder", "FastICA", "FeatureAgglomeration", "Nystroem", "RBFSampler"] - - return name +def get_search_space(name, n_classes=3, n_samples=100, random_state=None): + name = GROUPNAMES[name] + if name is None: + return None -def get_search_space(name, n_classes=3, n_samples=100, random_state=None): - name = check_for_special(name) + if name not in STRING_TO_CLASS: + return None #if list of names, return a list of EstimatorNodes if isinstance(name, list) or isinstance(name, np.ndarray): search_spaces = [get_search_space(n, n_classes=n_classes, n_samples=n_samples, random_state=random_state) for n in name] + #remove Nones + search_spaces = [s for s in search_spaces if s is not None] + return ChoicePipeline(choice_list=search_spaces) else: - return get_estimatornode(name, n_classes=n_classes, n_samples=n_samples, random_state=random_state) + return get_node(name, n_classes=n_classes, n_samples=n_samples, random_state=random_state) + + +def get_node(name, n_classes=3, n_samples=100, random_state=None): + + #these are wrappers + if name == "RFE_classification": + rfe_sp = get_configspace(name, n_classes=n_classes, 
n_samples=n_samples, random_state=random_state) + ext = get_node("ExtraTreesClassifier", n_classes=n_classes, n_samples=n_samples, random_state=random_state) + return WrapperPipeline(nodegen=ext, method=RFE, configspace=rfe_sp) + if name == "RFE_regression": + rfe_sp = get_configspace(name, n_classes=n_classes, n_samples=n_samples, random_state=random_state) + ext = get_node("ExtraTreesRegressor", n_classes=n_classes, n_samples=n_samples, random_state=random_state) + return WrapperPipeline(nodegen=ext, method=RFE, configspace=rfe_sp) + if name == "SelectFromModel_classification": + sfm_sp = get_configspace(name, n_classes=n_classes, n_samples=n_samples, random_state=random_state) + ext = get_node("ExtraTreesClassifier", n_classes=n_classes, n_samples=n_samples, random_state=random_state) + return WrapperPipeline(nodegen=ext, method=SelectFromModel, configspace=sfm_sp) + if name == "SelectFromModel_regression": + sfm_sp = get_configspace(name, n_classes=n_classes, n_samples=n_samples, random_state=random_state) + ext = get_node("ExtraTreesRegressor", n_classes=n_classes, n_samples=n_samples, random_state=random_state) + return WrapperPipeline(nodegen=ext, method=SelectFromModel, configspace=sfm_sp) -def get_estimatornode(name, n_classes=3, n_samples=100, random_state=None): configspace = get_configspace(name, n_classes=n_classes, n_samples=n_samples, random_state=random_state) - - return EstimatorNode(STRING_TO_CLASS[name], configspace) diff --git a/tpot2/config/imputers.py b/tpot2/config/imputers.py index 89bcb60d..5d016d23 100644 --- a/tpot2/config/imputers.py +++ b/tpot2/config/imputers.py @@ -1,2 +1,9 @@ from ConfigSpace import ConfigurationSpace -from ConfigSpace import ConfigurationSpace, Integer, Float, Categorical, Normal \ No newline at end of file +from ConfigSpace import ConfigurationSpace, Integer, Float, Categorical, Normal + +simple_imputer = ConfigurationSpace( + space = { + 'strategy' : Categorical('strategy', ['mean','median', 'most_frequent', ]), + 
'add_indicator' : Categorical('add_indicator', [True, False]), + } +) \ No newline at end of file diff --git a/tpot2/config/mdr_configs.py b/tpot2/config/mdr_configs.py index bbd7d487..abfe2a4d 100644 --- a/tpot2/config/mdr_configs.py +++ b/tpot2/config/mdr_configs.py @@ -1,6 +1,3 @@ -from mdr import MDR, ContinuousMDR -from skrebate import ReliefF, SURF, SURFstar, MultiSURF -from functools import partial from ConfigSpace import ConfigurationSpace from ConfigSpace import ConfigurationSpace, Integer, Float, Categorical, Normal diff --git a/tpot2/graphsklearn.py b/tpot2/graphsklearn.py index 113c70aa..e0d500ae 100644 --- a/tpot2/graphsklearn.py +++ b/tpot2/graphsklearn.py @@ -62,47 +62,34 @@ def _method_name(name, estimator, method): return method -def estimator_fit_transform_override_cross_val_predict(estimator, X, y, cv=5, method='auto',subset_indexes=None, **fit_params): +def estimator_fit_transform_override_cross_val_predict(estimator, X, y, cv=5, method='auto', **fit_params): method = _method_name(name=estimator.__class__.__name__, estimator=estimator, method=method) if cv > 1: - #TODO subset indexes for cross val predict preds = sklearn.model_selection.cross_val_predict(estimator=estimator, X=X, y=y, cv=cv, method=method, **fit_params) estimator.fit(X,y, **fit_params) else: - if subset_indexes is None: - estimator.fit(X,y, **fit_params) - func = getattr(estimator,method) - preds = func(X) - else: - this_X = X[subset_indexes] - this_y = y[subset_indexes] - estimator.fit(this_X, this_y, **fit_params) - func = getattr(estimator,method) - preds = func(X) + estimator.fit(X,y, **fit_params) + func = getattr(estimator,method) + preds = func(X) + return preds, estimator # https://github.com/scikit-learn/scikit-learn/blob/7db5b6a98/sklearn/pipeline.py#L883 -def _fit_transform_one(model, X, y, fit_transform=True, subset_indexes=None, **fit_params): +def _fit_transform_one(model, X, y, fit_transform=True, **fit_params): """Fit and transform one step in a pipeline.""" - if 
subset_indexes is None: - if fit_transform and hasattr(model, "fit_transform"): - res = model.fit_transform(X, y, **fit_params) - else: - res = model.fit(X, y, **fit_params).transform(X) - #return model - + if fit_transform and hasattr(model, "fit_transform"): + res = model.fit_transform(X, y, **fit_params) else: - this_X = X[subset_indexes] - this_y = y[subset_indexes] - model.fit(this_X, this_y, **fit_params) - res = model.transform(X) + res = model.fit(X, y, **fit_params).transform(X) + #return model + return res, model @@ -110,7 +97,6 @@ def _fit_transform_one(model, X, y, fit_transform=True, subset_indexes=None, **f def fit_sklearn_digraph(graph: nx.DiGraph, X, y, - subset_col = None, method='auto', cross_val_predict_cv = 0, #func(est,X,y) -> transformed_X memory = None, @@ -137,22 +123,15 @@ def fit_sklearn_digraph(graph: nx.DiGraph, else: #in node has inputs, get those this_X = np.hstack([transformed_steps[child] for child in get_ordered_successors(graph, node)]) - - subset_indexes = None - if subset_col is not None and "subset_values" in graph.nodes[node]: - #get indexes of subset_col that are in subset_values - subset_values = graph.nodes[node]["subset_values"] - subset_indexes = np.where(np.isin(subset_col, subset_values))[0] - # Removed so that the cache is the same for all models. Not including transform would index it seperately #if i == len(topo_sort)-1: #last method doesn't need transformed. 
# instance.fit(this_X, y) if issubclass(type(instance), sklearn.base.RegressorMixin) or issubclass(type(instance), sklearn.base.ClassifierMixin): - transformed, instance = estimator_fit_transform_override_cross_val_predict_cached(instance, this_X, y, cv=cross_val_predict_cv, method=method,subset_indexes=subset_indexes) + transformed, instance = estimator_fit_transform_override_cross_val_predict_cached(instance, this_X, y, cv=cross_val_predict_cv, method=method) else: - transformed, instance = fit_transform_one_cached(instance, this_X, y, subset_indexes=subset_indexes)#instance.fit_transform(this_X,y) + transformed, instance = fit_transform_one_cached(instance, this_X, y)#instance.fit_transform(this_X,y) graph.nodes[node]["instance"] = instance @@ -253,8 +232,6 @@ def __init__( cross_val_predict_cv=0, #signature function(estimator, X, y=none) method='auto', memory=None, #TODO memory caching like sklearn.pipeline - subset_column = None, - drop_subset_column = True, use_label_encoder=False, **kwargs, ): @@ -277,13 +254,6 @@ def __init__( memory: str or object with the joblib.Memory interface, optional Used to cache the fitted transformers of the pipeline. By default, no caching is performed. If a string is given, it is the path to the caching directory. - subset_column: int, optional - The column of X that contains the subset values. If None, all rows of X are used. If not None, only the rows of X where X[:,subset_column] is in subset_values are used. - Used to evolve pipelines where recursive graphs use different subsets of rows. - - drop_subset_column: bool, optional - If True, the subset_column is dropped from X before being passed to the pipeline. If False, the subset_column is kept in X. - use_label_encoder: bool, optional If True, the label encoder is used to encode the labels to be 0 to N. If False, the label encoder is not used. Mainly useful for classifiers (XGBoost) that require labels to be ints from 0 to N. 
@@ -296,8 +266,6 @@ def __init__( self.cross_val_predict_cv = cross_val_predict_cv self.method = method self.memory = memory - self.subset_column = subset_column - self.drop_subset_column = drop_subset_column self.use_label_encoder = use_label_encoder setup_ordered_successors(graph) @@ -327,17 +295,8 @@ def __str__(self): else: return str(self.graph.nodes) - def fit(self, X, y, subset_col = None): - # if self.subset_column is not None and self.subset_values is not None: - - # if isinstance(X, pd.DataFrame): - # indeces_to_keep = X[self.subset_column].isin(self._subset_values) - # X = X[indeces_to_keep] - # y = y[indeces_to_keep] - # else: - # indeces_to_keep = np.isin(X[:,self.subset_column], self._subset_values) - # X = X[indeces_to_keep] - # y = y[indeces_to_keep] + def fit(self, X, y): + if self.use_label_encoder: if type(self.use_label_encoder) == LabelEncoder: @@ -345,11 +304,7 @@ def fit(self, X, y, subset_col = None): else: y = self.label_encoder.fit_transform(y) - if self.subset_column is not None: - subset_col = X[:,self.subset_column] - if self.drop_subset_column: - X = np.delete(X, self.subset_column, axis=1) fit_sklearn_digraph( graph=self.graph, X=X, @@ -358,7 +313,6 @@ def fit(self, X, y, subset_col = None): cross_val_predict_cv = self.cross_val_predict_cv, memory = self.memory, topo_sort = self.topo_sorted_nodes, - subset_col = subset_col, ) return self @@ -380,11 +334,7 @@ def __sklearn_is_fitted__(self): @available_if(_estimator_has('predict')) def predict(self, X, **predict_params): - if self.subset_column is not None: - subset_col = X[:,self.subset_column] - if self.drop_subset_column: - X = np.delete(X, self.subset_column, axis=1) this_X = get_inputs_to_node(self.graph, X, @@ -402,9 +352,7 @@ def predict(self, X, **predict_params): @available_if(_estimator_has('predict_proba')) def predict_proba(self, X, **predict_params): - if self.subset_column is not None: - if self.drop_subset_column: - X = np.delete(X, self.subset_column, axis=1) + this_X = 
get_inputs_to_node(self.graph, X, @@ -416,9 +364,7 @@ def predict_proba(self, X, **predict_params): @available_if(_estimator_has('decision_function')) def decision_function(self, X, **predict_params): - if self.subset_column is not None: - if self.drop_subset_column: - X = np.delete(X, self.subset_column, axis=1) + this_X = get_inputs_to_node(self.graph, X, self.root, @@ -429,10 +375,6 @@ def decision_function(self, X, **predict_params): @available_if(_estimator_has('transform')) def transform(self, X, **predict_params): - - if self.subset_column is not None: - if self.drop_subset_column: - X = np.delete(X, self.subset_column, axis=1) this_X = get_inputs_to_node(self.graph, X, diff --git a/tpot2/search_spaces/nodes/estimator_node_custom_sampler.py b/tpot2/search_spaces/nodes/estimator_node_custom_sampler.py new file mode 100644 index 00000000..c53d4715 --- /dev/null +++ b/tpot2/search_spaces/nodes/estimator_node_custom_sampler.py @@ -0,0 +1,52 @@ +# try https://automl.github.io/ConfigSpace/main/api/hyperparameters.html +import tpot2 +import numpy as np +import pandas as pd +import sklearn +from tpot2 import config +from typing import Generator, List, Tuple, Union +import random +from ..base import SklearnIndividual, SklearnIndividualGenerator +from ConfigSpace import ConfigurationSpace + +class EstimatorNodeCustomIndividual(SklearnIndividual): + def __init__(self, method: type, + sample_func : callable, + rng=None) -> None: + super().__init__() + self.method = method + self.sample_func = sample_func + + self.hyperparameters = self.sample_func(rng) + + def mutate(self, rng=None): + rng = np.random.default_rng(rng) + self.hyperparameters = self.sample_func(rng) + return True + + def crossover(self, other, rng=None): + rng = np.random.default_rng(rng) + if self.method != other.method: + return False + + #loop through hyperparameters, randomly swap items in self.hyperparameters with items in other.hyperparameters + for hyperparameter in self.space: + if 
rng.choice([True, False]): + if hyperparameter in other.hyperparameters: + self.hyperparameters[hyperparameter] = other.hyperparameters[hyperparameter] + + def export_pipeline(self, **kwargs): + return self.method(**self.hyperparameters) + + def unique_id(self): + #return a dictionary of the method and the hyperparameters + return (self.method, self.hyperparameters) + +class EstimatorNodeCustom(SklearnIndividualGenerator): + def __init__(self, method : type, + sample_func: callable): + self.method = method + self.sample_func = sample_func + + def generate(self, rng=None): + return EstimatorNodeCustomIndividual(self.method, self.sample_func) \ No newline at end of file diff --git a/tpot2/tpot_estimator/estimator.py b/tpot2/tpot_estimator/estimator.py index 50dfa6e0..7465564c 100644 --- a/tpot2/tpot_estimator/estimator.py +++ b/tpot2/tpot_estimator/estimator.py @@ -83,10 +83,7 @@ def __init__(self, scorers, stepwise_steps = 5, - optuna_optimize_pareto_front = False, - optuna_optimize_pareto_front_trials = 100, - optuna_optimize_pareto_front_timeout = 60*10, - optuna_storage = "sqlite:///optuna.db", + #dask parameters n_jobs=1, @@ -418,10 +415,7 @@ def __init__(self, scorers, self.scatter = scatter - self.optuna_optimize_pareto_front = optuna_optimize_pareto_front - self.optuna_optimize_pareto_front_trials = optuna_optimize_pareto_front_trials - self.optuna_optimize_pareto_front_timeout = optuna_optimize_pareto_front_timeout - self.optuna_storage = optuna_storage + # create random number generator based on rngseed self.rng = np.random.default_rng(random_state) @@ -694,19 +688,7 @@ def ind_generator(rng): self.make_evaluated_individuals() - if self.optuna_optimize_pareto_front: - pareto_front_inds = self.pareto_front['Individual'].values - all_graphs, all_scores = tpot2.individual_representations.graph_pipeline_individual.simple_parallel_optuna(pareto_front_inds, objective_function, self.objective_function_weights, _client, storage=self.optuna_storage, 
steps=self.optuna_optimize_pareto_front_trials, verbose=self.verbose, max_eval_time_seconds=self.max_eval_time_seconds, max_time_seconds=self.optuna_optimize_pareto_front_timeout, **{"X": X, "y": y}) - all_scores = tpot2.utils.eval_utils.process_scores(all_scores, len(self.objective_function_weights)) - if len(all_graphs) > 0: - df = pd.DataFrame(np.column_stack((all_graphs, all_scores,np.repeat("Optuna",len(all_graphs)))), columns=["Individual"] + self.objective_names +["Parents"]) - for obj in self.objective_names: - df[obj] = df[obj].apply(convert_to_float) - - self.evaluated_individuals = pd.concat([self.evaluated_individuals, df], ignore_index=True) - else: - print("WARNING NO OPTUNA TRIALS COMPLETED") tpot2.utils.get_pareto_frontier(self.evaluated_individuals, column_names=self.objective_names, weights=self.objective_function_weights, invalid_values=["TIMEOUT","INVALID"]) @@ -821,7 +803,9 @@ def ind_generator(rng): self.selected_best_score = self.evaluated_individuals.loc[best_idx] - best_individual_pipeline = best_individual.export_pipeline(memory=self.memory, cross_val_predict_cv=self.cross_val_predict_cv) + #TODO + #best_individual_pipeline = best_individual.export_pipeline(memory=self.memory, cross_val_predict_cv=self.cross_val_predict_cv) + best_individual_pipeline = best_individual.export_pipeline() if self.preprocessing: self.fitted_pipeline_ = sklearn.pipeline.make_pipeline(sklearn.base.clone(self._preprocessing_pipeline), best_individual_pipeline ) diff --git a/tpot2/tpot_estimator/estimator_utils.py b/tpot2/tpot_estimator/estimator_utils.py index 47f31450..36e5c53c 100644 --- a/tpot2/tpot_estimator/estimator_utils.py +++ b/tpot2/tpot_estimator/estimator_utils.py @@ -12,6 +12,7 @@ def convert_parents_tuples_to_integers(row, object_to_int): else: return np.nan +#TODO add kwargs def apply_make_pipeline(graphindividual, preprocessing_pipeline=None): try: if preprocessing_pipeline is None: @@ -100,8 +101,8 @@ def recursive_with_defaults(config_dict, 
n_samples, n_features, classification, -def objective_function_generator(pipeline, x,y, scorers, cv, other_objective_functions, memory=None, cross_val_predict_cv=None, subset_column=None, step=None, budget=None, generation=1,is_classification=True): - pipeline = pipeline.export_pipeline(memory=memory, cross_val_predict_cv=cross_val_predict_cv, subset_column=subset_column) +def objective_function_generator(pipeline, x,y, scorers, cv, other_objective_functions, step=None, budget=None, generation=1, is_classification=True, **pipeline_kwargs): + pipeline = pipeline.export_pipeline(**pipeline_kwargs) if budget is not None and budget < 1: if is_classification: x,y = sklearn.utils.resample(x,y, stratify=y, n_samples=int(budget*len(x)), replace=False, random_state=1) @@ -127,9 +128,9 @@ def objective_function_generator(pipeline, x,y, scorers, cv, other_objective_fun return np.concatenate([cv_obj_scores,other_scores]) -def val_objective_function_generator(pipeline, X_train, y_train, X_test, y_test, scorers, other_objective_functions, memory, cross_val_predict_cv, subset_column): +def val_objective_function_generator(pipeline, X_train, y_train, X_test, y_test, scorers, other_objective_functions, **pipeline_kwargs): #subsample the data - pipeline = pipeline.export_pipeline(memory=memory, cross_val_predict_cv=cross_val_predict_cv, subset_column=subset_column) + pipeline = pipeline.export_pipeline(**pipeline_kwargs) fitted_pipeline = sklearn.base.clone(pipeline) fitted_pipeline.fit(X_train, y_train) diff --git a/tpot2/tpot_estimator/templates/tpot_autoimputer.py b/tpot2/tpot_estimator/templates/tpot_autoimputer.py new file mode 100644 index 00000000..e69de29b From a28b62ff501e942a0fa2ae4cd6ef3cc8fec7e983 Mon Sep 17 00:00:00 2001 From: perib Date: Tue, 26 Mar 2024 19:06:45 -0700 Subject: [PATCH 04/75] tutorials fixes --- Tutorial/1_Estimators_Overview.ipynb | 967 +--- ...Defining_Search_Space_(config_dicts).ipynb | 478 -- Tutorial/2_Search_Spaces.ipynb | 4320 ++++++++++++++++- 
Tutorial/3_Feature_Set_Selector.ipynb | 1244 +++++ .../3_Genetic_Feature_Set_Selectors.ipynb | 1147 ----- ...mbolic_Regression_and_Classification.ipynb | 130 +- Tutorial/5_Genetic_Feature_Selection.ipynb | 596 +++ ...phPipeline.ipynb => 6_GraphPipeline.ipynb} | 0 Tutorial/7_dask_parallelization.ipynb | 107 +- ...ipynb => 8_SH_and_early_termination.ipynb} | 68 +- ...ynb => 9_Genetic_Algorithm_Overview.ipynb} | 0 setup.py | 3 +- tpot2/config/autoqtl_builtins.py | 15 +- tpot2/config/classifiers_sklearnex.py | 9 +- tpot2/config/get_configspace.py | 232 +- tpot2/config/mdr_configs.py | 15 +- tpot2/config/regressors_sklearnex.py | 28 - tpot2/config/special_configs.py | 28 +- tpot2/search_spaces/base.py | 2 +- tpot2/search_spaces/nodes/__init__.py | 3 +- tpot2/search_spaces/nodes/estimator_node.py | 2 +- tpot2/search_spaces/nodes/fss_node.py | 79 + .../nodes/genetic_feature_selection.py | 6 +- tpot2/search_spaces/pipelines/graph.py | 6 +- tpot2/search_spaces/pipelines/sequential.py | 2 +- tpot2/tpot_estimator/estimator_utils.py | 3 +- .../tpot_estimator/templates/tpottemplates.py | 50 +- 27 files changed, 6692 insertions(+), 2848 deletions(-) delete mode 100644 Tutorial/2_Defining_Search_Space_(config_dicts).ipynb create mode 100644 Tutorial/3_Feature_Set_Selector.ipynb delete mode 100644 Tutorial/3_Genetic_Feature_Set_Selectors.ipynb create mode 100644 Tutorial/5_Genetic_Feature_Selection.ipynb rename Tutorial/{5_GraphPipeline.ipynb => 6_GraphPipeline.ipynb} (100%) rename Tutorial/{6_SH_and_early_termination.ipynb => 8_SH_and_early_termination.ipynb} (97%) rename Tutorial/{8_Genetic_Algorithm_Overview.ipynb => 9_Genetic_Algorithm_Overview.ipynb} (100%) diff --git a/Tutorial/1_Estimators_Overview.ipynb b/Tutorial/1_Estimators_Overview.ipynb index bea7facb..33a71097 100644 --- a/Tutorial/1_Estimators_Overview.ipynb +++ b/Tutorial/1_Estimators_Overview.ipynb @@ -21,34 +21,100 @@ ] }, { - "cell_type": "markdown", + "cell_type": "code", + "execution_count": 2, "metadata": {}, 
+ "outputs": [ + { + "name": "stderr", + "output_type": "stream", + "text": [ + "Generation: : 0it [00:00, ?it/s]/home/ribeirop/common/Projects/TPOT_Dev/tpot2/tpot2/population.py:204: FutureWarning: Setting an item of incompatible dtype is deprecated and will raise an error in a future version of pandas. Value '[0.9910779766159422, 0.9164180506462885, 0.9842489682733572, 0.99664936783213, 0.9913591306204854, 0.9785097508524515, 0.9843199854934415, 0.9981583597446381, 0.99559929270021, 0.9511441780591989, 0.9984744292898663, 0.9974402833866118, 0.9914842901220224, 0.9969100719668479, 0.9909145271063142, 0.9910709279190263, 0.9411694123791475, 0.9910354966095938, 0.9776626614599555, 0.9911887873368403, 0.9966903342486351, 0.9988343538601064, 0.9735967719140286, 0.9968575356141441, 0.9958485748358322, 0.9992471065344972, 0.9605917171252578, 0.9904942837739565, 0.9974574181131549, 0.9996403182930008, 0.9694102480973864, 0.9984821310846055, 0.9940551825220357, 0.9837735643634151, 0.9671044961833003, 0.9913835311537978, 0.9989793765342894, 0.9997847101769164, 0.991564988067797, 0.9988538844163573, 0.9895795999679059, 0.9750578580595717, 0.9971245111678281, 0.997177499370075, 0.9988702870584362, 'INVALID', 0.9131272065575761, 'INVALID', 0.9969386481385651, 'INVALID']' has dtype incompatible with float64, please explicitly cast to a compatible dtype first.\n", + " self.evaluated_individuals.loc[key,column_names] = data\n", + "Generation: : 1it [00:15, 15.14s/it]/home/ribeirop/common/Projects/TPOT_Dev/tpot2/tpot2/population.py:381: FutureWarning: Setting an item of incompatible dtype is deprecated and will raise an error in a future version of pandas. 
Value 'ind_crossover' has dtype incompatible with float64, please explicitly cast to a compatible dtype first.\n", + " self.evaluated_individuals.at[new_child.unique_id(),\"Variation_Function\"] = var_op\n", + "Generation: : 3it [01:30, 30.07s/it]\n" + ] + }, + { + "name": "stdout", + "output_type": "stream", + "text": [ + "0.9998423966736188\n" + ] + } + ], "source": [ - "### Best Practices\n", + "import tpot2\n", + "import sklearn\n", + "import sklearn.datasets\n", "\n", - "When running tpot from an .py script, it is important to protect code with `if __name__==\"__main__\":`" + "scorer = sklearn.metrics.get_scorer('roc_auc_ovo')\n", + "X, y = sklearn.datasets.load_digits(return_X_y=True)\n", + "X_train, X_test, y_train, y_test = sklearn.model_selection.train_test_split(X, y, train_size=0.75, test_size=0.25)\n", + "\n", + "\n", + "est = tpot2.TPOTClassifier(n_jobs=4, max_time_seconds=60, verbose=2)\n", + "est.fit(X_train, y_train)\n", + "\n", + "\n", + "print(scorer(est, X_test, y_test))" ] }, { "cell_type": "code", - "execution_count": 19, + "execution_count": 1, "metadata": {}, "outputs": [ { "name": "stderr", "output_type": "stream", "text": [ - "Evaluations: : 19it [00:30, 1.59s/it]\n" + "Generation: : 1it [00:03, 3.46s/it]/home/ribeirop/common/Projects/TPOT_Dev/tpot2/tpot2/population.py:381: FutureWarning: Setting an item of incompatible dtype is deprecated and will raise an error in a future version of pandas. Value 'ind_mutate' has dtype incompatible with float64, please explicitly cast to a compatible dtype first.\n", + " self.evaluated_individuals.at[new_child.unique_id(),\"Variation_Function\"] = var_op\n", + "/home/ribeirop/common/Projects/TPOT_Dev/tpot2/tpot2/population.py:204: FutureWarning: Setting an item of incompatible dtype is deprecated and will raise an error in a future version of pandas. 
Value '['INVALID', -6039.425686838566, -3697.10004861336, -3609.502376363637, -5283.160282257095, -3081.9675902170966, -3159.936813075531, -6.206059150347736e+26, -2983.0962978018993, -3310.997549085264, -3255.911452949286, -3437.783411085481, -6124.753602783256, -2983.0962978018993, 'INVALID', -2951.7123134502217, -3172.715957996382, -3203.3174204236266, -3172.204242312273, -3132.3227284760787, -2904.2526718694835, 'INVALID', 'INVALID', -3065.7604709862753, -2887.332924732963, -3746.3702654907283, -4323.858875859902, -3277.5596662104786, -3687.211654454998, -3801.6275361827234, -3635.7122609143, -3657.0684867411182, -6050.448478793622, -3272.2301971831594, 'INVALID', 'INVALID', -3507.5157340117594, -3291.107812406151, -3673.8549030697295, 'INVALID', -3455.0876361736764, 'INVALID', -3201.783121939595, -2912.050142543978, -3808.6567781967847, 'INVALID', 'INVALID', -2895.0114530615692, -2947.503341616811, -3662.3274712695893]' has dtype incompatible with float64, please explicitly cast to a compatible dtype first.\n", + " self.evaluated_individuals.loc[key,column_names] = data\n", + "Generation: : 9it [00:32, 3.63s/it]\n" ] }, { "name": "stdout", "output_type": "stream", "text": [ - "0.9996046124012956\n" + "-3453.3557493847698\n" ] } ], + "source": [ + "import tpot2\n", + "import sklearn\n", + "import sklearn.metrics\n", + "import sklearn.datasets\n", + "\n", + "scorer = sklearn.metrics.get_scorer('neg_mean_squared_error')\n", + "X, y = sklearn.datasets.load_diabetes(return_X_y=True)\n", + "X_train, X_test, y_train, y_test = sklearn.model_selection.train_test_split(X, y, train_size=0.75, test_size=0.25)\n", + "\n", + "est = tpot2.tpot_estimator.templates.TPOTRegressor(n_jobs=4, max_time_seconds=30, verbose=2)\n", + "est.fit(X_train, y_train)\n", + "\n", + "print(scorer(est, X_test, y_test))" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "### Best Practices\n", + "\n", + "When running tpot from an .py script, it is important to protect 
code with `if __name__==\"__main__\":`" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], "source": [ "#my_analysis.py\n", "\n", @@ -159,24 +225,9 @@ }, { "cell_type": "code", - "execution_count": 20, + "execution_count": null, "metadata": {}, - "outputs": [ - { - "name": "stderr", - "output_type": "stream", - "text": [ - "Evaluations: : 117it [00:30, 3.85it/s]\n" - ] - }, - { - "name": "stdout", - "output_type": "stream", - "text": [ - "0.9974747474747474\n" - ] - } - ], + "outputs": [], "source": [ "import tpot2\n", "import sklearn\n", @@ -202,20 +253,9 @@ }, { "cell_type": "code", - "execution_count": 21, + "execution_count": null, "metadata": {}, - "outputs": [ - { - "data": { - "image/png": "iVBORw0KGgoAAAANSUhEUgAAAnYAAAHWCAYAAAD6oMSKAAAAOXRFWHRTb2Z0d2FyZQBNYXRwbG90bGliIHZlcnNpb24zLjcuMSwgaHR0cHM6Ly9tYXRwbG90bGliLm9yZy/bCgiHAAAACXBIWXMAAA9hAAAPYQGoP6dpAAAWlElEQVR4nO3df4xfdZ3v8df8KDAzvYWOo9Pf3Vqm/FJKS6tAAckaqRdJBa6Ki7rXZENuTHYT/P0jmBiTVblAiLnqjQlkDReDaBalFhU3jZilIJTWttRWW+lP+mNkmGmhMy0wP+4fur1b+VWgZdr3fTz+mjlnzvl8zneSb55z5nzPaRgZGRkJAADHvcbRngAAAEeGsAMAKELYAQAUIewAAIoQdgAARQg7AIAihB0AQBHCDgCgCGEHAFCEsAMAKELYAQAUIewAAIoQdgAARQg7AIAihB0AQBHCDgCgCGEHAFCEsAMAKELYAQAUIewAAIoQdgAARQg7AIAihB0AQBHCDgCgCGEHAFCEsAMAKELYAQAUIewAAIoQdgAARQg7AIAihB0AQBHCDgCgCGEHAFCEsAMAKELYAQAUIewAAIoQdgAARQg7AIAihB0AQBHCDgCgCGEHAFCEsAMAKELYAQAUIewAAIoQdgAARQg7AIAihB0AQBHCDgCgCGEHAFCEsAMAKELYAQAUIewAAIoQdgAARQg7AIAihB0AQBHCDgCgCGEHAFCEsAMAKELYAQAUIewAAIoQdgAARQg7AIAihB0AQBHCDgCgCGEHAFCEsAMAKELYAQAUIewAAIoQdgAARQg7AIAihB0AQBHCDgCgCGEHAFCEsAMAKELYAQAUIewAAIoQdgAARQg7AIAihB0AQBHCDgCgCGEHAFCEsAMAKELYAQAUIewAAIoQdgAARQg7AIAihB0AQBHCDgCgCGEHAFCEsAMAKELYAQAUIewAAIoQdgAARQg7AIAihB0AQBHCDgCgCGEHAFCEsAMAKELYAQAUIewAAIoQdgAARQg7AIAihB0AQBHCDgCgCGEHAFCEsAMAKELYAQAUIewAAIoQdgAARQg7AIAihB0AQBHCDgCgCGEHAFCEsAMAKELYAQAUIewAAIoQdgAARQg7AIAihB0AQBHCDgCgCGEHAFCEsAMAKELYAQAUIewAAIoQdgAARQg7AIAihB0AQBHCDgCgCGEHAFCEsAMAKELYAQAUIewAAIoQdgAARQg7AIAihB0AQBHCDgCgCGEHAF
CEsAMAKELYAQAUIewAAIoQdgAARQg7AIAihB0AQBHCDgCgCGEHAFCEsAMAKELYAQAUIewAAIoQdgAARQg7AIAihB0AQBHCDgCgCGEHAFCEsAMAKELYAQAUIewAAIoQdgAARQg7AIAihB0AQBHCDgCgCGEHAFCEsAMAKELYAQAUIewAAIoQdgAARQg7AIAihB0AQBHCDgCgCGEHAFCEsAMAKELYAQAUIewAAIoQdgAARQg7AIAihB0AQBHCDgCgCGEHAFCEsAMAKELYAQAUIewAAIoQdgAARQg7AIAihB0AQBHCDgCgCGEHAFCEsAMAKELYAQAUIewAAIoQdgAARQg7AIAihB0AQBHCDgCgCGEHAFCEsAMAKELYAQAUIewAAIoQdgAARQg7AIAihB0AQBHCDgCgCGEHAFCEsAMAKELYAQAUIewAAIoQdgAARQg7AIAihB0AQBHCDgCgCGEHAFCEsAMAKELYAQAUIewAAIoQdgAARQg7AIAimkd7AgBH0tDQUHp7e9Pd3Z3u7u48uXt3nt2/P8NDQ2lsasqJLS1584QJ6ezsTGdnZ9rb29PU1DTa0wY4IhpGRkZGRnsSAK9XX19fVq9encdWrsyB/v6MDA5m7P79Obm3N2MGB9M4MpLhhoY839ycve3t2dfSkobm5pzU1pa3z52b2bNnZ/z48aN9GACvi7ADjms7d+7Mgw88kM0bN2bMwECmbdueib29Obm/P2OGhl5yu+ebmrK3rS272tuzbdrUPN/amhldXVlw0UWZOHHiG3gEAEeOsAOOS4ODg1m2bFmWL1uWsT09OXXrtkzp6UnT8PCr3tdQY2Oe6OjIH6dPy76OjsxfsCALFixIc7OrVYDji7ADjju7d+/OvYsXp++JHTl948Z07diRxiPwVjbc0JCNkyfn911daZ8yOZctWpQJEyYcgRkDvDGEHXBc2bp1a358111p3bkr565fn3EDA0d8jKdbW7PijDMyMGlSrrz6Q5k+ffoRHwPgaBB2wHFj69at+dc778ybtm7LO9atS/Nr+Lfr4RpsbMzDZ52Z3mnT8t/+7u/EHXBccB874Liwe/fu/Piuu9K+dVvO+93vjmrUJUnz8HDOX/u7tG/blh/f9cPs3r37qI4HcCQIO+CYNzg4mHsXL07rzl1557p1R+R6usPRODKSd/5uXVp27czPFi/O4ODgGzIuwGsl7IBj3rJly9L3xI6cu379UT9T99eah4dz7rr16d2xIw8++OAbOjbAqyXsgGPazp07s3zZspy+ceNR+aDE4Th5YCCnbdiYRx54ILt27RqVOQAcDmEHHNMefOCBjO3pSdeOHaM6j1k7dmRsT0+WPfDAqM4D4OUIO+CY1dfXl80bN+bUrdvesOvqXkrjyEhmbt2WzRs2pK+vb1TnAvBShB1wzFq9enXGDAxkSk/PaE8lSTK1pyfNAwNZs2bNaE8F4EUJO+CYNDQ0lMdWrsy0bdtf02PCjoam4eFM3749a1asyNDLPIcWYLQIO+Cgjo6Og1/ffvvtmTNnTvr6+vLxj388M2bMOHi7j7Vr1+aSSy552X0tXrw4t9xyy8v+zFe+8pV861vfesHy+++/P1dccUUO9PdnYm/vqz+Ql/DM4GC+sGFD/nb58ly16rf5h9+tzeb9A3l4z5780/p1h7WPiU/15kB/f3pfYl6PPvpoPvvZzyZJnnzyybzzne/MnDlz8utf/zof+chHXvcxPPLII5k3b17GjBmTJUuWvO79AbV4wjXwAnfffXduuOGG/OpXv8r48eOT/PlecnfeeWc+9rGPHdY+Fi1a9Lrm8Oyzz2ZkcDCn7Nv3qrYbHhlJY0PDi677/IYNOa2tNUvnzUtDQ0M29Pen57nnX9X+T+7vz8jgYLq7u/PmN7/5BevnzZuXefPmJUmWLl2a+fPnH4zXd73rXYc9ztDQUJqaml6wfNKkSbntttty8803v6p5A/9/cMYOOMR9992XL3zhC/nFL36Rt7zlLQeXX3
fddbnxxhvz108hHBoayqc//enMnz8/s2fPzve///0kyfe+97185jOfSZJs2LAh8+bNy+zZs/OpT33qYPgkyapVq3LxxRfnrW99a37wgx8cXN7T05Pv3X573rf8kdywedPB5T/5U3cuX7ki71u5Irc+8USS5IkDB3L5yhW57vfr819Xrsi+wcH8w9q1uXzlily+ckX+va8vW/bvz+/7+/NP06an4S/hN6utLfNPPvmQ41n19NP50OpVueK3K/ORNauz48CBJMlv9uzJ5StX5Krly/Pt73433d3deeyxxzJ37tycc845Oeecc/KnP/0p999/fz7wgQ/ksccey+c+97n88Ic/zLx587Jly5aDx/1yr9lVV12VSy65JB/84Adf9PczZcqUzJ49O42N3r6BF3LGDjjomWeeyTXXXJPf/OY3mTp16iHrTjvttJx22mm55557cuqppx5cftttt2XixIlZvnx59u/fn/POOy/vfe97D9n2uuuuy/XXX58rrrgi119//SHrHn/88SxdujTbtm3LwoUL8+EPfzhJsm7dutxwxRW59Ikd+fvH1uThPXsyvaUl/2vbtvzr7HPS0tSUq1evynmnnJxTmsfk8YGB3HTa6Tm9rS339fTklDHNue1tb8vIyEj6h4by8N69Ob2t7SXP5v2HU1tbc+fZs9PU0JClTz2V72zfnn/u6sq/7NiRL854axaMH59fzZiRJ3fvzk9+8pN84hOfyLXXXpv9+/cfcobt7W9/e7761a9m7dq1uemmm7Jly5bDes1Wr16d3/72txk3btzh/+IA/sKffMBBra2tmTNnTu64444XXf/FL34xX//61w9Z9stf/jK33nprzjnnnJx//vnZu3dvNm3adMjPrFixIu9///uTJFdfffUh6y6//PKMGTMmM2fOzJ49ew4uP3XmzEw86aQ0NzTkvR0dWfH003ls3zM5/+RTcsqYMTmxsTELOzqyYu/TSZK/aWnJ6W1tSZJZba1Zvndv/ufmzVn1zDMZ23z4f8PuHRzMP65fl/etXJGbtmzO43+5KfLcceNy05YtuX3njowcOJDnDhzI+eefn5tvvjnf+MY3smvXrpxwwgmHNcbLvWYLFy4UdcBrJuyAg5qamnL33Xfnnnvuya233vqC9XPnzs348eOzdOnSg8uGh4fz3e9+N6tWrcqqVauyZcuWzJ8//7DHPPHEE19y3X++d90rnGhLy386WzajpTX3zJmbU1tb8/XNm/J/du7MzNbW/GGgP8OvcD+8b27bmne1t+feuefmm6efkedG/vyJ3P8xdWq+1tWV/qGhXP+ze9Pz5JO55pprsmTJkpx44ol5z3vek5UrVx7GEb/8a9ba2npY+wB4McIOOMS4cePys5/9LF/72tdy7733vmD9l770pdx0000Hv7/00kvzne985+DtP9auXfuCW4HMnTs3P/3pT5MkP/rRjw5rHn98/PE8OTCQwZGR/LLnqZw7blzOHvtf8tDePdk7+HyeGx7Ovz31VOb91TVySdL97LNpbWrKVZ2d+e+TJmd9/778TUtLZrW25dvbtx28TnBjf38e3bv3kG33DQ6l84Q/x+bdf+o+uHzb/v05Y+zYfGLqtEw55ZT07tmTTZs2ZebMmfnkJz+ZSy+9NOvWHd4naw/nNQN4LVxjB7zApEmTsmTJkixcuDB33333IesuvvjiTJs27eD31157bTZv3pw5c+ZkeHg4EydOzM9//vNDtrnlllvy0Y9+NF/+8pdz0UUXHda/Gk+dOTP/+6GHcmNvb/62vT3vOPmUJMk/Tp2Wj6xZk5EkV76lM2eNHZsn/vIBh/+wYWAgN2zelMaGhpzU2JivdXUlSb4xqyv/vGlT3v3oo2ltasyEE0/M9W+dme5nn/1/xzNlSj6/YUO+uXVLLhrffnD5v+zckYf37k1Tks4pU3LmWWflrrvuyh133JExY8Zk+vTpufLKK7N8+fJXPLbDec1eypo1a3LZZZelr68vS5YsSV
dXVx566KHD2haor2Hkrz/iBnCEDQwMpKWlJQ0NDbnxxhvT3d19yFm/F7N06dL84b778p6HfvMGzfLw/dv55+W0hQvz7ne/e7SnAnAIZ+yAo+6RRx7Jddddl6GhoUyZMiW33377K27T2dmZFS0teb6pKWOOoX9TPt/UlH0tLens7BztqQC8gLADjrpLLrkkq1atelXbdHZ2pqG5OXvb2tLx9NNHZ2Kvwd62tjQ0Nx/1sLvvvvvy+c9//pBlCxYsyLe//e2jOi5wfBN2wDGpvb09J7W1ZVd7+zEVdrve9Od5tbe3v/IPvw4LFy7MwoULj+oYQD0+FQsck5qamvL2uXOzbdrUDB0jT1kYamzM1qlTc/a5577o474ARtux8W4J8CJmz56d51tb80RHx2hPJUmyvaMjg62tOfvss0d7KgAvStgBx6zx48dnRldX/jh9WoZf6Q7FR9lwQ0Menz4tM2bNyvjx40d1LgAvRdgBx7QFF12UfR0d2Th58qjOY8PkydnX0ZEFF144qvMAeDnCDjimTZw4MfMXLMjvu7ry9Cg9bmtva2v+MKsr77jwwkycOHFU5gBwOIQdcMxbsGBBxk+ZnBVnnJHBN/iDFIONjVlx5hlpnzw5F1xwwRs6NsCrJeyAY15zc3Pet2hRBiZNysNnnfmGXW833NCQh886M/snTsplixaludkdooBjm7ADjgsTJkzIlVd/KL3TpuWht5111M/cDTY25qG3nZXeadNy5dUfyoQJE47qeABHgmfFAseVrVu35sd3/TCtO3fm3PXrM25g4IiPsbe1NSvOPCP7J07KlVd/KNOnTz/iYwAcDcIOOO7s3r079y5enL4nduT0jRvTtWNHGo/AW9lwQ0M2TJ6cP8zqSvvkybls0SJn6oDjirADjkuDg4NZtmxZli9blrE9PZm5dVum9vSkaXj4Ve9rqLEx2zs68vj0adnX0ZF3XHhhLrjgAtfUAccdYQcc13bu3JkHly3L5g0b0jwwkOnbt2fiU705ub8/Y4aGXnK755uasretLbve1J6tU6dmsLU1M2bNygK3NAGOY8IOKKGvry9r1qzJmhUrcqC/PyODgxm7f3/G9fblhMHBNI4MZ7ihMc81N+fp9vHZ19KShubmnNTWlrPPPTdnn322J0oAxz1hB5QyNDSU3t7edHd3p7u7O0/u3p3nDhzI0OBgmpqbc8JJJ+XNEyaks7MznZ2daW9vT1NT02hPG+CIEHYAAEW4jx0AQBHCDgCgCGEHAFCEsAMAKELYAQAUIewAAIoQdgAARQg7AIAihB0AQBHCDgCgCGEHAFCEsAMAKELYAQAUIewAAIoQdgAARQg7AIAihB0AQBHCDgCgCGEHAFCEsAMAKELYAQAUIewAAIoQdgAARQg7AIAihB0AQBHCDgCgCGEHAFCEsAMAKELYAQAUIewAAIoQdgAARQg7AIAihB0AQBHCDgCgCGEHAFCEsAMAKELYAQAUIewAAIoQdgAARQg7AIAihB0AQBHCDgCgCGEHAFCEsAMAKELYAQAUIewAAIoQdgAARQg7AIAihB0AQBHCDgCgCGEHAFCEsAMAKELYAQAUIewAAIoQdgAARQg7AIAihB0AQBHCDgCgCGEHAFCEsAMAKELYAQAUIewAAIoQdgAARQg7AIAihB0AQBHCDgCgCGEHAFCEsAMAKELYAQAUIewAAIoQdgAARQg7AIAihB0AQBHCDgCgCGEHAFCEsAMAKELYAQAUIewAAIoQdgAARQg7AIAihB0AQBHCDgCgCGEHAFCEsAMAKELYAQAUIewAAIoQdgAARQg7AIAihB0AQBHCDgCgCGEHAFCEsAMAKELYAQAUIewAAIoQdgAARQg7AIAihB0AQBHCDgCgCGEHAFCEsAMAKELYAQAUIewAAIoQdgAARQg7AIAihB0AQBHCDgCgCGEHAFCEsAMAKELYAQAUIewAAIoQdgAARQg7AIAihB0AQBHCDgCgCGEHAFCEsAMAKELYAQ
AUIewAAIoQdgAARQg7AIAihB0AQBHCDgCgCGEHAFCEsAMAKELYAQAUIewAAIoQdgAARQg7AIAihB0AQBHCDgCgCGEHAFCEsAMAKELYAQAUIewAAIoQdgAARQg7AIAihB0AQBHCDgCgCGEHAFCEsAMAKELYAQAUIewAAIoQdgAARQg7AIAihB0AQBHCDgCgCGEHAFCEsAMAKELYAQAUIewAAIoQdgAARQg7AIAihB0AQBHCDgCgCGEHAFCEsAMAKELYAQAUIewAAIoQdgAARQg7AIAihB0AQBHCDgCgCGEHAFCEsAMAKELYAQAUIewAAIoQdgAARQg7AIAihB0AQBHCDgCgCGEHAFCEsAMAKELYAQAUIewAAIoQdgAARQg7AIAihB0AQBHCDgCgCGEHAFCEsAMAKELYAQAUIewAAIoQdgAARQg7AIAihB0AQBHCDgCgCGEHAFCEsAMAKELYAQAUIewAAIoQdgAARQg7AIAihB0AQBHCDgCgCGEHAFCEsAMAKELYAQAUIewAAIoQdgAARQg7AIAihB0AQBHCDgCgCGEHAFCEsAMAKELYAQAUIewAAIoQdgAARQg7AIAihB0AQBHCDgCgCGEHAFCEsAMAKELYAQAU8X8BMlMHJ2fHGe0AAAAASUVORK5CYII=", - "text/plain": [ - "
" - ] - }, - "metadata": {}, - "output_type": "display_data" - } - ], + "outputs": [], "source": [ "fitted_pipeline = est.fitted_pipeline_ # access best pipeline directly\n", "fitted_pipeline.plot()" @@ -223,228 +263,9 @@ }, { "cell_type": "code", - "execution_count": 22, + "execution_count": null, "metadata": {}, - "outputs": [ - { - "data": { - "text/html": [ - "
\n", - "\n", - "\n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - "
roc_auc_scoreParentsVariation_FunctionIndividualSubmitted TimestampCompleted TimestampPareto_FrontInstance
00.994405NaNNaN['LogisticRegression_1']1.692231e+091.692231e+09NaN['LogisticRegression_1']
10.954484NaNNaN['DecisionTreeClassifier_1']1.692231e+091.692231e+09NaN['DecisionTreeClassifier_1']
21.000000NaNNaN['KNeighborsClassifier_1']1.692231e+091.692231e+091.0['KNeighborsClassifier_1']
30.994048NaNNaN['GradientBoostingClassifier_1']1.692231e+091.692231e+09NaN['GradientBoostingClassifier_1']
40.989841NaNNaN['ExtraTreesClassifier_1']1.692231e+091.692231e+09NaN['ExtraTreesClassifier_1']
...........................
1120.997540(105, 106)crossover['MLPClassifier_1']1.692231e+091.692231e+09NaN['MLPClassifier_1']
1130.998214(15,)mutate['KNeighborsClassifier_1']1.692231e+091.692231e+09NaN['KNeighborsClassifier_1']
1140.997619(67, 67)crossover[('MLPClassifier_1', 'StandardScaler_1')]1.692231e+091.692231e+09NaN[('MLPClassifier_1', 'StandardScaler_1')]
1150.996944(81,)mutate[('ExtraTreesClassifier_1', 'RBFSampler_1'), (...1.692231e+091.692231e+09NaN[('ExtraTreesClassifier_1', 'RBFSampler_1'), (...
1161.000000(90, 73)crossover[('MLPClassifier_1', 'MinMaxScaler_1')]1.692231e+091.692231e+09NaN[('MLPClassifier_1', 'MinMaxScaler_1')]
\n", - "

117 rows × 8 columns

\n", - "
" - ], - "text/plain": [ - " roc_auc_score Parents Variation_Function \\\n", - "0 0.994405 NaN NaN \n", - "1 0.954484 NaN NaN \n", - "2 1.000000 NaN NaN \n", - "3 0.994048 NaN NaN \n", - "4 0.989841 NaN NaN \n", - ".. ... ... ... \n", - "112 0.997540 (105, 106) crossover \n", - "113 0.998214 (15,) mutate \n", - "114 0.997619 (67, 67) crossover \n", - "115 0.996944 (81,) mutate \n", - "116 1.000000 (90, 73) crossover \n", - "\n", - " Individual Submitted Timestamp \\\n", - "0 ['LogisticRegression_1'] 1.692231e+09 \n", - "1 ['DecisionTreeClassifier_1'] 1.692231e+09 \n", - "2 ['KNeighborsClassifier_1'] 1.692231e+09 \n", - "3 ['GradientBoostingClassifier_1'] 1.692231e+09 \n", - "4 ['ExtraTreesClassifier_1'] 1.692231e+09 \n", - ".. ... ... \n", - "112 ['MLPClassifier_1'] 1.692231e+09 \n", - "113 ['KNeighborsClassifier_1'] 1.692231e+09 \n", - "114 [('MLPClassifier_1', 'StandardScaler_1')] 1.692231e+09 \n", - "115 [('ExtraTreesClassifier_1', 'RBFSampler_1'), (... 1.692231e+09 \n", - "116 [('MLPClassifier_1', 'MinMaxScaler_1')] 1.692231e+09 \n", - "\n", - " Completed Timestamp Pareto_Front \\\n", - "0 1.692231e+09 NaN \n", - "1 1.692231e+09 NaN \n", - "2 1.692231e+09 1.0 \n", - "3 1.692231e+09 NaN \n", - "4 1.692231e+09 NaN \n", - ".. ... ... \n", - "112 1.692231e+09 NaN \n", - "113 1.692231e+09 NaN \n", - "114 1.692231e+09 NaN \n", - "115 1.692231e+09 NaN \n", - "116 1.692231e+09 NaN \n", - "\n", - " Instance \n", - "0 ['LogisticRegression_1'] \n", - "1 ['DecisionTreeClassifier_1'] \n", - "2 ['KNeighborsClassifier_1'] \n", - "3 ['GradientBoostingClassifier_1'] \n", - "4 ['ExtraTreesClassifier_1'] \n", - ".. ... \n", - "112 ['MLPClassifier_1'] \n", - "113 ['KNeighborsClassifier_1'] \n", - "114 [('MLPClassifier_1', 'StandardScaler_1')] \n", - "115 [('ExtraTreesClassifier_1', 'RBFSampler_1'), (... 
\n", - "116 [('MLPClassifier_1', 'MinMaxScaler_1')] \n", - "\n", - "[117 rows x 8 columns]" - ] - }, - "execution_count": 22, - "metadata": {}, - "output_type": "execute_result" - } - ], + "outputs": [], "source": [ "#view the summary of all evaluated individuals as a pandas dataframe\n", "est.evaluated_individuals" @@ -481,26 +302,9 @@ }, { "cell_type": "code", - "execution_count": 23, + "execution_count": null, "metadata": {}, - "outputs": [ - { - "name": "stderr", - "output_type": "stream", - "text": [ - "Evaluations: : 143it [00:30, 4.74it/s]\n", - "/home/ribeirop/miniconda3/envs/tpot2env/lib/python3.10/site-packages/sklearn/linear_model/_sag.py:350: ConvergenceWarning: The max_iter was reached which means the coef_ did not converge\n", - " warnings.warn(\n" - ] - }, - { - "name": "stdout", - "output_type": "stream", - "text": [ - "0.9934232434232434\n" - ] - } - ], + "outputs": [], "source": [ "import tpot2\n", "import sklearn\n", @@ -526,20 +330,9 @@ }, { "cell_type": "code", - "execution_count": 24, + "execution_count": null, "metadata": {}, - "outputs": [ - { - "data": { - "image/png": 
"iVBORw0KGgoAAAANSUhEUgAAAnYAAAHWCAYAAAD6oMSKAAAAOXRFWHRTb2Z0d2FyZQBNYXRwbG90bGliIHZlcnNpb24zLjcuMSwgaHR0cHM6Ly9tYXRwbG90bGliLm9yZy/bCgiHAAAACXBIWXMAAA9hAAAPYQGoP6dpAAAWU0lEQVR4nO3df5DV9b3f8df+4NeK4G7wLj8EjApB/EXAqBHMaGK0NZFETcl1mh9OE9tLM9VJm9y0/3S86Z12WszEmTsmmampTjrmjtdcM0HjbdIxMTOsaBQENGgg/gDchdV1FxUWlD27/UPDvSZiUVeBdx6Pv9g953y+77PMnHnO99dpGhkZGQkAAEe85kM9AAAAo0PYAQAUIewAAIoQdgAARQg7AIAihB0AQBHCDgCgCGEHAFCEsAMAKELYAQAUIewAAIoQdgAARQg7AIAihB0AQBHCDgCgCGEHAFCEsAMAKELYAQAUIewAAIoQdgAARQg7AIAihB0AQBHCDgCgCGEHAFCEsAMAKELYAQAUIewAAIoQdgAARQg7AIAihB0AQBHCDgCgCGEHAFCEsAMAKELYAQAUIewAAIoQdgAARQg7AIAihB0AQBHCDgCgCGEHAFCEsAMAKELYAQAUIewAAIoQdgAARQg7AIAihB0AQBHCDgCgCGEHAFCEsAMAKELYAQAUIewAAIoQdgAARQg7AIAihB0AQBHCDgCgCGEHAFCEsAMAKELYAQAUIewAAIoQdgAARQg7AIAihB0AQBHCDgCgCGEHAFCEsAMAKELYAQAUIewAAIoQdgAARQg7AIAihB0AQBHCDgCgCGEHAFCEsAMAKELYAQAUIewAAIoQdgAARQg7AIAihB0AQBHCDgCgCGEHAFCEsAMAKELYAQAUIewAAIoQdgAARQg7AIAihB0AQBHCDgCgCGEHAFCEsAMAKELYAQAUIewAAIoQdgAARQg7AIAihB0AQBHCDgCgCGEHAFCEsAMAKELYAQAUIewAAIoQdgAARQg7AIAihB0AQBHCDgCgCGEHAFCEsAMAKELYAQAUIewAAIoQdgAARQg7AIAihB0AQBHCDgCgCGEHAFCEsAMAKELYAQAUIewAAIoQdgAARQg7AIAihB0AQBHCDgCgCGEHAFCEsAMAKELYAQAUIewAAIoQdgAARQg7AIAihB0AQBHCDgCgCGEHAFCEsAMAKELYAQAUIewAAIoQdgAARQg7AIAihB0AQBHCDgCgCGEHAFCEsAMAKELYAQAUIewAAIoQdgAARQg7AIAihB0AQBHCDgCgCGEHAFCEsAMAKELYAQAUIewAAIoQdgAARQg7AIAihB0AQBHCDgCgCGEHAFCEsAMAKELYAQAUIewAAIoQdgAARQg7AIAihB0AQBHCDgCgCGEHAFCEsAMAKELYAQAUIewAAIoQdgAARQg7AIAihB0AQBHCDgCgCGEHAFCEsAMAKELYAQAUIewAAIoQdgAARQg7AIAihB0AQBHCDgCgCGEHAFCEsAMAKELYAQAUIewAAIoQdgAARQg7AIAihB0AQBHCDgCgCGEHAFCEsAMAKELYAQAUIewAAIoQdgAARQg7AIAihB0AQBHCDgCgCGEHAFCEsAMAKELYAQAUIewAAIoQdgAARQg7AIAihB0AQBHCDgCgCGEHAFCEsAMAKELYAQAUIewAAIoQdgAARQg7AIAihB0AQBHCDgCgCGEHAFCEsAMAKELYAQAUIewAAIoQdgAARQg7AIAihB0AQBGth3oAgNHUaDTS39+f3t7e9Pb25rkdO/Lynj0ZbjTS3NKScRMm5NipU9PZ2ZnOzs50dHSkpaXlUI8NMCqaRkZGRg71EADv1MDAQNavX59H1q7N3t27MzI0lIl79mRyf3/GDA2leWQkw01N2dfamhc6OrJrwoQ0tbZm/FFH5bSFC3PGGWekvb39UL8NgHdE2AFHtJ6enty3alWe2rw5YwYHM2vrtkzr78/k3bszptE44Ov2tbTkhaOOyva
OjmydNTP72try/jlzsvi88zJt2rT38B0AjB5hBxyRhoaG0tXVlQe7ujKxry8nbdma4/r60jI8/JbXajQ355kpU/K72bOya8qUfGjx4ixevDitrc5WAY4swg444uzYsSM/XbkyA890Z97mzZnT3Z3mUfgoG25qyuYZM/L4nDnpOG5GLlm6NFOnTh2FiQHeG8IOOKJs2bIlP77ttrT1bM+ixx7LpMHBUd/Gi21tWXPyyRmcPj2XfXZZZs+ePerbAHg3CDvgiLFly5b8/d/+bd63ZWvO2rgxrW/jsOvBGmpuzgOnzE//rFm54sorxR1wRHAfO+CIsGPHjvz4ttvSsWVrzvnNb97VqEuS1uHhfPjR36Rj69b8+La/y44dO97V7QGMBmEHHPaGhoby05Ur09azPWdv3Dgq59MdjOaRkZz9m42ZsL0nd69cmaGhofdkuwBvl7ADDntdXV0ZeKY7ix577F3fU/eHWoeHs2jjY+nv7s599933nm4b4K0SdsBhraenJw92dWXe5s3vyoUSB2Py4GA+sGlzfr1qVbZv335IZgA4GMIOOKzdt2pVJvb1ZU539yGdY253dyb29aVr1apDOgfAmxF2wGFrYGAgT23enJO2bH3Pzqs7kOaRkZy4ZWue2rQpAwMDh3QWgAMRdsBha/369RkzOJjj+voO9ShJkpl9fWkdHMyGDRsO9SgAb0jYAYelRqORR9auzayt297W14S9G1qGhzN727ZsWLMmjTf5HlqAQ0XYAW9qypQp73iNL3/5y3niiScO+PgNN9yQV155Zf/PF1xwQfr7+7N39+5M6+//o+d/bsOGXLzmoVy6dm0uX/dwNu7a9Y5nPFjTnn91rv43mOuhhx7K17/+9VHb1q9//euceeaZGTNmTO66665RWxeoS9gB77qbbropJ5544gEf/8Ow++Uvf5ne3t6MDA3lmANE29/MOzl3LlyYP586Lf/j6afe8YyNgzyHb/Lu3RkZGkpvb+8fPXbmmWdmxYoV73iW35s+fXq+//3v58orrxy1NYHahB3wlq1duzZnnXVWTjvttHzhC1/I3r17kyQ/+clPMnfu3HzoQx/Kl770pXzta19Lkpx//vl59NFH02g08rnPfS7z58/Paaedlptvvjk33nhjenp6cu6552bp0qVJXt1L2Nvbm4l79uR/bnk6n1y7JpeuXZOb3+DK2EWTJmXHyy8neTXO/tuTT+bydQ/n0rVrs/LZZ5Mkg41G/u3Gjfnnax7Kf9y0Kec/+OvsbjTywM6d+cIjG/Ll3zyaP9+wPoONRr6x6be5fN3Duezhh9P12kUS9+/c+doMa/PZNQ9l4p49Wb16dRYuXJgFCxZkwYIFefbZZ3PvvffmM5/5TJKkr68vl156aU4//fScf/75efrpp5MkV111Va699tqcc845mTNnTn71q18d8O983HHH5Ywzzkhzs49q4OC0HuoBgCPPF7/4xdx00005++yzs3z58nznO9/J8uXLc80116SrqytTp07NhRdemDPPPPN1r1u3bl2eeuqpbNy4MUnywgsvZPLkyVmxYkXuu+++TJw4cf9zn9uxI0+tX5/VO3fmjgUfzNjm5uzct++PZrm3vz8f63hfkuT23h35s7Fjc8eCD2Zvo5F/sX59zmtvz496d2TG+HH5zvz56do5kDue/ce9bY/u2pV/WLgonePG5VtPP50LOjry3+d+IP379uXKDevzfxYuys3d3flP7z8hi9vb89LQUDb2D+R/3357li9fnquvvjp79uxJS0vL6+a67rrrct555+XOO+/MbbfdlmuuuSYrV65MkvT39+f+++/PL37xi3zzm9/MPffcMzr/McCfPGEHvCU7d+7Myy+/nLPPPjtJ8vnPfz4rVqzIRz/60cybNy/HHXdckuSKK67Ili1bXvfaE044IT09PfnKV76ST33qU7nooosOuJ2X9+zJY888kys6p2bsa3usjhkzZv/j/+7xx/LK8HB2NRpZ+cGFSZK
ugYFsGhzMT557dU/drsZQtu3dm7UvvpR//dpci49pzzGt//jRt3DSpHSOG/fq63cO5N7+5/OdbduSJHsajfTt25eFkybl+qefzhN7BvPPphybsUNDOeH44/Otb30rzz//fJYtW5YTTjjhdfOvWrUqd999d5Jk2bJlufbaa/c/9ulPfzpJsmjRov178gBGg7ADRsXIQZyj1t7enkceeSR33313vv3tb+fnP/95rr/++jd87nCjkbzJmn8z7+TMaWvLf33qyfz1k0/kxpPnZzjJfznppJw1+Zg/nO6A60z4J4c5h0dG8r35p2TG+PGve86/mTkzH2lvz70D/Vm2fl3+86yZOevDF+Y//OVf5s4778zHP/7x3H777W/63puamvb/e9xrIdnS0uLqWmBUOXEDeEuOOeaYjBs3Lg8++GCS5NZbb81HPvKRzJs3L48//ni6u7vTaDRyxx13/NFr+/r6Mjw8nGXLluW6667LunXrkiRHH310Xnrppdc9t7mlJadNn56/792RV1673ckfHoptamrKv599fNa9+GKeHBzMkmPac+v27fsvhNi0e3caIyP54KRJ+YfX7oW3eufO7BwaesP3tri9PT/o6dn/8++vtt26Z09Onjgxy2fOyoltbendtTvPDwzkxBNPzFe/+tVcdNFF+w8v/96SJUvywx/+MEnyox/9KGedddb//48L8A7ZYwe8qYGBgf2HV5NkxYoVueWWW7J8+fLs3bs3CxYsyPLlyzN+/PjccMMNueCCCzJ58uTMmzcvkyZNet1a3d3dueqqqzI8PJzW1tbccMMNSZKrr746F1xwQebOnbv/PLRxEybk1OOPz57Nv8un1z2c1qamXPFnnfnijBmvW3NCS0v+1Yzj8r+6u/NXJ52UZ/buzacfXpvhJMeOHZubTjk1/3La9Hztt4/nkrVrcsbEo9M5dmzGv8EFCV+ZOSt//eQTuXTtmgyNjOSUiRNz/Qfm5eae7jzwwgtpSXLa0Ufn+GnTcvfDD+fUU0/NmDFjMnv27Fx22WX7Yzd59Ry7q666Kj/4wQ/S0dGRW2655S3/7Tds2JBLLrkkAwMDueuuuzJnzpysXr36La8D/OloGjmY4ycAB2HXrl2ZOHFiGo1GLr/88lx99dX55Cc/+bbWuueee/Lbn/0sH199/zuea2hkJMMjIxnb3Jz1L72Uv3rid7ljwQff9nr/98Pn5AMXX5yPfexj73g2gNFkjx0war773e/m1ltvzcsvv5wLL7wwn/jEJ972Wp2dnVkzYUL2tbRkzDs8D22w0cgXH3kkQyMjGdPclOtOPOltr7WvpSW7JkxIZ2fnO5oJ4N1gjx1wWHruuedyy/e+lyX3P5ApL754qMfZr2/SpKw65+xc9Rd/kWOPPXZU1vzZz36Wb3zjG6/73eLFi3PjjTeOyvrAnw577IDDUkdHR8YfdVS2d3QcVmG3/X2vztXR0TFqa1588cW5+OKLR2094E+Xq2KBw1JLS0tOW7gwW2fNTOMw+eaFRnNztsycmdMXLfqjGxIDHA4Oj09LgDdwxhlnZF9bW56ZMuVQj5Ik2TZlSoba2nL66acf6lEA3pCwAw5b7e3tef+cOfnd7FkZ/ic3+D0Uhpua8sTsWXn/3Llpb28/pLMAHIiwAw5ri887L7umTMnmP7h/3Xtt04wZ2TVlShYvWXJI5wB4M8IOOKxNmzYtH1q8OI/PmZMX29oOyQwvtLXlt3Pn5KwlSzJt2rRDMgPAwRB2wGFv8eLFaT9uRtacfHKG3uMLKYaam7Nm/snpmDEj55577nu6bYC3StgBh73W1tZ8YunSDE6fngdOmf+enW833NSUB06Znz3TpueSpUvT2uoOUcDhTdgBR4SpU6fmss8uS/+sWVl96inv+p67oebmrD71lPTPmpXLPrssU6dOfVe3BzAafPMEcETZsmVLfnzb36WtpyeLHnsskwYHR30bL7S1Zc38k7Nn2vRc9tllmT179qhvA+DdIOyAI86
OHTvy05UrM/BMd+Zt3pw53d1pHoWPsuGmpmyaMSO/nTsnHTNm5JKlS+2pA44owg44Ig0NDaWrqysPdnVlYl9fTtyyNTP7+tIyPPyW12o0N2fblCl5Yvas7JoyJWctWZJzzz3XOXXAEUfYAUe0np6e3NfVlac2bUrr4GBmb9uWac/3Z/Lu3RnTaBzwdftaWvLCUUdl+/s6smXmzAy1teX9c+dmsVuaAEcwYQeUMDAwkA0bNmTDmjXZu3t3RoaGMnHPnkzqH8jYoaE0jwxnuKk5r7S25sWO9uyaMCFNra0Zf9RROX3Ropx++um+UQI44gk7oJRGo5H+/v709vamt7c3z+3YkVf27k1jaCgtra0ZO358jp06NZ2dnens7ExHR0daWloO9dgAo0LYAQAU4T52AABFCDsAgCKEHQBAEcIOAKAIYQcAUISwAwAoQtgBABQh7AAAihB2AABFCDsAgCKEHQBAEcIOAKAIYQcAUISwAwAoQtgBABQh7AAAihB2AABFCDsAgCKEHQBAEcIOAKAIYQcAUISwAwAoQtgBABQh7AAAihB2AABFCDsAgCKEHQBAEcIOAKAIYQcAUISwAwAoQtgBABQh7AAAihB2AABFCDsAgCKEHQBAEcIOAKAIYQcAUISwAwAoQtgBABQh7AAAihB2AABFCDsAgCKEHQBAEcIOAKAIYQcAUISwAwAoQtgBABQh7AAAihB2AABFCDsAgCKEHQBAEcIOAKAIYQcAUISwAwAoQtgBABQh7AAAihB2AABFCDsAgCKEHQBAEcIOAKAIYQcAUISwAwAoQtgBABQh7AAAihB2AABFCDsAgCKEHQBAEcIOAKAIYQcAUISwAwAoQtgBABQh7AAAihB2AABFCDsAgCKEHQBAEcIOAKAIYQcAUISwAwAoQtgBABQh7AAAihB2AABFCDsAgCKEHQBAEcIOAKAIYQcAUISwAwAoQtgBABQh7AAAihB2AABFCDsAgCKEHQBAEcIOAKAIYQcAUISwAwAoQtgBABQh7AAAihB2AABFCDsAgCKEHQBAEcIOAKAIYQcAUISwAwAoQtgBABQh7AAAihB2AABFCDsAgCKEHQBAEcIOAKAIYQcAUISwAwAoQtgBABQh7AAAihB2AABFCDsAgCKEHQBAEcIOAKAIYQcAUISwAwAoQtgBABQh7AAAihB2AABFCDsAgCKEHQBAEcIOAKAIYQcAUISwAwAoQtgBABQh7AAAihB2AABFCDsAgCKEHQBAEcIOAKAIYQcAUISwAwAoQtgBABQh7AAAihB2AABFCDsAgCKEHQBAEcIOAKAIYQcAUISwAwAoQtgBABQh7AAAihB2AABFCDsAgCKEHQBAEcIOAKAIYQcAUISwAwAoQtgBABQh7AAAihB2AABFCDsAgCKEHQBAEcIOAKAIYQcAUISwAwAoQtgBABQh7AAAihB2AABFCDsAgCKEHQBAEcIOAKAIYQcAUISwAwAoQtgBABQh7AAAihB2AABFCDsAgCKEHQBAEcIOAKAIYQcAUISwAwAoQtgBABQh7AAAihB2AABFCDsAgCKEHQBAEcIOAKAIYQcAUISwAwAoQtgBABQh7AAAihB2AABFCDsAgCKEHQBAEcIOAKAIYQcAUISwAwAoQtgBABQh7AAAihB2AABFCDsAgCKEHQBAEcIOAKAIYQcAUISwAwAoQtgBABQh7AAAihB2AABFCDsAgCKEHQBAEcIOAKAIYQcAUISwAwAoQtgBABQh7AAAihB2AABFCDsAgCKEHQBAEcIOAKAIYQcAUMT/A0Aw27cq4N4uAAAAAElFTkSuQmCC", - "text/plain": [ - "
" - ] - }, - "metadata": {}, - "output_type": "display_data" - } - ], + "outputs": [], "source": [ "fitted_pipeline = est.fitted_pipeline_ # access best pipeline directly\n", "fitted_pipeline.plot() #plot the best pipeline" @@ -554,240 +347,9 @@ }, { "cell_type": "code", - "execution_count": 25, + "execution_count": null, "metadata": {}, - "outputs": [ - { - "data": { - "text/html": [ - "
\n", - "\n", - "\n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - "
roc_auc_scorecomplexity_scorerParentsVariation_FunctionIndividualSubmitted TimestampCompleted TimestampPareto_FrontInstance
01.015.0NaNNaN['LogisticRegression_1']1.692231e+091.692231e+09NaN['LogisticRegression_1']
10.9661945.0NaNNaN['DecisionTreeClassifier_1']1.692231e+091.692231e+09NaN['DecisionTreeClassifier_1']
20.997467.0NaNNaN['KNeighborsClassifier_1']1.692231e+091.692231e+09NaN['KNeighborsClassifier_1']
30.99642915064.0NaNNaN['GradientBoostingClassifier_1']1.692231e+091.692231e+09NaN['GradientBoostingClassifier_1']
40.9957142802.0NaNNaN['ExtraTreesClassifier_1']1.692231e+091.692231e+09NaN['ExtraTreesClassifier_1']
..............................
1380.56.0(98,)mutate[('BernoulliNB_1', 'SelectFromModel_ExtraTrees...1.692231e+091.692231e+09NaN[('BernoulliNB_1', 'SelectFromModel_ExtraTrees...
1390.854961.0(82, 87)crossover['MultinomialNB_1']1.692231e+091.692231e+09NaN['MultinomialNB_1']
1400.9975798210.0(86,)mutate['ExtraTreesClassifier_1']1.692231e+091.692231e+09NaN['ExtraTreesClassifier_1']
1410.97900814.8(98,)mutate['SGDClassifier_1']1.692231e+091.692231e+09NaN['SGDClassifier_1']
1420.51500.0(2,)mutate['XGBClassifier_1']1.692231e+091.692231e+09NaN['XGBClassifier_1']
\n", - "

143 rows × 9 columns

\n", - "
" - ], - "text/plain": [ - " roc_auc_score complexity_scorer Parents Variation_Function \\\n", - "0 1.0 15.0 NaN NaN \n", - "1 0.96619 45.0 NaN NaN \n", - "2 0.99746 7.0 NaN NaN \n", - "3 0.996429 15064.0 NaN NaN \n", - "4 0.995714 2802.0 NaN NaN \n", - ".. ... ... ... ... \n", - "138 0.5 6.0 (98,) mutate \n", - "139 0.85496 1.0 (82, 87) crossover \n", - "140 0.997579 8210.0 (86,) mutate \n", - "141 0.979008 14.8 (98,) mutate \n", - "142 0.5 1500.0 (2,) mutate \n", - "\n", - " Individual Submitted Timestamp \\\n", - "0 ['LogisticRegression_1'] 1.692231e+09 \n", - "1 ['DecisionTreeClassifier_1'] 1.692231e+09 \n", - "2 ['KNeighborsClassifier_1'] 1.692231e+09 \n", - "3 ['GradientBoostingClassifier_1'] 1.692231e+09 \n", - "4 ['ExtraTreesClassifier_1'] 1.692231e+09 \n", - ".. ... ... \n", - "138 [('BernoulliNB_1', 'SelectFromModel_ExtraTrees... 1.692231e+09 \n", - "139 ['MultinomialNB_1'] 1.692231e+09 \n", - "140 ['ExtraTreesClassifier_1'] 1.692231e+09 \n", - "141 ['SGDClassifier_1'] 1.692231e+09 \n", - "142 ['XGBClassifier_1'] 1.692231e+09 \n", - "\n", - " Completed Timestamp Pareto_Front \\\n", - "0 1.692231e+09 NaN \n", - "1 1.692231e+09 NaN \n", - "2 1.692231e+09 NaN \n", - "3 1.692231e+09 NaN \n", - "4 1.692231e+09 NaN \n", - ".. ... ... \n", - "138 1.692231e+09 NaN \n", - "139 1.692231e+09 NaN \n", - "140 1.692231e+09 NaN \n", - "141 1.692231e+09 NaN \n", - "142 1.692231e+09 NaN \n", - "\n", - " Instance \n", - "0 ['LogisticRegression_1'] \n", - "1 ['DecisionTreeClassifier_1'] \n", - "2 ['KNeighborsClassifier_1'] \n", - "3 ['GradientBoostingClassifier_1'] \n", - "4 ['ExtraTreesClassifier_1'] \n", - ".. ... \n", - "138 [('BernoulliNB_1', 'SelectFromModel_ExtraTrees... 
\n", - "139 ['MultinomialNB_1'] \n", - "140 ['ExtraTreesClassifier_1'] \n", - "141 ['SGDClassifier_1'] \n", - "142 ['XGBClassifier_1'] \n", - "\n", - "[143 rows x 9 columns]" - ] - }, - "execution_count": 25, - "metadata": {}, - "output_type": "execute_result" - } - ], + "outputs": [], "source": [ "est.evaluated_individuals" ] @@ -801,113 +363,18 @@ }, { "cell_type": "code", - "execution_count": 26, + "execution_count": null, "metadata": {}, - "outputs": [ - { - "data": { - "text/html": [ - "
\n", - "\n", - "\n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - "
roc_auc_scorecomplexity_scorerParentsVariation_FunctionIndividualSubmitted TimestampCompleted TimestampPareto_FrontInstance
570.50.0(56,)mutate['LogisticRegression_1']1.692231e+091.692231e+091.0['LogisticRegression_1']
1371.01.0(82,)mutate[('MultinomialNB_1', 'SelectFromModel_ExtraTre...1.692231e+091.692231e+091.0[('MultinomialNB_1', 'SelectFromModel_ExtraTre...
\n", - "
" - ], - "text/plain": [ - " roc_auc_score complexity_scorer Parents Variation_Function \\\n", - "57 0.5 0.0 (56,) mutate \n", - "137 1.0 1.0 (82,) mutate \n", - "\n", - " Individual Submitted Timestamp \\\n", - "57 ['LogisticRegression_1'] 1.692231e+09 \n", - "137 [('MultinomialNB_1', 'SelectFromModel_ExtraTre... 1.692231e+09 \n", - "\n", - " Completed Timestamp Pareto_Front \\\n", - "57 1.692231e+09 1.0 \n", - "137 1.692231e+09 1.0 \n", - "\n", - " Instance \n", - "57 ['LogisticRegression_1'] \n", - "137 [('MultinomialNB_1', 'SelectFromModel_ExtraTre... " - ] - }, - "execution_count": 26, - "metadata": {}, - "output_type": "execute_result" - } - ], + "outputs": [], "source": [ "est.pareto_front" ] }, { "cell_type": "code", - "execution_count": 27, + "execution_count": null, "metadata": {}, - "outputs": [ - { - "data": { - "image/png": "", - "text/plain": [ - "
" - ] - }, - "metadata": {}, - "output_type": "display_data" - } - ], + "outputs": [], "source": [ "pareto_front = est.pareto_front\n", "\n", @@ -929,26 +396,9 @@ }, { "cell_type": "code", - "execution_count": 28, + "execution_count": null, "metadata": {}, - "outputs": [ - { - "name": "stderr", - "output_type": "stream", - "text": [ - "Generation: 100%|██████████| 5/5 [00:36<00:00, 7.20s/it]\n", - "/home/ribeirop/miniconda3/envs/tpot2env/lib/python3.10/site-packages/sklearn/neural_network/_multilayer_perceptron.py:686: ConvergenceWarning: Stochastic Optimizer: Maximum iterations (200) reached and the optimization hasn't converged yet.\n", - " warnings.warn(\n" - ] - }, - { - "name": "stdout", - "output_type": "stream", - "text": [ - "0.998015873015873\n" - ] - } - ], + "outputs": [], "source": [ "import tpot2\n", "import sklearn\n", @@ -983,241 +433,6 @@ "The TPOTClassifier and TPOTRegressor are set default parameters for the TPOTEstimator for Classification and Regression.\n", "In the future, a metalearner will be used to predict the best values for a given dataset." ] - }, - { - "cell_type": "code", - "execution_count": 29, - "metadata": {}, - "outputs": [ - { - "name": "stdout", - "output_type": "stream", - "text": [ - "terminating parallel evaluation due to timeout\n" - ] - }, - { - "name": "stderr", - "output_type": "stream", - "text": [ - "/home/ribeirop/miniconda3/envs/tpot2env/lib/python3.10/site-packages/sklearn/linear_model/_coordinate_descent.py:617: ConvergenceWarning: Objective did not converge. You might want to increase the number of iterations. Duality gap: 177.4640355714364, tolerance: 143.10199053030306\n", - " model = cd_fast.enet_coordinate_descent_gram(\n", - "/home/ribeirop/miniconda3/envs/tpot2env/lib/python3.10/site-packages/sklearn/linear_model/_coordinate_descent.py:617: ConvergenceWarning: Objective did not converge. You might want to increase the number of iterations. 
Duality gap: 185.86338704440277, tolerance: 143.10199053030306\n", - " model = cd_fast.enet_coordinate_descent_gram(\n", - "/home/ribeirop/miniconda3/envs/tpot2env/lib/python3.10/site-packages/sklearn/linear_model/_coordinate_descent.py:617: ConvergenceWarning: Objective did not converge. You might want to increase the number of iterations. Duality gap: 277.49028848926537, tolerance: 143.10199053030306\n", - " model = cd_fast.enet_coordinate_descent_gram(\n", - "/home/ribeirop/miniconda3/envs/tpot2env/lib/python3.10/site-packages/sklearn/linear_model/_coordinate_descent.py:617: ConvergenceWarning: Objective did not converge. You might want to increase the number of iterations. Duality gap: 464.01662831846625, tolerance: 143.10199053030306\n", - " model = cd_fast.enet_coordinate_descent_gram(\n", - "/home/ribeirop/miniconda3/envs/tpot2env/lib/python3.10/site-packages/sklearn/linear_model/_coordinate_descent.py:617: ConvergenceWarning: Objective did not converge. You might want to increase the number of iterations. Duality gap: 554.9558355270419, tolerance: 143.10199053030306\n", - " model = cd_fast.enet_coordinate_descent_gram(\n", - "/home/ribeirop/miniconda3/envs/tpot2env/lib/python3.10/site-packages/sklearn/linear_model/_coordinate_descent.py:617: ConvergenceWarning: Objective did not converge. You might want to increase the number of iterations. Duality gap: 1480.8552755513228, tolerance: 143.10199053030306\n", - " model = cd_fast.enet_coordinate_descent_gram(\n", - "/home/ribeirop/miniconda3/envs/tpot2env/lib/python3.10/site-packages/sklearn/linear_model/_coordinate_descent.py:617: ConvergenceWarning: Objective did not converge. You might want to increase the number of iterations. 
Duality gap: 2355.5063150407514, tolerance: 143.10199053030306\n", - " model = cd_fast.enet_coordinate_descent_gram(\n", - "/home/ribeirop/miniconda3/envs/tpot2env/lib/python3.10/site-packages/sklearn/linear_model/_coordinate_descent.py:617: ConvergenceWarning: Objective did not converge. You might want to increase the number of iterations. Duality gap: 2081.571493001771, tolerance: 143.10199053030306\n", - " model = cd_fast.enet_coordinate_descent_gram(\n", - "/home/ribeirop/miniconda3/envs/tpot2env/lib/python3.10/site-packages/sklearn/linear_model/_coordinate_descent.py:617: ConvergenceWarning: Objective did not converge. You might want to increase the number of iterations. Duality gap: 3868.126368656056, tolerance: 143.10199053030306\n", - " model = cd_fast.enet_coordinate_descent_gram(\n", - "/home/ribeirop/miniconda3/envs/tpot2env/lib/python3.10/site-packages/sklearn/linear_model/_coordinate_descent.py:617: ConvergenceWarning: Objective did not converge. You might want to increase the number of iterations. Duality gap: 5331.3651033417555, tolerance: 143.10199053030306\n", - " model = cd_fast.enet_coordinate_descent_gram(\n", - "/home/ribeirop/miniconda3/envs/tpot2env/lib/python3.10/site-packages/sklearn/linear_model/_coordinate_descent.py:617: ConvergenceWarning: Objective did not converge. You might want to increase the number of iterations. Duality gap: 6862.873289547279, tolerance: 143.10199053030306\n", - " model = cd_fast.enet_coordinate_descent_gram(\n", - "/home/ribeirop/miniconda3/envs/tpot2env/lib/python3.10/site-packages/sklearn/linear_model/_coordinate_descent.py:617: ConvergenceWarning: Objective did not converge. You might want to increase the number of iterations. Duality gap: 8656.98141344823, tolerance: 143.10199053030306\n", - " model = cd_fast.enet_coordinate_descent_gram(\n", - "/home/ribeirop/miniconda3/envs/tpot2env/lib/python3.10/site-packages/sklearn/linear_model/_coordinate_descent.py:617: ConvergenceWarning: Objective did not converge. 
You might want to increase the number of iterations. Duality gap: 4311.308985096635, tolerance: 143.10199053030306\n", - " model = cd_fast.enet_coordinate_descent_gram(\n", - "/home/ribeirop/miniconda3/envs/tpot2env/lib/python3.10/site-packages/sklearn/linear_model/_coordinate_descent.py:617: ConvergenceWarning: Objective did not converge. You might want to increase the number of iterations. Duality gap: 5839.020132572099, tolerance: 143.10199053030306\n", - " model = cd_fast.enet_coordinate_descent_gram(\n", - "/home/ribeirop/miniconda3/envs/tpot2env/lib/python3.10/site-packages/sklearn/linear_model/_coordinate_descent.py:617: ConvergenceWarning: Objective did not converge. You might want to increase the number of iterations. Duality gap: 5923.854209526442, tolerance: 143.10199053030306\n", - " model = cd_fast.enet_coordinate_descent_gram(\n", - "/home/ribeirop/miniconda3/envs/tpot2env/lib/python3.10/site-packages/sklearn/linear_model/_coordinate_descent.py:617: ConvergenceWarning: Objective did not converge. You might want to increase the number of iterations. Duality gap: 280.8815573984757, tolerance: 168.2528\n", - " model = cd_fast.enet_coordinate_descent_gram(\n", - "/home/ribeirop/miniconda3/envs/tpot2env/lib/python3.10/site-packages/sklearn/linear_model/_coordinate_descent.py:617: ConvergenceWarning: Objective did not converge. You might want to increase the number of iterations. Duality gap: 643.0934690993745, tolerance: 168.2528\n", - " model = cd_fast.enet_coordinate_descent_gram(\n", - "/home/ribeirop/miniconda3/envs/tpot2env/lib/python3.10/site-packages/sklearn/linear_model/_coordinate_descent.py:617: ConvergenceWarning: Objective did not converge. You might want to increase the number of iterations. 
Duality gap: 565.7529498867225, tolerance: 168.2528\n", - " model = cd_fast.enet_coordinate_descent_gram(\n", - "/home/ribeirop/miniconda3/envs/tpot2env/lib/python3.10/site-packages/sklearn/linear_model/_coordinate_descent.py:617: ConvergenceWarning: Objective did not converge. You might want to increase the number of iterations. Duality gap: 639.0793324268889, tolerance: 168.2528\n", - " model = cd_fast.enet_coordinate_descent_gram(\n", - "/home/ribeirop/miniconda3/envs/tpot2env/lib/python3.10/site-packages/sklearn/linear_model/_coordinate_descent.py:617: ConvergenceWarning: Objective did not converge. You might want to increase the number of iterations. Duality gap: 796.3080264698947, tolerance: 168.2528\n", - " model = cd_fast.enet_coordinate_descent_gram(\n", - "/home/ribeirop/miniconda3/envs/tpot2env/lib/python3.10/site-packages/sklearn/linear_model/_coordinate_descent.py:617: ConvergenceWarning: Objective did not converge. You might want to increase the number of iterations. Duality gap: 2132.9185444641626, tolerance: 168.2528\n", - " model = cd_fast.enet_coordinate_descent_gram(\n", - "/home/ribeirop/miniconda3/envs/tpot2env/lib/python3.10/site-packages/sklearn/linear_model/_coordinate_descent.py:617: ConvergenceWarning: Objective did not converge. You might want to increase the number of iterations. Duality gap: 2674.6467641871423, tolerance: 168.2528\n", - " model = cd_fast.enet_coordinate_descent_gram(\n", - "/home/ribeirop/miniconda3/envs/tpot2env/lib/python3.10/site-packages/sklearn/linear_model/_coordinate_descent.py:617: ConvergenceWarning: Objective did not converge. You might want to increase the number of iterations. Duality gap: 2568.991994333919, tolerance: 168.2528\n", - " model = cd_fast.enet_coordinate_descent_gram(\n", - "/home/ribeirop/miniconda3/envs/tpot2env/lib/python3.10/site-packages/sklearn/linear_model/_coordinate_descent.py:617: ConvergenceWarning: Objective did not converge. You might want to increase the number of iterations. 
Duality gap: 1767.4389212469105, tolerance: 168.2528\n", - " model = cd_fast.enet_coordinate_descent_gram(\n", - "/home/ribeirop/miniconda3/envs/tpot2env/lib/python3.10/site-packages/sklearn/linear_model/_coordinate_descent.py:617: ConvergenceWarning: Objective did not converge. You might want to increase the number of iterations. Duality gap: 1605.1388315662043, tolerance: 168.2528\n", - " model = cd_fast.enet_coordinate_descent_gram(\n", - "/home/ribeirop/miniconda3/envs/tpot2env/lib/python3.10/site-packages/sklearn/linear_model/_coordinate_descent.py:617: ConvergenceWarning: Objective did not converge. You might want to increase the number of iterations. Duality gap: 1771.0119939564029, tolerance: 168.2528\n", - " model = cd_fast.enet_coordinate_descent_gram(\n", - "/home/ribeirop/miniconda3/envs/tpot2env/lib/python3.10/site-packages/sklearn/linear_model/_coordinate_descent.py:617: ConvergenceWarning: Objective did not converge. You might want to increase the number of iterations. Duality gap: 1812.8362937605707, tolerance: 168.2528\n", - " model = cd_fast.enet_coordinate_descent_gram(\n", - "/home/ribeirop/miniconda3/envs/tpot2env/lib/python3.10/site-packages/sklearn/linear_model/_coordinate_descent.py:617: ConvergenceWarning: Objective did not converge. You might want to increase the number of iterations. Duality gap: 2090.934535113978, tolerance: 168.2528\n", - " model = cd_fast.enet_coordinate_descent_gram(\n", - "/home/ribeirop/miniconda3/envs/tpot2env/lib/python3.10/site-packages/sklearn/linear_model/_coordinate_descent.py:617: ConvergenceWarning: Objective did not converge. You might want to increase the number of iterations. Duality gap: 2720.6381011917256, tolerance: 168.2528\n", - " model = cd_fast.enet_coordinate_descent_gram(\n", - "/home/ribeirop/miniconda3/envs/tpot2env/lib/python3.10/site-packages/sklearn/linear_model/_coordinate_descent.py:617: ConvergenceWarning: Objective did not converge. You might want to increase the number of iterations. 
Duality gap: 3694.640494319028, tolerance: 168.2528\n", - " model = cd_fast.enet_coordinate_descent_gram(\n", - "/home/ribeirop/miniconda3/envs/tpot2env/lib/python3.10/site-packages/sklearn/linear_model/_coordinate_descent.py:617: ConvergenceWarning: Objective did not converge. You might want to increase the number of iterations. Duality gap: 5819.918714194559, tolerance: 168.2528\n", - " model = cd_fast.enet_coordinate_descent_gram(\n", - "/home/ribeirop/miniconda3/envs/tpot2env/lib/python3.10/site-packages/sklearn/linear_model/_coordinate_descent.py:617: ConvergenceWarning: Objective did not converge. You might want to increase the number of iterations. Duality gap: 8499.700911721331, tolerance: 168.2528\n", - " model = cd_fast.enet_coordinate_descent_gram(\n", - "/home/ribeirop/miniconda3/envs/tpot2env/lib/python3.10/site-packages/sklearn/linear_model/_coordinate_descent.py:617: ConvergenceWarning: Objective did not converge. You might want to increase the number of iterations. Duality gap: 9747.96645780711, tolerance: 168.2528\n", - " model = cd_fast.enet_coordinate_descent_gram(\n", - "/home/ribeirop/miniconda3/envs/tpot2env/lib/python3.10/site-packages/sklearn/linear_model/_coordinate_descent.py:617: ConvergenceWarning: Objective did not converge. You might want to increase the number of iterations. Duality gap: 8925.452311816742, tolerance: 168.2528\n", - " model = cd_fast.enet_coordinate_descent_gram(\n", - "/home/ribeirop/miniconda3/envs/tpot2env/lib/python3.10/site-packages/sklearn/linear_model/_coordinate_descent.py:617: ConvergenceWarning: Objective did not converge. You might want to increase the number of iterations. Duality gap: 242.2927812706912, tolerance: 166.10352603773586\n", - " model = cd_fast.enet_coordinate_descent_gram(\n", - "/home/ribeirop/miniconda3/envs/tpot2env/lib/python3.10/site-packages/sklearn/linear_model/_coordinate_descent.py:617: ConvergenceWarning: Objective did not converge. 
You might want to increase the number of iterations. Duality gap: 520.11185573088, tolerance: 166.10352603773586\n", - " model = cd_fast.enet_coordinate_descent_gram(\n", - "/home/ribeirop/miniconda3/envs/tpot2env/lib/python3.10/site-packages/sklearn/linear_model/_coordinate_descent.py:617: ConvergenceWarning: Objective did not converge. You might want to increase the number of iterations. Duality gap: 270.07291585509665, tolerance: 166.10352603773586\n", - " model = cd_fast.enet_coordinate_descent_gram(\n", - "/home/ribeirop/miniconda3/envs/tpot2env/lib/python3.10/site-packages/sklearn/linear_model/_coordinate_descent.py:617: ConvergenceWarning: Objective did not converge. You might want to increase the number of iterations. Duality gap: 312.4193137688562, tolerance: 166.10352603773586\n", - " model = cd_fast.enet_coordinate_descent_gram(\n", - "/home/ribeirop/miniconda3/envs/tpot2env/lib/python3.10/site-packages/sklearn/linear_model/_coordinate_descent.py:617: ConvergenceWarning: Objective did not converge. You might want to increase the number of iterations. Duality gap: 334.48251612263266, tolerance: 166.10352603773586\n", - " model = cd_fast.enet_coordinate_descent_gram(\n", - "/home/ribeirop/miniconda3/envs/tpot2env/lib/python3.10/site-packages/sklearn/linear_model/_coordinate_descent.py:617: ConvergenceWarning: Objective did not converge. You might want to increase the number of iterations. Duality gap: 406.0909651533002, tolerance: 166.10352603773586\n", - " model = cd_fast.enet_coordinate_descent_gram(\n", - "/home/ribeirop/miniconda3/envs/tpot2env/lib/python3.10/site-packages/sklearn/linear_model/_coordinate_descent.py:617: ConvergenceWarning: Objective did not converge. You might want to increase the number of iterations. 
Duality gap: 393.6330031697871, tolerance: 166.10352603773586\n", - " model = cd_fast.enet_coordinate_descent_gram(\n", - "/home/ribeirop/miniconda3/envs/tpot2env/lib/python3.10/site-packages/sklearn/linear_model/_coordinate_descent.py:617: ConvergenceWarning: Objective did not converge. You might want to increase the number of iterations. Duality gap: 419.26211581844836, tolerance: 166.10352603773586\n", - " model = cd_fast.enet_coordinate_descent_gram(\n", - "/home/ribeirop/miniconda3/envs/tpot2env/lib/python3.10/site-packages/sklearn/linear_model/_coordinate_descent.py:617: ConvergenceWarning: Objective did not converge. You might want to increase the number of iterations. Duality gap: 1105.061883097398, tolerance: 166.10352603773586\n", - " model = cd_fast.enet_coordinate_descent_gram(\n", - "/home/ribeirop/miniconda3/envs/tpot2env/lib/python3.10/site-packages/sklearn/linear_model/_coordinate_descent.py:617: ConvergenceWarning: Objective did not converge. You might want to increase the number of iterations. Duality gap: 1492.2850051816786, tolerance: 166.10352603773586\n", - " model = cd_fast.enet_coordinate_descent_gram(\n", - "/home/ribeirop/miniconda3/envs/tpot2env/lib/python3.10/site-packages/sklearn/linear_model/_coordinate_descent.py:617: ConvergenceWarning: Objective did not converge. You might want to increase the number of iterations. Duality gap: 1359.714203708456, tolerance: 166.10352603773586\n", - " model = cd_fast.enet_coordinate_descent_gram(\n", - "/home/ribeirop/miniconda3/envs/tpot2env/lib/python3.10/site-packages/sklearn/linear_model/_coordinate_descent.py:617: ConvergenceWarning: Objective did not converge. You might want to increase the number of iterations. 
Duality gap: 1543.3692570256535, tolerance: 166.10352603773586\n", - " model = cd_fast.enet_coordinate_descent_gram(\n", - "/home/ribeirop/miniconda3/envs/tpot2env/lib/python3.10/site-packages/sklearn/linear_model/_coordinate_descent.py:617: ConvergenceWarning: Objective did not converge. You might want to increase the number of iterations. Duality gap: 686.7691507576965, tolerance: 166.10352603773586\n", - " model = cd_fast.enet_coordinate_descent_gram(\n", - "/home/ribeirop/miniconda3/envs/tpot2env/lib/python3.10/site-packages/sklearn/linear_model/_coordinate_descent.py:617: ConvergenceWarning: Objective did not converge. You might want to increase the number of iterations. Duality gap: 567.2123847292969, tolerance: 166.10352603773586\n", - " model = cd_fast.enet_coordinate_descent_gram(\n", - "/home/ribeirop/miniconda3/envs/tpot2env/lib/python3.10/site-packages/sklearn/linear_model/_coordinate_descent.py:617: ConvergenceWarning: Objective did not converge. You might want to increase the number of iterations. Duality gap: 575.1844139498426, tolerance: 166.10352603773586\n", - " model = cd_fast.enet_coordinate_descent_gram(\n", - "/home/ribeirop/miniconda3/envs/tpot2env/lib/python3.10/site-packages/sklearn/linear_model/_coordinate_descent.py:617: ConvergenceWarning: Objective did not converge. You might want to increase the number of iterations. Duality gap: 1196.4656488135224, tolerance: 166.10352603773586\n", - " model = cd_fast.enet_coordinate_descent_gram(\n", - "/home/ribeirop/miniconda3/envs/tpot2env/lib/python3.10/site-packages/sklearn/linear_model/_coordinate_descent.py:617: ConvergenceWarning: Objective did not converge. You might want to increase the number of iterations. 
Duality gap: 2136.7159360550577, tolerance: 166.10352603773586\n", - " model = cd_fast.enet_coordinate_descent_gram(\n", - "/home/ribeirop/miniconda3/envs/tpot2env/lib/python3.10/site-packages/sklearn/linear_model/_coordinate_descent.py:617: ConvergenceWarning: Objective did not converge. You might want to increase the number of iterations. Duality gap: 3161.7749671411, tolerance: 166.10352603773586\n", - " model = cd_fast.enet_coordinate_descent_gram(\n", - "/home/ribeirop/miniconda3/envs/tpot2env/lib/python3.10/site-packages/sklearn/linear_model/_coordinate_descent.py:617: ConvergenceWarning: Objective did not converge. You might want to increase the number of iterations. Duality gap: 158.83327397913672, tolerance: 158.8069449056604\n", - " model = cd_fast.enet_coordinate_descent_gram(\n", - "/home/ribeirop/miniconda3/envs/tpot2env/lib/python3.10/site-packages/sklearn/linear_model/_coordinate_descent.py:617: ConvergenceWarning: Objective did not converge. You might want to increase the number of iterations. Duality gap: 168.01972272712737, tolerance: 158.8069449056604\n", - " model = cd_fast.enet_coordinate_descent_gram(\n", - "/home/ribeirop/miniconda3/envs/tpot2env/lib/python3.10/site-packages/sklearn/linear_model/_coordinate_descent.py:617: ConvergenceWarning: Objective did not converge. You might want to increase the number of iterations. Duality gap: 529.342575648101, tolerance: 158.8069449056604\n", - " model = cd_fast.enet_coordinate_descent_gram(\n", - "/home/ribeirop/miniconda3/envs/tpot2env/lib/python3.10/site-packages/sklearn/linear_model/_coordinate_descent.py:617: ConvergenceWarning: Objective did not converge. You might want to increase the number of iterations. Duality gap: 811.6219812278869, tolerance: 158.8069449056604\n", - " model = cd_fast.enet_coordinate_descent_gram(\n", - "/home/ribeirop/miniconda3/envs/tpot2env/lib/python3.10/site-packages/sklearn/linear_model/_coordinate_descent.py:617: ConvergenceWarning: Objective did not converge. 
You might want to increase the number of iterations. Duality gap: 601.5064170324476, tolerance: 158.8069449056604\n", - " model = cd_fast.enet_coordinate_descent_gram(\n", - "/home/ribeirop/miniconda3/envs/tpot2env/lib/python3.10/site-packages/sklearn/linear_model/_coordinate_descent.py:617: ConvergenceWarning: Objective did not converge. You might want to increase the number of iterations. Duality gap: 459.8468100364553, tolerance: 158.8069449056604\n", - " model = cd_fast.enet_coordinate_descent_gram(\n", - "/home/ribeirop/miniconda3/envs/tpot2env/lib/python3.10/site-packages/sklearn/linear_model/_coordinate_descent.py:617: ConvergenceWarning: Objective did not converge. You might want to increase the number of iterations. Duality gap: 171.07939504506066, tolerance: 158.8069449056604\n", - " model = cd_fast.enet_coordinate_descent_gram(\n", - "/home/ribeirop/miniconda3/envs/tpot2env/lib/python3.10/site-packages/sklearn/linear_model/_coordinate_descent.py:617: ConvergenceWarning: Objective did not converge. You might want to increase the number of iterations. Duality gap: 475.0977421862772, tolerance: 158.8069449056604\n", - " model = cd_fast.enet_coordinate_descent_gram(\n", - "/home/ribeirop/miniconda3/envs/tpot2env/lib/python3.10/site-packages/sklearn/linear_model/_coordinate_descent.py:617: ConvergenceWarning: Objective did not converge. You might want to increase the number of iterations. Duality gap: 1609.3130913197529, tolerance: 158.8069449056604\n", - " model = cd_fast.enet_coordinate_descent_gram(\n", - "/home/ribeirop/miniconda3/envs/tpot2env/lib/python3.10/site-packages/sklearn/linear_model/_coordinate_descent.py:617: ConvergenceWarning: Objective did not converge. You might want to increase the number of iterations. 
Duality gap: 3371.636877565179, tolerance: 158.8069449056604\n", - " model = cd_fast.enet_coordinate_descent_gram(\n", - "/home/ribeirop/miniconda3/envs/tpot2env/lib/python3.10/site-packages/sklearn/linear_model/_coordinate_descent.py:617: ConvergenceWarning: Objective did not converge. You might want to increase the number of iterations. Duality gap: 4893.275803661207, tolerance: 158.8069449056604\n", - " model = cd_fast.enet_coordinate_descent_gram(\n", - "/home/ribeirop/miniconda3/envs/tpot2env/lib/python3.10/site-packages/sklearn/linear_model/_coordinate_descent.py:617: ConvergenceWarning: Objective did not converge. You might want to increase the number of iterations. Duality gap: 5689.945571509306, tolerance: 158.8069449056604\n", - " model = cd_fast.enet_coordinate_descent_gram(\n", - "/home/ribeirop/miniconda3/envs/tpot2env/lib/python3.10/site-packages/sklearn/linear_model/_coordinate_descent.py:617: ConvergenceWarning: Objective did not converge. You might want to increase the number of iterations. Duality gap: 6327.594264068524, tolerance: 158.8069449056604\n", - " model = cd_fast.enet_coordinate_descent_gram(\n", - "/home/ribeirop/miniconda3/envs/tpot2env/lib/python3.10/site-packages/sklearn/linear_model/_coordinate_descent.py:617: ConvergenceWarning: Objective did not converge. You might want to increase the number of iterations. Duality gap: 8071.667983187712, tolerance: 158.8069449056604\n", - " model = cd_fast.enet_coordinate_descent_gram(\n", - "/home/ribeirop/miniconda3/envs/tpot2env/lib/python3.10/site-packages/sklearn/linear_model/_coordinate_descent.py:617: ConvergenceWarning: Objective did not converge. You might want to increase the number of iterations. Duality gap: 9214.471518416074, tolerance: 158.8069449056604\n", - " model = cd_fast.enet_coordinate_descent_gram(\n", - "/home/ribeirop/miniconda3/envs/tpot2env/lib/python3.10/site-packages/sklearn/linear_model/_coordinate_descent.py:617: ConvergenceWarning: Objective did not converge. 
You might want to increase the number of iterations. Duality gap: 221.34985516022425, tolerance: 159.7256437735849\n", - " model = cd_fast.enet_coordinate_descent_gram(\n", - "/home/ribeirop/miniconda3/envs/tpot2env/lib/python3.10/site-packages/sklearn/linear_model/_coordinate_descent.py:617: ConvergenceWarning: Objective did not converge. You might want to increase the number of iterations. Duality gap: 409.0736092341831, tolerance: 159.7256437735849\n", - " model = cd_fast.enet_coordinate_descent_gram(\n", - "/home/ribeirop/miniconda3/envs/tpot2env/lib/python3.10/site-packages/sklearn/linear_model/_coordinate_descent.py:617: ConvergenceWarning: Objective did not converge. You might want to increase the number of iterations. Duality gap: 576.086710276315, tolerance: 159.7256437735849\n", - " model = cd_fast.enet_coordinate_descent_gram(\n", - "/home/ribeirop/miniconda3/envs/tpot2env/lib/python3.10/site-packages/sklearn/linear_model/_coordinate_descent.py:617: ConvergenceWarning: Objective did not converge. You might want to increase the number of iterations. Duality gap: 759.3069202784682, tolerance: 159.7256437735849\n", - " model = cd_fast.enet_coordinate_descent_gram(\n", - "/home/ribeirop/miniconda3/envs/tpot2env/lib/python3.10/site-packages/sklearn/linear_model/_coordinate_descent.py:617: ConvergenceWarning: Objective did not converge. You might want to increase the number of iterations. Duality gap: 788.3264070701553, tolerance: 159.7256437735849\n", - " model = cd_fast.enet_coordinate_descent_gram(\n", - "/home/ribeirop/miniconda3/envs/tpot2env/lib/python3.10/site-packages/sklearn/linear_model/_coordinate_descent.py:617: ConvergenceWarning: Objective did not converge. You might want to increase the number of iterations. 
Duality gap: 1851.77406217705, tolerance: 159.7256437735849\n", - " model = cd_fast.enet_coordinate_descent_gram(\n", - "/home/ribeirop/miniconda3/envs/tpot2env/lib/python3.10/site-packages/sklearn/linear_model/_coordinate_descent.py:617: ConvergenceWarning: Objective did not converge. You might want to increase the number of iterations. Duality gap: 1982.0810699927388, tolerance: 159.7256437735849\n", - " model = cd_fast.enet_coordinate_descent_gram(\n", - "/home/ribeirop/miniconda3/envs/tpot2env/lib/python3.10/site-packages/sklearn/linear_model/_coordinate_descent.py:617: ConvergenceWarning: Objective did not converge. You might want to increase the number of iterations. Duality gap: 1990.0643707137788, tolerance: 159.7256437735849\n", - " model = cd_fast.enet_coordinate_descent_gram(\n", - "/home/ribeirop/miniconda3/envs/tpot2env/lib/python3.10/site-packages/sklearn/linear_model/_coordinate_descent.py:617: ConvergenceWarning: Objective did not converge. You might want to increase the number of iterations. Duality gap: 1123.845644916175, tolerance: 159.7256437735849\n", - " model = cd_fast.enet_coordinate_descent_gram(\n", - "/home/ribeirop/miniconda3/envs/tpot2env/lib/python3.10/site-packages/sklearn/linear_model/_coordinate_descent.py:617: ConvergenceWarning: Objective did not converge. You might want to increase the number of iterations. Duality gap: 466.2079415132757, tolerance: 159.7256437735849\n", - " model = cd_fast.enet_coordinate_descent_gram(\n", - "/home/ribeirop/miniconda3/envs/tpot2env/lib/python3.10/site-packages/sklearn/linear_model/_coordinate_descent.py:617: ConvergenceWarning: Objective did not converge. You might want to increase the number of iterations. Duality gap: 1319.3072104484309, tolerance: 159.7256437735849\n", - " model = cd_fast.enet_coordinate_descent_gram(\n", - "/home/ribeirop/miniconda3/envs/tpot2env/lib/python3.10/site-packages/sklearn/linear_model/_coordinate_descent.py:617: ConvergenceWarning: Objective did not converge. 
You might want to increase the number of iterations. Duality gap: 1714.5370268148836, tolerance: 159.7256437735849\n", - " model = cd_fast.enet_coordinate_descent_gram(\n", - "/home/ribeirop/miniconda3/envs/tpot2env/lib/python3.10/site-packages/sklearn/linear_model/_coordinate_descent.py:617: ConvergenceWarning: Objective did not converge. You might want to increase the number of iterations. Duality gap: 1605.753956191009, tolerance: 159.7256437735849\n", - " model = cd_fast.enet_coordinate_descent_gram(\n", - "/home/ribeirop/miniconda3/envs/tpot2env/lib/python3.10/site-packages/sklearn/linear_model/_coordinate_descent.py:617: ConvergenceWarning: Objective did not converge. You might want to increase the number of iterations. Duality gap: 5471.587720631971, tolerance: 159.7256437735849\n", - " model = cd_fast.enet_coordinate_descent_gram(\n", - "/home/ribeirop/miniconda3/envs/tpot2env/lib/python3.10/site-packages/sklearn/linear_model/_coordinate_descent.py:617: ConvergenceWarning: Objective did not converge. You might want to increase the number of iterations. 
Duality gap: 10655.474709162605, tolerance: 159.7256437735849\n", - " model = cd_fast.enet_coordinate_descent_gram(\n" - ] - }, - { - "name": "stdout", - "output_type": "stream", - "text": [ - "-2514.9527497535055\n" - ] - } - ], - "source": [ - "import tpot2\n", - "import sklearn\n", - "import sklearn.metrics\n", - "import sklearn.datasets\n", - "\n", - "est = tpot2.tpot_estimator.templates.TPOTRegressor(n_jobs=4, max_time_seconds=10)\n", - "\n", - "\n", - "scorer = sklearn.metrics.get_scorer('neg_mean_squared_error')\n", - "X, y = sklearn.datasets.load_diabetes(return_X_y=True)\n", - "X_train, X_test, y_train, y_test = sklearn.model_selection.train_test_split(X, y, train_size=0.75, test_size=0.25)\n", - "est.fit(X_train, y_train)\n", - "print(scorer(est, X_test, y_test))" - ] - }, - { - "cell_type": "code", - "execution_count": 30, - "metadata": {}, - "outputs": [ - { - "name": "stdout", - "output_type": "stream", - "text": [ - "terminating parallel evaluation due to timeout\n", - "0.9999694758670971\n" - ] - } - ], - "source": [ - "import tpot2\n", - "import sklearn\n", - "import sklearn.datasets\n", - "\n", - "est = tpot2.tpot_estimator.templates.TPOTClassifier(n_jobs=4, max_time_seconds=10)\n", - "\n", - "\n", - "scorer = sklearn.metrics.get_scorer('roc_auc_ovo')\n", - "X, y = sklearn.datasets.load_digits(return_X_y=True)\n", - "X_train, X_test, y_train, y_test = sklearn.model_selection.train_test_split(X, y, train_size=0.75, test_size=0.25)\n", - "est.fit(X_train, y_train)\n", - "print(scorer(est, X_test, y_test))" - ] } ], "metadata": { @@ -1236,7 +451,7 @@ "name": "python", "nbconvert_exporter": "python", "pygments_lexer": "ipython3", - "version": "3.10.11" + "version": "3.10.14" }, "orig_nbformat": 4, "vscode": { diff --git a/Tutorial/2_Defining_Search_Space_(config_dicts).ipynb b/Tutorial/2_Defining_Search_Space_(config_dicts).ipynb deleted file mode 100644 index efef82e8..00000000 --- a/Tutorial/2_Defining_Search_Space_(config_dicts).ipynb +++ /dev/null 
@@ -1,478 +0,0 @@ -{ - "cells": [ - { - "attachments": {}, - "cell_type": "markdown", - "metadata": {}, - "source": [ - "Everything can be done with the TPOTEstimator class. All other classes (TPOTRegressor, TPOTClassifier, TPOTSymbolicClassifier, TPOTSymbolicRegression, TPOTGeneticFeatureSetSelector, etc.) are actually just different default settings for TPOTEstimator.\n", - "\n", - "\n", - "By Default, TPOT will generate pipelines with a default set of classifiers or regressors as roots (this depends on whether classification is set to true or false). All other nodes are selected from a default list of selectors and transformers. Note: This differs from the TPOT1 behavior where by default classifiers and regressors can appear in locations other than the root. You can modify the the search space for leaves, inner nodes, and roots (final classifiers) separately through built in options or custom configuration dictionaries.\n", - "\n", - "In this tutorial we will walk through using the built in configurations, creating custom configurations, and using nested configurations." - ] - }, - { - "attachments": {}, - "cell_type": "markdown", - "metadata": {}, - "source": [ - "## Configuration Dictionaries\n", - "\n", - "The default configuration includes several machine learning estimators from sklearn. Sometimes we may want to change or restrict what is allowed. \n", - "\n", - "In TPOT2, we specify three different configuration dictionaries to indicate which modules can go where on the graph\n", - "\n", - "1. root_config_dict : Specifies the modules allowed to be placed in the root node. This is the final classifier or regressor. (It can also technically be used as a transformer if the scoring function knows that.). You are guaranteed a root node in every graph pipeline.\n", - "2. inner_config_dict : Specifies the modules allowed to be placed all nodes that are not the root node. If leaf_config_dict is set to None, then leaves will be pulled from this list. 
You are not guaranteed a node from this list however. It is still possible to end up with a graph that contains only a single root, or a root and a leaf even if this is set.\n", - "3. leaf_config_dict : Specifies the modules allowed to be placed as leafs. Unlike inner_config_dict, you are guaranteed to have a leaf node from this list if it is set. The smallest possible graph would thus be \\[leaf->root\\]. \n", - "\n", - "Note: TPOT1 internally divided the methods inside the configuration dictionary into selectors/transformers/estimators and treated them differently. TPOT2 does not. \n", - "\n", - "## Built in Defaults\n", - "\n", - "Each configuration dictionary parameter has access to the same default parameters. The default parameters can also be grouped into a list to combine their search spaces.\n", - "\n", - "- 'selectors' : A selection of sklearn Selector methods.\n", - "- 'classifiers' : A selection of sklearn Classifier methods.\n", - "- 'regressors' : A selection of sklearn Regressor methods.\n", - "- 'transformers' : A selection of sklearn Transformer methods.\n", - "- 'arithmetic_transformer' : A selection of sklearn Arithmetic Transformer methods that replicate symbolic classification/regression operators.\n", - "- 'passthrough' : A node that just passes though the input. Useful for passing through raw inputs into inner nodes.\n", - "- 'feature_set_selector' : A selector that pulls out specific subsets of columns from the data. Only well defined as a leaf.\n", - " Subsets are set with the subsets parameter.\n", - "- list : a list of strings out of the above options to include the corresponding methods in the configuration dictionary.\n", - "\n", - "\n", - "## Other search space parameters\n", - "\n", - "1. linear_pipeline : If True, pipelines will be linear\n", - "2. 
max_size : The maximum number of nodes in the pipeline.\n", - "\n", - "\n", - "\n", - "## defining configuration dictionaries\n", - "\n", - "Configuration dictionaries are python dictionaries where the keys are the method types and the values are optuna-compatible functions that take in a trial and return a hyperparameter dictionary.\n", - "\n", - "\n", - "Configuration dictionaries can also be nested. Meaning that the search space for that node, will be a graph defined by the nested dictionary. More on that later in the tutorial. \n", - "\n", - "With these three types of configuration dictionaries plus nesting, one can define very specific search spaces. More on nesting later." - ] - }, - { - "cell_type": "code", - "execution_count": 1, - "metadata": {}, - "outputs": [ - { - "name": "stdout", - "output_type": "stream", - "text": [ - "1.0\n" - ] - }, - { - "data": { - "image/png": "iVBORw0KGgoAAAANSUhEUgAAAnYAAAHWCAYAAAD6oMSKAAAAOXRFWHRTb2Z0d2FyZQBNYXRwbG90bGliIHZlcnNpb24zLjcuMSwgaHR0cHM6Ly9tYXRwbG90bGliLm9yZy/bCgiHAAAACXBIWXMAAA9hAAAPYQGoP6dpAAAbtklEQVR4nO3de5DfdX3v8ddecttcd03ZXCBBIZCAEG5Km6A1FWVGK6L0QL0NXuqpHKt2HDnSQ8+MeOnMabDgDVoPMp5OaU05IkYU9dTqdAiIkEi4Q8CQyya7JuwmIdnc9nL+AGKBBALZEPbN4/FXdvf3+34/v98vM+/n/L6/7/fXMDg4OBgAAIa9xkO9AAAAhoawAwAoQtgBABQh7AAAihB2AABFCDsAgCKEHQBAEcIOAKAIYQcAUISwAwAoQtgBABQh7AAAihB2AABFCDsAgCKEHQBAEcIOAKAIYQcAUISwAwAoQtgBABQh7AAAihB2AABFCDsAgCKEHQBAEcIOAKAIYQcAUISwAwAoQtgBABQh7AAAihB2AABFCDsAgCKEHQBAEcIOAKAIYQcAUISwAwAoQtgBABQh7AAAihB2AABFCDsAgCKEHQBAEcIOAKAIYQcAUISwAwAoQtgBABQh7AAAihB2AABFCDsAgCKEHQBAEcIOAKAIYQcAUISwAwAoQtgBABQh7AAAihB2AABFCDsAgCKEHQBAEcIOAKAIYQcAUISwAwAoQtgBABQh7AAAihB2AABFCDsAgCKEHQBAEcIOAKAIYQcAUISwAwAoQtgBABQh7AAAihB2AABFCDsAgCKEHQBAEcIOAKAIYQcAUISwAwAoQtgBABQh7AAAihB2AABFCDsAgCKEHQBAEcIOAKAIYQcAUISwAwAoQtgBABQh7AAAihB2AABFCDsAgCKEHQBAEcIOAKAIYQcAUISwAwAoQtgBABQh7AAAihB2AABFCDsAgCKEHQBAEcIOAKAIYQcAUISwAwAoQtgBABQh7AAAihB2AABFCDsAgCKEHQBAEcIOAKAIYQcAUISwAwAoQtgBABQh7AAAihB2AABFCDsAgCKEHQBAEcIOAKAIYQcAUISwAwAoQtgBABQh7
AAAihB2AABFCDsAgCKEHQBAEcIOAKAIYQcAUISwAwAoQtgBABQh7AAAihB2AABFCDsAgCKEHQBAEcIOAKAIYQcAUISwAwAoQtgBABQh7AAAihB2AABFCDsAgCKEHQBAEcIOAKAIYQcAUISwAwAoQtgBABQh7AAAihB2AABFCDsAgCKEHQBAEcIOAKAIYQcAUISwAwAoQtgBABQh7AAAihB2AABFCDsAgCKEHQBAEcIOAKAIYQcAUISwAwAoQtgBABQh7AAAihB2AABFCDsAgCKEHQBAEcIOAKAIYQcAUISwAwAoQtgBABQh7AAAihB2AABFCDsAgCKEHQBAEcIOAKAIYQcAUISwAwAoQtgBABQh7AAAihB2AABFCDsAgCKEHQBAEcIOAKAIYQcAUISwAwAoQtgBABQh7AAAihB2AABFCDsAgCKEHQBAEcIOAKAIYQcAUISwAwAoQtgBABQh7AAAihB2AABFCDsAgCKEHQBAEcIOAKAIYQcAUISwAwAoQtgBABQh7AAAihB2AABFCDsAgCKEHQBAEcIOAKAIYQcAUISwAwAoQtgBABQh7AAAihB2AABFCDsAgCKEHQBAEcIOAKAIYQcAUISwAwAoQtgBABQh7AAAihB2AABFCDsAgCKaD/UCXgr9/f3p7u5OV1dXurq6sqGzMzu3b89Af38am5oyasyY/N6UKWlvb097e3va2trS1NR0qJcNADwH8/3ZGgYHBwcP9SIOlp6enixfvjx3L1uWHdu2ZbCvL+O2b8/E7u6M6OtL4+BgBhoasru5OZvb2rJ1zJg0NDdn9NixOeGUUzJ37ty0trYe6ocBAPwn5vu+lQy7devW5Zabb87KFSsyorc3M1avydTu7kzcti0j+vv3eb/dTU3ZPHZs1re1ZfWMI7K7pSWvnjUr89/whkydOvUlfAQAwDOZ78+vVNj19fVlyZIluX3JkozbuDFHr1qdwzduTNPAwAveVn9jY9ZOnpyHZ87I1smT87r58zN//vw0N78ijl4DwMuG+b7/yoRdZ2dnfrh4cXrWdmT2ihWZ1dGRxiF4aAMNDVkxfXoemDUrbYdPz9vOPjtTpkwZghUDAM/HfH9hSoTdqlWr8r1Fi9Kybn1Ovf/+TOjtHfJ9bGlpydI5c9I7bVredf55mTlz5pDvAwD4HfP9hRv2Ybdq1ap891/+Ja9atTqvv+++NL+It2X3V19jY247/rh0z5iRc9/znmH/4gPAy5X5/uIM6+vYdXZ25nuLFqVt1er8/r33HtQXPUmaBwbyB/fcm7bVq/O9Rf+azs7Og7o/AHglMt9fvGEbdn19ffnh4sVpWbc+p99335Acb98fjYODOf3e+zJm/br8aPHi9PX1vST7BYBXAvP9wAzbsFuyZEl61nbk1PvvP+gl/0zNAwM59b77093RkVtuueUl3TcAVGa+H5hhGXbr1q3L7UuWZPaKFQflg5T7Y2Jvb459aEV+dfPNWb9+/SFZAwBUYr4fuGEZdrfcfHPGbdyYWR0dh3Qdx3R0ZNzGjVly882HdB0AUIH5fuCGXdj19PRk5YoVOXrV6pfsuPu+NA4O5qhVq7PyoYfS09NzSNcCAMOZ+T40hl3YLV++PCN6e3P4xo2HeilJkiM2bkxzb2/uuuuuQ70UABi2zPehMazCrr+/P3cvW5YZq9e8qK8RORiaBgYyc82a3LV0afqf43vqAIC9M9+HzosKu8mTJx/wjv/sz/4sjzzyyD7/fsUVV2TXrl17fl6wYEG6u7uzY9u2TO3uftbt33/XXTlr6R15x7Jlefedv859W7ce8Br319THnljXjTfemHe+852ZN29eFi9enCS54447ctFFFw3Zvn71q1/ltNNOy4gRI3LjjTcO2XYB4Ll8/vOfz/HHH58TTjghp512WlauXLnP277QTnhqvv/H0qXZ9Z/CbsHtv8o7li3NO5YtzYfuuTsb/lMXvBSmPtadH//4x+l+sjvWrVuX973vfUmSb3/72/nMZ
z7zgrd59dVXZ9asWWloaMjWg9Aqh+wdu6uvvjpHHXXUPv/+zLD7+c9/nq6urgz29WXSPp6Ir82ekx+cckr+dMrU/O2j+/4Pt7/69/MY/5ju7mzfujWXXHJJFi9enFtvvTXnnntuNm7cmNNOOy0LFy484LU8Zdq0afnWt76V97znPUO2TQB4Lrfcckt+/vOf584778zdd9+dG264IZMmTRqy7T8136/7zSPZ/YzZ+525J+UHp5ya144bn79fs2a/tre/8/v5TNy2Lb+4+eZ0dXUleWIGX3vttQe0zdNPPz0//elPD9q3WzQP1YaWLVuWj33sY9m+fXtOPvnkfPOb38zo0aPz/e9/PxdddFEmTpyYE088Ma2trbnsssvypje9KV//+tczZ86cXHDBBVm2bFmampry6U9/Or29vVm3bl3mzZuXI488MosXL87kyZOzaNGijNu+Pf971aP54YYNaUjy7vYp+dD06U9by6kTJuSajrVJnnhx/3blyty+ZXN2Dwzmo4cfnrMPOyy9/f35zIMPZuX23swdPyG/3LwpPzzl1Nzz+OP5xprVGdnYmM19ffk/rz0hlz7ycFb09mZwMPnMkUdmfmtrfrlpUz7/yMPJwEAak3x47tyMHTt2zxr6+vrygx/8IK2trfnWt76Va665Jo899lg+9alPZc2aNWltbc1Xv/rVzJgxI5/4xCcyYcKELFu2LN3d3bn88sszb968vT7Po0aNyrRp07Jr165s3rw5GzZsGKqXEAD26sEHH8z48eOzadOmJE/Mor6+vixatCgLFy7Mzp07c+yxx+YrX/lKRo4cmcHBwT3z6atf/WpuvPHG7Ny5M+edd14+/vGPJ0kuv/zy3HDDDUmSefPmZcvKR7Nh166cv/zOTB81KlfNOS6DSQYGBjLY1JTXTZyQf1y3bp9z/fqurvys+7Fs3t2XiSOa87mjjs7/fHhFOnbsTGND8pXZc3LkmDH55to1+fHGjdk9MJBzDmvPRw4/PLdt2pSr1q7JmMamPNLbmze1teV/vOY1+dojj2THjh0555xzcuaZZ+biiy/On/zJn+SOO+542vOzYcOG/Pmf/3lWr16dESNG5Morr8zJJ5+81+fyhBNOOCiv0VOGLOwuuOCCXH311Tn99NNz4YUX5sorr8yFF16YT37yk1myZEmmTJmSM888M6eddtrT7nfnnXdm5cqVue+++5IkmzdvzsSJE7Nw4cLccsstGTdu3J7bbujszMrly3Prpk25/qSTM7KxMZt2737WWn7R3Z03t70qSXJdV2cOGzky1590cnb09+e/LF+eN7S25v92dWb66FG58rjjsmRTT67/bdee+9+zdWtuOuXUtI8alS8/+mgWtLXlfx1zbLp378577lqem04+JX+/8jf52KRJOa2lJVv7+9OxZXN6nnGI+MMf/vCefx922GHPWuczn4unnHPOOc/zbD9h0aJF+3U7ABgKe5tlT7nnnnvy3e9+9zlve+mll+bSSy991u/nHHNMzmo/LP/R1JQr2tvT0tiYzq7O9Pf3p+u3XdnW1Jyfbt6UY1vG7nOuJ8kD27bl+yednHHNzfnUA/dnQVtbzp8yNbsGBtI3OJibe3rSuXNnvjv3pAwk+dA9d++5731bt+ZHp5yaCc3NefuypfngtGn59JFH5p82/DZfvPTS/On73pdHH310r4/9L//yL/NXf/VXed3rXpcVK1bk/e9/f2677bYX8MwOnSEJu02bNmXnzp05/fTTkyQf+MAHsnDhwvzRH/1RZs+encMPPzxJcu6552bVqlVPu+9rXvOarFu3Lh//+Mfzzne+M29961v3uZ+d27fn/rVrc277lIxsfOIo8qQRI/b8/RMP3J9dAwPZ2t+fxSefkiRZ0tOTh3p78/0Nv02SbO3vy5odO7Jsy+P5r0+ua/6k1kxq/t1TccqECWkfNeqJ+2/qyS+6H8uVT779u72/P4/29OT4UaPyzccey6pdu/KmcePSvGtXpra3Z
8XDD7/4JxIAXoEmTpiQ5n1ckPgvOjrSkOSokaNy0VFH569XPLTXuZ4kb5jUmnFPzvM7Nm/O5cfOTpKMbGzMyCQ3b+rJL7p7cseWXydJtvX3Z+X27ZnU3JyTx0/I5JEjkySzWsamY+fOTBs9Og1Jdj25/X35t3/7t9x77717fj6Ul0gZsnfs9mZwP45xt7a25u67786PfvSjXH755fnpT3+ayy67bK+3HejvT55jm1+bPSezWlryNyt/ky/+5pF8Y85xGUjyhaOPzusnTnrm6va5nTGNv/vo4cDgYP7+uOMzffToPb/bsmVL3tfamtNbWnJrb2/+W0dH/vtxx+XYo4/OfyxZ8ryPGQD4nebGxjTs42zYr0+fnpbGxjSkIROam/c51x/u7c3opuc+dWBgMPmLGTPy7vb2p/3+tk2bMrKxYc/PTQ1PzP+n9O/H98becccdaW4+qFm1X4bk5IlJkyZl1KhRuf3225Mk1157bd74xjdm9uzZeeCBB9LR0ZH+/v5cf/31z7rvxo0bMzAwkPPOOy+f+9zncueddyZJxo8fn8cff/zpi21qygnTpuW7XZ17zpp55qHYhoaGfHrmkblzy5b8prc3Z0xqzbXr1+/5IOVD27alf3AwJ0+YkJuevFbOrZs2ZdM+XrT5ra35x3Xr9vx839atGT9+fDr7+3P0qFH5QGtrZo4YkQ29vel58rMHAMD+27Z9ewYbG9PS2Jjtewm8xsbGTJw0KQ3JPuf6M502cWKu6+pMkuwaGEhvf3/OaJ2U67o6s/3Jy5es3bEjjz9PtDU2NKSh8blzacGCBbnqqqv2/Lx8+fLnvP3B9KLSsqenZ8/h1SRZuHBhvv3tb+fCCy/Mjh07ctJJJ+XCCy/M6NGjc8UVV2TBggWZOHFiZs+enQkTJjxtWx0dHfngBz+YgYGBNDc354orrkiSfPSjH82CBQtyzDHH7Ll0yKgxY/LaI4/M9hUP55w7f53mhoace1h7LnjGyRNjmpry4emH55qOjlx69NFZu2NHzvn1sgwk+b2RI3P18a/N+6ZOy2cefCBvW7Y0c8eNT/vIkRm9lxfu40fMyBd/80jesWxp+gYHc/y4cbns2Nn5wc6d+WVPTzI4mGNHjszMKVPyoyc/J/iUf/7nf86YMWOedvLEJz/5yaxdu/ZZJ0+84x3vyFvf+tZs3bo1f/iHf5ilS5fu9bm/99578973vjebN2/O6NGj8+pXvzo33XTTC30JAWC/LV++PBdffPGey3OceOKJueyyy3LbbbflS1/6Unbt2pXGxsZ88YtfzPz58/e8sZMkV111Vb7zne9kcHAwEyZMyDXXXJPDDjssX/7yl3PDDTc88S5XY2NGjhuf90yfns90dubI0aNz1Zzj0rR2bdoPa8+4ESPy1Ptp502Zste5/kyXvOaoXLLiofzTunVpbmjM5bNn542tbXm4tzfnLb8zA0nGNzfn67PnPOdjnz9rVv76c5/LL5cuzcUXX7zX23zta1/Lxz72sVx99dXZtWtXzj777MydO3evt/2Hf/iHfOELX0hnZ2eOPfbYnH/++fm7v/u7/Xod9kfD4P4cLz0AW7duzbhx49Lf3593v/vd+ehHP5o//uM/flHb+tnPfpYHf/KTvOXWXx7wuvoGBzMwOJiRjY1Z/vjjufSRh3P9SXs/g+X57Ny5Mz9+/ety0/3359///d+TJM3NzVm/fv2QXPMPACobyvk+1P7fH/x+jj3rrLz5zW8+1EvZLwf9YPBVV12Va6+9Njt37syZZ56Zt7/97S96W+3t7Vk6Zkx2NzVlxAFeBbq3vz8X3H13+gYHM6KxIZ876ugXva3GlpYMvOpV+chHPpLx48dnw4YN+exnPyvqAGA/DOV8H0q7m5qydcyYtD/jM3kvZwc97C666KIh++aF9vb2NDQ3Z/PYsZm8ZcsBbWtCc3O+t49rzLxQm8eOTUNzc97ylrfkve9975Bs8yc/+Uk++9nPPu138+fPzze+8Y0h2
T4AvFwM5XwfSk/N9xcTdl/60pdy3XXXPe13n/rUp/KhD31oqJa3V4f+9I0XoK2tLaPHjs36traX1Qu//lVPrKutrW3ItnnWWWflrLPOGrLtAcDLVcX5fskll+SSSy45CKt6bofsK8VejKamppxwyilZPeOI9D/PGSovlf7Gxqw64oiceOqpaWpqOtTLAYBhx3wfOi+PZ+8FmDt3bna3tGTty+Tza2smT05fS0tOPPHEQ70UABi2zPehMezCrrW1Na+eNSsPz5yRgYaG57/DQTTQ0JBHZs7Iq485Jq1PfiUJAPDCme9DY9iFXZLMf8MbsnXy5Kx4xvXrXmoPTZ+erZMnZ/4ZZxzSdQBABeb7gRuWYTd16tS8bv78PDBrVra0tBySNWxuacmDx8zK6884I1OnTj0kawCASsz3Azcswy554tIfrYdPz9I5c9L3En/Qsq+xMUuPm5O26dMzb968l3TfAFCZ+X5ghm3YNTc35+1nn53eadNy2/HHvWTH4wcaGnLb8cdl+9RpedvZZ78svvAXAKow3w/MsA27JJkyZUredf556Z4xI7e+9viDXvZ9jY259bXHp3vGjLzr/PMyZcqUg7o/AHglMt9fvIP+XbEvhVWrVuV7i/41LevW5dT778+E3t4h38fmlpYsPW5Otk+dlnedf15mzpw55PsAAH7HfH/hSoRdknR2duaHixenZ21HZq9YkVkdHWkcgoc20NCQh6ZPz4PHzErb9Ol529lnD+uSB4DhxHx/YcqEXZL09fVlyZIluX3JkozbuDFHrVqdIzZuTNPAwAveVn9jY9ZMnpxHZs7I1smT8/ozzsi8efOG7TF3ABiuzPf9VyrsnrJu3brcsmRJVj70UJp7ezNzzZpMfaw7E7dty4j+/n3eb3dTUzaPHZv1r2rLqiOOSF9LS159zDGZP0xPeQaASsz351cy7J7S09OTu+66K3ctXZod27ZlsK8v47Zvz4Tunozs60vj4EAGGhqzq7k5W9pas3XMmDQ0N2f02LE58dRTc+KJJw67K04DQHXm+76VDrun9Pf3p7u7O11dXenq6sqGzs7s2rEj/X19aWpuzsjRo/N7U6akvb097e3taWtrG1Zf+AsAr0Tm+7O9IsIOAOCVYFhfxw4AgN8RdgAARQg7AIAihB0AQBHCDgCgCGEHAFCEsAMAKELYAQAUIewAAIoQdgAARQg7AIAihB0AQBHCDgCgCGEHAFCEsAMAKELYAQAUIewAAIoQdgAARQg7AIAihB0AQBHCDgCgCGEHAFCEsAMAKELYAQAUIewAAIoQdgAARQg7AIAihB0AQBHCDgCgCGEHAFCEsAMAKELYAQAUIewAAIoQdgAARQg7AIAihB0AQBHCDgCgCGEHAFCEsAMAKELYAQAUIewAAIoQdgAARQg7AIAihB0AQBHCDgCgCGEHAFCEsAMAKELYAQAUIewAAIoQdgAARQg7AIAihB0AQBHCDgCgCGEHAFCEsAMAKELYAQAUIewAAIoQdgAARQg7AIAihB0AQBHCDgCgCGEHAFCEsAMAKELYAQAUIewAAIoQdgAARQg7AIAihB0AQBHCDgCgCGEHAFCEsAMAKELYAQAUIewAAIoQdgAARQg7AIAihB0AQBHCDgCgCGEHAFCEsAMAKELYAQAUIewAAIoQdgAARQg7AIAihB0AQBHCDgCgCGEHAFCEsAMAKELYAQAUIewAAIoQdgAARQg7AIAihB0AQBHCDgCgCGEHAFCEsAMAKELYAQAUIewAAIoQdgAARQg7AIAihB0AQBHCDgCgCGEHAFCEsAMAKELYAQAUIewAAIoQdgAARQg7AIAihB0AQBHCDgCgCGEHAFCEsAMAKELYAQAUIewAAIoQdgAARQg7AIAihB0AQBHCDgCgCGEHAFCEsAMAKELYAQAUIewAAIoQdgAARQg7AIAihB0AQBHCDgCgCGEHAFCEsAMAKELYAQAUIewAAIoQdgAARQg7AIAihB0AQBHCDgCgC
GEHAFCEsAMAKELYAQAUIewAAIoQdgAARQg7AIAihB0AQBHCDgCgCGEHAFCEsAMAKELYAQAUIewAAIoQdgAARQg7AIAihB0AQBHCDgCgCGEHAFCEsAMAKELYAQAUIewAAIoQdgAARQg7AIAihB0AQBHCDgCgCGEHAFCEsAMAKELYAQAUIewAAIoQdgAARQg7AIAihB0AQBHCDgCgCGEHAFCEsAMAKELYAQAUIewAAIoQdgAARQg7AIAihB0AQBHCDgCgCGEHAFCEsAMAKELYAQAUIewAAIoQdgAARQg7AIAihB0AQBHCDgCgCGEHAFCEsAMAKELYAQAUIewAAIoQdgAARQg7AIAihB0AQBHCDgCgCGEHAFCEsAMAKELYAQAUIewAAIoQdgAARQg7AIAihB0AQBHCDgCgCGEHAFCEsAMAKELYAQAUIewAAIoQdgAARQg7AIAihB0AQBHCDgCgCGEHAFCEsAMAKELYAQAUIewAAIoQdgAARQg7AIAihB0AQBHCDgCgiP8PJSigRRDUHoIAAAAASUVORK5CYII=", - "text/plain": [ - "
" - ] - }, - "metadata": {}, - "output_type": "display_data" - } - ], - "source": [ - "# A Linear pipeline starting with a selector, followed by 0 to 4 transformers, and ending with a classifier.\n", - "\n", - "import tpot2\n", - "import sklearn\n", - "import sklearn.datasets\n", - "\n", - "est = tpot2.TPOTEstimator( population_size=10,\n", - " generations=5,\n", - " scorers=['roc_auc_ovr'],\n", - " scorers_weights=[1],\n", - " classification=True,\n", - " root_config_dict=\"classifiers\",\n", - " inner_config_dict= \"transformers\",\n", - " leaf_config_dict=\"selectors\",\n", - " linear_pipeline=True,\n", - " max_size=6,\n", - "\n", - " early_stop=5,\n", - " verbose=0)\n", - "\n", - "scorer = sklearn.metrics.get_scorer('roc_auc_ovo')\n", - "X, y = sklearn.datasets.load_iris(return_X_y=True)\n", - "X_train, X_test, y_train, y_test = sklearn.model_selection.train_test_split(X, y, train_size=0.75, test_size=0.25)\n", - "est.fit(X_train, y_train)\n", - "print(scorer(est, X_test, y_test))\n", - "est.fitted_pipeline_.plot()" - ] - }, - { - "cell_type": "code", - "execution_count": 2, - "metadata": {}, - "outputs": [ - { - "name": "stdout", - "output_type": "stream", - "text": [ - "0.9941520467836257\n" - ] - }, - { - "data": { - "image/png": "", - "text/plain": [ - "
" - ] - }, - "metadata": {}, - "output_type": "display_data" - } - ], - "source": [ - "# A Graph pipeline starting with at least one selector as a leaf, potentially followed by a series\n", - "# of stacking classifiers or transformers, and ending with a classifier. The graph will have at most 15 nodes.\n", - "\n", - "import tpot2\n", - "import sklearn\n", - "import sklearn.datasets\n", - "import numpy as np\n", - "\n", - "est = tpot2.TPOTEstimator( population_size=10,\n", - " generations=5,\n", - " scorers=['roc_auc_ovr'],\n", - " scorers_weights=[1],\n", - " classification=True,\n", - " root_config_dict=\"classifiers\",\n", - " inner_config_dict= [\"classifiers\",\"transformers\"],\n", - " leaf_config_dict=\"selectors\",\n", - " max_size=15,\n", - "\n", - " early_stop=5,\n", - " verbose=0)\n", - "\n", - "scorer = sklearn.metrics.get_scorer('roc_auc_ovo')\n", - "X, y = sklearn.datasets.load_iris(return_X_y=True)\n", - "X_train, X_test, y_train, y_test = sklearn.model_selection.train_test_split(X, y, train_size=0.75, test_size=0.25)\n", - "est.fit(X_train, y_train)\n", - "print(scorer(est, X_test, y_test))\n", - "\n", - "est.fitted_pipeline_.plot()" - ] - }, - { - "attachments": {}, - "cell_type": "markdown", - "metadata": {}, - "source": [ - "## Custom Configuration Dictionaries\n", - "\n", - "\n", - "Next, we will show how to use these features to define a graph pipeline search space similar to symbolic classification.\n", - "\n", - "The following defines a pipeline where leafs select a single feature, inner nodes perform arithmetic, and logistic regression is used as a final classifier.\n", - "\n", - "The arithmetic transformer and feature set selection of single columns are built in configurations with the \"arithmetic_transformer\" and \"feature_set_selector\" options respectively. 
\n", - "\n", - "There is not a built in configuration for a single logistic regression so we have to manually define one.\n", - "\n", - "### Parameter function\n", - "To start, we create a function that takes in a trial object. This object takes in a search space, and outputs a parameter. This is designed to be compatible with the optuna trial class. More information on available functions within trial can be found here: https://optuna.readthedocs.io/en/stable/reference/generated/optuna.trial.Trial.html\n", - "\n", - "The suggested parameters should be put into a dictionary that has the model parameters as keys with their corresponding values.\n", - "\n", - "Note: For optuna optimization to work, it is important to add '_{name}' to each of the names parameters. With large graphs, names of parameters will likely clash. The name parameter here allows TPOT2 to make sure each parameter for each node has a unique label. \n", - "\n", - "Note: This will be simplified in a future release.\n" - ] - }, - { - "cell_type": "code", - "execution_count": 3, - "metadata": {}, - "outputs": [], - "source": [ - "import tpot2\n", - "import numpy as np\n", - "def params_LogisticRegression(trial, name=None):\n", - " params = {}\n", - " params['solver'] = trial.suggest_categorical(name=f'solver_{name}',\n", - " choices=[f'newton-cg', 'lbfgs', 'liblinear', 'sag', 'saga'])\n", - " params['dual'] = False\n", - " params['penalty'] = 'l2'\n", - " params['C'] = trial.suggest_float(f'C_{name}', 1e-4, 1e4, log=True)\n", - " params['l1_ratio'] = None\n", - " if params['solver'] == 'liblinear':\n", - " params['penalty'] = trial.suggest_categorical(name=f'penalty_{name}', choices=['l1', 'l2'])\n", - " if params['penalty'] == 'l2':\n", - " params['dual'] = trial.suggest_categorical(name=f'dual_{name}', choices=[True, False])\n", - " else:\n", - " params['penalty'] = 'l1'\n", - "\n", - " params['class_weight'] = trial.suggest_categorical(name=f'class_weight_{name}', choices=['balanced'])\n", - " 
param_grid = {'solver': params['solver'],\n", - " 'penalty': params['penalty'],\n", - " 'dual': params['dual'],\n", - " 'multi_class': 'auto',\n", - " 'l1_ratio': params['l1_ratio'],\n", - " 'C': params['C'],\n", - " }\n", - " return param_grid" - ] - }, - { - "attachments": {}, - "cell_type": "markdown", - "metadata": {}, - "source": [ - "### configuration dictionary\n", - "A configuration dictionary has the python Types for the designed estimator as keys, and the function as values." - ] - }, - { - "cell_type": "code", - "execution_count": 4, - "metadata": {}, - "outputs": [], - "source": [ - "\n", - "from sklearn.linear_model import LogisticRegression\n", - "root_config_dict = { LogisticRegression : params_LogisticRegression }" - ] - }, - { - "cell_type": "code", - "execution_count": 5, - "metadata": {}, - "outputs": [ - { - "name": "stderr", - "output_type": "stream", - "text": [ - "/home/ribeirop/miniconda3/envs/tpot2env/lib/python3.10/site-packages/sklearn/linear_model/_sag.py:350: ConvergenceWarning: The max_iter was reached which means the coef_ did not converge\n", - " warnings.warn(\n" - ] - }, - { - "name": "stdout", - "output_type": "stream", - "text": [ - "1.0\n" - ] - }, - { - "data": { - "image/png": "", - "text/plain": [ - "
" - ] - }, - "metadata": {}, - "output_type": "display_data" - } - ], - "source": [ - "\n", - "\n", - "est = tpot2.TPOTEstimator(population_size=20,generations=10, \n", - " scorers=['roc_auc_ovr'],\n", - " scorers_weights=[1],\n", - " classification=True,\n", - " inner_config_dict= \"arithmetic_transformer\",\n", - " leaf_config_dict=\"feature_set_selector\",\n", - " root_config_dict=root_config_dict,\n", - " )\n", - "\n", - "#load iris\n", - "scorer = sklearn.metrics.get_scorer('roc_auc_ovo')\n", - "X, y = sklearn.datasets.load_iris(return_X_y=True)\n", - "X_train, X_test, y_train, y_test = sklearn.model_selection.train_test_split(X, y, train_size=0.75, test_size=0.25)\n", - "est.fit(X_train, y_train)\n", - "print(scorer(est, X_test, y_test))\n", - "est.fitted_pipeline_.plot()" - ] - }, - { - "attachments": {}, - "cell_type": "markdown", - "metadata": {}, - "source": [ - "## Recursive Configuration Dictionaries (EXPERIMENTAL)\n", - "\n", - "Configuration dictionaries can also be nested. If the string \"Recursive\" is used in place of a type, the node that would go in that place will now represent a graph with those restrictions. \n", - "\n", - "All inputs to the recursive node will be merged and input to all the leaves within the recursive graph. The output of the graph will be sent to the outputs of the node that represents it. \n", - "\n", - "This is handy for restricting the search space of the model as well as setting specific ensembling templates.\n", - "\n", - "(Currently) These are all flattened and merged into a single graph when exported as a graph pipeline. In the future these could be used for ensemble methods such as boosting/stacking/etc.\n", - "\n", - "Note that this is not a new instance of the TPOT2 estimator, and it does not independently run GP. Rather this recursive node just sets a search space restriction for that node." 
- ] - }, - { - "cell_type": "code", - "execution_count": 6, - "metadata": {}, - "outputs": [], - "source": [ - "transformer_config_dictionary = \"transformers\"\n", - "selector_config_dictionary = \"feature_set_selector\"\n", - "classifier_config_dictionary = root_config_dict \n", - "\n", - "#Some example search spaces with nested graphs\n", - "\n", - "#pipelines of the shape selector->transformer\n", - "st_params = { \n", - " 'root_config_dict':transformer_config_dictionary,\n", - " 'leaf_config_dict':selector_config_dictionary,\n", - " 'inner_config_dict': None,\n", - " 'max_size' : 2, \n", - " 'linear_pipeline' : True}\n", - "\n", - "#pipelines of the shape (selector->transformer) -> classifier. \n", - "# This is equivalent to setting TPOT1 to use the 'Selector-Transformer-Classifier' template\n", - "st_c_params = { \n", - " 'root_config_dict': classifier_config_dictionary,\n", - " 'leaf_config_dict': {\"Recursive\" : st_params},\n", - " 'inner_config_dict': None,\n", - " 'max_size' : 2, \n", - " 'linear_pipeline' : True}\n", - "\n", - "#pipelines of the shape ((selector->transformer) -> classifier)*N) -> classifier\n", - "#This is like having an ensemble of 'Selector-Transformer-Classifier' models with a final meta classifier\n", - "st_c_ensemble_params = { \n", - " 'root_config_dict': classifier_config_dictionary,\n", - " 'leaf_config_dict': {\"Recursive\" : st_c_params},\n", - " 'inner_config_dict': None,\n", - " 'max_size' : 6, \n", - " 'linear_pipeline' : True}" - ] - }, - { - "cell_type": "code", - "execution_count": 7, - "metadata": {}, - "outputs": [ - { - "name": "stdout", - "output_type": "stream", - "text": [ - "0.9880174291938998\n" - ] - }, - { - "data": { - "image/png": "", - "text/plain": [ - "
" - ] - }, - "metadata": {}, - "output_type": "display_data" - } - ], - "source": [ - "# linear pipelines of the shape selector->transformer->classifier\n", - "est = tpot2.TPOTEstimator(population_size=20,generations=10, \n", - " scorers=['roc_auc_ovr'],\n", - " scorers_weights=[1],\n", - " classification=True,\n", - " **st_c_params,\n", - " )\n", - "\n", - "#load iris\n", - "scorer = sklearn.metrics.get_scorer('roc_auc_ovo')\n", - "X, y = sklearn.datasets.load_iris(return_X_y=True)\n", - "X_train, X_test, y_train, y_test = sklearn.model_selection.train_test_split(X, y, train_size=0.75, test_size=0.25)\n", - "est.fit(X_train, y_train)\n", - "print(scorer(est, X_test, y_test))\n", - "est.fitted_pipeline_.plot()" - ] - }, - { - "cell_type": "code", - "execution_count": 8, - "metadata": {}, - "outputs": [ - { - "name": "stderr", - "output_type": "stream", - "text": [ - "/home/ribeirop/miniconda3/envs/tpot2env/lib/python3.10/site-packages/sklearn/svm/_base.py:1244: ConvergenceWarning: Liblinear failed to converge, increase the number of iterations.\n", - " warnings.warn(\n" - ] - }, - { - "name": "stdout", - "output_type": "stream", - "text": [ - "0.9976851851851851\n" - ] - }, - { - "data": { - "image/png": "", - "text/plain": [ - "
" - ] - }, - "metadata": {}, - "output_type": "display_data" - } - ], - "source": [ - "# ensembles of linear pipelines of the shape selector->transformer->classifier ensemble pipeline with a final meta classifier\n", - "est = tpot2.TPOTEstimator(population_size=20,generations=10, \n", - " scorers=['roc_auc_ovr'],\n", - " scorers_weights=[1],\n", - " classification=True,\n", - " **st_c_ensemble_params,\n", - " )\n", - "\n", - "#load iris\n", - "scorer = sklearn.metrics.get_scorer('roc_auc_ovo')\n", - "X, y = sklearn.datasets.load_iris(return_X_y=True)\n", - "X_train, X_test, y_train, y_test = sklearn.model_selection.train_test_split(X, y, train_size=0.75, test_size=0.25)\n", - "est.fit(X_train, y_train)\n", - "print(scorer(est, X_test, y_test))\n", - "est.fitted_pipeline_.plot()" - ] - } - ], - "metadata": { - "kernelspec": { - "display_name": "tpot_dev", - "language": "python", - "name": "python3" - }, - "language_info": { - "codemirror_mode": { - "name": "ipython", - "version": 3 - }, - "file_extension": ".py", - "mimetype": "text/x-python", - "name": "python", - "nbconvert_exporter": "python", - "pygments_lexer": "ipython3", - "version": "3.10.11" - }, - "orig_nbformat": 4, - "vscode": { - "interpreter": { - "hash": "7fe1fe9ef32cd5efd76326a08046147513534f0dd2318301a1a96ae9071c1c4e" - } - } - }, - "nbformat": 4, - "nbformat_minor": 2 -} diff --git a/Tutorial/2_Search_Spaces.ipynb b/Tutorial/2_Search_Spaces.ipynb index e21d6c5c..940509de 100644 --- a/Tutorial/2_Search_Spaces.ipynb +++ b/Tutorial/2_Search_Spaces.ipynb @@ -31,7 +31,7 @@ "output_type": "stream", "text": [ "sampled hyperparameters\n", - "{'metric': 'minkowski', 'n_jobs': 1, 'n_neighbors': 1, 'p': 1, 'weights': 'distance'}\n" + "{'metric': 'euclidean', 'n_jobs': 1, 'n_neighbors': 1, 'p': 3, 'weights': 'uniform'}\n" ] } ], @@ -43,9 +43,9 @@ "knn_configspace = ConfigurationSpace(\n", " space = {\n", "\n", - " 'n_neighbors': Integer(\"n_neighbors\", bounds=(1, 10)),\n", + " 'n_neighbors': (1, 10),\n", " 
'weights': Categorical(\"weights\", ['uniform', 'distance']),\n", - " 'p': Integer(\"p\", bounds=(1, 3)),\n", + " 'p': (1, 3),\n", " 'metric': Categorical(\"metric\", ['euclidean', 'minkowski']),\n", " 'n_jobs': 1,\n", " }\n", @@ -154,9 +154,9 @@ "output_type": "stream", "text": [ "sampled hyperparameters\n", - "{'metric': 'minkowski', 'n_jobs': 1, 'n_neighbors': 8, 'p': 1, 'weights': 'uniform'}\n", + "{'metric': 'euclidean', 'n_jobs': 1, 'n_neighbors': 1, 'p': 1, 'weights': 'uniform'}\n", "mutated hyperparameters\n", - "{'metric': 'minkowski', 'n_jobs': 1, 'n_neighbors': 5, 'p': 2, 'weights': 'distance'}\n" + "{'metric': 'euclidean', 'n_jobs': 1, 'n_neighbors': 7, 'p': 1, 'weights': 'distance'}\n" ] } ], @@ -187,14 +187,14 @@ "output_type": "stream", "text": [ "original hyperparameters for individual 1\n", - "{'metric': 'euclidean', 'n_jobs': 1, 'n_neighbors': 9, 'p': 1, 'weights': 'uniform'}\n", + "{'metric': 'euclidean', 'n_jobs': 1, 'n_neighbors': 7, 'p': 1, 'weights': 'distance'}\n", "original hyperparameters for individual 2\n", - "{'metric': 'minkowski', 'n_jobs': 1, 'n_neighbors': 3, 'p': 3, 'weights': 'distance'}\n", + "{'metric': 'minkowski', 'n_jobs': 1, 'n_neighbors': 5, 'p': 3, 'weights': 'uniform'}\n", "\n", "post crossover hyperparameters for individual 1\n", - "{'metric': 'minkowski', 'n_jobs': 1, 'n_neighbors': 9, 'p': 3, 'weights': 'uniform'}\n", + "{'metric': 'minkowski', 'n_jobs': 1, 'n_neighbors': 5, 'p': 1, 'weights': 'uniform'}\n", "post crossover hyperparameters for individual 2\n", - "{'metric': 'minkowski', 'n_jobs': 1, 'n_neighbors': 3, 'p': 3, 'weights': 'distance'}\n" + "{'metric': 'minkowski', 'n_jobs': 1, 'n_neighbors': 5, 'p': 3, 'weights': 'uniform'}\n" ] } ], @@ -233,10 +233,414 @@ { "data": { "text/html": [ - "
KNeighborsClassifier(n_jobs=1, n_neighbors=9, p=3)
In a Jupyter environment, please rerun this cell to show the HTML representation or trust the notebook.
On GitHub, the HTML representation is unable to render, please try loading this page with nbviewer.org.
" + "
KNeighborsClassifier(n_jobs=1, p=1)
In a Jupyter environment, please rerun this cell to show the HTML representation or trust the notebook.
On GitHub, the HTML representation is unable to render, please try loading this page with nbviewer.org.
" ], "text/plain": [ - "KNeighborsClassifier(n_jobs=1, n_neighbors=9, p=3)" + "KNeighborsClassifier(n_jobs=1, p=1)" ] }, "execution_count": 5, @@ -272,7 +676,7 @@ { "data": { "text/plain": [ - "" + "" ] }, "execution_count": 6, @@ -386,13 +790,417 @@ { "data": { "text/html": [ - "
LogisticRegression(C=0.4989834645092814, class_weight='balanced', dual=True,\n",
-       "                   max_iter=1000, n_jobs=1)
In a Jupyter environment, please rerun this cell to show the HTML representation or trust the notebook.
On GitHub, the HTML representation is unable to render, please try loading this page with nbviewer.org.
" + "
LogisticRegression(C=0.5857355940220703, class_weight='balanced', dual=True,\n",
+       "                   max_iter=1000, n_jobs=1, penalty='l1', solver='saga')
In a Jupyter environment, please rerun this cell to show the HTML representation or trust the notebook.
On GitHub, the HTML representation is unable to render, please try loading this page with nbviewer.org.
" ], "text/plain": [ - "LogisticRegression(C=0.4989834645092814, class_weight='balanced', dual=True,\n", - " max_iter=1000, n_jobs=1)" + "LogisticRegression(C=0.5857355940220703, class_weight='balanced', dual=True,\n", + " max_iter=1000, n_jobs=1, penalty='l1', solver='saga')" ] }, "execution_count": 7, @@ -422,13 +1230,417 @@ { "data": { "text/html": [ - "
DecisionTreeClassifier(max_depth=9, max_features='log2', min_samples_leaf=12,\n",
-       "                       min_samples_split=4)
In a Jupyter environment, please rerun this cell to show the HTML representation or trust the notebook.
On GitHub, the HTML representation is unable to render, please try loading this page with nbviewer.org.
" + "
LogisticRegression(C=2.032340407557342, class_weight='balanced', max_iter=1000,\n",
+       "                   n_jobs=1)
In a Jupyter environment, please rerun this cell to show the HTML representation or trust the notebook.
On GitHub, the HTML representation is unable to render, please try loading this page with nbviewer.org.
" ], "text/plain": [ - "DecisionTreeClassifier(max_depth=9, max_features='log2', min_samples_leaf=12,\n", - " min_samples_split=4)" + "LogisticRegression(C=2.032340407557342, class_weight='balanced', max_iter=1000,\n", + " n_jobs=1)" ] }, "execution_count": 8, @@ -466,16 +1678,431 @@ "metadata": {}, "outputs": [ { - "ename": "TypeError", - "evalue": "unhashable type: 'list'", - "output_type": "error", - "traceback": [ - "\u001b[0;31m---------------------------------------------------------------------------\u001b[0m", - "\u001b[0;31mTypeError\u001b[0m Traceback (most recent call last)", - "Cell \u001b[0;32mIn[9], line 2\u001b[0m\n\u001b[1;32m 1\u001b[0m \u001b[38;5;66;03m#same pipeline search space as before.\u001b[39;00m\n\u001b[0;32m----> 2\u001b[0m classifier_choice \u001b[38;5;241m=\u001b[39m \u001b[43mtpot2\u001b[49m\u001b[38;5;241;43m.\u001b[39;49m\u001b[43mconfig\u001b[49m\u001b[38;5;241;43m.\u001b[39;49m\u001b[43mget_search_space\u001b[49m\u001b[43m(\u001b[49m\u001b[43m[\u001b[49m\u001b[38;5;124;43m\"\u001b[39;49m\u001b[38;5;124;43mKNeighborsClassifier\u001b[39;49m\u001b[38;5;124;43m\"\u001b[39;49m\u001b[43m,\u001b[49m\u001b[43m \u001b[49m\u001b[38;5;124;43m\"\u001b[39;49m\u001b[38;5;124;43mLogisticRegression\u001b[39;49m\u001b[38;5;124;43m\"\u001b[39;49m\u001b[43m,\u001b[49m\u001b[43m \u001b[49m\u001b[38;5;124;43m\"\u001b[39;49m\u001b[38;5;124;43mDecisionTreeClassifier\u001b[39;49m\u001b[38;5;124;43m\"\u001b[39;49m\u001b[43m]\u001b[49m\u001b[43m)\u001b[49m\n\u001b[1;32m 4\u001b[0m \u001b[38;5;28mprint\u001b[39m(\u001b[38;5;124m\"\u001b[39m\u001b[38;5;124msampled pipeline 1\u001b[39m\u001b[38;5;124m\"\u001b[39m)\n\u001b[1;32m 5\u001b[0m classifier_choice\u001b[38;5;241m.\u001b[39mgenerate()\u001b[38;5;241m.\u001b[39mexport_pipeline()\n", - "File \u001b[0;32m~/common/Projects/TPOT_Dev/tpot2/tpot2/config/get_configspace.py:169\u001b[0m, in \u001b[0;36mget_search_space\u001b[0;34m(name, n_classes, n_samples, random_state)\u001b[0m\n\u001b[1;32m 168\u001b[0m 
\u001b[38;5;28;01mdef\u001b[39;00m \u001b[38;5;21mget_search_space\u001b[39m(name, n_classes\u001b[38;5;241m=\u001b[39m\u001b[38;5;241m3\u001b[39m, n_samples\u001b[38;5;241m=\u001b[39m\u001b[38;5;241m100\u001b[39m, random_state\u001b[38;5;241m=\u001b[39m\u001b[38;5;28;01mNone\u001b[39;00m):\n\u001b[0;32m--> 169\u001b[0m name \u001b[38;5;241m=\u001b[39m \u001b[43mGROUPNAMES\u001b[49m\u001b[43m[\u001b[49m\u001b[43mname\u001b[49m\u001b[43m]\u001b[49m\n\u001b[1;32m 171\u001b[0m \u001b[38;5;28;01mif\u001b[39;00m name \u001b[38;5;129;01mis\u001b[39;00m \u001b[38;5;28;01mNone\u001b[39;00m:\n\u001b[1;32m 172\u001b[0m \u001b[38;5;28;01mreturn\u001b[39;00m \u001b[38;5;28;01mNone\u001b[39;00m\n", - "\u001b[0;31mTypeError\u001b[0m: unhashable type: 'list'" + "name": "stdout", + "output_type": "stream", + "text": [ + "sampled pipeline 1\n" ] + }, + { + "data": { + "text/html": [ + "
DecisionTreeClassifier(max_depth=30, max_features='sqrt', min_samples_leaf=3,\n",
+       "                       min_samples_split=18)
In a Jupyter environment, please rerun this cell to show the HTML representation or trust the notebook.
On GitHub, the HTML representation is unable to render, please try loading this page with nbviewer.org.
" + ], + "text/plain": [ + "DecisionTreeClassifier(max_depth=30, max_features='sqrt', min_samples_leaf=3,\n", + " min_samples_split=18)" + ] + }, + "execution_count": 9, + "metadata": {}, + "output_type": "execute_result" } ], "source": [ @@ -488,7 +2115,7 @@ }, { "cell_type": "code", - "execution_count": null, + "execution_count": 10, "metadata": {}, "outputs": [ { @@ -501,10 +2128,417 @@ { "data": { "text/html": [ - "
KNeighborsClassifier(metric='euclidean', n_jobs=1, n_neighbors=96)
In a Jupyter environment, please rerun this cell to show the HTML representation or trust the notebook.
On GitHub, the HTML representation is unable to render, please try loading this page with nbviewer.org.
" + "
DecisionTreeClassifier(max_depth=19, max_features='sqrt', min_samples_leaf=8,\n",
+       "                       min_samples_split=5)
In a Jupyter environment, please rerun this cell to show the HTML representation or trust the notebook.
On GitHub, the HTML representation is unable to render, please try loading this page with nbviewer.org.
" ], "text/plain": [ - "KNeighborsClassifier(metric='euclidean', n_jobs=1, n_neighbors=96)" + "DecisionTreeClassifier(max_depth=19, max_features='sqrt', min_samples_leaf=8,\n", + " min_samples_split=5)" ] }, "execution_count": 10, @@ -519,23 +2553,435 @@ }, { "cell_type": "code", - "execution_count": null, + "execution_count": 11, "metadata": {}, "outputs": [ { - "ename": "KeyError", - "evalue": "'AdaBoostClassifier'", - "output_type": "error", - "traceback": [ - "\u001b[0;31m---------------------------------------------------------------------------\u001b[0m", - "\u001b[0;31mKeyError\u001b[0m Traceback (most recent call last)", - "Cell \u001b[0;32mIn[11], line 2\u001b[0m\n\u001b[1;32m 1\u001b[0m \u001b[38;5;66;03m#search space for all classifiers\u001b[39;00m\n\u001b[0;32m----> 2\u001b[0m classifier_choice \u001b[38;5;241m=\u001b[39m \u001b[43mtpot2\u001b[49m\u001b[38;5;241;43m.\u001b[39;49m\u001b[43mconfig\u001b[49m\u001b[38;5;241;43m.\u001b[39;49m\u001b[43mget_search_space\u001b[49m\u001b[43m(\u001b[49m\u001b[38;5;124;43m\"\u001b[39;49m\u001b[38;5;124;43mclassifiers\u001b[39;49m\u001b[38;5;124;43m\"\u001b[39;49m\u001b[43m)\u001b[49m\n\u001b[1;32m 4\u001b[0m \u001b[38;5;28mprint\u001b[39m(\u001b[38;5;124m\"\u001b[39m\u001b[38;5;124msampled pipeline 1\u001b[39m\u001b[38;5;124m\"\u001b[39m)\n\u001b[1;32m 5\u001b[0m classifier_choice\u001b[38;5;241m.\u001b[39mgenerate()\u001b[38;5;241m.\u001b[39mexport_pipeline()\n", - "File \u001b[0;32m~/common/Projects/TPOT_Dev/tpot2/tpot2/config/get_configspace.py:180\u001b[0m, in \u001b[0;36mget_search_space\u001b[0;34m(name, n_classes, n_samples, random_state)\u001b[0m\n\u001b[1;32m 178\u001b[0m \u001b[38;5;66;03m#if list of names, return a list of EstimatorNodes\u001b[39;00m\n\u001b[1;32m 179\u001b[0m \u001b[38;5;28;01mif\u001b[39;00m \u001b[38;5;28misinstance\u001b[39m(name, \u001b[38;5;28mlist\u001b[39m) \u001b[38;5;129;01mor\u001b[39;00m \u001b[38;5;28misinstance\u001b[39m(name, 
np\u001b[38;5;241m.\u001b[39mndarray):\n\u001b[0;32m--> 180\u001b[0m search_spaces \u001b[38;5;241m=\u001b[39m [get_search_space(n, n_classes\u001b[38;5;241m=\u001b[39mn_classes, n_samples\u001b[38;5;241m=\u001b[39mn_samples, random_state\u001b[38;5;241m=\u001b[39mrandom_state) \u001b[38;5;28;01mfor\u001b[39;00m n \u001b[38;5;129;01min\u001b[39;00m name]\n\u001b[1;32m 181\u001b[0m \u001b[38;5;28;01mreturn\u001b[39;00m ChoicePipeline(choice_list\u001b[38;5;241m=\u001b[39msearch_spaces)\n\u001b[1;32m 182\u001b[0m \u001b[38;5;28;01melse\u001b[39;00m:\n", - "File \u001b[0;32m~/common/Projects/TPOT_Dev/tpot2/tpot2/config/get_configspace.py:180\u001b[0m, in \u001b[0;36m\u001b[0;34m(.0)\u001b[0m\n\u001b[1;32m 178\u001b[0m \u001b[38;5;66;03m#if list of names, return a list of EstimatorNodes\u001b[39;00m\n\u001b[1;32m 179\u001b[0m \u001b[38;5;28;01mif\u001b[39;00m \u001b[38;5;28misinstance\u001b[39m(name, \u001b[38;5;28mlist\u001b[39m) \u001b[38;5;129;01mor\u001b[39;00m \u001b[38;5;28misinstance\u001b[39m(name, np\u001b[38;5;241m.\u001b[39mndarray):\n\u001b[0;32m--> 180\u001b[0m search_spaces \u001b[38;5;241m=\u001b[39m [\u001b[43mget_search_space\u001b[49m\u001b[43m(\u001b[49m\u001b[43mn\u001b[49m\u001b[43m,\u001b[49m\u001b[43m \u001b[49m\u001b[43mn_classes\u001b[49m\u001b[38;5;241;43m=\u001b[39;49m\u001b[43mn_classes\u001b[49m\u001b[43m,\u001b[49m\u001b[43m \u001b[49m\u001b[43mn_samples\u001b[49m\u001b[38;5;241;43m=\u001b[39;49m\u001b[43mn_samples\u001b[49m\u001b[43m,\u001b[49m\u001b[43m \u001b[49m\u001b[43mrandom_state\u001b[49m\u001b[38;5;241;43m=\u001b[39;49m\u001b[43mrandom_state\u001b[49m\u001b[43m)\u001b[49m \u001b[38;5;28;01mfor\u001b[39;00m n \u001b[38;5;129;01min\u001b[39;00m name]\n\u001b[1;32m 181\u001b[0m \u001b[38;5;28;01mreturn\u001b[39;00m ChoicePipeline(choice_list\u001b[38;5;241m=\u001b[39msearch_spaces)\n\u001b[1;32m 182\u001b[0m \u001b[38;5;28;01melse\u001b[39;00m:\n", - "File 
\u001b[0;32m~/common/Projects/TPOT_Dev/tpot2/tpot2/config/get_configspace.py:183\u001b[0m, in \u001b[0;36mget_search_space\u001b[0;34m(name, n_classes, n_samples, random_state)\u001b[0m\n\u001b[1;32m 181\u001b[0m \u001b[38;5;28;01mreturn\u001b[39;00m ChoicePipeline(choice_list\u001b[38;5;241m=\u001b[39msearch_spaces)\n\u001b[1;32m 182\u001b[0m \u001b[38;5;28;01melse\u001b[39;00m:\n\u001b[0;32m--> 183\u001b[0m \u001b[38;5;28;01mreturn\u001b[39;00m \u001b[43mget_estimatornode\u001b[49m\u001b[43m(\u001b[49m\u001b[43mname\u001b[49m\u001b[43m,\u001b[49m\u001b[43m \u001b[49m\u001b[43mn_classes\u001b[49m\u001b[38;5;241;43m=\u001b[39;49m\u001b[43mn_classes\u001b[49m\u001b[43m,\u001b[49m\u001b[43m \u001b[49m\u001b[43mn_samples\u001b[49m\u001b[38;5;241;43m=\u001b[39;49m\u001b[43mn_samples\u001b[49m\u001b[43m,\u001b[49m\u001b[43m \u001b[49m\u001b[43mrandom_state\u001b[49m\u001b[38;5;241;43m=\u001b[39;49m\u001b[43mrandom_state\u001b[49m\u001b[43m)\u001b[49m\n", - "File \u001b[0;32m~/common/Projects/TPOT_Dev/tpot2/tpot2/config/get_configspace.py:190\u001b[0m, in \u001b[0;36mget_estimatornode\u001b[0;34m(name, n_classes, n_samples, random_state)\u001b[0m\n\u001b[1;32m 186\u001b[0m \u001b[38;5;28;01mdef\u001b[39;00m \u001b[38;5;21mget_estimatornode\u001b[39m(name, n_classes\u001b[38;5;241m=\u001b[39m\u001b[38;5;241m3\u001b[39m, n_samples\u001b[38;5;241m=\u001b[39m\u001b[38;5;241m100\u001b[39m, random_state\u001b[38;5;241m=\u001b[39m\u001b[38;5;28;01mNone\u001b[39;00m):\n\u001b[1;32m 187\u001b[0m configspace \u001b[38;5;241m=\u001b[39m get_configspace(name, n_classes\u001b[38;5;241m=\u001b[39mn_classes, n_samples\u001b[38;5;241m=\u001b[39mn_samples, random_state\u001b[38;5;241m=\u001b[39mrandom_state)\n\u001b[0;32m--> 190\u001b[0m \u001b[38;5;28;01mreturn\u001b[39;00m EstimatorNode(\u001b[43mSTRING_TO_CLASS\u001b[49m\u001b[43m[\u001b[49m\u001b[43mname\u001b[49m\u001b[43m]\u001b[49m, configspace)\n", - "\u001b[0;31mKeyError\u001b[0m: 'AdaBoostClassifier'" + "name": "stdout", + 
"output_type": "stream", + "text": [ + "sampled pipeline 1\n" ] + }, + { + "data": { + "text/html": [ + "
ExtraTreesClassifier(max_features=0.40389574491352287, min_samples_leaf=15,\n",
+       "                     min_samples_split=13, n_jobs=1)
In a Jupyter environment, please rerun this cell to show the HTML representation or trust the notebook.
On GitHub, the HTML representation is unable to render, please try loading this page with nbviewer.org.
" + ], + "text/plain": [ + "ExtraTreesClassifier(max_features=0.40389574491352287, min_samples_leaf=15,\n", + " min_samples_split=13, n_jobs=1)" + ] + }, + "execution_count": 11, + "metadata": {}, + "output_type": "execute_result" } ], "source": [ @@ -548,7 +2994,7 @@ }, { "cell_type": "code", - "execution_count": null, + "execution_count": 12, "metadata": {}, "outputs": [ { @@ -561,19 +3007,417 @@ { "data": { "text/html": [ - "
GradientBoostingClassifier(learning_rate=0.5981565344248039, max_depth=6,\n",
-       "                           max_features=0.14704006316550916,\n",
-       "                           min_samples_leaf=18, min_samples_split=14,\n",
-       "                           subsample=0.36853097212587516)
In a Jupyter environment, please rerun this cell to show the HTML representation or trust the notebook.
On GitHub, the HTML representation is unable to render, please try loading this page with nbviewer.org.
" + "
SVC(C=7.943520510912431, degree=1, kernel='linear', max_iter=3000,\n",
+       "    probability=True)
In a Jupyter environment, please rerun this cell to show the HTML representation or trust the notebook.
On GitHub, the HTML representation is unable to render, please try loading this page with nbviewer.org.
" ], "text/plain": [ - "GradientBoostingClassifier(learning_rate=0.5981565344248039, max_depth=6,\n", - " max_features=0.14704006316550916,\n", - " min_samples_leaf=18, min_samples_split=14,\n", - " subsample=0.36853097212587516)" + "SVC(C=7.943520510912431, degree=1, kernel='linear', max_iter=3000,\n", + " probability=True)" ] }, "execution_count": 12, @@ -597,7 +3441,7 @@ }, { "cell_type": "code", - "execution_count": null, + "execution_count": 13, "metadata": {}, "outputs": [ { @@ -610,16 +3454,439 @@ { "data": { "text/html": [ - "
Pipeline(steps=[('selectfwe', SelectFwe(alpha=0.026228617618654658)),\n",
-       "                ('zerocount', ZeroCount()),\n",
-       "                ('bernoullinb', BernoulliNB(alpha=0.04656547221901433))])
In a Jupyter environment, please rerun this cell to show the HTML representation or trust the notebook.
On GitHub, the HTML representation is unable to render, please try loading this page with nbviewer.org.
" + "
Pipeline(steps=[('variancethreshold',\n",
+       "                 VarianceThreshold(threshold=0.16682490562982172)),\n",
+       "                ('nystroem',\n",
+       "                 Nystroem(gamma=0.7638884024411401, kernel='linear',\n",
+       "                          n_components=98)),\n",
+       "                ('extratreesclassifier',\n",
+       "                 ExtraTreesClassifier(max_features=0.41763504253232936,\n",
+       "                                      min_samples_leaf=8, min_samples_split=17,\n",
+       "                                      n_jobs=1))])
In a Jupyter environment, please rerun this cell to show the HTML representation or trust the notebook.
On GitHub, the HTML representation is unable to render, please try loading this page with nbviewer.org.
" ], "text/plain": [ - "Pipeline(steps=[('selectfwe', SelectFwe(alpha=0.026228617618654658)),\n", - " ('zerocount', ZeroCount()),\n", - " ('bernoullinb', BernoulliNB(alpha=0.04656547221901433))])" + "Pipeline(steps=[('variancethreshold',\n", + " VarianceThreshold(threshold=0.16682490562982172)),\n", + " ('nystroem',\n", + " Nystroem(gamma=0.7638884024411401, kernel='linear',\n", + " n_components=98)),\n", + " ('extratreesclassifier',\n", + " ExtraTreesClassifier(max_features=0.41763504253232936,\n", + " min_samples_leaf=8, min_samples_split=17,\n", + " n_jobs=1))])" ] }, "execution_count": 13, @@ -635,27 +3902,13 @@ " \n", "])\n", "\n", - "stc_pipeline = tpot2.search_spaces.pipelines.SequentialPipeline([\n", - " tpot2.config.get_search_space(\"preprocessors1\"), \n", - " tpot2.config.get_search_space(\"imputation\"), \n", - " tpot2.config.get_search_space(\"selectors\"), \n", - " tpot2.search_spaces.pipelines.GraphPipeline(\n", - " root_search_space= tpot2.config.get_search_space(\"classifiers\"),\n", - " leaf_search_space = tpot2.config.get_search_space(\"selectors\"), \n", - " inner_search_space = tpot2.config.get_search_space([\"transformers\",\"classifiers\"]),\n", - " max_size = 10,\n", - " )\n", - " \n", - "])\n", - "\n", - "\n", "print(\"sampled pipeline\")\n", "stc_pipeline.generate().export_pipeline()" ] }, { "cell_type": "code", - "execution_count": null, + "execution_count": 14, "metadata": {}, "outputs": [ { @@ -668,23 +3921,432 @@ { "data": { "text/html": [ - "
Pipeline(steps=[('selectfwe', SelectFwe(alpha=0.0005298121736972592)),\n",
-       "                ('normalizer', Normalizer()),\n",
-       "                ('mlpclassifier',\n",
-       "                 MLPClassifier(alpha=0.00120637383824527,\n",
-       "                               learning_rate_init=0.001497725714419087))])
In a Jupyter environment, please rerun this cell to show the HTML representation or trust the notebook.
On GitHub, the HTML representation is unable to render, please try loading this page with nbviewer.org.
" + "
Pipeline(steps=[('variancethreshold',\n",
+       "                 VarianceThreshold(threshold=0.029163176782587025)),\n",
+       "                ('rbfsampler',\n",
+       "                 RBFSampler(gamma=0.3360335889875927, n_components=61)),\n",
+       "                ('randomforestclassifier',\n",
+       "                 RandomForestClassifier(min_samples_leaf=2,\n",
+       "                                        min_samples_split=5))])
In a Jupyter environment, please rerun this cell to show the HTML representation or trust the notebook.
On GitHub, the HTML representation is unable to render, please try loading this page with nbviewer.org.
" ], "text/plain": [ - "Pipeline(steps=[('selectfwe', SelectFwe(alpha=0.0005298121736972592)),\n", - " ('normalizer', Normalizer()),\n", - " ('mlpclassifier',\n", - " MLPClassifier(alpha=0.00120637383824527,\n", - " learning_rate_init=0.001497725714419087))])" + "Pipeline(steps=[('variancethreshold',\n", + " VarianceThreshold(threshold=0.029163176782587025)),\n", + " ('rbfsampler',\n", + " RBFSampler(gamma=0.3360335889875927, n_components=61)),\n", + " ('randomforestclassifier',\n", + " RandomForestClassifier(min_samples_leaf=2,\n", + " min_samples_split=5))])" ] }, "execution_count": 14, @@ -708,31 +4370,449 @@ }, { "cell_type": "code", - "execution_count": null, + "execution_count": 15, "metadata": {}, "outputs": [ { "name": "stderr", "output_type": "stream", "text": [ - "Generation: 100%|██████████| 5/5 [00:48<00:00, 9.63s/it]\n" + "Generation: 0%| | 0/5 [00:00#sk-container-id-10 {color: black;}#sk-container-id-10 pre{padding: 0;}#sk-container-id-10 div.sk-toggleable {background-color: white;}#sk-container-id-10 label.sk-toggleable__label {cursor: pointer;display: block;width: 100%;margin-bottom: 0;padding: 0.3em;box-sizing: border-box;text-align: center;}#sk-container-id-10 label.sk-toggleable__label-arrow:before {content: \"▸\";float: left;margin-right: 0.25em;color: #696969;}#sk-container-id-10 label.sk-toggleable__label-arrow:hover:before {color: black;}#sk-container-id-10 div.sk-estimator:hover label.sk-toggleable__label-arrow:before {color: black;}#sk-container-id-10 div.sk-toggleable__content {max-height: 0;max-width: 0;overflow: hidden;text-align: left;background-color: #f0f8ff;}#sk-container-id-10 div.sk-toggleable__content pre {margin: 0.2em;color: black;border-radius: 0.25em;background-color: #f0f8ff;}#sk-container-id-10 input.sk-toggleable__control:checked~div.sk-toggleable__content {max-height: 200px;max-width: 100%;overflow: auto;}#sk-container-id-10 input.sk-toggleable__control:checked~label.sk-toggleable__label-arrow:before {content: 
\"▾\";}#sk-container-id-10 div.sk-estimator input.sk-toggleable__control:checked~label.sk-toggleable__label {background-color: #d4ebff;}#sk-container-id-10 div.sk-label input.sk-toggleable__control:checked~label.sk-toggleable__label {background-color: #d4ebff;}#sk-container-id-10 input.sk-hidden--visually {border: 0;clip: rect(1px 1px 1px 1px);clip: rect(1px, 1px, 1px, 1px);height: 1px;margin: -1px;overflow: hidden;padding: 0;position: absolute;width: 1px;}#sk-container-id-10 div.sk-estimator {font-family: monospace;background-color: #f0f8ff;border: 1px dotted black;border-radius: 0.25em;box-sizing: border-box;margin-bottom: 0.5em;}#sk-container-id-10 div.sk-estimator:hover {background-color: #d4ebff;}#sk-container-id-10 div.sk-parallel-item::after {content: \"\";width: 100%;border-bottom: 1px solid gray;flex-grow: 1;}#sk-container-id-10 div.sk-label:hover label.sk-toggleable__label {background-color: #d4ebff;}#sk-container-id-10 div.sk-serial::before {content: \"\";position: absolute;border-left: 1px solid gray;box-sizing: border-box;top: 0;bottom: 0;left: 50%;z-index: 0;}#sk-container-id-10 div.sk-serial {display: flex;flex-direction: column;align-items: center;background-color: white;padding-right: 0.2em;padding-left: 0.2em;position: relative;}#sk-container-id-10 div.sk-item {position: relative;z-index: 1;}#sk-container-id-10 div.sk-parallel {display: flex;align-items: stretch;justify-content: center;background-color: white;position: relative;}#sk-container-id-10 div.sk-item::before, #sk-container-id-10 div.sk-parallel-item::before {content: \"\";position: absolute;border-left: 1px solid gray;box-sizing: border-box;top: 0;bottom: 0;left: 50%;z-index: -1;}#sk-container-id-10 div.sk-parallel-item {display: flex;flex-direction: column;z-index: 1;position: relative;background-color: white;}#sk-container-id-10 div.sk-parallel-item:first-child::after {align-self: flex-end;width: 50%;}#sk-container-id-10 div.sk-parallel-item:last-child::after {align-self: 
flex-start;width: 50%;}#sk-container-id-10 div.sk-parallel-item:only-child::after {width: 0;}#sk-container-id-10 div.sk-dashed-wrapped {border: 1px dashed gray;margin: 0 0.4em 0.5em 0.4em;box-sizing: border-box;padding-bottom: 0.4em;background-color: white;}#sk-container-id-10 div.sk-label label {font-family: monospace;font-weight: bold;display: inline-block;line-height: 1.2em;}#sk-container-id-10 div.sk-label-container {text-align: center;}#sk-container-id-10 div.sk-container {/* jupyter's `normalize.less` sets `[hidden] { display: none; }` but bootstrap.min.css set `[hidden] { display: none !important; }` so we also need the `!important` here to be able to override the default hidden behavior on the sphinx rendered scikit-learn.org. See: https://github.com/scikit-learn/scikit-learn/issues/21755 */display: inline-block !important;position: relative;}#sk-container-id-10 div.sk-text-repr-fallback {display: none;}
TPOTEstimator(classification=True, generations=5, max_eval_time_seconds=300,\n",
-       "              scorers=['roc_auc'], scorers_weights=[1],\n",
-       "              search_space=<tpot2.search_spaces.pipelines.graph.GraphPipeline object at 0x71f059a54400>,\n",
-       "              verbose=2)
In a Jupyter environment, please rerun this cell to show the HTML representation or trust the notebook.
On GitHub, the HTML representation is unable to render, please try loading this page with nbviewer.org.
" + "
TPOTEstimator(classification=True, generations=5, max_eval_time_seconds=300,\n",
+       "              population_size=10, processes=False, scorers=['roc_auc'],\n",
+       "              scorers_weights=[1],\n",
+       "              search_space=<tpot2.search_spaces.pipelines.graph.GraphPipeline object at 0x7ebd8bf94bb0>,\n",
+       "              verbose=2)
In a Jupyter environment, please rerun this cell to show the HTML representation or trust the notebook.
On GitHub, the HTML representation is unable to render, please try loading this page with nbviewer.org.
" ], "text/plain": [ "TPOTEstimator(classification=True, generations=5, max_eval_time_seconds=300,\n", - " scorers=['roc_auc'], scorers_weights=[1],\n", - " search_space=,\n", + " population_size=10, processes=False, scorers=['roc_auc'],\n", + " scorers_weights=[1],\n", + " search_space=,\n", " verbose=2)" ] }, @@ -755,10 +4835,17 @@ "\n", "\n", "#define the search space\n", + "# graph_search_space = tpot2.search_spaces.pipelines.GraphPipeline(\n", + "# root_search_space= tpot2.config.get_search_space(\"classifiers\"),\n", + "# leaf_search_space = tpot2.config.get_search_space(\"selectors\"), \n", + "# inner_search_space = tpot2.config.get_search_space([\"transformers\",\"classifiers\"]),\n", + "# max_size = 10,\n", + "# )\n", + "\n", "graph_search_space = tpot2.search_spaces.pipelines.GraphPipeline(\n", - " root_search_space= tpot2.config.get_search_space(\"classifiers\"),\n", + " root_search_space= tpot2.config.get_search_space([\"KNeighborsClassifier\", \"LogisticRegression\", \"DecisionTreeClassifier\"]),\n", " leaf_search_space = tpot2.config.get_search_space(\"selectors\"), \n", - " inner_search_space = tpot2.config.get_search_space([\"transformers\",\"classifiers\"]),\n", + " inner_search_space = tpot2.config.get_search_space([\"transformers\"]),\n", " max_size = 10,\n", ")\n", "\n", @@ -768,6 +4855,7 @@ " classification = True,\n", " cv = 5,\n", " search_space = graph_search_space,\n", + " population_size= 10,\n", " generations = 5,\n", " max_eval_time_seconds = 60*5,\n", " verbose = 2,\n", @@ -778,14 +4866,14 @@ }, { "cell_type": "code", - "execution_count": null, + "execution_count": 17, "metadata": {}, "outputs": [ { "name": "stdout", "output_type": "stream", "text": [ - "auroc score 0.9569231877561475\n" + "auroc score 0.9890552995391705\n" ] } ], @@ -800,12 +4888,12 @@ }, { "cell_type": "code", - "execution_count": null, + "execution_count": 18, "metadata": {}, "outputs": [ { "data": { - "image/png": "", + "image/png": "", "text/plain": [ "
" ] @@ -836,7 +4924,7 @@ "name": "python", "nbconvert_exporter": "python", "pygments_lexer": "ipython3", - "version": "3.10.13" + "version": "3.10.14" } }, "nbformat": 4, diff --git a/Tutorial/3_Feature_Set_Selector.ipynb b/Tutorial/3_Feature_Set_Selector.ipynb new file mode 100644 index 00000000..19767db2 --- /dev/null +++ b/Tutorial/3_Feature_Set_Selector.ipynb @@ -0,0 +1,1244 @@ +{ + "cells": [ + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "# Special Feature Selection nodes in TPOT2\n", + "\n", + "TPOT2 can use evolutionary algorithms to optimize feature selection simultaneously with pipeline optimization. There are two node search spaces included.\n", + "\n", + "1. FSSNode - (Feature Set Selector) This node is useful if you have predefined groups of features that you want to select from. For example, one group could include the first x columns, the next group could include the next y columns, etc. Each FeatureSetSelector Node will select a single group to be passed to the next step in the pipeline. This node is also useful if you want to select individual columns at a time, this will be used in tutorial 4 to create a symbolic regression search space. \n", + "\n", + "2. GeneticFeatureSelectorNode - Whereas FSSNode selects from a predefine list of subsets of features, this node instead uses evolutionary algorithms to optimize a novel subset from scratch. This is useful where there is no predefined grouping of features.\n", + "\n", + "\n", + "It may also be beneficial to pair these search spaces with a secondary objective function to minimize complexity. 
That would encourage TPOT to try to produce the simplest pipeline with the fewest number of features.\n", + "\n", + "tpot2.objectives.number_of_nodes_objective - This can be used as an other_objective_function that counts the number of nodes.\n", + "\n", + "tpot2.objectives.complexity_scorer - This is a scorer that can be used in the scorers parameter that tries to count the total number of learned parameters (number of coefficients, number of nodes in decision trees, etc.).\n" + ] + }, + { + "attachments": {}, + "cell_type": "markdown", + "metadata": {}, + "source": [ + "# Feature Set Selector\n", + "\n", + "The FeatureSetSelector is a subclass of sklearn.feature_selection.SelectorMixin that simply returns the manually specified columns. The parameter sel_subset specifies the name or index of the column that it selects. The transform function then simply indexes and returns the selected columns. You can also optionally name the group with the name parameter, though this is only for note keeping and is not used by the class.\n", + "\n", + "\n", + "sel_subset: list or int\n", + " If X is a dataframe, items in sel_subset list must correspond to column names\n", + " If X is a numpy array, items in sel_subset list must correspond to column indexes\n", + " int: index of a single column\n", + "\n", + "\n" + ] + }, + { + "cell_type": "code", + "execution_count": 1, + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "original DataFrame\n", + " a b c d e f\n", + "0 0 1 2 3 4 5\n", + "1 0 1 2 3 4 5\n", + "2 0 1 2 3 4 5\n", + "3 0 1 2 3 4 5\n", + "4 0 1 2 3 4 5\n", + "5 0 1 2 3 4 5\n", + "6 0 1 2 3 4 5\n", + "7 0 1 2 3 4 5\n", + "8 0 1 2 3 4 5\n", + "9 0 1 2 3 4 5\n", + "Transformed Data\n", + "[[0 1 2]\n", + " [0 1 2]\n", + " [0 1 2]\n", + " [0 1 2]\n", + " [0 1 2]\n", + " [0 1 2]\n", + " [0 1 2]\n", + " [0 1 2]\n", + " [0 1 2]\n", + " [0 1 2]]\n" + ] + } + ], + "source": [ + "import tpot2\n", + "import pandas as pd\n", + 
"import numpy as np\n", + "#make a dataframe with columns a,b,c,d,e,f\n", + "\n", + "#numpy array where columns are 1,2,3,4,5,6\n", + "data = np.repeat([np.arange(6)],10,0)\n", + "\n", + "df = pd.DataFrame(data,columns=['a','b','c','d','e','f'])\n", + "fss = tpot2.builtin_modules.FeatureSetSelector(name='test',sel_subset=['a','b','c'])\n", + "\n", + "print(\"original DataFrame\")\n", + "print(df)\n", + "print(\"Transformed Data\")\n", + "print(fss.fit_transform(df))" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "To use the FSS with TPOT2, you can simply pass it in to the configuration dictionary. Note that the FSS is only well defined when used in the leaf nodes of the graph. This is because downstream nodes will receive different transformations of the data such that the original indexes no longer correspond to the same columns in the raw data.\n", + "\n", + "TPOT2 includsing the string \"feature_set_selector\" in the leaf_config_dict parameter will include the FSS in the search space of the pipeline. By default, each FSS node will select a single column. You can also group columns into sets so that each node selects a set of features rather than a single feature.\n", + "\n", + "\n", + "\n", + "subsets : str or list, default=None\n", + " Sets the subsets that the FeatureSetSeletor will select from if set as an option in one of the configuration dictionaries.\n", + " - str : If a string, it is assumed to be a path to a csv file with the subsets. \n", + " The first column is assumed to be the name of the subset and the remaining columns are the features in the subset.\n", + " - list or np.ndarray : If a list or np.ndarray, it is assumed to be a list of subsets.\n", + " - None : If None, each column will be treated as a subset. One column will be selected per subset.\n", + " If subsets is None, each column will be treated as a subset. 
One column will be selected per subset.\n", + "\n", + "\n", + "Let's say you want to have three groups of features, with three columns each. The following examples are equivalent:\n", + "\n", + "### str\n", + "\n", + "sel_subsets=simple_fss.csv\n", + "\n", + "\n", + "\\# simple_fss.csv\n", + "\n", + "group_one, 1,2,3\n", + "\n", + "group_two, 4,5,6\n", + "\n", + "group_three, 7,8,9\n", + "\n", + "\n", + "### dict\n", + "\n", + "\n", + "sel_subsets = { \"group_one\" : [1,2,3],\n", + " \"group_two\" : [4,5,6],\n", + " \"group_three\" : [7,8,9],\n", + " }\n", + "\n", + "\n", + "### list\n", + "\n", + "\n", + "sel_subsets = [[1,2,3],[4,5,6],[7,8,9]]\n", + "\n", + "\n", + "\n", + "(As the FSS is just another transformer, you could also pass it in with the standard configuration dictionary format (described in tutorial 2), in which case you would have to define your own function that returns the hyperparameters, similar to the params_LogisticRegression function below.)\n", + "\n", + "\n", + "(In the future, FSS will be treated as a special case node with its own mutation/crossover functions to make it more efficient when there are large numbers of features.)" + ] + }, + { + "cell_type": "code", + "execution_count": 2, + "metadata": {}, + "outputs": [ + { + "data": { + "text/html": [ + "
\n", + "\n", + "\n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + "
abcdefghi
01.412867-1.1271430.7240590.0127190.1376170.4687240.2879770.3496210.671031
1-1.4594650.698617-0.8197860.4572250.5673080.2366090.5318910.6822630.391485
2-0.176559-0.4373060.2618830.8143600.7403320.2443210.2876910.5474820.936978
31.834518-1.486981-0.0595760.5338450.1247670.3672450.7554940.8385290.018747
41.316622-0.4299211.5977240.8048680.6724300.2659850.2620430.4121930.974847
\n", + "
" + ], + "text/plain": [ + " a b c d e f g \\\n", + "0 1.412867 -1.127143 0.724059 0.012719 0.137617 0.468724 0.287977 \n", + "1 -1.459465 0.698617 -0.819786 0.457225 0.567308 0.236609 0.531891 \n", + "2 -0.176559 -0.437306 0.261883 0.814360 0.740332 0.244321 0.287691 \n", + "3 1.834518 -1.486981 -0.059576 0.533845 0.124767 0.367245 0.755494 \n", + "4 1.316622 -0.429921 1.597724 0.804868 0.672430 0.265985 0.262043 \n", + "\n", + " h i \n", + "0 0.349621 0.671031 \n", + "1 0.682263 0.391485 \n", + "2 0.547482 0.936978 \n", + "3 0.838529 0.018747 \n", + "4 0.412193 0.974847 " + ] + }, + "execution_count": 2, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "import tpot2\n", + "import sklearn.datasets\n", + "from sklearn.linear_model import LogisticRegression\n", + "import numpy as np\n", + "import pandas as pd\n", + "\n", + "\n", + "X, y = sklearn.datasets.make_classification(n_samples=1000, n_features=3, n_informative=3, n_redundant=0, n_repeated=0, n_classes=2, n_clusters_per_class=2, weights=None, flip_y=0.01, class_sep=1.0, hypercube=True, shift=0.0, scale=1.0, shuffle=True, random_state=None)\n", + "X = np.hstack([X, np.random.rand(X.shape[0],6)]) #add six uninformative features\n", + "X = pd.DataFrame(X, columns=['a','b','c','d','e','f','g','h','i']) # a, b ,c the rest are uninformative\n", + "X_train, X_test, y_train, y_test = sklearn.model_selection.train_test_split(X, y, train_size=0.75, test_size=0.25)\n", + "\n", + "X.head()" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "# Feature Set Selector\n", + "\n", + "In this configuration, each FSS node considers a single column.\n", + "\n", + "The root node is a logistic regression and there are no other intermediate transformers. 
An additional objective function is included that seeks to minimize the number of leave nodes (i.e the number of selected features)" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [ + { + "name": "stderr", + "output_type": "stream", + "text": [ + "Generation: 0%| | 0/5 [00:00#sk-container-id-1 {\n", + " /* Definition of color scheme common for light and dark mode */\n", + " --sklearn-color-text: black;\n", + " --sklearn-color-line: gray;\n", + " /* Definition of color scheme for unfitted estimators */\n", + " --sklearn-color-unfitted-level-0: #fff5e6;\n", + " --sklearn-color-unfitted-level-1: #f6e4d2;\n", + " --sklearn-color-unfitted-level-2: #ffe0b3;\n", + " --sklearn-color-unfitted-level-3: chocolate;\n", + " /* Definition of color scheme for fitted estimators */\n", + " --sklearn-color-fitted-level-0: #f0f8ff;\n", + " --sklearn-color-fitted-level-1: #d4ebff;\n", + " --sklearn-color-fitted-level-2: #b3dbfd;\n", + " --sklearn-color-fitted-level-3: cornflowerblue;\n", + "\n", + " /* Specific color for light theme */\n", + " --sklearn-color-text-on-default-background: var(--sg-text-color, var(--theme-code-foreground, var(--jp-content-font-color1, black)));\n", + " --sklearn-color-background: var(--sg-background-color, var(--theme-background, var(--jp-layout-color0, white)));\n", + " --sklearn-color-border-box: var(--sg-text-color, var(--theme-code-foreground, var(--jp-content-font-color1, black)));\n", + " --sklearn-color-icon: #696969;\n", + "\n", + " @media (prefers-color-scheme: dark) {\n", + " /* Redefinition of color scheme for dark theme */\n", + " --sklearn-color-text-on-default-background: var(--sg-text-color, var(--theme-code-foreground, var(--jp-content-font-color1, white)));\n", + " --sklearn-color-background: var(--sg-background-color, var(--theme-background, var(--jp-layout-color0, #111)));\n", + " --sklearn-color-border-box: var(--sg-text-color, var(--theme-code-foreground, var(--jp-content-font-color1, 
white)));\n", + " --sklearn-color-icon: #878787;\n", + " }\n", + "}\n", + "\n", + "#sk-container-id-1 {\n", + " color: var(--sklearn-color-text);\n", + "}\n", + "\n", + "#sk-container-id-1 pre {\n", + " padding: 0;\n", + "}\n", + "\n", + "#sk-container-id-1 input.sk-hidden--visually {\n", + " border: 0;\n", + " clip: rect(1px 1px 1px 1px);\n", + " clip: rect(1px, 1px, 1px, 1px);\n", + " height: 1px;\n", + " margin: -1px;\n", + " overflow: hidden;\n", + " padding: 0;\n", + " position: absolute;\n", + " width: 1px;\n", + "}\n", + "\n", + "#sk-container-id-1 div.sk-dashed-wrapped {\n", + " border: 1px dashed var(--sklearn-color-line);\n", + " margin: 0 0.4em 0.5em 0.4em;\n", + " box-sizing: border-box;\n", + " padding-bottom: 0.4em;\n", + " background-color: var(--sklearn-color-background);\n", + "}\n", + "\n", + "#sk-container-id-1 div.sk-container {\n", + " /* jupyter's `normalize.less` sets `[hidden] { display: none; }`\n", + " but bootstrap.min.css set `[hidden] { display: none !important; }`\n", + " so we also need the `!important` here to be able to override the\n", + " default hidden behavior on the sphinx rendered scikit-learn.org.\n", + " See: https://github.com/scikit-learn/scikit-learn/issues/21755 */\n", + " display: inline-block !important;\n", + " position: relative;\n", + "}\n", + "\n", + "#sk-container-id-1 div.sk-text-repr-fallback {\n", + " display: none;\n", + "}\n", + "\n", + "div.sk-parallel-item,\n", + "div.sk-serial,\n", + "div.sk-item {\n", + " /* draw centered vertical line to link estimators */\n", + " background-image: linear-gradient(var(--sklearn-color-text-on-default-background), var(--sklearn-color-text-on-default-background));\n", + " background-size: 2px 100%;\n", + " background-repeat: no-repeat;\n", + " background-position: center center;\n", + "}\n", + "\n", + "/* Parallel-specific style estimator block */\n", + "\n", + "#sk-container-id-1 div.sk-parallel-item::after {\n", + " content: \"\";\n", + " width: 100%;\n", + " 
border-bottom: 2px solid var(--sklearn-color-text-on-default-background);\n", + " flex-grow: 1;\n", + "}\n", + "\n", + "#sk-container-id-1 div.sk-parallel {\n", + " display: flex;\n", + " align-items: stretch;\n", + " justify-content: center;\n", + " background-color: var(--sklearn-color-background);\n", + " position: relative;\n", + "}\n", + "\n", + "#sk-container-id-1 div.sk-parallel-item {\n", + " display: flex;\n", + " flex-direction: column;\n", + "}\n", + "\n", + "#sk-container-id-1 div.sk-parallel-item:first-child::after {\n", + " align-self: flex-end;\n", + " width: 50%;\n", + "}\n", + "\n", + "#sk-container-id-1 div.sk-parallel-item:last-child::after {\n", + " align-self: flex-start;\n", + " width: 50%;\n", + "}\n", + "\n", + "#sk-container-id-1 div.sk-parallel-item:only-child::after {\n", + " width: 0;\n", + "}\n", + "\n", + "/* Serial-specific style estimator block */\n", + "\n", + "#sk-container-id-1 div.sk-serial {\n", + " display: flex;\n", + " flex-direction: column;\n", + " align-items: center;\n", + " background-color: var(--sklearn-color-background);\n", + " padding-right: 1em;\n", + " padding-left: 1em;\n", + "}\n", + "\n", + "\n", + "/* Toggleable style: style used for estimator/Pipeline/ColumnTransformer box that is\n", + "clickable and can be expanded/collapsed.\n", + "- Pipeline and ColumnTransformer use this feature and define the default style\n", + "- Estimators will overwrite some part of the style using the `sk-estimator` class\n", + "*/\n", + "\n", + "/* Pipeline and ColumnTransformer style (default) */\n", + "\n", + "#sk-container-id-1 div.sk-toggleable {\n", + " /* Default theme specific background. 
It is overwritten whether we have a\n", + " specific estimator or a Pipeline/ColumnTransformer */\n", + " background-color: var(--sklearn-color-background);\n", + "}\n", + "\n", + "/* Toggleable label */\n", + "#sk-container-id-1 label.sk-toggleable__label {\n", + " cursor: pointer;\n", + " display: block;\n", + " width: 100%;\n", + " margin-bottom: 0;\n", + " padding: 0.5em;\n", + " box-sizing: border-box;\n", + " text-align: center;\n", + "}\n", + "\n", + "#sk-container-id-1 label.sk-toggleable__label-arrow:before {\n", + " /* Arrow on the left of the label */\n", + " content: \"▸\";\n", + " float: left;\n", + " margin-right: 0.25em;\n", + " color: var(--sklearn-color-icon);\n", + "}\n", + "\n", + "#sk-container-id-1 label.sk-toggleable__label-arrow:hover:before {\n", + " color: var(--sklearn-color-text);\n", + "}\n", + "\n", + "/* Toggleable content - dropdown */\n", + "\n", + "#sk-container-id-1 div.sk-toggleable__content {\n", + " max-height: 0;\n", + " max-width: 0;\n", + " overflow: hidden;\n", + " text-align: left;\n", + " /* unfitted */\n", + " background-color: var(--sklearn-color-unfitted-level-0);\n", + "}\n", + "\n", + "#sk-container-id-1 div.sk-toggleable__content.fitted {\n", + " /* fitted */\n", + " background-color: var(--sklearn-color-fitted-level-0);\n", + "}\n", + "\n", + "#sk-container-id-1 div.sk-toggleable__content pre {\n", + " margin: 0.2em;\n", + " border-radius: 0.25em;\n", + " color: var(--sklearn-color-text);\n", + " /* unfitted */\n", + " background-color: var(--sklearn-color-unfitted-level-0);\n", + "}\n", + "\n", + "#sk-container-id-1 div.sk-toggleable__content.fitted pre {\n", + " /* unfitted */\n", + " background-color: var(--sklearn-color-fitted-level-0);\n", + "}\n", + "\n", + "#sk-container-id-1 input.sk-toggleable__control:checked~div.sk-toggleable__content {\n", + " /* Expand drop-down */\n", + " max-height: 200px;\n", + " max-width: 100%;\n", + " overflow: auto;\n", + "}\n", + "\n", + "#sk-container-id-1 
input.sk-toggleable__control:checked~label.sk-toggleable__label-arrow:before {\n", + " content: \"▾\";\n", + "}\n", + "\n", + "/* Pipeline/ColumnTransformer-specific style */\n", + "\n", + "#sk-container-id-1 div.sk-label input.sk-toggleable__control:checked~label.sk-toggleable__label {\n", + " color: var(--sklearn-color-text);\n", + " background-color: var(--sklearn-color-unfitted-level-2);\n", + "}\n", + "\n", + "#sk-container-id-1 div.sk-label.fitted input.sk-toggleable__control:checked~label.sk-toggleable__label {\n", + " background-color: var(--sklearn-color-fitted-level-2);\n", + "}\n", + "\n", + "/* Estimator-specific style */\n", + "\n", + "/* Colorize estimator box */\n", + "#sk-container-id-1 div.sk-estimator input.sk-toggleable__control:checked~label.sk-toggleable__label {\n", + " /* unfitted */\n", + " background-color: var(--sklearn-color-unfitted-level-2);\n", + "}\n", + "\n", + "#sk-container-id-1 div.sk-estimator.fitted input.sk-toggleable__control:checked~label.sk-toggleable__label {\n", + " /* fitted */\n", + " background-color: var(--sklearn-color-fitted-level-2);\n", + "}\n", + "\n", + "#sk-container-id-1 div.sk-label label.sk-toggleable__label,\n", + "#sk-container-id-1 div.sk-label label {\n", + " /* The background is the default theme color */\n", + " color: var(--sklearn-color-text-on-default-background);\n", + "}\n", + "\n", + "/* On hover, darken the color of the background */\n", + "#sk-container-id-1 div.sk-label:hover label.sk-toggleable__label {\n", + " color: var(--sklearn-color-text);\n", + " background-color: var(--sklearn-color-unfitted-level-2);\n", + "}\n", + "\n", + "/* Label box, darken color on hover, fitted */\n", + "#sk-container-id-1 div.sk-label.fitted:hover label.sk-toggleable__label.fitted {\n", + " color: var(--sklearn-color-text);\n", + " background-color: var(--sklearn-color-fitted-level-2);\n", + "}\n", + "\n", + "/* Estimator label */\n", + "\n", + "#sk-container-id-1 div.sk-label label {\n", + " font-family: 
monospace;\n", + " font-weight: bold;\n", + " display: inline-block;\n", + " line-height: 1.2em;\n", + "}\n", + "\n", + "#sk-container-id-1 div.sk-label-container {\n", + " text-align: center;\n", + "}\n", + "\n", + "/* Estimator-specific */\n", + "#sk-container-id-1 div.sk-estimator {\n", + " font-family: monospace;\n", + " border: 1px dotted var(--sklearn-color-border-box);\n", + " border-radius: 0.25em;\n", + " box-sizing: border-box;\n", + " margin-bottom: 0.5em;\n", + " /* unfitted */\n", + " background-color: var(--sklearn-color-unfitted-level-0);\n", + "}\n", + "\n", + "#sk-container-id-1 div.sk-estimator.fitted {\n", + " /* fitted */\n", + " background-color: var(--sklearn-color-fitted-level-0);\n", + "}\n", + "\n", + "/* on hover */\n", + "#sk-container-id-1 div.sk-estimator:hover {\n", + " /* unfitted */\n", + " background-color: var(--sklearn-color-unfitted-level-2);\n", + "}\n", + "\n", + "#sk-container-id-1 div.sk-estimator.fitted:hover {\n", + " /* fitted */\n", + " background-color: var(--sklearn-color-fitted-level-2);\n", + "}\n", + "\n", + "/* Specification for estimator info (e.g. 
\"i\" and \"?\") */\n", + "\n", + "/* Common style for \"i\" and \"?\" */\n", + "\n", + ".sk-estimator-doc-link,\n", + "a:link.sk-estimator-doc-link,\n", + "a:visited.sk-estimator-doc-link {\n", + " float: right;\n", + " font-size: smaller;\n", + " line-height: 1em;\n", + " font-family: monospace;\n", + " background-color: var(--sklearn-color-background);\n", + " border-radius: 1em;\n", + " height: 1em;\n", + " width: 1em;\n", + " text-decoration: none !important;\n", + " margin-left: 1ex;\n", + " /* unfitted */\n", + " border: var(--sklearn-color-unfitted-level-1) 1pt solid;\n", + " color: var(--sklearn-color-unfitted-level-1);\n", + "}\n", + "\n", + ".sk-estimator-doc-link.fitted,\n", + "a:link.sk-estimator-doc-link.fitted,\n", + "a:visited.sk-estimator-doc-link.fitted {\n", + " /* fitted */\n", + " border: var(--sklearn-color-fitted-level-1) 1pt solid;\n", + " color: var(--sklearn-color-fitted-level-1);\n", + "}\n", + "\n", + "/* On hover */\n", + "div.sk-estimator:hover .sk-estimator-doc-link:hover,\n", + ".sk-estimator-doc-link:hover,\n", + "div.sk-label-container:hover .sk-estimator-doc-link:hover,\n", + ".sk-estimator-doc-link:hover {\n", + " /* unfitted */\n", + " background-color: var(--sklearn-color-unfitted-level-3);\n", + " color: var(--sklearn-color-background);\n", + " text-decoration: none;\n", + "}\n", + "\n", + "div.sk-estimator.fitted:hover .sk-estimator-doc-link.fitted:hover,\n", + ".sk-estimator-doc-link.fitted:hover,\n", + "div.sk-label-container:hover .sk-estimator-doc-link.fitted:hover,\n", + ".sk-estimator-doc-link.fitted:hover {\n", + " /* fitted */\n", + " background-color: var(--sklearn-color-fitted-level-3);\n", + " color: var(--sklearn-color-background);\n", + " text-decoration: none;\n", + "}\n", + "\n", + "/* Span, style for the box shown on hovering the info icon */\n", + ".sk-estimator-doc-link span {\n", + " display: none;\n", + " z-index: 9999;\n", + " position: relative;\n", + " font-weight: normal;\n", + " right: .2ex;\n", + " 
padding: .5ex;\n", + " margin: .5ex;\n", + " width: min-content;\n", + " min-width: 20ex;\n", + " max-width: 50ex;\n", + " color: var(--sklearn-color-text);\n", + " box-shadow: 2pt 2pt 4pt #999;\n", + " /* unfitted */\n", + " background: var(--sklearn-color-unfitted-level-0);\n", + " border: .5pt solid var(--sklearn-color-unfitted-level-3);\n", + "}\n", + "\n", + ".sk-estimator-doc-link.fitted span {\n", + " /* fitted */\n", + " background: var(--sklearn-color-fitted-level-0);\n", + " border: var(--sklearn-color-fitted-level-3);\n", + "}\n", + "\n", + ".sk-estimator-doc-link:hover span {\n", + " display: block;\n", + "}\n", + "\n", + "/* \"?\"-specific style due to the `` HTML tag */\n", + "\n", + "#sk-container-id-1 a.estimator_doc_link {\n", + " float: right;\n", + " font-size: 1rem;\n", + " line-height: 1em;\n", + " font-family: monospace;\n", + " background-color: var(--sklearn-color-background);\n", + " border-radius: 1rem;\n", + " height: 1rem;\n", + " width: 1rem;\n", + " text-decoration: none;\n", + " /* unfitted */\n", + " color: var(--sklearn-color-unfitted-level-1);\n", + " border: var(--sklearn-color-unfitted-level-1) 1pt solid;\n", + "}\n", + "\n", + "#sk-container-id-1 a.estimator_doc_link.fitted {\n", + " /* fitted */\n", + " border: var(--sklearn-color-fitted-level-1) 1pt solid;\n", + " color: var(--sklearn-color-fitted-level-1);\n", + "}\n", + "\n", + "/* On hover */\n", + "#sk-container-id-1 a.estimator_doc_link:hover {\n", + " /* unfitted */\n", + " background-color: var(--sklearn-color-unfitted-level-3);\n", + " color: var(--sklearn-color-background);\n", + " text-decoration: none;\n", + "}\n", + "\n", + "#sk-container-id-1 a.estimator_doc_link.fitted:hover {\n", + " /* fitted */\n", + " background-color: var(--sklearn-color-fitted-level-3);\n", + "}\n", + "
Pipeline(steps=[('featuresetselector',\n",
+       "                 FeatureSetSelector(name='group_one',\n",
+       "                                    sel_subset=['a', 'b', 'c'])),\n",
+       "                ('graphpipeline',\n",
+       "                 GraphPipeline(graph=<networkx.classes.digraph.DiGraph object at 0x7ff98829b100>))])
In a Jupyter environment, please rerun this cell to show the HTML representation or trust the notebook.
On GitHub, the HTML representation is unable to render, please try loading this page with nbviewer.org.
" + ], + "text/plain": [ + "Pipeline(steps=[('featuresetselector',\n", + " FeatureSetSelector(name='group_one',\n", + " sel_subset=['a', 'b', 'c'])),\n", + " ('graphpipeline',\n", + " GraphPipeline(graph=))])" + ] + }, + "execution_count": 4, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "est.fitted_pipeline_" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "Note that if you want to include multiple subsets, you can instead include the node as a leaf in the graph search space. This will produce a pipeline where all leaves as FSSNodes and all FSSNodes appear in the leaves (to prevent inner nodes from also being FSSNodes). Since the graph search space allows for multiple leaves, this pipeline can select multiple feature sets. " + ] + }, + { + "cell_type": "code", + "execution_count": 5, + "metadata": {}, + "outputs": [ + { + "name": "stderr", + "output_type": "stream", + "text": [ + "Generation: 0%| | 0/5 [00:00" + ] + }, + "metadata": {}, + "output_type": "display_data" + } + ], + "source": [ + "est.fitted_pipeline_.plot()" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "# Other examples" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "## dictionary" + ] + }, + { + "cell_type": "code", + "execution_count": 7, + "metadata": {}, + "outputs": [ + { + "name": "stderr", + "output_type": "stream", + "text": [ + "Generation: 0%| | 0/5 [00:00\n", - "\n", - "\n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", 
- " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - "
abcdefghi
0-1.290879-2.012016-1.0094340.0832512.350751-0.1922950.2665300.9893230.207050
1-2.329471-1.033893-2.656589-1.0254893.015554-1.1069470.5000590.8534730.596733
20.948998-0.1237830.530650-3.0253071.3910291.1761660.6624100.9452520.861687
3-3.2658662.1012295.1416770.5008880.613011-1.4708350.7347250.7188540.751557
4-2.232187-0.825902-1.4303462.3419290.8458660.3424700.2612210.9774950.732266
\n", - "" - ], - "text/plain": [ - " a b c d e f g \\\n", - "0 -1.290879 -2.012016 -1.009434 0.083251 2.350751 -0.192295 0.266530 \n", - "1 -2.329471 -1.033893 -2.656589 -1.025489 3.015554 -1.106947 0.500059 \n", - "2 0.948998 -0.123783 0.530650 -3.025307 1.391029 1.176166 0.662410 \n", - "3 -3.265866 2.101229 5.141677 0.500888 0.613011 -1.470835 0.734725 \n", - "4 -2.232187 -0.825902 -1.430346 2.341929 0.845866 0.342470 0.261221 \n", - "\n", - " h i \n", - "0 0.989323 0.207050 \n", - "1 0.853473 0.596733 \n", - "2 0.945252 0.861687 \n", - "3 0.718854 0.751557 \n", - "4 0.977495 0.732266 " - ] - }, - "execution_count": 2, - "metadata": {}, - "output_type": "execute_result" - } - ], - "source": [ - "import tpot2\n", - "import sklearn.datasets\n", - "from sklearn.linear_model import LogisticRegression\n", - "import numpy as np\n", - "import pandas as pd\n", - "\n", - "n_features = 6\n", - "X, y = sklearn.datasets.make_classification(n_samples=1000, n_features=n_features, n_informative=6, n_redundant=0, n_repeated=0, n_classes=2, n_clusters_per_class=2, weights=None, flip_y=0.01, class_sep=1.0, hypercube=True, shift=0.0, scale=1.0, shuffle=True, random_state=None)\n", - "X = np.hstack([X, np.random.rand(X.shape[0],3)]) #add three uninformative features\n", - "X = pd.DataFrame(X, columns=['a','b','c','d','e','f','g','h','i'])\n", - "X_train, X_test, y_train, y_test = sklearn.model_selection.train_test_split(X, y, train_size=0.75, test_size=0.25)\n", - "\n", - "X.head()" - ] - }, - { - "cell_type": "code", - "execution_count": 3, - "metadata": {}, - "outputs": [], - "source": [ - "def params_LogisticRegression(trial, name=None):\n", - " params = {}\n", - " params['solver'] = trial.suggest_categorical(name=f'solver_{name}',\n", - " choices=[f'newton-cg', 'lbfgs', 'liblinear', 'sag', 'saga'])\n", - " params['dual'] = False\n", - " params['penalty'] = 'l2'\n", - " params['C'] = trial.suggest_float(f'C_{name}', 1e-4, 1e4, log=True)\n", - " params['l1_ratio'] = None\n", - " 
if params['solver'] == 'liblinear':\n", - " params['penalty'] = trial.suggest_categorical(name=f'penalty_{name}', choices=['l1', 'l2'])\n", - " if params['penalty'] == 'l2':\n", - " params['dual'] = trial.suggest_categorical(name=f'dual_{name}', choices=[True, False])\n", - " else:\n", - " params['penalty'] = 'l1'\n", - "\n", - " params['class_weight'] = trial.suggest_categorical(name=f'class_weight_{name}', choices=['balanced'])\n", - " param_grid = {'solver': params['solver'],\n", - " 'penalty': params['penalty'],\n", - " 'dual': params['dual'],\n", - " 'multi_class': 'auto',\n", - " 'l1_ratio': params['l1_ratio'],\n", - " 'C': params['C'],\n", - " }\n", - " return param_grid\n", - "\n", - "\n", - "\n", - "root_config_dict = {LogisticRegression: params_LogisticRegression}" - ] - }, - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "# Feature selection for single classifier\n", - "\n", - "In this configuration, each FSS node considers a single column.\n", - "\n", - "The root node is a logistic regression and there are no other intermediate transformers. An additional objective function is included that seeks to minimize the number of leave nodes (i.e the number of selected features)" - ] - }, - { - "cell_type": "code", - "execution_count": 4, - "metadata": {}, - "outputs": [ - { - "name": "stderr", - "output_type": "stream", - "text": [ - "Generation: 100%|██████████| 20/20 [00:13<00:00, 1.52it/s]\n" - ] - }, - { - "name": "stdout", - "output_type": "stream", - "text": [ - "0.9074667008196723\n" - ] - }, - { - "data": { - "image/png": "", - "text/plain": [ - "
" - ] - }, - "metadata": {}, - "output_type": "display_data" - } - ], - "source": [ - "import tpot2\n", - "import sklearn.datasets\n", - "from sklearn.linear_model import LogisticRegression\n", - "import numpy as np\n", - "\n", - "\n", - "est = tpot2.TPOTEstimator(population_size=40,generations=20, \n", - " scorers=['roc_auc_ovr'],\n", - " scorers_weights=[1],\n", - " other_objective_functions=[tpot2.objectives.number_of_leaves_objective],\n", - " other_objective_functions_weights=[-1],\n", - " n_jobs=32,\n", - " classification=True,\n", - " leaf_config_dict=\"feature_set_selector\",\n", - " root_config_dict=root_config_dict,\n", - " inner_config_dict=None,\n", - " subsets=None,\n", - " verbose=1,\n", - " )\n", - "\n", - "\n", - "scorer = sklearn.metrics.get_scorer('roc_auc_ovo')\n", - "\n", - "est.fit(X_train, y_train)\n", - "print(scorer(est, X_test, y_test))\n", - "est.fitted_pipeline_.plot()" - ] - }, - { - "cell_type": "code", - "execution_count": 5, - "metadata": {}, - "outputs": [ - { - "name": "stdout", - "output_type": "stream", - "text": [ - "FeatureSetSelector_1 : FeatureSetSelector(name='3', sel_subset=['d'])\n", - "FeatureSetSelector_2 : FeatureSetSelector(name='4', sel_subset=['e'])\n", - "FeatureSetSelector_3 : FeatureSetSelector(name='5', sel_subset=['f'])\n" - ] - } - ], - "source": [ - "# print the selected features for each FSS\n", - "\n", - "#get leaves\n", - "leaves = [v for v, d in est.fitted_pipeline_.graph.out_degree() if d == 0]\n", - "for l in leaves:\n", - " print(l, \" : \", est.fitted_pipeline_.graph.nodes[l]['instance'])" - ] - }, - { - "cell_type": "code", - "execution_count": 6, - "metadata": {}, - "outputs": [ - { - "name": "stdout", - "output_type": "stream", - "text": [ - "LogisticRegression_1 : LogisticRegression(C=3371.8568398103916, solver='saga')\n", - "FeatureSetSelector_1 : FeatureSetSelector(name='3', sel_subset=['d'])\n", - "FeatureSetSelector_2 : FeatureSetSelector(name='4', sel_subset=['e'])\n", - "FeatureSetSelector_3 : 
FeatureSetSelector(name='5', sel_subset=['f'])\n" - ] - } - ], - "source": [ - "# print all hyperparameters\n", - "for n in est.fitted_pipeline_.graph.nodes:\n", - " print(n, \" : \", est.fitted_pipeline_.graph.nodes[n]['instance'])" - ] - }, - { - "cell_type": "code", - "execution_count": 7, - "metadata": {}, - "outputs": [ - { - "data": { - "image/png": "", - "text/plain": [ - "
" - ] - }, - "metadata": {}, - "output_type": "display_data" - } - ], - "source": [ - "pareto_front = est.evaluated_individuals[est.evaluated_individuals['Pareto_Front'] == 1]\n", - "\n", - "#plot the pareto front of number_of_leaves_objective vs roc_auc_score\n", - "\n", - "import matplotlib.pyplot as plt\n", - "plt.scatter(pareto_front['number_of_leaves_objective'], pareto_front['roc_auc_score'])\n", - "plt.xlabel('Number of Selected Features')\n", - "plt.ylabel('roc_auc_score')\n", - "plt.show()" - ] - }, - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "# Feature selection with arithmetic transformers to create features for final classifier \n", - "\n", - "here we include arithmetic operators in the inner nodes that can combine and transform the selected features. \n", - "\n", - "We now use the number of nodes objective to minimize the complexity of the resulting equation. This minimized the number of selected features and the number of arithmetic operators" - ] - }, - { - "cell_type": "code", - "execution_count": 8, - "metadata": {}, - "outputs": [ - { - "name": "stderr", - "output_type": "stream", - "text": [ - "Generation: 100%|██████████| 20/20 [00:13<00:00, 1.44it/s]\n" - ] - }, - { - "name": "stdout", - "output_type": "stream", - "text": [ - "0.9307120901639344\n" - ] - }, - { - "data": { - "image/png": "", - "text/plain": [ - "
" - ] - }, - "metadata": {}, - "output_type": "display_data" - } - ], - "source": [ - "\n", - "est = tpot2.TPOTEstimator(population_size=40,generations=20, \n", - " scorers=['roc_auc_ovr'],\n", - " scorers_weights=[1],\n", - " other_objective_functions=[tpot2.objectives.number_of_nodes_objective],\n", - " other_objective_functions_weights=[-1],\n", - " n_jobs=32,\n", - " classification=True,\n", - " leaf_config_dict=\"feature_set_selector\",\n", - " root_config_dict=root_config_dict,\n", - " inner_config_dict=\"arithmetic_transformer\",\n", - " subsets = None,\n", - " verbose=1,\n", - " )\n", - "\n", - "\n", - "est.fit(X_train,y_train)\n", - "print(sklearn.metrics.get_scorer('roc_auc_ovr')(est, X_test, y_test))\n", - "\n", - "est.fitted_pipeline_.plot()" - ] - }, - { - "cell_type": "code", - "execution_count": 9, - "metadata": {}, - "outputs": [ - { - "name": "stdout", - "output_type": "stream", - "text": [ - "FeatureSetSelector_1 : FeatureSetSelector(name='5', sel_subset=['f'])\n", - "FeatureSetSelector_2 : FeatureSetSelector(name='1', sel_subset=['b'])\n", - "FeatureSetSelector_3 : FeatureSetSelector(name='4', sel_subset=['e'])\n", - "FeatureSetSelector_4 : FeatureSetSelector(name='3', sel_subset=['d'])\n", - "FeatureSetSelector_5 : FeatureSetSelector(name='0', sel_subset=['a'])\n" - ] - } - ], - "source": [ - "# print the selected features for each FSS\n", - "\n", - "#get leaves\n", - "leaves = [v for v, d in est.fitted_pipeline_.graph.out_degree() if d == 0]\n", - "for l in leaves:\n", - " print(l, \" : \", est.fitted_pipeline_.graph.nodes[l]['instance'])" - ] - }, - { - "cell_type": "code", - "execution_count": 10, - "metadata": {}, - "outputs": [ - { - "name": "stdout", - "output_type": "stream", - "text": [ - "LogisticRegression_1 : LogisticRegression(C=1.3234861148420467, solver='liblinear')\n", - "FeatureSetSelector_1 : FeatureSetSelector(name='5', sel_subset=['f'])\n", - "FeatureSetSelector_2 : FeatureSetSelector(name='1', sel_subset=['b'])\n", - 
"FeatureSetSelector_3 : FeatureSetSelector(name='4', sel_subset=['e'])\n", - "mul_neg_1_Transformer_1 : mul_neg_1_Transformer()\n", - "EQTransformer_1 : EQTransformer()\n", - "FeatureSetSelector_4 : FeatureSetSelector(name='3', sel_subset=['d'])\n", - "NETransformer_1 : NETransformer()\n", - "FeatureSetSelector_5 : FeatureSetSelector(name='0', sel_subset=['a'])\n" - ] - } - ], - "source": [ - "# print all hyperparameters\n", - "for n in est.fitted_pipeline_.graph.nodes:\n", - " print(n, \" : \", est.fitted_pipeline_.graph.nodes[n]['instance'])" - ] - }, - { - "cell_type": "code", - "execution_count": 11, - "metadata": {}, - "outputs": [ - { - "data": { - "image/png": "", - "text/plain": [ - "
" - ] - }, - "metadata": {}, - "output_type": "display_data" - } - ], - "source": [ - "pareto_front = est.evaluated_individuals[est.evaluated_individuals['Pareto_Front'] == 1]\n", - "\n", - "#plot the pareto front of number_of_leaves_objective vs roc_auc_score\n", - "\n", - "plt.scatter(pareto_front['number_of_nodes_objective'], pareto_front['roc_auc_score'])\n", - "plt.xlabel('Number of Nodes')\n", - "plt.ylabel('roc_auc_score')\n", - "plt.show()" - ] - }, - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "# Examples of FSS that select from groups of features rather than individual features" - ] - }, - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "## dictionary" - ] - }, - { - "cell_type": "code", - "execution_count": 12, - "metadata": {}, - "outputs": [ - { - "name": "stderr", - "output_type": "stream", - "text": [ - "Generation: 100%|██████████| 20/20 [00:26<00:00, 1.31s/it]\n", - "/home/ribeirop/miniconda3/envs/tpot2env/lib/python3.10/site-packages/sklearn/linear_model/_sag.py:350: ConvergenceWarning: The max_iter was reached which means the coef_ did not converge\n", - " warnings.warn(\n" - ] - }, - { - "name": "stdout", - "output_type": "stream", - "text": [ - "0.9699667008196722\n" - ] - }, - { - "data": { - "image/png": "", - "text/plain": [ - "
" - ] - }, - "metadata": {}, - "output_type": "display_data" - } - ], - "source": [ - "import tpot2\n", - "import pandas as pd\n", - "import numpy as np\n", - "from sklearn.linear_model import LogisticRegression\n", - "import sklearn\n", - "\n", - "subsets = { \"group_one\" : ['a','b','c'],\n", - " \"group_two\" : ['d','e','f'],\n", - " \"group_three\" : ['g','h','i'],\n", - " }\n", - "\n", - "est = tpot2.TPOTEstimator(population_size=40,generations=20, \n", - " scorers=['roc_auc_ovr',tpot2.objectives.complexity_scorer],\n", - " scorers_weights=[1,-1],\n", - " n_jobs=32,\n", - " classification=True,\n", - " leaf_config_dict=\"feature_set_selector\",\n", - " root_config_dict=root_config_dict,\n", - " inner_config_dict=\"transformers\",\n", - " subsets = subsets,\n", - " verbose=1,\n", - " )\n", - "\n", - "\n", - "est.fit(X_train,y_train)\n", - "print(sklearn.metrics.get_scorer('roc_auc_ovr')(est, X_test, y_test))\n", - "\n", - "est.fitted_pipeline_.plot()" - ] - }, - { - "cell_type": "code", - "execution_count": 13, - "metadata": {}, - "outputs": [ - { - "name": "stdout", - "output_type": "stream", - "text": [ - "FeatureSetSelector_1 : FeatureSetSelector(name='group_one', sel_subset=['a', 'b', 'c'])\n", - "FeatureSetSelector_2 : FeatureSetSelector(name='group_two', sel_subset=['d', 'e', 'f'])\n", - "FeatureSetSelector_3 : FeatureSetSelector(name='group_three', sel_subset=['g', 'h', 'i'])\n" - ] - } - ], - "source": [ - "# print the selected features for each FSS\n", - "\n", - "#get leaves\n", - "leaves = [v for v, d in est.fitted_pipeline_.graph.out_degree() if d == 0]\n", - "for l in leaves:\n", - " print(l, \" : \", est.fitted_pipeline_.graph.nodes[l]['instance'])" - ] - }, - { - "cell_type": "code", - "execution_count": 14, - "metadata": {}, - "outputs": [ - { - "name": "stdout", - "output_type": "stream", - "text": [ - "LogisticRegression_1 : LogisticRegression(C=0.06776401610163652, solver='saga')\n", - "FeatureSetSelector_1 : 
FeatureSetSelector(name='group_one', sel_subset=['a', 'b', 'c'])\n", - "PolynomialFeatures_1 : PolynomialFeatures(include_bias=False)\n", - "FeatureSetSelector_2 : FeatureSetSelector(name='group_two', sel_subset=['d', 'e', 'f'])\n", - "MaxAbsScaler_1 : MaxAbsScaler()\n", - "PCA_1 : PCA(n_components=0.9574868087370769)\n", - "FeatureSetSelector_3 : FeatureSetSelector(name='group_three', sel_subset=['g', 'h', 'i'])\n", - "MaxAbsScaler_2 : MaxAbsScaler()\n" - ] - } - ], - "source": [ - "# print all hyperparameters\n", - "for n in est.fitted_pipeline_.graph.nodes:\n", - " print(n, \" : \", est.fitted_pipeline_.graph.nodes[n]['instance'])" - ] - }, - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "## list" - ] - }, - { - "cell_type": "code", - "execution_count": 15, - "metadata": {}, - "outputs": [ - { - "name": "stderr", - "output_type": "stream", - "text": [ - "Generation: 100%|██████████| 20/20 [00:21<00:00, 1.07s/it]\n" - ] - }, - { - "name": "stdout", - "output_type": "stream", - "text": [ - "0.9712474385245903\n" - ] - }, - { - "data": { - "image/png": 
"iVBORw0KGgoAAAANSUhEUgAAAnYAAAHWCAYAAAD6oMSKAAAAOXRFWHRTb2Z0d2FyZQBNYXRwbG90bGliIHZlcnNpb24zLjcuMSwgaHR0cHM6Ly9tYXRwbG90bGliLm9yZy/bCgiHAAAACXBIWXMAAA9hAAAPYQGoP6dpAACnsElEQVR4nOzdd1QUVxsG8GcLHSkrCohiRenVhoDGkmjU2GLXCGokllhiLDHGaIwm1sTeu0lM1YRYEmOJBrAgvQmo9Ca49KXt7nx/qPs5LirIwizL+zvHc5J3dmceLPDu3Dv38hiGYUAIIYQQQpo8PtcBCCGEEEKIalBjRwghhBCiIaixI4QQQgjRENTYEUIIIYRoCGrsCCGEEEI0BDV2hBBCCCEagho7QgghhBANQY0dIYQQQoiGoMaOEEIIIURDUGNHCCGEEKIhqLEjhBBCCNEQ1NgRQgghhGgIauwIIYQQQjQENXaEEEIIIRqCGjtCCCGEEA1BjR0hhBBCiIagxo4QQgghRENQY0cIIYQQoiGosSOEEEII0RDU2BFCCCGEaAhq7AghhBBCNAQ1doQQQgghGoIaO0IIIYQQDUGNHSGEEEKIhqDGjhBCCCFEQ1BjRwghhBCiIaixI4QQQgjRENTYEUIIIYRoCCHXAQghRJVkMhnEYjFyc3ORm5uLvJwcVJaXQy6TgS8QQEdPD60sLGBubg5zc3OIRCIIBAKuYxNCiErwGIZhuA5BCCH1VVBQgMjISESHhaGirAyMVArD8nIYi8XQkkrBZxjIeTxUC4UoEolQqqcHnlAIXQMDOLm7w8XFBaamplx/GYQQUi/U2BFCmrSsrCwEBwYiOSkJWhIJrNPSYSkWw7isDFoy2QvfVy0QoMjAANkiEdKs26FaXx8dbWzg5eMDS0vLRvwKCCFEdaixI4Q0SVKpFEFBQQgJCoJhfj66pKahbX4+BHJ5nc8l4/ORYWaGe+2tUWpmhh5eXvDy8oJQSLNVCCFNCzV2hJAmJycnB+cCAlCQkQnbpCTYZGaCr4JvZXIeD0lWVrhrYwNRWysMHTECFhYWKkhMCCGNgxo7QkiTkpqaijM//QT9rGx4xMfDSCJR+TWK9fURamcHSZs2GD1hPNq3b6/yaxBCSEOgxo4Q0mSkpqbit1On0DI1DT3j4iB8jWHX2pLy+bjlYA+xtTXenTSJmjtCSJNA69gRQpqEnJwcnPnpJ4hS09A7NrZBmzoAEMrl8IyJhSgtDWd++hk5OTkNej1CCFEFauwIIWpPKpXiXEAA9LOy0SsuTiXz6WqDzzDoFRsHvewsnA8IgFQqbZTrEkLI66LGjhCi9oKCglCQkQmP+PgGv1P3PKFcDo+4eIgzMxEcHNyo1yaEkLqixo4QotaysrIQEhQE26SkBnlQojaMJRJ0S0zC7cBAZGdnc5KBEEJqgxo7QohaCw4MhGF+PmwyMznN0TUzE4b5+QgKDOQ0ByGEvAw1doQQtVVQUIDkpCR0SU1rtHl1L8JnGHROTUNyYiIKCgo4zUIIIS9CjR0hRG1FRkZCSyJB2/x8rqMAANrl50MokSAqKorrKIQQUiNq7AghakkmkyE6LAzWaemvtU1YQxDI5Wifno6o0FDIXrIPLSGEcIUaO0KIWhKLxagoK4OlWMx1FBbLR49zidUsFyGEANTYEUJeQigUwtXVVfGrvLy8zufYtGnTa107NzcXjFQKk9JSVn1XWiqGhoVieFgoxkSEI72i4qXnOZiRXq/397x5g/X/xmVlYKRS5ObmvvR927ZtQ1VV1UtfUxulpaUYOHAgDA0NsWTJknqfjxCi2YRcByCEqC8TExNERETU6xybNm3CsmXL6vQemUyG3NxcGJaXs9atCysuxq2iIvzh6gYtPh85lZXQE7z88+nBjAzMatvutd//PC2ZDIbl5cjNzYWjo+MLX7dt2za8//770NbWrtV55XI5+HzlLFpaWli9ejViY2Nx//79OmUlhDQ/dMeOEFInf//9Nzw
9PeHm5oapU6cq7kr5+/vDw8MDDg4O2LJlCwBg5cqVKCwshKurK2bPno2UlBR0795dca4lS5bg2LFjAIAOHTrgk08+gZubG65cuYLTv/6KLUeP4p2wMHz14AEAIK+qCqZCLWg9aYAsdHRgLNQCAPxXUIDxkREYGR6GJQl3USWX45uUFJRIpRgRHobP7yXV+f3PO5CRjjER4dhw6BCOHj6sqK9fvx5OTk5wdnbGt99+i927dyMrKwt9+vTBiBEjAAAnT56Ek5MTHB0dsXnzZgBASkoKnJycMHHiRNjb29d4R1RHRwd9+/aFnp7ea/6JEUKaE7pjRwh5oadNGQB0794dGzZswObNm3HlyhXo6enh888/x8GDBzFv3jxs2LABIpEIUqkUPj4+mDBhAtavX4/9+/cr7vqlpKS89Hrt2rVDeHg44uPjcfv2bax/+210T07B0oQEXBWL4WVigp1pqXg79A68TEwxsnVrOLVoAXF1NQ5lZOCEoxN0BQJsT03Bzzk5WNyhA37MyUaAmzsAoFQqrdP7p7Zpo8gWWFCAnMpK/ObiirBOnbA25DZiYmKQlpaGK1eu4M6dO9DR0YFYLIZIJMLmzZsRHBwMQ0NDZGZmYs2aNQgJCYG+vj769OmDAQMGoGXLloiPj8f3338PZ2fnhvgjJIQ0M9TYEUJe6Pmh2LNnzyIqKgqenp4AgMrKSgwbNgwAcOrUKRw6dAgymQwZGRm4e/cu2rVrV6frjRs3DgBw+fJl3H/wACuSk6FXVYUKmRyOhoboLxLhdzd33CosRHBRIabHxGC7rS2qGDkSJGUYHxUJAKiSy/GGSKR0fkOh8LXfH1hYgH/FBbhTHI7yuFhIBAIkJiYiMDAQ06dPh46ODgBAVMN1Q0JCMHDgQMWxsWPHIjAwECNHjkTXrl2pqSOEqAw1doSQWpPL5Rg2bBiOHj3Kqj948AC7d+/GjRs3YGxsjLFjx6KyslLp/UKhEPJnhjiff42+vr7iOv18fDBJJILb/Qfsc/B48DI1hZepKURCLVwSP4K3iSneMBVhQ9eur/waXvf9cgb40NoaY8zNEd65Myp8vDFmzBgE1nMniqdfMyGEqALNsSOE1JqnpyeuXr2K1NRUAEBxcTGSk5NRUlICQ0NDGBkZISMjA5cuXVK8RyAQKNZ8a926NbKyslBSUoLS0lL8888/NV5n4MCBCAkNhVgqBQA8qqrCw6oqPJBIkPZkHhrDMEiUlKGNjg7cjFrgVlEhMp884VoqlSqedhXweJA92bXidd7/lLepCX7JzUG5TIYqoRBFJSUoKirCoEGDcPToUUWT+nQZlBYtWqCkpAQA0LNnT1y+fBkFBQWorKzE6dOn4ePj89p/DoQQ8iJ0x44QUmutWrXCwYMH8e6776Kqqgp8Ph/btm3DG2+8ATs7O9ja2qJDhw7w9vZWvMfX1xdOTk7o27cv9u3bh2XLlsHNzQ3W1tZwcnKq8ToODg6Y5uuLdYcOYVtZGbT4fGy06YpKRo619++j9Emj6GBgiPcs20BXIMC6LjaYfzce1XI5eDweVnbshHa6uhjd2hzDw0LRw9gY4y0s6vz+p/qainBPIsH4yAgUJyVCdPMGxk+ahKFDhyI0NBTu7u7Q0tLC9OnTsXDhQsyaNQv9+/dH165dERAQgNWrV6Nv375gGAa+vr5wd3d/5ZzDp7p164a8vDxUV1fjxx9/xM2bN9G2bdvX/FMkhGgyHsNwvAEjIYTUICYmBud/+QXDr12Hlhrt8lAtEOBsv74YOm7cS5c7IYQQLtBQLCFELZmbm4MnFKLIwIDrKCxFBgbgCYUwNzfnOgohhCihoVhCiFoSiUTQNTBAtkgEs+JiruMoZLd8nKump1/r49GjRxg4cCCrpqOjg1u3bqn0OoQQzUaNHSFELQkEAji5uyPi0SPYp6VBUMOCwY1NxucjtV07uHt4QCAQqPTcLVu2rPcuH4QQQkOxhBC15eLigmp9fWSYmb30ddVSKR7mPUR
WdjaKnzyJWhvFJSXIys7Gw7yHqH7yBO7LpJuZQaqvT+vOEULUFjV2hBC1ZWpqio42NrjX3hpyHq/G18gZBmKxGFKpFACD0tKSWjVp1VIpSktLADCQSqUQi8WQv+RZMjmPh/vtrdGxa1eYmpq+5ldECCENixo7Qoha8/LxQamZGZKsrGo8XlxcDJns1Y3cq8hkUhS/ZC5fopUVSs3M4PXMUi6EEKJuqLEjhKg1S0tL9PDywl0bGxQ/t0tDRWUlJJIyVk1bWwdawldPH9YSCqGtrcOqSSRlqKhhx4wifX0kdLVBT29vWFpavsZXQQghjYMaO0KI2vPy8oJpWyuE2tlByn/8bUvOMCgsLGS9jsfjw8TEpNbnNTExAY/H/jZYVFjIGpKV8vkItbeDyMoKffr0ee2vgRBCGgM1doQQtScUCjFsxAhI2rTBLQd7yHk8FBUVQS5nL1xsZGQEYR2eVhUKBDAyMmLVZHIZioqKADyeV3fLwR7llm0wdMQICGtxJ5AQQrhEjR0hpEmwsLDA6AnjIba2RqBtN5RWsYdMdXR0YfDcUG1tGOjrQ0dHl1UrL5egrKoKNxwdILa2xugJ42FhYVGv/IQQ0hiosSOENBnt27fHwLffRqKBASL69oXkyd22x0Owxq99XhNjY9aQbJmREa66uEBsbY13J01C+/bt652dEEIaA+0VSwhpMhiGwfjx43H9+nWMGDYMVqamsLl7F/YP82Coq/vqE7yEpLwc4qJCZHXtiiRbW2SKxSivrsbJkyfBe8FSK4QQom6osSOENBk//vgjJk2aBODxzhR9+vRBfy8vWFZWonNqGtrl57/WDhUyPh/pZmaINW+NXD09BIWEIDg4GDKZDKdOncLEiRNV/aUQQkiDoMaOENIkZGdnw8HBAQUFBYpay5Yt8e+//+JufDySExMhlEjQPj0dlo/EMC4rg5ZM9sLzVQsEKDIwQHZLEVLbtYNUXx+W7drhy/XrkZiYqHidSCRCTEwMLXNCCGkS6BEvQojaYxgG/v7+rKYOAPbu3QtHR0c4OjqioKAAUVFRiAoNxf2yMjBSKQzLy2EkLoC2VAo+I4ecx0eVUIhikSlK9fTAEwqha2AAdw8PODs7w9TUFNq6upgwYYLiGmKxGP7+/ggICKAhWUKI2qM7doQQtXf06FHMmDGDVZswYQJ+/PFHpdfKZDKIxWLk5uYiNzcXeTk5qKqogEwqhUAohLauLlpZWMDc3Bzm5uYQiUQQPLdEyoQJE/Dzzz8rZfDz81P510YIIapEjR0hRK2lpaXB0dERJSUlipq5uTliY2PRsmXLBrlmfn4+HB0dkZubq6gZGRkhJiYG7dq1a5BrEkKIKtByJ4QQtSWXyzFz5kxWUwcABw8ebLCmDgDMzMxw4MABVq24uBgzZswAfRYmhKgzauwIIWpr3759uHTpEqvm5+eHd955p8GvPWLECPj6+rJqly5dwr59+xr82oQQ8rpoKJYQopbu378PZ2dnSCQSRa1t27aIiYmBsfHrL0ZcF4WFhXB0dERmZqaiZmBggMjISHTu3LlRMhBCSF3QHTtCiNqRyWTw8/NjNXUAcPjw4UZr6gDAxMQER44cYdXKysowffp0yF9jvTxCCGlo1NgRQtTO9u3bERgYyKrNnj0bb731VqNneeutt/DBBx+wav/99x+2b9/e6FkIIeRVaCiWEKJW4uPj4ebmhsrKSkWtY8eOiIqKgqGhISeZSkpK4OLiguTkZEVNR0cHERERsLW15SQTIYTUhO7YEULUhlQqha+vL6up4/F4OHbsGGdNHQC0aNECR48eZdUqKyvh6+sLqVTKUSpCCFFGjR0hRG1s3LgRISEhrNqiRYvQt29fjhL9X79+/bBo0SJW7fbt29i0aRM3gQghpAY0FEsIUQuRkZHo0aMHqqurFbVu3bohPDwcenp6HCb7v/Lycri6urL2ktXS0kJISAhcXFw4TEYIIY/RHTtCCOeqqqowbdo0VlPH5/Nx/Ph
xtWnqAEBPTw/Hjx8Hn///b53V1dXw9fVFVVUVh8kIIeQxauwIIZz78ssvERUVxaotX74cvXr14ijRi/Xu3RvLli1j1SIjI/Hll19ylIgQQv6PhmIJIZwKCQmBp6cnZDKZoubk5ISQkBDo6OhwmOzFKisr0b17d8TExChqAoEAN27cQI8ePThMRghp7qixI4Rwpry8HB4eHoiPj1fUhEIhQkJC4Orqyl2wWggPD0fPnj1ZT8Xa2dkhLCwMurq6HCYjhDRnNBRLCOHMqlWrWE0dAHz++edq39QBgJubG1atWsWqxcfH47PPPuMoESGE0B07QghHAgMD0bdvXzz7LcjDwwM3btyAlpYWh8lqr7q6Gp6enggNDVXUeDwerl+/Dm9vbw6TEUKaK2rsCCGNrqysDC4uLrh//76ipqOjg9DQUDg4OHCYrO5iY2Ph7u7Oeiq2c+fOiIyMhIGBAYfJCCHNEQ3FEkIa3fLly1lNHfD4ydim1tQBgIODg9ITsffv38fy5cs5SkQIac7ojh0hpFFdvnwZgwYNYtX69OmD69evQyAQcJSqfmQyGXx8fHDjxg1W/dKlSxg4cCBHqQghzRE1doSQRlNUVARnZ2ekpaUpanp6eoiMjISNjQ2HyeovKSkJLi4uKC8vV9Ssra0RHR0NIyMjDpMRQpoTGoolhDSaxYsXs5o6ANi0aVOTb+oAwMbGBhs3bmTV0tLSsHjxYo4SEUKaI7pjRwhpFOfOncPw4cNZtf79++PSpUusLbqaMrlcjkGDBuHq1aus+tmzZzFs2DCOUhFCmhNq7AghDU4sFsPBwQE5OTmKWosWLRAVFYUOHTpwF6wBpKSkwMnJCaWlpYqapaUlYmJiIBKJOExGCGkONONjMiFErc2fP5/V1AHAN998o3FNHQB06NAB33zzDauWnZ2N+fPnc5SIENKc0B07QkiD+u233zB27FhW7e2338a5c+fA4/E4StWwGIbB0KFD8ddff7Hqv/76K959912OUhFCmgNq7AghDebhw4dwcHBAfn6+omZiYoLY2Fi0adOGw2QNLzMzE46OjigsLFTUzMzMEBsbi9atW3MXjBCi0WgolhDSIBiGwQcffMBq6gBg165dGt/UAYCVlRV27tzJquXn52POnDmgz9OEkIZCjR0hpEF8//33+P3331m10aNHY/LkydwE4sCUKVMwevRoVu306dP44YcfOEpECNF0NBRLCFE5Gob8v+Y8HE0IaXx0x44QolIMw+D9999nNXUAsG/fvmbX1AFA69atsXfvXlatsLAQ77//Pg3JEkJUjho7QohKHT58WOlp0MmTJzfrp0HHjh2LSZMmsWoXLlzAkSNHOEpECNFUNBRLCFEZWpz3xV60SPPt27fxzz//IC0tDdOnT4e9vT2HKQkhTR01doQQlaDttF6tpm3V9PT0UF5eDgAwMjJCamoqTExMOEhHCNEE1NgRQlRi586dWLBgAas2Y8YMHD58mKNE6mnmzJkvHYL96aefMH78eMhkMojFYuTm5iI3Nxd5OTmoLC+HXCYDXyCAjp4eWllYwNzcHObm5hCJRBAIBI34lRBC1BE1doSQektKSoKLi4vizhMAWFtbIzo6GkZGRhwmUz/Xr1/HwIEDIZVKazy+fv169OnTB9FhYagoKwMjlcKwvBzGYjG0pFLwGQZyHg/VQiGKRCKU6umBJxRC18AATu7ucHFxgampaSN/VYQQdUGNHSGkXmQyGXx8fHDjxg1W/dKlSxg4cCBHqdRTcnIyXF1dUVxcrHTMwsIC3n36wNHWFsYArNPSYSkWw7isDFoy2QvPWS0QoMjAANkiEdKs26FaXx8dbWzg5eMDS0vLBvxqCCHqSMh1AEJI0/bNN98oNXXz5s2jpq4GV65cUWrqBAIB+vTpA68ePWBWWopOIXdgV1kJgVxeq3NqyWQwKy6GWXEx7NPSkGFmhnuPHuH7e/fQw8sLXl5eEArpWz0hzQXdsSOEvLbY2Fi4u7ujqqp
KUevcuTMiIyNhYGDAYTL1FB0dDXd3d8UwbOvWrTFi2DBYmZrC5u5dtElMhBaPD3Nz83pdR87jIcnKCndtbCBqa4WhI0bAwsJCFV8CIUTNUWNHCHkt1dXV8PT0RGhoqKLG4/Fw/fp1eHt7c5hMvV28eBFz5syBVCrF+FGjYCmRwC40FPrP3MmztGwDngquVayvj1A7O0jatMHoCePRvn17FZyVEKLOaIFiQshr+frrr1lNHQAsXryYmrpXeOutt/D333/jfV9fdCwohOv166ymDng8b1EVjCQS+ISHwyQlGb+dOoXU1FSVnJcQor7ojh0hpM7Cw8PRs2dP1pOddnZ2CAsLg66uLofJ1F9OTg5+PHECJskp6BUdjdLiYkgkZc+8ggdLS0uV3LF7Ss7j4YajAwo7dMTEae/RsCwhGozu2BFC6qSyshLTpk1jNXUCgQDHjx+npu4VpFIpzgUEQD8rG73i4iDk8WBibAwzs1bQ0tKGllALZmZmKm3qAIDPMOgVGwe97CycDwh44VIrhJCmjxo7QkidfPHFF4iJiWHVVqxYgR49enCUqOkICgpCQUYmPOLjIXzmqVdtLS20MjNDq1atoK2l1SDXFsrl8IiLhzgzE8HBwQ1yDUII96ixI4TU2s2bN7Fx40ZWzcXFBatWreIoUdORlZWFkKAg2CYlwUgi4SSDsUSCbolJuB0YiOzsbE4yEEIaFjV2hJBakUgk8PX1hfyZO01aWlo4fvw4tLW1OUzWNAQHBsIwPx82mZmc5uiamQnD/HwEBQZymoMQ0jCosSOE1MrKlSuRmJjIqq1evRouLi4cJWo6CgoKkJyUhC6paeBz/Lwan2HQOTUNyYmJKCgo4DQLIUT1qLEjhLzStWvXsH37dlatR48eWL58OUeJmpbIyEhoSSRom5/PdRQAQLv8fAglEkRFRXEdhRCiYtTYEUJeqrS0FNOnT8ezKyPp6Ojg+PHjtFVVLchkMkSHhcE6Lb3W24Q1NIFcjvbp6YgKDVXZmnmEEPVAjR0h5KWWLl2K5ORkVu2rr76CnZ0dR4maFrFYjIqyMliKxVxHYbF89DiXWM1yEULqhxo7QsgLXbx4Efv27WPVvL29sXDhQo4S1Z1QKISrqyscHR0xbtw4SF7yRKqfnx/Onj2r0uvn5uaCkUphUlr6wtecys7Guby8l55nalQUEsseL2TcP+Q23gkLxYjwMIwID0NaeXmdc/2RcBfVlZXIzc2t83tfl0Qiwdtvvw1bW1s4ODhg586djXZtQpoLauwIITUqLCzEzJkzWTV9fX0cO3YMAoGAo1R1Z2JigoiICMTExEBbW1upUW1oubm5MCwvZ61b97xJlpYY1qpVnc77o4srAtzcEeDmDms9vTrn+i4jA7qlpXVq7FQxbPvJJ5/g7t27uHXrFnbv3o179+7V+5yEkP+jxo4QUqNFixYhIyODVduyZQs6d+7MUaL68/Hxwb1795Cfn4933nkHzs7OeOONN5CSksJ63eXLlzFp0iTF/x8+fBhLlixBSkoKXFxc4OvrCzs7O0yYMEEx9/DixYuKO4OLFy9W1EeNGoU/zp3DkNA7mBMXi5CiIkyMisSgOyEIf7JH7I7UVJzMygLw+O7dmIhwvBMWhsUJd1Fdy3l50SUlmBIVidHh4fggNhaF1dUAgO2pqRgTEY5hYaH46sF9AMB3WVl4WFWFbSdOYNnSpQAAMzMzxbl27dqFNWvWAADeeOMNLFq0CN27d8fJkyfx999/w9PTE25ubpg6dSqqqqogk8kwdepU2Nvbw8nJCUePHq0xo76+Pvr16wcAMDQ0RLdu3Wg9PUJUjBo7QoiSgIAAHD9+nFUbNGgQZs+ezVGi+pNKpbhw4QKcnJywZs0a+Pj4ICoqCnPmzMGCBQtYrx0wYAAiIiJQ/KTxOnnyJHx9fQEA8fHxWL58OeLi4pCbm4vAwECUl5dj1qxZ+P333xEVFYWEhAScOXMGwOOHT3q1bYu/PLqjXC7Hd9l
Z+MHJGas7d8GBjHSlnG+bmeG0qxv+dHeHmZY2LrzgSdqJkREYER6G92NjUC2XY0PyA+y2s8cZNze82bIl9j85t2+bNjjt6oazbu7IqqxEaHERprZpg9ba2lj39lAsnDfvlb93WlpauHPnDoYPH47NmzfjypUrCA8PR6dOnXDw4EFEREQgOTkZcXFxiI6OxpgxY155zvT0dERFRcHd3f2VryWE1B490kYIYcnPz4e/vz+rZmRkhMOHD4PHU/Uupg2vsLAQrq6uAIC+ffti5syZ6NmzJ86fPw8AGD9+vNKcQR6Ph/Hjx+Pnn3/GoEGDUFJSAicnJ6SkpKBbt26wt7cHALi5uSElJQUtWrRAt27d0KFDBwDAlClT8N9//2HMmDHQ0dGBs4UF8CAZXfUN0ElPD3weD1319ZFRUamU925ZGbalpaJUKkWJTAZdfs2fv390cYXBkyHxxLIy3C0rw7SYaACAjGHQRV8fAHCjqBCHMjJQJZfjUXU1fExN4WFk/PjrZOS12jd23LhxAB7vPBIVFQVPT08Aj/cNHjZsGCZPnoysrCzMmzcPI0eOxFtvvfXS81VWVmLChAnYvHkzDAwMXnl9QkjtUWNHCGGZN2+e0ryrbdu2wdramqNE9fN0jt3L1NSw+vn5wdfXF9nZ2Zg2bZqirqOjo/hvgUDwynlnWlpakD85P58HaD9p1Pg8HuRQXqz406QkHHBwQBd9fZzMykJmZcVLzw8AcgD2hoY46eTMqlfK5Vj/4AFOu7qhtbY2NiQ/QJX8/9dkeHwInixZ8+zvQWUlu+HUf9IkyuVyDBs2rMah1ujoaJw/fx7ffvstLl68iC1bttSYlWEYTJs2DUOHDsXYsWNf+bURQuqGhmIJIQo//fQTfv75Z1Zt+PDh8PPz4yZQA/H29sYPP/wAAPj111/Rs2dPpdd07NgRQqEQBw8exOTJk196vm7duiExMRGpqamQy+U4deoU+vbtqzheXYf1/srlMphpaaFKLn/lk7JPddLTQ3ZlJWJKSwAAVXI57kskqJTLwQNgIhSiRCrFpUePFO8xEAhQzDDQ1tUFABgbGyM1NRXV1dUvfDLY09MTV69eRWpqKgCguLgYycnJyM/Ph1wux/jx47FmzZqXNtIrVqyAvr4+Pvvss1p9bYSQuqE7doQQAEBOTg7mzp3LqpmamuLAgQNNcgj2ZdasWQM/Pz+cOHECIpEIx44dq/F1EyZMwLlz59DqFU+s6unp4cCBAxg5ciSkUineeustjBo1CgDA5/NRJBLVOtt8a2uMiYhAS20t2NdymFKbz8c2W1use/AAZVIZ5GAwt501OuvrY3RrcwwNC0VrbW24tmiheM94Cwus+/svdEpJxsQpU7Bu3ToMGDAAFhYWsLW1rfE6rVq1wsGDB/Huu++iqqoKfD4f27Ztg6mpKfz8/CCXyyEUCrFt27Ya35+RkYGNGzfC3t5eMTy+ceNGDB48uNa/P4SQl+MxDMcbFxJCOMcwDEaNGoWAgABW/dSpU5g4cSJHqbjn5+eH0aNHY+TIka99jpiYGJz/5RcMv3YdWmq0y0O1QICz/fpi6LhxcHR05DoOIURFaCiWEIITJ04oNXVjx47FhAkTOErEPUdHR+Tm5uKdd96p13nMzc3BEwpRpGYPCRQZGIAnFMLc3JzrKIQQFaKhWEKaufT0dKWnQlu3bo09e/Zo3BBsXcTExKjkPCKRCLoGBsgWiWD2ZPkUdZDd8nEuUR2Gieti9OjRSlvRnTx5Ek5OTg1yPULIY9TYEdKMMQyD999/H0VFRaz6/v37XzmvjNSOQCCAk7s7Ih49gn1aGgS1XHC4Icn4fKS2awd3D48G20Xk6Tp+hJDGRUOxhDRjBw4cwMWLF1m19957TzHxn6iGi4sLqvX1kfHM7g6qxDAMSkpLUFRUBGkt5vGlm5lBqq8PZ2fnV76WENK0UGNHSDP14MEDfPzxx6xamzZtsH37do4SaS5TU1N0tLHBvfbWijXtVKm
oqAglJSUok5QhLy/vpc2dnMfD/fbW6Ni1K0xNTVWehRDCLWrsCGmG5HI5pk+fjrKyMlb98OHD9MO+gXj5+KDUzAxJVlYqP3fVk31hAYBh5CgsLKxh6ePHEq2sUGpmBi9vb5XnIIRwjxo7QpqhHTt24Pr166zarFmzMGTIEI4SaT5LS0v08PLCXRsbFD/ZyUFVdJ/ZDQMAqqoqlZp2ACjS10dCVxv09PaGpaWlSjMQQtQDNXaENDMJCQlYsWIFq9ahQwds3bqVo0TNh5eXF0zbWiHUzg7SF+wB+zoMW7SAQMB+Fq6kuJi1D6yUz0eovR1EVlbo06ePyq5NCFEv1NgR0oxIpVL4+vqiooK9/+iRI0fQ4pldCUjDEAqFGDZiBCRt2uCWg73K5tvxeTyYmJiwagwYxZCsnMfDLQd7lFu2wdARIyCswxZnhJCmhRo7QpqRLVu24NatW6zaggUL0L9/f44SNT8WFhYYPWE8xNbWuOHooLI7dzra2jB4bhHkquoqFEskuOHoALG1NUZPGA8LCwuVXI8Qop5oSzFCmono6Gh4eHig+pmJ9jY2NoiIiIC+iud8kVdLTU3FmZ9+hn5WFjzi42EkkdT7nAzD4GFeHmSyx0OwZUZGuNu9O2QdOmD8lClo3759va9BCFFv1NgR0gxUVVWhd+/eCA8PV9T4fD7+++8/mm/FoZycHJwLCEBBRiZsk5Jgk5kJfj2/JVdVVeGhWIysrjZIsrVFpliMpORk/PPPP9DS0lJRckKIuqKJFoQ0A+vXr2c1dQCwZMkSauo4ZmFhAd8ZMxAUFIQQXR1kWFqgc2oa2uXnv9YOFTI+H1lt2yK+R3dk6+ggKCQEwcHBkMlkWL9+PdasWaP6L4IQolbojh0hGi40NBS9evWC7JlFax0cHHDnzh3o6upymIw8KysrC8FBQUhOTIRQIkH79HRYPhLDuKwMWi9ZcLhaIECRgQGyW4qQ2q4dpPr6aN+5M7bv3Ing4GDF64RCIW7evAkPD4/G+HIIIRyhxo4QDVZRUQEPDw/ExcUpagKBALdu3aIf8GqqoKAAUVFRiAoNRUVZGRipFIbl5TASF0BbKgWfkUPO46NKKESxyBSlenrgCYXQNTCAs4cHnJ2dYWpq+sKGPjQ0FDrPrXtHCNEc1NgRosGWL1+OTZs2sWqrV6+mIbkmQCaTQSwWIzc3F7m5ucjLyUFVRQVkUikEQiG0dXXRysIC5ubmMDc3h0gkgkAgYJ1j9erVWLt2Lau2fPlybNiwoTG/FEJII6LGjhANFRwcDG9vbzz7T9zNzQ23bt2iSfTNRFVVFXr16oWIiAhFjc/nIzAwEJ6entwFI4Q0GGrsCNFAZWVlcHV1xb179xQ1bW1t3LlzB05OThwmI42NlrkhpHmhBYoJ0UArVqxgNXUA8MUXX1BT1ww5OTnhiy++YNWSkpKUtpUjhGgGumNHiIa5evUqBgwYwKr17t0b//33H20l1UxJpVJ4e3sr7Tpy5coV2nWEEA1DjR0hGqS4uBjOzs5ITU1V1HR1dREREYFu3bpxmIxwLSEhAa6urqx9gjt06ICoqCjaJ5gQDUJDsYRokCVLlrCaOgDYsGEDNXUE3bp1w9dff82qpaSkYMmSJRwlIoQ0BLpjR4iGuHDhAoYOHcqq9evXD1euXAFfRRvNk6ZNLpejf//+uH79Oqt+4cIFDBkyhKNUhBBVosaOEA1QUFAAR0dHZGVlKWoGBgaIjo5Gx44dOUxG1M2DBw/g7OyMsrIyRc3KygrR0dEwNTXlMBkhRBXoYzwhGmDBggWspg4Atm7dSk0dUdKpUyds2bKFVcvMzMTChQs5SkQIUSW6Y0dIE3fmzBmMGTOGVXvrrbfw119/gcfjcZSKqDOGYTB48GD8888/rPqZM2cwatQobkIRQlSCGjtCmrC8vDw4ODggLy9PUTM2NkZMTAzatm3LYTKi7tLT0+Ho6Iji4mJFrXXr1oiNjYWZmRmHyQgh9UFDsYQ0UQz
DYM6cOaymDgB27NhBTR15pXbt2mHHjh2s2sOHDzFnzhzQ531Cmi66Y0dIE3Xq1ClMnjyZVRsxYgR+//13GoIltcIwDEaOHIk///yTVT916hQmTpzIUSpCSH1QY0dIE5SdnQ0HBwcUFBQoai1btkRMTAwsLCw4TEaampycHDg4OEAsFitqIpEIMTExsLS05DAZIeR10FAsIU0MwzCYNWsWq6kDgD179lBTR+rMwsICe/fuZdXEYjH8/f1pSJaQJogaO0KamGPHjuHcuXOs2oQJEzB+/HiOEpGmbvz48Up/f86ePYvjx49zlIgQ8rpoKJaQJiQtLQ2Ojo4oKSlR1MzNzREbG4uWLVtymIw0dfn5+XB0dERubq6iZmRkhJiYGLRr147DZISQuqA7doQ0EXK5HDNnzmQ1dQBw4MABaupIvZmZmeHAgQOsWnFxMWbMmEFDsoQ0IdTYEdJE7Nu3D5cuXWLVfH19MWLECI4SEU0zYsQI+Pr6smqXLl3Cvn37OEpECKkrGoolpAm4f/8+nJ2dIZFIFLW2bdsiOjoaJiYm3AUjGqewsBCOjo7IzMxU1AwMDBAZGYnOnTtzmIwQUht0x44QNSeTyeDn58dq6gDg8OHD1NQRlTMxMcGRI0dYtbKyMkyfPh1yuZyjVISQ2qLGjhA1t337dgQGBrJqs2fPxltvvcVRIqLp3nrrLXzwwQes2n///Yft27dzlIgQUls0FEuIGouPj4ebmxsqKysVtY4dOyIqKgqGhoYcJiOarqSkBC4uLkhOTlbUdHR0EBERAVtbWw6TEUJehu7YEaKmpFIpfH19WU0dj8fD0aNHqakjDa5FixY4evQoq1ZZWQlfX19IpVKOUhFCXoUaO0LU1MaNGxESEsKqLVy4EP369eMoEWlu+vXrh0WLFrFqt2/fxqZNm7gJRAh5JRqKJUQNRUZGokePHqiurlbUunXrhvDwcOjp6XGYjDQ35eXlcHV1RWJioqKmpaWFkJAQuLi4cJiMEFITumNHiJqpqqrCtGnTWE0dn8/H8ePHqakjjU5PTw/Hjx8Hn///HxfV1dXw9fVFVVUVh8kIITWhxo4QNfPll18iKiqKVVu+fDl69erFUSLS3PXu3RvLli1j1SIjI/Hll19ylIgQ8iI0FEuIGgkJCYGnpydkMpmi5uTkhJCQEOjo6HCYjDR3lZWV6N69O2JiYhQ1gUCAGzduoEePHhwmI4Q8ixo7QtREeXk5PDw8EB8fr6gJhULcvn0bbm5uHCYj5LHw8HD07NmT9VSsnZ0dwsLCoKury2EyQshTNBRLiJpYtWoVq6l7WqOmjqgLNzc3rFq1ilWLj49XqhFCuEN37AjhSFFREdLS0uDg4IDg4GD07dsXz/5z9PDwwI0bN6ClpcVhSkLYqqur4enpidDQUEWNx+Ph+vXr8Pb25jAZIQSgxo4QToSEhGDw4MEoKChAly5dIJFIkJWVpTiura2NsLAwODg4cJiSkJrFxsbC3d2d9VRs586dERkZCQMDAw6TEUJoKJYQDmzZsgUFBQUAgHv37rGaOgBYt24dNXVEbTk4OCg9EXv//n0sX76co0SEkKfojh0h9SCTySAWi5Gbm4vc3Fzk5eSgsrwccpkMfIEAOnp6aGVhAXNzc5ibm0MkEkEgEMDe3l5pPt1TTk5OCA8Ph0AgaOSvhpDak8lk8PHxwY0bN1j1S5cuYeDAgRylIoRQY0fIaygoKEBkZCSiw8JQUVYGRiqFYXk5jMViaEml4DMM5DweqoVCFIlEKNXTA08ohK6BARzd3DBx4kQ8fPiwxnMLBAIcO3YMU6dObeSvipC6SUpKgouLC8rLyxU1a2trREdHw8jIiMNkhDRf1NgRUgdZWVkIDgxEclIStCQSWKelw1IshnFZGbSeWXvuedUCAYoMDJAtEiGlXVvkV1UhKTkZgcHByMnJUXq9mZkZHj58CB6P15BfDiH1tnPnTixYsIBVmzlzJg4dOsRRIkKaN2rsCKkFqVS
KoKAghAQFwTA/H11S09A2Px8CubzO55JIpUjU00WajQ3yDQ0RFBKC4OBg1qLEVlZWSE9Pp8aOqD25XI5Bgwbh6tWrrPrZs2cxbNgwjlIR0nxRY0fIK+Tk5OBcQAAKMjJhm5QEm8xM8Ovxz6ZMIkFRUSHkPB6yunZFkq0tMsViBJw/j4cPH8LQ0BA//fQThg4dqsKvgpCGk5KSAicnJ5SWlipqlpaWiImJgUgk4jAZIc0PNXaEvERqairO/PQT9LOy4REfDyOJpN7nFBeIUVFRofh/iZER4j08kK2vj3KZDKtWrYK5uXm9r0NIYzp48CD8/f1ZtcmTJ+P777/nKBEhzRMtd0LIC6SmpuK3U6dgmpwCn/BwlTR1AMDjsf/Z6RcXo9fNW7CrrESntm1ZTR8hTcX777+PIUOGsGo//PADfvvtN44SEdI80R07QmqQk5ODH0+cgElyCjxjY+s19Po8mVyOvLw8yOUy8Hh8mJgYQ09XD3IeDzccHVDYoSMmTnsPFhYWKrsmIY0hMzMTjo6OKCwsVNTMzMwQGxuL1q1bcxeMkGaE7tgR8hypVIpzAQHQz8pGr7g4lTZ1ACDg82Fhbg5LyzawtLCAnq4eAIDPMOgVGwe97CycDwhgbbROSFNgZWWFnTt3smr5+fmYM2cO6B4CIY2DGjtCnhMUFISCjEx4xMdD+BpPvdZWTc+7CuVyeMTFQ5yZieDg4Aa7NiENZcqUKRg9ejSrdvr0afzwww8cJSKkeaHGjpBnZGVlISQoCLZJSSqbU1dXxhIJuiUm4XZgILKzsznJQMjr4vF42LdvH8zMzFj1Dz/8UGnrPEKI6lFjR8gzggMDYZifD5vMTE5zdM3MhGF+PoICAznNQcjraN26Nfbu3cuqFRYW4v3336chWUIaGDV2hDxRUFCA5KQkdElNU/m8urriMww6p6YhOTERBQUFnGYh5HWMHTsWkyZNYtUuXLiAI0eOcJSIkOaBGjtCnoiMjISWRIK2+flcRwEAtMvPh1AiQVRUFNdRCHktu3btUnq6+6OPPkJqaipHiQjRfNTYEQJAJpMhOiwM1mnpr7VNWEMQyOVon56OqNBQ1nZjhDQVIpFIac/YkpISzJgxA3I1+XdGiKahxo40KWlpaRg2bBhsbGzQpUsXrF69+rXm7KSkpKB79+6K/xeLxTh4+DCyk5Je+r6DGems/7cL/A8jwsMUv87k5tY5y8tYPhKjoqwMYrFYaTL667h9+za6d+8OLS0tnD17VgUJCXm5YcOGYcaMGazalStXsGfPHo4SEaLZqLEjTQbDMBg9ejQmT56MpKQkxMTEICwsDDt27Kj3uXNzc8EwDAzLy1/6uoMZGaz/byEUIsDNXfFrtIq3AjMuKwMjlSK3jg3ji+7wtWnTBocPH1aa+0RIQ/rmm2/Qrl07Vm358uVIesUHKUJI3Qm5DkBIbV2+fBmGhoaYMmUKAEBXVxc7duyAj48PCgoKkJGRgcTERGRkZOCrr77CxIkTAQAbN27Er7/+isrKSkybNg1LlixROndubi6EMhkEgsfDQ4EFBdiUkgwZw8DLxBQrOnbEt6mpKJFKMSI8DK4tWmBtF5sXZu158wbGmJsjsKAAIi0t7LN3gL5AgORyCT6/dw+F1VJo8Xk47ugELR4Pn927h4SyUmjz+fiyiw3sDQ0hrq7Coui7SI2NQVhCguLcMpkMy5Ytw/Xr11FVVYVly5ZhypQpOHbsGAICAiAWiyESiXD69GmlXG3btkXbtm3B59NnOtJ4jI2NceTIEbz55puKmkQigZ+fH65fvw6BQMBhOkI0CzV2pMmIi4uDu7s7q9axY0eUlZWhuLgY9+/fx+XLl5GWlobBgwdj4sSJuHjxIjIyMnD79m3I5XK8+eabGDJkCAwNDREXFwdXV1cAQGFBAfJycoCu3VAhk+Gze0n4zskZbXR08EFcLC4+eoTFHTrgx5xsBLj9P8PTRu+pTzt2Qm8TExRKpfAxNcUnHTthaUI
CLj7Kx6jW5liSkICP2neAt6kpymQyaPN4OJGVBUOBAH+6eyCiuBjLExPxp7s7dqal4Q2RCA4DBuKq7P+7UBw+fBiWlpYICQlBeXk5evfurdijMzIyEuHh4TAyMmrAPwlC6m7QoEGYO3cuawg2ODgY3377bY0ftgghr4c+thONMXz4cGhpaaFz586KvSovXryIc+fOwc3NDR4eHkhNTUViYiIAwN7eHhEREYiIiMAXq1bBydISAPCgvBwd9fTQVlcXfB4PI1q1RmhxUY3XfH4otreJCQDAQCCAl4kpAMDR0BCZFZUolUpRJJXC29RU8RotPh93iosx4sk+mq5GRqiUy1EilSK0uBjDzFpBWypFTw8PxTUvXryIQ4cOwdXVFZ6enigqKsKDBw8AAIMHD6amjqitjRs3onPnzqzaZ599hri4OI4SEaJ56I4daTLs7e2VhheTk5NhYGAAIyMj6OjoKL1HLpdj9erV8PX1ZdVTUlLYr5PJABWuXafF+/+GYXweD7LXPDePxwOfkUP2zL6xcrkc+/fvR79+/VivjY2Nhb6+/usFJqQRGBoa4tixY+jbt6/ioafKykr4+voiODgYWlpaHCckpOmjO3akyRg4cCCKiopw6tQpAI9/ICxatOilwzhvvfUWDh06BMmT7cFSUlJQVKR8940vEABPmrFOenpIKS9HZkUF5AyDs3l56G5kDAAQ1KNJMxQKYSwUIujJgsNlMhmq5XJ0NzLCn3kPAQCRJSXQFfDRQiiEh5ERzuflQc7j43ZoKOtr2rNnj+IBiZiYGFoOhTQZ3t7eWLx4Mat2584dbNiwgaNEhGgWauxIk8Hj8XDmzBmcOHECNjY2sLe3h5OTExYsWPDC9wwZMgSjR49G79694ejoiKlTp6KiokLpdTp6epA/aex0BQJ82cUGc+Lj8E54GNrr6eHNli0BAKNbm2N4WCg+v/f4ab6nc+ye/jr6iq3INnfthj3paXgnLBS+0dGokMsxxdISJVIp3gkLxdr797DBpisAYL61Na6IH2HxH3+g4JlmdNasWejQoQPc3Nzg6OiIjz76qNZLvkRFRaFt27b45Zdf4OfnB09Pz1q9jxBV+vLLL2Fra8uqrV27FhEREdwEIkSD8BjauI8QXL58GQl//403b9zkOoqSfzx7o9vgwRg4cCDXUQhRmdu3b6NPnz6su81OTk4ICQmpcVoFIaR26I4dIQDMzc1RqqeHajVbdqFaIECpnh7MVbw+HiFc69mzJz755BNWLTo6GmvXruUoESGagRo7QvC4seMJhSgyMOA6CkuRgQF4QmGdG7u///4brq6urF/z5s1roJSEvJ7PP/8czs7OrNqGDRtw69YtjhIR0vTRU7GE4PGelroGBsgWiWBWXMxJhsqqKlRVVUFbSwsCoQB8vgDZLR/nEolEdTrX4MGDMXjw4AZKSohqaGtr48SJE+jRoweqq6sBPH7q29fXF+Hh4dDT0+M4ISFND92xIwSAQCCAk7s70qzbQVaLXRnkcjmqqqtfa5/amhSXlODRo3yUlBTjkfgRHj58iMyHuYgTiXA1MBBDhgzB8ePHVXItQtSJi4sLVq9ezaolJCRg5cqVHCUipGmjxo6QJ1xcXFCtr48MM7OXvq6quhoP8x4iPz8PD/PyIJPL631tSVmZUi2/XTtIhEJcuXIFly5dgp+fH/766696X4sQdbN8+XL06NGDVdu2bRuuX7/OUSJCmi5q7Ah5wtTUFB1tbHCvvbVi6ZPnyRkGBQUFkD9p5mQyaY3Lp9QV/7mHNuQ8HtK7dEFicjJr3T1aDoJoIqFQiOPHj7OehmUYBn5+figtLeUwGSFNDzV2hDzDy8cHpWZmSLKyqvF4cXExZM/s2woAQhU8SWtqagrg/81kVteuyDc0RFBwsKLG5/MxfPjwel+LEHVkZ2eH9evXs2rJyclYunQpR4kIaZqosSPkGZaWlujh5YW7NjYofm57rorKSkgk7CFTbW0daKtgzS0toRA
tWrQAAJQZGSHJ1hZBISHIyclRvIZhGJw4cUKxiwYhmmbRokXw9vZm1fbt24eLFy9ylIiQpocaO0Ke4+XlBdO2Vgi1s4P0yYMUcoZBUWEh63U8Hg8mJiaoedC27gwNDQBtHdz16I5MsRjBz9ytAx43dps3b4ajoyP+/vtvFV2VEPUhEAhw7NgxpT2PZ86cicLn/v0RQmpGjR0hzxEKhRg2YgQkbdrgloM95DweioqKIJOz92M1MjJWyTDsUwyPj3ve3sjS10PA+fMv3P81OTkZQ4YMwdSpU/Hw4UOVXZ8QddC5c2ds3ryZVcvIyMBHH33EUSJCmhZq7AipgYWFBUZPGA+xtTUCbbuhtKqSdVxHWwcGz91VqA8pn48bjg4o6tQRuWIxq2FzcHCAtra20nu+//572NnZ4ejRoypbdoUQdTB79mylLfSOHTuGgIAAjhIR0nTQXrGEvERUVBS+P3oU5qWlsAsNhX5xMXg8Plq3agWBiu7WFenrI9TeDuWWbTB6wnjo6elh2LBhuHPnDnr37o0///wTeXl5+OCDD/Dff//VeI7+/ftj//79sLGxUUkmQriWlpYGR0dHlJSUKGrm5uaIjY1Fy5YtOUxGiHqjxo6Ql5gwYQL+/fdfjBg2DFamprC5exf2D/NgqKtb73PLeTwkWlkhoasNRFZWGDpiBCwsLBTHCwsLYWJi8v/Xy+U4fPgwli5dyloC5SkdHR2sWrUKS5curfEOHyFNzdGjRzFjxgxWbcKECfjxxx85SkSI+qPGjpAX+OmnnzBx4kQAjyd19+nTB/29vGBZWYnOqWlol58PwWssTizj85FuZob77a1RamaGnt7e6NOnD4TC2u3wl5OTg0WLFuGnn36q8biDgwMOHjwIT0/POmcjRJ0wDIMRI0bg7NmzrPpPP/2E8ePHc5SKEPVGjR0hNcjJyYGDgwPEYrGiJhKJcPXqVSQmJCA5MRFCiQTt09Nh+UgM47IyaL3gYQcAqBYIUGRggOyWIqS2awepvj46du0KL29vWFpavlbGc+fOYe7cuUhLS1M6xuPxMGfOHHz11VcwNjZ+rfMTog6ys7Ph4OCAgoICRa1ly5aIjY2Fubk5h8kIUU/U2BHyHIZhMGrUKKWJ2qdOnVLcwSsoKEBUVBSiQkNRUVYGRiqFYXk5jMQF0JZKwWfkkPP4qBIKUSwyRameHnhCIXQNDODs4QFnZ+cnixLXT2lpKT7//HNs375dsRvGs9q0aYNdu3Zh9OjR9b4WIVw5deoUJk+ezKqNGDECv//+O3gv2CWGkOaKGjtCnnP8+HH4+fmxauPGjcNPP/2k9ENEJpNBLBYjNzcXubm5yMvJQVVFBWRSKQRCIbR1ddHKwgLm5uYwNzeHSCRS2UMXz7pz5w5mzZr1wi3HRo4ciV27dqFt27YqvzYhDY1hGIwfPx6//vorq378+HFMmzaNo1SEqCdq7Ah5Rnp6OpycnFgPJ7Ru3RqxsbEwMzPjMNmrSaVSbNu2DZ9//jnKy8uVjrdo0QJfffUV5syZ0yDNJSENKS8vDw4ODsjLy1PUjI2NERMTQx9YCHkGrWNHyBMMw+D9999XeuJ0//79at/UAY8XVl6yZAliY2MxePBgpeMlJSWYP38+vLy8EBUVxUFCQl5fq1atcODAAVatqKgIM2fOpHUcCXkGNXaEPHHgwAGlPSnfe+89jBo1iptAr6ljx464cOECfvjhB7Rq1Urp+K1bt+Dh4YFPP/20xjt7hKirUaNGYerUqazaxYsXcfDgQY4SEaJ+aCiWEAAPHjyAs7MzysrKFDUrKytER0er5CEHrojFYixduhRHjhyp8Xjnzp2xf/9+pVX+CVFXBQUFcHR0RFZWlqJmYGCA6OhodOzYkcNkhKgHumNHmj25XI7p06ezmjoAOHToUJNu6oDHS7QcPnwYV65cqXFXivv372PQoEHw9fVFfn4+BwkJqRtTU1McPnyYVSsrK8P06dNrfDKckOaGGjvS7O3YsQP
Xr19n1fz9/TFkyBCOEqle//79ERUVhVWrVkFLS0vp+IkTJ2Bra4uTJ0/SfCWi9oYMGYJZs2axateuXcPOnTs5SkSI+qChWNKsJSQkwNXVFRUVFYpahw4dEBUVhRYtWnCYrOHExsbC398fwcHBNR4fNGgQ9u3bh86dOzdyMkJqr6SkBE5OTkhNTVXUdHV1ERERgW7dunGYjBBu0R070mxJpVL4+vqymjrg8f6UmtrUAY+3HPvvv/+wd+9eGBkZKR2/dOkSHB0dsXHjRlRXV3OQkJBXa9GiBY4ePcqqVVRUwM/PD7KX7AJDiKajxo40W1u2bMGtW7dYtYULF+KNN97gJlAj4vP5mD17NuLj4/Huu+8qHa+oqMAnn3yC7t27K/0eEaIu+vfvj/nz57NqN2/exJYtWzhKRAj3aCiWNEvR0dHw8PBg3ZGysbFBREQE9PX1OUzGjT/++APz5s1DZmam0jEej4cPP/wQ69ev1+g7maRpkkgkcHV1RVJSkqKmra2N0NBQODo6cpiMEG7QHTvS7FRVVWHatGmspo7P5+P48ePNsqkDHm85FhcXh/nz5yttm8YwDHbu3Al7e3ul/XMJ4Zq+vj6OHTsGPv//P85q+jdOSHNBjR1pdtavX6+0p+rSpUvh6enJTSA1YWRkhB07duDGjRtwdnZWOp6RkYGRI0di7NixrDXECOFanz59sGTJElYtPDwc69ev5ygRIdyhoVjSrNy5cwe9e/dmTa52cHBAaGgodHR0OEymXqqrq/HNN99gzZo1Sg+XAI+bwI0bN8Lf3591p4QQrlRUVMDDwwNxcXGKmlAoxM2bN+Hh4cFhMkIaFzV2pNl40Tf+W7duwd3dncNk6uv+/fuYPXs2Ll26VOPxPn364MCBA3BwcGjkZIQoCw0NRa9eveiDG2nW6KM2aTY+//xzVlMHAJ999hk1dS/RuXNnXLx4ESdOnICZmZnS8eDgYLi5uWHVqlU13tkjpDF5eHhg5cqVrFpsbCxWr17NUSJCGh/dsSPNQnBwMLy9vVm7Kri7u+PmzZs17sRAlOXn5+Pjjz/GiRMnajxuY2ODAwcONIvlYoj6qqqqQu/evREeHq6o8fl8BAYGNvt5tKR5oMaOaLyysjK4urri3r17ihoth/D6Ll26hNmzZ+P+/fs1Hp8xYwY2b94MkUjUyMkIeYyWMyLNGQ3FEo23YsUKVlMHAGvXrqWm7jUNGjQI0dHRWLFiBYRCodLxI0eOwM7ODqdOnaJ9ZwknnJycsHbtWlYtKSkJK1as4CgRIY2H7tgRjXblyhUMHDiQVevduzcCAwMhEAg4SqU5oqKi4O/v/8LdKYYMGYI9e/agY8eOjZyMNHdSqRTe3t5KfzevXLmC/v37c5SKkIZHjR3RWMXFxXB2dmZtEq6np4eIiAh07dqVw2SaRSaTYe/evfj0009RUlKidFxfXx9ffPEFFi1aVOMdPkIaSkJCAlxdXVkP9nTo0AFRUVG0iwrRWDQUSzTWxx9/zGrqAGDDhg3U1KmYQCDAhx9+iLi4OIwcOVLpuEQiwdKlS9GzZ0/cuXOHg4SkuerWrRu+/vprVi0lJUVpMWNCNAndsSMa6cKFCxg6dCir9sYbb+Dy5cu0oG4DO3PmDD788MMad6fg8/lYuHAh1q5dC0NDQw7SkeZGLpdjwIABuHbtGqt+4cIFDBkyhKNUhDQcauyIxikoKICjoyOrsTA0NERUVBTN9WokRUVF+PTTT7F3794aH6CwtrbGnj17MGzYMA7SkeYmOTkZTk5OKCsrU9SsrKwQHR0NU1NTDpMRonp064JonAULFijdLdq6dSs1dY3I2NgYu3fvRmBgYI27UqSlpWH48OGYMGECcnJyOEhImpOOHTti69atrFpmZiYWLlzIUSJCGg7dsSMa5cyZMxgzZgyrNnjwYFy4cAE8Ho+jVM1bVVUVNm/ejC+//BKVlZVKx01MTLB582bMmDGDhslJg2EYBkOGDMHFixdZ9TNnzmD
UqFHchCKkAVBjRzRGXl4eHBwckJeXp6gZGxsjJiYGbdu25TAZAYDExETMnj0bV69erfG4j48PDhw4AFtb20ZORpqLjIwMODo6oqioSFFr3bo1YmJi0KpVKw6TEaI69PGYaASGYTBnzhxWUwcAO3fupKZOTXTt2hWXL1/G0aNHa9yV4r///oOLiwu++OKLGu/sEVJfbdu2xY4dO1i1hw8fYu7cubSYNtEYdMeOaIRTp05h8uTJrNrIkSNx5swZGoJVQw8fPsTixYvx/fff13jc1tYWBw4cgI+PTyMnI5qOYRiMGjUKAQEBrPqpU6cwceJEjlIRojrU2JEmLysrC46OjigoKFDUWrZsidjYWJibm3OYjLzK33//jTlz5iA5ObnG47NmzcLGjRvpyUWiUjk5OXB0dMSjR48UNVNTU8TGxsLS0pLDZITUHw3FkiaNYRj4+/uzmjoA2Lt3LzV1TcDgwYMRExODZcuW1bjF28GDB2FnZ4eff/6ZhsqIylhYWGDPnj2sWkFBAfz9/envGWnyqLEjTdrRo0dx7tw5Vm3ixIkYN24cR4lIXenr62Pjxo24c+cOunfvrnQ8NzcXEyZMwDvvvIO0tDQOEhJNNH78eEyYMIFVO3v2LI4dO8ZNIEJUhIZiSZMSGBiIlStXQldXF9OnT4e/vz9rf1ILCwvExMSgZcuWHKYkr0smk2HXrl1YuXIlazHZpwwMDLBu3TrMnz+/xjt8hNTFo0eP4ODggNzcXEXNyMgI0dHRsLa25jAZIa+PGjvSZFRWVqJt27bIz89/4WsCAgLwzjvvNGIq0hDS0tIwd+5cpbuxT3Xv3h0HDhyAm5tbIycjmiYgIEBpj+NBgwbh4sWL9OAVaZKosSOckMlkEIvFyM3NRW5uLvJyclBZXg65TAa+QAAdPT20srCAubk5zM3NIRKJEBERUeNQ3VN+fn44evRoI34VpCExDINff/0V8+fPZ91ReUogEGDx4sVYvXo1DAwMOEhINIWfnx+OHz/Oqu3Zswdz5szhKBEhr48aO9KoCgoKEBkZieiwMFSUlYGRSmFYXg5jsRhaUin4DAM5j4dqoRBFIhFK9fTAEwqha2AAvo4OVq5cyVpc9FlLlizBpk2b6FO2hikoKMAnn3yCAwcO1Hi8Q4cO2LdvHwYPHtzIyYimKCwshJOTEzIyMhQ1fX19REVFoXPnzhwmI6TuqLEjjSIrKwvBgYFITkqClkQC67R0WIrFMC4rg5ZM9sL3VQsEKDIwQLZIhHuWFiiUy5GUnIzA4OAa9xg9duwYfH19G/JLIRwJDAyEv78/4uPjazw+efJkfPvtt2jdunUjJyOa4OLFi0ofDnx8fHD16lWaz0maFGrsSIOSSqUICgpCSFAQDPPz0SU1DW3z8yGQy+t8rkclJUgXmSLNxgb5hoYICglBcHAwZM80hosXL1ba7JtojsrKSmzcuBHr169HVVWV0nFTU1Ns3boVfn5+dOeW1NmcOXOwb98+Vm3r1q1YvHgxR4kIqTtq7EiDycnJwbmAABRkZMI2KQk2mZng1+OvW35+PqqqqyDn8ZDVtSuSbG2RKRYj4Px5PHz4ELq6urh27Rp69uypwq+CqKO7d+/igw8+wPXr12s8/sYbb2D//v3o2rVrIycjTVlpaSmcnZ1ZC2br6OggPDwcdnZ2HCYjpPaosSMNIjU1FWd++gn6WdnwiI+HkURS73Nm5+SAYf5/p09iZIR4Dw9k6evjXmoq1qxZA1dX13pfhzQNcrkcR48exZIlS1BYWKh0XEdHB5999hmWLVsGbW3txg9ImqRr166hf//+rIWKe/TogeDgYAiFQg6TEVI7tEAxUbnU1FT8duoUTJNT4BMerpKmDoDSivD6xcXoHhQMu/Jy+PTuTdtONTN8Ph8zZ85EfHx8jXt8VlZWYtWqVXB3d0dwcDAHCUlT1K9fPyxcuJBVCwkJwaZNmzhKREjd0B07olI5OTn48cQJmCSnwDM2tl5Dr897OhT
7GA9GRkYwMDAAw+PhhqMDCjt0xMRp78HCwkJl1yRNx/nz5zFnzpwad6fg8XiYPXs2vv76axgbG3OQjjQl5eXlcHNzQ0JCgqKmpaWFkJAQuLi4cJiMkFejO3ZEZaRSKc4FBEA/Kxu94uJU2tQBQEszM7RoYQQDA0OYm5vD0MAAPAB8hkGv2DjoZWfhfEAApFKpSq9LmoahQ4ciNjYWixcvBp/P/tbGMAz27t0LOzs7nD59mvYDJS+lp6eH48ePs/4eVVdXw9fXt8aHdghRJ9TYEZUJCgpCQUYmPOLjIXyNp15fhQeghaEhjI2MIHjuB7dQLodHXDzEmZk07NaMGRoaYuvWrbh9+3aNu1JkZ2fj3XffxejRo1lrlhHyvF69emH58uWsWmRkJL788kuOEhFSO9TYEZXIyspCSFAQbJOSVDanrq6MJRJ0S0zC7cBAZGdnc5KBqAcPDw/cvn0bW7Zsgb6+vtLxP/74A3Z2dti5cydruRxCnrV69Wo4OTmxal9//TVCQkI4SkTIq1FjR1QiODAQhvn5sMnM5DRH18xMGObnIygwkNMchHtCoRAff/wxYmNjMWTIEKXjpaWlWLBgAfr06YOoqCgOEhJ1p6Ojg+PHj7OehpXJZPD19UVFRQWHyQh5MWrsSL0VFBQgOSkJXVLTVD6vrq74DIPOqWlITkxEQUEBp1mIeujQoQPOnz+PU6dO1bgrxe3bt+Hh4YEVK1agvLycg4REnbm5uWHVqlWsWnx8vFKNEHVBjR2pt8jISGhJJGibn891FABAu/x8CCUSugtDFHg8HiZOnIj4+HjMnDlT6bhUKsWGDRvg5OSES5cucZCQqLMVK1bAw8ODVdu6dSsCaWSAqCFq7Ei9yGQyRIeFwTot/bW2CWsIArkc7dPTERUaSvOnCItIJMKhQ4fw77//1rgrxf379/Hmm2/C19cX+WryQYVwT0tLC8ePH2ctdM0wDPz8/FBWVsZhMkKUUWNH6kUsFqOirAyWYjHXUVgsHz3OJVazXEQ99OvXD5GRkVi1ahW0tLSUjp84cQK2trY4ceIELY1CAAAODg5Yt24dq3b//n2lJ2cJ4Ro1dk2cUCiEq6srHB0dMW7cOEhe8kSqn58fzp49q9Lr5+bmgpFKYVJa+sLXnMrOxrm8vJeeZ2pUFBKffPLtH3Ib74SFYkR4GEaEhyHtNeY9/ZFwF9WVlcjNza3ze+tj3rx5MDc3R/fu3Rv1uqTudHV1sXbtWkRERMDLy0vp+KNHj+Dr64s333wT9+7d4yAhUTeLFy9Gnz59WLXdu3fj8uXLHCUiRBk1dk2ciYkJIiIiEBMTA21tbezbt69Rr5+bmwvD8vKXrls3ydISw1q1qtN5f3RxRYCbOwLc3GGtp1fnXN9lZEC3tLROjZ0qhm0nT56M8+fP1/s8pPHY29vj+vXr2LdvH4yMjJSOX758GU5OTtiwYQOqq6s5SEjUhUAgwLFjx6D33PekGTNmoLi4mKNUhLBRY6dBfHx8cO/ePeTn5+Odd96Bs7Mz3njjDaSkpLBed/nyZUyaNEnx/4cPH8aSJUuQkpICFxcX+Pr6ws7ODhMmTFAMQ128eFFxZ3Dx4sWK+qhRo/DHuXMYEnoHc+JiEVJUhIlRkRh0JwThT77R7UhNxcmsLACP796NiQjHO2FhWJxwF9W1nJcXXVKCKVGRGB0ejg9iY1H45Afs9tRUjIkIx7CwUHz14D4A4LusLDysqsK2EyewbOlSAICZmZniXLt27cKaNWsAAG+88QYWLVqE7t274+TJk/j777/h6ekJNzc3TJ06FVVVVZDJZJg6dSrs7e3h5OSEo0ePvjCnl5cXWrZsWauviagPPp+PDz74APHx8Rg7dqzS8YqKCsUE+lu3bnGQkKgLGxsbpX1j09LS8NFHH3GUiBA2auw0hFQqxYULF+Dk5IQ1a9bAx8cHUVFRmDNnDhYsWMB67YABAxAREaH4hHny5En4+voCePwY//LlyxEXF4fc3Fw
EBgaivLwcs2bNwu+//46oqCgkJCTgzJkzAB6vBdarbVv85dEd5XI5vsvOwg9OzljduQsOZKQr5XzbzAynXd3wp7s7zLS0ceEFE9QnRkZgRHgY3o+NQbVcjg3JD7Dbzh5n3NzwZsuW2P/k3L5t2uC0qxvOurkjq7ISocVFmNqmDVpra2Pd20OxcN68V/7eaWlp4c6dOxg+fDg2b96MK1euIDw8HJ06dcLBgwcRERGB5ORkxMXFITo6GmPGjKn9HwxpUtq0aYNffvkFf/zxB9q2bat0PDo6Gp6enliwYAFKSko4SEjUwdy5c9G/f39W7ciRIzh37hxHiQj5P2rsmrjCwkK4urqie/fuaN++PWbOnInAwEBMnToVADB+/Hjcvn2b9R4ej4fx48fj559/RkpKCkpKShSrq3fr1g329vbg8Xhwc3NDSkoKEhIS0K1bN3To0AF8Ph9TpkzBf//9B+DxAp7OFhYAgK76BvA0NgGfx0NXfX1kVFQq5b1bVoaJUZEYHhaKvx/l494L5gQ+HYo95OCI5PJy3C0rw7SYaIwID8PRrExkVT4+942iQrwbEY4R4WEIKy5mnY/HyCGrxb6x48aNAwDcvHkTUVFR8PT0hKurK3755RckJyejU6dOyMrKwrx583Dx4kXaRL4ZGDFiBOLi4rBw4ULweDzWMYZhsHPnTtjb2yMgIICjhIRLfD4fR48eRYsWLVj1WbNm0QNbhHPCV7+EqLOnc+xe5vkfTMDjByl8fX2RnZ2NadOmKeo6OjqK/xYIBK+cd6alpQX5k/PzeYD2kz1c+Twe5FB+mvDTpCQccHBAF319nMzKQmblq1dvlwOwNzTESSdnVr1SLsf6Bw9w2tUNrbW1sSH5Aark/78mw+ND8GTF+Gd/Dyor2Q3n0y2n5HI5hg0bVuNQa3R0NM6fP49vv/0WFy9exJYtW16ZmzRtLVq0wLZt2zB58mT4+/sjMjKSdTwjIwMjR47EmDFjsHPnTrRp04ajpIQL7du3xzfffINZs2YpatnZ2Zg/fz6+//57DpOR5o7u2Gkgb29v/PDDDwCAX3/9FT179lR6TceOHSEUCnHw4EFMnjz5pefr1q0bEhMTkZqaCrlcjlOnTqFv376K49XC2n8+KJfLYKalhSq5/JVPyj7VSU8P2ZWViCl9PPRVJZfjvkSCSrkcPAAmQiFKpFJcevRI8R4DgQDFDANtXV0AgLGxMVJTU1FdXf3CJ4M9PT1x9epVpKamAgCKi4uRnJyM/Px8yOVyjB8/HmvWrHllI000S8+ePRESEoKNGzcqTZoHgNOnT8POzg579+6FXE3WciSNY+bMmXj77bdZtR9++AG//fYbR4kIocZOI61Zswb//vsvnJ2dsXv3bmzfvr3G102YMAGurq5o9YonVvX09HDgwAGMHDkSzs7OsLGxwahRowA8HpIoEolqnW2+tTXGRERgSnQUuhkob85eE20+H9tsbbHuwQO8ExaG0RHhuFtWBiOhEKNbm2NoWCg+iIuF6zPDIuMtLLDu77+wY/duAMC6deswYMAAvPHGG+jUqVON12nVqhUOHjyId999F87Ozujbty9SU1ORmZmJfv36wcXFBXPnzsXq1atfmNXPzw+enp6IiopC27Zt8csvv9T694aoLy0tLSxbtgzR0dF48803lY4XFxdj7ty58PHxQWxsLAcJCRd4PB4OHToEExMTVn327Nl4+PAhN6FIs8djaPXNZsvPzw+jR4/GyJEjX/scMTExOP/LLxh+7Tq01GiXh2qBAGf79cXQcePg6OjIdRyiQRiGwffff4+PPvqoxt0ptLS0sHz5cqxcuRK6T+4YE832/fffK+Y1PzVmzBj8+uuvNU6FIaQh0R27ZsrR0RG5ubl455136nUec3Nz8IRCFBkYqCiZahQZGIAnFMLc3JzrKETD8Hg8TJ06FfHx8YqnyZ9VXV2NdevWwdnZGVevXuUgIWlskydPxujRo1m106dPK6bEENKYqLFrpmJ
iYnDhwgXw+fX7KyASiaBrYIDsOgzHNobslo9ziRoo1+jRo+Hq6sr6FR0d3SDXIurJzMwMx44dw6VLl9ClSxel40lJSRgwYABmzJiBR8/M/ySah8fjYd++faz1MgHgww8/RNaTNTwJaSw0FEvq7d9//0XEP/9gSGAQBGoweVzG5+OCtxfc33oL/fr14zoOaQbKy8uxbt06bNq0CdIalthp1aoVtm3bhkmTJtHQnAb77bfflBa4fvvtt3Hu3Dn6cyeNhu7YkXpzcXFBtb4+Mp77tPq8qupq5OXnIy8/H1VVVQ2WJ93MDFJ9fTg7O7/6xYSogJ6eHtavX4+wsDD07t1b6XheXh6mTJmCt99+G8nJyRwkJI3h3XffVVpl4MKFCzhy5AhHiUhzRI0dqTdTU1N0tLHBvfbWijXtnieVyfDo0SNUV1ehuroKBYUFNaxyV39yHg/321ujY9euMDU1bYArEPJiTk5OCAwMxO7du5UWrwWAv//+Gw4ODtiyZUuNd/ZI07dz505YWlqyah999JFiGSVCGho1dkQlvHx8UGpmhiQrK6VjDB7vkMEw/x+mlcsbZgZAopUVSs3M4OXt3SDnJ+RVBAIB5s6di/j4eKUJ9cDjYdulS5eiR48euHPnDgcJSUMSiUQ4ePAgq1ZSUoIZM2bQOoekUVBjR1TC0tISPby8cNfGBsX67PXpJGVlqKpi7/agp6cHVc84KdLXR0JXG/T09lb6xExIY7OyssLp06dx+vTpGneliIiIQK9evfDRRx+htLSUg4SkoQwbNgwzZsxg1a5cuYI9e/ZwlIg0J9TYEZXx8vKCaVsrhNrZQfrkaVupTIbi4mLW6wQCAYyMjFR6bSmfj1B7O4isrNCnTx+VnpuQ+hg9ejTi4uIwb948pQn0crkc27Ztg4ODA20gr2G+/fZbWFtbs2rLly9HUlISR4lIc0GNHVEZoVCIYSNGQNKmDW452EPG46GwoADMc7PpTExMwVfhE2JyHg+3HOxRbtkGQ0eMgLAOW5wR0hiMjY2xa9cuBAUF1bhgdlpaGoYPH44JEyYgJyeHg4RE1YyMjJQempBIJPDz83vlHtyE1Ac1dkSlLCwsMHrCeIitrfFf164ol7O/gRnoG0BHW7vG91ZVV0NWxzkoUj4fNxwdILa2xugJ42FhYfHa2QlpaJ6enggNDcX69euho6OjdPznn3+GnZ0dDh48SPOxNMDAgQMxb948Vi04OBjffvstR4lIc0Dr2JEG8e+//+Lc77/DsqwMdqGh0C8uhkAgROtWrZSGoxgAD3NzIXvSBBoZGcOwFjtZFOnrI9TeDuWWbTB6wni0b9++Ib4UQhpEUlISZs+ejStXrtR43MfHB/v374ednV0jJyOqVFZWBhcXF9y/f19R09HRQVhYGOzt7TlMRjQVNXZE5aqrq+Hp6Yn09HSMGDYMVqamsLl7F86PxNDV0lJ6fUlJCUpKSxT/zwMPFpaWL3y4Qs7jIdHKCgldbSCyssLQESPoTh1pkhiGwYkTJ7B48WKIxWKl49ra2lixYgVWrFhR4x0+0jQEBgaib9++ePbHbffu3REcHAytGr4nElIf1NgRlVu7di1Wr14N4PGDEn369MEgn75oXS5B59Q0tMvPV+xQwQDIzc2F/LkhW2NjExg893StjM9HupkZ7re3RqmZGXp6e6NPnz40p440eXl5eVi8eDG+++67Go/b2tpi//796Nu3byMnI6qyZMkSbN26lVVbu3YtVq1axVEioqmosSMqFR4ejp49e7IWX7Wzs8P58+dxJyQEyYmJEEokaJ+eDstHYmjn56G0hn00BQIBWrc2h1QgQJGBAbJbipDarh2k+vro2LUrvGhJE6KB/vnnH8yePRsPHjyo8fisWbOwceNGWny7CaqoqIC7uzvi4+MVNaFQiJCQELi6unIXjGgcauyIylRWVqJ79+6IiYlR1AQCAW7evInu3bsDAAoKChAVFYWo0FCUl5WhvKQEusUlMC0ugrC
qCjyGAcPjQaqtjXJzc1QaGYEnFELXwADOHh5wdnamH2pEo0kkEqxduxZbtmyp8elJc3NzbN++HePHj6f9R5uYkJAQeHp6sv5cnZycEBISQkPtRGWosSMq8+mnn+Lrr79m1VatWoW1a9cqvVYmk+Hw4cPYsWMHzM3NYW5mBl1tbQgFAkhlMlRUVaG8qgqrV6+GlZUVRCIRBAJBY30phHAuIiIC/v7+CAkJqfH40KFDsWfPHnpoqIlZtWoV1q1bx6p9+umnWL9+PUeJiKahxo6oxM2bN+Hl5cVaosHV1RW3bt2C9guWN7G1tUVCQsJLz3v8+HFMmzZNpVkJaSpkMhl2796NTz/9FGVlZUrH9fX1sW7dOsyfP5/mmjYRVVVV6NmzJyIjIxU1Pp+P4OBg9OrVi8NkRFPQOnak3iQSCXx9fVlNnZaWFo4fP/7Cpk4ikbyyqQMez9kjpLkSCARYsGAB4uLiMHz4cKXjEokEixcvRu/evenfShOhra2N48ePs56Glcvl8PX1RXl5OYfJiKagxo7U28qVK5GYmMiqrVmzBs7Ozi98j76+PgYPHvzS8woEAowYMUIlGQlpyqytrREQEIBffvmlxqV9QkND0aNHDyxdurTGO3tEvbi4uChWDngqISEBK1eu5CgR0SQ0FEvq5dq1a+jfvz9rfaaePXsiKCjolUNDZWVl+OGHH/DXX3/h9OnTirpIJMLChQsxePBgGpog5DmFhYVYsWIF9u3bV+PxDh06YO/evRgyZEgjJyN1IZVK0adPH9YcSh6Ph3///ZeWtSH1Qo0deW2lpaVwdnZGcnKyoqarq4vw8HDY2trW+jy//fYbxo4dq/j/bt264e7duyrNSoimCQwMhL+/P2v5jGdNmjQJ3377LczNzRs5Gamt+Ph4uLm5obKyUlHr2LEjoqKiYGhoyGEy0pTRUCx5bUuXLmU1dQDw1Vdf1ampI4S8Hm9vb4SHh2Pt2rU1zmU9deoU7OzscOTIEdDnd/VkZ2eHr776ilVLTk7GsmXLOEpENAE1duS1XLx4UWkoyMfHBwsXLuQoESHNj46ODlatWoWoqKgah+8KCgowc+ZMDBgwQGkeLFEPCxcuhLe3N6u2d+9e/PPPPxwlIk0dNXakzgoLCzFz5kxWzcDAAEePHgWfT3+lCGls3bp1w9WrV3Ho0CGYmJgoHf/333/h7OyMdevWoaqqqvEDkhcSCAQ4duwY9J/bQnHGjBkoKiriKBVpyuinMKmzRYsWISMjg1XbvHkzOnfuzFEiQgifz8fMmTNx9+5dTJo0Sel4ZWUlVq1aBTc3NwQHB3OQkLxI586dsXnzZlYtIyMDixYt4iYQadKosSN1EhAQgOPHj7Nqb775JmbPns1RIkLIs8zNzfHDDz/g/PnzNe5KERcXBy8vL8ydO5fuCKmR2bNnY9CgQazasWPH8Oeff3KUiDRV1NiRWnv06BH8/f1ZNSMjIxw+fJj2rCREzbz99tuIjY3Fxx9/XOMUib1798LOzg6//fYbPVyhBvh8Pg4fPgwjIyNWfdasWXj06BFHqUhTRI0dqbV58+YhNzeXVdu+fTvatWvHUSJCyMsYGBhgy5YtCAkJgbu7u9Lx7OxsjB07FqNGjUJ6ejoHCcmzrK2tsW3bNlYtNzcXH374ITeBSJNEjR2plZ9//hk//fQTqzZ8+HD4+vpylIgQUlvu7u64desWvvnmG6VJ+sDjKRb29vbYuXMnZDIZBwnJU35+fkrbx/3444/45ZdfOEpEmhpq7Mgr5eTkYO7cuayaSCTCgQMHaAiWkCZCKBTio48+QmxsLN5++22l46WlpViwYAH69OmDqKgoDhIS4PHuEwcOHICpqSmrPmfOHKURE0JqQo0deSmGYfDBBx8ozfHYvXs3LC0tOUpFCHldHTp0wLlz5/Djjz+idevWSsdv374Nd3d3fPLJJ7QpPUcsLS2xe/duVu3Ro0f44IMPaD4keSVq7MhLnTx5EgEBAazauHHjMGH
CBI4SEULqi8fjYcKECYiPj8f777+vdFwmk2Hjxo1wdHSkhXI5MnHiRNZWiwDwxx9/4LvvvuMoEWkqqLEjL5SRkYEFCxawaq1bt8aePXtoCJYQDSASiXDw4EH8+++/6Natm9LxBw8e4K233sK0adOQl5fHQcLmi8fjYc+ePWjVqhWrPn/+fKV1RAl5FjV2pEYMw2DmzJlK61zt378fZmZmHKUihDSEfv36ISIiAp9//jm0tLSUjp88eRJ2dnY4ceIEDQU2olatWuHAgQOsWlFREWbOnEl/DuSFqLEjNTpw4AAuXrzIqr333nsYNWoUN4EIIQ1KV1cXX3zxBSIiIpT2LgUez/Hy9fXFm2++iXv37nGQsHkaNWoUpk6dyqpdvHgRBw8e5CgRUXfU2BElDx48wMcff8yqWVlZYfv27RwlIoQ0Fnt7e1y7dg379++HsbGx0vHLly/DyckJX3/9NaqrqzlI2Pzs2LEDbdq0YdUWL16M5ORkjhIRdUaNHWGRy+WYMWMGysrKWPVDhw4pPX5PCNFMfD4f/v7+iI+Px7hx45SOV1RU4NNPP4WHhwdu3rzJQcLmxdTUFIcPH2bVysrKMH36dMjlco5SEXVFjR1h2blzJ65du8aq+fv7Y8iQIRwlIoRwxdLSEj///DP+/PPPGneYiY6ORp8+fTB//nwUFxdzkLD5GDJkCGbNmsWqXbt2DTt37uQoEVFX1NgRhYSEBHzyySesWocOHbBlyxaOEhFC1MHw4cMRFxeHRYsWKe07yzAMdu3aBXt7e/zxxx8cJWwetm7divbt27Nqn3zyCRISEjhKRNQRNXYEACCVSuHr64uKigpW/ejRo2jRogVHqQgh6sLQ0BDffvstbt68CRcXF6XjmZmZGDVqFN59911kZmZykFDztWjRAkePHmXVKioq4OfnR1vBEQVq7AgAYMuWLbh16xartmDBArzxxhvcBCKEqKUePXogJCQEmzZtgp6entLx06dPw97eHnv27KH5Xw2gf//+mD9/Pqt28+ZNGlkhCjyGFsNp9qKjo+Hh4cF6ws3GxgYRERE1bhiuar/99htrhfVu3brh7t27DX5dQkj9PHjwAHPmzFFaGukpT09PHDhwAI6Ojo2cTLNJJBK4uroiKSlJUdPW1kZoaCj9XhO6Y9fcVVVVwdfXl9XU8fl8HD9+vFGaOkJI09WpUyf89ddf+O6775R2SACAGzduwM3NDZ999pnSNA/y+vT19XHs2DHWfMeqqipMmzaNlqAh1Ng1d+vXr0d4eDirtnTpUnh6enKUiBDSlPB4PEyZMgXx8fGYPn260nGpVIr169fD2dkZV69e5SChZurTpw+WLFnCqoWHh2P9+vUcJSLqgoZim7HQ0FD06tWLNenWwcEBoaGh0NHRabQcNBRLiOa4cuUKPvjggxfuTuHn54ctW7agZcuWjZxM81RUVMDDwwNxcXGKmlAoxM2bN+Hh4cFhMsIlumPXTFVUVGDatGmspk4oFOL48eON2tQRQjTLgAEDEBUVhZUrV0IoFCodP3bsGGxtbfH999/Tfqf1pKurixMnTkAgEChqT1c4qKys5DAZ4RI1ds3U6tWrWZ/yAGDlypX0KY8QUm96enpYt24dwsPD0bt3b6Xj+fn5mDp1KoYMGYIHDx5wkFBzeHh4YOXKlaxabGwsVq9ezVEiwjVq7Jqh4OBgbN68mVVzc3NT+uZACCH14ejoiKCgIOzevbvG9TAvXrwIR0dHbN68GVKplIOEmmHlypVwc3Nj1TZv3owbN25wlIhwiRq7ZqasrAy+vr6sIRBtbW2cOHECWlpaHCYjhGgiPp+PuXPnIj4+HmPGjFE6Xl5ejmXLlinWxyN1p62tjePHj7O+h8vlcvj6+kIikXCYjHCBGrtmZsWKFUqTmteuXUtrHxFCGpSVlRV+++03nDlzBlZWVkrHIyIi0Lt3b3z00UcoLS3lIGHT5uTkhLVr17JqSUlJWLFiBUeJCFfoqdhm5OrVqxg
wYACr1rt3bwQGBrIm3zYGmUwGsViM3NxcXLt2DUHXr0NXRwdCPh/aurrw9vFBKwsLmJubw9zcHCKRqNEzEkIaRnFxMVauXIndu3fX+ABFu3btsGfPHgwfPpyDdE2XVCqFt7e30i5CV65cQf/+/TlKRRobNXbNRHFxMZydnZGamqqo6enpISIiAl27dm20HAUFBYiMjER0WBgqysrASKXQLS6Gbm4uhFVV4Mnl4AmF0DIyQpFIhFI9PfCEQugaGMDJ3R0uLi4wNTVttLyEkIZz8+ZNzJo1CzExMTUeHzduHLZv3w5LS8tGTtZ0JSQkwNXVlbUgdIcOHRAVFUX7fjcT1Ng1E/7+/jh48CCrtm3bNixcuLBRrp+VlYXgwEAkJyVBSyKBdVo6LMViGJeVQVpWhoICseK1QqEQrVu1BgBUCwQoMjBAtkiENOt2qNbXR0cbG3j5+NA3e0I0QHV1NbZs2YIvvviixiU6jI2NsWnTJrz//vusnRbIi23btg0fffQRq+bv74/9+/dzlIg0JmrsmoELFy5g6NChrFq/fv1w5cqVBv9GKZVKERQUhJCgIBjm56NLahra5udD8Mzm4OUVFS9s7J4l4/ORYWaGe+2tUWpmhh5eXvDy8qpxrSxCSNOSlJSE2bNn48qVKzUe9/b2xoEDB2BnZ9fIyZoeuVyOAQMG4Nq1a6z6hQsXMGTIEI5SkcZCjZ2GKygogKOjI7KyshQ1Q0NDREVFoWPHjg167ZycHJwLCEBBRiZsk5Jgk5kJfg1/3Wrb2D0l5/GQZGWFuzY2ELW1wtARI2BhYdEgXwMhpPEwDIOTJ09i8eLFePTokdJxLS0tfPrpp1ixYgUtpP4KDx48gLOzM8rKyhQ1KysrREdH03QWDUf3tTXcggULWE0dAGzdurXBm7rU1FT8eOIEZHHx6H/rFrplZNTY1L0OPsOgW0YG+t+6BWlcPH48cZI1d5AQ0jTxeDxMmzYN8fHxeO+995SOV1dX44svvoCLiwuuX7/OQcKmo1OnTti6dSurlpmZ2WjTbwh36I6dBjtz5ozSulGDBw/GhQsXwOPxGuy6qamp+O3UKbRMTUPPuDgInxl2rUld79g9S8rn45aDPcTW1nh30iS0b9++XtkJIerjn3/+wezZs1+4O8X777+PTZs20R2oF2AYBkOGDMHFixdZ9TNnzmDUqFHchCINjho7DZWXlwcHBwfk5eUpasbGxoiJiUHbtm0b7Lo5OTn48cQJmCSnwDM2tlZ36SoqKyEW/3/YRUuohVatWtX6mnIeDzccHVDYoSMmTnuPhmUJ0SASiQRr167Fli1bWHtbP2Vubo7t27dj/PjxDfqBtalKT0+Hk5MTioqKFLXWrVsjNjYWZmZmHCYjDYWGYjUQwzCYM2cOq6kDgJ07dzZoUyeVSnEuIAD6WdnoFRdX66FXbW1t8Hj//6uoq6tbp+vyGQa9YuOgl52F8wEBtDURIRpEX18fGzZsQGhoKHr06KF0PDc3FxMnTsTw4cNpSkYN2rVrh+3bt7NqDx8+xJw5c2pcQ5A0fdTYaaAff/wRv/32G6s2cuRITJ06tUGvGxQUhIKMTHjEx79y+PVZfB4PrVq1gqFhCxgbm8DwNdZaEsrl8IiLhzgzE8HBwXV+PyFEvbm4uODGjRvYvn07DA0NlY6fP38e9vb2+Pbbb+nD3XOmTZuGESNGsGq//vorfvrpJ44SkYZEQ7EaJjs7Gw4ODigoKFDUWrZsidjYWJibmzfYdbOysvDDsWOwjY5Bt4yMBrvOq9xt2xYJTo6YMn06rXNHiIZKT0/HvHnz8Oeff9Z43MPDAwcPHoSbm1sjJ1NfOTk5cHBwgFj8//nMIpEIMTEx9L1Sw9AdOw3CMAxmzZrFauoAYO/evQ3a1AFAcGAgDPPzYZOZ2aDXeZWumZkwzM9HUGAgpzkIIQ2nXbt2+OOPP/Drr7/W2JQ8HbZdsmQJa7mP5szCwgJ79+5l1cRiMfz9/WlIVsNQY6d
Bjh07hnPnzrFqEydOxLhx4xr0ugUFBUhOSkKX1DSVLWnyuvgMg86paUhOTFRqcAkhmoPH4+Hdd99FXFwcZs+erXRcJpNh69atcHBwwIULFzhIqH7Gjx+P8ePHs2pnz57F8ePHOUpEGgI1dhoiLS1NaX0iCwsL7Nq1q8GvHRkZCS2JBG3z8xv8WrXRLj8fQokEUVFRXEchhDQwExMT7N27F4GBgbC3t1c6npqaiqFDh2Ly5MnIzc3lIKF62b17t9IIzsKFC5Gens5RIqJq1NhpALlcjpkzZ6KkpIRVP3DgAFq2bNmg15bJZIgOC4N1WjprmzAuCeRytE9PR1RoaI3LIxBCNI+XlxfCw8Px5ZdfQltbW+n4qVOnYGdnh8OHDzfroUczMzMcOHCAVSsuLsaMGTOa9e+LJqHGTgNoaWnh0qVLrJqfnx/eeeedWp9j06ZNr3VtsViMirIyWD4zIRcAdqWlYmhYKIaHhWJMRDjSKypeep6DGexPi3V9f8+bN1j/b/nocS7xc7met23bNlRVVb30NbURERGB3r17w9HREe7u7vj333/rfU5CSN1oa2vjs88+Q1RUFPr166d0vKCgAO+//z769++PhIQEDhKqhxEjRsDX15dVu3TpEvbt28dRIqJK1Ng1cffv34f8uTtl7dq1w7Zt2+p0ntdp7GQyGXJzc8FIpTApLVXUw4qLcauoCH+4uuGsuwf22NnDSCh46bkOPvMk7eu8/3nGZWVgpNJXDr3UtbF7/vf6KQMDA3z//feIiYnBd999hxkzZtQpLyFEdbp164arV6/i8OHDNe5Kce3aNTg7O+PLL79UyQe7pmjbtm2wsrJi1ZYuXYr79+9zlIioCjV2TZhMJoOfn59S/fDhw7h58yY8PT3h5uaGqVOnKr55+fv7w8PDAw4ODtiyZQsAYOXKlSgsLISrqytmz56NlJQUdO/eXXG+JUuW4NixYwCADh064JNPPoGbmxuuXLmC7777DrsPHMDoO3fw1ZNtf/KqqmAq1IIW//FfLwsdHRgLtQAA/xUUYHxkBEaGh2FJwl1UyeX4JiUFJVIpRoSH4fN7SXV+//MOZKRjQugdbN+zBzt37lTU169fDycnJzg7O+Pbb7/F7t27kZWVhT59+ijWeDp58iScnJzg6OiIzZs3AwBSUlLg5OSEiRMnwt7eHuXl5UrXtLGxQefOnQEAdnZ2KC0tpWFgQjjE4/EwY8YMxMfHY9KkSUrHq6qq8Pnnn8PNzQ1BQUEcJOSWiYkJjhw5wqqVlZVh+vTpL/wAS5oIhjRZW7duZQCwftna2jJ5eXnMwIEDGYlEwjAMw6xatYrZtWsXwzAM8+jRI4ZhGKa6uprp3bs3k5aWxjAMw7Rs2VJx3uTkZMbDw0Px/x9//DFz9OhRhmEYpn379opzxcXFMT179GCOT5/OJHr7MCNbtWb22zswYb09ma76+kxnPT1mmmUb5jcXVybR24e52as342lswkR59mESvX2Yee3aMZ936swkevswJkIhk+jtwyR6+9Tr/UccHJmplpZMgpc3c3z6DMbe3p6Jjo5mzp07xwwYMICpqKhg/T60b9+eKSkpYRiGYTIyMphOnToxjx49YsrLyxk3Nzfmzp07THJyMiMQCJjIyMha/bmcPn2aGTJkyGv9mRJCGsaFCxeYDh06KH3PfPpr9uzZTEFBAdcxG90HH3yg9HvxzTffcB2L1APdsWui4uPj8emnn7JqHTt2REhICG7evImoqCh4enrC1dUVv/zyC5KTkwE8nkDs5uYGd3d3JCQk4O7du3W+9tPlUy5fvoykpCR8/vvvGBEehsiSEqSVl8NQKMTvbu74rFNn6Aj4mB4Tg6CCAkSWFCNBUobxUZEYER6GC/n5yKhUnjtX2/efz8tDqkTCmvAbWFiAf8UFGBkRjs9/P4O8vDwkJibi0qVLmD59OnR0dAA8XpjzeSEhIRg4cCBEIhF0dXUxduxYBD5ZD69r165wdnZ+5e/Ngwc
PsGzZMtadQkII94YMGYKYmBgsWbIEfL7yj759+/bB3t4ev/32W7N6iGDz5s3o2LEjq7ZixYrX+tlA1IOQ6wCk7qRSKXx9fVFZWamo8Xg8HDt2DIaGhpDL5Rg2bBiOHj3Ket+DBw+we/du3LhxA8bGxhg7dizrHE8JhULWrfjnX6Ovrw/g8Xyzvt7emCoSweVBMvscPB68TE3hZWoKkVALl8SP4G1iijdMRdjQtesrv8aXvX99ly7Iz8+HTPZ426DsnBwwDIOCwgJUVlXhgzaWGGvZBjFdOqOkTx+MGTNG0aC9rqdf88uIxWKMHDkS+/fvR5cuXep1PUKI6hkYGGDz5s2YNGkSZs2ahbCwMNbx7OxsjB07Fu+88w52796Ndu3acZS08bRo0QJHjx7FG2+8oahVVlbC19cXQUFBEAqpTWhq6I5dE7Rx40aEhISwaosWLULfvn0BAJ6enrh69apiQ+zi4mIkJyejpKQEhoaGMDIyQkZGButJWoFAoJgT1rp1a2RlZaGkpASlpaX4559/aswxcOBAhISGouhJ4/eoqgoPq6rwQCJB2pN5aAzDIFFShjY6OnAzaoFbRYXIfPKEa6lUqnjaVcDjQfbkU/Kr3p9cXASZTIoyuRzZ1dUAGDAMg/LycjgLhfg5Oxup2VkokZQj6OZNLFiwAHw+H7t27ULpk4c8nj4t26JFC8UyMT179sTly5dRUFCAyspKnD59Gj4+PrX6M6mqqsLo0aPx8ccfY8CAAbV6DyGEG+7u7rh16xa++eabGj+0/fnnn7C3t8eOHTuaxVzZfv36YdGiRaza7du3X3u1BMIt2iu2iYmMjESPHj1QXV2tqAkEApSUlEBPT09R++eff7BixQpUVVWBz+dj27ZteOONN+Dr64ubN2+iQ4cO0NLSwuzZszF8+HAsW7YMZ8+eRd++fbFv3z5888032LNnD6ytrdGyZUsMGzYMfn5+6NChA2JiYhSbcH84dy7+/Okn6FVUQIvPx0abrqhk5Fh7/z5Kn3xDdDAwxJddukBXIEBQQQG2pqagWi4Hj8fDyo6d0MvEBJuSk3FV/Ag9jI0x3sLipe/fnJyMCmk1eAA+NDODm54eRiQnI+DJcMLPhYX4q6QEFXp6qNLSQv4zCyfzeDzo6urCyckJkyZNwv379/H333/D1tYWAQEBOHHiBDZv3gyGYeDr64ulS5ciJSUFY8eOxZ07d1745/Ldd99h5syZsLOzU9QuX77c4OsIEkLqJyUlBXPnzn3h7hQ9evTAwYMH4eLi0sjJGld5eTlcXV2RmJioqGlpaeHOnTu1moZC1Ac1dk1IVVUVevTowdpRgc/nIzg4GL169eIk0+XLl5Hw999488bNRr1umUSCstJSSGUyPJ7vq+zmoDdxMSkRV65ceeX52rVrBxcXF7i6usLFxQUuLi7o3LlzjXNxCCGahWEY/Pzzz1iwYAEePnyodFwgEGDJkiX4/PPPazUto6m6efMmvLy8WFNxXFxccPv27RoXfSbqiX5qNSFffvml0jZZn3zyCWdNHQCYm5ujVE8P1YK6rTNXXwb6+mjdujUsLCxgZmYGY2MT6OsbQFtLGzweH1KhEOUtDGu9hVB6ejrOnj2LdevWYdy4cejatSuMjIzg6emJOXPmYN++fbhx44ZiKJcQojl4PB4mTJiAu3fvYtasWUrHZTIZNm7cCCcnpxdOTdEEvXv3xrJly1i1yMhIrFu3jqNE5HXQHbsmIiQkBJ6enqz5Hk5OTggJCVE86cmFvLw8HNu3D943b8GsuJizHM9iAOQaGuK/Ht1RzjCIiIhAZGSkyvZC1NbWhp6enuKXsbExwsLCwOPxVHJ+Qgi3rl+/Dn9//xfuTvHee+9h69ataNWqVSMna3iVlZXo3r07YmJiFDWBQIAbN26gR48eHCYjtUWNXRNQXl4ODw8PxMfHK2pCoRAhISFwdXXlLhgef5Lds307rMIj4JSSwmmWZ0V37IBMV1fMXbgQgid3E8V
iMaKiohAZGalo9mJjY1Wy8rypqaliCPfpLwcHB06bbkLI66usrMTXX3+Nr776ijWn+amWLVti69atmDZtmsZ9qAsPD0fPnj0hlUoVNTs7O4SFhUFXV5fDZKQ2qLFrApYsWYKtW7eyamvXrsWqVas4SsT277//IuKffzAkMAgCNVixXMbn44K3F9zfeqvG/SKfVV1djYSEBEWj9/RXTfNs6kooFMLW1pbV7Lm6uqJ169b1PjchpHHEx8fD39//hUsmDRgwQCOXOFq7di1Wr17Nqi1ZskSxIw9RX9TYqbnAwED07duXtWBm9+7dERwcDC0tLQ6T/V9BQQEO7dkDt7BwtFdBQ1RfKa1bI8LdDe/PnVvjPpG1kZOTo9Ts3b17VyVb7VhYWLAaPRcXF3Tt2pXWiyJETcnlchw6dAjLli1DUVGR0nFdXV18/vnnWLJkidp8X66v6upqeHp6IjQ0VFHj8Xj477//4OXlxWEy8irU2KmxsrIyuLi4sDZl1tHRQVhYGOzt7TlMpuzXn39G/s2b6H8nFHwO/0rJeTxc7e4BM09PjH2yQ4aqlJeXIzY2ltXsRUZG1viNvq50dXXh4ODAavacnZ1hYmJS/+CEEJXIzs7GwoUL8csvv9R43NHREQcPHkTv3r0bOVnDiI2Nhbu7O2u6SufOnREZGQkDAwMOk5GXocZOjX344YfYvXs3q7Z582YsWbKEo0Qvlp2dje+PHoVtdAy6ZWRwluNu27ZIcHLElOnTYWlp2eDXYxgGqampSs3es814fbRv315pGZaOHTvSMiyEcOjs2bOYO3dujQ9k8Xg8zJ07F1999RWMjIw4SKdamzZtwvLly1m1Dz/8kLZNVGPU2Kmpy5cvY9CgQayal5cXrl27pngYQN1cu3YNIZevoP+tWzCSSBr9+kX6+vi3dy/0HDhQsQsHV4qLixEdHa1o9CIiIhAdHY3yJztq1IehoSGcnZ1ZzZ6Tk5NGr69FiLopLS3FqlWrsGPHjhqnaFhZWWHXrl0YNWpU44dTIZlMBh8fH9y4cYNVv3TpEgYOHMhRKvIy1NipoaKiIjg7OyMtLU1R09fXR2RkpFpP0JVKpTh+5AhkcfHwCQ+HsBEfpJDy+bju7gYtOztMmzFDLeeryWQy3Lt3j9XsRUZGIjMzs97n5vF46Nq1q9KTuVZWVhr3xB4h6iQkJASzZs1CZGRkjcdHjx6NnTt3wsrKqpGTqU5SUhJcXFxYH0ytra0RHR2tEXclNQ01dmpo5syZOHLkCKu2c+dOfPjhhxwlqr2cnBz8eOIkTFKS4RkT2yjz7eQ8Hm44OqCwQ0dMnPYeLCwsGvyaqpSfn4+oqCjWwxpxcXE1LrFQVy1btlRq9uzt7WkVeUJUqLq6Gtu2bcPq1atrvCvfokULbNiwAbNnz26y0yh27tyJBQsWsGozZ87EoUOHOEpEXoQaOzVz7tw5DB8+nFUbMGAA/vnnnybzDSE1NRW/nToFUVoaesXGNeidOymfj1sO9hBbW+PdSZPQvn37BrtWY6qqqsLdu3eVnsx9dt/b16WlpQU7OzulJ3PNzMxUkJyQ5uvBgweYM2cOLl68WONxT09PHDhwAI6Ojo2crP7kcjkGDRqEq1evsurnzp3D0KFDOUpFakKNnRoRi8VwcHBATk6OotaiRQtER0c3uYYlNTUVZ376GfpZWfCIj2+QOXdF+voItbdDuWUbjJ4wvsn9HtUVwzDIzs5WavYSExNVsgxLmzZtlJo9GxsbtZ3TSYg6YhgGP/zwAz766CPk5eUpHRcKhVi2bBk+++wz6OnpcZDw9aWkpMDJyYm1taKlpSViYmIgEok4TEaeRY2dGpkyZQp++OEHVu3QoUOYOXMmR4nqJycnB+cCAlCQkQnbpCTYZGaqZGhWzuMh0coKCV1tILKywtARI5rc8KsqSSQSxMTEKD2ZW1JSUu9z6+npwdHRUWkZFppXQ8jLPXr0CEuXLsXRo0drPN6lSxfs378fAwYMaOR
k9XPw4EH4+/uzalOmTMF3333HUSLyPGrs1MSvv/6Kcc+tuzZ06FCcPXu2SU9+l0qlCAoKQkhQEAzz89E5NQ3t8vNfa4cKGZ+PdDMz3G9vjVIzM/T09kafPn3U8kEJrsnlcqSkpLAavYiICKSoaNu3jh07sp7KdXFxQYcOHZr031VCGsKVK1fwwQcf4N69ezUe9/Pzw5YtW9CyZctGTvZ6GIbB0KFD8ddff7Hqv/32G8aMGcNRKvIsauzUwMOHD+Hg4MCaP2VqaoqYmBi0adOGw2Sqk5WVheCgICQnJkIokaB9ejosH4lhXFYGLZnshe+rFghQZGCA7JYipLZrB6m+Pjp27Qovb+9GWadO0xQVFSntlxsTE4OKiop6n9vIyEjpQQ1HR8cmN9xEiKqVl5dj/fr12LhxI2v/1afMzMywbds2TJ48uUl8OMrMzISjoyMKCwsVtVatWiEmJoa2TFQD1NhxjGEYjBkzBr///jur/t1332HKlCnchGpABQUFiIqKQlRoKCrKysBIpTAsL4eRuADaUin4jBxyHh9VQiGKRaYo1dMDTyiEroEBnD084Ozs/NrbhJGaSaVSJCUlsZq9yMhIZGdn1/vcfD4f3bp1U9ov18LCokn8ACNElWJiYuDv76+0JtxTb731Fvbu3YtOnTo1crK6++677/Dee++xamPGjMGvv/5K/7Y5Ro0dx5rrPw6ZTAaxWIzc3Fzk5uYiLycHVRUVkEmlEAiF0NbVRSsLC5ibm8Pc3BwikYgm8Teyhw8fKs3bi4+Pr/GOQ121atVKqdmztbXVmH02CXkRuVyO/fv3Y/ny5TXOg9XT08OaNWvw0UcfqfW/hxfdlPj+++8xefJkbkIRANTYcaqm29lmZmaIjY2l29lELVVWViIuLk6p4ROLxfU+t7a2Nuzt7ZW2UKOn7YgmyszMxIIFC3D69Okaj7u4uODgwYPo0aNHIyervdzcXDg6OrKmEZmYmCA2NlZjphE1RdTYcYQmoBJNwTAMMjMzlZZhSUpKgiq+vbRt21ZpGZbOnTvTHVyiEX7//Xd8+OGHNe5Aw+fzMX/+fHz55Zdo0aIFB+leTVMf/GvKqLHjyKFDhzBr1ixWjR4ZJ5qkrKyMtV/u019lZWX1Pre+vj6cnJyU9stV1x9+hLxMcXExVq5cid27d9f4Yahdu3bYvXs33nnnHQ7SvdrkyZNx6tQpVq0pL9XV1FFjx4EXLfIYGxtLDwYQjSaXy/HgwQOl/XKf3Re5Pjp37qy0DIu1tTXdOSBNws2bNzFr1izExMTUeHzs2LHYsWOH2q0IoEmL62sCauwaGW3LQoiyp09LP9vsxcbGorKyst7nNjExUVqGxcHBAbq6uipITohqVVdXY+vWrfjiiy9qXIbI2NgYGzduxKxZs9Rqm8mzZ88q3VFsatthagpq7BoZbaRMSO1IpVIkJCQoLcOSm5tb73MLBALY2toqPZlrbm6uguSE1N+9e/cwe/ZsXL58ucbjXl5eOHDgAOzt7Rs52YvNmDFDaaeNXbt2Yd68eRwlap6osWtESUlJcHFxQXl5uaJmbW2N6Oho2qKJkFrKyclRmrd39+5dyF6y0HVtmZubKzV73bp1o91NCCcYhsHJkyexePFiPHr0SOm4lpYWVqxYgRUrVqjFHeiioiI4OTkhPT1dUdPX10dkZCS6dOnCYbLmhRq7RiKTyeDj46O0MOXly5eb3F6BhKibiooKxMbGKjV8zy4l9Lp0dHTg4OCgtAyLiYlJvc9NSG3k5eXh448/xsmTJ2s83q1bN+zfvx/9+vVr5GTKLl26hDfffJNV8/LywrVr1+hJ9kZCjV0j2bx5M5YtW8aqffjhh9i5cydHiQjRbAzDIC0tTWm/3Pv376vk/NbW1krNXqdOnWg+EWkw//zzD2bPno0HDx7UePz999/Hpk2bOH8Ib968edizZw+rtnnzZixZsoSjRM0LNXaNIDY2Fu7u7qiqqlLUunTpgoiICBgYGHCYjJD
mp6SkhLUMS0REBKKjoyGRSOp9bkNDwxqXYaF/50RVJBIJvvzyS2zevLnG6QetW7fG9u3bMWHCBM6eBi8tLYWrqyvrQ5SOjg7CwsLUak6gpqLGroFVV1fD09MToaGhihqPx8N///0HLy8vDpMRQp6SyWS4f/++0oMaGRkZ9T43j8eDzf/au/OAKKu9D+DfWYBhWGcYYNgEZZdFRdQCtbTSq+Xaq5ktWjdR8uabXbzea9pmZuUSbyaplbabWlZU3Ky3Ul/BLVFg2EFA9kVm2GaAmXme9w+V6zRgwjwsM/w+f13PA+c5A97O1+c553cCA4125np7e1MZFtJnGRkZiIuLw7lz57q9PmvWLCQlJcHPz29gB3bdqVOnMHXqVIO6fNHR0Th9+jStWe1nFOz62SuvvIIXX3zRoC0hIQHbtm0bpBERQm7X1atXkZmZaRD2srOzodVqTe5bKpUabdQIDQ2FjY0NByMnw4Fer0dSUhI2bNhgUBf1BrFYjM2bN2PNmjWDEqYSEhKwY8cOg7bNmzdj48aNAz6W4YSCXT+6ePEiJk6caHBoemhoKNLT04fEDiZCSO9ptVrk5eUZHaFWX19vct9CoRChoaFGR6i5urpyMHJiqcrLy7F69Wp899133V6PiorCe++9h6ioqAEdl0ajQVRUFPLy8rrahEIhzp8/j7Fjxw7oWIYTCnb9pKOjA9HR0QYVxAUCAc6cOYPo6OhBHBkhhGssy6KmpsYo7OXn54NhGJP79/DwMAp7gYGB9EqLdGFZFkePHsUzzzyD6upqo+t8Ph9r167Fyy+/PKBrPs+dO4eYmBiD9YCRkZE4d+4cPZ3uJxTs+smGDRuwdetWg7ZNmzbhlVdeGaQREUIGmkajgUKhMCrD0tzcbHLfIpEI4eHhBmEvMjISTk5OHIycmCuVSoV//etf2LNnT7fXfX198e6772LWrFkDNqaNGzdiy5YtBm0bNmwwaiPcoGDXD86cOYPY2FiDf6mPHTsWZ8+ehbW19SCOjBAy2FiWRWlpqVEZlpKSEk769/PzMyrDMnLkSNqoMcykpqYiLi4OOTk53V5fsmQJEhMTB+S0lc7OTkyYMAGZmZldbXw+H2lpaZg0aVK/33+4oWDHMbVajXHjxqGgoKCrzcrKCr///jsiIyMHcWSEkKGsubm567zcG2FPoVAYnFTTVw4ODka7csPDwyEWizkYORmqOjs78eabb2Lz5s0G5bZucHZ2xvbt2/Hkk0/2e/DPyMjAhAkTDDYeBQcH4+LFi7C1te3Xew83FOw4tnbtWiQmJhq0bdmyBRs2bBicARFCzJZer0dhYaFRGZaqqiqT++bz+QgKCjIKfJ6envR0z8Lk5+dj5cqVOHHiRLfX77rrLuzduxfBwcH9Oo4tW7YY7Yh97rnnjHbOEtNQsOPQiRMnMG3aNIO6PRMnTkRqaiotciaEcKahocEo7OXk5BjswO8rmUxmVIYlJCSElpGYOZZlceDAASQkJECpVBpdt7a2xvPPP4/169f326YGnU6HmJgYnD9/vquNx+PhxIkTmDJlSr/ccziiYMeRlpYWjBkzxmCdjEgkwsWLFxESEjKIIyOEDAednZ3Izc012pnb3eHxvWVlZYXRo0cb7cx1cXHhYORkINXW1mLt2rU4ePBgt9dDQ0Oxb98+TJ48uV/un5ubi3HjxqGjo6OrbdSoUcjIyIC9vX2/3HO4oWDHkVWrVmHv3r0GbTt37sTatWsHaUSEkOGOZVlUVlYa7cotKCgAF//p9/LyMgp7AQEBdNi7Gfjxxx8RHx+P0tLSbq+vXLkSr7/+OpydnTm/944dO4zOjY2Pjzc6X5b0DQU7Dhw7dgx/+ctfDNqmTJmC48eP04HghJAhp62tzaAMy6VLl5CZmdnt6QW9JRaLER4ebrArNzIyEg4ODhyMnHCpra0NL730Enbu3NltvUW5XI5du3bhwQcf5HTdpV6vx913341Tp04ZtP/000+47777OLvPcEXBzkQ
qlQrh4eGorKzsarOzs0NGRgb8/f0HcWSEEHL7GIZBSUmJQdjLyMhAWVkZJ/2PGjXKIOyNGTMGvr6+tFFjCEhPT0dcXJzBmeY3mzNnDnbv3g0fHx/O7llcXIzIyEio1equNm9vbygUCqrFaCIKdiZavnw5PvroI4O2pKQkxMfHD9KICCGEOyqVqqsMy42wp1AoDNZI9ZWTk5PRrtywsDAqfzEIdDoddu3ahY0bNxqErRvs7e3x6quv4m9/+xtnr9qTkpKwevVqg7YnnngC+/fv56T/4YqCnQmSk5Mxb948g7b77rsPx44do3+FEkIslk6nQ0FBgdFGjZqaGpP7FggECA4ONtqZK5fLORg5+TNlZWV4+umnkZKS0u31CRMmYN++fZyc9cowDGbMmIFffvnFoP27777DAw88YHL/wxUFuz5qaGhAeHg4amtru9ocHR2hUCg4fVxNCCHmora21mijRm5ursE5oX3l5uZmtFEjODgYVlZWHIyc3IxlWRw5cgRr1qwxmONuEAgE+Pvf/44XX3zR5CLXV65cQXh4OFpaWrra5HI5FAoF7bruIwp2ffTQQw/h8OHDBm0HDhzA8uXLB2dAhBAyBLW3tyMnJ8co8HVXS623rK2tERYWZnSEmkQi4WDkRKlUYv369Xjvvfe6vT5y5Ejs2bMHM2bMMOk++/fvx1//+leDtiVLlvRYkoXcGgW7Pjh06BCWLFli0PbAAw8gOTmZXsESQsifYFkW5eXlRmGvqKiIkzIsPj4+RmHP39+fqhT00cmTJxEXF4f8/Pxurz/66KPYuXMnXF1d+9Q/y7KYM2cOfvjhB4P2w4cPY9GiRX3qczijYNdLNTU1CAsLQ2NjY1ebVCqFQqGAh4fHII6MEELMW2trK7Kysgx25mZlZaGtrc3kvu3s7BAREWEQ9iIiIqgo7m3q6OjA1q1b8dprrxmc93qDVCrFzp078fjjj/fpAUd1dTXCwsIMnuS6uLggOzsb7u7uJo19uKFg1wssy2L+/PlITk42aD948KDREzxCCCGmYxgGxcXFRmVYysvLTe6bx+PB39/fqAyLj48PvX3pQW5uLuLi4oxq0N0wffp07NmzB4GBgb3u++DBg1i6dKlB2/z583H06FH6ffQCBbs/wTAMOjo6YGtri48++shoDd2iRYtw6NAh+ktHCCEDqLGx0agMS3Z2Njo7O03uWyKRdFuGpb/OUDU3DMPggw8+wLp169DU1GR03cbGBi+88AISEhJ6dcYwy7JYtGgRvvrqK4P2jz/+GI899hjUajVEIhG9Uv8TFOxuISUlBUuXLkV7ezsWL16Mb7/9Fs3NzV3X3dzckJ2dDZlMNoijJIQQAgBarRb5+flGZVjq6upM7lsoFCIkJMSoDIubmxsHIzdP1dXVePbZZ402Et4QHh6Offv24c4777ztPuvr6xEWFob6+vquNkdHR8ydOxdHjhyBSCTC559/jtmzZ5s8fktFwe4WAgICUFxc3OP1b775xqiOHSGEkKGlpqbGKOzl5eV1e4xWb8nlcqMyLEFBQRAKhRyM3Dx8//33ePrpp7t9Pc7j8RAfH4/XXnvttk+U+Prrr7Fw4cIerwcEBKCwsLDP47V0wyLY6fV6NDY2ora2FrW1taivqUGHRgNGrwdfIICNrS1c5XK4u7vD3d0dUqkULS0tt9wyv3TpUnz22WcD+CkIIYRwRaPRIDs722hnbnevFntLJBIZlWGJjIyEs7Oz6QMfolpbW7Fp0ya8/fbb3QZmT09P7N69G/Pnz7+t/h5++GF88cUXPV5XqVRwcnLq0/zO1ckZQ5VFBzulUomMjAxkpaejva0NrE4He40GTo2NsNLpwGdZMDwetEIhmqRStNragicUQmRnB1dPT6xatarH/5MHBATgt99+g7e39wB/KkIIIf2BZVmUlZUZBL1Lly7h8uXLnPTv6+trtFFj5MiRFrVm7Pz581ixYgUyMjK6vb5gwQLs2rULXl5ePfZRUVGBadO
moaioqMevOXXqFLRabZ/m94ioKIuud2iRwa6qqgppp06hpLAQVmo1Rlwph0djI5za2mB1iwroWoEATXZ2qJZKcdnTA416PQpLSnAqLa3bo3IeeeQRfPrpp/35UQghhAyy5ubmrjIsN17pZmVlQaPRmNy3g4MDIiMjDcJeRESEySc6DCatVovExES8+OKL3f6MHBwcsHXrVqxatarbp2ePPvpoj2/E5HI5JsfEYGxEBOy02j7N71dG+EArFmNkYCBip0yxuFJlFhXsdDodUlNTcT41FfYNDQgouwLvhgYI+rCOokmjRomjI64EBqLB3h6p588jLS3N4GicefPm4ZtvvuHwExBCCDEHer0eRUVFBmEvIyMDlZWVJvfN4/EQFBRktFHD09PTrCowlJSUID4+HseOHev2+h133IF9+/YhIiLCoH3evHlGZcUEAgFiYmIQO2ECZK2tCK6sREBLa5/mdz2fjwqZDEW+I9Aqk2FCbCxiY2MtZl2kxQS7mpoa/JCcDGVFJUIKCxFYWQm+CR9NqVRC064Bw+OhKigIhSEhqGxsRHJKCurq6uDk5ISffvoJEydO5PBTEEIIMWcNDQ3IzMw0CHs5OTndFvXtLRcXF6OwFxoa2quSIgONZVkcPHgQzz77rMFO1xuEQiH+8Y9/YOPGjbC1tQUAnD17FjNnzuxaCuXm5oa5998PL4kEgXl58CwogJ1IBImzaa9SGR4PhV5eyAsMhNTbC7PnzoVcLjepz6HAIoJdWVkZvj50COKqaozPzYWjWm1yn7W1tdAz/3k6p3Z0RO748agWi6HR67Fx40aL+AtACCGkf3V2diI3N9doo0ZDQ4PJfVtZWSE0NNRoZ+5QK8N19epVrFu3DgcOHOj2ekBAAPbs2YN77rkHwLVSKi+88AJ+/vlnLJo3Dx5qNUIvXID4eskxgUAAdzduTqRoFotxITQUak9PLHhoMXx9fTnpd7CYfbArKyvDVwcPwqXsCibm5EDIwfZ1AKiprQXDGL6vF9raIj8mFqqRfnjw4YfN/pdPCCFkcLAsi+rqaqMyLAUFBZyUYfH09DQ6LzcwMHDQd4T++uuvWLlyZY8bI5YtW4bt27dDJpOhrKwMhz75BI7FxQhKS4PgpqVQAr6A06PGdHw+zoaNRuOIEWY/v5t1sKupqcEXH38M55JS3JmdbdKr1z9Sq9VQNTUBYMHj8eHs7AxbkQgMj4fT4WFQ+Y3Ekscfo6d2hBBCOKNWq6FQKIye7rW0tJjct62tLcLDww3CXmRkJBwdHTkY+e3TaDTYsmUL3njjDeh0OqPrMpkMr7/+OlquXu2a3zs0GqhUKrAsA4AHZycnzjeYWMr8brbBTqfT4aP9+6HPycWUixc5e1J3Mz3DQKfTwdraGjcvV9Xx+TgZNQ5WoaF4/MknLWbBJSGEkKGHYRiUlpYalWEpLS3lpP+RI0calWHx8/Pr940aCoUCcXFxOH36tEG7QCDAE48/jnBra0xXZEN0fRwsAG1nJwRCIQT9VCLGEuZ3sw12J06cwPlffsW0s2c5WVPXW01iMY7fMQkT77kHU6dOHfD7E0IIGd6ampqMzstVKBRob283uW9HR0ej83LDw8O7NjhwhWEY7N27F//85z+7juycMmUKpk+YgEm//Qa75hY4ODjAzt4eA7Uf2Nznd7MMdlVVVfj8ww8RkqVAcEXFoI0jz9sb+RHheOSJJyyuDg4hhBDzo9PpUFhYaFSGpbq62uS++Xw+goODjXbmyuVyk5/uVVZWYs2aNUhLS8PypUsRnpcH7/z8rutWQis4OTvD2srK1I9xW8x5fjfLYPfl4cNoOHMG036/wOm6ut5ieDz8Fj0esjvvxH8tWjRo4yCEEEJupa6uzmjdXm5ubrdr3HrL1dXVKOyFhITA6k9CWGJiIj744AOEhITgjTfewKhRo/A/b70FfX4+xv3ySzfzOw92dnZwdHTs96d35jy/m12wUyqVeD8pCePSL8K3rm6wh4NSNzd
cihqHp55+2mKPJyGEEGJ5Ojo6kJOTYxT4GhsbTe7b2toao0ePNtqZK5VKAQDnzp3DpEmTur7eyckJe/fuxZWiIoy9kA5JURHa1GpcW1lnSCy2g7OTk8lj/DPmOr+bXbA7fvw4Lv38M/5yKrVPFae5pufz8e/JsYiaMQN33XXXYA+HEEII6TOWZVFRUWG0UaOoqAhcxAVvb2+MGTMGLS0tOHnypMG1qVOnYvaEiZh7/jyEDINOrRYqlQo6nWFxZ65LnfTEXOd3s9ruodfrkZWejhFXyodEqAMAAcPAt7wcmRcuYPLkyYNeI4gQQgjpKx6PBx8fH/j4+OCBBx7oam9ra+s6L/dG2MvMzERbW1uv+q+oqEBFN2vjeTweoiIiIC8uQmNtLaRSKaytrODq6oq21la0tLSAvf70ztrGxrQPeZvMdX7v035hLipaP/XUUyguLu7xemJiIjo7O7v+PG3aNDQ2NqK9rQ0e3TwmfjQzEzMv/I456elYeOkiclpbTR7j7fK4em1chw4dwvTp0xEZGYkvvvgCAPD7779j3bp1nN3r3LlziI6OhpWVFb7//nvO+iWEEEJ6YmdnhzvuuAMrV65EUlIS0tLS0NzcjMLCQnz55ZfYtGkT5syZgxEjRvSpfz6fj88PH4a4ogI6nRZ1dXVo7+gAD4C9vT1c3dzg6OAIJycnOF1/DVvR3o6Um44pO6tS4ZncnK4//+/Vq5h7MR2z0y9g/sV0vH9ToNTo9RiblopPq6puOS6n2jq8/c47cHR0REJCQp8+20Dr06tYmUzGyVEot+Ln5weFQgF7e/uuNoVCgZQjRzDn+AmjunWPZmbiBX9/BNnZ4XBNDVIa6vFheMQfu+0VPctC8Cc7fVgArVotvps6FUdSfkB2djaAa39Jq6qqOH9cXFFRgatXr2LHjh1YvHixwb+oCCGEkMGmVCqNzsvNzs5GR0dHj98TFhaG/5o1C1OTk7tOmBAKhHBzc+vxe86qVPi0ugq7Qkcb/TmntRVr8nLxQVg4fG1t0ckw+LauDouuFx1Oqa/Hx1VV4POAzyPH9HiPNh4Pe0aNhKuPDzQaDbZv396XH8mA4uxVbHp6OlatWgWNRoNx48Zh3759EIlE+Pbbb7Fu3To4OTkhMjISEokE27dvx91334133nkHoaGhWLZsGdLT0yEQCPDcc89BrVajqqoKMTEx8PPzQ3JyMmQyGQ4dOgR7jQbvlZXih/p68AAsdJfjCS8vg7GMd3TE/spryVzPsnizpATnm5ugZVis8PbGXDc3qPV6JOTno0SjxhgHR5xpUuGHqPFQtLRgd/kVWPP5aNLp8FF4BF4uLkKhWg2WBRL8/BArkeC0SoXNRUVgWQYCAE+OHQs7O7uuMTAMg6+++grOzs745JNPsHv3bjQ2NmL9+vWorKyEs7Mz3nzzTXh7e2PdunVwcHBARkYGlEoltm7darCo9I8cHBzQ1taGmpoaXL58matfISGEEMKJG69z58yZAwDQarUoKSlBbm4ujh8/juTkZIOvd3d3h21LC2ra2/F6XR3aGQZCPh+vikQItbNDkUaDfxYW4MYjnQNh4XirrAyF6jbMvZiORz084SsSdfW3v7IC8T4+8L1ed8+az+8KdQCQ0lCP//b1xUvFRajp6IC8h9e7diyLSDc3lN4ilA41nAW7ZcuW4f3338ekSZMQHx+PpKQkxMfHY82aNUhNTYVcLse9996L6Ohog++7dOkSSkpKkJNz7fFpU1MTnJycsG3bNqSlpRk8sauvqUFJRgZOq1Q4OnYcrPl8qLSGiyoB4HhjI+6RugAAjtTWwM3aGkfHjkO7Xo9FGRmYIpHgy9oaeIlskDR6NFJVShytq+36fkVrK/4dNR7uNjbYUVqKaVIp3ggKRqNWi4czM5AyLgp7Sy5jlcQZ0WIxWvV6VDY3QfmHV8SrV6/u+t/+/v5G4+xpMebSpUv/7McNADh69OhtfR0hhBAylKWnp6OjtBSbBALs8PSENY+
H4o4OvFZYgB2enjjQ0IAHXVzw2AhftOv14PN4WOvra/TE7oYitRp/9fLu9l6tOh2yW1sx0ckJs2Qy/NjQgOV/eEB0M8dGJVoEfNg5OHD6mfsLJ8FOpVKho6Oj6ynTY489hm3btmH69OkICQmBt/e1H+6DDz6IsrIyg+8dNWoUqqqqsHr1asybNw8zZszo8T4dGg1yKyrwoLsc1tePE3G+qU7OM3m56GQYtOr1SB4XBQBIVSpRoFbj2/prpVFa9TqUt7cjvbkFcdfHFessgfNNx4ZEOTrC/Xp6T1UpcbzxKpLKywFcey9fqlQizMYG+65eRVlnJ+62t4ewsxMe7u4o7OFgY0IIIYR0b8GcOZisVkN74QL+p74exZ2d4ANouv5aNszGBh9WV0PN42OWqwwjRH0/AePXxkZMlUgg4PEwS+aKTUWFtwx21joddMxAnXthun7dFXs7y/ckEgmysrKQkpKCt956Cz/99FOP77AZvR64RZ+7QkIRKBbjtZLLePVyMXaHjgYDYHNAACY6Of9xdD32Y3vTGXQMy2LP6DB43fSIt7mlBY9IJJgkFuO0Wo2nKyvxj9GjERwQgJOpqX/6mQkhhBDyH0I+HzyGwZdNTZALhXjezQ0alsWS6w+D7nVwQKjIFgo+H08oFHg7JPSW/fmLxchta0PoTW/9bkhpqEdmSwtOKpUAgLrOTlS2txvM8zfjswwYvYkfcABxcoqus7MzbGxscP78eQDAZ599hqlTpyIkJAR5eXmorKyEXq/v9tVhQ0MDGIbB4sWL8dJLL+HSpUsArq0ja2lpMRysQIAIT098VVuDzuubJ/74KpbH4+E5Xz9cam7GZbUak50l+Ky6GvrrgbCgrQ16lsU4R0f8+/oGkNMqFVQ9VN+OlUjw8U27ZnJaW+Hg4IBahkGAjQ0ek0jga2WFerUaypseAxNCCCHk9ugYBiyfjzaGgYtQCB6Phx9vygC1ej3CXF3xhJcXJjs7o0ithp1QgDZ994nrSS9vvFt+BVc0GgCAlmHwZU0NmnU6KFpb8X8TJ+G3CRPx24SJiPP27soD3WF4fPDNpNQJ0Mcndkqlsuv1KgBs27YNH374IeLj49He3o6xY8ciPj4eIpEIiYmJmDZtGpycnBASEgJHR0eDviorK7F8+XIwDAOhUIjExEQAwIoVKzBt2jQEBQV1LbK0sbVFuJ8fNIVFmH/pIoQ8Hh50c8eyPzxCtRUI8KSXN/ZXVuLlgABUtLdj/sV0MABcra3xflg4HvHwREJ+HmanX8AYewe4W1tDxDfOuat9RuDVy8WYk34BOpZFmL09tgeHILm9HWdUKoBhEGxtDV+5HCk5OQbf+80330AkEmHPnj34/PPP0dDQgLi4OFRUVEAikWDfvn3w9fVFXFwc5s+fj9mzZ6O1tRXR0dHIy8vr9meflZWFBQsWQKVSwdbWFv7+/jh+/HjvfoGEEELIEODj44Py8nIcPXIE7MmTWOEfgGfycnFMo8F9Uhfw+Sp4yD2QXFGB53NzIOTx4GVjg/tcXGDF40HHst1ungizt0eC30iszs2FjmXA5/Gw0M0d/3v1KiY7SwwqXtznIsPGokI85d39mrz//uZrtOh04PF4+OKLL3DmzBmDDDTU9PvJE62trbC3t4der8fChQuxYsWKPpfo+OWXX5B/7BjuO33G5HHpWBYMy8Kaz0dGSwteLi7C0bHj+tRXp7YTP0ZHIyU3F7/++isAwMbGBtXV1WZ1DAkhhBAyGLic37n28513IHjmTNxzzz2DPZTb0u8nT7z77rv47LPP0NHRgXvvvRf3339/n/tyd3fHBVtbaAUCWPXw+PV2qfV6LMvKgo5lYcXn4SX/gD73xRPZQu/igtWrV8PT0xP19fVISEigUEcIIYTcBi7ndy5pBQK02toOyBFmXOn3YLdu3TrOTl5wd3cHTyhEk50dZM3NJvXlKBTi63F9e0L3R012duAJhZgyZQoWLlzISZ/Hjh3D+vXrDdpiY2O
xe/duTvonhBBChgou5/e+UGq1WKbIMmiz5vGxZ+pU8IRCCnb9RSqVQmRnh2qpdFB+8T2pdrk2LqlUylmfM2fOxMyZMznrjxBCCBmqBnt+l1hZdZVJu1lWP8zv/Y2TXbEDRSAQICIqCldG+EDfzUaHwaDn81Hm44PI8ePN5oBgQgghZCih+Z07Q+On1wtjxoyBVixGhUw22EMBAJTLZNCJxYiMjBzsoRBCCCFmi+Z3bphdsJNIJBgZGIgi3xFgeINbCZrh8VDsOwIjg4JoowQhhBBiAprfuWF2wQ4AYqdMQatMhsJbHAEyEAq8vNAqkyF28uRBHQchhBBiCWh+N51ZBjsPDw9MiI1FXmAgmsXiQRlDk1iM/KBATJw8GR4eHoMyBkIIIcSS0PxuOrMMdsC10h8Sby9cCA2FboAXWur4fFwYHQqplxdiYmIG9N6EEEKIJaP53TRmG+yEQiHunzsXak9PnA0bPWDv4xkeD2fDRkPj4YnZc+dCKDSrijGEEELIkEbzu2nMNtgBgFwux4KHFqNxxAicDg/r92Sv4/NxOjwMjSNGYMFDiyGXy/v1foQQQshwRPN73/X7WbEDoaysDF8fOgxxVRXG5+bCUa3m/B5NYjEujA6FxsMTCx5aDF9fX87vQQghhJD/oPm99ywi2AFATU0NfkhOhrKiEiGFhQisrASfg4/G8Hgo8PJCflAgpF5emD13rlkneUIIIcSc0PzeOxYT7ABAp9MhNTUV51NTYd/QAP+yK/BpaICAYXrdl57PR7lMhmLfEWiVyTBx8mTExMSY7Tt3QgghxFzR/H77LCrY3VBVVYW01FSUFBRAqFbDt7wcHlcb4dTWBiu9vsfv0woEaLKzQ7WLFGU+PtCJxRgZFIRYM93yTAghhFgSmt//nEUGuxuUSiUyMzOReeEC2tvawOp0sNdo4NiohLVOBz7LgOHx0SkUolkqQautLXhCIUR2dogcPx6RkZFmV3GaEEIIsXQ0v/fMooPdDXq9Ho2NjaitrUVtbS3qa2rQ2d4OvU4HgVAIa5EIrnI53N3d4e7uDqlUalYH/hJCCCHDEc3vxoZFsCOEEEIIGQ7Muo4dIYQQQgj5Dwp2hBBCCCEWgoIdIYQQQoiFoGBHCCGEEGIhKNgRQgghhFgICnaEEEIIIRaCgh0hhBBCiIWgYEcIIYQQYiEo2BFCCCGEWAgKdoQQQgghFoKCHSGEEEKIhaBgRwghhBBiISjYEUIIIYRYCAp2hBBCCCEWgoIdIYQQQoiFoGBHCCGEEGIhKNgRQgghhFgICnaEEEIIIRaCgh0hhBBCiIWgYEcIIYQQYiEo2BFCCCGEWAgKdoQQQgghFoKCHSGEEEKIhaBgRwghhBBiISjYEUIIIYRYCAp2hBBCCCEWgoIdIYQQQoiF+H8UN8APsab5QAAAAABJRU5ErkJggg==", - "text/plain": [ - "
" - ] - }, - "metadata": {}, - "output_type": "display_data" - } - ], - "source": [ - "import tpot2\n", - "import pandas as pd\n", - "import numpy as np\n", - "from sklearn.linear_model import LogisticRegression\n", - "import sklearn\n", - "\n", - "subsets = [['a','b','c'],['d','e','f'],['g','h','i']]\n", - "\n", - "est = tpot2.TPOTEstimator(population_size=40,generations=20, \n", - " scorers=['roc_auc_ovr',tpot2.objectives.complexity_scorer],\n", - " scorers_weights=[1,-1],\n", - " n_jobs=32,\n", - " classification=True,\n", - " leaf_config_dict=\"feature_set_selector\",\n", - " root_config_dict=root_config_dict,\n", - " inner_config_dict=\"transformers\",\n", - " subsets = subsets,\n", - " verbose=1,\n", - " )\n", - "\n", - "\n", - "est.fit(X_train,y_train)\n", - "print(sklearn.metrics.get_scorer('roc_auc_ovr')(est, X_test, y_test))\n", - "\n", - "est.fitted_pipeline_.plot()" - ] - }, - { - "cell_type": "code", - "execution_count": 16, - "metadata": {}, - "outputs": [ - { - "name": "stdout", - "output_type": "stream", - "text": [ - "FeatureSetSelector_1 : FeatureSetSelector(name='1', sel_subset=['d', 'e', 'f'])\n", - "FeatureSetSelector_2 : FeatureSetSelector(name='0', sel_subset=['a', 'b', 'c'])\n" - ] - } - ], - "source": [ - "# print the selected features for each FSS\n", - "\n", - "#get leaves\n", - "leaves = [v for v, d in est.fitted_pipeline_.graph.out_degree() if d == 0]\n", - "for l in leaves:\n", - " print(l, \" : \", est.fitted_pipeline_.graph.nodes[l]['instance'])" - ] - }, - { - "cell_type": "code", - "execution_count": 17, - "metadata": {}, - "outputs": [ - { - "name": "stdout", - "output_type": "stream", - "text": [ - "LogisticRegression_1 : LogisticRegression(C=0.01924346331466653)\n", - "PolynomialFeatures_1 : PolynomialFeatures(include_bias=False)\n", - "OneHotEncoder_1 : OneHotEncoder()\n", - "FeatureSetSelector_1 : FeatureSetSelector(name='1', sel_subset=['d', 'e', 'f'])\n", - "FeatureSetSelector_2 : FeatureSetSelector(name='0', 
sel_subset=['a', 'b', 'c'])\n", - "FastICA_1 : FastICA(whiten='unit-variance')\n", - "PolynomialFeatures_2 : PolynomialFeatures(include_bias=False)\n" - ] - } - ], - "source": [ - "# print all hyperparameters\n", - "for n in est.fitted_pipeline_.graph.nodes:\n", - " print(n, \" : \", est.fitted_pipeline_.graph.nodes[n]['instance'])" - ] - }, - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "## csv file\n", - "\n", - "note: watch for spaces in the csv file!" - ] - }, - { - "cell_type": "code", - "execution_count": 18, - "metadata": {}, - "outputs": [ - { - "name": "stderr", - "output_type": "stream", - "text": [ - "Generation: 100%|██████████| 20/20 [00:46<00:00, 2.34s/it]\n", - "/home/ribeirop/miniconda3/envs/tpot2env/lib/python3.10/site-packages/sklearn/linear_model/_sag.py:350: ConvergenceWarning: The max_iter was reached which means the coef_ did not converge\n", - " warnings.warn(\n" - ] - }, - { - "name": "stdout", - "output_type": "stream", - "text": [ - "0.9678534836065574\n" - ] - }, - { - "data": { - "image/png": "", - "text/plain": [ - "
" - ] - }, - "metadata": {}, - "output_type": "display_data" - } - ], - "source": [ - "import tpot2\n", - "import pandas as pd\n", - "import numpy as np\n", - "from sklearn.linear_model import LogisticRegression\n", - "import sklearn\n", - "\n", - "subsets = 'simple_fss.csv'\n", - "'''\n", - "# simple_fss.csv\n", - "one,a,b,c\n", - "two,d,e,f\n", - "three,g,h,i\n", - "'''\n", - "\n", - "est = tpot2.TPOTEstimator(population_size=40,generations=20, \n", - " scorers=['roc_auc_ovr',tpot2.objectives.complexity_scorer],\n", - " scorers_weights=[1,-1],\n", - " n_jobs=32,\n", - " classification=True,\n", - " leaf_config_dict=\"feature_set_selector\",\n", - " root_config_dict=root_config_dict,\n", - " inner_config_dict=\"transformers\",\n", - " subsets = subsets,\n", - " verbose=1,\n", - " )\n", - "\n", - "\n", - "est.fit(X_train,y_train)\n", - "print(sklearn.metrics.get_scorer('roc_auc_ovr')(est, X_test, y_test))\n", - "\n", - "est.fitted_pipeline_.plot()" - ] - }, - { - "cell_type": "code", - "execution_count": 19, - "metadata": {}, - "outputs": [ - { - "name": "stdout", - "output_type": "stream", - "text": [ - "FeatureSetSelector_1 : FeatureSetSelector(name='two', sel_subset=['d', 'e', 'f'])\n", - "FeatureSetSelector_2 : FeatureSetSelector(name='one', sel_subset=['a', 'b', 'c'])\n" - ] - } - ], - "source": [ - "# print the selected features for each FSS\n", - "\n", - "#get leaves\n", - "leaves = [v for v, d in est.fitted_pipeline_.graph.out_degree() if d == 0]\n", - "for l in leaves:\n", - " print(l, \" : \", est.fitted_pipeline_.graph.nodes[l]['instance'])" - ] - }, - { - "cell_type": "code", - "execution_count": 20, - "metadata": {}, - "outputs": [ - { - "name": "stdout", - "output_type": "stream", - "text": [ - "LogisticRegression_1 : LogisticRegression(C=90.92104183243647, solver='saga')\n", - "FeatureSetSelector_1 : FeatureSetSelector(name='two', sel_subset=['d', 'e', 'f'])\n", - "FeatureSetSelector_2 : FeatureSetSelector(name='one', sel_subset=['a', 'b', 'c'])\n", 
- "RBFSampler_1 : RBFSampler(gamma=0.9480907031133559)\n", - "Binarizer_1 : Binarizer(threshold=0.5204447023562712)\n", - "RBFSampler_2 : RBFSampler(gamma=0.07182739023710172)\n", - "MaxAbsScaler_1 : MaxAbsScaler()\n" - ] - } - ], - "source": [ - "# print all hyperparameters\n", - "for n in est.fitted_pipeline_.graph.nodes:\n", - " print(n, \" : \", est.fitted_pipeline_.graph.nodes[n]['instance'])" - ] - }, - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "note that all of the above is the same when using numpy X, but the column names are now int indeces" - ] - }, - { - "cell_type": "code", - "execution_count": 21, - "metadata": {}, - "outputs": [ - { - "name": "stdout", - "output_type": "stream", - "text": [ - "[[ 0.03418023 1.85703799 1.3321493 ... 0.61740176 0.03615026\n", - " 0.73457701]\n", - " [ 0.00655906 0.3495084 -2.86361395 ... 0.27195435 0.52330367\n", - " 0.47208072]\n", - " [ 1.84952258 -0.98538028 0.60941956 ... 0.14054112 0.77081219\n", - " 0.17160637]\n", - " ...\n", - " [ 0.02282946 0.55489649 -2.89758703 ... 0.04122268 0.66234341\n", - " 0.76367281]\n", - " [-1.34268913 2.73488335 -1.82542106 ... 0.59224411 0.94857147\n", - " 0.20810423]\n", - " [-0.46791145 2.53228934 -2.08802875 ... 
0.82326686 0.23363656\n", - " 0.77884819]]\n" - ] - } - ], - "source": [ - "import tpot2\n", - "import sklearn.datasets\n", - "from sklearn.linear_model import LogisticRegression\n", - "import numpy as np\n", - "import pandas as pd\n", - "\n", - "n_features = 6\n", - "X, y = sklearn.datasets.make_classification(n_samples=1000, n_features=n_features, n_informative=6, n_redundant=0, n_repeated=0, n_classes=2, n_clusters_per_class=2, weights=None, flip_y=0.01, class_sep=1.0, hypercube=True, shift=0.0, scale=1.0, shuffle=True, random_state=None)\n", - "X = np.hstack([X, np.random.rand(X.shape[0],3)]) #add three uninformative features\n", - "\n", - "X_train, X_test, y_train, y_test = sklearn.model_selection.train_test_split(X, y, train_size=0.75, test_size=0.25)\n", - "\n", - "print(X)" - ] - }, - { - "cell_type": "code", - "execution_count": 22, - "metadata": {}, - "outputs": [ - { - "name": "stderr", - "output_type": "stream", - "text": [ - "Generation: 100%|██████████| 20/20 [00:44<00:00, 2.22s/it]\n", - "/home/ribeirop/miniconda3/envs/tpot2env/lib/python3.10/site-packages/sklearn/linear_model/_sag.py:350: ConvergenceWarning: The max_iter was reached which means the coef_ did not converge\n", - " warnings.warn(\n" - ] - }, - { - "name": "stdout", - "output_type": "stream", - "text": [ - "0.9830226151579218\n" - ] - }, - { - "data": { - "image/png": "", - "text/plain": [ - "
" - ] - }, - "metadata": {}, - "output_type": "display_data" - } - ], - "source": [ - "import tpot2\n", - "import pandas as pd\n", - "import numpy as np\n", - "from sklearn.linear_model import LogisticRegression\n", - "import sklearn\n", - "\n", - "subsets = { \"group_one\" : [0,1,2],\n", - " \"group_two\" : [3,4,5],\n", - " \"group_three\" : [6,7,8],\n", - " }\n", - "\n", - "est = tpot2.TPOTEstimator(population_size=40,generations=20, \n", - " scorers=['roc_auc_ovr',tpot2.objectives.complexity_scorer],\n", - " scorers_weights=[1,-1],\n", - " n_jobs=32,\n", - " classification=True,\n", - " leaf_config_dict=\"feature_set_selector\",\n", - " root_config_dict=root_config_dict,\n", - " inner_config_dict=\"transformers\",\n", - " subsets = subsets,\n", - " verbose=1,\n", - " )\n", - "\n", - "\n", - "est.fit(X_train,y_train)\n", - "print(sklearn.metrics.get_scorer('roc_auc_ovr')(est, X_test, y_test))\n", - "\n", - "est.fitted_pipeline_.plot()" - ] - }, - { - "cell_type": "code", - "execution_count": 23, - "metadata": {}, - "outputs": [ - { - "name": "stdout", - "output_type": "stream", - "text": [ - "FeatureSetSelector_1 : FeatureSetSelector(name='group_one', sel_subset=[0, 1, 2])\n", - "FeatureSetSelector_2 : FeatureSetSelector(name='group_two', sel_subset=[3, 4, 5])\n", - "FeatureSetSelector_3 : FeatureSetSelector(name='group_three', sel_subset=[6, 7, 8])\n" - ] - } - ], - "source": [ - "# print the selected features for each FSS\n", - "\n", - "#get leaves\n", - "leaves = [v for v, d in est.fitted_pipeline_.graph.out_degree() if d == 0]\n", - "for l in leaves:\n", - " print(l, \" : \", est.fitted_pipeline_.graph.nodes[l]['instance'])" - ] - }, - { - "cell_type": "code", - "execution_count": 24, - "metadata": {}, - "outputs": [ - { - "name": "stdout", - "output_type": "stream", - "text": [ - "LogisticRegression_1 : LogisticRegression(C=0.13013559430004598, solver='sag')\n", - "FeatureSetSelector_1 : FeatureSetSelector(name='group_one', sel_subset=[0, 1, 2])\n", - "PCA_1 
: PCA(n_components=0.9988096714708292)\n", - "PolynomialFeatures_1 : PolynomialFeatures(include_bias=False)\n", - "FeatureSetSelector_2 : FeatureSetSelector(name='group_two', sel_subset=[3, 4, 5])\n", - "FeatureSetSelector_3 : FeatureSetSelector(name='group_three', sel_subset=[6, 7, 8])\n", - "Normalizer_1 : Normalizer(norm='max')\n", - "RBFSampler_1 : RBFSampler(gamma=0.17772815448977386)\n" - ] - } - ], - "source": [ - "# print all hyperparameters\n", - "for n in est.fitted_pipeline_.graph.nodes:\n", - " print(n, \" : \", est.fitted_pipeline_.graph.nodes[n]['instance'])" - ] - } - ], - "metadata": { - "interpreter": { - "hash": "57aedbec84c390a3287b44649e400696ed2b6dcd408c8519583e8e995dbe6e9b" - }, - "kernelspec": { - "display_name": "Python 3.10.12 ('tpot2env2')", - "language": "python", - "name": "python3" - }, - "language_info": { - "codemirror_mode": { - "name": "ipython", - "version": 3 - }, - "file_extension": ".py", - "mimetype": "text/x-python", - "name": "python", - "nbconvert_exporter": "python", - "pygments_lexer": "ipython3", - "version": "3.10.11" - }, - "orig_nbformat": 4, - "vscode": { - "interpreter": { - "hash": "7fe1fe9ef32cd5efd76326a08046147513534f0dd2318301a1a96ae9071c1c4e" - } - } - }, - "nbformat": 4, - "nbformat_minor": 2 -} diff --git a/Tutorial/4_Symbolic_Regression_and_Classification.ipynb b/Tutorial/4_Symbolic_Regression_and_Classification.ipynb index 8b6c7254..80c4ef4a 100644 --- a/Tutorial/4_Symbolic_Regression_and_Classification.ipynb +++ b/Tutorial/4_Symbolic_Regression_and_Classification.ipynb @@ -24,26 +24,32 @@ }, { "cell_type": "code", - "execution_count": 2, + "execution_count": 7, "metadata": {}, "outputs": [ { "name": "stderr", "output_type": "stream", "text": [ - "Generation: 100%|██████████| 50/50 [01:59<00:00, 2.39s/it]\n" + "Generation: 0%| | 0/20 [00:00" ] @@ -57,30 +63,32 @@ "import sklearn.datasets\n", "from sklearn.linear_model import LogisticRegression\n", "import numpy as np\n", - "from tpot2.builtin_modules import 
ZeroTransformer, OneTransformer\n", - "from tpot2.config.classifiers import params_LogisticRegression\n", "\n", - "root_config_dict = {LogisticRegression: params_LogisticRegression}\n", - "leaf_config_dict = [\"feature_set_selector\", {ZeroTransformer: {}, OneTransformer: {}}]\n", + "X, y = sklearn.datasets.make_classification(n_samples=1000, n_features=100, n_informative=6, n_redundant=0, n_repeated=0, n_classes=2, n_clusters_per_class=2, weights=None, flip_y=0.01, class_sep=1.0, hypercube=True, shift=0.0, scale=1.0, shuffle=True, random_state=None)\n", + "X_train, X_test, y_train, y_test = sklearn.model_selection.train_test_split(X, y, train_size=0.75, test_size=0.25)\n", + "\n", + "n_features = X_train.shape[1]\n", "\n", + "graph_search_space = tpot2.search_spaces.pipelines.GraphPipeline(\n", + " root_search_space= tpot2.config.get_search_space(\"LogisticRegression\"),\n", + " leaf_search_space = tpot2.search_spaces.nodes.FSSNode(subsets=n_features), \n", + " inner_search_space = tpot2.config.get_search_space([\"arithmatic\"]),\n", + " max_size = 10,\n", + ")\n", "\n", - "est = tpot2.TPOTEstimator(population_size=100,generations=50, \n", - " scorers=['roc_auc'],\n", + "est = tpot2.TPOTEstimator(population_size=10,generations=20, \n", + " scorers=['roc_auc_ovr'],\n", " scorers_weights=[1],\n", " other_objective_functions=[tpot2.objectives.number_of_nodes_objective],\n", " other_objective_functions_weights=[-1],\n", - " classification=True,\n", - " inner_config_dict= \"arithmetic_transformer\",\n", - " leaf_config_dict=leaf_config_dict,\n", - " root_config_dict=root_config_dict,\n", " n_jobs=32,\n", + " classification=True,\n", + " search_space = graph_search_space ,\n", " verbose=1,\n", " )\n", "\n", - "#load iris\n", "scorer = sklearn.metrics.get_scorer('roc_auc_ovo')\n", - "X, y = sklearn.datasets.make_classification(n_samples=1000, n_features=100, n_informative=6, n_redundant=0, n_repeated=0, n_classes=2, n_clusters_per_class=2, weights=None, flip_y=0.01, 
class_sep=1.0, hypercube=True, shift=0.0, scale=1.0, shuffle=True, random_state=None)\n", - "X_train, X_test, y_train, y_test = sklearn.model_selection.train_test_split(X, y, train_size=0.75, test_size=0.25)\n", + "\n", "est.fit(X_train, y_train)\n", "print(scorer(est, X_test, y_test))\n", "est.fitted_pipeline_.plot()" @@ -88,23 +96,20 @@ }, { "cell_type": "code", - "execution_count": 3, + "execution_count": 8, "metadata": {}, "outputs": [ { "name": "stdout", "output_type": "stream", "text": [ - "LogisticRegression_1 : LogisticRegression(C=282.83015030119856, max_iter=1000, n_jobs=1, solver='sag')\n", - "FeatureSetSelector_1 : FeatureSetSelector(name='50', sel_subset=[50])\n", - "FeatureSetSelector_2 : FeatureSetSelector(name='16', sel_subset=[16])\n", - "MaxTransformer_1 : MaxTransformer()\n", + "LogisticRegression_1 : LogisticRegression(C=0.28751652817028706, class_weight='balanced',\n", + " max_iter=1000, n_jobs=1, solver='liblinear')\n", + "FeatureSetSelector_1 : FeatureSetSelector(name='93', sel_subset=[93])\n", + "FeatureSetSelector_2 : FeatureSetSelector(name='25', sel_subset=[25])\n", + "LETransformer_1 : LETransformer()\n", "LTTransformer_1 : LTTransformer()\n", - "FeatureSetSelector_3 : FeatureSetSelector(name='42', sel_subset=[42])\n", - "FeatureSetSelector_4 : FeatureSetSelector(name='21', sel_subset=[21])\n", - "MaxTransformer_2 : MaxTransformer()\n", - "LTTransformer_2 : LTTransformer()\n", - "MulTransformer_1 : MulTransformer()\n" + "MinTransformer_1 : MinTransformer()\n" ] } ], @@ -116,12 +121,12 @@ }, { "cell_type": "code", - "execution_count": 4, + "execution_count": 9, "metadata": {}, "outputs": [ { "data": { - "image/png": 
"iVBORw0KGgoAAAANSUhEUgAAAkAAAAGwCAYAAABB4NqyAAAAOXRFWHRTb2Z0d2FyZQBNYXRwbG90bGliIHZlcnNpb24zLjcuMSwgaHR0cHM6Ly9tYXRwbG90bGliLm9yZy/bCgiHAAAACXBIWXMAAA9hAAAPYQGoP6dpAAA3DElEQVR4nO3deXRU9f3/8dckSJJCEgRCFogEEQKBsENMwA0jiHSq5XeEsi8iVUGBUNoECGEpBGwbY8vmAui3So2nClWBIEbBskggNFQKslcoJKySQGhYMvf3h4dppwkIw0xukvt8nDPndD7zuXfen6En8/JzP/O5NsMwDAEAAFiIj9kFAAAAVDYCEAAAsBwCEAAAsBwCEAAAsBwCEAAAsBwCEAAAsBwCEAAAsJxaZhdQFTkcDp04cUKBgYGy2WxmlwMAAG6BYRi6cOGCIiIi5ONz8zkeAlAFTpw4ocjISLPLAAAAbjh27JiaNGly0z4EoAoEBgZK+v4DDAoKMrkaAABwK4qLixUZGen8Hr8ZAlAFrl/2CgoKIgABAFDN3MryFRZBAwAAyyEAAQAAyyEAAQAAyyEAAQAAyyEAAQAAyyEAAQAAyyEAAQAAyyEAAQAAyyEAAQAAy2EnaAAAUGnKHIZyj5zTqQulahTor27N6svXp/JvPE4AAgAAlSJ7d4FmfrxHBUWlzrbwYH+l2WP0eNvwSq2FS2AAAMDrsncX6Pl3drqEH0kqLCrV8+/sVPbugkqthwAEAAC8qsxhaObHe2RU8Nr1tpkf71GZo6Ie3kEAAgAAXpV75Fy5mZ//ZkgqKCpV7pFzlVYTAQgAAHjVqQs3Dj/u9PMEAhAAAPCqRoH+Hu3nCQQgAADgVd2a1Vd4sL9u9GN3m77/NVi3ZvUrrSYCEAAAVUSZw9DWQ2f1l/zj2nrobKUuCvYmXx+b0uwxklQuBF1/nmaPqdT9gNgHCACAKqAq7ZHjDY+3DdfiIZ3KjTHMpDHaDMOoGfHSg4qLixUcHKyioiIFBQWZXQ4AoIa7vkfO/34hX58PWTykU40IQZJ3d4K+ne9vZoAAADDRD+2RY9P3e+Q8FhNmyi0jPM3Xx6b45g3MLoM1QAAAmKkq7pFjBQQgAABMVBX3yLECAhAAACaqinvkWAEBCAAAE1XFPXKsgAAEAICJquIeOVZAAAIAwGTX98gJC3a9zBUW7F+jfgJflfAzeAAAqoDH24brsZgwr+2RA1cEIAAAqoiqskeOFXAJDAAAWA4zQACAasObt1GAtRCAAADVQk2/WSgqF5fAAABV3vWbhf7vLSMKi0r1/Ds7lb27wKTKUF0RgAAAVdoP3SxU+v5moWWOinoAFSMAAQCqNG4WCm8gAAEAqjRuFgpvIAABAKo0bhYKbyAAAQCqNG4WCm8gAAEAqjRuFgpvIAABAKo8bhYKT2MjRABAtcDNQuFJBCAAQLXBzULhKQQgAKhBuFcWcGsIQABQQ3CvLODWsQgagGWUOQxtPXRWf8k/rq2HztaoWydwryzg9jADBMCpJl8+qcmzIz90ryybvr9X1mMxYTXm3xO4U1ViBmjhwoWKioqSv7+/4uLilJube9P+mZmZio6OVkBAgCIjIzVx4kSVlv7nj1pUVJRsNlu5x9ixY709FKDayt5doB7zP9fAN77S+PfyNfCNr9Rj/uc1Yuagps+OcK8s4PaZHoCysrKUlJSktLQ07dy5U+3bt1fv3r116tSpCvuvWLFCycnJSktL0969e7V06VJlZWVpypQpzj7bt29XQUGB87F+/XpJ0tNPP10pYwKqm5ocEKxwJ3HulQXcPtMDUEZGhp599lmNHDlSMTExWrJkiX70ox9p2bJlFfbfsmWLunfvrkGDBikqKkq9evXSwIEDXWaNQkJCFBYW5nx88sknat68uR566KEKz3n58mUVFxe7PACrqOkBwQqzI9wrC7h9pgagK1euKC8vT4mJic42Hx8fJSYmauvWrRUek5CQoLy8PGfgOXz4sNa
sWaMnnnjihu/xzjvvaNSoUbLZKr72nZ6eruDgYOcjMjLyDkcGVB81PSBYYXaEe2UBt8/UAHTmzBmVlZUpNDTUpT00NFSFhYUVHjNo0CDNmjVLPXr00F133aXmzZvr4YcfdrkE9t9WrVql8+fPa8SIETesIyUlRUVFRc7HsWPH3B4TUN3U9IBghdkR7pUF3D7TL4Hdrg0bNmju3LlatGiRdu7cqQ8//FCrV6/W7NmzK+y/dOlS9enTRxERETc8p5+fn4KCglwegFXU9IBgldkR7pUF3B5TfwbfsGFD+fr66uTJky7tJ0+eVFhYWIXHpKamaujQoRo9erQkKTY2ViUlJRozZoymTp0qH5//ZLpvv/1Wn332mT788EPvDQKo5q4HhMKi0grXAdn0/ZdodQ0I12dHnn9np2ySyxhr2uwI98oCbp2pM0C1a9dW586dlZOT42xzOBzKyclRfHx8hcdcunTJJeRIkq+vryTJMFz/fC9fvlyNGjVS3759PVw5UHNY4fKJlWZHrt8r68kOjRXfvEG1/ncDvMn0jRCTkpI0fPhwdenSRd26dVNmZqZKSko0cuRISdKwYcPUuHFjpaenS5LsdrsyMjLUsWNHxcXF6eDBg0pNTZXdbncGIen7ILV8+XINHz5ctWqZPkygSrseEP53o8CwGrJRoMTsCABXpieDAQMG6PTp05o+fboKCwvVoUMHZWdnOxdGHz161GXGZ9q0abLZbJo2bZqOHz+ukJAQ2e12zZkzx+W8n332mY4ePapRo0ZV6niA6soKAYE7iQO4zmb873UjqLi4WMHBwSoqKmJBNAAA1cTtfH9Xu1+BAQAA3CkCEAAAsBwCEAAAsBwCEAAAsBwCEAAAsBwCEAAAsBwCEAAAsBwCEAAAsBwCEAAAsBwCEAAAsBwCEAAAsBwCEAAAsBzT7wYPVBdlDqNG3ykdAKyEAATcguzdBZr58R4VFJU628KD/ZVmj9HjbcNNrAwA4A4ugQE/IHt3gZ5/Z6dL+JGkwqJSPf/OTmXvLjCpMgCAuwhAwE2UOQzN/HiPjApeu9428+M9KnNU1AMAUFURgICbyD1yrtzMz38zJBUUlSr3yLnKKwoAcMcIQMBNnLpw4/DjTj8AQNVAAAJuolGgv0f7AQCqBgIQcBPdmtVXeLC/bvRjd5u+/zVYt2b1K7MsAMAdIgABN+HrY1OaPUaSyoWg68/T7DHsBwQA1QwBCPgBj7cN1+IhnRQW7HqZKyzYX4uHdGIfIACohtgIEbgFj7cN12MxYewEDQA1BAEIuEW+PjbFN29gdhkAAA/gEhgAALAcAhAAALAcAhAAALAcAhAAALAcAhAAALAcAhAAALAcAhAAALAcAhAAALAcAhAAALAcAhAAALAcAhAAALAcAhAAALAcAhAAALAcAhAAALAcAhAAALAcAhAAALAcAhAAALAcAhAAALAcAhAAALAcAhAAALAcAhAAALAcAhAAALAcAhAAALAcAhAAALAcAhAAALAcAhAAALAcAhAAALAcAhAAALAcAhAAALAcAhAAALAcAhAAALAcAhAAALAcAhAAALAcAhAAALAcAhAAALAc0wPQwoULFRUVJX9/f8XFxSk3N/em/TMzMxUdHa2AgABFRkZq4sSJKi0tdelz/PhxDRkyRA0aNFBAQIBiY2O1Y8cObw4DAABUI7XMfPOsrCwlJSVpyZIliouLU2Zmpnr37q19+/apUaNG5fqvWLFCycnJWrZsmRISErR//36NGDFCNptNGRkZkqTvvvtO3bt31yOPPKK1a9cqJCREBw4c0N13313ZwwMAAFWUzTAMw6w3j4uLU9euXbVgwQJJksPhUGRkpF588UUlJyeX6z9u3Djt3btXOTk5zrZJkyZp27Zt2rRpkyQpOTlZmzdv1l//+le36youLlZwcLCKiooUFBTk9nkAAEDluZ3vb9MugV25ckV5eXlKTEz8TzE+PkpMTNTWrVsrPCYhIUF5eXnOy2SHDx/WmjV
r9MQTTzj7fPTRR+rSpYuefvppNWrUSB07dtQbb7xx01ouX76s4uJilwcAAKi5TAtAZ86cUVlZmUJDQ13aQ0NDVVhYWOExgwYN0qxZs9SjRw/dddddat68uR5++GFNmTLF2efw4cNavHixWrRooXXr1un555/XSy+9pLfffvuGtaSnpys4ONj5iIyM9MwgAQBAlWT6IujbsWHDBs2dO1eLFi3Szp079eGHH2r16tWaPXu2s4/D4VCnTp00d+5cdezYUWPGjNGzzz6rJUuW3PC8KSkpKioqcj6OHTtWGcMBAAAmMW0RdMOGDeXr66uTJ0+6tJ88eVJhYWEVHpOamqqhQ4dq9OjRkqTY2FiVlJRozJgxmjp1qnx8fBQeHq6YmBiX41q3bq0PPvjghrX4+fnJz8/vDkcEAACqC9NmgGrXrq3OnTu7LGh2OBzKyclRfHx8hcdcunRJPj6uJfv6+kqSrq/l7t69u/bt2+fSZ//+/WratKknywcAANWYqT+DT0pK0vDhw9WlSxd169ZNmZmZKikp0ciRIyVJw4YNU+PGjZWeni5JstvtysjIUMeOHRUXF6eDBw8qNTVVdrvdGYQmTpyohIQEzZ07V/3791dubq5ef/11vf7666aNEwAAVC2mBqABAwbo9OnTmj59ugoLC9WhQwdlZ2c7F0YfPXrUZcZn2rRpstlsmjZtmo4fP66QkBDZ7XbNmTPH2adr165auXKlUlJSNGvWLDVr1kyZmZkaPHhwpY8PAABUTabuA1RVsQ8QAADVT7XYBwgAAMAsBCAAAGA5BCAAAGA5BCAAAGA5BCAAAGA5BCAAAGA5BCAAAGA5BCAAAGA5BCAAAGA5BCAAAGA5dxSArly5on379unatWueqgcAAMDr3ApAly5d0jPPPKMf/ehHatOmjY4ePSpJevHFFzVv3jyPFggAAOBpbgWglJQU7dq1Sxs2bJC/v7+zPTExUVlZWR4rDgAAwBtquXPQqlWrlJWVpfvvv182m83Z3qZNGx06dMhjxQEAAHiDWzNAp0+fVqNGjcq1l5SUuAQiAACAqsitANSlSxetXr3a+fx66HnzzTcVHx/vmcoAAAC8xK1LYHPnzlWfPn20Z88eXbt2Ta+++qr27NmjLVu2aOPGjZ6uEQAAwKPcmgHq0aOHdu3apWvXrik2NlaffvqpGjVqpK1bt6pz586erhEAAMCjbnsG6OrVq/r5z3+u1NRUvfHGG96oCQAAwKtuewborrvu0gcffOCNWgAAACqFW5fAnnrqKa1atcrDpQAAAFQOtxZBt2jRQrNmzdLmzZvVuXNn1alTx+X1l156ySPFAQAAeIPNMAzjdg9q1qzZjU9os+nw4cN3VJTZiouLFRwcrKKiIgUFBZldDgAAuAW38/3t1gzQkSNH3CoMAACgKriju8FLkmEYcmMSCQAAwDRuB6D/+7//U2xsrAICAhQQEKB27drpj3/8oydrAwAA8Aq3LoFlZGQoNTVV48aNU/fu3SVJmzZt0nPPPaczZ85o4sSJHi0SAADAk9xeBD1z5kwNGzbMpf3tt9/WjBkzqv0aIRZBAwBQ/dzO97dbl8AKCgqUkJBQrj0hIUEFBQXunBIAAKDSuBWA7rvvPr3//vvl2rOystSiRYs7LgoAAMCb3FoDNHPmTA0YMEBffvmlcw3Q5s2blZOTU2EwAgAAqErcmgH6f//v/2nbtm1q2LChVq1apVWrVqlhw4bKzc3VT3/6U0/XCAAA4FFuLYKu6VgEDQBA9eP1RdBr1qzRunXryrWvW7dOa9eudeeUAAAAlcatAJScnKyysrJy7YZhKDk5+Y6LAgAA8Ca3AtCBAwcUExNTrr1Vq1Y6ePDgHRcFAADgTW4FoODg4Arv+H7w4EHVqVPnjosCAADwJrcC0JNPPqkJEybo0KFDzraDBw9q0qRJ+slPfuKx4gAAALzBrQD08ssvq06dOmrVqpWaNWumZs2aqXXr1mrQoIF++9vferpGAAAAj3J
rI8Tg4GBt2bJF69ev165du5x3g3/wwQc9XR8AAIDHeWwfoPPnz6tevXqeOJXp2AcIAIDqx+v7AM2fP19ZWVnO5/3791eDBg3UuHFj7dq1y51TAgAAVBq3AtCSJUsUGRkpSVq/fr3Wr1+vtWvXqk+fPpo8ebJHCwQAAPA0t9YAFRYWOgPQJ598ov79+6tXr16KiopSXFycRwsEAADwNLdmgO6++24dO3ZMkpSdna3ExERJ3+8EXdEO0QAAAFWJWzNA/fr106BBg9SiRQudPXtWffr0kST97W9/03333efRAgEAADzNrQD0yiuvKCoqSseOHdPLL7+sunXrSpIKCgr0wgsveLRAAAAAT/PYz+Ar0rdvX7355psKDw/31lt4BT+DBwCg+vH6z+Bv1Zdffql///vf3nwLAACA2+bVAAQAAFAVEYAAAIDlEIAAAIDlEIAAAIDlEIAAAIDleDUATZkyRfXr1/fmWwAAANw2twJQenq6li1bVq592bJlmj9/vvN5SkqK6tWr53ZxAAAA3uBWAHrttdfUqlWrcu1t2rTRkiVL7rgoAAAAb3IrABUWFla4u3NISIgKCgruuCgAAABvcisARUZGavPmzeXaN2/erIiIiDsuCgAAwJvcuhnqs88+qwkTJujq1avq2bOnJCknJ0e//OUvNWnSJI8WCAAA4GluzQBNnjxZzzzzjF544QXde++9uvfee/Xiiy/qpZdeUkpKym2fb+HChYqKipK/v7/i4uKUm5t70/6ZmZmKjo5WQECAIiMjNXHiRJWWljpfnzFjhmw2m8ujojVLAADAmtyaAbLZbJo/f75SU1O1d+9eBQQEqEWLFvLz87vtc2VlZSkpKUlLlixRXFycMjMz1bt3b+3bt0+NGjUq13/FihVKTk7WsmXLlJCQoP3792vEiBGy2WzKyMhw9mvTpo0+++yz/wy0lltDBQAANdAdpYK6deuqa9eud1RARkaGnn32WY0cOVKStGTJEq1evVrLli1TcnJyuf5btmxR9+7dNWjQIElSVFSUBg4cqG3btrn0q1WrlsLCwu6oNgAAUDO5FYAeeeQR2Wy2G77++eef39J5rly5ory8PJfLZj4+PkpMTNTWrVsrPCYhIUHvvPOOcnNz1a1bNx0+fFhr1qzR0KFDXfodOHBAERER8vf3V3x8vNLT03XPPfdUeM7Lly/r8uXLzufFxcW3VD8AAKie3ApAHTp0cHl+9epV5efna/fu3Ro+fPgtn+fMmTMqKytTaGioS3toaKi++eabCo8ZNGiQzpw5ox49esgwDF27dk3PPfecpkyZ4uwTFxent956S9HR0SooKNDMmTP1wAMPaPfu3QoMDCx3zvT0dM2cOfOW6wYAANWbWwHolVdeqbB9xowZunjx4h0V9EM2bNiguXPnatGiRYqLi9PBgwc1fvx4zZ49W6mpqZKkPn36OPu3a9dOcXFxatq0qd5//30988wz5c6ZkpKipKQk5/Pi4mJFRkZ6dRwAAMA8Hl0ZPGTIEHXr1k2//e1vb6l/w4YN5evrq5MnT7q0nzx58obrd1JTUzV06FCNHj1akhQbG6uSkhKNGTNGU6dOlY9P+R+21atXTy1bttTBgwcrPKefn59bC7gBAED15NGboW7dulX+/v633L927drq3LmzcnJynG0Oh0M5OTmKj4+v8JhLly6VCzm+vr6SJMMwKjzm4sWLOnToUIW7V8NzyhyGth46q7/kH9fWQ2dV5qj43wMAALO5NQPUr18/l+eGYaigoEA7duxwXoa6VUlJSRo+fLi6dOmibt26KTMzUyUlJc5fhQ0bNkyNGzdWenq6JMlutysjI0MdO3Z0XgJLTU2V3W53BqFf/OIXstvtatq0qU6cOKG0tDT5+vpq4MCB7gwXtyB7d4FmfrxHBUX/2Y8pPNhfafYYPd6W4AkAqFrcCkDBwcEuz318fBQdHa1Zs2apV69et3WuAQMG6PTp05o+fboKCwvVoUMHZWdnOxdGHz1
61GXGZ9q0abLZbJo2bZqOHz+ukJAQ2e12zZkzx9nnX//6lwYOHKizZ88qJCREPXr00FdffaWQkBB3hosfkL27QM+/s1P/O99TWFSq59/ZqcVDOhGCAABVis240XUjCysuLlZwcLCKiooUFBRkdjlVWpnDUI/5n7vM/Pw3m6SwYH9t+lVP+frceOsEAADu1O18f3t0DRCsJ/fIuRuGH0kyJBUUlSr3yLnKKwoAgB/g1iWwsrIyvfLKK3r//fd19OhRXblyxeX1c+f4srOKUxduHH7c6QcAQGVwawZo5syZysjI0IABA1RUVKSkpCT169dPPj4+mjFjhodLRFXWKPDWfvV3q/0AAKgMbgWgd999V2+88YYmTZqkWrVqaeDAgXrzzTc1ffp0ffXVV56uEVVYt2b1FR7srxut7rHp+1+DdWtWvzLLAgDgptwKQIWFhYqNjZX0/Q1Ri4qKJEk//vGPtXr1as9VhyrP18emNHuMJJULQdefp9ljWAANAKhS3ApATZo0UUFBgSSpefPm+vTTTyVJ27dvZ0dlC3q8bbgWD+mksGDXy1xhwf78BB4AUCW5tQj6pz/9qXJychQXF6cXX3xRQ4YM0dKlS3X06FFNnDjR0zWiGni8bbgeiwlT7pFzOnWhVI0Cv7/sxcwPAKAq8sg+QF999ZW2bNmiFi1ayG63e6IuU7EPEAAA1c/tfH975Gao999/v+6///5y7X379tWbb77JPbgAAECV4tWNEL/88kv9+9//9uZbAAAA3DZ2ggYAAJZDAAIAAJZDAAIAAJZDAAIAAJZDAAIAAJbj1QA0ZcoU1a/PPaAAAEDV4lYASk9P17Jly8q1L1u2TPPnz3c+T0lJUb169dwuDgAAwBvcCkCvvfaaWrVqVa69TZs2WrJkyR0XBQAA4E1u3w2+ot2dQ0JCnDdJBQAAqKrcCkCRkZHavHlzufbNmzcrIiLijosCAADwJrfuBfbss89qwoQJunr1qnr27ClJysnJ0S9/+UtNmjTJowUCAAB4mlsBaPLkyTp79qxeeOEFXblyRZLk7++vX/3qV0pJSfFogQAAAJ5mMwzDcPfgixcvau/evQoICFCLFi3k5+fnydpMU1xcrODgYBUVFSkoKMjscgAAwC24ne9vt2aArqtbt65zMXRNCT8AAKDmc2sRtMPh0KxZsxQcHKymTZuqadOmqlevnmbPni2Hw+HpGgEAADzKrRmgqVOnaunSpZo3b566d+8uSdq0aZNmzJih0tJSzZkzx6NFAgAAeJJba4AiIiK0ZMkS/eQnP3Fp/8tf/qIXXnhBx48f91iBZmANEAAA1c/tfH+7dQns3LlzFe4E3apVK507d86dUwIAAFQatwJQ+/bttWDBgnLtCxYsUPv27e+4KAAAAG9yaw3Qb37zGz3xxBP67LPPFB8fL0naunWrjh07pjVr1ni0QAAAAE+77Rmgq1evaubMmVqzZo369eun8+fP6/z58+rXr5/27dunBx54wBt1AgAAeMxtzwDddddd+vvf/67w8HD9+te/9kZNAAAAXuXWGqAhQ4Zo6dKlnq4FAACgUri1BujatWtatmyZPvvsM3Xu3Fl16tRxeT0jI8MjxQEAAHiDWwFo9+7d6tSpkyRp//79Lq/ZbLY7rwoAAMCL3ApAX3zxhafrAAAAqDRurQECAACozghAAADAcghAAADAcghAAADAcghAAADAcghAAADAcghAAADAcghAAADAcghAAADAcghAAADAcghAAADAcghAAADAcghAAADAcghAAADAcghAAADAcghAAADAcghAAADAcghAAADAcghAAADAcghAAADAcghAAADAcghAAADAcghAAADAcghAAADAcqpEAFq4cKGioqLk7++vuLg45ebm3rR/ZmamoqOjFRAQoMjISE2cOFGlpaUV9p03b55sNpsmTJjghcoBAEB1ZHoAysrKUlJSktLS0rRz5061b99evXv31qlTpyrsv2LFCiUnJystLU1
79+7V0qVLlZWVpSlTppTru337dr322mtq166dt4cBAACqEdMDUEZGhp599lmNHDlSMTExWrJkiX70ox9p2bJlFfbfsmWLunfvrkGDBikqKkq9evXSwIEDy80aXbx4UYMHD9Ybb7yhu++++6Y1XL58WcXFxS4PAABQc5kagK5cuaK8vDwlJiY623x8fJSYmKitW7dWeExCQoLy8vKcgefw4cNas2aNnnjiCZd+Y8eOVd++fV3OfSPp6ekKDg52PiIjI+9gVAAAoKqrZeabnzlzRmVlZQoNDXVpDw0N1TfffFPhMYMGDdKZM2fUo0cPGYaha9eu6bnnnnO5BPbee+9p586d2r59+y3VkZKSoqSkJOfz4uJiQhAAADWY6ZfAbteGDRs0d+5cLVq0SDt37tSHH36o1atXa/bs2ZKkY8eOafz48Xr33Xfl7+9/S+f08/NTUFCQywMAANRcps4ANWzYUL6+vjp58qRL+8mTJxUWFlbhMampqRo6dKhGjx4tSYqNjVVJSYnGjBmjqVOnKi8vT6dOnVKnTp2cx5SVlenLL7/UggULdPnyZfn6+npvUAAAoMozdQaodu3a6ty5s3JycpxtDodDOTk5io+Pr/CYS5cuycfHtezrgcYwDD366KP6+uuvlZ+f73x06dJFgwcPVn5+PuEHAACYOwMkSUlJSRo+fLi6dOmibt26KTMzUyUlJRo5cqQkadiwYWrcuLHS09MlSXa7XRkZGerYsaPi4uJ08OBBpaamym63y9fXV4GBgWrbtq3Le9SpU0cNGjQo1w4AAKzJ9AA0YMAAnT59WtOnT1dhYaE6dOig7Oxs58Loo0ePusz4TJs2TTabTdOmTdPx48cVEhIiu92uOXPmmDUEAABQzdgMwzDMLqKqKS4uVnBwsIqKilgQDQBANXE739/V7ldgAAAAd4oABAAALIcABAAALIcABAAALIcABAAALIcABAAALIcABAAALIcABAAALIcABAAALIcABAAALIcABAAALIcABAAALIcABAAALIcABAAALIcABAAALIcABAAALIcABAAALIcABAAALIcABAAALIcABAAALIcABAAALIcABAAALIcABAAALIcABAAALIcABAAALIcABAAALIcABAAALIcABAAALIcABAAALIcABAAALIcABAAALIcABAAALIcABAAALIcABAAALIcABAAALIcABAAALIcABAAALIcABAAALIcABAAALIcABAAALIcABAAALIcABAAALIcABAAALIcABAAALIcABAAALIcABAAALIcABAAALIcABAAALIcABAAALIcABAAALIcABAAALIcABAAALIcABAAALIcABAAALIcABAAALIcABAAALIcABAAALIcABAAALIcABAAALKdKBKCFCxcqKipK/v7+iouLU25u7k37Z2ZmKjo6WgEBAYqMjNTEiRNVWlrqfH3x4sVq166dgoKCFBQUpPj4eK1du9bbwwAAANWE6QEoKytLSUlJSktL086dO9W+fXv17t1bp06dqrD/ihUrlJycrLS0NO3du1dLly5VVlaWpkyZ4uzTpEkTzZs3T3l5edqxY4d69uypJ598Uv/4xz8qa1gAAKAKsxmGYZhZQFxcnLp27aoFCxZIkhwOhyIjI/Xiiy8qOTm5XP9x48Zp7969ysnJcbZNmjRJ27Zt06ZNm274PvXr19dvfvMbPfPMMz9YU3FxsYKDg1VUVKSgoCA3RgUAACrb7Xx/mzoDdOXKFeXl5SkxMdHZ5uPjo8TERG3durXCYxISEpSXl+e8THb48GGtWbNGTzzxRIX9y8rK9N5776mkpETx8fEV9rl8+bKKi4tdHgAAoOaqZeabnzlzRmVlZQoNDXVpDw0N1TfffFPhMYMGDdKZM2fUo0cPGYaha9eu6bnnnnO5BCZJX3/9teLj41VaWqq6detq5cqViomJqfCc6enpmjlzpmcGBQAAqjzT1wDdrg0bNmju3LlatGiRdu7cqQ8
//FCrV6/W7NmzXfpFR0crPz9f27Zt0/PPP6/hw4drz549FZ4zJSVFRUVFzsexY8cqYygAAMAkps4ANWzYUL6+vjp58qRL+8mTJxUWFlbhMampqRo6dKhGjx4tSYqNjVVJSYnGjBmjqVOnysfn+0xXu3Zt3XfffZKkzp07a/v27Xr11Vf12muvlTunn5+f/Pz8PDk0AABQhZk6A1S7dm117tzZZUGzw+FQTk7ODdfrXLp0yRlyrvP19ZUk3Ww9t8Ph0OXLlz1QNQAAqO5MnQGSpKSkJA0fPlxdunRRt27dlJmZqZKSEo0cOVKSNGzYMDVu3Fjp6emSJLvdroyMDHXs2FFxcXE6ePCgUlNTZbfbnUEoJSVFffr00T333KMLFy5oxYoV2rBhg9atW2faOAEAQNVhegAaMGCATp8+renTp6uwsFAdOnRQdna2c2H00aNHXWZ8pk2bJpvNpmnTpun48eMKCQmR3W7XnDlznH1OnTqlYcOGqaCgQMHBwWrXrp3WrVunxx57rNLHBwAAqh7T9wGqitgHCACA6qfa7AMEAABgBgIQAACwHAIQAACwHAIQAACwHAIQAACwHAIQAACwHNP3AbKSMoeh3CPndOpCqRoF+qtbs/ry9bGZXRYAAJZDAKok2bsLNPPjPSooKnW2hQf7K80eo8fbhptYGQAA1sMlsEqQvbtAz7+z0yX8SFJhUamef2ensncXmFQZAADWRADysjKHoZkf71FF221fb5v58R6VOdiQGwCAykIA8rLcI+fKzfz8N0NSQVGpco+cq7yiAACwOAKQl526cOPw404/AABw5whAXtYo0N+j/QAAwJ0jAHlZt2b1FR7srxv92N2m738N1q1Z/cosCwAASyMAeZmvj01p9hhJKheCrj9Ps8ewHxAAAJWIAFQJHm8brsVDOiks2PUyV1iwvxYP6cQ+QAAAVDI2Qqwkj7cN12MxYewEDQBAFUAAqkS+PjbFN29gdhkAAFgel8AAAIDlEIAAAIDlEIAAAIDlEIAAAIDlEIAAAIDlEIAAAIDlEIAAAIDlEIAAAIDlEIAAAIDlsBN0BQzDkCQVFxebXAkAALhV17+3r3+P3wwBqAIXLlyQJEVGRppcCQAAuF0XLlxQcHDwTfvYjFuJSRbjcDh04sQJBQYGymbz7M1Ki4uLFRkZqWPHjikoKMij564KGF/1V9PHWNPHJ9X8MTK+6s9bYzQMQxcuXFBERIR8fG6+yocZoAr4+PioSZMmXn2PoKCgGvt/bInx1QQ1fYw1fXxSzR8j46v+vDHGH5r5uY5F0AAAwHIIQAAAwHIIQJXMz89PaWlp8vPzM7sUr2B81V9NH2NNH59U88fI+Kq/qjBGFkEDAADLYQYIAABYDgEIAABYDgEIAABYDgEIAABYDgGoEqSnp6tr164KDAxUo0aN9NRTT2nfvn1ml+VRixcvVrt27ZybWsXHx2vt2rVml+U18+bNk81m04QJE8wuxSNmzJghm83m8mjVqpXZZXnc8ePHNWTIEDVo0EABAQGKjY3Vjh07zC7LI6Kiosr9G9psNo0dO9bs0jyirKxMqampatasmQICAtS8eXPNnj37lu75VJ1cuHBBEyZMUNOmTRUQEKCEhARt377d7LLc8uWXX8putysiIkI2m02rVq1yed0wDE2fPl3h4eEKCAhQYmKiDhw4UGn1EYAqwcaNGzV27Fh99dVXWr9+va5evapevXqppKTE7NI8pkmTJpo3b57y8vK0Y8cO9ezZU08++aT+8Y9/mF2ax23fvl2vvfaa2rVrZ3YpHtWmTRsVFBQ4H5s2bTK7JI/67rvv1L17d911111au3at9uzZo9/97ne6++67zS7NI7Zv3+7y77d+/XpJ0tNPP21yZZ4xf/58LV68WAsWLNDevXs1f/58vfzyy/rDH/5gdmkeNXr0aK1fv15//OMf9fXXX6tXr15KTEzU8ePHzS7ttpWUlKh9+/ZauHBhha+//PLL+v3vf68lS5Z
o27ZtqlOnjnr37q3S0tLKKdBApTt16pQhydi4caPZpXjV3Xffbbz55ptml+FRFy5cMFq0aGGsX7/eeOihh4zx48ebXZJHpKWlGe3btze7DK/61a9+ZfTo0cPsMirN+PHjjebNmxsOh8PsUjyib9++xqhRo1za+vXrZwwePNikijzv0qVLhq+vr/HJJ5+4tHfq1MmYOnWqSVV5hiRj5cqVzucOh8MICwszfvOb3zjbzp8/b/j5+Rl/+tOfKqUmZoBMUFRUJEmqX7++yZV4R1lZmd577z2VlJQoPj7e7HI8auzYserbt68SExPNLsXjDhw4oIiICN17770aPHiwjh49anZJHvXRRx+pS5cuevrpp9WoUSN17NhRb7zxhtllecWVK1f0zjvvaNSoUR6/obNZEhISlJOTo/3790uSdu3apU2bNqlPnz4mV+Y5165dU1lZmfz9/V3aAwICatyM7JEjR1RYWOjytzQ4OFhxcXHaunVrpdTAzVArmcPh0IQJE9S9e3e1bdvW7HI86uuvv1Z8fLxKS0tVt25drVy5UjExMWaX5THvvfeedu7cWW2vx99MXFyc3nrrLUVHR6ugoEAzZ87UAw88oN27dyswMNDs8jzi8OHDWrx4sZKSkjRlyhRt375dL730kmrXrq3hw4ebXZ5HrVq1SufPn9eIESPMLsVjkpOTVVxcrFatWsnX11dlZWWaM2eOBg8ebHZpHhMYGKj4+HjNnj1brVu3VmhoqP70pz9p69atuu+++8wuz6MKCwslSaGhoS7toaGhzte8jQBUycaOHavdu3fXuDQvSdHR0crPz1dRUZH+/Oc/a/jw4dq4cWONCEHHjh3T+PHjtX79+nL/dVYT/Pd/Rbdr105xcXFq2rSp3n//fT3zzDMmVuY5DodDXbp00dy5cyVJHTt21O7du7VkyZIaF4CWLl2qPn36KCIiwuxSPOb999/Xu+++qxUrVqhNmzbKz8/XhAkTFBERUaP+/f74xz9q1KhRaty4sXx9fdWpUycNHDhQeXl5ZpdW43AJrBKNGzdOn3zyib744gs1adLE7HI8rnbt2rrvvvvUuXNnpaenq3379nr11VfNLssj8vLydOrUKXXq1Em1atVSrVq1tHHjRv3+979XrVq1VFZWZnaJHlWvXj21bNlSBw8eNLsUjwkPDy8Xxlu3bl3jLvV9++23+uyzzzR69GizS/GoyZMnKzk5WT/72c8UGxuroUOHauLEiUpPTze7NI9q3ry5Nm7cqIsXL+rYsWPKzc3V1atXde+995pdmkeFhYVJkk6ePOnSfvLkSedr3kYAqgSGYWjcuHFauXKlPv/8czVr1szskiqFw+HQ5cuXzS7DIx599FF9/fXXys/Pdz66dOmiwYMHKz8/X76+vmaX6FEXL17UoUOHFB4ebnYpHtO9e/dy20/s379fTZs2Naki71i+fLkaNWqkvn37ml2KR126dEk+Pq5fWb6+vnI4HCZV5F116tRReHi4vvvuO61bt05PPvmk2SV5VLNmzRQWFqacnBxnW3FxsbZt21Zpa0e5BFYJxo4dqxUrVugvf/mLAgMDndc3g4ODFRAQYHJ1npGSkqI+ffronnvu0YULF7RixQpt2LBB69atM7s0jwgMDCy3ZqtOnTpq0KBBjVjL9Ytf/EJ2u11NmzbViRMnlJaWJl9fXw0cONDs0jxm4sSJSkhI0Ny5c9W/f3/l5ubq9ddf1+uvv252aR7jcDi0fPlyDR8+XLVq1aw/73a7XXPmzNE999yjNm3a6G9/+5syMjI0atQos0vzqHXr1skwDEVHR+vgwYOaPHmyWrVqpZEjR5pd2m27ePGiyyzykSNHlJ+fr/r16+uee+7RhAkT9Otf/1otWrRQs2bNlJqaqoiICD311FOVU2Cl/NbM4iRV+Fi+fLnZpXnMqFGjjKZNmxq1a9c2QkJCjEcffdT49NNPzS7Lq2rSz+AHDBhghIeHG7Vr1zYaN25sDBgwwDh48KDZZXn
cxx9/bLRt29bw8/MzWrVqZbz++utml+RR69atMyQZ+/btM7sUjysuLjbGjx9v3HPPPYa/v79x7733GlOnTjUuX75sdmkelZWVZdx7771G7dq1jbCwMGPs2LHG+fPnzS7LLV988UWF333Dhw83DOP7n8KnpqYaoaGhhp+fn/Hoo49W6v93bYZRw7bRBAAA+AGsAQIAAJZDAAIAAJZDAAIAAJZDAAIAAJZDAAIAAJZDAAIAAJZDAAIAAJZDAAIAAJZDAAJgun/+85+y2WzKz883uxSnb775Rvfff7/8/f3VoUMHU2qIiopSZmamKe8N1HQEIAAaMWKEbDab5s2b59K+atUq2Ww2k6oyV1pamurUqaN9+/a53LDxv/G5AdUXAQiAJMnf31/z58/Xd999Z3YpHnPlyhW3jz106JB69Oihpk2bqkGDBjfsVxM/N8AKCEAAJEmJiYkKCwtTenr6DfvMmDGj3OWgzMxMRUVFOZ+PGDFCTz31lObOnavQ0FDVq1dPs2bN0rVr1zR58mTVr19fTZo00fLly8ud/5tvvlFCQoL8/f3Vtm1bbdy40eX13bt3q0+fPqpbt65CQ0M1dOhQnTlzxvn6ww8/rHHjxmnChAlq2LChevfuXeE4HA6HZs2apSZNmsjPz08dOnRQdna283Wbzaa8vDzNmjVLNptNM2bMuKPPTZI++OADtWnTRn5+foqKitLvfvc7l9dPnTolu92ugIAANWvWTO+++265c5w/f16jR49WSEiIgoKC1LNnT+3atcv5+q5du/TII48oMDBQQUFB6ty5s3bs2HHTugCrIgABkCT5+vpq7ty5+sMf/qB//etfd3Suzz//XCdOnNCXX36pjIwMpaWl6cc//rHuvvtubdu2Tc8995x+/vOfl3ufyZMna9KkSfrb3/6m+Ph42e12nT17VtL3X/49e/ZUx44dtWPHDmVnZ+vkyZPq37+/yznefvtt1a5dW5s3b9aSJUsqrO/VV1/V7373O/32t7/V3//+d/Xu3Vs/+clPdODAAUlSQUGB2rRpo0mTJqmgoEC/+MUvbjjWW/nc8vLy1L9/f/3sZz/T119/rRkzZig1NVVvvfWWs8+IESN07NgxffHFF/rzn/+sRYsW6dSpUy7nefrpp3Xq1CmtXbtWeXl56tSpkx599FGdO3dOkjR48GA1adJE27dvV15enpKTk3XXXXfdsHbA0irtvvMAqqzhw4cbTz75pGEYhnH//fcbo0aNMgzDMFauXGn895+JtLQ0o3379i7HvvLKK0bTpk1dztW0aVOjrKzM2RYdHW088MADzufXrl0z6tSpY/zpT38yDMMwjhw5Ykgy5s2b5+xz9epVo0mTJsb8+fMNwzCM2bNnG7169XJ572PHjhmSjH379hmGYRgPPfSQ0bFjxx8cb0REhDFnzhyXtq5duxovvPCC83n79u2NtLS0m57nVj+3QYMGGY899pjLsZMnTzZiYmIMwzCMffv2GZKM3Nxc5+t79+41JBmvvPKKYRiG8de//tUICgoySktLXc7TvHlz47XXXjMMwzACAwONt9566wdGD8AwDIMZIAAu5s+fr7ffflt79+51+xxt2rSRj89//ryEhoYqNjbW+dzX11cNGjQoN8MRHx/v/N+1atVSly5dnHXs2rVLX3zxherWret8tGrVStL363Wu69y5801rKy4u1okTJ9S9e3eX9u7du9/RmG/2ue3du7fC9ztw4IDKysq0d+9e1apVy6X2Vq1aqV69es7nu3bt0sWLF9WgQQOXz+DIkSPO8SclJWn06NFKTEzUvHnzXD4XAK4IQABcPPjgg+rdu7dSUlLKvebj4yPDMFzarl69Wq7f/152sdlsFbY5HI5bruvixYuy2+3Kz893eRw4cEAPPvigs1+dOnVu+ZyedLPPzRMuXryo8PDwcuPft2+fJk+eLOn7NVr/+Mc/1LdvX33++eeKiYnRypUrvVIPUN3VMrsAAFXPvHnz1KFDB0VHR7u
0h4SEqLCwUIZhOH/m7cm9e7766itnmLl27Zry8vI0btw4SVKnTp30wQcfKCoqSrVquf+nKygoSBEREdq8ebMeeughZ/vmzZvVrVu3O6r/Rp9b69attXnzZpe2zZs3q2XLlvL19VWrVq2c4+3ataskad++fTp//ryzf6dOnVRYWKhatWq5LDr/Xy1btlTLli01ceJEDRw4UMuXL9dPf/rTOxoXUBMxAwSgnNjYWA0ePFi///3vXdoffvhhnT59Wi+//LIOHTqkhQsXau3atR5734ULF2rlypX65ptvNHbsWH333XcaNWqUJGns2LE6d+6cBg4cqO3bt+vQoUNat26dRo4cqbKystt6n8mTJ2v+/PnKysrSvn37lJycrPz8fI0fP/6O6r/R5zZp0iTl5ORo9uzZ2r9/v95++20tWLDAubg6Ojpajz/+uH7+859r27ZtysvL0+jRoxUQEOA8R2JiouLj4/XUU0/p008/1T//+U9t2bJFU6dO1Y4dO/Tvf/9b48aN04YNG/Ttt99q8+bN2r59u1q3bn1HYwJqKgIQgArNmjWr3CWq1q1ba9GiRVq4cKHat2+v3Nzcm/5C6nbNmzdP8+bNU/v27bVp0yZ99NFHatiwoSQ5Z23KysrUq1cvxcbGasKECapXr57LeqNb8dJLLykpKUmTJk1SbGyssrOz9dFHH6lFixZ3PIaKPrdOnTrp/fff13vvvae2bdtq+vTpmjVrlkaMGOHss3z5ckVEROihhx5Sv379NGbMGDVq1Mj5us1m05o1a/Tggw9q5MiRatmypX72s5/p22+/VWhoqHx9fXX27FkNGzZMLVu2VP/+/dWnTx/NnDnzjscE1EQ2438v6AMAANRwzAABAADLIQABAADLIQABAADLIQABAADLIQABAADLIQABAADLIQABAADLIQABAADLIQABAADLIQABAADLIQABAADL+f8hTZF8xDIfyQAAAABJRU5ErkJggg==", + "image/png": "", "text/plain": [ "
" ] @@ -151,26 +156,38 @@ }, { "cell_type": "code", - "execution_count": 5, + "execution_count": 12, "metadata": {}, "outputs": [ { "name": "stderr", "output_type": "stream", "text": [ - "Generation: 100%|██████████| 50/50 [02:24<00:00, 2.89s/it]\n" + "/home/ribeirop/miniconda3/envs/tpot2env/lib/python3.10/site-packages/distributed/node.py:182: UserWarning: Port 8787 is already in use.\n", + "Perhaps you already have a cluster running?\n", + "Hosting the HTTP server on port 37681 instead\n", + " warnings.warn(\n", + "/home/ribeirop/common/Projects/TPOT_Dev/tpot2/tpot2/population.py:204: FutureWarning: Setting an item of incompatible dtype is deprecated and will raise an error in a future version of pandas. Value '['INVALID', 'INVALID', 'INVALID', 'INVALID', 'INVALID', 'INVALID', 'INVALID', 'INVALID', 'INVALID', 'INVALID']' has dtype incompatible with float64, please explicitly cast to a compatible dtype first.\n", + " self.evaluated_individuals.loc[key,column_names] = data\n", + "/home/ribeirop/common/Projects/TPOT_Dev/tpot2/tpot2/population.py:204: FutureWarning: Setting an item of incompatible dtype is deprecated and will raise an error in a future version of pandas. Value '['INVALID', 'INVALID', 'INVALID', 'INVALID', 'INVALID', 'INVALID', 'INVALID', 'INVALID', 'INVALID', 'INVALID']' has dtype incompatible with float64, please explicitly cast to a compatible dtype first.\n", + " self.evaluated_individuals.loc[key,column_names] = data\n", + "/home/ribeirop/common/Projects/TPOT_Dev/tpot2/tpot2/population.py:381: FutureWarning: Setting an item of incompatible dtype is deprecated and will raise an error in a future version of pandas. 
Value 'ind_crossover' has dtype incompatible with float64, please explicitly cast to a compatible dtype first.\n", + " self.evaluated_individuals.at[new_child.unique_id(),\"Variation_Function\"] = var_op\n", + "Generation: 100%|██████████| 20/20 [00:05<00:00, 3.52it/s]\n", + "/home/ribeirop/miniconda3/envs/tpot2env/lib/python3.10/site-packages/sklearn/linear_model/_stochastic_gradient.py:1575: ConvergenceWarning: Maximum number of iteration reached before convergence. Consider increasing max_iter to improve the fit.\n", + " warnings.warn(\n" ] }, { "name": "stdout", "output_type": "stream", "text": [ - "-53.572578179092396\n" + "-6120.015400135764\n" ] }, { "data": { - "image/png": "", + "image/png": "", "text/plain": [ "
" ] @@ -182,33 +199,31 @@ "source": [ "import tpot2\n", "import sklearn.datasets\n", - "from sklearn.linear_model import SGDRegressor\n", - "import numpy as np\n", - "from tpot2.builtin_modules import ZeroTransformer, OneTransformer\n", - "from tpot2.config.regressors import params_SGDRegressor\n", "\n", - "root_config_dict = {SGDRegressor: params_SGDRegressor}\n", - "leaf_config_dict = [\"feature_set_selector\", {ZeroTransformer: {}, OneTransformer: {}}]\n", + "scorer = sklearn.metrics.get_scorer('neg_mean_squared_error')\n", + "X, y = sklearn.datasets.load_diabetes(return_X_y=True)\n", + "X_train, X_test, y_train, y_test = sklearn.model_selection.train_test_split(X, y, train_size=0.75, test_size=0.25)\n", "\n", + "graph_search_space = tpot2.search_spaces.pipelines.GraphPipeline(\n", + " root_search_space= tpot2.config.get_search_space(\"SGDRegressor\"),\n", + " leaf_search_space = tpot2.search_spaces.nodes.FSSNode(subsets=n_features), \n", + " inner_search_space = tpot2.config.get_search_space([\"arithmatic\"]),\n", + " max_size = 10,\n", + ")\n", "\n", - "est = tpot2.TPOTEstimator(population_size=100,generations=50,\n", + "est = tpot2.TPOTEstimator(population_size=10,generations=20, \n", " scorers=['neg_mean_squared_error'],\n", " scorers_weights=[1],\n", " other_objective_functions=[tpot2.objectives.number_of_nodes_objective],\n", " other_objective_functions_weights=[-1],\n", " n_jobs=32,\n", " classification=False,\n", - " inner_config_dict= \"arithmetic_transformer\",\n", - " leaf_config_dict=leaf_config_dict,\n", - " root_config_dict=root_config_dict,\n", + " search_space = graph_search_space ,\n", " verbose=1,\n", - " processes=False,\n", " )\n", "\n", "\n", - "scorer = sklearn.metrics.get_scorer('neg_mean_squared_error')\n", - "X, y = sklearn.datasets.make_regression(n_samples=1000, n_features=100, n_informative=6)\n", - "X_train, X_test, y_train, y_test = sklearn.model_selection.train_test_split(X, y, train_size=0.75, test_size=0.25)\n", + "\n", 
"est.fit(X_train, y_train)\n", "print(scorer(est, X_test, y_test))\n", "est.fitted_pipeline_.plot()" @@ -216,23 +231,16 @@ }, { "cell_type": "code", - "execution_count": 6, + "execution_count": 13, "metadata": {}, "outputs": [ { "name": "stdout", "output_type": "stream", "text": [ - "SGDRegressor_1 : SGDRegressor(alpha=1.6814005088136593e-05, eta0=0.6868335822696461,\n", - " fit_intercept=False, l1_ratio=0.5144783118066449,\n", - " learning_rate='constant', loss='huber', penalty='elasticnet',\n", - " power_t=5.487407069184651)\n", - "FeatureSetSelector_1 : FeatureSetSelector(name='34', sel_subset=[34])\n", - "FeatureSetSelector_2 : FeatureSetSelector(name='17', sel_subset=[17])\n", - "FeatureSetSelector_3 : FeatureSetSelector(name='16', sel_subset=[16])\n", - "FeatureSetSelector_4 : FeatureSetSelector(name='3', sel_subset=[3])\n", - "FeatureSetSelector_5 : FeatureSetSelector(name='19', sel_subset=[19])\n", - "ZeroTransformer_1 : ZeroTransformer()\n" + "SGDRegressor_1 : SGDRegressor()\n", + "FeatureSetSelector_1 : FeatureSetSelector(name='7', sel_subset=[7])\n", + "FeatureSetSelector_2 : FeatureSetSelector(name='7', sel_subset=[7])\n" ] } ], @@ -244,12 +252,12 @@ }, { "cell_type": "code", - "execution_count": 7, + "execution_count": 14, "metadata": {}, "outputs": [ { "data": { - "image/png": "", + "image/png": "", "text/plain": [ "
" ] @@ -286,7 +294,7 @@ "name": "python", "nbconvert_exporter": "python", "pygments_lexer": "ipython3", - "version": "3.10.11" + "version": "3.10.14" }, "orig_nbformat": 4, "vscode": { diff --git a/Tutorial/5_Genetic_Feature_Selection.ipynb b/Tutorial/5_Genetic_Feature_Selection.ipynb new file mode 100644 index 00000000..96bf78b1 --- /dev/null +++ b/Tutorial/5_Genetic_Feature_Selection.ipynb @@ -0,0 +1,596 @@ +{ + "cells": [ + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "# Genetic Feature Selection\n", + "\n", + "This example creates a pipeline where the first step selects a subset of features, and the following step is a graph pipeline" + ] + }, + { + "cell_type": "code", + "execution_count": 1, + "metadata": {}, + "outputs": [ + { + "name": "stderr", + "output_type": "stream", + "text": [ + "Generation: 0%| | 0/5 [00:00#sk-container-id-1 {\n", + " /* Definition of color scheme common for light and dark mode */\n", + " --sklearn-color-text: black;\n", + " --sklearn-color-line: gray;\n", + " /* Definition of color scheme for unfitted estimators */\n", + " --sklearn-color-unfitted-level-0: #fff5e6;\n", + " --sklearn-color-unfitted-level-1: #f6e4d2;\n", + " --sklearn-color-unfitted-level-2: #ffe0b3;\n", + " --sklearn-color-unfitted-level-3: chocolate;\n", + " /* Definition of color scheme for fitted estimators */\n", + " --sklearn-color-fitted-level-0: #f0f8ff;\n", + " --sklearn-color-fitted-level-1: #d4ebff;\n", + " --sklearn-color-fitted-level-2: #b3dbfd;\n", + " --sklearn-color-fitted-level-3: cornflowerblue;\n", + "\n", + " /* Specific color for light theme */\n", + " --sklearn-color-text-on-default-background: var(--sg-text-color, var(--theme-code-foreground, var(--jp-content-font-color1, black)));\n", + " --sklearn-color-background: var(--sg-background-color, var(--theme-background, var(--jp-layout-color0, white)));\n", + " --sklearn-color-border-box: var(--sg-text-color, var(--theme-code-foreground, var(--jp-content-font-color1, 
black)));\n", + " --sklearn-color-icon: #696969;\n", + "\n", + " @media (prefers-color-scheme: dark) {\n", + " /* Redefinition of color scheme for dark theme */\n", + " --sklearn-color-text-on-default-background: var(--sg-text-color, var(--theme-code-foreground, var(--jp-content-font-color1, white)));\n", + " --sklearn-color-background: var(--sg-background-color, var(--theme-background, var(--jp-layout-color0, #111)));\n", + " --sklearn-color-border-box: var(--sg-text-color, var(--theme-code-foreground, var(--jp-content-font-color1, white)));\n", + " --sklearn-color-icon: #878787;\n", + " }\n", + "}\n", + "\n", + "#sk-container-id-1 {\n", + " color: var(--sklearn-color-text);\n", + "}\n", + "\n", + "#sk-container-id-1 pre {\n", + " padding: 0;\n", + "}\n", + "\n", + "#sk-container-id-1 input.sk-hidden--visually {\n", + " border: 0;\n", + " clip: rect(1px 1px 1px 1px);\n", + " clip: rect(1px, 1px, 1px, 1px);\n", + " height: 1px;\n", + " margin: -1px;\n", + " overflow: hidden;\n", + " padding: 0;\n", + " position: absolute;\n", + " width: 1px;\n", + "}\n", + "\n", + "#sk-container-id-1 div.sk-dashed-wrapped {\n", + " border: 1px dashed var(--sklearn-color-line);\n", + " margin: 0 0.4em 0.5em 0.4em;\n", + " box-sizing: border-box;\n", + " padding-bottom: 0.4em;\n", + " background-color: var(--sklearn-color-background);\n", + "}\n", + "\n", + "#sk-container-id-1 div.sk-container {\n", + " /* jupyter's `normalize.less` sets `[hidden] { display: none; }`\n", + " but bootstrap.min.css set `[hidden] { display: none !important; }`\n", + " so we also need the `!important` here to be able to override the\n", + " default hidden behavior on the sphinx rendered scikit-learn.org.\n", + " See: https://github.com/scikit-learn/scikit-learn/issues/21755 */\n", + " display: inline-block !important;\n", + " position: relative;\n", + "}\n", + "\n", + "#sk-container-id-1 div.sk-text-repr-fallback {\n", + " display: none;\n", + "}\n", + "\n", + "div.sk-parallel-item,\n", + 
"div.sk-serial,\n", + "div.sk-item {\n", + " /* draw centered vertical line to link estimators */\n", + " background-image: linear-gradient(var(--sklearn-color-text-on-default-background), var(--sklearn-color-text-on-default-background));\n", + " background-size: 2px 100%;\n", + " background-repeat: no-repeat;\n", + " background-position: center center;\n", + "}\n", + "\n", + "/* Parallel-specific style estimator block */\n", + "\n", + "#sk-container-id-1 div.sk-parallel-item::after {\n", + " content: \"\";\n", + " width: 100%;\n", + " border-bottom: 2px solid var(--sklearn-color-text-on-default-background);\n", + " flex-grow: 1;\n", + "}\n", + "\n", + "#sk-container-id-1 div.sk-parallel {\n", + " display: flex;\n", + " align-items: stretch;\n", + " justify-content: center;\n", + " background-color: var(--sklearn-color-background);\n", + " position: relative;\n", + "}\n", + "\n", + "#sk-container-id-1 div.sk-parallel-item {\n", + " display: flex;\n", + " flex-direction: column;\n", + "}\n", + "\n", + "#sk-container-id-1 div.sk-parallel-item:first-child::after {\n", + " align-self: flex-end;\n", + " width: 50%;\n", + "}\n", + "\n", + "#sk-container-id-1 div.sk-parallel-item:last-child::after {\n", + " align-self: flex-start;\n", + " width: 50%;\n", + "}\n", + "\n", + "#sk-container-id-1 div.sk-parallel-item:only-child::after {\n", + " width: 0;\n", + "}\n", + "\n", + "/* Serial-specific style estimator block */\n", + "\n", + "#sk-container-id-1 div.sk-serial {\n", + " display: flex;\n", + " flex-direction: column;\n", + " align-items: center;\n", + " background-color: var(--sklearn-color-background);\n", + " padding-right: 1em;\n", + " padding-left: 1em;\n", + "}\n", + "\n", + "\n", + "/* Toggleable style: style used for estimator/Pipeline/ColumnTransformer box that is\n", + "clickable and can be expanded/collapsed.\n", + "- Pipeline and ColumnTransformer use this feature and define the default style\n", + "- Estimators will overwrite some part of the style using 
the `sk-estimator` class\n", + "*/\n", + "\n", + "/* Pipeline and ColumnTransformer style (default) */\n", + "\n", + "#sk-container-id-1 div.sk-toggleable {\n", + " /* Default theme specific background. It is overwritten whether we have a\n", + " specific estimator or a Pipeline/ColumnTransformer */\n", + " background-color: var(--sklearn-color-background);\n", + "}\n", + "\n", + "/* Toggleable label */\n", + "#sk-container-id-1 label.sk-toggleable__label {\n", + " cursor: pointer;\n", + " display: block;\n", + " width: 100%;\n", + " margin-bottom: 0;\n", + " padding: 0.5em;\n", + " box-sizing: border-box;\n", + " text-align: center;\n", + "}\n", + "\n", + "#sk-container-id-1 label.sk-toggleable__label-arrow:before {\n", + " /* Arrow on the left of the label */\n", + " content: \"▸\";\n", + " float: left;\n", + " margin-right: 0.25em;\n", + " color: var(--sklearn-color-icon);\n", + "}\n", + "\n", + "#sk-container-id-1 label.sk-toggleable__label-arrow:hover:before {\n", + " color: var(--sklearn-color-text);\n", + "}\n", + "\n", + "/* Toggleable content - dropdown */\n", + "\n", + "#sk-container-id-1 div.sk-toggleable__content {\n", + " max-height: 0;\n", + " max-width: 0;\n", + " overflow: hidden;\n", + " text-align: left;\n", + " /* unfitted */\n", + " background-color: var(--sklearn-color-unfitted-level-0);\n", + "}\n", + "\n", + "#sk-container-id-1 div.sk-toggleable__content.fitted {\n", + " /* fitted */\n", + " background-color: var(--sklearn-color-fitted-level-0);\n", + "}\n", + "\n", + "#sk-container-id-1 div.sk-toggleable__content pre {\n", + " margin: 0.2em;\n", + " border-radius: 0.25em;\n", + " color: var(--sklearn-color-text);\n", + " /* unfitted */\n", + " background-color: var(--sklearn-color-unfitted-level-0);\n", + "}\n", + "\n", + "#sk-container-id-1 div.sk-toggleable__content.fitted pre {\n", + " /* unfitted */\n", + " background-color: var(--sklearn-color-fitted-level-0);\n", + "}\n", + "\n", + "#sk-container-id-1 
input.sk-toggleable__control:checked~div.sk-toggleable__content {\n", + " /* Expand drop-down */\n", + " max-height: 200px;\n", + " max-width: 100%;\n", + " overflow: auto;\n", + "}\n", + "\n", + "#sk-container-id-1 input.sk-toggleable__control:checked~label.sk-toggleable__label-arrow:before {\n", + " content: \"▾\";\n", + "}\n", + "\n", + "/* Pipeline/ColumnTransformer-specific style */\n", + "\n", + "#sk-container-id-1 div.sk-label input.sk-toggleable__control:checked~label.sk-toggleable__label {\n", + " color: var(--sklearn-color-text);\n", + " background-color: var(--sklearn-color-unfitted-level-2);\n", + "}\n", + "\n", + "#sk-container-id-1 div.sk-label.fitted input.sk-toggleable__control:checked~label.sk-toggleable__label {\n", + " background-color: var(--sklearn-color-fitted-level-2);\n", + "}\n", + "\n", + "/* Estimator-specific style */\n", + "\n", + "/* Colorize estimator box */\n", + "#sk-container-id-1 div.sk-estimator input.sk-toggleable__control:checked~label.sk-toggleable__label {\n", + " /* unfitted */\n", + " background-color: var(--sklearn-color-unfitted-level-2);\n", + "}\n", + "\n", + "#sk-container-id-1 div.sk-estimator.fitted input.sk-toggleable__control:checked~label.sk-toggleable__label {\n", + " /* fitted */\n", + " background-color: var(--sklearn-color-fitted-level-2);\n", + "}\n", + "\n", + "#sk-container-id-1 div.sk-label label.sk-toggleable__label,\n", + "#sk-container-id-1 div.sk-label label {\n", + " /* The background is the default theme color */\n", + " color: var(--sklearn-color-text-on-default-background);\n", + "}\n", + "\n", + "/* On hover, darken the color of the background */\n", + "#sk-container-id-1 div.sk-label:hover label.sk-toggleable__label {\n", + " color: var(--sklearn-color-text);\n", + " background-color: var(--sklearn-color-unfitted-level-2);\n", + "}\n", + "\n", + "/* Label box, darken color on hover, fitted */\n", + "#sk-container-id-1 div.sk-label.fitted:hover label.sk-toggleable__label.fitted {\n", + " color: 
var(--sklearn-color-text);\n", + " background-color: var(--sklearn-color-fitted-level-2);\n", + "}\n", + "\n", + "/* Estimator label */\n", + "\n", + "#sk-container-id-1 div.sk-label label {\n", + " font-family: monospace;\n", + " font-weight: bold;\n", + " display: inline-block;\n", + " line-height: 1.2em;\n", + "}\n", + "\n", + "#sk-container-id-1 div.sk-label-container {\n", + " text-align: center;\n", + "}\n", + "\n", + "/* Estimator-specific */\n", + "#sk-container-id-1 div.sk-estimator {\n", + " font-family: monospace;\n", + " border: 1px dotted var(--sklearn-color-border-box);\n", + " border-radius: 0.25em;\n", + " box-sizing: border-box;\n", + " margin-bottom: 0.5em;\n", + " /* unfitted */\n", + " background-color: var(--sklearn-color-unfitted-level-0);\n", + "}\n", + "\n", + "#sk-container-id-1 div.sk-estimator.fitted {\n", + " /* fitted */\n", + " background-color: var(--sklearn-color-fitted-level-0);\n", + "}\n", + "\n", + "/* on hover */\n", + "#sk-container-id-1 div.sk-estimator:hover {\n", + " /* unfitted */\n", + " background-color: var(--sklearn-color-unfitted-level-2);\n", + "}\n", + "\n", + "#sk-container-id-1 div.sk-estimator.fitted:hover {\n", + " /* fitted */\n", + " background-color: var(--sklearn-color-fitted-level-2);\n", + "}\n", + "\n", + "/* Specification for estimator info (e.g. 
\"i\" and \"?\") */\n", + "\n", + "/* Common style for \"i\" and \"?\" */\n", + "\n", + ".sk-estimator-doc-link,\n", + "a:link.sk-estimator-doc-link,\n", + "a:visited.sk-estimator-doc-link {\n", + " float: right;\n", + " font-size: smaller;\n", + " line-height: 1em;\n", + " font-family: monospace;\n", + " background-color: var(--sklearn-color-background);\n", + " border-radius: 1em;\n", + " height: 1em;\n", + " width: 1em;\n", + " text-decoration: none !important;\n", + " margin-left: 1ex;\n", + " /* unfitted */\n", + " border: var(--sklearn-color-unfitted-level-1) 1pt solid;\n", + " color: var(--sklearn-color-unfitted-level-1);\n", + "}\n", + "\n", + ".sk-estimator-doc-link.fitted,\n", + "a:link.sk-estimator-doc-link.fitted,\n", + "a:visited.sk-estimator-doc-link.fitted {\n", + " /* fitted */\n", + " border: var(--sklearn-color-fitted-level-1) 1pt solid;\n", + " color: var(--sklearn-color-fitted-level-1);\n", + "}\n", + "\n", + "/* On hover */\n", + "div.sk-estimator:hover .sk-estimator-doc-link:hover,\n", + ".sk-estimator-doc-link:hover,\n", + "div.sk-label-container:hover .sk-estimator-doc-link:hover,\n", + ".sk-estimator-doc-link:hover {\n", + " /* unfitted */\n", + " background-color: var(--sklearn-color-unfitted-level-3);\n", + " color: var(--sklearn-color-background);\n", + " text-decoration: none;\n", + "}\n", + "\n", + "div.sk-estimator.fitted:hover .sk-estimator-doc-link.fitted:hover,\n", + ".sk-estimator-doc-link.fitted:hover,\n", + "div.sk-label-container:hover .sk-estimator-doc-link.fitted:hover,\n", + ".sk-estimator-doc-link.fitted:hover {\n", + " /* fitted */\n", + " background-color: var(--sklearn-color-fitted-level-3);\n", + " color: var(--sklearn-color-background);\n", + " text-decoration: none;\n", + "}\n", + "\n", + "/* Span, style for the box shown on hovering the info icon */\n", + ".sk-estimator-doc-link span {\n", + " display: none;\n", + " z-index: 9999;\n", + " position: relative;\n", + " font-weight: normal;\n", + " right: .2ex;\n", + " 
padding: .5ex;\n", + " margin: .5ex;\n", + " width: min-content;\n", + " min-width: 20ex;\n", + " max-width: 50ex;\n", + " color: var(--sklearn-color-text);\n", + " box-shadow: 2pt 2pt 4pt #999;\n", + " /* unfitted */\n", + " background: var(--sklearn-color-unfitted-level-0);\n", + " border: .5pt solid var(--sklearn-color-unfitted-level-3);\n", + "}\n", + "\n", + ".sk-estimator-doc-link.fitted span {\n", + " /* fitted */\n", + " background: var(--sklearn-color-fitted-level-0);\n", + " border: var(--sklearn-color-fitted-level-3);\n", + "}\n", + "\n", + ".sk-estimator-doc-link:hover span {\n", + " display: block;\n", + "}\n", + "\n", + "/* \"?\"-specific style due to the `` HTML tag */\n", + "\n", + "#sk-container-id-1 a.estimator_doc_link {\n", + " float: right;\n", + " font-size: 1rem;\n", + " line-height: 1em;\n", + " font-family: monospace;\n", + " background-color: var(--sklearn-color-background);\n", + " border-radius: 1rem;\n", + " height: 1rem;\n", + " width: 1rem;\n", + " text-decoration: none;\n", + " /* unfitted */\n", + " color: var(--sklearn-color-unfitted-level-1);\n", + " border: var(--sklearn-color-unfitted-level-1) 1pt solid;\n", + "}\n", + "\n", + "#sk-container-id-1 a.estimator_doc_link.fitted {\n", + " /* fitted */\n", + " border: var(--sklearn-color-fitted-level-1) 1pt solid;\n", + " color: var(--sklearn-color-fitted-level-1);\n", + "}\n", + "\n", + "/* On hover */\n", + "#sk-container-id-1 a.estimator_doc_link:hover {\n", + " /* unfitted */\n", + " background-color: var(--sklearn-color-unfitted-level-3);\n", + " color: var(--sklearn-color-background);\n", + " text-decoration: none;\n", + "}\n", + "\n", + "#sk-container-id-1 a.estimator_doc_link.fitted:hover {\n", + " /* fitted */\n", + " background-color: var(--sklearn-color-fitted-level-3);\n", + "}\n", + "
Pipeline(steps=[('maskselector',\n",
+       "                 MaskSelector(mask=array([ True,  True, False, False, False, False, False, False, False,\n",
+       "        True,  True, False,  True, False,  True,  True,  True, False,\n",
+       "       False, False,  True, False, False, False, False,  True, False,\n",
+       "        True,  True,  True,  True,  True,  True,  True,  True, False,\n",
+       "        True,  True, False, False,  True, False,  True,  True, False,\n",
+       "       False,  True, False,  True, False,  True, False,  True, Fa...\n",
+       "        True, False,  True, False,  True,  True,  True, False,  True,\n",
+       "        True,  True,  True,  True,  True,  True, False,  True,  True,\n",
+       "        True, False, False, False,  True,  True, False,  True,  True,\n",
+       "        True,  True, False, False, False,  True, False,  True, False,\n",
+       "        True, False, False,  True, False,  True, False, False, False,\n",
+       "        True]))),\n",
+       "                ('graphpipeline',\n",
+       "                 GraphPipeline(graph=<networkx.classes.digraph.DiGraph object at 0x76ebe05ee590>))])
In a Jupyter environment, please rerun this cell to show the HTML representation or trust the notebook.
On GitHub, the HTML representation is unable to render, please try loading this page with nbviewer.org.
" + ], + "text/plain": [ + "Pipeline(steps=[('maskselector',\n", + " MaskSelector(mask=array([ True, True, False, False, False, False, False, False, False,\n", + " True, True, False, True, False, True, True, True, False,\n", + " False, False, True, False, False, False, False, True, False,\n", + " True, True, True, True, True, True, True, True, False,\n", + " True, True, False, False, True, False, True, True, False,\n", + " False, True, False, True, False, True, False, True, Fa...\n", + " True, False, True, False, True, True, True, False, True,\n", + " True, True, True, True, True, True, False, True, True,\n", + " True, False, False, False, True, True, False, True, True,\n", + " True, True, False, False, False, True, False, True, False,\n", + " True, False, False, True, False, True, False, False, False,\n", + " True]))),\n", + " ('graphpipeline',\n", + " GraphPipeline(graph=))])" + ] + }, + "execution_count": 2, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "est.fitted_pipeline_" + ] + }, + { + "cell_type": "code", + "execution_count": 4, + "metadata": {}, + "outputs": [ + { + "data": { + "image/png": "", + "text/plain": [ + "
" + ] + }, + "metadata": {}, + "output_type": "display_data" + } + ], + "source": [ + "est.fitted_pipeline_.steps[1][1].plot()" + ] + } + ], + "metadata": { + "kernelspec": { + "display_name": "tpot2env", + "language": "python", + "name": "python3" + }, + "language_info": { + "codemirror_mode": { + "name": "ipython", + "version": 3 + }, + "file_extension": ".py", + "mimetype": "text/x-python", + "name": "python", + "nbconvert_exporter": "python", + "pygments_lexer": "ipython3", + "version": "3.10.14" + } + }, + "nbformat": 4, + "nbformat_minor": 2 +} diff --git a/Tutorial/5_GraphPipeline.ipynb b/Tutorial/6_GraphPipeline.ipynb similarity index 100% rename from Tutorial/5_GraphPipeline.ipynb rename to Tutorial/6_GraphPipeline.ipynb diff --git a/Tutorial/7_dask_parallelization.ipynb b/Tutorial/7_dask_parallelization.ipynb index cbbfc28d..4769c491 100644 --- a/Tutorial/7_dask_parallelization.ipynb +++ b/Tutorial/7_dask_parallelization.ipynb @@ -57,7 +57,27 @@ " scorer = sklearn.metrics.get_scorer('roc_auc_ovr')\n", " X, y = sklearn.datasets.load_digits(return_X_y=True)\n", " X_train, X_test, y_train, y_test = sklearn.model_selection.train_test_split(X, y, train_size=0.75, test_size=0.25)\n", - " est = tpot2.TPOTEstimatorSteadyState( n_jobs=10,memory_limit=\"4GB\", classification=True, max_eval_time_seconds=60, max_time_seconds=120, scorers=['roc_auc_ovr'], scorers_weights=[1], verbose=1)\n", + " \n", + " graph_search_space = tpot2.search_spaces.pipelines.GraphPipeline(\n", + " root_search_space= tpot2.config.get_search_space([\"KNeighborsClassifier\", \"LogisticRegression\", \"DecisionTreeClassifier\"]),\n", + " leaf_search_space = tpot2.config.get_search_space(\"selectors\"), \n", + " inner_search_space = tpot2.config.get_search_space([\"transformers\"]),\n", + " max_size = 10,\n", + " )\n", + "\n", + " est = tpot2.TPOTEstimator(\n", + " scorers = [\"roc_auc\"],\n", + " scorers_weights = [1],\n", + " classification = True,\n", + " cv = 5,\n", + " search_space = 
graph_search_space,\n", + " population_size= 10,\n", + " generations = 5,\n", + " max_eval_time_seconds = 60*5,\n", + " verbose = 2,\n", + " )\n", + " \n", + " \n", " est.fit(X_train, y_train)\n", " print(scorer(est, X_test, y_test))" ] @@ -106,7 +126,27 @@ "X_train, X_test, y_train, y_test = sklearn.model_selection.train_test_split(X, y, train_size=0.75, test_size=0.25)\n", "\n", "\n", - "est = tpot2.TPOTEstimatorSteadyState( n_jobs=10,memory_limit=\"4GB\", classification=True, max_eval_time_seconds=60, max_time_seconds=120, scorers=['roc_auc_ovr'], scorers_weights=[1], verbose=1)\n", + "graph_search_space = tpot2.search_spaces.pipelines.GraphPipeline(\n", + " root_search_space= tpot2.config.get_search_space([\"KNeighborsClassifier\", \"LogisticRegression\", \"DecisionTreeClassifier\"]),\n", + " leaf_search_space = tpot2.config.get_search_space(\"selectors\"), \n", + " inner_search_space = tpot2.config.get_search_space([\"transformers\"]),\n", + " max_size = 10,\n", + " )\n", + "\n", + "est = tpot2.TPOTEstimator(\n", + " scorers = [\"roc_auc\"],\n", + " scorers_weights = [1],\n", + " classification = True,\n", + " cv = 5,\n", + " search_space = graph_search_space,\n", + " population_size= 10,\n", + " generations = 5,\n", + " max_eval_time_seconds = 60*5,\n", + " verbose = 2,\n", + " n_jobs=10,\n", + " memory_limit=\"4GB\"\n", + ")\n", + "\n", "est.fit(X_train, y_train)\n", "print(scorer(est, X_test, y_test))" ] @@ -214,7 +254,27 @@ } ], "source": [ - "est = tpot2.TPOTEstimatorSteadyState( client=client, classification=True, max_eval_time_seconds=60, max_time_seconds=120, scorers=['roc_auc_ovr'], scorers_weights=[1], verbose=1)\n", + "graph_search_space = tpot2.search_spaces.pipelines.GraphPipeline(\n", + " root_search_space= tpot2.config.get_search_space([\"KNeighborsClassifier\", \"LogisticRegression\", \"DecisionTreeClassifier\"]),\n", + " leaf_search_space = tpot2.config.get_search_space(\"selectors\"), \n", + " inner_search_space = 
tpot2.config.get_search_space([\"transformers\"]),\n", + " max_size = 10,\n", + " )\n", + "\n", + "est = tpot2.TPOTEstimator(\n", + " client = client,\n", + " scorers = [\"roc_auc\"],\n", + " scorers_weights = [1],\n", + " classification = True,\n", + " cv = 5,\n", + " search_space = graph_search_space,\n", + " population_size= 10,\n", + " generations = 5,\n", + " max_eval_time_seconds = 60*5,\n", + " verbose = 2,\n", + ")\n", + "\n", + "\n", "# this is equivalent to: \n", "# est = tpot2.TPOTClassifier(population_size= 8, generations=5, n_jobs=4, memory_limit=\"4GB\", verbose=1)\n", "est.fit(X_train, y_train)\n", @@ -283,7 +343,25 @@ " threads_per_worker=1,\n", " memory_limit='4GB',\n", ") as cluster, Client(cluster) as client:\n", - " est = tpot2.TPOTEstimatorSteadyState(client=client, n_jobs=10,memory_limit=\"4GB\", classification=True, max_eval_time_seconds=60, max_time_seconds=120, scorers=['roc_auc_ovr'], scorers_weights=[1], verbose=1)\n", + " graph_search_space = tpot2.search_spaces.pipelines.GraphPipeline(\n", + " root_search_space= tpot2.config.get_search_space([\"KNeighborsClassifier\", \"LogisticRegression\", \"DecisionTreeClassifier\"]),\n", + " leaf_search_space = tpot2.config.get_search_space(\"selectors\"), \n", + " inner_search_space = tpot2.config.get_search_space([\"transformers\"]),\n", + " max_size = 10,\n", + " )\n", + "\n", + " est = tpot2.TPOTEstimator(\n", + " client = client,\n", + " scorers = [\"roc_auc\"],\n", + " scorers_weights = [1],\n", + " classification = True,\n", + " cv = 5,\n", + " search_space = graph_search_space,\n", + " population_size= 10,\n", + " generations = 5,\n", + " max_eval_time_seconds = 60*5,\n", + " verbose = 2,\n", + " )\n", " est.fit(X_train, y_train)\n", " print(scorer(est, X_test, y_test))" ] @@ -349,7 +427,26 @@ " X, y = sklearn.datasets.load_digits(return_X_y=True)\n", " X_train, X_test, y_train, y_test = sklearn.model_selection.train_test_split(X, y, train_size=0.75, test_size=0.25)\n", "\n", - " est = 
tpot2.TPOTEstimatorSteadyState( client=client, classification=True, max_eval_time_seconds=60, max_time_seconds=120, scorers=['roc_auc_ovr'], scorers_weights=[1], verbose=1)\n", + " graph_search_space = tpot2.search_spaces.pipelines.GraphPipeline(\n", + " root_search_space= tpot2.config.get_search_space([\"KNeighborsClassifier\", \"LogisticRegression\", \"DecisionTreeClassifier\"]),\n", + " leaf_search_space = tpot2.config.get_search_space(\"selectors\"), \n", + " inner_search_space = tpot2.config.get_search_space([\"transformers\"]),\n", + " max_size = 10,\n", + " )\n", + "\n", + " est = tpot2.TPOTEstimator(\n", + " client = client,\n", + " scorers = [\"roc_auc\"],\n", + " scorers_weights = [1],\n", + " classification = True,\n", + " cv = 5,\n", + " search_space = graph_search_space,\n", + " population_size= 10,\n", + " generations = 5,\n", + " max_eval_time_seconds = 60*5,\n", + " verbose = 2,\n", + " )\n", + " est.fit(X_train, y_train)\n", " # this is equivalent to: \n", " # est = tpot2.TPOTClassifier(population_size= 8, generations=5, n_jobs=4, memory_limit=\"4GB\", verbose=1)\n", " est.fit(X_train, y_train)\n", diff --git a/Tutorial/6_SH_and_early_termination.ipynb b/Tutorial/8_SH_and_early_termination.ipynb similarity index 97% rename from Tutorial/6_SH_and_early_termination.ipynb rename to Tutorial/8_SH_and_early_termination.ipynb index 1b033644..8b6c2e49 100644 --- a/Tutorial/6_SH_and_early_termination.ipynb +++ b/Tutorial/8_SH_and_early_termination.ipynb @@ -186,26 +186,33 @@ "\n", "X, y = sklearn.datasets.load_iris(return_X_y=True)\n", "\n", - "est = tpot2.TPOTEstimator( \n", - " generations=5,\n", - " scorers=['roc_auc_ovr'],\n", - " scorers_weights=[1],\n", - " classification=True,\n", - " root_config_dict=\"classifiers\",\n", - " inner_config_dict= [\"transformers\"],\n", - " leaf_config_dict=\"selectors\",\n", - " n_jobs=32,\n", - " cv=2,\n", - " max_eval_time_seconds=30,\n", + "graph_search_space = tpot2.search_spaces.pipelines.GraphPipeline(\n", + " 
root_search_space= tpot2.config.get_search_space([\"KNeighborsClassifier\", \"LogisticRegression\", \"DecisionTreeClassifier\"]),\n", + " leaf_search_space = tpot2.config.get_search_space(\"selectors\"), \n", + " inner_search_space = tpot2.config.get_search_space([\"transformers\"]),\n", + " max_size = 10,\n", + " )\n", + "\n", + "est = tpot2.TPOTEstimator(\n", + " scorers = [\"roc_auc\"],\n", + " scorers_weights = [1],\n", + " classification = True,\n", + " cv = 5,\n", + " search_space = graph_search_space,\n", + " generations = 50,\n", + " max_eval_time_seconds = 60*5,\n", + " verbose = 2,\n", + "\n", + "\n", + " population_size=population_size,\n", + " initial_population_size=initial_population_size,\n", + " population_scaling = population_scaling,\n", + " generations_until_end_population = generations_until_end_population,\n", + " \n", + " budget_range = budget_range,\n", + " generations_until_end_budget=generations_until_end_budget,\n", + " )\n", "\n", - " population_size=population_size,\n", - " initial_population_size=initial_population_size,\n", - " population_scaling = population_scaling,\n", - " generations_until_end_population = generations_until_end_population,\n", - " \n", - " budget_range = budget_range,\n", - " generations_until_end_budget=generations_until_end_budget,\n", - " verbose=0)\n", "\n", "\n", "start = time.time()\n", @@ -296,14 +303,20 @@ } ], "source": [ + "graph_search_space = tpot2.search_spaces.pipelines.GraphPipeline(\n", + " root_search_space= tpot2.config.get_search_space([\"KNeighborsClassifier\", \"LogisticRegression\", \"DecisionTreeClassifier\"]),\n", + " leaf_search_space = tpot2.config.get_search_space(\"selectors\"), \n", + " inner_search_space = tpot2.config.get_search_space([\"transformers\"]),\n", + " max_size = 10,\n", + " )\n", + "\n", + "\n", "est = tpot2.TPOTEstimator( \n", " generations=5,\n", " scorers=['roc_auc_ovr'],\n", " scorers_weights=[1],\n", " classification=True,\n", - " root_config_dict=\"classifiers\",\n", 
- " inner_config_dict= [\"transformers\"],\n", - " leaf_config_dict=\"selectors\",\n", + " search_space = graph_search_space,\n", " n_jobs=32,\n", " cv=cv,\n", " \n", @@ -369,14 +382,15 @@ } ], "source": [ + "\n", + "\n", + "\n", "est = tpot2.TPOTEstimator( \n", " generations=5,\n", " scorers=['roc_auc_ovr'],\n", " scorers_weights=[1],\n", " classification=True,\n", - " root_config_dict=\"classifiers\",\n", - " inner_config_dict= [\"transformers\"],\n", - " leaf_config_dict=\"selectors\",\n", + " search_space = graph_search_space,\n", " n_jobs=32,\n", " cv=cv,\n", "\n", @@ -447,9 +461,7 @@ " scorers=['roc_auc_ovr'],\n", " scorers_weights=[1],\n", " classification=True,\n", - " root_config_dict=\"classifiers\",\n", - " inner_config_dict= [\"transformers\"],\n", - " leaf_config_dict=\"selectors\",\n", + " search_space = graph_search_space,\n", " n_jobs=32,\n", " cv=cv,\n", "\n", diff --git a/Tutorial/8_Genetic_Algorithm_Overview.ipynb b/Tutorial/9_Genetic_Algorithm_Overview.ipynb similarity index 100% rename from Tutorial/8_Genetic_Algorithm_Overview.ipynb rename to Tutorial/9_Genetic_Algorithm_Overview.ipynb diff --git a/setup.py b/setup.py index f0977acd..7deca183 100644 --- a/setup.py +++ b/setup.py @@ -14,6 +14,7 @@ def calculate_version(): setup( name='TPOT2', + python_requires='<3.12', #for configspace compatibility version=package_version, author='Pedro Ribeiro', packages=find_packages(), @@ -33,7 +34,7 @@ def calculate_version(): 'update_checker>=0.16', 'tqdm>=4.36.1', 'stopit>=1.1.1', - 'pandas>=1.5.3,<2.0.0', + 'pandas>=2.2.1', 'joblib>=1.1.1', 'xgboost>=1.7.0', 'matplotlib>=3.6.2', diff --git a/tpot2/config/autoqtl_builtins.py b/tpot2/config/autoqtl_builtins.py index b317fe70..d649bacd 100644 --- a/tpot2/config/autoqtl_builtins.py +++ b/tpot2/config/autoqtl_builtins.py @@ -6,17 +6,12 @@ from ConfigSpace import ConfigurationSpace from ConfigSpace import ConfigurationSpace, Integer, Float, Categorical, Normal -def 
get_FeatureEncodingFrequencySelector_ConfigurationSpace(): - return ConfigurationSpace( - space = { - 'threshold': Float("threshold", bounds=(0, .35)) - } - ) +FeatureEncodingFrequencySelector_ConfigurationSpace = ConfigurationSpace( + space = { + 'threshold': Float("threshold", bounds=(0, .35)) + } +) -def get_encoder_ConfigurationSpace(): - return ConfigurationSpace( - space = {} - ) # genetic_encoders.DominantEncoder : {}, # genetic_encoders.RecessiveEncoder : {}, diff --git a/tpot2/config/classifiers_sklearnex.py b/tpot2/config/classifiers_sklearnex.py index 939df92f..a158a9a6 100644 --- a/tpot2/config/classifiers_sklearnex.py +++ b/tpot2/config/classifiers_sklearnex.py @@ -1,13 +1,6 @@ -from sklearnex.ensemble import RandomForestClassifier -from sklearnex.neighbors import KNeighborsClassifier -from sklearnex.svm import SVC -from sklearnex.svm import NuSVC -from sklearnex.linear_model import LogisticRegression - -import numpy as np from ConfigSpace import ConfigurationSpace from ConfigSpace import ConfigurationSpace, Integer, Float, Categorical, Normal -from functools import partial + def get_RandomForestClassifier_ConfigurationSpace(random_state=None): space = { diff --git a/tpot2/config/get_configspace.py b/tpot2/config/get_configspace.py index 0710c29e..2c4485bf 100644 --- a/tpot2/config/get_configspace.py +++ b/tpot2/config/get_configspace.py @@ -1,3 +1,8 @@ +import importlib.util +import sys +import numpy as np +import warnings + from ..search_spaces.nodes import EstimatorNode from ..search_spaces.pipelines import ChoicePipeline, WrapperPipeline @@ -10,7 +15,14 @@ from . import mdr_configs from . import special_configs -import numpy as np +from . import classifiers_sklearnex +from . 
import regressors_sklearnex + + + +#autoqtl_builtins +from tpot2.builtin_modules import genetic_encoders +from tpot2.builtin_modules import feature_encoding_frequency_selector from sklearn.linear_model import SGDClassifier from sklearn.ensemble import RandomForestClassifier, ExtraTreesClassifier, GradientBoostingClassifier @@ -80,37 +92,117 @@ from tpot2.builtin_modules import RFE_ExtraTreesClassifier, SelectFromModel_ExtraTreesClassifier, RFE_ExtraTreesRegressor, SelectFromModel_ExtraTreesRegressor +from tpot2.builtin_modules import AddTransformer, mul_neg_1_Transformer, MulTransformer, SafeReciprocalTransformer, EQTransformer, NETransformer, GETransformer, GTTransformer, LETransformer, LTTransformer, MinTransformer, MaxTransformer, ZeroTransformer, OneTransformer, NTransformer + + +#MDR + + all_methods = [SGDClassifier, RandomForestClassifier, ExtraTreesClassifier, GradientBoostingClassifier, MLPClassifier, DecisionTreeClassifier, XGBClassifier, KNeighborsClassifier, SVC, LogisticRegression, LGBMClassifier, LinearSVC, GaussianNB, BernoulliNB, MultinomialNB, ExtraTreesRegressor, RandomForestRegressor, GradientBoostingRegressor, BaggingRegressor, DecisionTreeRegressor, KNeighborsRegressor, XGBRegressor, RFE_ExtraTreesClassifier, SelectFromModel_ExtraTreesClassifier, RFE_ExtraTreesRegressor, SelectFromModel_ExtraTreesRegressor, ZeroCount, OneHotEncoder, ColumnOneHotEncoder, Binarizer, FastICA, FeatureAgglomeration, MaxAbsScaler, MinMaxScaler, Normalizer, Nystroem, PCA, PolynomialFeatures, RBFSampler, RobustScaler, StandardScaler, SelectFwe, SelectPercentile, VarianceThreshold, RFE, SelectFromModel, f_classif, f_regression, SGDRegressor, LinearRegression, Ridge, Lasso, ElasticNet, Lars, LassoLars, LassoLarsCV, RidgeCV, SVR, LinearSVR, AdaBoostRegressor, GradientBoostingRegressor, RandomForestRegressor, BaggingRegressor, ExtraTreesRegressor, DecisionTreeRegressor, KNeighborsRegressor, ElasticNetCV, + AddTransformer, mul_neg_1_Transformer, MulTransformer, 
SafeReciprocalTransformer, EQTransformer, NETransformer, GETransformer, GTTransformer, LETransformer, LTTransformer, MinTransformer, MaxTransformer, ZeroTransformer, OneTransformer, NTransformer, ] + +#if mdr is installed +if 'mdr' in sys.modules: + from mdr import MDR, ContinuousMDR + all_methods.append(MDR) + all_methods.append(ContinuousMDR) + +if 'skrebate' in sys.modules: + from skrebate import ReliefF, SURF, SURFstar, MultiSURF + all_methods.append(ReliefF) + all_methods.append(SURF) + all_methods.append(SURFstar) + all_methods.append(MultiSURF) + +if 'sklearnex' in sys.modules: + from sklearnex.linear_model import LinearRegression + from sklearnex.linear_model import Ridge + from sklearnex.linear_model import Lasso + from sklearnex.linear_model import ElasticNet + from sklearnex.svm import SVR + from sklearnex.svm import NuSVR + from sklearnex.ensemble import RandomForestRegressor + from sklearnex.neighbors import KNeighborsRegressor + + from sklearnex.ensemble import RandomForestClassifier + from sklearnex.neighbors import KNeighborsClassifier + from sklearnex.svm import SVC + from sklearnex.svm import NuSVC + from sklearnex.linear_model import LogisticRegression + + + all_methods.append(LinearRegression) + all_methods.append(Ridge) + all_methods.append(Lasso) + all_methods.append(ElasticNet) + all_methods.append(SVR) + all_methods.append(NuSVR) + all_methods.append(RandomForestRegressor) + all_methods.append(KNeighborsRegressor) + + all_methods.append(RandomForestClassifier) + all_methods.append(KNeighborsClassifier) + all_methods.append(SVC) + all_methods.append(NuSVC) + all_methods.append(LogisticRegression) + + STRING_TO_CLASS = { t.__name__: t for t in all_methods } + + GROUPNAMES = { "selectors": ["SelectFwe", "SelectPercentile", "VarianceThreshold",], "selectors_classification": ["SelectFwe", "SelectPercentile", "VarianceThreshold", "RFE_classification", "SelectFromModel_classification"], "selectors_regression": ["SelectFwe", "SelectPercentile", 
"VarianceThreshold", "RFE_regression", "SelectFromModel_regression"], - "classifiers" : ["BernoulliNB", "DecisionTreeClassifier", "ExtraTreesClassifier", "GaussianNB", "GradientBoostingClassifier", "KNeighborsClassifier", "LinearDiscriminantAnalysis", "LinearSVC", "QuadraticDiscriminantAnalysis", "PassiveAggressiveClassifier", "LogisticRegression", "MLPClassifier", "MultinomialNB", "PassiveAggressiveClassifier", "Perceptron", "QuadraticDiscriminantAnalysis", "RandomForestClassifier", "RidgeClassifier", "SGDClassifier", "SVC", "XGBClassifier", "LGBMClassifier"], + "classifiers" : ["LogisticRegression", "DecisionTreeClassifier", "KNeighborsClassifier", "GradientBoostingClassifier", "ExtraTreesClassifier", "RandomForestClassifier", "SGDClassifier", "GaussianNB", "BernoulliNB", "MultinomialNB", "XGBClassifier", "SVC", "MLPClassifier"], + "regressors" : ["ElasticNetCV", "ExtraTreesRegressor", "GradientBoostingRegressor", "AdaBoostRegressor", "DecisionTreeRegressor", "KNeighborsRegressor", "LassoLarsCV", "SVR", "RandomForestRegressor", "RidgeCV", "XGBRegressor", "SGDRegressor" ], "transformers": ["Binarizer", "Normalizer", "PCA", "ZeroCount", "OneHotEncoder", "FastICA", "FeatureAgglomeration", "Nystroem", "RBFSampler"], + "arithmatic": ["AddTransformer", "mul_neg_1_Transformer", "MulTransformer", "SafeReciprocalTransformer", "EQTransformer", "NETransformer", "GETransformer", "GTTransformer", "LETransformer", "LTTransformer", "MinTransformer", "MaxTransformer", "ZeroTransformer", "OneTransformer", "NTransformer"], + "imputers": [], + "skrebate": ["ReliefF", "SURF", "SURFstar", "MultiSURF"], + "genetic_encoders": ["DominantEncoder", "RecessiveEncoder", "HeterosisEncoder", "UnderDominanceEncoder", "OverDominanceEncoder"], + + "classifiers_sklearnex" : ["RandomForestClassifier_sklearnex", "LogisticRegression_sklearnex", "KNeighborsClassifier_sklearnex", "SVC_sklearnex","NuSVC_sklearnex"], + "regressors_sklearnex" : ["LinearRegression_sklearnex", "Ridge_sklearnex", 
"Lasso_sklearnex", "ElasticNet_sklearnex", "SVR_sklearnex", "NuSVR_sklearnex", "RandomForestRegressor_sklearnex", "KNeighborsRegressor_sklearnex"], } -def get_configspace(name, n_classes=3, n_samples=100, random_state=None): +def get_configspace(name, n_classes=3, n_samples=100, n_features=100, random_state=None): match name: + + #autoqtl_builtins.py + case "FeatureEncodingFrequencySelector": + return autoqtl_builtins.FeatureEncodingFrequencySelector_ConfigurationSpace + case "DominantEncoder": + return {} + case "RecessiveEncoder": + return {} + case "HeterosisEncoder": + return {} + case "UnderDominanceEncoder": + return {} + case "OverDominanceEncoder": + return {} + + #classifiers.py case "LogisticRegression": - return classifiers.get_LogisticRegression_ConfigurationSpace() + return classifiers.get_LogisticRegression_ConfigurationSpace(random_state=random_state) case "KNeighborsClassifier": return classifiers.get_KNeighborsClassifier_ConfigurationSpace(n_samples=n_samples) case "DecisionTreeClassifier": - return classifiers.get_DecisionTreeClassifier_ConfigurationSpace() + return classifiers.get_DecisionTreeClassifier_ConfigurationSpace(random_state=random_state) case "SVC": - return classifiers.get_SVC_ConfigurationSpace() + return classifiers.get_SVC_ConfigurationSpace(random_state=random_state) case "LinearSVC": - return classifiers.get_LinearSVC_ConfigurationSpace() + return classifiers.get_LinearSVC_ConfigurationSpace(random_state=random_state) case "RandomForestClassifier": return classifiers.get_RandomForestClassifier_ConfigurationSpace(random_state=random_state) case "GradientBoostingClassifier": @@ -129,6 +221,8 @@ def get_configspace(name, n_classes=3, n_samples=100, random_state=None): return classifiers.get_BernoulliNB_ConfigurationSpace() case "MultinomialNB": return classifiers.get_MultinomialNB_ConfigurationSpace() + case "GaussianNB": + return {} #transformers.py case "Binarizer": @@ -142,13 +236,13 @@ def get_configspace(name, n_classes=3, 
n_samples=100, random_state=None): case "OneHotEncoder": return transformers.OneHotEncoder_configspace case "FastICA": - return transformers.get_FastICA_configspace() + return transformers.get_FastICA_configspace(n_features=n_features, random_state=random_state) case "FeatureAgglomeration": - return transformers.get_FeatureAgglomeration_configspace() + return transformers.get_FeatureAgglomeration_configspace(n_features=n_features,) case "Nystroem": - return transformers.get_Nystroem_configspace() + return transformers.get_Nystroem_configspace(n_features=n_features, random_state=random_state) case "RBFSampler": - return transformers.get_RBFSampler_configspace() + return transformers.get_RBFSampler_configspace(n_features=n_features, random_state=random_state) #selectors.py case "SelectFwe": @@ -162,32 +256,113 @@ def get_configspace(name, n_classes=3, n_samples=100, random_state=None): case "SelectFromModel": return selectors.SelectFromModel_configspace_part - return None + #special_configs.py + case "AddTransformer": + return {} + case "mul_neg_1_Transformer": + return {} + case "MulTransformer": + return {} + case "SafeReciprocalTransformer": + return {} + case "EQTransformer": + return {} + case "NETransformer": + return {} + case "GETransformer": + return {} + case "GTTransformer": + return {} + case "LETransformer": + return {} + case "LTTransformer": + return {} + case "MinTransformer": + return {} + case "MaxTransformer": + return {} + case "ZeroTransformer": + return {} + case "OneTransformer": + return {} + case "NTransformer": + return {} + + #imputers.py + + #mdr_configs.py + case "MDR": + return mdr_configs.MDR_configspace + case "ContinuousMDR": + return mdr_configs.MDR_configspace + case "ReliefF": + return mdr_configs.get_skrebate_ReliefF_config_space(n_features=n_features) + case "SURF": + return mdr_configs.get_skrebate_SURF_config_space(n_features=n_features) + case "SURFstar": + return 
mdr_configs.get_skrebate_SURFstar_config_space(n_features=n_features) + case "MultiSURF": + return mdr_configs.get_skrebate_MultiSURF_config_space(n_features=n_features) + + #classifiers_sklearnex.py + case "RandomForestClassifier_sklearnex": + return classifiers_sklearnex.get_RandomForestClassifier_ConfigurationSpace(random_state=random_state) + case "LogisticRegression_sklearnex": + return classifiers_sklearnex.get_LogisticRegression_ConfigurationSpace(random_state=random_state) + case "KNeighborsClassifier_sklearnex": + return classifiers_sklearnex.get_KNeighborsClassifier_ConfigurationSpace(n_samples=n_samples) + case "SVC_sklearnex": + return classifiers_sklearnex.get_SVC_ConfigurationSpace(random_state=random_state) + case "NuSVC_sklearnex": + return classifiers_sklearnex.get_NuSVC_ConfigurationSpace(random_state=random_state) + + #regressors_sklearnex.py + case "LinearRegression_sklearnex": + return {} + case "Ridge_sklearnex": + return regressors_sklearnex.get_Ridge_ConfigurationSpace(random_state=random_state) + case "Lasso_sklearnex": + return regressors_sklearnex.get_Lasso_ConfigurationSpace(random_state=random_state) + case "ElasticNet_sklearnex": + return regressors_sklearnex.get_ElasticNet_ConfigurationSpace(random_state=random_state) + case "SVR_sklearnex": + return regressors_sklearnex.get_SVR_ConfigurationSpace(random_state=random_state) + case "NuSVR_sklearnex": + return regressors_sklearnex.get_NuSVR_ConfigurationSpace(random_state=random_state) + case "RandomForestRegressor_sklearnex": + return regressors_sklearnex.get_RandomForestRegressor_ConfigurationSpace(random_state=random_state) + case "KNeighborsRegressor_sklearnex": + return regressors_sklearnex.get_KNeighborsRegressor_ConfigurationSpace(n_samples=n_samples) + + return {} -def get_search_space(name, n_classes=3, n_samples=100, random_state=None): - name = GROUPNAMES[name] +def get_search_space(name, n_classes=3, n_samples=100, n_features=100, random_state=None): - if name is None: - 
return None - - if name not in STRING_TO_CLASS: - return None #if list of names, return a list of EstimatorNodes if isinstance(name, list) or isinstance(name, np.ndarray): - search_spaces = [get_search_space(n, n_classes=n_classes, n_samples=n_samples, random_state=random_state) for n in name] + search_spaces = [get_search_space(n, n_classes=n_classes, n_samples=n_samples, n_features=n_features, random_state=random_state) for n in name] #remove Nones search_spaces = [s for s in search_spaces if s is not None] - return ChoicePipeline(choice_list=search_spaces) - else: - return get_node(name, n_classes=n_classes, n_samples=n_samples, random_state=random_state) + + if name in GROUPNAMES: + name_list = GROUPNAMES[name] + return get_search_space(name_list, n_classes=n_classes, n_samples=n_samples, n_features=n_features, random_state=random_state) + + if name is None: + return None + if name not in STRING_TO_CLASS: + return None + + return get_node(name, n_classes=n_classes, n_samples=n_samples, n_features=n_features, random_state=random_state) -def get_node(name, n_classes=3, n_samples=100, random_state=None): - #these are wrappers +def get_node(name, n_classes=3, n_samples=100, n_features=100, random_state=None): + + #these are wrappers that take in another estimator as a parameter if name == "RFE_classification": rfe_sp = get_configspace(name, n_classes=n_classes, n_samples=n_samples, random_state=random_state) ext = get_node("ExtraTreesClassifier", n_classes=n_classes, n_samples=n_samples, random_state=random_state) @@ -206,5 +381,10 @@ def get_node(name, n_classes=3, n_samples=100, random_state=None): return WrapperPipeline(nodegen=ext, method=SelectFromModel, configspace=sfm_sp) - configspace = get_configspace(name, n_classes=n_classes, n_samples=n_samples, random_state=random_state) + configspace = get_configspace(name, n_classes=n_classes, n_samples=n_samples, n_features=n_features, random_state=random_state) + if configspace is None: + #raise warning + 
warnings.warn(f"Could not find configspace for {name}") + return None + return EstimatorNode(STRING_TO_CLASS[name], configspace) diff --git a/tpot2/config/mdr_configs.py b/tpot2/config/mdr_configs.py index abfe2a4d..b99ec81e 100644 --- a/tpot2/config/mdr_configs.py +++ b/tpot2/config/mdr_configs.py @@ -11,15 +11,10 @@ } ) -MDR_configspace = ConfigurationSpace( - space = { - 'tie_break': Categorical('tie_break', [0,1]), - 'default_label': Categorical('default_label', [0,1]), - } -) -def get_skrebate_SURF_config_space(n_features=10): + +def get_skrebate_ReliefF_config_space(n_features=10): return ConfigurationSpace( space = { 'n_features_to_select': Integer('n_features_to_select', bounds=(1, n_features), log=True), @@ -28,7 +23,7 @@ def get_skrebate_SURF_config_space(n_features=10): ) -def make_skrebate_SURF_config_space(n_features=10): +def get_skrebate_SURF_config_space(n_features=10): return ConfigurationSpace( space = { 'n_features_to_select': Integer('n_features_to_select', bounds=(1, n_features), log=True), @@ -36,13 +31,13 @@ def make_skrebate_SURF_config_space(n_features=10): ) -def make_skrebate_SURFstar_config_space(n_features=10): +def get_skrebate_SURFstar_config_space(n_features=10): return ConfigurationSpace( space = { 'n_features_to_select': Integer('n_features_to_select', bounds=(1, n_features), log=True), } ) -def make_skrebate_MultiSURF_config_space(n_features=10): +def get_skrebate_MultiSURF_config_space(n_features=10): return ConfigurationSpace( space = { 'n_features_to_select': Integer('n_features_to_select', bounds=(1, n_features), log=True), diff --git a/tpot2/config/regressors_sklearnex.py b/tpot2/config/regressors_sklearnex.py index 298407cb..3473de56 100644 --- a/tpot2/config/regressors_sklearnex.py +++ b/tpot2/config/regressors_sklearnex.py @@ -1,18 +1,3 @@ -from sklearnex.linear_model import LinearRegression -from sklearnex.linear_model import Ridge -from sklearnex.linear_model import Lasso -from sklearnex.linear_model import ElasticNet - 
-from sklearnex.svm import SVR -from sklearnex.svm import NuSVR - -from sklearnex.ensemble import RandomForestRegressor -from sklearnex.neighbors import KNeighborsRegressor - -import numpy as np - -from functools import partial - from ConfigSpace import ConfigurationSpace from ConfigSpace import ConfigurationSpace, Integer, Float, Categorical, Normal @@ -43,9 +28,6 @@ def get_KNeighborsRegressor_ConfigurationSpace(n_samples=100): } ) -LinearRegression_configspace = ConfigurationSpace() - - def get_Ridge_ConfigurationSpace(random_state=None): space = { @@ -108,16 +90,6 @@ def get_SVR_ConfigurationSpace(random_state=None): space = space ) -def params_NuSVR(trial, name=None): - return { - 'nu': trial.suggest_float(f'subsample_{name}', 0.05, 1.0), - 'kernel': trial.suggest_categorical(name=f'kernel_{name}', choices=['poly', 'rbf', 'linear', 'sigmoid']), - 'C': trial.suggest_float(f'C_{name}', 1e-4, 25, log=True), - 'degree': trial.suggest_int(f'degree_{name}', 1, 4), - 'max_iter': 3000, - 'tol': 0.005, - } - def get_NuSVR_ConfigurationSpace(random_state=None): space = { 'nu': Float("nu", bounds=(0.05, 1.0)), diff --git a/tpot2/config/special_configs.py b/tpot2/config/special_configs.py index cdecfe7b..38545f6c 100644 --- a/tpot2/config/special_configs.py +++ b/tpot2/config/special_configs.py @@ -17,21 +17,19 @@ def get_ArithmeticTransformer_ConfigurationSpace(): -# def make_arithmetic_transformer_config_dictionary(): -# return { -# AddTransformer: {}, -# mul_neg_1_Transformer: {}, -# MulTransformer: {}, -# SafeReciprocalTransformer: {}, -# EQTransformer: {}, -# NETransformer: {}, -# GETransformer: {}, -# GTTransformer: {}, -# LETransformer: {}, -# LTTransformer: {}, -# MinTransformer: {}, -# MaxTransformer: {}, -# } +# AddTransformer: {} +# mul_neg_1_Transformer: {} +# MulTransformer: {} +# SafeReciprocalTransformer: {} +# EQTransformer: {} +# NETransformer: {} +# GETransformer: {} +# GTTransformer: {} +# LETransformer: {} +# LTTransformer: {} +# MinTransformer: {} +# 
MaxTransformer: {} + def get_FeatureSetSelector_ConfigurationSpace(names_list = None, subset_dict=None): diff --git a/tpot2/search_spaces/base.py b/tpot2/search_spaces/base.py index 2c91beb6..88955ba7 100644 --- a/tpot2/search_spaces/base.py +++ b/tpot2/search_spaces/base.py @@ -23,7 +23,7 @@ def export_pipeline(self) -> BaseEstimator: return def unique_id(self): - return + return self class SklearnIndividualGenerator(): diff --git a/tpot2/search_spaces/nodes/__init__.py b/tpot2/search_spaces/nodes/__init__.py index 35cebf87..4026d02c 100644 --- a/tpot2/search_spaces/nodes/__init__.py +++ b/tpot2/search_spaces/nodes/__init__.py @@ -1,2 +1,3 @@ from .estimator_node import * -from .genetic_feature_selection import * \ No newline at end of file +from .genetic_feature_selection import * +from .fss_node import * \ No newline at end of file diff --git a/tpot2/search_spaces/nodes/estimator_node.py b/tpot2/search_spaces/nodes/estimator_node.py index 6bea7615..6e084b59 100644 --- a/tpot2/search_spaces/nodes/estimator_node.py +++ b/tpot2/search_spaces/nodes/estimator_node.py @@ -18,7 +18,7 @@ def __init__(self, method: type, self.space = space if isinstance(space, dict): - self.space = space + self.hyperparameters = space else: rng = np.random.default_rng(rng) self.space.seed(rng.integers(0, 2**32)) diff --git a/tpot2/search_spaces/nodes/fss_node.py b/tpot2/search_spaces/nodes/fss_node.py index e69de29b..fb039e12 100644 --- a/tpot2/search_spaces/nodes/fss_node.py +++ b/tpot2/search_spaces/nodes/fss_node.py @@ -0,0 +1,79 @@ +from numpy import iterable +import tpot2 +import numpy as np +import sklearn +import sklearn.datasets +import numpy as np + +import pandas as pd +import os, os.path +from sklearn.base import BaseEstimator +from sklearn.feature_selection._base import SelectorMixin + +from ..base import SklearnIndividual, SklearnIndividualGenerator + +from ...builtin_modules.feature_set_selector import FeatureSetSelector + +class FSSIndividual(SklearnIndividual): + def 
__init__( self, + subsets, + rng=None, + ): + + subsets = subsets + rng = np.random.default_rng(rng) + + if isinstance(subsets, str): + df = pd.read_csv(subsets,header=None,index_col=0) + df['features'] = df.apply(lambda x: list([x[c] for c in df.columns]),axis=1) + self.subset_dict = {} + for row in df.index: + self.subset_dict[row] = df.loc[row]['features'] + elif isinstance(subsets, dict): + self.subset_dict = subsets + elif isinstance(subsets, list) or isinstance(subsets, np.ndarray): + self.subset_dict = {str(i):subsets[i] for i in range(len(subsets))} + elif isinstance(subsets, int): + self.subset_dict = {"{0}".format(i):i for i in range(subsets)} + else: + raise ValueError("Subsets must be a string, dictionary, list, int, or numpy array") + + self.names_list = list(self.subset_dict.keys()) + + + self.selected_subset_name = rng.choice(self.names_list) + self.sel_subset = self.subset_dict[self.selected_subset_name] + + + def mutate(self, rng=None): + rng = np.random.default_rng(rng) + self.selected_subset_name = rng.choice(self.names_list) + self.sel_subset = self.subset_dict[self.selected_subset_name] + + + def crossover(self, other, rng=None): + self.selected_subset_name = other.selected_subset_name + self.sel_subset = other.sel_subset + + def export_pipeline(self): + return FeatureSetSelector(sel_subset=self.sel_subset, name=self.selected_subset_name) + + + def unique_id(self): + return self.selected_subset_name + + +class FSSNode(SklearnIndividualGenerator): + def __init__(self, + subsets, + rng=None, + ): + + self.subsets = subsets + self.rng = rng + + def generate(self, rng=None) -> SklearnIndividual: + return FSSIndividual( + subsets=self.subsets, + rng=rng, + ) \ No newline at end of file diff --git a/tpot2/search_spaces/nodes/genetic_feature_selection.py b/tpot2/search_spaces/nodes/genetic_feature_selection.py index 54761123..e51ff8ba 100644 --- a/tpot2/search_spaces/nodes/genetic_feature_selection.py +++ 
b/tpot2/search_spaces/nodes/genetic_feature_selection.py @@ -151,7 +151,7 @@ def unique_id(self): class GeneticFeatureSelectorNode(SklearnIndividualGenerator): def __init__(self, - mask, + n_features, start_p=0.2, mutation_rate = 0.5, crossover_rate = 0.5, @@ -159,7 +159,7 @@ def __init__(self, crossover_rate_rate = 0, rng=None,): - self.mask = mask + self.n_features = n_features self.start_p = start_p self.mutation_rate = mutation_rate self.crossover_rate = crossover_rate @@ -168,7 +168,7 @@ def __init__(self, self.rng = rng def generate(self, rng=None) -> SklearnIndividual: - return GeneticFeatureSelectorIndividual( mask=self.mask, + return GeneticFeatureSelectorIndividual( mask=self.n_features, start_p=self.start_p, mutation_rate=self.mutation_rate, crossover_rate=self.crossover_rate, diff --git a/tpot2/search_spaces/pipelines/graph.py b/tpot2/search_spaces/pipelines/graph.py index 9332a011..c8a5280f 100644 --- a/tpot2/search_spaces/pipelines/graph.py +++ b/tpot2/search_spaces/pipelines/graph.py @@ -48,7 +48,7 @@ def __init__(self, self.graph.add_edge(self.root, self.leaf) self.mutate_methods_list = [self._mutate_insert_leaf, self._mutate_insert_inner_node, self._mutate_remove_node, self._mutate_node] - self.crossover_methods_list = [self._crossover_swap_branch, self._crossover_swap_node, self._crossover_take_branch] #TODO self._crossover_nodes, + self.crossover_methods_list = [self._crossover_swap_branch,]#[self._crossover_swap_branch, self._crossover_swap_node, self._crossover_take_branch] #TODO self._crossover_nodes, self.merge_duplicated_nodes_toggle = True @@ -461,7 +461,7 @@ def _crossover_swap_leaf_at_node(self, G2, rng=None): - + #TODO edit so that G2 is not modified def _crossover_swap_node(self, G2, rng=None): ''' Swaps randomly chosen node from Parent1 with a randomly chosen node from Parent2. 
@@ -617,7 +617,7 @@ def plot(self): def unique_id(self): - return + return self class GraphPipeline(SklearnIndividualGenerator): diff --git a/tpot2/search_spaces/pipelines/sequential.py b/tpot2/search_spaces/pipelines/sequential.py index 4459a284..f542c023 100644 --- a/tpot2/search_spaces/pipelines/sequential.py +++ b/tpot2/search_spaces/pipelines/sequential.py @@ -47,7 +47,7 @@ def export_pipeline(self): return sklearn.pipeline.make_pipeline(*[step.export_pipeline() for step in self.pipeline]) def unique_id(self): - return tuple([step.unique_id() for step in self.pipeline]) + return self class SequentialPipeline(SklearnIndividualGenerator): diff --git a/tpot2/tpot_estimator/estimator_utils.py b/tpot2/tpot_estimator/estimator_utils.py index 36e5c53c..c0b79739 100644 --- a/tpot2/tpot_estimator/estimator_utils.py +++ b/tpot2/tpot_estimator/estimator_utils.py @@ -102,7 +102,8 @@ def recursive_with_defaults(config_dict, n_samples, n_features, classification, def objective_function_generator(pipeline, x,y, scorers, cv, other_objective_functions, step=None, budget=None, generation=1, is_classification=True, **pipeline_kwargs): - pipeline = pipeline.export_pipeline(**pipeline_kwargs) + #pipeline = pipeline.export_pipeline(**pipeline_kwargs) + pipeline = pipeline.export_pipeline() if budget is not None and budget < 1: if is_classification: x,y = sklearn.utils.resample(x,y, stratify=y, n_samples=int(budget*len(x)), replace=False, random_state=1) diff --git a/tpot2/tpot_estimator/templates/tpottemplates.py b/tpot2/tpot_estimator/templates/tpottemplates.py index 6da52dad..36b43c66 100644 --- a/tpot2/tpot_estimator/templates/tpottemplates.py +++ b/tpot2/tpot_estimator/templates/tpottemplates.py @@ -13,14 +13,7 @@ def __init__( self, other_objective_functions_weights = [], objective_function_names = None, bigger_is_better = True, - max_size = np.inf, - linear_pipeline = False, - root_config_dict= 'Auto', - inner_config_dict=["selectors", "transformers"], - leaf_config_dict= 
None, - cross_val_predict_cv = 0, categorical_features = None, - subsets = None, memory = None, preprocessing = False, max_time_seconds=3600, @@ -38,6 +31,15 @@ def __init__( self, """ See TPOTEstimator for documentation """ + + search_space = tpot2.search_spaces.pipelines.GraphPipeline( + root_search_space= tpot2.config.get_search_space("regressors"), + leaf_search_space = None, + inner_search_space = tpot2.config.get_search_space(["selectors","transformers","regressors"]), + max_size = 10, + ) + + super(TPOTRegressor,self).__init__( scorers=scorers, scorers_weights=scorers_weights, @@ -46,14 +48,10 @@ def __init__( self, other_objective_functions_weights = other_objective_functions_weights, objective_function_names = objective_function_names, bigger_is_better = bigger_is_better, - max_size = max_size, - linear_pipeline = linear_pipeline, - root_config_dict = root_config_dict, - inner_config_dict=inner_config_dict, - leaf_config_dict= leaf_config_dict, - cross_val_predict_cv = cross_val_predict_cv, + + search_space=search_space, + categorical_features = categorical_features, - subsets = subsets, memory = memory, preprocessing = preprocessing, max_time_seconds=max_time_seconds, @@ -79,14 +77,7 @@ def __init__( self, other_objective_functions_weights = [], objective_function_names = None, bigger_is_better = True, - max_size = np.inf, - linear_pipeline = False, - root_config_dict= 'Auto', - inner_config_dict=["selectors", "transformers"], - leaf_config_dict= None, - cross_val_predict_cv = 0, categorical_features = None, - subsets = None, memory = None, preprocessing = False, max_time_seconds=3600, @@ -105,6 +96,15 @@ def __init__( self, """ See TPOTEstimator for documentation """ + + search_space = tpot2.search_spaces.pipelines.GraphPipeline( + root_search_space= tpot2.config.get_search_space("classifiers"), + leaf_search_space = None, + inner_search_space = tpot2.config.get_search_space(["selectors","transformers","classifiers"]), + max_size = 10, + ) + + 
super(TPOTClassifier,self).__init__( scorers=scorers, scorers_weights=scorers_weights, @@ -113,14 +113,8 @@ def __init__( self, other_objective_functions_weights = other_objective_functions_weights, objective_function_names = objective_function_names, bigger_is_better = bigger_is_better, - max_size = max_size, - linear_pipeline = linear_pipeline, - root_config_dict = root_config_dict, - inner_config_dict=inner_config_dict, - leaf_config_dict= leaf_config_dict, - cross_val_predict_cv = cross_val_predict_cv, + search_space=search_space, categorical_features = categorical_features, - subsets = subsets, memory = memory, preprocessing = preprocessing, max_time_seconds=max_time_seconds, From 974582c064b7cd8542f7808424049f49edbc957e Mon Sep 17 00:00:00 2001 From: perib Date: Tue, 26 Mar 2024 19:15:47 -0700 Subject: [PATCH 05/75] edits to tutorials --- Tutorial/1_Estimators_Overview.ipynb | 129 +++++++++--------- Tutorial/2_Search_Spaces.ipynb | 192 ++++++++++++--------------- 2 files changed, 148 insertions(+), 173 deletions(-) diff --git a/Tutorial/1_Estimators_Overview.ipynb b/Tutorial/1_Estimators_Overview.ipynb index 33a71097..40651551 100644 --- a/Tutorial/1_Estimators_Overview.ipynb +++ b/Tutorial/1_Estimators_Overview.ipynb @@ -20,27 +20,60 @@ "2. `tpot2.TPOTRegressor` for regression tasks" ] }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "#### Scorers, Objective Functions, and multi objective optimization.\n", + "\n", + "There are two ways of passing objectives into TPOT2. \n", + "\n", + "1. `scorers`: Scorers are functions that have the signature (estimator, X, y). These can be produced with the [sklearn.metrics.make_scorer](https://scikit-learn.org/stable/modules/generated/sklearn.metrics.make_scorer.html) function. This function is used to evaluate the test folds during cross validation. These are passed into TPOT2 via the scorers parameter. 
This can take in the scorer itself or the string corresponding to a scoring function ([as listed here](https://scikit-learn.org/stable/modules/model_evaluation.html)). TPOT2 also supports passing in a list of several scorers for multiobjective optimization. \n", + "\n", + "2. `other_objective_functions` : Other objective functions in TPOT2 have the signature (estimator) and returns a float or list of floats. These get passed an unfitted estimator (in the case of TPOT2, a `tpot2.GraphPipeline`). \n", + "\n", + "\n", + "Each scorer and objective function must be accompanied by a list of weights corresponding to the list of objectives. By default, TPOT2 maximizes objective functions (this can be changed by `bigger_is_better=False`). Positive weights means that TPOT2 will seek to maximize that objective, and negative weights correspond to minimization.\n", + "\n", + "Here is an example of using two scorers\n", + "\n", + " scorers=['roc_auc_ovr',tpot2.objectives.complexity_scorer],\n", + " scorers_weights=[1,-1],\n", + "\n", + "\n", + "Here is an example with a scorer and a secondary objective function\n", + "\n", + " scorers=['roc_auc_ovr'],\n", + " scorers_weights=[1],\n", + " other_objective_functions=[tpot2.objectives.number_of_leaves_objective],\n", + " other_objective_functions_weights=[-1]," + ] + }, { "cell_type": "code", - "execution_count": 2, + "execution_count": 1, "metadata": {}, "outputs": [ { "name": "stderr", "output_type": "stream", "text": [ - "Generation: : 0it [00:00, ?it/s]/home/ribeirop/common/Projects/TPOT_Dev/tpot2/tpot2/population.py:204: FutureWarning: Setting an item of incompatible dtype is deprecated and will raise an error in a future version of pandas. 
Value '[0.9910779766159422, 0.9164180506462885, 0.9842489682733572, 0.99664936783213, 0.9913591306204854, 0.9785097508524515, 0.9843199854934415, 0.9981583597446381, 0.99559929270021, 0.9511441780591989, 0.9984744292898663, 0.9974402833866118, 0.9914842901220224, 0.9969100719668479, 0.9909145271063142, 0.9910709279190263, 0.9411694123791475, 0.9910354966095938, 0.9776626614599555, 0.9911887873368403, 0.9966903342486351, 0.9988343538601064, 0.9735967719140286, 0.9968575356141441, 0.9958485748358322, 0.9992471065344972, 0.9605917171252578, 0.9904942837739565, 0.9974574181131549, 0.9996403182930008, 0.9694102480973864, 0.9984821310846055, 0.9940551825220357, 0.9837735643634151, 0.9671044961833003, 0.9913835311537978, 0.9989793765342894, 0.9997847101769164, 0.991564988067797, 0.9988538844163573, 0.9895795999679059, 0.9750578580595717, 0.9971245111678281, 0.997177499370075, 0.9988702870584362, 'INVALID', 0.9131272065575761, 'INVALID', 0.9969386481385651, 'INVALID']' has dtype incompatible with float64, please explicitly cast to a compatible dtype first.\n", + "Generation: : 0it [00:00, ?it/s]/home/ribeirop/common/Projects/TPOT_Dev/tpot2/tpot2/population.py:204: FutureWarning: Setting an item of incompatible dtype is deprecated and will raise an error in a future version of pandas. 
Value '[0.9987673084964015, 0.9847336570645737, 0.9905041856662168, 0.9993763441765191, 0.9907964721472181, 0.9972029119373254, 0.998357673885215, 0.9990613894532178, 0.9694329382784964, 0.997532770251522, 0.9975246891516141, 0.9904661635101079, 0.5, 'INVALID', 0.9948797140575193, 0.5, 0.9849666992163886, 0.9824262822238007, 0.9985479308254266, 0.9972915114620106, 0.9694329382784964, 'INVALID', 0.9324417946451, 0.9901685392720255, 0.9978183481741485, 0.9973644125394717, 0.9905750509316356, 0.9819493383116706, 0.9699621501061083, 0.6072655018077077, 0.9694329382784964, 0.9838996235635504, 0.982122385127114, 0.9901266523287818, 0.9301526525124777, 0.9720743554304064, 0.994576960473181, 0.5, 0.5, 0.9948330115435265, 0.9990358447113457, 0.9945434259359371, 0.9375978779782033, 0.9993887714241577, 0.997164111114518, 'INVALID', 'INVALID', 0.9493406027781374, 0.9767172486252121, 0.9974530907820837]' has dtype incompatible with float64, please explicitly cast to a compatible dtype first.\n", " self.evaluated_individuals.loc[key,column_names] = data\n", - "Generation: : 1it [00:15, 15.14s/it]/home/ribeirop/common/Projects/TPOT_Dev/tpot2/tpot2/population.py:381: FutureWarning: Setting an item of incompatible dtype is deprecated and will raise an error in a future version of pandas. Value 'ind_crossover' has dtype incompatible with float64, please explicitly cast to a compatible dtype first.\n", + "Generation: : 1it [00:14, 14.92s/it]/home/ribeirop/common/Projects/TPOT_Dev/tpot2/tpot2/population.py:381: FutureWarning: Setting an item of incompatible dtype is deprecated and will raise an error in a future version of pandas. 
Value 'ind_mutate' has dtype incompatible with float64, please explicitly cast to a compatible dtype first.\n", " self.evaluated_individuals.at[new_child.unique_id(),\"Variation_Function\"] = var_op\n", - "Generation: : 3it [01:30, 30.07s/it]\n" + "Generation: : 4it [01:19, 19.94s/it]\n", + "/home/ribeirop/miniconda3/envs/tpot2env/lib/python3.10/site-packages/sklearn/feature_selection/_univariate_selection.py:112: UserWarning: Features [ 0 32 39] are constant.\n", + " warnings.warn(\"Features %s are constant.\" % constant_features_idx, UserWarning)\n", + "/home/ribeirop/miniconda3/envs/tpot2env/lib/python3.10/site-packages/sklearn/feature_selection/_univariate_selection.py:113: RuntimeWarning: invalid value encountered in divide\n", + " f = msb / msw\n" ] }, { "name": "stdout", "output_type": "stream", "text": [ - "0.9998423966736188\n" + "0.9999588595957566\n" ] } ], @@ -125,19 +158,15 @@ "import numpy as np\n", "\n", "if __name__==\"__main__\":\n", - " scorer = sklearn.metrics.get_scorer('roc_auc_ovr')\n", + " scorer = sklearn.metrics.get_scorer('roc_auc_ovo')\n", " X, y = sklearn.datasets.load_digits(return_X_y=True)\n", " X_train, X_test, y_train, y_test = sklearn.model_selection.train_test_split(X, y, train_size=0.75, test_size=0.25)\n", - " est = tpot2.TPOTEstimatorSteadyState( \n", - " scorers=['roc_auc_ovr'], #scorers can be a list of strings or a list of scorers. These get evaluated during cross validation. \n", - " scorers_weights=[1],\n", "\n", - " classification=True,\n", "\n", - " max_eval_time_seconds=15,\n", - " max_time_seconds=30,\n", - " verbose=2)\n", + " est = tpot2.TPOTClassifier(n_jobs=4, max_time_seconds=60, verbose=2)\n", " est.fit(X_train, y_train)\n", + "\n", + "\n", " print(scorer(est, X_test, y_test))" ] }, @@ -174,13 +203,6 @@ " \n", " bigger_is_better : bool, default=True\n", " If True, the objective function is maximized. If False, the objective function is minimized. 
Use negative weights to reverse the direction.\n", - "\n", - " \n", - " max_size : int, default=np.inf\n", - " The maximum number of nodes of the pipelines to be generated.\n", - " \n", - " linear_pipeline : bool, default=False\n", - " If True, the pipelines generated will be linear. If False, the pipelines generated will be directed acyclic graphs.\n", " \n", " generations : int, default=50\n", " Number of generations to run\n", @@ -207,13 +229,19 @@ " 4. warnings\n", " >=5. full warnings trace\n", " 6. evaluations progress bar. (Temporary: This used to be 2. Currently, using evaluation progress bar may prevent some instances were we terminate a generation early due to it reaching max_time_seconds in the middle of a generation OR a pipeline failed to be terminated normally and we need to manually terminate it.)\n", - " \n", + " \n" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "# TPOTEstimator and TPOTEstimatorSteadyState\n", "\n", - "The following configuration dictionaries are covered in the next tutorial:\n", + "TPOTEstimator and TPOTEstimatorSteadyState expose more parameters for customizing search spaces and evolutionary algorithms. The next tutorial will cover customizing search spaces in more detail.\n", "\n", - " root_config_dict\n", - " inner_config_dict\n", - " leaf_config_dict" + "The TPOTClassifier and TPOTRegressor set default parameters for the TPOTEstimator for Classification and Regression.\n", + "In the future, a metalearner will be used to predict the best values for a given dataset." 
] }, { @@ -233,7 +261,16 @@ "import sklearn\n", "import sklearn.datasets\n", "\n", + "\n", + "graph_search_space = tpot2.search_spaces.pipelines.GraphPipeline(\n", + " root_search_space= tpot2.config.get_search_space([\"KNeighborsClassifier\", \"LogisticRegression\", \"DecisionTreeClassifier\"]),\n", + " leaf_search_space = tpot2.config.get_search_space(\"selectors\"), \n", + " inner_search_space = tpot2.config.get_search_space([\"transformers\"]),\n", + " max_size = 10,\n", + ")\n", + "\n", "est = tpot2.TPOTEstimatorSteadyState( \n", + " search_space = graph_search_space,\n", " scorers=['roc_auc_ovr'], #scorers can be a list of strings or a list of scorers. These get evaluated during cross validation. \n", " scorers_weights=[1],\n", "\n", @@ -271,35 +308,6 @@ "est.evaluated_individuals" ] }, - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "#### Scorers, Objective Functions, and multi objective optimization.\n", - "\n", - "There are two ways of passing objectives into TPOT2. \n", - "\n", - "1. `scorers`: Scorers are functions that have the signature (estimator, X, y). These can be produced with the [sklearn.metrics.make_scorer](https://scikit-learn.org/stable/modules/generated/sklearn.metrics.make_scorer.html) function. This function is used to evaluate the test folds during cross validation. These are passed into TPOT2 via the scorers parameter. This can take in the scorer itself or the string corresponding to a scoring function ([as listed here](https://scikit-learn.org/stable/modules/model_evaluation.html)). TPOT2 also supports passing in a list of several scorers for multiobjective optimization. \n", - "\n", - "2. `other_objective_functions` : Other objective functions in TPOT2 have the signature (estimator) and returns a float or list of floats. These get passed an unfitted estimator (in the case of TPOT2, a `tpot2.GraphPipeline`). 
\n", - "\n", - "\n", - "Each scorer and objective function must be accompanied by a list of weights corresponding to the list of objectives. By default, TPOT2 maximizes objective functions (this can be changed by `bigger_is_better=False`). Positive weights means that TPOT2 will seek to maximize that objective, and negative weights correspond to minimization.\n", - "\n", - "Here is an example of using two scorers\n", - "\n", - " scorers=['roc_auc_ovr',tpot2.objectives.complexity_scorer],\n", - " scorers_weights=[1,-1],\n", - "\n", - "\n", - "Here is an example with a scorer and a secondary objective function\n", - "\n", - " scorers=['roc_auc_ovr'],\n", - " scorers_weights=[1],\n", - " other_objective_functions=[tpot2.objectives.number_of_leaves_objective],\n", - " other_objective_functions_weights=[-1]," - ] - }, { "cell_type": "code", "execution_count": null, @@ -311,6 +319,7 @@ "import sklearn.datasets\n", "\n", "est = tpot2.TPOTEstimatorSteadyState( \n", + " search_space = graph_search_space,\n", " scorers=['roc_auc_ovr',tpot2.objectives.complexity_scorer],\n", " scorers_weights=[1,-1],\n", "\n", @@ -404,7 +413,9 @@ "import sklearn\n", "import sklearn.datasets\n", "\n", - "est = tpot2.TPOTEstimator( population_size=30,\n", + "est = tpot2.TPOTEstimator( \n", + " search_space = graph_search_space,\n", + " population_size=30,\n", " generations=5,\n", " scorers=['roc_auc_ovr'], #scorers can be a list of strings or a list of scorers. These get evaluated during cross validation. \n", " scorers_weights=[1],\n", @@ -425,14 +436,6 @@ "est.fit(X_train, y_train)\n", "print(scorer(est, X_test, y_test))" ] - }, - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "The TPOTClassifier and TPOTRegressor are set default parameters for the TPOTEstimator for Classification and Regression.\n", - "In the future, a metalearner will be used to predict the best values for a given dataset." 
- ] } ], "metadata": { diff --git a/Tutorial/2_Search_Spaces.ipynb b/Tutorial/2_Search_Spaces.ipynb index 940509de..c4aa8ab2 100644 --- a/Tutorial/2_Search_Spaces.ipynb +++ b/Tutorial/2_Search_Spaces.ipynb @@ -31,7 +31,7 @@ "output_type": "stream", "text": [ "sampled hyperparameters\n", - "{'metric': 'euclidean', 'n_jobs': 1, 'n_neighbors': 1, 'p': 3, 'weights': 'uniform'}\n" + "{'metric': 'euclidean', 'n_jobs': 1, 'n_neighbors': 5, 'p': 3, 'weights': 'uniform'}\n" ] } ], @@ -154,9 +154,9 @@ "output_type": "stream", "text": [ "sampled hyperparameters\n", - "{'metric': 'euclidean', 'n_jobs': 1, 'n_neighbors': 1, 'p': 1, 'weights': 'uniform'}\n", + "{'metric': 'minkowski', 'n_jobs': 1, 'n_neighbors': 6, 'p': 2, 'weights': 'uniform'}\n", "mutated hyperparameters\n", - "{'metric': 'euclidean', 'n_jobs': 1, 'n_neighbors': 7, 'p': 1, 'weights': 'distance'}\n" + "{'metric': 'minkowski', 'n_jobs': 1, 'n_neighbors': 4, 'p': 3, 'weights': 'distance'}\n" ] } ], @@ -187,14 +187,14 @@ "output_type": "stream", "text": [ "original hyperparameters for individual 1\n", - "{'metric': 'euclidean', 'n_jobs': 1, 'n_neighbors': 7, 'p': 1, 'weights': 'distance'}\n", + "{'metric': 'minkowski', 'n_jobs': 1, 'n_neighbors': 7, 'p': 1, 'weights': 'uniform'}\n", "original hyperparameters for individual 2\n", - "{'metric': 'minkowski', 'n_jobs': 1, 'n_neighbors': 5, 'p': 3, 'weights': 'uniform'}\n", + "{'metric': 'euclidean', 'n_jobs': 1, 'n_neighbors': 5, 'p': 2, 'weights': 'distance'}\n", "\n", "post crossover hyperparameters for individual 1\n", - "{'metric': 'minkowski', 'n_jobs': 1, 'n_neighbors': 5, 'p': 1, 'weights': 'uniform'}\n", + "{'metric': 'minkowski', 'n_jobs': 1, 'n_neighbors': 7, 'p': 2, 'weights': 'uniform'}\n", "post crossover hyperparameters for individual 2\n", - "{'metric': 'minkowski', 'n_jobs': 1, 'n_neighbors': 5, 'p': 3, 'weights': 'uniform'}\n" + "{'metric': 'euclidean', 'n_jobs': 1, 'n_neighbors': 5, 'p': 2, 'weights': 'distance'}\n" ] } ], @@ -637,10 +637,10 @@ " /* 
fitted */\n", " background-color: var(--sklearn-color-fitted-level-3);\n", "}\n", - "
KNeighborsClassifier(n_jobs=1, p=1)
In a Jupyter environment, please rerun this cell to show the HTML representation or trust the notebook.
On GitHub, the HTML representation is unable to render, please try loading this page with nbviewer.org.
" + "
KNeighborsClassifier(n_jobs=1, n_neighbors=7)
In a Jupyter environment, please rerun this cell to show the HTML representation or trust the notebook.
On GitHub, the HTML representation is unable to render, please try loading this page with nbviewer.org.
" ], "text/plain": [ - "KNeighborsClassifier(n_jobs=1, p=1)" + "KNeighborsClassifier(n_jobs=1, n_neighbors=7)" ] }, "execution_count": 5, @@ -676,7 +676,7 @@ { "data": { "text/plain": [ - "" + "" ] }, "execution_count": 6, @@ -1194,13 +1194,10 @@ " /* fitted */\n", " background-color: var(--sklearn-color-fitted-level-3);\n", "}\n", - "
LogisticRegression(C=0.5857355940220703, class_weight='balanced', dual=True,\n",
-       "                   max_iter=1000, n_jobs=1, penalty='l1', solver='saga')
In a Jupyter environment, please rerun this cell to show the HTML representation or trust the notebook.
On GitHub, the HTML representation is unable to render, please try loading this page with nbviewer.org.
" + "
KNeighborsClassifier(metric='euclidean', n_jobs=1, n_neighbors=3)
In a Jupyter environment, please rerun this cell to show the HTML representation or trust the notebook.
On GitHub, the HTML representation is unable to render, please try loading this page with nbviewer.org.
" ], "text/plain": [ - "LogisticRegression(C=0.5857355940220703, class_weight='balanced', dual=True,\n", - " max_iter=1000, n_jobs=1, penalty='l1', solver='saga')" + "KNeighborsClassifier(metric='euclidean', n_jobs=1, n_neighbors=3)" ] }, "execution_count": 7, @@ -1634,13 +1631,10 @@ " /* fitted */\n", " background-color: var(--sklearn-color-fitted-level-3);\n", "}\n", - "
LogisticRegression(C=2.032340407557342, class_weight='balanced', max_iter=1000,\n",
-       "                   n_jobs=1)
In a Jupyter environment, please rerun this cell to show the HTML representation or trust the notebook.
On GitHub, the HTML representation is unable to render, please try loading this page with nbviewer.org.
" + "
KNeighborsClassifier(metric='euclidean', n_jobs=1, n_neighbors=1, p=1)
In a Jupyter environment, please rerun this cell to show the HTML representation or trust the notebook.
On GitHub, the HTML representation is unable to render, please try loading this page with nbviewer.org.
" ], "text/plain": [ - "LogisticRegression(C=2.032340407557342, class_weight='balanced', max_iter=1000,\n", - " n_jobs=1)" + "KNeighborsClassifier(metric='euclidean', n_jobs=1, n_neighbors=1, p=1)" ] }, "execution_count": 8, @@ -2091,13 +2085,13 @@ " /* fitted */\n", " background-color: var(--sklearn-color-fitted-level-3);\n", "}\n", - "
DecisionTreeClassifier(max_depth=30, max_features='sqrt', min_samples_leaf=3,\n",
-       "                       min_samples_split=18)
In a Jupyter environment, please rerun this cell to show the HTML representation or trust the notebook.
On GitHub, the HTML representation is unable to render, please try loading this page with nbviewer.org.
" + "
DecisionTreeClassifier(criterion='entropy', max_depth=22, max_features=1.0,\n",
+       "                       min_samples_leaf=16, min_samples_split=20)
In a Jupyter environment, please rerun this cell to show the HTML representation or trust the notebook.
On GitHub, the HTML representation is unable to render, please try loading this page with nbviewer.org.
" ], "text/plain": [ - "DecisionTreeClassifier(max_depth=30, max_features='sqrt', min_samples_leaf=3,\n", - " min_samples_split=18)" + "DecisionTreeClassifier(criterion='entropy', max_depth=22, max_features=1.0,\n", + " min_samples_leaf=16, min_samples_split=20)" ] }, "execution_count": 9, @@ -2532,13 +2526,10 @@ " /* fitted */\n", " background-color: var(--sklearn-color-fitted-level-3);\n", "}\n", - "
DecisionTreeClassifier(max_depth=19, max_features='sqrt', min_samples_leaf=8,\n",
-       "                       min_samples_split=5)
In a Jupyter environment, please rerun this cell to show the HTML representation or trust the notebook.
On GitHub, the HTML representation is unable to render, please try loading this page with nbviewer.org.
" + "
KNeighborsClassifier(n_jobs=1, n_neighbors=4)
In a Jupyter environment, please rerun this cell to show the HTML representation or trust the notebook.
On GitHub, the HTML representation is unable to render, please try loading this page with nbviewer.org.
" ], "text/plain": [ - "DecisionTreeClassifier(max_depth=19, max_features='sqrt', min_samples_leaf=8,\n", - " min_samples_split=5)" + "KNeighborsClassifier(n_jobs=1, n_neighbors=4)" ] }, "execution_count": 10, @@ -2970,13 +2961,13 @@ " /* fitted */\n", " background-color: var(--sklearn-color-fitted-level-3);\n", "}\n", - "
ExtraTreesClassifier(max_features=0.40389574491352287, min_samples_leaf=15,\n",
-       "                     min_samples_split=13, n_jobs=1)
In a Jupyter environment, please rerun this cell to show the HTML representation or trust the notebook.
On GitHub, the HTML representation is unable to render, please try loading this page with nbviewer.org.
" + "
MLPClassifier(alpha=0.09935758704160183,\n",
+       "              learning_rate_init=0.004466259151092733)
In a Jupyter environment, please rerun this cell to show the HTML representation or trust the notebook.
On GitHub, the HTML representation is unable to render, please try loading this page with nbviewer.org.
" ], "text/plain": [ - "ExtraTreesClassifier(max_features=0.40389574491352287, min_samples_leaf=15,\n", - " min_samples_split=13, n_jobs=1)" + "MLPClassifier(alpha=0.09935758704160183,\n", + " learning_rate_init=0.004466259151092733)" ] }, "execution_count": 11, @@ -3411,13 +3402,13 @@ " /* fitted */\n", " background-color: var(--sklearn-color-fitted-level-3);\n", "}\n", - "
SVC(C=7.943520510912431, degree=1, kernel='linear', max_iter=3000,\n",
-       "    probability=True)
In a Jupyter environment, please rerun this cell to show the HTML representation or trust the notebook.
On GitHub, the HTML representation is unable to render, please try loading this page with nbviewer.org.
" + "
DecisionTreeClassifier(criterion='entropy', max_depth=11, max_features=1.0,\n",
+       "                       min_samples_leaf=12, min_samples_split=8)
In a Jupyter environment, please rerun this cell to show the HTML representation or trust the notebook.
On GitHub, the HTML representation is unable to render, please try loading this page with nbviewer.org.
" ], "text/plain": [ - "SVC(C=7.943520510912431, degree=1, kernel='linear', max_iter=3000,\n", - " probability=True)" + "DecisionTreeClassifier(criterion='entropy', max_depth=11, max_features=1.0,\n", + " min_samples_leaf=12, min_samples_split=8)" ] }, "execution_count": 12, @@ -3858,35 +3849,26 @@ " /* fitted */\n", " background-color: var(--sklearn-color-fitted-level-3);\n", "}\n", - "
Pipeline(steps=[('variancethreshold',\n",
-       "                 VarianceThreshold(threshold=0.16682490562982172)),\n",
-       "                ('nystroem',\n",
-       "                 Nystroem(gamma=0.7638884024411401, kernel='linear',\n",
-       "                          n_components=98)),\n",
-       "                ('extratreesclassifier',\n",
-       "                 ExtraTreesClassifier(max_features=0.41763504253232936,\n",
-       "                                      min_samples_leaf=8, min_samples_split=17,\n",
-       "                                      n_jobs=1))])
In a Jupyter environment, please rerun this cell to show the HTML representation or trust the notebook.
On GitHub, the HTML representation is unable to render, please try loading this page with nbviewer.org.
" + "
Pipeline(steps=[('selectfwe', SelectFwe(alpha=0.007682074361801758)),\n",
+       "                ('fastica', FastICA(n_components=64)),\n",
+       "                ('randomforestclassifier',\n",
+       "                 RandomForestClassifier(bootstrap=False, criterion='entropy',\n",
+       "                                        min_samples_leaf=10,\n",
+       "                                        min_samples_split=6))])
In a Jupyter environment, please rerun this cell to show the HTML representation or trust the notebook.
On GitHub, the HTML representation is unable to render, please try loading this page with nbviewer.org.
" ], "text/plain": [ - "Pipeline(steps=[('variancethreshold',\n", - " VarianceThreshold(threshold=0.16682490562982172)),\n", - " ('nystroem',\n", - " Nystroem(gamma=0.7638884024411401, kernel='linear',\n", - " n_components=98)),\n", - " ('extratreesclassifier',\n", - " ExtraTreesClassifier(max_features=0.41763504253232936,\n", - " min_samples_leaf=8, min_samples_split=17,\n", - " n_jobs=1))])" + "Pipeline(steps=[('selectfwe', SelectFwe(alpha=0.007682074361801758)),\n", + " ('fastica', FastICA(n_components=64)),\n", + " ('randomforestclassifier',\n", + " RandomForestClassifier(bootstrap=False, criterion='entropy',\n", + " min_samples_leaf=10,\n", + " min_samples_split=6))])" ] }, "execution_count": 13, @@ -4325,28 +4307,28 @@ " /* fitted */\n", " background-color: var(--sklearn-color-fitted-level-3);\n", "}\n", - "
Pipeline(steps=[('variancethreshold',\n",
-       "                 VarianceThreshold(threshold=0.029163176782587025)),\n",
-       "                ('rbfsampler',\n",
-       "                 RBFSampler(gamma=0.3360335889875927, n_components=61)),\n",
-       "                ('randomforestclassifier',\n",
-       "                 RandomForestClassifier(min_samples_leaf=2,\n",
-       "                                        min_samples_split=5))])
In a Jupyter environment, please rerun this cell to show the HTML representation or trust the notebook.
On GitHub, the HTML representation is unable to render, please try loading this page with nbviewer.org.
" + "
Pipeline(steps=[('selectpercentile',\n",
+       "                 SelectPercentile(percentile=75.04535288452273)),\n",
+       "                ('nystroem',\n",
+       "                 Nystroem(gamma=0.4607961332716787, kernel='laplacian',\n",
+       "                          n_components=90)),\n",
+       "                ('bernoullinb',\n",
+       "                 BernoulliNB(alpha=2.4816194955956314, fit_prior=False))])
In a Jupyter environment, please rerun this cell to show the HTML representation or trust the notebook.
On GitHub, the HTML representation is unable to render, please try loading this page with nbviewer.org.
" ], "text/plain": [ - "Pipeline(steps=[('variancethreshold',\n", - " VarianceThreshold(threshold=0.029163176782587025)),\n", - " ('rbfsampler',\n", - " RBFSampler(gamma=0.3360335889875927, n_components=61)),\n", - " ('randomforestclassifier',\n", - " RandomForestClassifier(min_samples_leaf=2,\n", - " min_samples_split=5))])" + "Pipeline(steps=[('selectpercentile',\n", + " SelectPercentile(percentile=75.04535288452273)),\n", + " ('nystroem',\n", + " Nystroem(gamma=0.4607961332716787, kernel='laplacian',\n", + " n_components=90)),\n", + " ('bernoullinb',\n", + " BernoulliNB(alpha=2.4816194955956314, fit_prior=False))])" ] }, "execution_count": 14, @@ -4377,18 +4359,11 @@ "name": "stderr", "output_type": "stream", "text": [ - "Generation: 0%| | 0/5 [00:00
TPOTEstimator(classification=True, generations=5, max_eval_time_seconds=300,\n",
-       "              population_size=10, processes=False, scorers=['roc_auc'],\n",
-       "              scorers_weights=[1],\n",
-       "              search_space=<tpot2.search_spaces.pipelines.graph.GraphPipeline object at 0x7ebd8bf94bb0>,\n",
+       "              population_size=10, scorers=['roc_auc'], scorers_weights=[1],\n",
+       "              search_space=<tpot2.search_spaces.pipelines.graph.GraphPipeline object at 0x77c026a110c0>,\n",
        "              verbose=2)
In a Jupyter environment, please rerun this cell to show the HTML representation or trust the notebook.
On GitHub, the HTML representation is unable to render, please try loading this page with nbviewer.org.
" ], "text/plain": [ "TPOTEstimator(classification=True, generations=5, max_eval_time_seconds=300,\n", - " population_size=10, processes=False, scorers=['roc_auc'],\n", - " scorers_weights=[1],\n", - " search_space=,\n", + " population_size=10, scorers=['roc_auc'], scorers_weights=[1],\n", + " search_space=,\n", " verbose=2)" ] }, @@ -4866,14 +4838,14 @@ }, { "cell_type": "code", - "execution_count": 17, + "execution_count": 16, "metadata": {}, "outputs": [ { "name": "stdout", "output_type": "stream", "text": [ - "auroc score 0.9890552995391705\n" + "auroc score 0.9876518024288388\n" ] } ], @@ -4888,12 +4860,12 @@ }, { "cell_type": "code", - "execution_count": 18, + "execution_count": 17, "metadata": {}, "outputs": [ { "data": { - "image/png": "", + "image/png": "", "text/plain": [ "
" ] From 22fd8091d95efd3a7e46ec7fbab3725164002416 Mon Sep 17 00:00:00 2001 From: perib Date: Tue, 26 Mar 2024 19:33:27 -0700 Subject: [PATCH 06/75] edit --- Tutorial/5_Genetic_Feature_Selection.ipynb | 108 ++++++++++----------- 1 file changed, 51 insertions(+), 57 deletions(-) diff --git a/Tutorial/5_Genetic_Feature_Selection.ipynb b/Tutorial/5_Genetic_Feature_Selection.ipynb index 96bf78b1..a9afcf4b 100644 --- a/Tutorial/5_Genetic_Feature_Selection.ipynb +++ b/Tutorial/5_Genetic_Feature_Selection.ipynb @@ -18,20 +18,14 @@ "name": "stderr", "output_type": "stream", "text": [ - "Generation: 0%| | 0/5 [00:00
Pipeline(steps=[('maskselector',\n",
-       "                 MaskSelector(mask=array([ True,  True, False, False, False, False, False, False, False,\n",
-       "        True,  True, False,  True, False,  True,  True,  True, False,\n",
-       "       False, False,  True, False, False, False, False,  True, False,\n",
-       "        True,  True,  True,  True,  True,  True,  True,  True, False,\n",
-       "        True,  True, False, False,  True, False,  True,  True, False,\n",
-       "       False,  True, False,  True, False,  True, False,  True, Fa...\n",
-       "        True, False,  True, False,  True,  True,  True, False,  True,\n",
-       "        True,  True,  True,  True,  True,  True, False,  True,  True,\n",
-       "        True, False, False, False,  True,  True, False,  True,  True,\n",
-       "        True,  True, False, False, False,  True, False,  True, False,\n",
-       "        True, False, False,  True, False,  True, False, False, False,\n",
+       "                 MaskSelector(mask=array([False,  True, False, False, False,  True,  True, False, False,\n",
+       "        True, False,  True,  True, False, False, False,  True, False,\n",
+       "       False, False, False,  True, False, False, False, False, False,\n",
+       "        True, False, False, False,  True,  True,  True, False,  True,\n",
+       "        True,  True,  True,  True, False,  True,  True, False, False,\n",
+       "       False, False,  True, False,  True, False, False, Fa...\n",
+       "        True,  True,  True, False, False,  True,  True, False, False,\n",
+       "        True, False, False, False, False, False,  True, False,  True,\n",
+       "       False, False,  True, False, False, False,  True,  True,  True,\n",
+       "       False, False, False,  True, False, False,  True, False, False,\n",
+       "       False,  True, False, False, False,  True,  True, False, False,\n",
        "        True]))),\n",
        "                ('graphpipeline',\n",
-       "                 GraphPipeline(graph=<networkx.classes.digraph.DiGraph object at 0x76ebe05ee590>))])
In a Jupyter environment, please rerun this cell to show the HTML representation or trust the notebook.
On GitHub, the HTML representation is unable to render, please try loading this page with nbviewer.org.
MaskSelector(mask=array([False,  True, False, False, False,  True,  True, False, False,\n",
+       "        True, False,  True,  True, False, False, False,  True, False,\n",
+       "       False, False, False,  True, False, False, False, False, False,\n",
+       "        True, False, False, False,  True,  True,  True, False,  True,\n",
+       "        True,  True,  True,  True, False,  True,  True, False, False,\n",
+       "       False, False,  True, False,  True, False, False, False,  True,\n",
+       "        True,  True,  True, False, False,  True,  True, False, False,\n",
+       "        True, False, False, False, False, False,  True, False,  True,\n",
+       "       False, False,  True, False, False, False,  True,  True,  True,\n",
+       "       False, False, False,  True, False, False,  True, False, False,\n",
+       "       False,  True, False, False, False,  True,  True, False, False,\n",
+       "        True]))
[('KNeighborsClassifier_1', 'Normalizer_1')]
" ], "text/plain": [ "Pipeline(steps=[('maskselector',\n", - " MaskSelector(mask=array([ True, True, False, False, False, False, False, False, False,\n", - " True, True, False, True, False, True, True, True, False,\n", - " False, False, True, False, False, False, False, True, False,\n", - " True, True, True, True, True, True, True, True, False,\n", - " True, True, False, False, True, False, True, True, False,\n", - " False, True, False, True, False, True, False, True, Fa...\n", - " True, False, True, False, True, True, True, False, True,\n", - " True, True, True, True, True, True, False, True, True,\n", - " True, False, False, False, True, True, False, True, True,\n", - " True, True, False, False, False, True, False, True, False,\n", - " True, False, False, True, False, True, False, False, False,\n", + " MaskSelector(mask=array([False, True, False, False, False, True, True, False, False,\n", + " True, False, True, True, False, False, False, True, False,\n", + " False, False, False, True, False, False, False, False, False,\n", + " True, False, False, False, True, True, True, False, True,\n", + " True, True, True, True, False, True, True, False, False,\n", + " False, False, True, False, True, False, False, Fa...\n", + " True, True, True, False, False, True, True, False, False,\n", + " True, False, False, False, False, False, True, False, True,\n", + " False, False, True, False, False, False, True, True, True,\n", + " False, False, False, True, False, False, True, False, False,\n", + " False, True, False, False, False, True, True, False, False,\n", " True]))),\n", " ('graphpipeline',\n", - " GraphPipeline(graph=))])" + " GraphPipeline(graph=))])" ] }, "execution_count": 2, @@ -553,12 +547,12 @@ }, { "cell_type": "code", - "execution_count": 4, + "execution_count": 3, "metadata": {}, "outputs": [ { "data": { - "image/png": "", + "image/png": "", "text/plain": [ "
" ] From 68378bcc758a04ae97c5d989f82bb4ed4497c6e2 Mon Sep 17 00:00:00 2001 From: perib Date: Tue, 26 Mar 2024 19:48:44 -0700 Subject: [PATCH 07/75] rng_ to rng --- tpot2/evolvers/steady_state_evolver.py | 24 ++++++++++++------------ 1 file changed, 12 insertions(+), 12 deletions(-) diff --git a/tpot2/evolvers/steady_state_evolver.py b/tpot2/evolvers/steady_state_evolver.py index 7f03b5a3..5db3e502 100644 --- a/tpot2/evolvers/steady_state_evolver.py +++ b/tpot2/evolvers/steady_state_evolver.py @@ -23,13 +23,13 @@ import warnings -def ind_mutate(ind, rng_): - rng = np.random.default_rng(rng_) - return ind.mutate(rng_=rng) +def ind_mutate(ind, rng): + rng = np.random.default_rng(rng) + return ind.mutate(rng=rng) -def ind_crossover(ind1, ind2, rng_): - rng = np.random.default_rng(rng_) - return ind1.crossover(ind2, rng_=rng) +def ind_crossover(ind1, ind2, rng): + rng = np.random.default_rng(rng) + return ind1.crossover(ind2, rng=rng) class SteadyStateEvolver(): def __init__( self, @@ -480,16 +480,16 @@ def optimize(self): # parents = [] # for op in var_ops: # if op == "mutate": - # parents.extend(np.array(cur_evaluated_population)[self.parent_selector(weighted_scores, k=1, n_parents=1, rng_=self.rng)]) + # parents.extend(np.array(cur_evaluated_population)[self.parent_selector(weighted_scores, k=1, n_parents=1, rng=self.rng)]) # else: - # parents.extend(np.array(cur_evaluated_population)[self.parent_selector(weighted_scores, k=1, n_parents=2, rng_=self.rng)]) + # parents.extend(np.array(cur_evaluated_population)[self.parent_selector(weighted_scores, k=1, n_parents=2, rng=self.rng)]) - # #_offspring = self.population.create_offspring2(parents, var_ops, rng_=self.rng, add_to_population=True) - # offspring = self.population.create_offspring2(parents, var_ops, [ind_mutate], None, [ind_crossover], None, add_to_population=True, keep_repeats=False, mutate_until_unique=True, rng_=self.rng) + # #_offspring = self.population.create_offspring2(parents, var_ops, rng=self.rng, 
add_to_population=True) + # offspring = self.population.create_offspring2(parents, var_ops, [ind_mutate], None, [ind_crossover], None, add_to_population=True, keep_repeats=False, mutate_until_unique=True, rng=self.rng) if enough_parents_evaluated: - parents = self.population.parent_select(selector=self.parent_selector, weights=self.objective_function_weights, columns_names=self.objective_names, k=n_individuals_to_submit, n_parents=2, rng_=self.rng) + parents = self.population.parent_select(selector=self.parent_selector, weights=self.objective_function_weights, columns_names=self.objective_names, k=n_individuals_to_submit, n_parents=2, rng=self.rng) p = np.array([self.crossover_probability, self.mutate_then_crossover_probability, self.crossover_then_mutate_probability, self.mutate_probability]) p = p / p.sum() var_op_list = self.rng.choice(["crossover", "mutate_then_crossover", "crossover_then_mutate", "mutate"], size=n_individuals_to_submit, p=p) @@ -498,7 +498,7 @@ def optimize(self): if op == "mutate": parents[i] = parents[i][0] #mutations take a single individual - offspring = self.population.create_offspring2(parents, var_op_list, [ind_mutate], None, [ind_crossover], None, add_to_population=True, keep_repeats=False, mutate_until_unique=True, rng_=self.rng) + offspring = self.population.create_offspring2(parents, var_op_list, [ind_mutate], None, [ind_crossover], None, add_to_population=True, keep_repeats=False, mutate_until_unique=True, rng=self.rng) # If we don't have enough evaluated individuals to use as parents for variation, we create new individuals randomly # This can happen if the individuals in the initial population are invalid From d806571066cf4a6b61008ae02ba2fa7ce35ae0ab Mon Sep 17 00:00:00 2001 From: perib Date: Wed, 27 Mar 2024 12:19:06 -0700 Subject: [PATCH 08/75] graph and tree random length initial pipeline --- tpot2/search_spaces/pipelines/graph.py | 30 ++++++++++++++++++++------ 1 file changed, 23 insertions(+), 7 deletions(-) diff --git 
a/tpot2/search_spaces/pipelines/graph.py b/tpot2/search_spaces/pipelines/graph.py index c8a5280f..0ebe7092 100644 --- a/tpot2/search_spaces/pipelines/graph.py +++ b/tpot2/search_spaces/pipelines/graph.py @@ -303,7 +303,7 @@ def _crossover_swap_branch(self, G2, rng=None): pair_gen = select_nodes_randomly(self.graph, G2.graph, rng=rng) for node1, node2 in pair_gen: - #TODO: if root is in inner_config_dict, then do use it? + #TODO: if root is in inner_search_space, then do use it? if node1 is self.root or node2 is G2.root: #dont want to add root as inner node continue @@ -354,7 +354,7 @@ def _crossover_take_branch(self, G2, rng=None): pair_gen = select_nodes_randomly(self.graph, G2.graph, rng=rng) for node1, node2 in pair_gen: - #TODO: if root is in inner_config_dict, then do use it? + #TODO: if root is in inner_search_space, then do use it? if node2 is G2.root: #dont want to add root as inner node continue @@ -365,7 +365,7 @@ def _crossover_take_branch(self, G2, rng=None): #icheck if node2 is graph individual # if isinstance(node2,GraphIndividual): - # if not ((isinstance(node2,GraphIndividual) and ("Recursive" in self.inner_config_dict or "Recursive" in self.leaf_search_space))): + # if not ((isinstance(node2,GraphIndividual) and ("Recursive" in self.inner_search_space or "Recursive" in self.leaf_search_space))): # continue #isolating the branch @@ -624,9 +624,8 @@ class GraphPipeline(SklearnIndividualGenerator): def __init__(self, root_search_space : SklearnIndividualGenerator, leaf_search_space : SklearnIndividualGenerator = None, inner_search_space : SklearnIndividualGenerator =None, - max_size: int = 10, - crossover_same_depth=False, - rng=None) -> None: + max_size: int = np.inf, + crossover_same_depth=False) -> None: """ Generates a directed acyclic graph of variable size. Search spaces for root, leaf, and inner nodes can be defined separately if desired. 
@@ -642,4 +641,21 @@ def __init__(self, root_search_space : SklearnIndividualGenerator, self.crossover_same_depth = crossover_same_depth def generate(self, rng=None): - return GraphPipelineIndividual(self.search_space, self.leaf_search_space, self.inner_search_space, self.max_size, self.crossover_same_depth, rng=rng) \ No newline at end of file + rng = np.random.default_rng(rng) + ind = GraphPipelineIndividual(self.search_space, self.leaf_search_space, self.inner_search_space, self.max_size, self.crossover_same_depth, rng=rng) + # if user specified limit, grab a random number between that limit + + n_nodes = min(rng.integers(1, self.max_size), 5) + starting_ops = [] + if self.inner_search_space is not None: + starting_ops.append(ind._mutate_insert_inner_node) + if self.leaf_search_space is not None or self.inner_search_space is not None: + starting_ops.append(ind._mutate_insert_leaf) + n_nodes -= 1 + + if len(starting_ops) > 0: + for _ in range(n_nodes-1): + func = rng.choice(starting_ops) + func(rng=rng) + + return ind \ No newline at end of file From 3e2a3c4d6007da7decd78774a8cd21a97b23f13f Mon Sep 17 00:00:00 2001 From: perib Date: Fri, 29 Mar 2024 15:57:44 -0700 Subject: [PATCH 09/75] flatten pipelines to graph, graphpipe params --- Tutorial/2_Search_Spaces.ipynb | 192 +++++++++++++--------- tpot2/graphsklearn.py | 4 +- tpot2/search_spaces/base.py | 104 +++++++++++- tpot2/search_spaces/pipelines/__init__.py | 4 +- tpot2/search_spaces/pipelines/graph.py | 142 +++++++++++++--- 5 files changed, 343 insertions(+), 103 deletions(-) diff --git a/Tutorial/2_Search_Spaces.ipynb b/Tutorial/2_Search_Spaces.ipynb index c4aa8ab2..8e0af2b9 100644 --- a/Tutorial/2_Search_Spaces.ipynb +++ b/Tutorial/2_Search_Spaces.ipynb @@ -31,7 +31,7 @@ "output_type": "stream", "text": [ "sampled hyperparameters\n", - "{'metric': 'euclidean', 'n_jobs': 1, 'n_neighbors': 5, 'p': 3, 'weights': 'uniform'}\n" + "{'metric': 'euclidean', 'n_jobs': 1, 'n_neighbors': 6, 'p': 3, 'weights': 
'distance'}\n" ] } ], @@ -154,9 +154,9 @@ "output_type": "stream", "text": [ "sampled hyperparameters\n", - "{'metric': 'minkowski', 'n_jobs': 1, 'n_neighbors': 6, 'p': 2, 'weights': 'uniform'}\n", + "{'metric': 'minkowski', 'n_jobs': 1, 'n_neighbors': 9, 'p': 2, 'weights': 'distance'}\n", "mutated hyperparameters\n", - "{'metric': 'minkowski', 'n_jobs': 1, 'n_neighbors': 4, 'p': 3, 'weights': 'distance'}\n" + "{'metric': 'euclidean', 'n_jobs': 1, 'n_neighbors': 5, 'p': 1, 'weights': 'uniform'}\n" ] } ], @@ -187,14 +187,14 @@ "output_type": "stream", "text": [ "original hyperparameters for individual 1\n", - "{'metric': 'minkowski', 'n_jobs': 1, 'n_neighbors': 7, 'p': 1, 'weights': 'uniform'}\n", + "{'metric': 'euclidean', 'n_jobs': 1, 'n_neighbors': 9, 'p': 2, 'weights': 'uniform'}\n", "original hyperparameters for individual 2\n", - "{'metric': 'euclidean', 'n_jobs': 1, 'n_neighbors': 5, 'p': 2, 'weights': 'distance'}\n", + "{'metric': 'euclidean', 'n_jobs': 1, 'n_neighbors': 4, 'p': 1, 'weights': 'uniform'}\n", "\n", "post crossover hyperparameters for individual 1\n", - "{'metric': 'minkowski', 'n_jobs': 1, 'n_neighbors': 7, 'p': 2, 'weights': 'uniform'}\n", + "{'metric': 'euclidean', 'n_jobs': 1, 'n_neighbors': 9, 'p': 2, 'weights': 'uniform'}\n", "post crossover hyperparameters for individual 2\n", - "{'metric': 'euclidean', 'n_jobs': 1, 'n_neighbors': 5, 'p': 2, 'weights': 'distance'}\n" + "{'metric': 'euclidean', 'n_jobs': 1, 'n_neighbors': 4, 'p': 1, 'weights': 'uniform'}\n" ] } ], @@ -637,10 +637,10 @@ " /* fitted */\n", " background-color: var(--sklearn-color-fitted-level-3);\n", "}\n", - "
KNeighborsClassifier(n_jobs=1, n_neighbors=7)
In a Jupyter environment, please rerun this cell to show the HTML representation or trust the notebook.
On GitHub, the HTML representation is unable to render, please try loading this page with nbviewer.org.
" + "
KNeighborsClassifier(metric='euclidean', n_jobs=1, n_neighbors=9)
In a Jupyter environment, please rerun this cell to show the HTML representation or trust the notebook.
On GitHub, the HTML representation is unable to render, please try loading this page with nbviewer.org.
" ], "text/plain": [ - "KNeighborsClassifier(n_jobs=1, n_neighbors=7)" + "KNeighborsClassifier(metric='euclidean', n_jobs=1, n_neighbors=9)" ] }, "execution_count": 5, @@ -676,7 +676,7 @@ { "data": { "text/plain": [ - "" + "" ] }, "execution_count": 6, @@ -1194,10 +1194,13 @@ " /* fitted */\n", " background-color: var(--sklearn-color-fitted-level-3);\n", "}\n", - "
KNeighborsClassifier(metric='euclidean', n_jobs=1, n_neighbors=3)
In a Jupyter environment, please rerun this cell to show the HTML representation or trust the notebook.
On GitHub, the HTML representation is unable to render, please try loading this page with nbviewer.org.
" + "
LogisticRegression(C=99.0450142669678, class_weight='balanced', dual=True,\n",
+       "                   max_iter=1000, n_jobs=1, solver='liblinear')
In a Jupyter environment, please rerun this cell to show the HTML representation or trust the notebook.
On GitHub, the HTML representation is unable to render, please try loading this page with nbviewer.org.
" ], "text/plain": [ - "KNeighborsClassifier(metric='euclidean', n_jobs=1, n_neighbors=3)" + "LogisticRegression(C=99.0450142669678, class_weight='balanced', dual=True,\n", + " max_iter=1000, n_jobs=1, solver='liblinear')" ] }, "execution_count": 7, @@ -1631,10 +1634,13 @@ " /* fitted */\n", " background-color: var(--sklearn-color-fitted-level-3);\n", "}\n", - "
KNeighborsClassifier(metric='euclidean', n_jobs=1, n_neighbors=1, p=1)
In a Jupyter environment, please rerun this cell to show the HTML representation or trust the notebook.
On GitHub, the HTML representation is unable to render, please try loading this page with nbviewer.org.
" + "
KNeighborsClassifier(metric='euclidean', n_jobs=1, n_neighbors=4, p=3,\n",
+       "                     weights='distance')
In a Jupyter environment, please rerun this cell to show the HTML representation or trust the notebook.
On GitHub, the HTML representation is unable to render, please try loading this page with nbviewer.org.
" ], "text/plain": [ - "KNeighborsClassifier(metric='euclidean', n_jobs=1, n_neighbors=1, p=1)" + "KNeighborsClassifier(metric='euclidean', n_jobs=1, n_neighbors=4, p=3,\n", + " weights='distance')" ] }, "execution_count": 8, @@ -2085,13 +2091,13 @@ " /* fitted */\n", " background-color: var(--sklearn-color-fitted-level-3);\n", "}\n", - "
DecisionTreeClassifier(criterion='entropy', max_depth=22, max_features=1.0,\n",
-       "                       min_samples_leaf=16, min_samples_split=20)
In a Jupyter environment, please rerun this cell to show the HTML representation or trust the notebook.
On GitHub, the HTML representation is unable to render, please try loading this page with nbviewer.org.
" + "
DecisionTreeClassifier(criterion='entropy', max_depth=2, max_features='log2',\n",
+       "                       min_samples_leaf=4, min_samples_split=10)
In a Jupyter environment, please rerun this cell to show the HTML representation or trust the notebook.
On GitHub, the HTML representation is unable to render, please try loading this page with nbviewer.org.
" ], "text/plain": [ - "DecisionTreeClassifier(criterion='entropy', max_depth=22, max_features=1.0,\n", - " min_samples_leaf=16, min_samples_split=20)" + "DecisionTreeClassifier(criterion='entropy', max_depth=2, max_features='log2',\n", + " min_samples_leaf=4, min_samples_split=10)" ] }, "execution_count": 9, @@ -2526,10 +2532,13 @@ " /* fitted */\n", " background-color: var(--sklearn-color-fitted-level-3);\n", "}\n", - "
KNeighborsClassifier(n_jobs=1, n_neighbors=4)
In a Jupyter environment, please rerun this cell to show the HTML representation or trust the notebook.
On GitHub, the HTML representation is unable to render, please try loading this page with nbviewer.org.
" + "
DecisionTreeClassifier(criterion='entropy', max_depth=25, max_features='log2',\n",
+       "                       min_samples_leaf=6, min_samples_split=13)
In a Jupyter environment, please rerun this cell to show the HTML representation or trust the notebook.
On GitHub, the HTML representation is unable to render, please try loading this page with nbviewer.org.
" ], "text/plain": [ - "KNeighborsClassifier(n_jobs=1, n_neighbors=4)" + "DecisionTreeClassifier(criterion='entropy', max_depth=25, max_features='log2',\n", + " min_samples_leaf=6, min_samples_split=13)" ] }, "execution_count": 10, @@ -2961,13 +2970,10 @@ " /* fitted */\n", " background-color: var(--sklearn-color-fitted-level-3);\n", "}\n", - "
MLPClassifier(alpha=0.09935758704160183,\n",
-       "              learning_rate_init=0.004466259151092733)
In a Jupyter environment, please rerun this cell to show the HTML representation or trust the notebook.
On GitHub, the HTML representation is unable to render, please try loading this page with nbviewer.org.
" + "
BernoulliNB(alpha=1.1043626639293316)
In a Jupyter environment, please rerun this cell to show the HTML representation or trust the notebook.
On GitHub, the HTML representation is unable to render, please try loading this page with nbviewer.org.
" ], "text/plain": [ - "MLPClassifier(alpha=0.09935758704160183,\n", - " learning_rate_init=0.004466259151092733)" + "BernoulliNB(alpha=1.1043626639293316)" ] }, "execution_count": 11, @@ -3402,13 +3408,10 @@ " /* fitted */\n", " background-color: var(--sklearn-color-fitted-level-3);\n", "}\n", - "
DecisionTreeClassifier(criterion='entropy', max_depth=11, max_features=1.0,\n",
-       "                       min_samples_leaf=12, min_samples_split=8)
In a Jupyter environment, please rerun this cell to show the HTML representation or trust the notebook.
On GitHub, the HTML representation is unable to render, please try loading this page with nbviewer.org.
" + "
SVC(C=0.007250294080496579, degree=2, max_iter=3000, probability=True)
In a Jupyter environment, please rerun this cell to show the HTML representation or trust the notebook.
On GitHub, the HTML representation is unable to render, please try loading this page with nbviewer.org.
" ], "text/plain": [ - "DecisionTreeClassifier(criterion='entropy', max_depth=11, max_features=1.0,\n", - " min_samples_leaf=12, min_samples_split=8)" + "SVC(C=0.007250294080496579, degree=2, max_iter=3000, probability=True)" ] }, "execution_count": 12, @@ -3849,26 +3852,19 @@ " /* fitted */\n", " background-color: var(--sklearn-color-fitted-level-3);\n", "}\n", - "
Pipeline(steps=[('selectfwe', SelectFwe(alpha=0.007682074361801758)),\n",
-       "                ('fastica', FastICA(n_components=64)),\n",
-       "                ('randomforestclassifier',\n",
-       "                 RandomForestClassifier(bootstrap=False, criterion='entropy',\n",
-       "                                        min_samples_leaf=10,\n",
-       "                                        min_samples_split=6))])
In a Jupyter environment, please rerun this cell to show the HTML representation or trust the notebook.
On GitHub, the HTML representation is unable to render, please try loading this page with nbviewer.org.
" + "
Pipeline(steps=[('selectfwe', SelectFwe(alpha=0.0004402567631974485)),\n",
+       "                ('rbfsampler',\n",
+       "                 RBFSampler(gamma=0.5507862784926447, n_components=4)),\n",
+       "                ('multinomialnb', MultinomialNB(alpha=0.019703201853925403))])
In a Jupyter environment, please rerun this cell to show the HTML representation or trust the notebook.
On GitHub, the HTML representation is unable to render, please try loading this page with nbviewer.org.
" ], "text/plain": [ - "Pipeline(steps=[('selectfwe', SelectFwe(alpha=0.007682074361801758)),\n", - " ('fastica', FastICA(n_components=64)),\n", - " ('randomforestclassifier',\n", - " RandomForestClassifier(bootstrap=False, criterion='entropy',\n", - " min_samples_leaf=10,\n", - " min_samples_split=6))])" + "Pipeline(steps=[('selectfwe', SelectFwe(alpha=0.0004402567631974485)),\n", + " ('rbfsampler',\n", + " RBFSampler(gamma=0.5507862784926447, n_components=4)),\n", + " ('multinomialnb', MultinomialNB(alpha=0.019703201853925403))])" ] }, "execution_count": 13, @@ -4308,27 +4304,79 @@ " background-color: var(--sklearn-color-fitted-level-3);\n", "}\n", "
Pipeline(steps=[('selectpercentile',\n",
-       "                 SelectPercentile(percentile=75.04535288452273)),\n",
+       "                 SelectPercentile(percentile=1.0089148758394795)),\n",
        "                ('nystroem',\n",
-       "                 Nystroem(gamma=0.4607961332716787, kernel='laplacian',\n",
-       "                          n_components=90)),\n",
-       "                ('bernoullinb',\n",
-       "                 BernoulliNB(alpha=2.4816194955956314, fit_prior=False))])
In a Jupyter environment, please rerun this cell to show the HTML representation or trust the notebook.
On GitHub, the HTML representation is unable to render, please try loading this page with nbviewer.org.
SelectPercentile(percentile=1.0089148758394795)
Nystroem(gamma=0.2371171340711561, kernel='cosine', n_components=73)
XGBClassifier(base_score=None, booster=None, callbacks=None,\n",
+       "              colsample_bylevel=None, colsample_bynode=None,\n",
+       "              colsample_bytree=None, device=None, early_stopping_rounds=None,\n",
+       "              enable_categorical=False, eval_metric=None, feature_types=None,\n",
+       "              gamma=None, grow_policy=None, importance_type=None,\n",
+       "              interaction_constraints=None, learning_rate=0.003591562007988768,\n",
+       "              max_bin=None, max_cat_threshold=None, max_cat_to_onehot=None,\n",
+       "              max_delta_step=None, max_depth=8, max_leaves=None,\n",
+       "              min_child_weight=1, missing=nan, monotone_constraints=None,\n",
+       "              multi_strategy=None, n_estimators=100, n_jobs=1,\n",
+       "              num_parallel_tree=None, random_state=None, ...)
" ], "text/plain": [ "Pipeline(steps=[('selectpercentile',\n", - " SelectPercentile(percentile=75.04535288452273)),\n", + " SelectPercentile(percentile=1.0089148758394795)),\n", " ('nystroem',\n", - " Nystroem(gamma=0.4607961332716787, kernel='laplacian',\n", - " n_components=90)),\n", - " ('bernoullinb',\n", - " BernoulliNB(alpha=2.4816194955956314, fit_prior=False))])" + " Nystroem(gamma=0.2371171340711561, kernel='cosine',\n", + " n_components=73)),\n", + " ('xgbclassifier',\n", + " XGBClassifier(base_score=None, booster=None, callbacks=None,\n", + " colsample_bylevel=None, colsample_bynode=None,\n", + " colsample_bytree=None, device=None,\n", + " early_stopping_rounds=None,\n", + " enab...\n", + " feature_types=None, gamma=None, grow_policy=None,\n", + " importance_type=None,\n", + " interaction_constraints=None,\n", + " learning_rate=0.003591562007988768, max_bin=None,\n", + " max_cat_threshold=None, max_cat_to_onehot=None,\n", + " max_delta_step=None, max_depth=8,\n", + " max_leaves=None, min_child_weight=1, missing=nan,\n", + " monotone_constraints=None, multi_strategy=None,\n", + " n_estimators=100, n_jobs=1,\n", + " num_parallel_tree=None, random_state=None, ...))])" ] }, "execution_count": 14, @@ -4359,11 +4407,7 @@ "name": "stderr", "output_type": "stream", "text": [ - "Generation: 0%| | 0/5 [00:00
TPOTEstimator(classification=True, generations=5, max_eval_time_seconds=300,\n",
        "              population_size=10, scorers=['roc_auc'], scorers_weights=[1],\n",
-       "              search_space=<tpot2.search_spaces.pipelines.graph.GraphPipeline object at 0x77c026a110c0>,\n",
+       "              search_space=<tpot2.search_spaces.pipelines.graph.GraphPipeline object at 0x7544c5ab8f40>,\n",
        "              verbose=2)
In a Jupyter environment, please rerun this cell to show the HTML representation or trust the notebook.
On GitHub, the HTML representation is unable to render, please try loading this page with nbviewer.org.
" ], "text/plain": [ "TPOTEstimator(classification=True, generations=5, max_eval_time_seconds=300,\n", " population_size=10, scorers=['roc_auc'], scorers_weights=[1],\n", - " search_space=,\n", + " search_space=,\n", " verbose=2)" ] }, @@ -4845,7 +4889,7 @@ "name": "stdout", "output_type": "stream", "text": [ - "auroc score 0.9876518024288388\n" + "auroc score 0.9501489525273881\n" ] } ], @@ -4865,7 +4909,7 @@ "outputs": [ { "data": { - "image/png": "", + "image/png": "", "text/plain": [ "
" ] diff --git a/tpot2/graphsklearn.py b/tpot2/graphsklearn.py index e0d500ae..7c519af5 100644 --- a/tpot2/graphsklearn.py +++ b/tpot2/graphsklearn.py @@ -231,7 +231,7 @@ def __init__( graph, cross_val_predict_cv=0, #signature function(estimator, X, y=none) method='auto', - memory=None, #TODO memory caching like sklearn.pipeline + memory=None, use_label_encoder=False, **kwargs, ): @@ -252,7 +252,7 @@ def __init__( The prediction method to use for the inner classifiers or regressors. If 'auto', it will try to use predict_proba, decision_function, or predict in that order. memory: str or object with the joblib.Memory interface, optional - Used to cache the fitted transformers of the pipeline. By default, no caching is performed. If a string is given, it is the path to the caching directory. + Used to cache the input and outputs of nodes to prevent refitting or computationally heavy transformations. By default, no caching is performed. If a string is given, it is the path to the caching directory. use_label_encoder: bool, optional If True, the label encoder is used to encode the labels to be 0 to N. If False, the label encoder is not used. diff --git a/tpot2/search_spaces/base.py b/tpot2/search_spaces/base.py index 88955ba7..09fc61e5 100644 --- a/tpot2/search_spaces/base.py +++ b/tpot2/search_spaces/base.py @@ -6,7 +6,10 @@ from typing import Generator, List, Tuple, Union import random from sklearn.base import BaseEstimator - +import sklearn +import networkx as nx +from . 
import graph_utils +from typing import final class SklearnIndividual(tpot2.BaseIndividual): @@ -25,10 +28,107 @@ def export_pipeline(self) -> BaseEstimator: def unique_id(self): return self + @final + def export_flattened_graphpipeline(self) -> tpot2.GraphPipeline: + return flatten_to_graphpipeline(self.export_pipeline()) class SklearnIndividualGenerator(): def __init__(self,): pass def generate(self, rng=None) -> SklearnIndividual: - pass \ No newline at end of file + pass + + + + + + +def flatten_graphpipeline(est): + flattened_full_graph = est.graph.copy() + + #put ests into the node label from the attributes + + flattened_full_graph = nx.relabel_nodes(flattened_full_graph, {n: flattened_full_graph.nodes[n]['instance'] for n in flattened_full_graph.nodes}) + + + remove_list = [] + for node in flattened_full_graph.nodes: + if isinstance(node, nx.DiGraph): + flattened = flatten_any(node) + + roots = graph_utils.get_roots(flattened) + leaves = graph_utils.get_leaves(flattened) + + n1_s = flattened_full_graph.successors(node) + n1_p = flattened_full_graph.predecessors(node) + + remove_list.append(node) + + flattened_full_graph = nx.compose(flattened_full_graph, flattened) + + + flattened_full_graph.add_edges_from([ (n2, n) for n in n1_s for n2 in leaves]) + flattened_full_graph.add_edges_from([ (n, n2) for n in n1_p for n2 in roots]) + + for node in remove_list: + flattened_full_graph.remove_node(node) + + return flattened_full_graph + +def flatten_pipeline(est): + graph = nx.DiGraph() + steps = [flatten_any(s[1]) for s in est.steps] + + #add steps to graph and connect them + for s in steps: + graph = nx.compose(graph, s) + + #connect leaves of each step to the roots of the next step + for i in range(len(steps)-1): + roots = graph_utils.get_roots(steps[i]) + leaves = graph_utils.get_leaves(steps[i+1]) + graph.add_edges_from([ (l,r) for l in leaves for r in roots]) + + + return graph + + + +def flatten_estimator(est): + graph = nx.DiGraph() + graph.add_node(est) + 
return graph + +def flatten_any(est): + if isinstance(est, tpot2.GraphPipeline): + return flatten_graphpipeline(est) + elif isinstance(est, sklearn.pipeline.Pipeline): + return flatten_pipeline(est) + else: + return flatten_estimator(est) + + +def flatten_to_graphpipeline(est): + #rename nodes to string representation of the instance and put the instance in the node attributes + flattened_full_graph = flatten_any(est) + + instance_to_label = {} + label_to_instance = {} + for node in flattened_full_graph.nodes: + found_unique_label = False + i=1 + while not found_unique_label: + new_label = f"{node.__class__.__name__}_{i}" + if new_label not in label_to_instance: + found_unique_label = True + i+=1 + label_to_instance[new_label] = node + instance_to_label[node] = new_label + + flattened_full_graph = nx.relabel_nodes(flattened_full_graph, instance_to_label) + + for label, instance in label_to_instance.items(): + flattened_full_graph.nodes[label]["instance"] = instance + + return tpot2.GraphPipeline(flattened_full_graph) \ No newline at end of file diff --git a/tpot2/search_spaces/pipelines/__init__.py b/tpot2/search_spaces/pipelines/__init__.py index ec90eb0e..b0c2c74d 100644 --- a/tpot2/search_spaces/pipelines/__init__.py +++ b/tpot2/search_spaces/pipelines/__init__.py @@ -3,4 +3,6 @@ from .sequential import * from .graph import * from .tree import * -from .wrapper import * \ No newline at end of file +from .wrapper import * + +from . 
import graph_utils \ No newline at end of file diff --git a/tpot2/search_spaces/pipelines/graph.py b/tpot2/search_spaces/pipelines/graph.py index 0ebe7092..9f70577f 100644 --- a/tpot2/search_spaces/pipelines/graph.py +++ b/tpot2/search_spaces/pipelines/graph.py @@ -1,10 +1,6 @@ import tpot2 import numpy as np -import pandas as pd -import sklearn -from tpot2 import config from typing import Generator, List, Tuple, Union -import random from ..base import SklearnIndividual, SklearnIndividualGenerator import networkx as nx import copy @@ -12,20 +8,67 @@ import itertools from .graph_utils import * from ..nodes.estimator_node import EstimatorNodeIndividual - +from typing import Union, Callable +import sklearn class GraphPipelineIndividual(SklearnIndividual): - def __init__(self, - root_search_space : SklearnIndividualGenerator, - leaf_search_space : SklearnIndividualGenerator = None, - inner_search_space : SklearnIndividualGenerator =None, - max_size: int = 10, - crossover_same_depth=False, - rng=None) -> None: - """ - Generates a tree shaped pipeline individual. Can be used to export a sklearn Pipeline that uses feature unions to merge branches of the pipeline. + """ + Defines a search space of pipelines in the shape of a Directed Acyclic Graphs. The search spaces for root, leaf, and inner nodes can be defined separately if desired. + Each graph will have a single root serving as the final estimator which is drawn from the `root_search_space`. If the `leaf_search_space` is defined, all leaves + in the pipeline will be drawn from that search space. If the `leaf_search_space` is not defined, all leaves will be drawn from the `inner_search_space`. + Nodes that are not leaves or roots will be drawn from the `inner_search_space`. If the `inner_search_space` is not defined, there will be no inner nodes. 
+ + `cross_val_predict_cv`, `method`, `memory`, and `use_label_encoder` are passed to the GraphPipeline object when the pipeline is exported and not directly used in the search space. + + Exports to a GraphPipeline object. + + Parameters + ---------- + + root_search_space: SklearnIndividualGenerator + The search space for the root node of the graph. This node will be the final estimator in the pipeline. + inner_search_space: SklearnIndividualGenerator, optional + The search space for the inner nodes of the graph. If not defined, there will be no inner nodes. + + leaf_search_space: SklearnIndividualGenerator, optional + The search space for the leaf nodes of the graph. If not defined, the leaf nodes will be drawn from the inner_search_space. + + crossover_same_depth: bool, optional + If True, crossover will only occur between nodes at the same depth in the graph. If False, crossover will occur between nodes at any depth. + + cross_val_predict_cv: int, cross-validation generator or an iterable, optional + Determines the cross-validation splitting strategy used in inner classifiers or regressors + + method: str, optional + The prediction method to use for the inner classifiers or regressors. If 'auto', it will try to use predict_proba, decision_function, or predict in that order. + + memory: str or object with the joblib.Memory interface, optional + Used to cache the input and outputs of nodes to prevent refitting or computationally heavy transformations. By default, no caching is performed. If a string is given, it is the path to the caching directory. + + use_label_encoder: bool, optional + If True, the label encoder is used to encode the labels to be 0 to N. If False, the label encoder is not used. + Mainly useful for classifiers (XGBoost) that require labels to be ints from 0 to N. + Can also be a sklearn.preprocessing.LabelEncoder object. If so, that label encoder is used. 
+ + rng: int, RandomState instance or None, optional + Seed for sampling the first graph instance. + """ + + def __init__( + self, + root_search_space: SklearnIndividualGenerator, + leaf_search_space: SklearnIndividualGenerator = None, + inner_search_space: SklearnIndividualGenerator = None, + max_size: int = np.inf, + crossover_same_depth: bool = False, + cross_val_predict_cv: Union[int, Callable] = 0, #signature function(estimator, X, y=none) + method: str = 'auto', + memory=None, + use_label_encoder: bool = False, + rng=None): + super().__init__() self.__debug = False @@ -38,6 +81,11 @@ def __init__(self, self.max_size = max_size self.crossover_same_depth = crossover_same_depth + self.cross_val_predict_cv = cross_val_predict_cv + self.method = method + self.memory = memory + self.use_label_encoder = use_label_encoder + self.root = self.root_search_space.generate(rng) self.graph = nx.DiGraph() self.graph.add_node(self.root) @@ -535,7 +583,7 @@ def _merge_duplicated_nodes(self): return graph_changed - def export_pipeline(self, **graph_pipeline_args): + def export_pipeline(self): estimator_graph = self.graph.copy() #mapping = {node:node.method_class(**node.hyperparameters) for node in estimator_graph} @@ -561,7 +609,7 @@ def export_pipeline(self, **graph_pipeline_args): for label, instance in label_to_instance.items(): estimator_graph.nodes[label]["instance"] = instance - return tpot2.GraphPipeline(graph=estimator_graph, **graph_pipeline_args) + return tpot2.GraphPipeline(graph=estimator_graph, memory=self.memory, use_label_encoder=self.use_label_encoder, method=self.method, cross_val_predict_cv=self.cross_val_predict_cv) def plot(self): @@ -621,28 +669,74 @@ def unique_id(self): class GraphPipeline(SklearnIndividualGenerator): - def __init__(self, root_search_space : SklearnIndividualGenerator, - leaf_search_space : SklearnIndividualGenerator = None, - inner_search_space : SklearnIndividualGenerator =None, - max_size: int = np.inf, - crossover_same_depth=False) -> 
None: + def __init__(self, + root_search_space: SklearnIndividualGenerator, + leaf_search_space: SklearnIndividualGenerator = None, + inner_search_space: SklearnIndividualGenerator = None, + max_size: int = np.inf, + crossover_same_depth: bool = False, + cross_val_predict_cv: Union[int, Callable] = 0, #signature function(estimator, X, y=none) + method: str = 'auto', + memory=None, + use_label_encoder: bool = False,): """ - Generates a directed acyclic graph of variable size. Search spaces for root, leaf, and inner nodes can be defined separately if desired. + Defines a search space of pipelines in the shape of a Directed Acyclic Graphs. The search spaces for root, leaf, and inner nodes can be defined separately if desired. + Each graph will have a single root serving as the final estimator which is drawn from the `root_search_space`. If the `leaf_search_space` is defined, all leaves + in the pipeline will be drawn from that search space. If the `leaf_search_space` is not defined, all leaves will be drawn from the `inner_search_space`. + Nodes that are not leaves or roots will be drawn from the `inner_search_space`. If the `inner_search_space` is not defined, there will be no inner nodes. + + `cross_val_predict_cv`, `method`, `memory`, and `use_label_encoder` are passed to the GraphPipeline object when the pipeline is exported and not directly used in the search space. + Exports to a GraphPipeline object. + Parameters + ---------- + + root_search_space: SklearnIndividualGenerator + The search space for the root node of the graph. This node will be the final estimator in the pipeline. + + inner_search_space: SklearnIndividualGenerator, optional + The search space for the inner nodes of the graph. If not defined, there will be no inner nodes. + + leaf_search_space: SklearnIndividualGenerator, optional + The search space for the leaf nodes of the graph. If not defined, the leaf nodes will be drawn from the inner_search_space. 
+ + crossover_same_depth: bool, optional + If True, crossover will only occur between nodes at the same depth in the graph. If False, crossover will occur between nodes at any depth. + + cross_val_predict_cv: int, cross-validation generator or an iterable, optional + Determines the cross-validation splitting strategy used in inner classifiers or regressors + + method: str, optional + The prediction method to use for the inner classifiers or regressors. If 'auto', it will try to use predict_proba, decision_function, or predict in that order. + + memory: str or object with the joblib.Memory interface, optional + Used to cache the input and outputs of nodes to prevent refitting or computationally heavy transformations. By default, no caching is performed. If a string is given, it is the path to the caching directory. + + use_label_encoder: bool, optional + If True, the label encoder is used to encode the labels to be 0 to N. If False, the label encoder is not used. + Mainly useful for classifiers (XGBoost) that require labels to be ints from 0 to N. + Can also be a sklearn.preprocessing.LabelEncoder object. If so, that label encoder is used. 
+ """ - self.search_space = root_search_space + self.root_search_space = root_search_space self.leaf_search_space = leaf_search_space self.inner_search_space = inner_search_space self.max_size = max_size self.crossover_same_depth = crossover_same_depth + self.cross_val_predict_cv = cross_val_predict_cv + self.method = method + self.memory = memory + self.use_label_encoder = use_label_encoder + def generate(self, rng=None): rng = np.random.default_rng(rng) - ind = GraphPipelineIndividual(self.search_space, self.leaf_search_space, self.inner_search_space, self.max_size, self.crossover_same_depth, rng=rng) + ind = GraphPipelineIndividual(self.root_search_space, self.leaf_search_space, self.inner_search_space, self.max_size, self.crossover_same_depth, + self.cross_val_predict_cv, self.method, self.memory, self.use_label_encoder, rng=rng) # if user specified limit, grab a random number between that limit n_nodes = min(rng.integers(1, self.max_size), 5) From 59dad61b6854c160220e4986bf19d2e8fcf3229b Mon Sep 17 00:00:00 2001 From: perib Date: Wed, 10 Apr 2024 11:42:04 -0700 Subject: [PATCH 10/75] flatten to graphpipeline, steadystate --- tpot2/config/get_configspace.py | 13 ++- tpot2/evolvers/__init__.py | 2 +- tpot2/evolvers/steady_state_evolver.py | 6 - tpot2/search_spaces/base.py | 8 +- .../{pipelines => }/graph_utils.py | 0 .../nodes/genetic_feature_selection.py | 6 +- tpot2/search_spaces/pipelines/__init__.py | 4 +- tpot2/search_spaces/pipelines/graph.py | 5 +- tpot2/search_spaces/pipelines/tree.py | 2 +- tpot2/tpot_estimator/estimator.py | 44 +++++--- tpot2/tpot_estimator/estimator_utils.py | 104 ++++-------------- .../tpot_estimator/steady_state_estimator.py | 79 ++++++------- 12 files changed, 111 insertions(+), 162 deletions(-) rename tpot2/search_spaces/{pipelines => }/graph_utils.py (100%) diff --git a/tpot2/config/get_configspace.py b/tpot2/config/get_configspace.py index 2c4485bf..44892278 100644 --- a/tpot2/config/get_configspace.py +++ 
b/tpot2/config/get_configspace.py @@ -18,7 +18,8 @@ from . import classifiers_sklearnex from . import regressors_sklearnex - +from ConfigSpace import ConfigurationSpace +from ConfigSpace import ConfigurationSpace, Integer, Float, Categorical, Normal #autoqtl_builtins from tpot2.builtin_modules import genetic_encoders @@ -163,7 +164,7 @@ "classifiers" : ["LogisticRegression", "DecisionTreeClassifier", "KNeighborsClassifier", "GradientBoostingClassifier", "ExtraTreesClassifier", "RandomForestClassifier", "SGDClassifier", "GaussianNB", "BernoulliNB", "MultinomialNB", "XGBClassifier", "SVC", "MLPClassifier"], "regressors" : ["ElasticNetCV", "ExtraTreesRegressor", "GradientBoostingRegressor", "AdaBoostRegressor", "DecisionTreeRegressor", "KNeighborsRegressor", "LassoLarsCV", "SVR", "RandomForestRegressor", "RidgeCV", "XGBRegressor", "SGDRegressor" ], "transformers": ["Binarizer", "Normalizer", "PCA", "ZeroCount", "OneHotEncoder", "FastICA", "FeatureAgglomeration", "Nystroem", "RBFSampler"], - "arithmatic": ["AddTransformer", "mul_neg_1_Transformer", "MulTransformer", "SafeReciprocalTransformer", "EQTransformer", "NETransformer", "GETransformer", "GTTransformer", "LETransformer", "LTTransformer", "MinTransformer", "MaxTransformer", "ZeroTransformer", "OneTransformer", "NTransformer"], + "arithmatic": ["AddTransformer", "mul_neg_1_Transformer", "MulTransformer", "SafeReciprocalTransformer", "EQTransformer", "NETransformer", "GETransformer", "GTTransformer", "LETransformer", "LTTransformer", "MinTransformer", "MaxTransformer"], "imputers": [], "skrebate": ["ReliefF", "SURF", "SURFstar", "MultiSURF"], "genetic_encoders": ["DominantEncoder", "RecessiveEncoder", "HeterosisEncoder", "UnderDominanceEncoder", "OverDominanceEncoder"], @@ -286,7 +287,13 @@ def get_configspace(name, n_classes=3, n_samples=100, n_features=100, random_sta case "OneTransformer": return {} case "NTransformer": - return {} + return ConfigurationSpace( + + space = { + + 'n': Float("n", bounds=(-1e3, 
1e3), log=True), + } + ) #imputers.py diff --git a/tpot2/evolvers/__init__.py b/tpot2/evolvers/__init__.py index 1d6af1a9..cf130f80 100644 --- a/tpot2/evolvers/__init__.py +++ b/tpot2/evolvers/__init__.py @@ -1,2 +1,2 @@ from .base_evolver import * -#from .steady_state_evolver import * \ No newline at end of file +from .steady_state_evolver import * \ No newline at end of file diff --git a/tpot2/evolvers/steady_state_evolver.py b/tpot2/evolvers/steady_state_evolver.py index 5db3e502..1aa457c8 100644 --- a/tpot2/evolvers/steady_state_evolver.py +++ b/tpot2/evolvers/steady_state_evolver.py @@ -1,17 +1,11 @@ #All abstract methods in the Evolutionary_Optimization module - -from abc import abstractmethod import tpot2 import typing import tqdm -from tpot2.individual_representations import BaseIndividual import time import numpy as np -import copy -import scipy import os import pickle -import statistics from tqdm.dask import TqdmCallback import distributed from dask.distributed import Client diff --git a/tpot2/search_spaces/base.py b/tpot2/search_spaces/base.py index 09fc61e5..80388708 100644 --- a/tpot2/search_spaces/base.py +++ b/tpot2/search_spaces/base.py @@ -29,8 +29,8 @@ def unique_id(self): return self @final - def export_flattened_graphpipeline(self) -> tpot2.GraphPipeline: - return flatten_to_graphpipeline(self.export_pipeline()) + def export_flattened_graphpipeline(self, **graphpipeline_kwargs) -> tpot2.GraphPipeline: + return flatten_to_graphpipeline(self.export_pipeline(), **graphpipeline_kwargs) class SklearnIndividualGenerator(): def __init__(self,): @@ -109,7 +109,7 @@ def flatten_any(est): return flatten_estimator(est) -def flatten_to_graphpipeline(est): +def flatten_to_graphpipeline(est, **graphpipeline_kwargs): #rename nodes to string representation of the instance and put the instance in the node attributes flattened_full_graph = flatten_any(est) @@ -131,4 +131,4 @@ def flatten_to_graphpipeline(est): for label, instance in label_to_instance.items(): 
flattened_full_graph.nodes[label]["instance"] = instance - return tpot2.GraphPipeline(flattened_full_graph) \ No newline at end of file + return tpot2.GraphPipeline(flattened_full_graph, **graphpipeline_kwargs) \ No newline at end of file diff --git a/tpot2/search_spaces/pipelines/graph_utils.py b/tpot2/search_spaces/graph_utils.py similarity index 100% rename from tpot2/search_spaces/pipelines/graph_utils.py rename to tpot2/search_spaces/graph_utils.py diff --git a/tpot2/search_spaces/nodes/genetic_feature_selection.py b/tpot2/search_spaces/nodes/genetic_feature_selection.py index e51ff8ba..1894026a 100644 --- a/tpot2/search_spaces/nodes/genetic_feature_selection.py +++ b/tpot2/search_spaces/nodes/genetic_feature_selection.py @@ -157,7 +157,7 @@ def __init__(self, crossover_rate = 0.5, mutation_rate_rate = 0, crossover_rate_rate = 0, - rng=None,): + ): self.n_features = n_features self.start_p = start_p @@ -165,7 +165,7 @@ def __init__(self, self.crossover_rate = crossover_rate self.mutation_rate_rate = mutation_rate_rate self.crossover_rate_rate = crossover_rate_rate - self.rng = rng + def generate(self, rng=None) -> SklearnIndividual: return GeneticFeatureSelectorIndividual( mask=self.n_features, @@ -174,5 +174,5 @@ def generate(self, rng=None) -> SklearnIndividual: crossover_rate=self.crossover_rate, mutation_rate_rate=self.mutation_rate_rate, crossover_rate_rate=self.crossover_rate_rate, - rng=self.rng + rng=rng ) \ No newline at end of file diff --git a/tpot2/search_spaces/pipelines/__init__.py b/tpot2/search_spaces/pipelines/__init__.py index b0c2c74d..ec90eb0e 100644 --- a/tpot2/search_spaces/pipelines/__init__.py +++ b/tpot2/search_spaces/pipelines/__init__.py @@ -3,6 +3,4 @@ from .sequential import * from .graph import * from .tree import * -from .wrapper import * - -from . 
import graph_utils \ No newline at end of file +from .wrapper import * \ No newline at end of file diff --git a/tpot2/search_spaces/pipelines/graph.py b/tpot2/search_spaces/pipelines/graph.py index 9f70577f..5c6668b9 100644 --- a/tpot2/search_spaces/pipelines/graph.py +++ b/tpot2/search_spaces/pipelines/graph.py @@ -6,7 +6,7 @@ import copy import matplotlib.pyplot as plt import itertools -from .graph_utils import * +from ..graph_utils import * from ..nodes.estimator_node import EstimatorNodeIndividual from typing import Union, Callable import sklearn @@ -360,7 +360,8 @@ def _crossover_swap_branch(self, G2, rng=None): node1_is_leaf = len(list(self.graph.successors(node1))) == 0 node2_is_leaf = len(list(G2.graph.successors(node2))) == 0 #if not ((node1_is_leaf and node1_is_leaf) or (not node1_is_leaf and not node2_is_leaf)): #if node1 is a leaf - if (node1_is_leaf and (not node2_is_leaf)) or ( (not node1_is_leaf) and node2_is_leaf): + #if (node1_is_leaf and (not node2_is_leaf)) or ( (not node1_is_leaf) and node2_is_leaf): + if not node1_is_leaf: #only continue if node1 and node2 are both leaves or both not leaves continue diff --git a/tpot2/search_spaces/pipelines/tree.py b/tpot2/search_spaces/pipelines/tree.py index de4c2aef..813a59e1 100644 --- a/tpot2/search_spaces/pipelines/tree.py +++ b/tpot2/search_spaces/pipelines/tree.py @@ -13,7 +13,7 @@ from .graph import GraphPipelineIndividual, GraphPipeline -from .graph_utils import * +from ..graph_utils import * class TreePipelineIndividual(GraphPipelineIndividual): def __init__(self, diff --git a/tpot2/tpot_estimator/estimator.py b/tpot2/tpot_estimator/estimator.py index 7465564c..999dbffe 100644 --- a/tpot2/tpot_estimator/estimator.py +++ b/tpot2/tpot_estimator/estimator.py @@ -29,7 +29,9 @@ def set_dask_settings(): #TODO inherit from _BaseComposition? 
class TPOTEstimator(BaseEstimator): - def __init__(self, scorers, + def __init__(self, + search_space, + scorers, scorers_weights, classification, cv = 5, @@ -38,13 +40,12 @@ def __init__(self, scorers, objective_function_names = None, bigger_is_better = True, - search_space = None, - - + export_graphpipeline = False, cross_val_predict_cv = 0, + memory = None, + categorical_features = None, subsets = None, - memory = None, preprocessing = False, population_size = 50, initial_population_size = None, @@ -87,7 +88,7 @@ def __init__(self, scorers, #dask parameters n_jobs=1, - memory_limit = "4GB", + memory_limit = None, client = None, processes = True, @@ -369,10 +370,17 @@ def __init__(self, scorers, self.search_space = search_space + self.export_graphpipeline = export_graphpipeline self.cross_val_predict_cv = cross_val_predict_cv + self.memory = memory + + if self.cross_val_predict_cv !=0 or self.memory is not None: + if not self.export_graphpipeline: + raise ValueError("cross_val_predict_cv and memory parameters are parameters for GraphPipeline. To enable these options export_graphpipeline to be True. 
Otherwise these can be passed into the relevant Search spaces as parameters.") + self.categorical_features = categorical_features self.subsets = subsets - self.memory = memory + self.preprocessing = preprocessing self.validation_strategy = validation_strategy self.validation_fraction = validation_fraction @@ -600,6 +608,7 @@ def objective_function(pipeline_individual, scorers= self._scorers, cv=self.cv_gen, other_objective_functions=self.other_objective_functions, + export_graphpipeline=self.export_graphpipeline, memory=self.memory, cross_val_predict_cv=self.cross_val_predict_cv, **kwargs): @@ -611,6 +620,7 @@ def objective_function(pipeline_individual, scorers= scorers, cv=cv, other_objective_functions=other_objective_functions, + export_graphpipeline=export_graphpipeline, memory=memory, cross_val_predict_cv=cross_val_predict_cv, **kwargs, @@ -713,6 +723,7 @@ def ind_generator(rng): scorers= self._scorers, cv=self.cv_gen, other_objective_functions=self.other_objective_functions, + export_graphpipeline=self.export_graphpipeline, memory=self.memory, cross_val_predict_cv=self.cross_val_predict_cv, @@ -724,6 +735,7 @@ def ind_generator(rng): scorers= scorers, cv=cv, other_objective_functions=other_objective_functions, + export_graphpipeline=export_graphpipeline, memory=memory, cross_val_predict_cv=cross_val_predict_cv, **kwargs, @@ -738,7 +750,8 @@ def ind_generator(rng): self.objective_names_for_selection = val_objective_names self.evaluated_individuals.loc[best_pareto_front_idx,val_objective_names] = val_scores - self.evaluated_individuals["Validation_Pareto_Front"] = tpot2.utils.get_pareto_front(self.evaluated_individuals, val_objective_names, self.objective_function_weights, invalid_values=["TIMEOUT","INVALID"]) + self.evaluated_individuals["Validation_Pareto_Front"] = tpot2.utils.get_pareto_frontier(self.evaluated_individuals, column_names=val_objective_names, weights=self.objective_function_weights, invalid_values=["TIMEOUT","INVALID"]) + elif 
validation_strategy == 'split': @@ -765,6 +778,7 @@ def ind_generator(rng): y_val, scorers= self._scorers, other_objective_functions=self.other_objective_functions, + export_graphpipeline=self.export_graphpipeline, memory=self.memory, cross_val_predict_cv=self.cross_val_predict_cv, **kwargs: val_objective_function_generator( @@ -775,6 +789,7 @@ def ind_generator(rng): y_val, scorers= scorers, other_objective_functions=other_objective_functions, + export_graphpipeline=export_graphpipeline, memory=memory, cross_val_predict_cv=cross_val_predict_cv, **kwargs, @@ -787,11 +802,11 @@ def ind_generator(rng): val_objective_names = ['validation_'+name for name in self.objective_names] self.objective_names_for_selection = val_objective_names self.evaluated_individuals.loc[best_pareto_front_idx,val_objective_names] = val_scores - self.evaluated_individuals["Validation_Pareto_Front"] = tpot2.utils.get_pareto_front(self.evaluated_individuals, val_objective_names, self.objective_function_weights, invalid_values=["TIMEOUT","INVALID"]) + self.evaluated_individuals["Validation_Pareto_Front"] = tpot2.utils.get_pareto_frontier(self.evaluated_individuals, column_names=val_objective_names, weights=self.objective_function_weights, invalid_values=["TIMEOUT","INVALID"]) else: self.objective_names_for_selection = self.objective_names - - val_scores = self.evaluated_individuals[~self.evaluated_individuals[self.objective_names_for_selection].isin(["TIMEOUT","INVALID"]).any(axis=1)][self.objective_names_for_selection].astype(float) + + val_scores = self.evaluated_individuals[~self.evaluated_individuals[self.objective_names_for_selection].isna().all(1)][self.objective_names_for_selection] weighted_scores = val_scores*self.objective_function_weights if self.bigger_is_better: @@ -805,7 +820,10 @@ def ind_generator(rng): #TODO #best_individual_pipeline = best_individual.export_pipeline(memory=self.memory, cross_val_predict_cv=self.cross_val_predict_cv) - best_individual_pipeline = 
best_individual.export_pipeline() + if self.export_graphpipeline: + best_individual_pipeline = best_individual.export_flattened_graphpipeline(memory=self.memory, cross_val_predict_cv=self.cross_val_predict_cv) + else: + best_individual_pipeline = best_individual.export_pipeline() if self.preprocessing: self.fitted_pipeline_ = sklearn.pipeline.make_pipeline(sklearn.base.clone(self._preprocessing_pipeline), best_individual_pipeline ) @@ -888,7 +906,7 @@ def make_evaluated_individuals(self): self.evaluated_individuals = self.evaluated_individuals.set_index(self.evaluated_individuals.index.map(object_to_int)) self.evaluated_individuals['Parents'] = self.evaluated_individuals['Parents'].apply(lambda row: convert_parents_tuples_to_integers(row, object_to_int)) - self.evaluated_individuals["Instance"] = self.evaluated_individuals["Individual"].apply(lambda ind: apply_make_pipeline(ind, preprocessing_pipeline=self._preprocessing_pipeline)) + self.evaluated_individuals["Instance"] = self.evaluated_individuals["Individual"].apply(lambda ind: apply_make_pipeline(ind, preprocessing_pipeline=self._preprocessing_pipeline, export_graphpipeline=self.export_graphpipeline, memory=self.memory, cross_val_predict_cv=self.cross_val_predict_cv)) return self.evaluated_individuals diff --git a/tpot2/tpot_estimator/estimator_utils.py b/tpot2/tpot_estimator/estimator_utils.py index c0b79739..7be96e26 100644 --- a/tpot2/tpot_estimator/estimator_utils.py +++ b/tpot2/tpot_estimator/estimator_utils.py @@ -13,97 +13,33 @@ def convert_parents_tuples_to_integers(row, object_to_int): return np.nan #TODO add kwargs -def apply_make_pipeline(graphindividual, preprocessing_pipeline=None): +def apply_make_pipeline(graphindividual, preprocessing_pipeline=None, export_graphpipeline=False, **pipeline_kwargs): try: - if preprocessing_pipeline is None: - return graphindividual.export_pipeline() - else: - return sklearn.pipeline.make_pipeline(sklearn.base.clone(preprocessing_pipeline), 
graphindividual.export_pipeline()) - except: - return None - -def get_configuration_dictionary(options, n_samples, n_features, classification, random_state=None, cv=None, subsets=None, feature_names=None, n_classes=None): - if options is None: - return options - - if isinstance(options, dict): - return recursive_with_defaults(options, n_samples, n_features, classification, random_state=None, cv=None, subsets=subsets, feature_names=feature_names, n_classes=n_classes) - - if not isinstance(options, list): - options = [options] - - config_dict = {} - - for option in options: - - if option == "selectors": - config_dict.update(tpot2.config.make_selector_config_dictionary(random_state=random_state, classifier=classification)) - - elif option == "classifiers": - config_dict.update(tpot2.config.make_classifier_config_dictionary(random_state=random_state, n_samples=n_samples, n_classes=n_classes)) - - elif option == "classifiers_sklearnex": - config_dict.update(tpot2.config.make_sklearnex_classifier_config_dictionary(random_state=random_state, n_samples=n_samples, n_classes=n_classes)) - - elif option == "regressors": - config_dict.update(tpot2.config.make_regressor_config_dictionary(random_state=random_state, cv=cv, n_samples=n_samples)) - - elif option == "regressors_sklearnex": - config_dict.update(tpot2.config.make_sklearnex_regressor_config_dictionary(random_state=random_state, n_samples=n_samples)) - - elif option == "transformers": - config_dict.update(tpot2.config.make_transformer_config_dictionary(random_state=random_state, n_features=n_features)) - - elif option == "arithmetic_transformer": - config_dict.update(tpot2.config.make_arithmetic_transformer_config_dictionary()) - - elif option == "feature_set_selector": - config_dict.update(tpot2.config.make_FSS_config_dictionary(subsets, n_features, feature_names=feature_names)) - - elif option == "skrebate": - config_dict.update(tpot2.config.make_skrebate_config_dictionary(n_features=n_features)) - - elif option == 
"MDR": - config_dict.update(tpot2.config.make_MDR_config_dictionary()) - - elif option == "continuousMDR": - config_dict.update(tpot2.config.make_ContinuousMDR_config_dictionary()) - - elif option == "FeatureEncodingFrequencySelector": - config_dict.update(tpot2.config.make_FeatureEncodingFrequencySelector_config_dictionary()) - - elif option == "genetic encoders": - config_dict.update(tpot2.config.make_genetic_encoders_config_dictionary()) - - elif option == "passthrough": - config_dict.update(tpot2.config.make_passthrough_config_dictionary()) - + if export_graphpipeline: + est = graphindividual.export_flattened_graphpipeline(**pipeline_kwargs) else: - config_dict.update(recursive_with_defaults(option, n_samples, n_features, classification, random_state, cv, subsets=subsets, feature_names=feature_names, n_classes=n_classes)) - - if len(config_dict) == 0: - raise ValueError("No valid configuration options were provided. Please check the options you provided and try again.") + est = graphindividual.export_pipeline() - return config_dict -def recursive_with_defaults(config_dict, n_samples, n_features, classification, random_state=None, cv=None, subsets=None, feature_names=None, n_classes=None): + if preprocessing_pipeline is None: + return est + else: + return sklearn.pipeline.make_pipeline(sklearn.base.clone(preprocessing_pipeline), est) + except: + return None - for key in 'leaf_config_dict', 'root_config_dict', 'inner_config_dict', 'Recursive': - if key in config_dict: - value = config_dict[key] - if key=="Resursive": - config_dict[key] = recursive_with_defaults(value, n_samples, n_features, classification, random_state, cv, subsets=None, feature_names=None, n_classes=None) - else: - config_dict[key] = get_configuration_dictionary(value, n_samples, n_features, classification, random_state, cv, subsets, feature_names, n_classes) - return config_dict -def objective_function_generator(pipeline, x,y, scorers, cv, other_objective_functions, step=None, budget=None, 
generation=1, is_classification=True, **pipeline_kwargs): +def objective_function_generator(pipeline, x,y, scorers, cv, other_objective_functions, step=None, budget=None, generation=1, is_classification=True, export_graphpipeline=False, **pipeline_kwargs): #pipeline = pipeline.export_pipeline(**pipeline_kwargs) - pipeline = pipeline.export_pipeline() + if export_graphpipeline: + pipeline = pipeline.export_flattened_graphpipeline(**pipeline_kwargs) + else: + pipeline = pipeline.export_pipeline() + if budget is not None and budget < 1: if is_classification: x,y = sklearn.utils.resample(x,y, stratify=y, n_samples=int(budget*len(x)), replace=False, random_state=1) @@ -129,9 +65,13 @@ def objective_function_generator(pipeline, x,y, scorers, cv, other_objective_fun return np.concatenate([cv_obj_scores,other_scores]) -def val_objective_function_generator(pipeline, X_train, y_train, X_test, y_test, scorers, other_objective_functions, **pipeline_kwargs): +def val_objective_function_generator(pipeline, X_train, y_train, X_test, y_test, scorers, other_objective_functions, export_graphpipeline=False, **pipeline_kwargs): #subsample the data - pipeline = pipeline.export_pipeline(**pipeline_kwargs) + if export_graphpipeline: + pipeline = pipeline.export_flattened_graphpipeline(**pipeline_kwargs) + else: + pipeline = pipeline.export_pipeline() + fitted_pipeline = sklearn.base.clone(pipeline) fitted_pipeline.fit(X_train, y_train) diff --git a/tpot2/tpot_estimator/steady_state_estimator.py b/tpot2/tpot_estimator/steady_state_estimator.py index 777c8cad..c73584b6 100644 --- a/tpot2/tpot_estimator/steady_state_estimator.py +++ b/tpot2/tpot_estimator/steady_state_estimator.py @@ -27,7 +27,9 @@ def set_dask_settings(): #TODO inherit from _BaseComposition? 
class TPOTEstimatorSteadyState(BaseEstimator): - def __init__(self, scorers= [], + def __init__(self, + search_space, + scorers= [], scorers_weights = [], classification = False, cv = 5, @@ -35,15 +37,14 @@ def __init__(self, scorers= [], other_objective_functions_weights = [], objective_function_names = None, bigger_is_better = True, - max_size = np.inf, - linear_pipeline = False, - root_config_dict= 'Auto', - inner_config_dict=["selectors", "transformers"], - leaf_config_dict= None, + + + export_graphpipeline = False, cross_val_predict_cv = 0, + memory = None, + categorical_features = None, subsets = None, - memory = None, preprocessing = False, validation_strategy = "none", validation_fraction = .2, @@ -77,7 +78,6 @@ def __init__(self, scorers= [], stepwise_steps = 5, warm_start = False, - subset_column = None, verbose = 0, periodic_checkpoint_folder = None, @@ -364,8 +364,6 @@ def __init__(self, scorers= [], warm_start : bool, default=False If True, will use the continue the evolutionary algorithm from the last generation of the previous run. - subset_column : str or int, default=None - EXPERIMENTAL The column to use for the subset selection. Must also pass in unique_subset_values to GraphIndividual to function. verbose : int, default=1 How much information to print during the optimization process. Higher values include the information from lower values. @@ -422,6 +420,7 @@ def __init__(self, scorers= [], # sklearn BaseEstimator must have a corresponding attribute for each parameter. # These should not be modified once set. 
+ self.search_space = search_space self.scorers = scorers self.scorers_weights = scorers_weights self.classification = classification @@ -430,15 +429,18 @@ def __init__(self, scorers= [], self.other_objective_functions_weights = other_objective_functions_weights self.objective_function_names = objective_function_names self.bigger_is_better = bigger_is_better - self.max_size = max_size - self.linear_pipeline = linear_pipeline - self.root_config_dict= root_config_dict - self.inner_config_dict= inner_config_dict - self.leaf_config_dict= leaf_config_dict + + self.export_graphpipeline = export_graphpipeline self.cross_val_predict_cv = cross_val_predict_cv + self.memory = memory + + if self.cross_val_predict_cv !=0 or self.memory is not None: + if not self.export_graphpipeline: + raise ValueError("cross_val_predict_cv and memory parameters are parameters for GraphPipeline. To enable these options export_graphpipeline to be True. Otherwise these can be passed into the relevant Search spaces as parameters.") + + self.categorical_features = categorical_features self.subsets = subsets - self.memory = memory self.preprocessing = preprocessing self.validation_strategy = validation_strategy self.validation_fraction = validation_fraction @@ -468,7 +470,6 @@ def __init__(self, scorers= [], self.stepwise_steps = stepwise_steps self.warm_start = warm_start - self.subset_column = subset_column self.verbose = verbose self.periodic_checkpoint_folder = periodic_checkpoint_folder @@ -660,17 +661,6 @@ def fit(self, X, y): else: self.feature_names = None - if self.root_config_dict == 'Auto': - if self.classification: - n_classes = len(np.unique(y)) - root_config_dict = get_configuration_dictionary("classifiers", n_samples, n_features, self.classification, self.random_state, self.cv_gen, subsets=self.subsets, feature_names=self.feature_names, n_classes=n_classes) - else: - root_config_dict = get_configuration_dictionary("regressors", n_samples, n_features, self.classification, 
self.random_state, self.cv_gen, subsets=self.subsets, feature_names=self.feature_names) - else: - root_config_dict = get_configuration_dictionary(self.root_config_dict, n_samples, n_features, self.classification, self.random_state, self.cv_gen, subsets=self.subsets,feature_names=self.feature_names) - - inner_config_dict = get_configuration_dictionary(self.inner_config_dict, n_samples, n_features, self.classification, self.random_state, self.cv_gen, subsets=self.subsets, feature_names=self.feature_names) - leaf_config_dict = get_configuration_dictionary(self.leaf_config_dict, n_samples, n_features, self.classification, self.random_state, self.cv_gen, subsets=self.subsets, feature_names=self.feature_names) @@ -681,9 +671,9 @@ def objective_function(pipeline_individual, scorers= self._scorers, cv=self.cv_gen, other_objective_functions=self.other_objective_functions, + export_graphpipeline=self.export_graphpipeline, memory=self.memory, cross_val_predict_cv=self.cross_val_predict_cv, - subset_column=self.subset_column, **kwargs): return objective_function_generator( pipeline_individual, @@ -693,19 +683,16 @@ def objective_function(pipeline_individual, scorers= scorers, cv=cv, other_objective_functions=other_objective_functions, + export_graphpipeline=export_graphpipeline, memory=memory, cross_val_predict_cv=cross_val_predict_cv, - subset_column=subset_column, **kwargs, ) - self.individual_generator_instance = tpot2.individual_representations.graph_pipeline_individual.estimator_graph_individual_generator( - inner_config_dict=inner_config_dict, - root_config_dict=root_config_dict, - leaf_config_dict=leaf_config_dict, - max_size = self.max_size, - linear_pipeline=self.linear_pipeline, - ) + def ind_generator(rng): + rng = np.random.default_rng(rng) + while True: + yield self.search_space.generate(rng) @@ -718,7 +705,7 @@ def objective_function(pipeline_individual, #If warm start and we have an evolver instance, use the existing one if not(self.warm_start and 
self._evolver_instance is not None): - self._evolver_instance = self._evolver( individual_generator=self.individual_generator_instance, + self._evolver_instance = self._evolver( individual_generator=ind_generator(self.rng), objective_functions= [objective_function], objective_function_weights = self.objective_function_weights, objective_names=self.objective_names, @@ -805,9 +792,10 @@ def objective_function(pipeline_individual, scorers= self._scorers, cv=self.cv_gen, other_objective_functions=self.other_objective_functions, + export_graphpipeline=self.export_graphpipeline, memory=self.memory, cross_val_predict_cv=self.cross_val_predict_cv, - subset_column=self.subset_column, + **kwargs: objective_function_generator( ind, X, @@ -816,9 +804,9 @@ def objective_function(pipeline_individual, scorers= scorers, cv=cv, other_objective_functions=other_objective_functions, + export_graphpipeline=export_graphpipeline, memory=memory, cross_val_predict_cv=cross_val_predict_cv, - subset_column=subset_column, **kwargs, )] @@ -858,9 +846,9 @@ def objective_function(pipeline_individual, y_val, scorers= self._scorers, other_objective_functions=self.other_objective_functions, + export_graphpipeline=self.export_graphpipeline, memory=self.memory, cross_val_predict_cv=self.cross_val_predict_cv, - subset_column=self.subset_column, **kwargs: val_objective_function_generator( ind, X, @@ -869,9 +857,9 @@ def objective_function(pipeline_individual, y_val, scorers= scorers, other_objective_functions=other_objective_functions, + export_graphpipeline=export_graphpipeline, memory=memory, cross_val_predict_cv=cross_val_predict_cv, - subset_column=subset_column, **kwargs, )] @@ -898,7 +886,10 @@ def objective_function(pipeline_individual, self.selected_best_score = self.evaluated_individuals.loc[best_idx] - best_individual_pipeline = best_individual.export_pipeline(memory=self.memory, cross_val_predict_cv=self.cross_val_predict_cv, subset_column=self.subset_column) + if self.export_graphpipeline: 
+ best_individual_pipeline = best_individual.export_flattened_graphpipeline(memory=self.memory, cross_val_predict_cv=self.cross_val_predict_cv) + else: + best_individual_pipeline = best_individual.export_pipeline() if self.preprocessing: self.fitted_pipeline_ = sklearn.pipeline.make_pipeline(sklearn.base.clone(self._preprocessing_pipeline), best_individual_pipeline ) @@ -979,7 +970,7 @@ def make_evaluated_individuals(self): self.evaluated_individuals = self.evaluated_individuals.set_index(self.evaluated_individuals.index.map(object_to_int)) self.evaluated_individuals['Parents'] = self.evaluated_individuals['Parents'].apply(lambda row: convert_parents_tuples_to_integers(row, object_to_int)) - self.evaluated_individuals["Instance"] = self.evaluated_individuals["Individual"].apply(lambda ind: apply_make_pipeline(ind, preprocessing_pipeline=self._preprocessing_pipeline)) + self.evaluated_individuals["Instance"] = self.evaluated_individuals["Individual"].apply(lambda ind: apply_make_pipeline(ind, preprocessing_pipeline=self._preprocessing_pipeline, export_graphpipeline=self.export_graphpipeline, memory=self.memory, cross_val_predict_cv=self.cross_val_predict_cv)) return self.evaluated_individuals From d2dab4eadec600e5b4addaa634ade88936936750 Mon Sep 17 00:00:00 2001 From: perib Date: Wed, 17 Apr 2024 21:23:44 -0700 Subject: [PATCH 11/75] lots of edits to configuration spaces --- README.md | 10 - Tutorial/2_Search_Spaces.ipynb | 455 +++++++++++++++++- .../builtin_modules/column_one_hot_encoder.py | 1 + tpot2/config/classifiers.py | 370 ++++++++++---- tpot2/config/classifiers_sklearnex.py | 10 +- tpot2/config/get_configspace.py | 199 ++++++-- tpot2/config/mdr_configs.py | 8 +- tpot2/config/regressors.py | 387 ++++++++++----- tpot2/config/regressors_sklearnex.py | 14 +- tpot2/config/special_configs.py | 51 -- tpot2/config/tests/__init__.py | 0 tpot2/config/tests/test_get_configspace.py | 26 + tpot2/config/transformers.py | 35 ++ tpot2/search_spaces/nodes/estimator_node.py 
| 61 ++- 14 files changed, 1271 insertions(+), 356 deletions(-) create mode 100644 tpot2/config/tests/__init__.py create mode 100644 tpot2/config/tests/test_get_configspace.py diff --git a/README.md b/README.md index 6f30b08d..f7551551 100644 --- a/README.md +++ b/README.md @@ -159,16 +159,6 @@ Setting `verbose` to 5 can be helpful during debugging as it will print out the We welcome you to check the existing issues for bugs or enhancements to work on. If you have an idea for an extension to TPOT2, please file a new issue so we can discuss it. -### Known issues -* TPOT2 uses the func_timeout package to terminate long running pipelines. The early termination signal may fail on particular estimators and cause TPOT2 to run for longer than intended. If you are using your own custom configuration dictionaries, and are noticing that TPOT2 is running for longer than intended, this may be the issue. We are currently looking into it. Sometimes restarting TPOT2 resolves the issue. -* Periodic checkpoint folder may not correctly resume if using budget and/or initial_population size. -* Population class is slow to add new individuals. The Population class needs to be updated to use a dictionary for storage rather than a pandas dataframe. -* Crossover may sometimes go over the size restrictions. -* Memory caching with GraphPipeline may miss some nodes where the ordering on inputs happens to be different between two nodes. - - - - ### Support for TPOT2 TPOT2 was developed in the [Artificial Intelligence Innovation (A2I) Lab](http://epistasis.org/) at Cedars-Sinai with funding from the [NIH](http://www.nih.gov/) under grants U01 AG066833 and R01 LM010098. We are incredibly grateful for the support of the NIH and the Cedars-Sinai during the development of this project. 
diff --git a/Tutorial/2_Search_Spaces.ipynb b/Tutorial/2_Search_Spaces.ipynb index 8e0af2b9..51d2aff7 100644 --- a/Tutorial/2_Search_Spaces.ipynb +++ b/Tutorial/2_Search_Spaces.ipynb @@ -141,7 +141,9 @@ "cell_type": "markdown", "metadata": {}, "source": [ - "You can sample generate an individual with the generate() function. This individual samples from the search space as well as provides mutation and crossover functions to modify the current sample." + "You can sample generate an individual with the generate() function. This individual samples from the search space as well as provides mutation and crossover functions to modify the current sample.\n", + "\n", + "Note that ConfigurationSpace does not support None as a parameter. Instead, use the special string \"\\\". TPOT will automatically replace instances of this string with the Python None." ] }, { @@ -652,6 +654,455 @@ "knn_individual1.export_pipeline()" ] }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "If a dictionary of parameters is passed instead of of a ConfigSpace, then the hyperparameters will be fixed and not learned." + ] + }, + { + "cell_type": "code", + "execution_count": 1, + "metadata": {}, + "outputs": [ + { + "data": { + "text/html": [ + "
KNeighborsClassifier(n_neighbors=10)
In a Jupyter environment, please rerun this cell to show the HTML representation or trust the notebook.
On GitHub, the HTML representation is unable to render, please try loading this page with nbviewer.org.
" + ], + "text/plain": [ + "KNeighborsClassifier(n_neighbors=10)" + ] + }, + "execution_count": 1, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "import tpot2\n", + "from ConfigSpace import ConfigurationSpace\n", + "from ConfigSpace import ConfigurationSpace, Integer, Float, Categorical, Normal\n", + "from sklearn.neighbors import KNeighborsClassifier\n", + "\n", + "space = {\n", + "\n", + " 'n_neighbors':10,\n", + "}\n", + "\n", + "knn_node = tpot2.search_spaces.nodes.EstimatorNode(\n", + " method = KNeighborsClassifier,\n", + " space = space,\n", + ")\n", + "\n", + "knn_node.generate().export_pipeline()" + ] + }, { "cell_type": "markdown", "metadata": {}, @@ -1658,7 +2109,7 @@ "cell_type": "markdown", "metadata": {}, "source": [ - "TPOT2 also comes with predefined search spaces. the helper function `tpot2.config.get_search_space` takes in a string or a list of strings, and returns either a EstimatorNode or a ChoicePipeline,respectively. \n", + "TPOT2 also comes with predefined search spaces. The current search spaces were adapted from a combination of the original TPOT package as well as the search spaces used in [AutoSklearn](https://github.com/automl/auto-sklearn/tree/development/autosklearn/pipeline/components). The helper function `tpot2.config.get_search_space` takes in a string or a list of strings, and returns either a EstimatorNode or a ChoicePipeline,respectively. \n", "\n", "strings can correspond to individual methods. Tehre are also special strings that return predefined lists of methods. 
\n", "\n", diff --git a/tpot2/builtin_modules/column_one_hot_encoder.py b/tpot2/builtin_modules/column_one_hot_encoder.py index 4f3843bf..34c3320e 100644 --- a/tpot2/builtin_modules/column_one_hot_encoder.py +++ b/tpot2/builtin_modules/column_one_hot_encoder.py @@ -44,6 +44,7 @@ def __init__(self, columns='auto', drop=None, handle_unknown='error', sparse_out ---------- columns : str, list, default='auto' + Determines which columns to onehot encode with sklearn.preprocessing.OneHotEncoder. - 'auto' : Automatically select categorical features based on columns with less than 10 unique values - 'categorical' : Automatically select categorical features - 'numeric' : Automatically select numeric features diff --git a/tpot2/config/classifiers.py b/tpot2/config/classifiers.py index 14649f61..6423f328 100644 --- a/tpot2/config/classifiers.py +++ b/tpot2/config/classifiers.py @@ -1,31 +1,42 @@ from ConfigSpace import ConfigurationSpace from ConfigSpace import ConfigurationSpace, Integer, Float, Categorical, Normal +from ConfigSpace import EqualsCondition, OrConjunction, NotEqualsCondition, InCondition +from ..search_spaces.nodes.estimator_node import NONE_SPECIAL_STRING, TRUE_SPECIAL_STRING, FALSE_SPECIAL_STRING +import numpy as np + #TODO Conditional search space to prevent invalid combinations of hyperparameters -def get_LogisticRegression_ConfigurationSpace(random_state=None): - - space = { - 'solver': Categorical('solver', ['saga','liblinear']), - 'penalty': Categorical("penalty", ['elasticnet','l1', 'l2']), #TODO workaround to support None option? 
- 'dual': Categorical("dual", [True, False]), - 'C': Float("C", bounds=(1e-4, 1e4), log=True), - - #TODO workaround for including None as a value for class_weight - 'class_weight': Categorical("class_weight", ['balanced']), - 'n_jobs': 1, - 'max_iter': 1000, - } +def get_LogisticRegression_ConfigurationSpace(n_samples, n_features, random_state): + + dual = n_samples<=n_features + + dual = TRUE_SPECIAL_STRING if dual else FALSE_SPECIAL_STRING + + space = {"solver":"saga", + "max_iter":1000, + "n_jobs":1, + "dual":dual, + } + + penalty = Categorical('penalty', ['l1', 'l2',"elasticnet"], default='l2') + C = Float('C', (0.01, 1e5), log=True) + l1_ratio = Float('l1_ratio', (0.0, 1.0)) + + l1_ratio_condition = EqualsCondition(l1_ratio, penalty, 'elasticnet') if random_state is not None: #This is required because configspace doesn't allow None as a value space['random_state'] = random_state - return ConfigurationSpace( - space = space - ) + + cs = ConfigurationSpace(space) + cs.add_hyperparameters([penalty, C, l1_ratio]) + cs.add_conditions([l1_ratio_condition]) + + return cs -def get_KNeighborsClassifier_ConfigurationSpace(n_samples=10): +def get_KNeighborsClassifier_ConfigurationSpace(n_samples): return ConfigurationSpace( space = { @@ -39,14 +50,14 @@ def get_KNeighborsClassifier_ConfigurationSpace(n_samples=10): ) -def get_DecisionTreeClassifier_ConfigurationSpace(random_state=None, n_featues=20): +def get_DecisionTreeClassifier_ConfigurationSpace(n_featues, random_state): space = { 'criterion': Categorical("criterion", ['gini', 'entropy']), - 'max_depth': Integer("max_depth", bounds=(1, 2*n_featues)), - 'min_samples_split': Integer("min_samples_split", bounds=(2, 20)), + 'max_depth': Integer("max_depth", bounds=(1, 2*n_featues)), #max of 20? log scale? 
+ 'min_samples_split': Integer("min_samples_split", bounds=(1, 20)), 'min_samples_leaf': Integer("min_samples_leaf", bounds=(1, 20)), - 'max_features': Categorical("max_features", [1.0, 'sqrt', 'log2']), + 'max_features': Categorical("max_features", [NONE_SPECIAL_STRING, 'sqrt', 'log2']), 'min_weight_fraction_leaf': 0.0, } @@ -58,54 +69,66 @@ def get_DecisionTreeClassifier_ConfigurationSpace(random_state=None, n_featues=2 space = space ) +#TODO Conditional search spaces +def get_LinearSVC_ConfigurationSpace(random_state): + space = {"dual":"auto"} + + penalty = Categorical('penalty', ['l1', 'l2']) + C = Float('C', (0.01, 1e5), log=True) + loss = Categorical('loss', ['hinge', 'squared_hinge']) -def get_SVC_ConfigurationSpace(random_state=None): + loss_condition = EqualsCondition(loss, penalty, 'l2') - space = { - 'kernel': Categorical("kernel", ['poly', 'rbf', 'linear', 'sigmoid']), - 'C': Float("C", bounds=(1e-4, 25), log=True), - 'degree': Integer("degree", bounds=(1, 4)), - - #'class_weight': Categorical("class_weight", [None, 'balanced']), #TODO add class_weight. configspace doesn't allow None as a value. - 'max_iter': 3000, - 'tol': 0.001, - 'probability': Categorical("probability", [True]), # configspace doesn't allow bools as a default value? 
but does allow them as a value inside a Categorical - } if random_state is not None: #This is required because configspace doesn't allow None as a value space['random_state'] = random_state - - return ConfigurationSpace( - space = space - ) -#TODO Conditional search spaces -def get_LinearSVC_ConfigurationSpace(random_state=None,): + + cs = ConfigurationSpace(space) + cs.add_hyperparameters([penalty, C, loss]) + cs.add_conditions([loss_condition]) + + return cs + + +def get_SVC_ConfigurationSpace(random_state): + space = { - 'penalty': Categorical("penalty", ['l1', 'l2']), - 'loss': Categorical("loss", ['hinge', 'squared_hinge']), - 'dual': Categorical("dual", [True, False]), - 'C': Float("C", bounds=(1e-4, 25), log=True), - } - + 'max_iter': 3000, + 'probability':TRUE_SPECIAL_STRING} + + kernel = Categorical("kernel", ['poly', 'rbf', 'sigmoid']) + C = Float('C', (0.01, 1e5), log=True) + degree = Integer("degree", bounds=(1, 5)) + gamma = Float("gamma", bounds=(1e-5, 8), log=True) + shrinking = Categorical("shrinking", [True, False]) + coef0 = Float("coef0", bounds=(-1, 1)) + + degree_condition = EqualsCondition(degree, kernel, 'poly') + gamma_condition = InCondition(gamma, kernel, ['rbf', 'poly']) + coef0_condition = InCondition(coef0, kernel, ['poly', 'sigmoid']) + if random_state is not None: #This is required because configspace doesn't allow None as a value space['random_state'] = random_state - - return ConfigurationSpace( - space = space - ) + cs = ConfigurationSpace(space) + cs.add_hyperparameters([kernel, C, coef0, degree, gamma, shrinking]) + cs.add_conditions([degree_condition, gamma_condition, coef0_condition]) + return cs -def get_RandomForestClassifier_ConfigurationSpace(random_state=None): + +def get_RandomForestClassifier_ConfigurationSpace(n_features, random_state): space = { - 'n_estimators': 100, + 'n_estimators': 128, #as recommended by Oshiro et al. 
(2012 + 'max_features': Integer("max_features", bounds=(1, max(1, n_features))), #log scale like autosklearn? 'criterion': Categorical("criterion", ['gini', 'entropy']), 'min_samples_split': Integer("min_samples_split", bounds=(2, 20)), 'min_samples_leaf': Integer("min_samples_leaf", bounds=(1, 20)), 'bootstrap': Categorical("bootstrap", [True, False]), + 'class_weight': Categorical("class_weight", [NONE_SPECIAL_STRING, 'balanced']), } if random_state is not None: #This is required because configspace doesn't allow None as a value @@ -115,46 +138,21 @@ def get_RandomForestClassifier_ConfigurationSpace(random_state=None): space = space ) -def get_GradientBoostingClassifier_ConfigurationSpace(random_state=None, n_classes=None): - - if n_classes is not None and n_classes > 2: - loss = 'log_loss' - else: - loss = Categorical("loss", ['log_loss', 'exponential']) - - space = { - 'n_estimators': 100, - 'loss': loss, - 'learning_rate': Float("learning_rate", bounds=(1e-3, 1), log=True), - 'min_samples_leaf': Integer("min_samples_leaf", bounds=(1, 200)), - 'min_samples_split': Integer("min_samples_split", bounds=(2, 20)), - 'subsample': Float("subsample", bounds=(0.1, 1.0)), - 'max_features': Float("max_features", bounds=(0.1, 1.0)), - 'max_depth': Integer("max_depth", bounds=(1, 10)), - - #TODO include max leaf nodes? - #TODO validation fraction + n_iter_no_change? 
maybe as conditional - - 'tol': 1e-4, - } - - if random_state is not None: #This is required because configspace doesn't allow None as a value - space['random_state'] = random_state - - return ConfigurationSpace( - space = space - ) - -def get_XGBClassifier_ConfigurationSpace(random_state=None,): +def get_XGBClassifier_ConfigurationSpace(random_state,): space = { 'n_estimators': 100, 'learning_rate': Float("learning_rate", bounds=(1e-3, 1), log=True), - 'subsample': Float("subsample", bounds=(0.1, 1.0)), + 'subsample': Float("subsample", bounds=(0.5, 1.0)), 'min_child_weight': Integer("min_child_weight", bounds=(1, 21)), - 'max_depth': Integer("max_depth", bounds=(1, 11)), + 'gamma': Float("gamma", bounds=(1e-4, 20), log=True), + 'max_depth': Integer("max_depth", bounds=(3, 18)), + 'reg_alpha': Float("reg_alpha", bounds=(1e-4, 100), log=True), + 'reg_lambda': Float("reg_lambda", bounds=(1e-4, 1), log=True), 'n_jobs': 1, + 'nthread': 1, + 'verbosity': 0, } if random_state is not None: #This is required because configspace doesn't allow None as a value @@ -164,7 +162,7 @@ def get_XGBClassifier_ConfigurationSpace(random_state=None,): space = space ) -def get_LGBMClassifier_ConfigurationSpace(random_state=None,): +def get_LGBMClassifier_ConfigurationSpace(random_state,): space = { 'objective': 'binary', @@ -184,7 +182,7 @@ def get_LGBMClassifier_ConfigurationSpace(random_state=None,): ) -def get_ExtraTreesClassifier_ConfigurationSpace(random_state=None): +def get_ExtraTreesClassifier_ConfigurationSpace(random_state): space = { 'n_estimators': 100, 'criterion': Categorical("criterion", ["gini", "entropy"]), @@ -204,41 +202,36 @@ def get_ExtraTreesClassifier_ConfigurationSpace(random_state=None): -def get_SGDClassifier_ConfigurationSpace(random_state=None): +def get_SGDClassifier_ConfigurationSpace(random_state): space = { - 'loss': Categorical("loss", ['log_loss', 'modified_huber']), + 'loss': Categorical("loss", ['squared_hinge', 'modified_huber']), #don't include 
hinge because we have LinearSVC, don't include log because we have LogisticRegression 'penalty': 'elasticnet', 'alpha': Float("alpha", bounds=(1e-5, 0.01), log=True), - 'learning_rate': Categorical("learning_rate", ['invscaling', 'constant']), 'l1_ratio': Float("l1_ratio", bounds=(0.0, 1.0)), 'eta0': Float("eta0", bounds=(0.01, 1.0)), - 'power_t': Float("power_t", bounds=(1e-5, 100.0), log=True), 'n_jobs': 1, 'fit_intercept': Categorical("fit_intercept", [True]), + 'class_weight': Categorical("class_weight", [NONE_SPECIAL_STRING, 'balanced']), } if random_state is not None: #This is required because configspace doesn't allow None as a value space['random_state'] = random_state - return ConfigurationSpace( + power_t = Float("power_t", bounds=(1e-5, 100.0), log=True) + learning_rate = Categorical("learning_rate", ['invscaling', 'constant', "optimal"]) + powertcond = EqualsCondition(power_t, learning_rate, 'invscaling') + + + cs = ConfigurationSpace( space = space ) + cs.add_hyperparameters([power_t, learning_rate]) + cs.add_conditions([powertcond]) + return cs -def get_MLPClassifier_ConfigurationSpace(random_state=None): - space = { - 'alpha': Float("alpha", bounds=(1e-4, 1e-1), log=True), - 'learning_rate_init': Float("learning_rate_init", bounds=(1e-3, 1.), log=True), - } - - if random_state is not None: #This is required because configspace doesn't allow None as a value - space['random_state'] = random_state - - return ConfigurationSpace( - space = space - ) GaussianNB_ConfigurationSpace = {} @@ -261,12 +254,11 @@ def get_MultinomialNB_ConfigurationSpace(): -def get_AdaBoostClassifier_ConfigurationSpace(random_state=None): +def get_AdaBoostClassifier_ConfigurationSpace(random_state): space = { 'n_estimators': Integer("n_estimators", bounds=(50, 500)), 'learning_rate': Float("learning_rate", bounds=(0.01, 2), log=True), 'algorithm': Categorical("algorithm", ['SAMME', 'SAMME.R']), - 'max_depth': Integer("max_depth", bounds=(1, 10)), } if random_state is not None: 
#This is required because configspace doesn't allow None as a value @@ -274,4 +266,172 @@ def get_AdaBoostClassifier_ConfigurationSpace(random_state=None): return ConfigurationSpace( space = space - ) \ No newline at end of file + ) + + +def get_QuadraticDiscriminantAnalysis_ConfigurationSpace(): + return ConfigurationSpace( + space = { + 'reg_param': Float("reg_param", bounds=(0, 1)), + } + ) + +def get_PassiveAggressiveClassifier_ConfigurationSpace(random_state): + space = { + 'C': Float("C", bounds=(1e-5, 10), log=True), + 'loss': Categorical("loss", ['hinge', 'squared_hinge']), + 'average': Categorical("average", [True, False]), + } + + if random_state is not None: #This is required because configspace doesn't allow None as a value + space['random_state'] = random_state + + return ConfigurationSpace( + space = space + ) +#TODO support auto shrinkage when solver is svd. may require custom node +def get_LinearDiscriminantAnalysis_ConfigurationSpace(): + + solver = Categorical("solver", ['svd', 'lsqr', 'eigen']), + shrinkage = Float("shrinkage", bounds=(0, 1)), + + shrinkcond = NotEqualsCondition(shrinkage, solver, 'svd') + + cs = ConfigurationSpace() + cs.add_hyperparameters([solver, shrinkage]) + cs.add_conditions([shrinkcond]) + + return + + + +#### Gradient Boosting Classifiers + +def get_GradientBoostingClassifier_ConfigurationSpace(n_features, random_state): + early_stop = Categorical("early_stop", ["off", "valid", "train"]) + n_iter_no_change = Integer("n_iter_no_change",bounds=(1,20)) + validation_fraction = Float("validation_fraction", bounds=(0.01, 0.4)) + + n_iter_no_change_cond = InCondition(n_iter_no_change, early_stop, ["valid", "train"] ) + validation_fraction_cond = EqualsCondition(validation_fraction, early_stop, "valid") + + space = { + 'loss': Categorical("loss", ['log_loss', 'exponential']), + 'learning_rate': Float("learning_rate", bounds=(1e-3, 1), log=True), + 'min_samples_leaf': Integer("min_samples_leaf", bounds=(1, 200)), + 
'min_samples_split': Integer("min_samples_split", bounds=(2, 20)), + 'subsample': Float("subsample", bounds=(0.1, 1.0)), + 'max_features': Integer("max_features", bounds=(1, max(1, n_features))), + 'max_leaf_nodes': Integer("max_leaf_nodes", bounds=(3, 2047)), + 'max_depth': Integer("max_depth", bounds=(1, 2*n_features)), + 'tol': 1e-4, + } + + if random_state is not None: #This is required because configspace doesn't allow None as a value + space['random_state'] = random_state + + cs = ConfigurationSpace( + space = space + ) + cs.add_hyperparameters([n_iter_no_change, validation_fraction, early_stop ]) + cs.add_conditions([validation_fraction_cond, n_iter_no_change_cond]) + return cs + + + + +#only difference is l2_regularization +def get_HistGradientBoostingClassifier_ConfigurationSpace(n_features, random_state): + early_stopping = Categorical("early_stopping", ["off", "valid", "train"]) + n_iter_no_change = Integer("n_iter_no_change",bounds=(1,20)) + validation_fraction = Float("validation_fraction", bounds=(0.01, 0.4)) + + n_iter_no_change_cond = InCondition(n_iter_no_change, early_stopping, ["valid", "train"] ) + validation_fraction_cond = EqualsCondition(validation_fraction, early_stopping, "valid") + + space = { + 'loss': Categorical("loss", ['log_loss', 'exponential']), + 'learning_rate': Float("learning_rate", bounds=(1e-3, 1), log=True), + 'min_samples_leaf': Integer("min_samples_leaf", bounds=(1, 200)), + 'max_features': Float("max_features", bounds=(0.1,1.0)), + 'max_leaf_nodes': Integer("max_leaf_nodes", bounds=(3, 2047)), + 'max_depth': Integer("max_depth", bounds=(1, 2*n_features)), + 'l2_regularization': Float("l2_regularization", bounds=(1e-10, 1), log=True), + 'tol': 1e-4, + } + + if random_state is not None: #This is required because configspace doesn't allow None as a value + space['random_state'] = random_state + + cs = ConfigurationSpace( + space = space + ) + cs.add_hyperparameters([n_iter_no_change, validation_fraction, early_stopping ]) + 
cs.add_conditions([validation_fraction_cond, n_iter_no_change_cond]) + + return cs + +def GradientBoostingClassifier_hyperparameter_parser(params): + + final_params = { + 'loss': params['loss'], + 'learning_rate': params['learning_rate'], + 'min_samples_leaf': params['min_samples_leaf'], + 'min_samples_split': params['min_samples_split'], + 'subsample': params['subsample'], + 'max_features': params['max_features'], + 'max_leaf_nodes': params['max_leaf_nodes'], + 'max_depth': params['max_depth'], + 'tol': params['tol'], + } + + if "l2_regularization" in params: + final_params['l2_regularization'] = params['l2_regularization'] + + if params['early_stop'] == 'off': + final_params['n_iter_no_change'] = None + final_params['validation_fraction'] = None + elif params['early_stop'] == 'valid': + final_params['n_iter_no_change'] = params['n_iter_no_change'] + final_params['validation_fraction'] = params['validation_fraction'] + elif params['early_stop'] == 'train': + final_params['n_iter_no_change'] = params['n_iter_no_change'] + final_params['validation_fraction'] = None + + + return final_params + + +### + +def get_MLPClassifier_ConfigurationSpace(random_state): + space = {"n_iter_no_change":32} + + if random_state is not None: #This is required because configspace doesn't allow None as a value + space['random_state'] = random_state + + cs = ConfigurationSpace( + space = space + ) + + n_hidden_layers = Integer("n_hidden_layers", bounds=(1, 3)) + n_nodes_per_layer = Integer("n_nodes_per_layer", bounds=(16, 512)) + activation = Categorical("activation", ['tanh', 'relu']) + alpha = Float("alpha", bounds=(1e-7, 1e-1), log=True) + learning_rate = Float("learning_rate", bounds=(1e-4, 1e-1), log=True) + early_stopping = Categorical("early_stopping", [True,False]) + + cs.add_hyperparameters([n_hidden_layers, n_nodes_per_layer, activation, alpha, learning_rate, early_stopping]) + + return cs + +def MLPClassifier_hyperparameter_parser(params): + hyperparameters = { + 
'n_iter_no_change': params['n_iter_no_change'], + 'hidden_layer_sizes' : [params['n_nodes_per_layer']]*params['n_hidden_layers'], + 'activation': params['activation'], + 'alpha': params['alpha'], + 'learning_rate': params['learning_rate'], + 'early_stopping': params['early_stopping'], + } + return hyperparameters \ No newline at end of file diff --git a/tpot2/config/classifiers_sklearnex.py b/tpot2/config/classifiers_sklearnex.py index a158a9a6..ad581898 100644 --- a/tpot2/config/classifiers_sklearnex.py +++ b/tpot2/config/classifiers_sklearnex.py @@ -2,7 +2,7 @@ from ConfigSpace import ConfigurationSpace, Integer, Float, Categorical, Normal -def get_RandomForestClassifier_ConfigurationSpace(random_state=None): +def get_RandomForestClassifier_ConfigurationSpace(random_state): space = { 'n_estimators': 100, #TODO make this a higher number? learned? 'bootstrap': Categorical("bootstrap", [True, False]), @@ -19,7 +19,7 @@ def get_RandomForestClassifier_ConfigurationSpace(random_state=None): space = space ) -def get_KNeighborsClassifier_ConfigurationSpace(n_samples=10): +def get_KNeighborsClassifier_ConfigurationSpace(n_samples): return ConfigurationSpace( space = { 'n_neighbors': Integer("n_neighbors", bounds=(1, max(n_samples, 100)), log=True), @@ -29,7 +29,7 @@ def get_KNeighborsClassifier_ConfigurationSpace(n_samples=10): #TODO add conditionals -def get_LogisticRegression_ConfigurationSpace(random_state=None): +def get_LogisticRegression_ConfigurationSpace(random_state): space = { 'solver': Categorical("solver", ['liblinear', 'sag', 'saga']), 'penalty': Categorical("penalty", ['l1', 'l2']), @@ -45,7 +45,7 @@ def get_LogisticRegression_ConfigurationSpace(random_state=None): space = space ) -def get_SVC_ConfigurationSpace(random_state=None): +def get_SVC_ConfigurationSpace(random_state): space = { 'kernel': Categorical("kernel", ['poly', 'rbf', 'linear', 'sigmoid']), 'C': Float("C", bounds=(1e-4, 25), log=True), @@ -62,7 +62,7 @@ def 
get_SVC_ConfigurationSpace(random_state=None): space = space ) -def get_NuSVC_ConfigurationSpace(random_state=None): +def get_NuSVC_ConfigurationSpace(random_state): space = { 'nu': Float("nu", bounds=(0.05, 1.0)), 'kernel': Categorical("kernel", ['poly', 'rbf', 'linear', 'sigmoid']), diff --git a/tpot2/config/get_configspace.py b/tpot2/config/get_configspace.py index 44892278..cf75cd47 100644 --- a/tpot2/config/get_configspace.py +++ b/tpot2/config/get_configspace.py @@ -52,6 +52,7 @@ from sklearn.kernel_approximation import RBFSampler from sklearn.preprocessing import RobustScaler from sklearn.preprocessing import StandardScaler +from sklearn.preprocessing import PowerTransformer, QuantileTransformer from sklearn.feature_selection import SelectFwe @@ -62,13 +63,12 @@ import sklearn.feature_selection - +#TODO create a selectomixin using these? from sklearn.feature_selection import f_classif from sklearn.feature_selection import f_regression from sklearn.linear_model import SGDRegressor -from sklearn.linear_model import LinearRegression from sklearn.linear_model import Ridge from sklearn.linear_model import Lasso from sklearn.linear_model import ElasticNet @@ -76,21 +76,23 @@ from sklearn.linear_model import LassoLars, LassoLarsCV from sklearn.linear_model import RidgeCV +from sklearn.svm import SVR, SVC +from sklearn.svm import LinearSVR, LinearSVC -from sklearn.svm import SVR -from sklearn.svm import LinearSVR - -from sklearn.ensemble import AdaBoostRegressor, GradientBoostingRegressor,RandomForestRegressor +from sklearn.ensemble import AdaBoostRegressor, AdaBoostClassifier, GradientBoostingRegressor,RandomForestRegressor from sklearn.ensemble import BaggingRegressor from sklearn.ensemble import ExtraTreesRegressor +from sklearn.ensemble import HistGradientBoostingClassifier, HistGradientBoostingRegressor from sklearn.tree import DecisionTreeRegressor from sklearn.neighbors import KNeighborsRegressor from sklearn.linear_model import ElasticNetCV -from xgboost 
import XGBRegressor +from sklearn.discriminant_analysis import LinearDiscriminantAnalysis -from tpot2.builtin_modules import RFE_ExtraTreesClassifier, SelectFromModel_ExtraTreesClassifier, RFE_ExtraTreesRegressor, SelectFromModel_ExtraTreesRegressor +from sklearn.gaussian_process import GaussianProcessRegressor + +from xgboost import XGBRegressor from tpot2.builtin_modules import AddTransformer, mul_neg_1_Transformer, MulTransformer, SafeReciprocalTransformer, EQTransformer, NETransformer, GETransformer, GTTransformer, LETransformer, LTTransformer, MinTransformer, MaxTransformer, ZeroTransformer, OneTransformer, NTransformer @@ -99,8 +101,11 @@ #MDR -all_methods = [SGDClassifier, RandomForestClassifier, ExtraTreesClassifier, GradientBoostingClassifier, MLPClassifier, DecisionTreeClassifier, XGBClassifier, KNeighborsClassifier, SVC, LogisticRegression, LGBMClassifier, LinearSVC, GaussianNB, BernoulliNB, MultinomialNB, ExtraTreesRegressor, RandomForestRegressor, GradientBoostingRegressor, BaggingRegressor, DecisionTreeRegressor, KNeighborsRegressor, XGBRegressor, RFE_ExtraTreesClassifier, SelectFromModel_ExtraTreesClassifier, RFE_ExtraTreesRegressor, SelectFromModel_ExtraTreesRegressor, ZeroCount, OneHotEncoder, ColumnOneHotEncoder, Binarizer, FastICA, FeatureAgglomeration, MaxAbsScaler, MinMaxScaler, Normalizer, Nystroem, PCA, PolynomialFeatures, RBFSampler, RobustScaler, StandardScaler, SelectFwe, SelectPercentile, VarianceThreshold, RFE, SelectFromModel, f_classif, f_regression, SGDRegressor, LinearRegression, Ridge, Lasso, ElasticNet, Lars, LassoLars, LassoLarsCV, RidgeCV, SVR, LinearSVR, AdaBoostRegressor, GradientBoostingRegressor, RandomForestRegressor, BaggingRegressor, ExtraTreesRegressor, DecisionTreeRegressor, KNeighborsRegressor, ElasticNetCV, +all_methods = [SGDClassifier, RandomForestClassifier, ExtraTreesClassifier, GradientBoostingClassifier, MLPClassifier, DecisionTreeClassifier, XGBClassifier, KNeighborsClassifier, SVC, LogisticRegression, 
LGBMClassifier, LinearSVC, GaussianNB, BernoulliNB, MultinomialNB, ExtraTreesRegressor, RandomForestRegressor, GradientBoostingRegressor, BaggingRegressor, DecisionTreeRegressor, KNeighborsRegressor, XGBRegressor, ZeroCount, OneHotEncoder, ColumnOneHotEncoder, Binarizer, FastICA, FeatureAgglomeration, MaxAbsScaler, MinMaxScaler, Normalizer, Nystroem, PCA, PolynomialFeatures, RBFSampler, RobustScaler, StandardScaler, SelectFwe, SelectPercentile, VarianceThreshold, SGDRegressor, Ridge, Lasso, ElasticNet, Lars, LassoLars, LassoLarsCV, RidgeCV, SVR, LinearSVR, AdaBoostRegressor, GradientBoostingRegressor, RandomForestRegressor, BaggingRegressor, ExtraTreesRegressor, DecisionTreeRegressor, KNeighborsRegressor, ElasticNetCV, + AdaBoostClassifier, + GaussianProcessRegressor, HistGradientBoostingClassifier, HistGradientBoostingRegressor, AddTransformer, mul_neg_1_Transformer, MulTransformer, SafeReciprocalTransformer, EQTransformer, NETransformer, GETransformer, GTTransformer, LETransformer, LTTransformer, MinTransformer, MaxTransformer, ZeroTransformer, OneTransformer, NTransformer, + PowerTransformer, QuantileTransformer, ] @@ -118,36 +123,21 @@ all_methods.append(MultiSURF) if 'sklearnex' in sys.modules: - from sklearnex.linear_model import LinearRegression - from sklearnex.linear_model import Ridge - from sklearnex.linear_model import Lasso - from sklearnex.linear_model import ElasticNet - from sklearnex.svm import SVR - from sklearnex.svm import NuSVR - from sklearnex.ensemble import RandomForestRegressor - from sklearnex.neighbors import KNeighborsRegressor - - from sklearnex.ensemble import RandomForestClassifier - from sklearnex.neighbors import KNeighborsClassifier - from sklearnex.svm import SVC - from sklearnex.svm import NuSVC - from sklearnex.linear_model import LogisticRegression - - - all_methods.append(LinearRegression) - all_methods.append(Ridge) - all_methods.append(Lasso) - all_methods.append(ElasticNet) - all_methods.append(SVR) - 
all_methods.append(NuSVR) - all_methods.append(RandomForestRegressor) - all_methods.append(KNeighborsRegressor) - - all_methods.append(RandomForestClassifier) - all_methods.append(KNeighborsClassifier) - all_methods.append(SVC) - all_methods.append(NuSVC) - all_methods.append(LogisticRegression) + import sklearnex + + all_methods.append(sklearnex.linear_model.LinearRegression) + all_methods.append(sklearnex.linear_model.Ridge) + all_methods.append(sklearnex.linear_model.Lasso) + all_methods.append(sklearnex.linear_model.ElasticNet) + all_methods.append(sklearnex.svm.SVR) + all_methods.append(sklearnex.svm.NuSVR) + all_methods.append(sklearnex.ensemble.RandomForestRegressor) + all_methods.append(sklearnex.neighbors.KNeighborsRegressor) + all_methods.append(sklearnex.ensemble.RandomForestClassifier) + all_methods.append(sklearnex.neighbors.KNeighborsClassifier) + all_methods.append(sklearnex.svm.SVC) + all_methods.append(sklearnex.svm.NuSVC) + all_methods.append(sklearnex.linear_model.LogisticRegression) STRING_TO_CLASS = { @@ -155,15 +145,18 @@ } - +from sklearn.linear_model import PassiveAggressiveClassifier +from sklearn.discriminant_analysis import QuadraticDiscriminantAnalysis +from sklearn.linear_model import ARDRegression +from sklearn.gaussian_process import GaussianProcessRegressor GROUPNAMES = { "selectors": ["SelectFwe", "SelectPercentile", "VarianceThreshold",], "selectors_classification": ["SelectFwe", "SelectPercentile", "VarianceThreshold", "RFE_classification", "SelectFromModel_classification"], "selectors_regression": ["SelectFwe", "SelectPercentile", "VarianceThreshold", "RFE_regression", "SelectFromModel_regression"], - "classifiers" : ["LogisticRegression", "DecisionTreeClassifier", "KNeighborsClassifier", "GradientBoostingClassifier", "ExtraTreesClassifier", "RandomForestClassifier", "SGDClassifier", "GaussianNB", "BernoulliNB", "MultinomialNB", "XGBClassifier", "SVC", "MLPClassifier"], - "regressors" : ["ElasticNetCV", "ExtraTreesRegressor", 
"GradientBoostingRegressor", "AdaBoostRegressor", "DecisionTreeRegressor", "KNeighborsRegressor", "LassoLarsCV", "SVR", "RandomForestRegressor", "RidgeCV", "XGBRegressor", "SGDRegressor" ], - "transformers": ["Binarizer", "Normalizer", "PCA", "ZeroCount", "OneHotEncoder", "FastICA", "FeatureAgglomeration", "Nystroem", "RBFSampler"], + "classifiers" : ['AdaBoostClassifier', 'BernoulliNB', 'DecisionTreeClassifier', 'ExtraTreesClassifier', 'GaussianNB', 'HistGradientBoostingClassifier', 'KNeighborsClassifier', 'LogisticRegression', "LinearSVC", "SVC", 'MLPClassifier', 'MultinomialNB', "PassiveAggressiveClassifier", "QuadraticDiscriminantAnalysis", 'RandomForestClassifier', 'SGDClassifier', 'XGBClassifier'], + "regressors" : ['AdaBoostRegressor', "ARDRegression", 'DecisionTreeRegressor', 'ExtraTreesRegressor', 'GaussianProcessRegressor', 'HistGradientBoostingRegressor', 'KNeighborsRegressor', 'LinearDiscriminantAnalysis', 'LinearSVR', "MLPRegressor", 'RandomForestRegressor', 'SGDRegressor', 'SVR', 'XGBRegressor'], + "transformers": ["Binarizer", "Normalizer", "PCA", "ZeroCount", "OneHotEncoder", "FastICA", "FeatureAgglomeration", "Nystroem", "RBFSampler", "QuantileTransformer", "PowerTransformer"], "arithmatic": ["AddTransformer", "mul_neg_1_Transformer", "MulTransformer", "SafeReciprocalTransformer", "EQTransformer", "NETransformer", "GETransformer", "GTTransformer", "LETransformer", "LTTransformer", "MinTransformer", "MaxTransformer"], "imputers": [], "skrebate": ["ReliefF", "SURF", "SURFstar", "MultiSURF"], @@ -194,20 +187,24 @@ def get_configspace(name, n_classes=3, n_samples=100, n_features=100, random_sta #classifiers.py + case "AdaBoostClassifier": + return classifiers.get_AdaBoostClassifier_ConfigurationSpace(random_state=random_state) case "LogisticRegression": - return classifiers.get_LogisticRegression_ConfigurationSpace(random_state=random_state) + return classifiers.get_LogisticRegression_ConfigurationSpace(n_samples=n_samples, n_features=n_features, 
random_state=random_state) case "KNeighborsClassifier": return classifiers.get_KNeighborsClassifier_ConfigurationSpace(n_samples=n_samples) case "DecisionTreeClassifier": - return classifiers.get_DecisionTreeClassifier_ConfigurationSpace(random_state=random_state) + return classifiers.get_DecisionTreeClassifier_ConfigurationSpace(n_featues=n_features, random_state=random_state) case "SVC": return classifiers.get_SVC_ConfigurationSpace(random_state=random_state) case "LinearSVC": return classifiers.get_LinearSVC_ConfigurationSpace(random_state=random_state) case "RandomForestClassifier": - return classifiers.get_RandomForestClassifier_ConfigurationSpace(random_state=random_state) + return classifiers.get_RandomForestClassifier_ConfigurationSpace(n_features=n_features, random_state=random_state) case "GradientBoostingClassifier": - return classifiers.get_GradientBoostingClassifier_ConfigurationSpace(n_classes=n_classes) + return classifiers.get_GradientBoostingClassifier_ConfigurationSpace(n_features=n_features, random_state=random_state) + case "HistGradientBoostingClassifier": + return classifiers.get_HistGradientBoostingClassifier_ConfigurationSpace(n_features=n_features, random_state=random_state) case "XGBClassifier": return classifiers.get_XGBClassifier_ConfigurationSpace(random_state=random_state) case "LGBMClassifier": @@ -224,7 +221,63 @@ def get_configspace(name, n_classes=3, n_samples=100, n_features=100, random_sta return classifiers.get_MultinomialNB_ConfigurationSpace() case "GaussianNB": return {} - + case "LassoLarsCV": + return {} + case "ElasticNetCV": + return regressors.ElasticNetCV_configspace + case "RidgeCV": + return {} + + #regressors.py + case "RandomForestRegressor": + return regressors.get_RandomForestRegressor_ConfigurationSpace(random_state=random_state) + case "SGDRegressor": + return regressors.get_SGDRegressor_ConfigurationSpace(random_state=random_state) + case "Ridge": + return 
regressors.get_Ridge_ConfigurationSpace(random_state=random_state) + case "Lasso": + return regressors.get_Lasso_ConfigurationSpace(random_state=random_state) + case "ElasticNet": + return regressors.get_ElasticNet_ConfigurationSpace(random_state=random_state) + case "Lars": + return regressors.get_Lars_ConfigurationSpace(random_state=random_state) + case "OthogonalMatchingPursuit": + return regressors.get_OthogonalMatchingPursuit_ConfigurationSpace() + case "BayesianRidge": + return regressors.get_BayesianRidge_ConfigurationSpace() + case "LassoLars": + return regressors.get_LassoLars_ConfigurationSpace(random_state=random_state) + case "BaggingRegressor": + return regressors.get_BaggingRegressor_ConfigurationSpace(random_state=random_state) + case "ARDRegression": + return regressors.get_ARDRegression_ConfigurationSpace() + case "TheilSenRegressor": + return regressors.get_TheilSenRegressor_ConfigurationSpace(random_state=random_state) + case "Perceptron": + return regressors.get_Perceptron_ConfigurationSpace(random_state=random_state) + case "DecisionTreeRegressor": + return regressors.get_DecisionTreeRegressor_ConfigurationSpace(n_features=n_features, random_state=random_state) + case "LinearSVR": + return regressors.get_LinearSVR_ConfigurationSpace(random_state=random_state) + case "SVR": + return regressors.get_SVR_ConfigurationSpace() + case "XGBRegressor": + return regressors.get_XGBRegressor_ConfigurationSpace(random_state=random_state) + case "AdaBoostRegressor": + return regressors.get_AdaBoostRegressor_ConfigurationSpace(random_state=random_state) + case "ExtraTreesRegressor": + return regressors.get_ExtraTreesRegressor_ConfigurationSpace(random_state=random_state) + case "GradientBoostingRegressor": + return regressors.get_GradientBoostingRegressor_ConfigurationSpace(n_features=n_features, random_state=random_state) + case "HistGradientBoostingRegressor": + return regressors.get_HistGradientBoostingRegressor_ConfigurationSpace(n_features=n_features, 
random_state=random_state) + case "MLPRegressor": + return regressors.get_MLPRegressor_ConfigurationSpace(random_state=random_state) + case "KNeighborsRegressor": + return regressors.get_KNeighborsRegressor_ConfigurationSpace(n_samples=n_samples) + case "GaussianProcessRegressor": + return regressors.get_GaussianProcessRegressor_ConfigurationSpace(n_features=n_features, random_state=random_state) + #transformers.py case "Binarizer": return transformers.Binarizer_configspace @@ -244,7 +297,23 @@ def get_configspace(name, n_classes=3, n_samples=100, n_features=100, random_sta return transformers.get_Nystroem_configspace(n_features=n_features, random_state=random_state) case "RBFSampler": return transformers.get_RBFSampler_configspace(n_features=n_features, random_state=random_state) - + case "MinMaxScaler": + return {} + case "PowerTransformer": + return {} + case "QuantileTransformer": + return transformers.get_QuantileTransformer_configspace(random_state=random_state) + case "RobustScaler": + return transformers.RobustScaler_configspace + case "ColumnOneHotEncoder": + return {} + case "MaxAbsScaler": + return {} + case "PolynomialFeatures": + return transformers.PolynomialFeatures_configspace + case "StandardScaler": + return {} + #selectors.py case "SelectFwe": return selectors.SelectFwe_configspace @@ -256,6 +325,7 @@ def get_configspace(name, n_classes=3, n_samples=100, n_features=100, random_sta return selectors.RFE_configspace_part case "SelectFromModel": return selectors.SelectFromModel_configspace_part + #special_configs.py case "AddTransformer": @@ -291,7 +361,7 @@ def get_configspace(name, n_classes=3, n_samples=100, n_features=100, random_sta space = { - 'n': Float("n", bounds=(-1e3, 1e3), log=True), + 'n': Float("n", bounds=(-1e3, 1e3)), } ) @@ -341,7 +411,8 @@ def get_configspace(name, n_classes=3, n_samples=100, n_features=100, random_sta case "KNeighborsRegressor_sklearnex": return 
regressors_sklearnex.get_KNeighborsRegressor_ConfigurationSpace(n_samples=n_samples) - return {} + #raise error + raise ValueError(f"Could not find configspace for {name}") def get_search_space(name, n_classes=3, n_samples=100, n_features=100, random_state=None): @@ -359,9 +430,12 @@ def get_search_space(name, n_classes=3, n_samples=100, n_features=100, random_st return get_search_space(name_list, n_classes=n_classes, n_samples=n_samples, n_features=n_features, random_state=random_state) if name is None: + warnings.warn(f"name is None") return None if name not in STRING_TO_CLASS: + print("FOOO ", name) + warnings.warn(f"Could not find class for {name}") return None return get_node(name, n_classes=n_classes, n_samples=n_samples, n_features=n_features, random_state=random_state) @@ -370,6 +444,8 @@ def get_search_space(name, n_classes=3, n_samples=100, n_features=100, random_st def get_node(name, n_classes=3, n_samples=100, n_features=100, random_state=None): #these are wrappers that take in another estimator as a parameter + # TODO Add AdaBoostRegressor, AdaBoostClassifier as wrappers? wrap a decision tree with different params? + # TODO add other meta-estimators? 
if name == "RFE_classification": rfe_sp = get_configspace(name, n_classes=n_classes, n_samples=n_samples, random_state=random_state) ext = get_node("ExtraTreesClassifier", n_classes=n_classes, n_samples=n_samples, random_state=random_state) @@ -386,7 +462,26 @@ def get_node(name, n_classes=3, n_samples=100, n_features=100, random_state=None sfm_sp = get_configspace(name, n_classes=n_classes, n_samples=n_samples, random_state=random_state) ext = get_node("ExtraTreesRegressor", n_classes=n_classes, n_samples=n_samples, random_state=random_state) return WrapperPipeline(nodegen=ext, method=SelectFromModel, configspace=sfm_sp) - + + #these are nodes that have special search spaces which require custom parsing of the hyperparameters + if name == "RobustScaler": + configspace = get_configspace(name, n_classes=n_classes, n_samples=n_samples, random_state=random_state) + return EstimatorNode(STRING_TO_CLASS[name], configspace, hyperparameter_parser=transformers.robust_scaler_hyperparameter_parser) + if name == "GradientBoostingClassifier" or name == "HistGradientBoosting": + configspace = get_configspace(name, n_classes=n_classes, n_samples=n_samples, random_state=random_state) + return EstimatorNode(STRING_TO_CLASS[name], configspace, hyperparameter_parser=classifiers.GradientBoostingClassifier_hyperparameter_parser) + if name == "GradientBoostingRegressor" or name == "HistGradientBoostingRegressor": + configspace = get_configspace(name, n_classes=n_classes, n_samples=n_samples, random_state=random_state) + return EstimatorNode(STRING_TO_CLASS[name], configspace, hyperparameter_parser=regressors.GradientBoostingRegressor_hyperparameter_parser) + if name == "MLPClassifier": + configspace = get_configspace(name, n_classes=n_classes, n_samples=n_samples, random_state=random_state) + return EstimatorNode(STRING_TO_CLASS[name], configspace, hyperparameter_parser=classifiers.MLPClassifier_hyperparameter_parser) + if name == "MLPRegressor": + configspace = get_configspace(name, 
n_classes=n_classes, n_samples=n_samples, random_state=random_state) + return EstimatorNode(STRING_TO_CLASS[name], configspace, hyperparameter_parser=regressors.MLPRegressor_hyperparameter_parser) + if name == "GaussianProcessRegressor": + configspace = get_configspace(name, n_classes=n_classes, n_samples=n_samples, random_state=random_state) + return EstimatorNode(STRING_TO_CLASS[name], configspace, hyperparameter_parser=regressors.GaussianProcessRegressor_hyperparameter_parser) configspace = get_configspace(name, n_classes=n_classes, n_samples=n_samples, n_features=n_features, random_state=random_state) if configspace is None: diff --git a/tpot2/config/mdr_configs.py b/tpot2/config/mdr_configs.py index b99ec81e..df92cd17 100644 --- a/tpot2/config/mdr_configs.py +++ b/tpot2/config/mdr_configs.py @@ -14,7 +14,7 @@ -def get_skrebate_ReliefF_config_space(n_features=10): +def get_skrebate_ReliefF_config_space(n_features): return ConfigurationSpace( space = { 'n_features_to_select': Integer('n_features_to_select', bounds=(1, n_features), log=True), @@ -23,7 +23,7 @@ def get_skrebate_ReliefF_config_space(n_features=10): ) -def get_skrebate_SURF_config_space(n_features=10): +def get_skrebate_SURF_config_space(n_features): return ConfigurationSpace( space = { 'n_features_to_select': Integer('n_features_to_select', bounds=(1, n_features), log=True), @@ -31,13 +31,13 @@ def get_skrebate_SURF_config_space(n_features=10): ) -def get_skrebate_SURFstar_config_space(n_features=10): +def get_skrebate_SURFstar_config_space(n_features): return ConfigurationSpace( space = { 'n_features_to_select': Integer('n_features_to_select', bounds=(1, n_features), log=True), } ) -def get_skrebate_MultiSURF_config_space(n_features=10): +def get_skrebate_MultiSURF_config_space(n_features): return ConfigurationSpace( space = { 'n_features_to_select': Integer('n_features_to_select', bounds=(1, n_features), log=True), diff --git a/tpot2/config/regressors.py b/tpot2/config/regressors.py index 
845f9ff1..e87e9eda 100644 --- a/tpot2/config/regressors.py +++ b/tpot2/config/regressors.py @@ -1,37 +1,21 @@ -from sklearn.linear_model import SGDRegressor -from sklearn.linear_model import LinearRegression -from sklearn.linear_model import Ridge -from sklearn.linear_model import Lasso -from sklearn.linear_model import ElasticNet -from sklearn.linear_model import Lars -from sklearn.linear_model import LassoLars, LassoLarsCV -from sklearn.linear_model import RidgeCV - - -from sklearn.svm import SVR -from sklearn.svm import LinearSVR - -from sklearn.ensemble import AdaBoostRegressor, GradientBoostingRegressor,RandomForestRegressor -from sklearn.ensemble import BaggingRegressor -from sklearn.ensemble import ExtraTreesRegressor -from sklearn.tree import DecisionTreeRegressor -from sklearn.neighbors import KNeighborsRegressor -from sklearn.linear_model import ElasticNetCV - -from xgboost import XGBRegressor -from functools import partial - - +import sklearn from ConfigSpace import ConfigurationSpace from ConfigSpace import ConfigurationSpace, Integer, Float, Categorical, Normal - - +from ConfigSpace import EqualsCondition, OrConjunction, NotEqualsCondition, InCondition +from ..search_spaces.nodes.estimator_node import NONE_SPECIAL_STRING, TRUE_SPECIAL_STRING, FALSE_SPECIAL_STRING +import numpy as np #TODO: fill in remaining #TODO check for places were we could use log scaling -def get_RandomForestRegressor_ConfigurationSpace(random_state=None): + +ElasticNetCV_configspace = { + "l1_ratio" : np.arange(0.0, 1.01, 0.05), +} + +def get_RandomForestRegressor_ConfigurationSpace(random_state): space = { 'n_estimators': 100, + 'criterion': Categorical("criterion", ['mse', 'mae', "friedman_mse"]), 'max_features': Float("max_features", bounds=(0.05, 1.0)), 'bootstrap': Categorical("bootstrap", [True, False]), 'min_samples_split': Integer("min_samples_split", bounds=(2, 21)), @@ -46,27 +30,49 @@ def get_RandomForestRegressor_ConfigurationSpace(random_state=None): ) -def 
get_SGDRegressor_ConfigurationSpace(random_state=None): +def get_SGDRegressor_ConfigurationSpace(random_state): space = { - 'loss': Categorical("loss", ['squared_error', 'huber', 'epsilon_insensitive', 'squared_epsilon_insensitive']), - 'penalty': 'elasticnet', - 'alpha': Float("alpha", bounds=(1e-5, 0.01), log=True), - 'learning_rate': Categorical("learning_rate", ['invscaling', 'constant']), - 'l1_ratio': Float("l1_ratio", bounds=(0.0, 1.0)), - 'eta0': Float("eta0", bounds=(0.01, 1.0)), - 'power_t': Float("power_t", bounds=(1e-5, 100.0), log=True), + 'alpha': Float("alpha", bounds=(1e-7, 1e-1), log=True), + 'average': Categorical("average", [True, False]), 'fit_intercept': Categorical("fit_intercept", [True]), } if random_state is not None: #This is required because configspace doesn't allow None as a value space['random_state'] = random_state - return ConfigurationSpace( + cs = ConfigurationSpace( space = space ) + l1_ratio = Float("l1_ratio", bounds=(1e-7, 1.0), log=True) + penalty = Categorical("penalty", ["l1", "l2", "elasticnet"]) + epsilon = Float("epsilon", bounds=(1e-5, 1e-1), log=True) + loss = Categorical("loss", ["squared_loss", "huber", "epsilon_insensitive", "squared_epsilon_insensitive",]) + eta0 = Float("eta0", bounds=(1e-7, 1e-1), log=True) + learning_rate = Categorical("learning_rate", ['optimal', 'invscaling', 'constant']) + power_t = Float("power_t", bounds=(1e-5, 1.0), log=True) + + elasticnet = EqualsCondition(l1_ratio, penalty, "elasticnet") + epsilon_condition = InCondition( + epsilon, + loss, + ["huber", "epsilon_insensitive", "squared_epsilon_insensitive"], + ) + + eta0_in_inv_con = InCondition(eta0, learning_rate, ["invscaling", "constant"]) + power_t_condition = EqualsCondition(power_t, learning_rate, "invscaling") + + cs.add_hyperparameters( + [l1_ratio, penalty, epsilon, loss, eta0, learning_rate, power_t] + ) + cs.add_conditions( + [elasticnet, epsilon_condition, power_t_condition, eta0_in_inv_con] + ) + + return cs -def 
get_Ridge_ConfigurationSpace(random_state=None): + +def get_Ridge_ConfigurationSpace(random_state): space = { 'alpha': Float("alpha", bounds=(0.0, 1.0)), 'fit_intercept': Categorical("fit_intercept", [True]), @@ -81,7 +87,7 @@ def get_Ridge_ConfigurationSpace(random_state=None): space = space ) -def get_Lasso_ConfigurationSpace(random_state=None): +def get_Lasso_ConfigurationSpace(random_state): space = { 'alpha': Float("alpha", bounds=(0.0, 1.0)), 'fit_intercept': Categorical("fit_intercept", [True]), @@ -95,7 +101,7 @@ def get_Lasso_ConfigurationSpace(random_state=None): space = space ) -def get_ElasticNet_ConfigurationSpace(random_state=None): +def get_ElasticNet_ConfigurationSpace(random_state): space = { 'alpha': Float("alpha", bounds=(0.0, 1.0)), 'l1_ratio': Float("l1_ratio", bounds=(0.0, 1.0)), @@ -109,7 +115,7 @@ def get_ElasticNet_ConfigurationSpace(random_state=None): ) -def get_Lars_ConfigurationSpace(random_state=None): +def get_Lars_ConfigurationSpace(random_state): space = { } @@ -138,7 +144,7 @@ def get_BayesianRidge_ConfigurationSpace(): ) -def get_LassoLars_ConfigurationSpace(random_state=None): +def get_LassoLars_ConfigurationSpace(random_state): space = { 'alpha': Float("alpha", bounds=(0.0, 1.0)), 'eps': Float("eps", bounds=(1e-5, 1e-1), log=True), @@ -151,15 +157,8 @@ def get_LassoLars_ConfigurationSpace(random_state=None): space = space ) -def get_LassoLarsCV_ConfigurationSpace(cv): - return ConfigurationSpace( - space = { - 'cv': cv, - } - ) - -def get_BaggingRegressor_ConfigurationSpace(random_state=None): +def get_BaggingRegressor_ConfigurationSpace(random_state): space = { 'max_samples': Float("max_samples", bounds=(0.05, 1.00)), 'max_features': Float("max_features", bounds=(0.05, 1.00)), @@ -178,19 +177,19 @@ def get_ARDRegression_ConfigurationSpace(): return ConfigurationSpace( space = { - 'alpha_1': Float("alpha_1", bounds=(1e-6, 1e-1), log=True), - 'alpha_2': Float("alpha_2", bounds=(1e-6, 1e-1), log=True), - 'lambda_1': 
Float("lambda_1", bounds=(1e-6, 1e-1), log=True), - 'lambda_2': Float("lambda_2", bounds=(1e-6, 1e-1), log=True), - 'threshold_lambda': Integer("threshold_lambda", bounds=(100, 1000)), + 'alpha_1': Float("alpha_1", bounds=(1e-10, 1e-3), log=True), + 'alpha_2': Float("alpha_2", bounds=(1e-10, 1e-3), log=True), + 'lambda_1': Float("lambda_1", bounds=(1e-10, 1e-3), log=True), + 'lambda_2': Float("lambda_2", bounds=(1e-10, 1e-3), log=True), + 'threshold_lambda': Integer("threshold_lambda", bounds=(1e3, 1e5)), } ) -def get_TheilSenRegressor_ConfigurationSpace(random_state=None): +def get_TheilSenRegressor_ConfigurationSpace(random_state): space = { - 'n_subsamples': Integer("n_subsamples", bounds=(10, 100)), - 'max_subpopulation': Integer("max_subpopulation", bounds=(100, 1000)), + 'n_subsamples': Integer("n_subsamples", bounds=(10, 10000)), + 'max_subpopulation': Integer("max_subpopulation", bounds=(10, 1000)), } if random_state is not None: #This is required because configspace doesn't allow None as a value @@ -201,21 +200,10 @@ def get_TheilSenRegressor_ConfigurationSpace(random_state=None): ) -def get_SVR_ConfigurationSpace(): - return ConfigurationSpace( - space = { - 'kernel': Categorical("kernel", ['poly', 'rbf', 'linear', 'sigmoid']), - 'C': Float("C", bounds=(1e-4, 25), log=True), - 'degree': Integer("degree", bounds=(1, 4)), - 'max_iter': 3000, - 'tol': 0.005, - } - ) - -def get_Perceptron_ConfigurationSpace(random_state=None): +def get_Perceptron_ConfigurationSpace(random_state): space = { - 'penalty': Categorical("penalty", [None, 'l2', 'l1', 'elasticnet']), + 'penalty': Categorical("penalty", [NONE_SPECIAL_STRING, 'l2', 'l1', 'elasticnet']), 'alpha': Float("alpha", bounds=(1e-5, 1e-1), log=True), 'l1_ratio': Float("l1_ratio", bounds=(0.0, 1.0)), 'learning_rate': Categorical("learning_rate", ['constant', 'optimal', 'invscaling']), @@ -229,36 +217,12 @@ def get_Perceptron_ConfigurationSpace(random_state=None): space = space ) -def 
get_MLPRegressor_ConfigurationSpace(random_state=None): - space = { - 'alpha': Float("alpha", bounds=(1e-4, 1e-1), log=True), - 'learning_rate_init': Float("learning_rate_init", bounds=(1e-3, 1.), log=True), - } - - if random_state is not None: #This is required because configspace doesn't allow None as a value - space['random_state'] = random_state - - return ConfigurationSpace( - space = space - ) - - -def get_GradientBoostingRegressor_ConfigurationSpace(random_state=None): - space = { - 'n_estimators': 100, - 'loss': Categorical("loss", ['ls', 'lad', 'huber', 'quantile']), - 'learning_rate': Float("learning_rate", bounds=(1e-4, 1), log=True), - 'max_depth': Integer("max_depth", bounds=(1, 11)), - 'min_samples_split': Integer("min_samples_split", bounds=(2, 21)), - 'min_samples_leaf': Integer("min_samples_leaf", bounds=(1, 21)), - 'subsample': Float("subsample", bounds=(0.05, 1.00)), - 'max_features': Float("max_features", bounds=(0.05, 1.00)), - } -def get_DecisionTreeRegressor_ConfigurationSpace(random_state=None): +def get_DecisionTreeRegressor_ConfigurationSpace(n_features, random_state): space = { - 'max_depth': Integer("max_depth", bounds=(1, 11)), + 'criterion': Categorical("criterion", ['squared_error', 'friedman_mse', 'mae']), + 'max_depth': Integer("max_depth", bounds=(1, n_features*2)), 'min_samples_split': Integer("min_samples_split", bounds=(2, 21)), 'min_samples_leaf': Integer("min_samples_leaf", bounds=(1, 21)), } @@ -268,21 +232,22 @@ def get_DecisionTreeRegressor_ConfigurationSpace(random_state=None): ) -def get_KNeighborsRegressor_ConfigurationSpace(n_samples=100): +def get_KNeighborsRegressor_ConfigurationSpace(n_samples): return ConfigurationSpace( space = { - 'n_neighbors': Integer("n_neighbors", bounds=(1, n_samples)), + 'n_neighbors': Integer("n_neighbors", bounds=(1, min(100,n_samples))), 'weights': Categorical("weights", ['uniform', 'distance']), 'p': Integer("p", bounds=(1, 3)), 'metric': Categorical("metric", ['minkowski', 'euclidean', 
'manhattan']), } ) -def get_LinearSVR_ConfigurationSpace(random_state=None): + +def get_LinearSVR_ConfigurationSpace(random_state): space = { 'epsilon': Float("epsilon", bounds=(1e-4, 1.0), log=True), - 'C': Float("C", bounds=(1e-4, 25.0), log=True), - 'dual': Categorical("dual", [True, False]), + 'C': Float('C', (0.01, 1e5), log=True), + 'dual': "auto", 'loss': Categorical("loss", ['epsilon_insensitive', 'squared_epsilon_insensitive']), } @@ -293,14 +258,49 @@ def get_LinearSVR_ConfigurationSpace(random_state=None): space = space ) +#add coef0? +def get_SVR_ConfigurationSpace(): + space = { + 'epislon': Float("epsilon", bounds=(1e-4, 1.0), log=True), + 'shrinking': Categorical("shrinking", [True, False]), + 'C': Float('C', (0.01, 1e5), log=True), + 'max_iter': 3000, + 'tol': 0.005, + } + + cs = ConfigurationSpace( + space = space + ) + + kernel = Categorical("kernel", ['poly', 'rbf', 'linear', 'sigmoid']) + degree = Integer("degree", bounds=(1, 5)) + gamma = Float("gamma", bounds=(1e-5, 10.0), log=True) + coef0 = Float("coef0", bounds=(-1, 1)) + + + degree_condition = EqualsCondition(degree, kernel, 'poly') + gamma_condition = InCondition(gamma, kernel, ['poly', 'rbf',]) + coef0_condition = InCondition(coef0, kernel, ['poly', 'sigmoid']) + + cs.add_hyperparameters([kernel, degree, gamma, coef0]) + cs.add_conditions([degree_condition,gamma_condition]) + + return cs + + -def get_XGBRegressor_ConfigurationSpace(random_state=None): + +def get_XGBRegressor_ConfigurationSpace(random_state): space = { + 'n_estimators': 100, 'learning_rate': Float("learning_rate", bounds=(1e-3, 1), log=True), - 'subsample': Float("subsample", bounds=(0.05, 1.0)), + 'subsample': Float("subsample", bounds=(0.5, 1.0)), 'min_child_weight': Integer("min_child_weight", bounds=(1, 21)), - 'n_estimators': 100, - 'max_depth': Integer("max_depth", bounds=(1, 11)), + 'gamma': Float("gamma", bounds=(1e-4, 20), log=True), + 'max_depth': Integer("max_depth", bounds=(3, 18)), + 'reg_alpha': 
Float("reg_alpha", bounds=(1e-4, 100), log=True), + 'reg_lambda': Float("reg_lambda", bounds=(1e-4, 1), log=True), + 'n_jobs': 1, 'nthread': 1, 'verbosity': 0, 'objective': 'reg:squarederror', @@ -314,11 +314,11 @@ def get_XGBRegressor_ConfigurationSpace(random_state=None): ) -def get_AdaBoostRegressor_ConfigurationSpace(random_state=None): +def get_AdaBoostRegressor_ConfigurationSpace(random_state): space = { - 'n_estimators': Integer("n_estimators", bounds=(50, 100)), - 'learning_rate': Float("learning_rate", bounds=(1e-3, 1.0), log=True), + 'n_estimators': Integer("n_estimators", bounds=(50, 500)), + 'learning_rate': Float("learning_rate", bounds=(1e-3, 2.0), log=True), 'loss': Categorical("loss", ['linear', 'square', 'exponential']), } @@ -330,9 +330,10 @@ def get_AdaBoostRegressor_ConfigurationSpace(random_state=None): space = space ) -def get_ExtraTreesRegressor_ConfigurationSpace(random_state=None): +def get_ExtraTreesRegressor_ConfigurationSpace(random_state): space = { 'n_estimators': 100, + 'criterion': Categorical("criterion", ["squared_error", "friedman_mse", "mae"]), 'max_features': Float("max_features", bounds=(0.05, 1.0)), 'min_samples_split': Integer("min_samples_split", bounds=(2, 21)), 'min_samples_leaf': Integer("min_samples_leaf", bounds=(1, 21)), @@ -344,4 +345,168 @@ def get_ExtraTreesRegressor_ConfigurationSpace(random_state=None): return ConfigurationSpace( space = space - ) \ No newline at end of file + ) +### + +def get_GaussianProcessRegressor_ConfigurationSpace(n_features, random_state): + space = { + 'n_features': n_features, + 'alpha': Float("alpha", bounds=(1e-14, 1.0), log=True), + 'thetaL': Float("thetaL", bounds=(1e-10, 1e-3), log=True), + 'thetaU': Float("thetaU", bounds=(1.0, 100000), log=True), + } + + if random_state is not None: #This is required because configspace doesn't allow None as a value + space['random_state'] = random_state + + return ConfigurationSpace( + space = space + ) + +def 
GaussianProcessRegressor_hyperparameter_parser(params): + kernel = sklearn.gaussian_process.kernels.RBF( + length_scale = [1.0]*params['n_features'], + length_scale_bounds=[(params['thetaL'], params['thetaU'])] * params['n_features'], + ) + final_params = {"kernel": kernel, + "alpha": params['alpha'], + "n_restarts_optimizer": 10, + "optimizer": "fmin_l_bfgs_b", + "normalize_y": True, + "copy_X_train": True, + } + + if "random_state" in params: + final_params['random_state'] = params['random_state'] + + return final_params + +### +def get_GradientBoostingRegressor_ConfigurationSpace(n_features, random_state): + early_stop = Categorical("early_stop", ["off", "valid", "train"]) + n_iter_no_change = Integer("n_iter_no_change",bounds=(1,20)) + validation_fraction = Float("validation_fraction", bounds=(0.01, 0.4)) + + n_iter_no_change_cond = InCondition(n_iter_no_change, early_stop, ["valid", "train"] ) + validation_fraction_cond = EqualsCondition(validation_fraction, early_stop, "valid") + + space = { + 'loss': Categorical("loss", ['log_loss', 'exponential']), + 'learning_rate': Float("learning_rate", bounds=(1e-3, 1), log=True), + 'min_samples_leaf': Integer("min_samples_leaf", bounds=(1, 200)), + 'min_samples_split': Integer("min_samples_split", bounds=(2, 20)), + 'subsample': Float("subsample", bounds=(0.1, 1.0)), + 'max_features': Integer("max_features", bounds=(1, max(1, n_features))), + 'max_leaf_nodes': Integer("max_leaf_nodes", bounds=(3, 2047)), + 'max_depth': Integer("max_depth", bounds=(1, 2*n_features)), + 'tol': 1e-4, + } + + if random_state is not None: #This is required because configspace doesn't allow None as a value + space['random_state'] = random_state + + cs = ConfigurationSpace( + space = space + ) + cs.add_hyperparameters([n_iter_no_change, validation_fraction, early_stop ]) + cs.add_conditions([validation_fraction_cond, n_iter_no_change_cond]) + return cs + +#only difference is l2_regularization +def 
get_HistGradientBoostingRegressor_ConfigurationSpace(n_features, random_state): + early_stop = Categorical("early_stop", ["off", "valid", "train"]) + n_iter_no_change = Integer("n_iter_no_change",bounds=(1,20)) + validation_fraction = Float("validation_fraction", bounds=(0.01, 0.4)) + + n_iter_no_change_cond = InCondition(n_iter_no_change, early_stop, ["valid", "train"] ) + validation_fraction_cond = EqualsCondition(validation_fraction, early_stop, "valid") + + space = { + 'loss': Categorical("loss", ['log_loss', 'exponential']), + 'learning_rate': Float("learning_rate", bounds=(1e-3, 1), log=True), + 'min_samples_leaf': Integer("min_samples_leaf", bounds=(1, 200)), + 'max_features': Float("max_features", bounds=(0.1,1.0)), + 'max_leaf_nodes': Integer("max_leaf_nodes", bounds=(3, 2047)), + 'max_depth': Integer("max_depth", bounds=(1, 2*n_features)), + 'l2_regularization': Float("l2_regularization", bounds=(1e-10, 1), log=True), + 'tol': 1e-4, + } + + if random_state is not None: #This is required because configspace doesn't allow None as a value + space['random_state'] = random_state + + cs = ConfigurationSpace( + space = space + ) + cs.add_hyperparameters([n_iter_no_change, validation_fraction, early_stop ]) + cs.add_conditions([validation_fraction_cond, n_iter_no_change_cond]) + + return cs + +def GradientBoostingRegressor_hyperparameter_parser(params): + + final_params = { + 'loss': params['loss'], + 'learning_rate': params['learning_rate'], + 'min_samples_leaf': params['min_samples_leaf'], + 'max_features': params['max_features'], + 'max_leaf_nodes': params['max_leaf_nodes'], + 'max_depth': params['max_depth'], + 'tol': params['tol'], + } + + if "l2_regularization" in params: + final_params['l2_regularization'] = params['l2_regularization'] + + if params['early_stop'] == 'off': + final_params['n_iter_no_change'] = None + final_params['validation_fraction'] = None + elif params['early_stop'] == 'valid': + final_params['n_iter_no_change'] = 
params['n_iter_no_change'] + final_params['validation_fraction'] = params['validation_fraction'] + elif params['early_stop'] == 'train': + final_params['n_iter_no_change'] = params['n_iter_no_change'] + final_params['validation_fraction'] = None + + + return final_params + + + +### + +def get_MLPRegressor_ConfigurationSpace(random_state): + space = {"n_iter_no_change":32} + + if random_state is not None: #This is required because configspace doesn't allow None as a value + space['random_state'] = random_state + + cs = ConfigurationSpace( + space = space + ) + + n_hidden_layers = Integer("n_hidden_layers", bounds=(1, 3)) + n_nodes_per_layer = Integer("n_nodes_per_layer", bounds=(16, 512)) + activation = Categorical("activation", ['tanh', 'relu']) + alpha = Float("alpha", bounds=(1e-7, 1e-1), log=True) + learning_rate = Float("learning_rate", bounds=(1e-4, 1e-1), log=True) + early_stopping = Categorical("early_stopping", [True,False]) + + cs.add_hyperparameters([n_hidden_layers, n_nodes_per_layer, activation, alpha, learning_rate, early_stopping]) + + return cs + +def MLPRegressor_hyperparameter_parser(params): + hyperparameters = { + 'n_iter_no_change': params['n_iter_no_change'], + 'hidden_layer_sizes' : [params['n_nodes_per_layer']]*params['n_hidden_layers'], + 'activation': params['activation'], + 'alpha': params['alpha'], + 'learning_rate': params['learning_rate'], + 'early_stopping': params['early_stopping'], + } + return hyperparameters + + + + \ No newline at end of file diff --git a/tpot2/config/regressors_sklearnex.py b/tpot2/config/regressors_sklearnex.py index 3473de56..7346a7c3 100644 --- a/tpot2/config/regressors_sklearnex.py +++ b/tpot2/config/regressors_sklearnex.py @@ -3,7 +3,7 @@ -def get_RandomForestRegressor_ConfigurationSpace(random_state=None): +def get_RandomForestRegressor_ConfigurationSpace(random_state): space = { 'n_estimators': 100, 'max_features': Float("max_features", bounds=(0.05, 1.0)), @@ -20,7 +20,7 @@ def 
get_RandomForestRegressor_ConfigurationSpace(random_state=None): ) -def get_KNeighborsRegressor_ConfigurationSpace(n_samples=100): +def get_KNeighborsRegressor_ConfigurationSpace(n_samples): return ConfigurationSpace( space = { 'n_neighbors': Integer("n_neighbors", bounds=(1, max(n_samples, 100))), @@ -29,7 +29,7 @@ def get_KNeighborsRegressor_ConfigurationSpace(n_samples=100): ) -def get_Ridge_ConfigurationSpace(random_state=None): +def get_Ridge_ConfigurationSpace(random_state): space = { 'alpha': Float("alpha", bounds=(0.0, 1.0)), 'fit_intercept': Categorical("fit_intercept", [True]), @@ -43,7 +43,7 @@ def get_Ridge_ConfigurationSpace(random_state=None): space = space ) -def get_Lasso_ConfigurationSpace(random_state=None): +def get_Lasso_ConfigurationSpace(random_state): space = { 'alpha': Float("alpha", bounds=(0.0, 1.0)), 'fit_intercept': Categorical("fit_intercept", [True]), @@ -60,7 +60,7 @@ def get_Lasso_ConfigurationSpace(random_state=None): space = space ) -def get_ElasticNet_ConfigurationSpace(random_state=None): +def get_ElasticNet_ConfigurationSpace(random_state): space = { 'alpha': Float("alpha", bounds=(0.0, 1.0)), 'l1_ratio': Float("l1_ratio", bounds=(0.0, 1.0)), @@ -74,7 +74,7 @@ def get_ElasticNet_ConfigurationSpace(random_state=None): ) -def get_SVR_ConfigurationSpace(random_state=None): +def get_SVR_ConfigurationSpace(random_state): space = { 'kernel': Categorical("kernel", ['poly', 'rbf', 'linear', 'sigmoid']), 'C': Float("C", bounds=(1e-4, 25), log=True), @@ -90,7 +90,7 @@ def get_SVR_ConfigurationSpace(random_state=None): space = space ) -def get_NuSVR_ConfigurationSpace(random_state=None): +def get_NuSVR_ConfigurationSpace(random_state): space = { 'nu': Float("nu", bounds=(0.05, 1.0)), 'kernel': Categorical("kernel", ['poly', 'rbf', 'linear', 'sigmoid']), diff --git a/tpot2/config/special_configs.py b/tpot2/config/special_configs.py index 38545f6c..5d22dfad 100644 --- a/tpot2/config/special_configs.py +++ b/tpot2/config/special_configs.py @@ 
-30,54 +30,3 @@ def get_ArithmeticTransformer_ConfigurationSpace(): # MinTransformer: {} # MaxTransformer: {} - - -def get_FeatureSetSelector_ConfigurationSpace(names_list = None, subset_dict=None): - return ConfigurationSpace( - space = { - 'name': Categorical("name", names_list), - } - ) - - -def make_FSS_config_dictionary(subsets=None, n_features=None, feature_names=None): - """Create the search space of parameters for FeatureSetSelector. - - Parameters - ---------- - subsets: Sets the subsets to select from. - - str : If a string, it is assumed to be a path to a csv file with the subsets. - The first column is assumed to be the name of the subset and the remaining columns are the features in the subset. - - list or np.ndarray : If a list or np.ndarray, it is assumed to be a list of subsets. - - n_features: int the number of features in the dataset. - If subsets is None, each column will be treated as a subset. One column will be selected per subset. - """ - - #require at least of of the parameters - if subsets is None and n_features is None: - raise ValueError('At least one of the parameters must be provided') - - if isinstance(subsets, str): - df = pd.read_csv(subsets,header=None,index_col=0) - df['features'] = df.apply(lambda x: list([x[c] for c in df.columns]),axis=1) - subset_dict = {} - for row in df.index: - subset_dict[row] = df.loc[row]['features'] - elif isinstance(subsets, dict): - subset_dict = subsets - elif isinstance(subsets, list) or isinstance(subsets, np.ndarray): - subset_dict = {str(i):subsets[i] for i in range(len(subsets))} - else: - if feature_names is None: - subset_dict = {str(i):i for i in range(n_features)} - else: - subset_dict = {str(i):feature_names[i] for i in range(len(feature_names))} - - names_list = list(subset_dict.keys()) - - return ConfigurationSpace({ - 'name': Categorical("name", names_list), - 'subset_dict': Categorical("subset", subset_dict), - }) - diff --git a/tpot2/config/tests/__init__.py 
b/tpot2/config/tests/__init__.py new file mode 100644 index 00000000..e69de29b diff --git a/tpot2/config/tests/test_get_configspace.py b/tpot2/config/tests/test_get_configspace.py new file mode 100644 index 00000000..a2ebcb59 --- /dev/null +++ b/tpot2/config/tests/test_get_configspace.py @@ -0,0 +1,26 @@ +import pytest +import tpot2 +from sklearn.datasets import load_iris +import random +import sklearn + +import tpot2.config + +from ..get_configspace import STRING_TO_CLASS + +def test_loop_through_all_hyperparameters(): + + n_classes=3 + n_samples=100 + n_features=100 + random_state=None + + for class_name, _ in STRING_TO_CLASS.items(): + estnode_gen = tpot2.config.get_search_space(class_name, n_classes=n_classes, n_samples=n_samples, n_features=n_features, random_state=random_state) + + #generate 10 random hyperparameters and make sure they are all valid + for i in range(10): + estnode = estnode_gen.generate() + est = estnode.export_pipeline() + + \ No newline at end of file diff --git a/tpot2/config/transformers.py b/tpot2/config/transformers.py index f74d5e18..04180ac4 100644 --- a/tpot2/config/transformers.py +++ b/tpot2/config/transformers.py @@ -18,6 +18,13 @@ ZeroCount_configspace = {} +PolynomialFeatures_configspace = ConfigurationSpace( + space = { + 'degree': Integer('degree', bounds=(2, 3)), + 'interaction_only': Categorical('interaction_only', [True, False]), + } +) + OneHotEncoder_configspace = {} #TODO include the parameter for max unique values def get_FastICA_configspace(n_features=100, random_state=None): @@ -76,3 +83,31 @@ def get_RBFSampler_configspace(n_features=100, random_state=None): space = space ) + + +def get_QuantileTransformer_configspace(random_state=None): + + space = { + 'n_quantiles': Integer('n_quantiles', bounds=(10, 2000)), + 'output_distribution': Categorical('output_distribution', ['uniform', 'normal']), + } + + if random_state is not None: #This is required because configspace doesn't allow None as a value + 
space['random_state'] = random_state + + return ConfigurationSpace( + space = space + + ) + + + +### ROBUST SCALER + +RobustScaler_configspace = ConfigurationSpace({ + "q_min": Float("q_min", bounds=(0.001, 0.3)), + "q_max": Float("q_max", bounds=(0.7, 0.999)), + }) + +def robust_scaler_hyperparameter_parser(params): + return {"quantile_range": (params["q_min"], params["q_max"])} \ No newline at end of file diff --git a/tpot2/search_spaces/nodes/estimator_node.py b/tpot2/search_spaces/nodes/estimator_node.py index 6e084b59..0ec71e98 100644 --- a/tpot2/search_spaces/nodes/estimator_node.py +++ b/tpot2/search_spaces/nodes/estimator_node.py @@ -1,22 +1,45 @@ # try https://automl.github.io/ConfigSpace/main/api/hyperparameters.html -import tpot2 + import numpy as np -import pandas as pd -import sklearn -from tpot2 import config -from typing import Generator, List, Tuple, Union -import random from ..base import SklearnIndividual, SklearnIndividualGenerator from ConfigSpace import ConfigurationSpace +from typing import final + +NONE_SPECIAL_STRING = "" +TRUE_SPECIAL_STRING = "" +FALSE_SPECIAL_STRING = "" + + +def default_hyperparameter_parser(params:dict) -> dict: + return params + class EstimatorNodeIndividual(SklearnIndividual): + """ + Note that ConfigurationSpace does not support None as a parameter. Instead, use the special string "". TPOT will automatically replace instances of this string with the Python None. + + Parameters + ---------- + method : type + The class of the estimator to be used + + space : ConfigurationSpace|dict + The hyperparameter space to be used. If a dict is passed, hyperparameters are fixed and not learned. + + """ def __init__(self, method: type, space: ConfigurationSpace|dict, #TODO If a dict is passed, hyperparameters are fixed and not learned. Is this confusing? Should we make a second node type? 
+ hyperparameter_parser: callable = None, rng=None) -> None: super().__init__() self.method = method self.space = space + if hyperparameter_parser is None: + self.hyperparameter_parser = default_hyperparameter_parser + else: + self.hyperparameter_parser = hyperparameter_parser + if isinstance(space, dict): self.hyperparameters = space else: @@ -24,6 +47,8 @@ def __init__(self, method: type, self.space.seed(rng.integers(0, 2**32)) self.hyperparameters = self.space.sample_configuration().get_dictionary() + self.check_hyperparameters_for_None() + def mutate(self, rng=None): if isinstance(self.space, dict): return False @@ -32,6 +57,7 @@ def mutate(self, rng=None): self.space.seed(rng.integers(0, 2**32)) self.hyperparameters = self.space.sample_configuration().get_dictionary() + self.check_hyperparameters_for_None() return True def crossover(self, other, rng=None): @@ -48,17 +74,34 @@ def crossover(self, other, rng=None): if hyperparameter in other.hyperparameters: self.hyperparameters[hyperparameter] = other.hyperparameters[hyperparameter] + self.check_hyperparameters_for_None() + + return True + + def check_hyperparameters_for_None(self): + for key, value in self.hyperparameters.items(): + #if string + if isinstance(value, str): + if value == NONE_SPECIAL_STRING: + self.hyperparameters[key] = None + elif value == TRUE_SPECIAL_STRING: + self.hyperparameters[key] = True + elif value == FALSE_SPECIAL_STRING: + self.hyperparameters[key] = False + + @final #this method should not be overridden, instead override hyperparameter_parser def export_pipeline(self, **kwargs): - return self.method(**self.hyperparameters) + return self.method(**self.hyperparameter_parser(self.hyperparameters)) def unique_id(self): #return a dictionary of the method and the hyperparameters return (self.method, self.hyperparameters) class EstimatorNode(SklearnIndividualGenerator): - def __init__(self, method, space): + def __init__(self, method, space, 
hyperparameter_parser=default_hyperparameter_parser): self.method = method self.space = space + self.hyperparameter_parser = hyperparameter_parser def generate(self, rng=None): - return EstimatorNodeIndividual(self.method, self.space) \ No newline at end of file + return EstimatorNodeIndividual(self.method, self.space, hyperparameter_parser=self.hyperparameter_parser, rng=rng) \ No newline at end of file From a66ff10bf5f36ce7ebc64e44c72bb44b39bf6103 Mon Sep 17 00:00:00 2001 From: perib Date: Wed, 17 Apr 2024 21:47:53 -0700 Subject: [PATCH 12/75] edits --- tpot2/config/get_configspace.py | 7 +- tpot2/config/tests/test.ipynb | 264 +++++++++++++++++++++ tpot2/config/tests/test_get_configspace.py | 3 +- tpot2/tpot_estimator/tests/__init__.py | 0 4 files changed, 271 insertions(+), 3 deletions(-) create mode 100644 tpot2/config/tests/test.ipynb create mode 100644 tpot2/tpot_estimator/tests/__init__.py diff --git a/tpot2/config/get_configspace.py b/tpot2/config/get_configspace.py index cf75cd47..5706c4f6 100644 --- a/tpot2/config/get_configspace.py +++ b/tpot2/config/get_configspace.py @@ -156,7 +156,12 @@ "selectors_regression": ["SelectFwe", "SelectPercentile", "VarianceThreshold", "RFE_regression", "SelectFromModel_regression"], "classifiers" : ['AdaBoostClassifier', 'BernoulliNB', 'DecisionTreeClassifier', 'ExtraTreesClassifier', 'GaussianNB', 'HistGradientBoostingClassifier', 'KNeighborsClassifier', 'LogisticRegression', "LinearSVC", "SVC", 'MLPClassifier', 'MultinomialNB', "PassiveAggressiveClassifier", "QuadraticDiscriminantAnalysis", 'RandomForestClassifier', 'SGDClassifier', 'XGBClassifier'], "regressors" : ['AdaBoostRegressor', "ARDRegression", 'DecisionTreeRegressor', 'ExtraTreesRegressor', 'GaussianProcessRegressor', 'HistGradientBoostingRegressor', 'KNeighborsRegressor', 'LinearDiscriminantAnalysis', 'LinearSVR', "MLPRegressor", 'RandomForestRegressor', 'SGDRegressor', 'SVR', 'XGBRegressor'], - "transformers": ["Binarizer", "Normalizer", "PCA", "ZeroCount", 
"OneHotEncoder", "FastICA", "FeatureAgglomeration", "Nystroem", "RBFSampler", "QuantileTransformer", "PowerTransformer"], + + + "transformers": ["Binarizer", "PCA", "ZeroCount", "ColumnOneHotEncoder", "FastICA", "FeatureAgglomeration", "Nystroem", "RBFSampler", "QuantileTransformer", "PowerTransformer"], + "scalers": ["MinMaxScaler", "RobustScaler", "StandardScaler", "MaxAbsScaler", "Normalizer", ], + "all_transformers" : ["transformers", "scalers"], + "arithmatic": ["AddTransformer", "mul_neg_1_Transformer", "MulTransformer", "SafeReciprocalTransformer", "EQTransformer", "NETransformer", "GETransformer", "GTTransformer", "LETransformer", "LTTransformer", "MinTransformer", "MaxTransformer"], "imputers": [], "skrebate": ["ReliefF", "SURF", "SURFstar", "MultiSURF"], diff --git a/tpot2/config/tests/test.ipynb b/tpot2/config/tests/test.ipynb new file mode 100644 index 00000000..97580f08 --- /dev/null +++ b/tpot2/config/tests/test.ipynb @@ -0,0 +1,264 @@ +{ + "cells": [ + { + "cell_type": "code", + "execution_count": 1, + "metadata": {}, + "outputs": [], + "source": [ + "import importlib.util\n", + "import sys\n", + "import numpy as np\n", + "import warnings\n", + "\n", + "\n", + "\n", + "from ConfigSpace import ConfigurationSpace\n", + "from ConfigSpace import ConfigurationSpace, Integer, Float, Categorical, Normal\n", + "\n", + "#autoqtl_builtins\n", + "from tpot2.builtin_modules import genetic_encoders\n", + "from tpot2.builtin_modules import feature_encoding_frequency_selector\n", + "\n", + "from sklearn.linear_model import SGDClassifier\n", + "from sklearn.ensemble import RandomForestClassifier, ExtraTreesClassifier, GradientBoostingClassifier\n", + "from sklearn.neural_network import MLPClassifier\n", + "from sklearn.tree import DecisionTreeClassifier\n", + "from xgboost import XGBClassifier\n", + "from sklearn.neighbors import KNeighborsClassifier\n", + "from sklearn.svm import SVC\n", + "from sklearn.linear_model import LogisticRegression\n", + "from lightgbm 
import LGBMClassifier\n", + "from sklearn.svm import LinearSVC\n", + "from sklearn.naive_bayes import GaussianNB, BernoulliNB, MultinomialNB\n", + "from sklearn.ensemble import ExtraTreesRegressor, ExtraTreesClassifier\n", + "\n", + "\n", + "from tpot2.builtin_modules import ZeroCount, OneHotEncoder, ColumnOneHotEncoder\n", + "from sklearn.preprocessing import Binarizer\n", + "from sklearn.decomposition import FastICA\n", + "from sklearn.cluster import FeatureAgglomeration\n", + "from sklearn.preprocessing import MaxAbsScaler\n", + "from sklearn.preprocessing import MinMaxScaler\n", + "from sklearn.preprocessing import Normalizer\n", + "from sklearn.kernel_approximation import Nystroem\n", + "from sklearn.decomposition import PCA\n", + "from sklearn.preprocessing import PolynomialFeatures\n", + "from sklearn.kernel_approximation import RBFSampler\n", + "from sklearn.preprocessing import RobustScaler\n", + "from sklearn.preprocessing import StandardScaler\n", + "from sklearn.preprocessing import PowerTransformer, QuantileTransformer\n", + "\n", + "\n", + "from sklearn.feature_selection import SelectFwe\n", + "from sklearn.feature_selection import SelectPercentile\n", + "from sklearn.feature_selection import VarianceThreshold\n", + "from sklearn.feature_selection import RFE\n", + "from sklearn.feature_selection import SelectFromModel\n", + "\n", + "import sklearn.feature_selection\n", + "\n", + "#TODO create a selectomixin using these?\n", + "from sklearn.feature_selection import f_classif\n", + "from sklearn.feature_selection import f_regression\n", + "\n", + "\n", + "from sklearn.linear_model import SGDRegressor\n", + "from sklearn.linear_model import LinearRegression\n", + "from sklearn.linear_model import Ridge\n", + "from sklearn.linear_model import Lasso\n", + "from sklearn.linear_model import ElasticNet\n", + "from sklearn.linear_model import Lars\n", + "from sklearn.linear_model import LassoLars, LassoLarsCV\n", + "from sklearn.linear_model import RidgeCV\n", 
+ "\n", + "from sklearn.svm import SVR, SVC\n", + "from sklearn.svm import LinearSVR, LinearSVC\n", + "\n", + "from sklearn.ensemble import AdaBoostRegressor, AdaBoostClassifier, GradientBoostingRegressor,RandomForestRegressor\n", + "from sklearn.ensemble import BaggingRegressor\n", + "from sklearn.ensemble import ExtraTreesRegressor\n", + "from sklearn.ensemble import HistGradientBoostingClassifier, HistGradientBoostingRegressor\n", + "from sklearn.tree import DecisionTreeRegressor\n", + "from sklearn.neighbors import KNeighborsRegressor\n", + "from sklearn.linear_model import ElasticNetCV\n", + "\n", + "from sklearn.discriminant_analysis import LinearDiscriminantAnalysis\n", + "\n", + "\n", + "from sklearn.gaussian_process import GaussianProcessRegressor\n", + "\n", + "from xgboost import XGBRegressor\n", + "\n", + "\n", + "from tpot2.builtin_modules import AddTransformer, mul_neg_1_Transformer, MulTransformer, SafeReciprocalTransformer, EQTransformer, NETransformer, GETransformer, GTTransformer, LETransformer, LTTransformer, MinTransformer, MaxTransformer, ZeroTransformer, OneTransformer, NTransformer\n", + "\n", + "\n", + "#MDR\n", + "\n", + "\n", + "all_methods = [SGDClassifier, RandomForestClassifier, ExtraTreesClassifier, GradientBoostingClassifier, MLPClassifier, DecisionTreeClassifier, XGBClassifier, KNeighborsClassifier, SVC, LogisticRegression, LGBMClassifier, LinearSVC, GaussianNB, BernoulliNB, MultinomialNB, ExtraTreesRegressor, RandomForestRegressor, GradientBoostingRegressor, BaggingRegressor, DecisionTreeRegressor, KNeighborsRegressor, XGBRegressor, ZeroCount, OneHotEncoder, ColumnOneHotEncoder, Binarizer, FastICA, FeatureAgglomeration, MaxAbsScaler, MinMaxScaler, Normalizer, Nystroem, PCA, PolynomialFeatures, RBFSampler, RobustScaler, StandardScaler, SelectFwe, SelectPercentile, VarianceThreshold, SGDRegressor, LinearRegression, Ridge, Lasso, ElasticNet, Lars, LassoLars, LassoLarsCV, RidgeCV, SVR, LinearSVR, AdaBoostRegressor, 
GradientBoostingRegressor, RandomForestRegressor, BaggingRegressor, ExtraTreesRegressor, DecisionTreeRegressor, KNeighborsRegressor, ElasticNetCV,\n", + " AdaBoostClassifier,\n", + " GaussianProcessRegressor, HistGradientBoostingClassifier, HistGradientBoostingRegressor,\n", + " AddTransformer, mul_neg_1_Transformer, MulTransformer, SafeReciprocalTransformer, EQTransformer, NETransformer, GETransformer, GTTransformer, LETransformer, LTTransformer, MinTransformer, MaxTransformer, ZeroTransformer, OneTransformer, NTransformer,\n", + " PowerTransformer, QuantileTransformer,\n", + " ]\n", + "\n", + "\n", + "#if mdr is installed\n", + "if 'mdr' in sys.modules:\n", + " from mdr import MDR, ContinuousMDR\n", + " all_methods.append(MDR)\n", + " all_methods.append(ContinuousMDR)\n", + "\n", + "if 'skrebate' in sys.modules:\n", + " from skrebate import ReliefF, SURF, SURFstar, MultiSURF\n", + " all_methods.append(ReliefF)\n", + " all_methods.append(SURF)\n", + " all_methods.append(SURFstar)\n", + " all_methods.append(MultiSURF)\n", + "\n", + "if 'sklearnex' in sys.modules:\n", + " from sklearnex.linear_model import LinearRegression\n", + " from sklearnex.linear_model import Ridge\n", + " from sklearnex.linear_model import Lasso\n", + " from sklearnex.linear_model import ElasticNet\n", + " from sklearnex.svm import SVR\n", + " from sklearnex.svm import NuSVR\n", + " from sklearnex.ensemble import RandomForestRegressor\n", + " from sklearnex.neighbors import KNeighborsRegressor\n", + "\n", + " from sklearnex.ensemble import RandomForestClassifier\n", + " from sklearnex.neighbors import KNeighborsClassifier\n", + " from sklearnex.svm import SVC\n", + " from sklearnex.svm import NuSVC\n", + " from sklearnex.linear_model import LogisticRegression\n", + "\n", + "\n", + " all_methods.append(LinearRegression)\n", + " all_methods.append(Ridge)\n", + " all_methods.append(Lasso)\n", + " all_methods.append(ElasticNet)\n", + " all_methods.append(SVR)\n", + " all_methods.append(NuSVR)\n", 
+ " all_methods.append(RandomForestRegressor)\n", + " all_methods.append(KNeighborsRegressor)\n", + " KNeighborsClassifier\n", + " all_methods.append(RandomForestClassifier)\n", + " all_methods.append(KNeighborsClassifier)\n", + " all_methods.append(SVC)\n", + " all_methods.append(NuSVC)\n", + " all_methods.append(LogisticRegression)\n", + "\n", + "\n", + "STRING_TO_CLASS = {\n", + " t.__name__: t for t in all_methods\n", + "}\n", + "\n", + "\n", + "from sklearn.linear_model import PassiveAggressiveClassifier\n", + "from sklearn.discriminant_analysis import QuadraticDiscriminantAnalysis\n", + "from sklearn.linear_model import ARDRegression\n", + "from sklearn.gaussian_process import GaussianProcessRegressor\n", + "\n", + "GROUPNAMES = {\n", + " \"selectors\": [\"SelectFwe\", \"SelectPercentile\", \"VarianceThreshold\",],\n", + " \"selectors_classification\": [\"SelectFwe\", \"SelectPercentile\", \"VarianceThreshold\", \"RFE_classification\", \"SelectFromModel_classification\"],\n", + " \"selectors_regression\": [\"SelectFwe\", \"SelectPercentile\", \"VarianceThreshold\", \"RFE_regression\", \"SelectFromModel_regression\"],\n", + " \"classifiers\" : ['AdaBoostClassifier', 'BernoulliNB', 'DecisionTreeClassifier', 'ExtraTreesClassifier', 'GaussianNB', 'HistGradientBoostingClassifier', 'KNeighborsClassifier', 'LogisticRegression', \"LinearSVC\", \"SVC\", 'MLPClassifier', 'MultinomialNB', \"PassiveAggressiveClassifier\", \"QuadraticDiscriminantAnalysis\", 'RandomForestClassifier', 'SGDClassifier', 'XGBClassifier'],\n", + " \"regressors\" : ['AdaBoostRegressor', \"ARDRegression\", 'DecisionTreeRegressor', 'ExtraTreesRegressor', 'GaussianProcessRegressor', 'HistGradientBoostingRegressor', 'KNeighborsRegressor', 'LinearDiscriminantAnalysis', 'LinearSVR', \"MLPRegressor\", 'RandomForestRegressor', 'SGDRegressor', 'SVR', 'XGBRegressor'],\n", + " \"transformers\": [\"Binarizer\", \"Normalizer\", \"PCA\", \"ZeroCount\", \"OneHotEncoder\", \"FastICA\", \"FeatureAgglomeration\", 
\"Nystroem\", \"RBFSampler\", \"QuantileTransformer\", \"PowerTransformer\"],\n", + " \"arithmatic\": [\"AddTransformer\", \"mul_neg_1_Transformer\", \"MulTransformer\", \"SafeReciprocalTransformer\", \"EQTransformer\", \"NETransformer\", \"GETransformer\", \"GTTransformer\", \"LETransformer\", \"LTTransformer\", \"MinTransformer\", \"MaxTransformer\"],\n", + " \"imputers\": [],\n", + " \"skrebate\": [\"ReliefF\", \"SURF\", \"SURFstar\", \"MultiSURF\"],\n", + " \"genetic_encoders\": [\"DominantEncoder\", \"RecessiveEncoder\", \"HeterosisEncoder\", \"UnderDominanceEncoder\", \"OverDominanceEncoder\"],\n", + "\n", + " \"classifiers_sklearnex\" : [\"RandomForestClassifier_sklearnex\", \"LogisticRegression_sklearnex\", \"KNeighborsClassifier_sklearnex\", \"SVC_sklearnex\",\"NuSVC_sklearnex\"],\n", + " \"regressors_sklearnex\" : [\"LinearRegression_sklearnex\", \"Ridge_sklearnex\", \"Lasso_sklearnex\", \"ElasticNet_sklearnex\", \"SVR_sklearnex\", \"NuSVR_sklearnex\", \"RandomForestRegressor_sklearnex\", \"KNeighborsRegressor_sklearnex\"],\n", + "}" + ] + }, + { + "cell_type": "code", + "execution_count": 2, + "metadata": {}, + "outputs": [ + { + "ename": "TypeError", + "evalue": "RFE.__init__() missing 1 required positional argument: 'estimator'", + "output_type": "error", + "traceback": [ + "\u001b[0;31m---------------------------------------------------------------------------\u001b[0m", + "\u001b[0;31mTypeError\u001b[0m Traceback (most recent call last)", + "Cell \u001b[0;32mIn[2], line 30\u001b[0m\n\u001b[1;32m 26\u001b[0m estnode \u001b[38;5;241m=\u001b[39m estnode_gen\u001b[38;5;241m.\u001b[39mgenerate()\n\u001b[1;32m 27\u001b[0m est \u001b[38;5;241m=\u001b[39m estnode\u001b[38;5;241m.\u001b[39mexport_pipeline()\n\u001b[0;32m---> 30\u001b[0m \u001b[43mtest_loop_through_all_hyperparameters\u001b[49m\u001b[43m(\u001b[49m\u001b[43m)\u001b[49m\n", + "Cell \u001b[0;32mIn[2], line 27\u001b[0m, in 
\u001b[0;36mtest_loop_through_all_hyperparameters\u001b[0;34m()\u001b[0m\n\u001b[1;32m 25\u001b[0m \u001b[38;5;28;01mfor\u001b[39;00m i \u001b[38;5;129;01min\u001b[39;00m \u001b[38;5;28mrange\u001b[39m(\u001b[38;5;241m1\u001b[39m):\n\u001b[1;32m 26\u001b[0m estnode \u001b[38;5;241m=\u001b[39m estnode_gen\u001b[38;5;241m.\u001b[39mgenerate()\n\u001b[0;32m---> 27\u001b[0m est \u001b[38;5;241m=\u001b[39m \u001b[43mestnode\u001b[49m\u001b[38;5;241;43m.\u001b[39;49m\u001b[43mexport_pipeline\u001b[49m\u001b[43m(\u001b[49m\u001b[43m)\u001b[49m\n", + "File \u001b[0;32m~/common/Projects/TPOT_Dev/tpot2/tpot2/search_spaces/nodes/estimator_node.py:92\u001b[0m, in \u001b[0;36mEstimatorNodeIndividual.export_pipeline\u001b[0;34m(self, **kwargs)\u001b[0m\n\u001b[1;32m 90\u001b[0m \u001b[38;5;129m@final\u001b[39m \u001b[38;5;66;03m#this method should not be overridden, instead override hyperparameter_parser\u001b[39;00m\n\u001b[1;32m 91\u001b[0m \u001b[38;5;28;01mdef\u001b[39;00m \u001b[38;5;21mexport_pipeline\u001b[39m(\u001b[38;5;28mself\u001b[39m, \u001b[38;5;241m*\u001b[39m\u001b[38;5;241m*\u001b[39mkwargs):\n\u001b[0;32m---> 92\u001b[0m \u001b[38;5;28;01mreturn\u001b[39;00m \u001b[38;5;28;43mself\u001b[39;49m\u001b[38;5;241;43m.\u001b[39;49m\u001b[43mmethod\u001b[49m\u001b[43m(\u001b[49m\u001b[38;5;241;43m*\u001b[39;49m\u001b[38;5;241;43m*\u001b[39;49m\u001b[38;5;28;43mself\u001b[39;49m\u001b[38;5;241;43m.\u001b[39;49m\u001b[43mhyperparameter_parser\u001b[49m\u001b[43m(\u001b[49m\u001b[38;5;28;43mself\u001b[39;49m\u001b[38;5;241;43m.\u001b[39;49m\u001b[43mhyperparameters\u001b[49m\u001b[43m)\u001b[49m\u001b[43m)\u001b[49m\n", + "\u001b[0;31mTypeError\u001b[0m: RFE.__init__() missing 1 required positional argument: 'estimator'" + ] + } + ], + "source": [ + "import pytest\n", + "import tpot2\n", + "from sklearn.datasets import load_iris\n", + "import random\n", + "import sklearn\n", + "\n", + "import tpot2.config\n", + "\n", + "import importlib.util\n", + "import sys\n", + 
"import numpy as np\n", + "import warnings\n", + "\n", + "def test_loop_through_all_hyperparameters():\n", + "\n", + " n_classes=3\n", + " n_samples=100\n", + " n_features=100\n", + " random_state=None\n", + "\n", + " for class_name, _ in STRING_TO_CLASS.items():\n", + " estnode_gen = tpot2.config.get_search_space(class_name, n_classes=n_classes, n_samples=n_samples, n_features=n_features, random_state=random_state)\n", + "\n", + " #generate 10 random hyperparameters and make sure they are all valid\n", + " for i in range(1):\n", + " estnode = estnode_gen.generate()\n", + " est = estnode.export_pipeline()\n", + " \n", + "\n", + "test_loop_through_all_hyperparameters()" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "tpot2.config.get_search_space(\"SGDClassifier\", n_classes=3, n_samples=100, n_features=5, random_state=5)" + ] + } + ], + "metadata": { + "kernelspec": { + "display_name": "tpot2env", + "language": "python", + "name": "python3" + }, + "language_info": { + "codemirror_mode": { + "name": "ipython", + "version": 3 + }, + "file_extension": ".py", + "mimetype": "text/x-python", + "name": "python", + "nbconvert_exporter": "python", + "pygments_lexer": "ipython3", + "version": "3.10.14" + } + }, + "nbformat": 4, + "nbformat_minor": 2 +} diff --git a/tpot2/config/tests/test_get_configspace.py b/tpot2/config/tests/test_get_configspace.py index a2ebcb59..bccb349f 100644 --- a/tpot2/config/tests/test_get_configspace.py +++ b/tpot2/config/tests/test_get_configspace.py @@ -19,8 +19,7 @@ def test_loop_through_all_hyperparameters(): estnode_gen = tpot2.config.get_search_space(class_name, n_classes=n_classes, n_samples=n_samples, n_features=n_features, random_state=random_state) #generate 10 random hyperparameters and make sure they are all valid - for i in range(10): + for i in range(1): estnode = estnode_gen.generate() est = estnode.export_pipeline() - \ No newline at end of file diff --git 
a/tpot2/tpot_estimator/tests/__init__.py b/tpot2/tpot_estimator/tests/__init__.py new file mode 100644 index 00000000..e69de29b From ca42398b0952c5812ebe6679aeaa63f7fbbb5ca9 Mon Sep 17 00:00:00 2001 From: perib Date: Thu, 18 Apr 2024 10:54:26 -0700 Subject: [PATCH 13/75] edits --- tpot2/config/get_configspace.py | 16 ++++++++-------- tpot2/tests/test_estimators.py | 7 ++++--- 2 files changed, 12 insertions(+), 11 deletions(-) diff --git a/tpot2/config/get_configspace.py b/tpot2/config/get_configspace.py index 5706c4f6..473233ea 100644 --- a/tpot2/config/get_configspace.py +++ b/tpot2/config/get_configspace.py @@ -88,7 +88,10 @@ from sklearn.linear_model import ElasticNetCV from sklearn.discriminant_analysis import LinearDiscriminantAnalysis - +from sklearn.linear_model import PassiveAggressiveClassifier +from sklearn.discriminant_analysis import QuadraticDiscriminantAnalysis +from sklearn.linear_model import ARDRegression +from sklearn.gaussian_process import GaussianProcessRegressor from sklearn.gaussian_process import GaussianProcessRegressor @@ -105,7 +108,7 @@ AdaBoostClassifier, GaussianProcessRegressor, HistGradientBoostingClassifier, HistGradientBoostingRegressor, AddTransformer, mul_neg_1_Transformer, MulTransformer, SafeReciprocalTransformer, EQTransformer, NETransformer, GETransformer, GTTransformer, LETransformer, LTTransformer, MinTransformer, MaxTransformer, ZeroTransformer, OneTransformer, NTransformer, - PowerTransformer, QuantileTransformer, + PowerTransformer, QuantileTransformer,ARDRegression, QuadraticDiscriminantAnalysis, PassiveAggressiveClassifier, LinearDiscriminantAnalysis, ] @@ -145,17 +148,14 @@ } -from sklearn.linear_model import PassiveAggressiveClassifier -from sklearn.discriminant_analysis import QuadraticDiscriminantAnalysis -from sklearn.linear_model import ARDRegression -from sklearn.gaussian_process import GaussianProcessRegressor + GROUPNAMES = { "selectors": ["SelectFwe", "SelectPercentile", "VarianceThreshold",], 
"selectors_classification": ["SelectFwe", "SelectPercentile", "VarianceThreshold", "RFE_classification", "SelectFromModel_classification"], "selectors_regression": ["SelectFwe", "SelectPercentile", "VarianceThreshold", "RFE_regression", "SelectFromModel_regression"], - "classifiers" : ['AdaBoostClassifier', 'BernoulliNB', 'DecisionTreeClassifier', 'ExtraTreesClassifier', 'GaussianNB', 'HistGradientBoostingClassifier', 'KNeighborsClassifier', 'LogisticRegression', "LinearSVC", "SVC", 'MLPClassifier', 'MultinomialNB', "PassiveAggressiveClassifier", "QuadraticDiscriminantAnalysis", 'RandomForestClassifier', 'SGDClassifier', 'XGBClassifier'], - "regressors" : ['AdaBoostRegressor', "ARDRegression", 'DecisionTreeRegressor', 'ExtraTreesRegressor', 'GaussianProcessRegressor', 'HistGradientBoostingRegressor', 'KNeighborsRegressor', 'LinearDiscriminantAnalysis', 'LinearSVR', "MLPRegressor", 'RandomForestRegressor', 'SGDRegressor', 'SVR', 'XGBRegressor'], + "classifiers" : ['AdaBoostClassifier', 'BernoulliNB', 'DecisionTreeClassifier', 'ExtraTreesClassifier', 'GaussianNB', 'HistGradientBoostingClassifier', 'KNeighborsClassifier','LinearDiscriminantAnalysis', 'LogisticRegression', "LinearSVC", "SVC", 'MLPClassifier', 'MultinomialNB', "PassiveAggressiveClassifier", "QuadraticDiscriminantAnalysis", 'RandomForestClassifier', 'SGDClassifier', 'XGBClassifier'], + "regressors" : ['AdaBoostRegressor', "ARDRegression", 'DecisionTreeRegressor', 'ExtraTreesRegressor', 'GaussianProcessRegressor', 'HistGradientBoostingRegressor', 'KNeighborsRegressor', 'LinearSVR', "MLPRegressor", 'RandomForestRegressor', 'SGDRegressor', 'SVR', 'XGBRegressor'], "transformers": ["Binarizer", "PCA", "ZeroCount", "ColumnOneHotEncoder", "FastICA", "FeatureAgglomeration", "Nystroem", "RBFSampler", "QuantileTransformer", "PowerTransformer"], diff --git a/tpot2/tests/test_estimators.py b/tpot2/tests/test_estimators.py index 5c6f47ba..98b607e0 100644 --- a/tpot2/tests/test_estimators.py +++ 
b/tpot2/tests/test_estimators.py @@ -7,7 +7,8 @@ #standard test @pytest.fixture def tpot_estimator(): - return tpot2.TPOTEstimator( population_size=50, + return tpot2.TPOTEstimator( population_size=10, + generations=5, scorers=['roc_auc_ovr'], scorers_weights=[1], classification=True, @@ -81,11 +82,11 @@ def test_tpot_estimator_config_dict_type(): @pytest.fixture def tpot_classifier(): - return tpot2.tpot_estimator.templates.TPOTClassifier(max_time_seconds=300,verbose=3) + return tpot2.tpot_estimator.templates.TPOTClassifier(max_time_seconds=10,verbose=3) @pytest.fixture def tpot_regressor(): - return tpot2.tpot_estimator.templates.TPOTRegressor(max_time_seconds=300,verbose=3) + return tpot2.tpot_estimator.templates.TPOTRegressor(max_time_seconds=10,verbose=3) def test_tpot_classifier_fit(tpot_classifier,sample_dataset): #load iris dataset From a29a95dc99c8c9bb7d5ef2fddca75ee3e51aa26b Mon Sep 17 00:00:00 2001 From: perib Date: Thu, 18 Apr 2024 14:20:49 -0700 Subject: [PATCH 14/75] fixes, passing tests --- README.md | 28 ++++++++++++++++++++++ tpot2/config/classifiers.py | 6 ++--- tpot2/config/get_configspace.py | 6 +++++ tpot2/tests/test_estimators.py | 41 +++++++++++++++++++++++---------- 4 files changed, 66 insertions(+), 15 deletions(-) diff --git a/README.md b/README.md index f7551551..04920a14 100644 --- a/README.md +++ b/README.md @@ -38,6 +38,34 @@ conda create --name tpot2env python=3.10 conda activate tpot2env ``` +### Packages Used + +python version <3.12 +numpy +scipy +scikit-learn +update_checker +tqdm +stopit +pandas +joblib +xgboost +matplotlib +traitlets +lightgbm +optuna +baikal +jupyter +networkx> +dask +distributed +dask-ml +dask-jobqueue +func_timeout +configspace + +Many of the hyperparameter ranges used in our configspaces were adapted from either the original TPOT package or the AutoSklearn package. 
+ ### Note for M1 Mac or other Arm-based CPU users You need to install the lightgbm package directly from conda using the following command before installing TPOT2. diff --git a/tpot2/config/classifiers.py b/tpot2/config/classifiers.py index 6423f328..4be33797 100644 --- a/tpot2/config/classifiers.py +++ b/tpot2/config/classifiers.py @@ -292,8 +292,8 @@ def get_PassiveAggressiveClassifier_ConfigurationSpace(random_state): #TODO support auto shrinkage when solver is svd. may require custom node def get_LinearDiscriminantAnalysis_ConfigurationSpace(): - solver = Categorical("solver", ['svd', 'lsqr', 'eigen']), - shrinkage = Float("shrinkage", bounds=(0, 1)), + solver = Categorical("solver", ['svd', 'lsqr', 'eigen']) + shrinkage = Float("shrinkage", bounds=(0, 1)) shrinkcond = NotEqualsCondition(shrinkage, solver, 'svd') @@ -301,7 +301,7 @@ def get_LinearDiscriminantAnalysis_ConfigurationSpace(): cs.add_hyperparameters([solver, shrinkage]) cs.add_conditions([shrinkcond]) - return + return cs diff --git a/tpot2/config/get_configspace.py b/tpot2/config/get_configspace.py index 473233ea..19dfb531 100644 --- a/tpot2/config/get_configspace.py +++ b/tpot2/config/get_configspace.py @@ -192,6 +192,8 @@ def get_configspace(name, n_classes=3, n_samples=100, n_features=100, random_sta #classifiers.py + case "LinearDiscriminantAnalysis": + return classifiers.get_LinearDiscriminantAnalysis_ConfigurationSpace() case "AdaBoostClassifier": return classifiers.get_AdaBoostClassifier_ConfigurationSpace(random_state=random_state) case "LogisticRegression": @@ -232,6 +234,10 @@ def get_configspace(name, n_classes=3, n_samples=100, n_features=100, random_sta return regressors.ElasticNetCV_configspace case "RidgeCV": return {} + case "PassiveAggressiveClassifier": + return classifiers.get_PassiveAggressiveClassifier_ConfigurationSpace(random_state=random_state) + case "QuadraticDiscriminantAnalysis": + return classifiers.get_QuadraticDiscriminantAnalysis_ConfigurationSpace() #regressors.py 
case "RandomForestRegressor": diff --git a/tpot2/tests/test_estimators.py b/tpot2/tests/test_estimators.py index 98b607e0..f1f3d45a 100644 --- a/tpot2/tests/test_estimators.py +++ b/tpot2/tests/test_estimators.py @@ -4,11 +4,29 @@ import random import sklearn +@pytest.fixture +def sample_dataset(): + X_train, y_train = load_iris(return_X_y=True) + return X_train, y_train + #standard test @pytest.fixture def tpot_estimator(): - return tpot2.TPOTEstimator( population_size=10, - generations=5, + + n_classes=3 + n_samples=100 + n_features=100 + + search_space = tpot2.search_spaces.pipelines.GraphPipeline( + root_search_space= tpot2.config.get_search_space("classifiers", n_samples=n_samples, n_features=n_features, n_classes=n_classes), + leaf_search_space = None, + inner_search_space = tpot2.config.get_search_space(["selectors","transformers","classifiers"],n_samples=n_samples, n_features=n_features, n_classes=n_classes), + max_size = 10, + ) + return tpot2.TPOTEstimator( + search_space=search_space, + population_size=10, + generations=2, scorers=['roc_auc_ovr'], scorers_weights=[1], classification=True, @@ -16,13 +34,18 @@ def tpot_estimator(): early_stop=5, other_objective_functions= [], other_objective_functions_weights=[], - max_time_seconds=300, + max_time_seconds=30, verbose=3) @pytest.fixture -def sample_dataset(): - X_train, y_train = load_iris(return_X_y=True) - return X_train, y_train +def tpot_classifier(): + return tpot2.tpot_estimator.templates.TPOTClassifier(max_time_seconds=10,verbose=3) + +@pytest.fixture +def tpot_regressor(): + return tpot2.tpot_estimator.templates.TPOTRegressor(max_time_seconds=10,verbose=3) + + def test_tpot_estimator_fit(tpot_estimator,sample_dataset): #load iris dataset @@ -80,13 +103,7 @@ def test_tpot_estimator_config_dict_type(): -@pytest.fixture -def tpot_classifier(): - return tpot2.tpot_estimator.templates.TPOTClassifier(max_time_seconds=10,verbose=3) -@pytest.fixture -def tpot_regressor(): - return 
tpot2.tpot_estimator.templates.TPOTRegressor(max_time_seconds=10,verbose=3) def test_tpot_classifier_fit(tpot_classifier,sample_dataset): #load iris dataset From ef42226176bd37f9702d36b5b8a31ef72bdcce01 Mon Sep 17 00:00:00 2001 From: perib Date: Thu, 18 Apr 2024 16:10:17 -0700 Subject: [PATCH 15/75] more fixes with search spaces - wrapper, make sure all supported modules are included --- .gitignore | 3 +- tpot2/config/classifiers_sklearnex.py | 7 +- tpot2/config/get_configspace.py | 81 +++++++++++---------- tpot2/config/tests/test_get_configspace.py | 20 ++++- tpot2/search_spaces/nodes/estimator_node.py | 4 +- tpot2/search_spaces/pipelines/wrapper.py | 6 +- 6 files changed, 71 insertions(+), 50 deletions(-) diff --git a/.gitignore b/.gitignore index bff01e19..aa4eabb7 100644 --- a/.gitignore +++ b/.gitignore @@ -13,4 +13,5 @@ dask-worker-space/ target/ .venv/ build/* -*.egg \ No newline at end of file +*.egg +*.coverage* \ No newline at end of file diff --git a/tpot2/config/classifiers_sklearnex.py b/tpot2/config/classifiers_sklearnex.py index ad581898..e16d2c03 100644 --- a/tpot2/config/classifiers_sklearnex.py +++ b/tpot2/config/classifiers_sklearnex.py @@ -1,6 +1,6 @@ from ConfigSpace import ConfigurationSpace from ConfigSpace import ConfigurationSpace, Integer, Float, Categorical, Normal - +from ..search_spaces.nodes.estimator_node import NONE_SPECIAL_STRING, TRUE_SPECIAL_STRING, FALSE_SPECIAL_STRING def get_RandomForestClassifier_ConfigurationSpace(random_state): space = { @@ -66,10 +66,9 @@ def get_NuSVC_ConfigurationSpace(random_state): space = { 'nu': Float("nu", bounds=(0.05, 1.0)), 'kernel': Categorical("kernel", ['poly', 'rbf', 'linear', 'sigmoid']), - 'C': Float("C", bounds=(1e-4, 25), log=True), + #'C': Float("C", bounds=(1e-4, 25), log=True), 'degree': Integer("degree", bounds=(1, 4)), - #TODO work around for None value? 
- #'class_weight': Categorical("class_weight", [None, 'balanced']), + 'class_weight': Categorical("class_weight", [NONE_SPECIAL_STRING, 'balanced']), 'max_iter': 3000, 'tol': 0.005, 'probability': Categorical("probability", [True]), # configspace doesn't allow bools as a default value? but does allow them as a value inside a Categorical diff --git a/tpot2/config/get_configspace.py b/tpot2/config/get_configspace.py index 19dfb531..15b889ac 100644 --- a/tpot2/config/get_configspace.py +++ b/tpot2/config/get_configspace.py @@ -2,6 +2,7 @@ import sys import numpy as np import warnings +import importlib.util from ..search_spaces.nodes import EstimatorNode from ..search_spaces.pipelines import ChoicePipeline, WrapperPipeline @@ -27,7 +28,7 @@ from sklearn.linear_model import SGDClassifier from sklearn.ensemble import RandomForestClassifier, ExtraTreesClassifier, GradientBoostingClassifier -from sklearn.neural_network import MLPClassifier +from sklearn.neural_network import MLPClassifier, MLPRegressor from sklearn.tree import DecisionTreeClassifier from xgboost import XGBClassifier from sklearn.neighbors import KNeighborsClassifier @@ -101,51 +102,64 @@ from tpot2.builtin_modules import AddTransformer, mul_neg_1_Transformer, MulTransformer, SafeReciprocalTransformer, EQTransformer, NETransformer, GETransformer, GTTransformer, LETransformer, LTTransformer, MinTransformer, MaxTransformer, ZeroTransformer, OneTransformer, NTransformer +from tpot2.builtin_modules.genetic_encoders import DominantEncoder, RecessiveEncoder, HeterosisEncoder, UnderDominanceEncoder, OverDominanceEncoder + #MDR all_methods = [SGDClassifier, RandomForestClassifier, ExtraTreesClassifier, GradientBoostingClassifier, MLPClassifier, DecisionTreeClassifier, XGBClassifier, KNeighborsClassifier, SVC, LogisticRegression, LGBMClassifier, LinearSVC, GaussianNB, BernoulliNB, MultinomialNB, ExtraTreesRegressor, RandomForestRegressor, GradientBoostingRegressor, BaggingRegressor, DecisionTreeRegressor, 
KNeighborsRegressor, XGBRegressor, ZeroCount, OneHotEncoder, ColumnOneHotEncoder, Binarizer, FastICA, FeatureAgglomeration, MaxAbsScaler, MinMaxScaler, Normalizer, Nystroem, PCA, PolynomialFeatures, RBFSampler, RobustScaler, StandardScaler, SelectFwe, SelectPercentile, VarianceThreshold, SGDRegressor, Ridge, Lasso, ElasticNet, Lars, LassoLars, LassoLarsCV, RidgeCV, SVR, LinearSVR, AdaBoostRegressor, GradientBoostingRegressor, RandomForestRegressor, BaggingRegressor, ExtraTreesRegressor, DecisionTreeRegressor, KNeighborsRegressor, ElasticNetCV, - AdaBoostClassifier, + AdaBoostClassifier,MLPRegressor, GaussianProcessRegressor, HistGradientBoostingClassifier, HistGradientBoostingRegressor, AddTransformer, mul_neg_1_Transformer, MulTransformer, SafeReciprocalTransformer, EQTransformer, NETransformer, GETransformer, GTTransformer, LETransformer, LTTransformer, MinTransformer, MaxTransformer, ZeroTransformer, OneTransformer, NTransformer, PowerTransformer, QuantileTransformer,ARDRegression, QuadraticDiscriminantAnalysis, PassiveAggressiveClassifier, LinearDiscriminantAnalysis, + DominantEncoder, RecessiveEncoder, HeterosisEncoder, UnderDominanceEncoder, OverDominanceEncoder, ] #if mdr is installed -if 'mdr' in sys.modules: +if importlib.util.find_spec('mdr') is not None: from mdr import MDR, ContinuousMDR all_methods.append(MDR) all_methods.append(ContinuousMDR) -if 'skrebate' in sys.modules: +if importlib.util.find_spec('skrebate') is not None: from skrebate import ReliefF, SURF, SURFstar, MultiSURF all_methods.append(ReliefF) all_methods.append(SURF) all_methods.append(SURFstar) all_methods.append(MultiSURF) -if 'sklearnex' in sys.modules: +STRING_TO_CLASS = { + t.__name__: t for t in all_methods +} + +if importlib.util.find_spec('sklearnex') is not None: import sklearnex + import sklearnex.linear_model + import sklearnex.svm + import sklearnex.ensemble + import sklearnex.neighbors - all_methods.append(sklearnex.linear_model.LinearRegression) - 
all_methods.append(sklearnex.linear_model.Ridge) - all_methods.append(sklearnex.linear_model.Lasso) - all_methods.append(sklearnex.linear_model.ElasticNet) - all_methods.append(sklearnex.svm.SVR) - all_methods.append(sklearnex.svm.NuSVR) - all_methods.append(sklearnex.ensemble.RandomForestRegressor) - all_methods.append(sklearnex.neighbors.KNeighborsRegressor) - all_methods.append(sklearnex.ensemble.RandomForestClassifier) - all_methods.append(sklearnex.neighbors.KNeighborsClassifier) - all_methods.append(sklearnex.svm.SVC) - all_methods.append(sklearnex.svm.NuSVC) - all_methods.append(sklearnex.linear_model.LogisticRegression) + + sklearnex_methods = [] + + sklearnex_methods.append(sklearnex.linear_model.LinearRegression) + sklearnex_methods.append(sklearnex.linear_model.Ridge) + sklearnex_methods.append(sklearnex.linear_model.Lasso) + sklearnex_methods.append(sklearnex.linear_model.ElasticNet) + sklearnex_methods.append(sklearnex.svm.SVR) + sklearnex_methods.append(sklearnex.svm.NuSVR) + sklearnex_methods.append(sklearnex.ensemble.RandomForestRegressor) + sklearnex_methods.append(sklearnex.neighbors.KNeighborsRegressor) + sklearnex_methods.append(sklearnex.ensemble.RandomForestClassifier) + sklearnex_methods.append(sklearnex.neighbors.KNeighborsClassifier) + sklearnex_methods.append(sklearnex.svm.SVC) + sklearnex_methods.append(sklearnex.svm.NuSVC) + sklearnex_methods.append(sklearnex.linear_model.LogisticRegression) + + STRING_TO_CLASS.update({f"{t.__name__}_sklearnex": t for t in sklearnex_methods}) -STRING_TO_CLASS = { - t.__name__: t for t in all_methods -} @@ -439,15 +453,6 @@ def get_search_space(name, n_classes=3, n_samples=100, n_features=100, random_st if name in GROUPNAMES: name_list = GROUPNAMES[name] return get_search_space(name_list, n_classes=n_classes, n_samples=n_samples, n_features=n_features, random_state=random_state) - - if name is None: - warnings.warn(f"name is None") - return None - - if name not in STRING_TO_CLASS: - print("FOOO ", name) - 
warnings.warn(f"Could not find class for {name}") - return None return get_node(name, n_classes=n_classes, n_samples=n_samples, n_features=n_features, random_state=random_state) @@ -458,21 +463,21 @@ def get_node(name, n_classes=3, n_samples=100, n_features=100, random_state=None # TODO Add AdaBoostRegressor, AdaBoostClassifier as wrappers? wrap a decision tree with different params? # TODO add other meta-estimators? if name == "RFE_classification": - rfe_sp = get_configspace(name, n_classes=n_classes, n_samples=n_samples, random_state=random_state) + rfe_sp = get_configspace(name="RFE", n_classes=n_classes, n_samples=n_samples, random_state=random_state) ext = get_node("ExtraTreesClassifier", n_classes=n_classes, n_samples=n_samples, random_state=random_state) - return WrapperPipeline(nodegen=ext, method=RFE, configspace=rfe_sp) + return WrapperPipeline(nodegen=ext, method=RFE, space=rfe_sp) if name == "RFE_regression": - rfe_sp = get_configspace(name, n_classes=n_classes, n_samples=n_samples, random_state=random_state) + rfe_sp = get_configspace(name="RFE", n_classes=n_classes, n_samples=n_samples, random_state=random_state) ext = get_node("ExtraTreesRegressor", n_classes=n_classes, n_samples=n_samples, random_state=random_state) - return WrapperPipeline(nodegen=ext, method=RFE, configspace=rfe_sp) + return WrapperPipeline(nodegen=ext, method=RFE, space=rfe_sp) if name == "SelectFromModel_classification": - sfm_sp = get_configspace(name, n_classes=n_classes, n_samples=n_samples, random_state=random_state) + sfm_sp = get_configspace(name="SelectFromModel", n_classes=n_classes, n_samples=n_samples, random_state=random_state) ext = get_node("ExtraTreesClassifier", n_classes=n_classes, n_samples=n_samples, random_state=random_state) - return WrapperPipeline(nodegen=ext, method=SelectFromModel, configspace=sfm_sp) + return WrapperPipeline(nodegen=ext, method=SelectFromModel, space=sfm_sp) if name == "SelectFromModel_regression": - sfm_sp = get_configspace(name, 
n_classes=n_classes, n_samples=n_samples, random_state=random_state) + sfm_sp = get_configspace(name="SelectFromModel", n_classes=n_classes, n_samples=n_samples, random_state=random_state) ext = get_node("ExtraTreesRegressor", n_classes=n_classes, n_samples=n_samples, random_state=random_state) - return WrapperPipeline(nodegen=ext, method=SelectFromModel, configspace=sfm_sp) + return WrapperPipeline(nodegen=ext, method=SelectFromModel, space=sfm_sp) #these are nodes that have special search spaces which require custom parsing of the hyperparameters if name == "RobustScaler": diff --git a/tpot2/config/tests/test_get_configspace.py b/tpot2/config/tests/test_get_configspace.py index bccb349f..bdab516b 100644 --- a/tpot2/config/tests/test_get_configspace.py +++ b/tpot2/config/tests/test_get_configspace.py @@ -6,7 +6,7 @@ import tpot2.config -from ..get_configspace import STRING_TO_CLASS +from ..get_configspace import STRING_TO_CLASS, GROUPNAMES def test_loop_through_all_hyperparameters(): @@ -22,4 +22,20 @@ def test_loop_through_all_hyperparameters(): for i in range(1): estnode = estnode_gen.generate() est = estnode.export_pipeline() - \ No newline at end of file + +def test_loop_through_groupnames(): + + n_classes=3 + n_samples=100 + n_features=100 + random_state=None + + for groupname, group in GROUPNAMES.items(): + for class_name in group: + print(class_name) + estnode_gen = tpot2.config.get_search_space(class_name, n_classes=n_classes, n_samples=n_samples, n_features=n_features, random_state=random_state) + + #generate 10 random hyperparameters and make sure they are all valid + for i in range(100): + estnode = estnode_gen.generate() + est = estnode.export_pipeline() \ No newline at end of file diff --git a/tpot2/search_spaces/nodes/estimator_node.py b/tpot2/search_spaces/nodes/estimator_node.py index 0ec71e98..15b79b3e 100644 --- a/tpot2/search_spaces/nodes/estimator_node.py +++ b/tpot2/search_spaces/nodes/estimator_node.py @@ -45,7 +45,7 @@ def __init__(self, 
method: type, else: rng = np.random.default_rng(rng) self.space.seed(rng.integers(0, 2**32)) - self.hyperparameters = self.space.sample_configuration().get_dictionary() + self.hyperparameters = dict(self.space.sample_configuration()) self.check_hyperparameters_for_None() @@ -55,7 +55,7 @@ def mutate(self, rng=None): rng = np.random.default_rng(rng) self.space.seed(rng.integers(0, 2**32)) - self.hyperparameters = self.space.sample_configuration().get_dictionary() + self.hyperparameters = dict(self.space.sample_configuration()) self.check_hyperparameters_for_None() return True diff --git a/tpot2/search_spaces/pipelines/wrapper.py b/tpot2/search_spaces/pipelines/wrapper.py index 3521d8dd..712da75d 100644 --- a/tpot2/search_spaces/pipelines/wrapper.py +++ b/tpot2/search_spaces/pipelines/wrapper.py @@ -21,14 +21,14 @@ def __init__(self, super().__init__() self.nodegen = nodegen - self.node = np.random.default_rng(rng).choice(self.nodegen).generate() + self.node = self.nodegen.generate(rng) self.method = method self.space = space rng = np.random.default_rng(rng) self.space.seed(rng.integers(0, 2**32)) - self.hyperparameters = self.space.sample_configuration().get_dictionary() + self.hyperparameters = dict(self.space.sample_configuration()) @@ -43,7 +43,7 @@ def mutate(self, rng=None): def _mutate_hyperparameters(self, rng=None): rng = np.random.default_rng(rng) self.space.seed(rng.integers(0, 2**32)) - self.hyperparameters = self.space.sample_configuration().get_dictionary() + self.hyperparameters = dict(self.space.sample_configuration()) return True def _mutate_node(self, rng=None): From d61cd29e86e17060c4444e2376e812626954bdbf Mon Sep 17 00:00:00 2001 From: perib Date: Thu, 18 Apr 2024 17:45:09 -0700 Subject: [PATCH 16/75] added unique_id, fixed issue where graphpipeline not correctly identifying classifiers/regressors --- Tutorial/2_Search_Spaces.ipynb | 1101 ++++++++--------- tpot2/config/classifiers.py | 8 +- tpot2/config/get_configspace.py | 2 +- 
tpot2/graphsklearn.py | 6 +- tpot2/search_spaces/base.py | 4 - tpot2/search_spaces/nodes/estimator_node.py | 2 +- .../nodes/estimator_node_simple.py | 2 +- tpot2/search_spaces/nodes/fss_node.py | 2 +- .../nodes/genetic_feature_selection.py | 2 +- tpot2/search_spaces/pipelines/graph.py | 65 +- tpot2/search_spaces/pipelines/sequential.py | 2 +- tpot2/search_spaces/pipelines/wrapper.py | 21 +- tpot2/tests/test_estimators.py | 8 +- tpot2/tests/test_hello_world.py | 4 - tpot2/tests/test_nodes.py | 133 +- 15 files changed, 626 insertions(+), 736 deletions(-) diff --git a/Tutorial/2_Search_Spaces.ipynb b/Tutorial/2_Search_Spaces.ipynb index 51d2aff7..85e523a4 100644 --- a/Tutorial/2_Search_Spaces.ipynb +++ b/Tutorial/2_Search_Spaces.ipynb @@ -31,7 +31,7 @@ "output_type": "stream", "text": [ "sampled hyperparameters\n", - "{'metric': 'euclidean', 'n_jobs': 1, 'n_neighbors': 6, 'p': 3, 'weights': 'distance'}\n" + "{'metric': 'euclidean', 'n_jobs': 1, 'n_neighbors': 1, 'p': 3, 'weights': 'distance'}\n" ] } ], @@ -156,9 +156,9 @@ "output_type": "stream", "text": [ "sampled hyperparameters\n", - "{'metric': 'minkowski', 'n_jobs': 1, 'n_neighbors': 9, 'p': 2, 'weights': 'distance'}\n", + "{'metric': 'minkowski', 'n_jobs': 1, 'n_neighbors': 9, 'p': 3, 'weights': 'uniform'}\n", "mutated hyperparameters\n", - "{'metric': 'euclidean', 'n_jobs': 1, 'n_neighbors': 5, 'p': 1, 'weights': 'uniform'}\n" + "{'metric': 'minkowski', 'n_jobs': 1, 'n_neighbors': 5, 'p': 1, 'weights': 'distance'}\n" ] } ], @@ -189,14 +189,14 @@ "output_type": "stream", "text": [ "original hyperparameters for individual 1\n", - "{'metric': 'euclidean', 'n_jobs': 1, 'n_neighbors': 9, 'p': 2, 'weights': 'uniform'}\n", + "{'metric': 'euclidean', 'n_jobs': 1, 'n_neighbors': 3, 'p': 3, 'weights': 'uniform'}\n", "original hyperparameters for individual 2\n", - "{'metric': 'euclidean', 'n_jobs': 1, 'n_neighbors': 4, 'p': 1, 'weights': 'uniform'}\n", + "{'metric': 'minkowski', 'n_jobs': 1, 'n_neighbors': 2, 'p': 3, 
'weights': 'distance'}\n", "\n", "post crossover hyperparameters for individual 1\n", - "{'metric': 'euclidean', 'n_jobs': 1, 'n_neighbors': 9, 'p': 2, 'weights': 'uniform'}\n", + "{'metric': 'euclidean', 'n_jobs': 1, 'n_neighbors': 2, 'p': 3, 'weights': 'uniform'}\n", "post crossover hyperparameters for individual 2\n", - "{'metric': 'euclidean', 'n_jobs': 1, 'n_neighbors': 4, 'p': 1, 'weights': 'uniform'}\n" + "{'metric': 'minkowski', 'n_jobs': 1, 'n_neighbors': 2, 'p': 3, 'weights': 'distance'}\n" ] } ], @@ -639,10 +639,10 @@ " /* fitted */\n", " background-color: var(--sklearn-color-fitted-level-3);\n", "}\n", - "
KNeighborsClassifier(metric='euclidean', n_jobs=1, n_neighbors=9)
In a Jupyter environment, please rerun this cell to show the HTML representation or trust the notebook.
On GitHub, the HTML representation is unable to render, please try loading this page with nbviewer.org.
" + "
KNeighborsClassifier(metric='euclidean', n_jobs=1, n_neighbors=2, p=3)
In a Jupyter environment, please rerun this cell to show the HTML representation or trust the notebook.
On GitHub, the HTML representation is unable to render, please try loading this page with nbviewer.org.
" ], "text/plain": [ - "KNeighborsClassifier(metric='euclidean', n_jobs=1, n_neighbors=9)" + "KNeighborsClassifier(metric='euclidean', n_jobs=1, n_neighbors=2, p=3)" ] }, "execution_count": 5, @@ -663,13 +663,13 @@ }, { "cell_type": "code", - "execution_count": 1, + "execution_count": 6, "metadata": {}, "outputs": [ { "data": { "text/html": [ - "
KNeighborsClassifier(n_neighbors=10)
In a Jupyter environment, please rerun this cell to show the HTML representation or trust the notebook.
On GitHub, the HTML representation is unable to render, please try loading this page with nbviewer.org.
" + "
KNeighborsClassifier(n_neighbors=10)
In a Jupyter environment, please rerun this cell to show the HTML representation or trust the notebook.
On GitHub, the HTML representation is unable to render, please try loading this page with nbviewer.org.
" ], "text/plain": [ "KNeighborsClassifier(n_neighbors=10)" ] }, - "execution_count": 1, + "execution_count": 6, "metadata": {}, "output_type": "execute_result" } @@ -1121,16 +1121,16 @@ }, { "cell_type": "code", - "execution_count": 6, + "execution_count": 7, "metadata": {}, "outputs": [ { "data": { "text/plain": [ - "" + "" ] }, - "execution_count": 6, + "execution_count": 7, "metadata": {}, "output_type": "execute_result" } @@ -1228,7 +1228,7 @@ }, { "cell_type": "code", - "execution_count": 7, + "execution_count": 8, "metadata": {}, "outputs": [ { @@ -1241,7 +1241,7 @@ { "data": { "text/html": [ - "
LogisticRegression(C=99.0450142669678, class_weight='balanced', dual=True,\n",
-       "                   max_iter=1000, n_jobs=1, solver='liblinear')
In a Jupyter environment, please rerun this cell to show the HTML representation or trust the notebook.
On GitHub, the HTML representation is unable to render, please try loading this page with nbviewer.org.
" + "
KNeighborsClassifier(metric='euclidean', n_jobs=1, n_neighbors=2, p=1)
In a Jupyter environment, please rerun this cell to show the HTML representation or trust the notebook.
On GitHub, the HTML representation is unable to render, please try loading this page with nbviewer.org.
" ], "text/plain": [ - "LogisticRegression(C=99.0450142669678, class_weight='balanced', dual=True,\n", - " max_iter=1000, n_jobs=1, solver='liblinear')" + "KNeighborsClassifier(metric='euclidean', n_jobs=1, n_neighbors=2, p=1)" ] }, - "execution_count": 7, + "execution_count": 8, "metadata": {}, "output_type": "execute_result" } @@ -1668,7 +1665,7 @@ }, { "cell_type": "code", - "execution_count": 8, + "execution_count": 9, "metadata": {}, "outputs": [ { @@ -1681,7 +1678,7 @@ { "data": { "text/html": [ - "
KNeighborsClassifier(metric='euclidean', n_jobs=1, n_neighbors=4, p=3,\n",
-       "                     weights='distance')
In a Jupyter environment, please rerun this cell to show the HTML representation or trust the notebook.
On GitHub, the HTML representation is unable to render, please try loading this page with nbviewer.org.
" + "
KNeighborsClassifier(metric='euclidean', n_jobs=1, n_neighbors=9, p=3)
In a Jupyter environment, please rerun this cell to show the HTML representation or trust the notebook.
On GitHub, the HTML representation is unable to render, please try loading this page with nbviewer.org.
" ], "text/plain": [ - "KNeighborsClassifier(metric='euclidean', n_jobs=1, n_neighbors=4, p=3,\n", - " weights='distance')" + "KNeighborsClassifier(metric='euclidean', n_jobs=1, n_neighbors=9, p=3)" ] }, - "execution_count": 8, + "execution_count": 9, "metadata": {}, "output_type": "execute_result" } @@ -2125,7 +2119,7 @@ }, { "cell_type": "code", - "execution_count": 9, + "execution_count": 10, "metadata": {}, "outputs": [ { @@ -2138,7 +2132,7 @@ { "data": { "text/html": [ - "
DecisionTreeClassifier(criterion='entropy', max_depth=2, max_features='log2',\n",
-       "                       min_samples_leaf=4, min_samples_split=10)
In a Jupyter environment, please rerun this cell to show the HTML representation or trust the notebook.
On GitHub, the HTML representation is unable to render, please try loading this page with nbviewer.org.
" + "
KNeighborsClassifier(n_jobs=1, n_neighbors=2)
In a Jupyter environment, please rerun this cell to show the HTML representation or trust the notebook.
On GitHub, the HTML representation is unable to render, please try loading this page with nbviewer.org.
" ], "text/plain": [ - "DecisionTreeClassifier(criterion='entropy', max_depth=2, max_features='log2',\n", - " min_samples_leaf=4, min_samples_split=10)" + "KNeighborsClassifier(n_jobs=1, n_neighbors=2)" ] }, - "execution_count": 9, + "execution_count": 10, "metadata": {}, "output_type": "execute_result" } @@ -2566,7 +2557,7 @@ }, { "cell_type": "code", - "execution_count": 10, + "execution_count": 11, "metadata": {}, "outputs": [ { @@ -2579,7 +2570,7 @@ { "data": { "text/html": [ - "
DecisionTreeClassifier(criterion='entropy', max_depth=25, max_features='log2',\n",
-       "                       min_samples_leaf=6, min_samples_split=13)
In a Jupyter environment, please rerun this cell to show the HTML representation or trust the notebook.
On GitHub, the HTML representation is unable to render, please try loading this page with nbviewer.org.
" + "
KNeighborsClassifier(metric='euclidean', n_jobs=1, n_neighbors=1, p=1,\n",
+       "                     weights='distance')
In a Jupyter environment, please rerun this cell to show the HTML representation or trust the notebook.
On GitHub, the HTML representation is unable to render, please try loading this page with nbviewer.org.
" ], "text/plain": [ - "DecisionTreeClassifier(criterion='entropy', max_depth=25, max_features='log2',\n", - " min_samples_leaf=6, min_samples_split=13)" + "KNeighborsClassifier(metric='euclidean', n_jobs=1, n_neighbors=1, p=1,\n", + " weights='distance')" ] }, - "execution_count": 10, + "execution_count": 11, "metadata": {}, "output_type": "execute_result" } @@ -3004,7 +2995,7 @@ }, { "cell_type": "code", - "execution_count": 11, + "execution_count": 12, "metadata": {}, "outputs": [ { @@ -3017,7 +3008,7 @@ { "data": { "text/html": [ - "
BernoulliNB(alpha=1.1043626639293316)
In a Jupyter environment, please rerun this cell to show the HTML representation or trust the notebook.
On GitHub, the HTML representation is unable to render, please try loading this page with nbviewer.org.
" + "
MLPClassifier(alpha=0.0013923360596347684, hidden_layer_sizes=[443],\n",
+       "              learning_rate=0.08711410737884528, n_iter_no_change=32)
In a Jupyter environment, please rerun this cell to show the HTML representation or trust the notebook.
On GitHub, the HTML representation is unable to render, please try loading this page with nbviewer.org.
" ], "text/plain": [ - "BernoulliNB(alpha=1.1043626639293316)" + "MLPClassifier(alpha=0.0013923360596347684, hidden_layer_sizes=[443],\n", + " learning_rate=0.08711410737884528, n_iter_no_change=32)" ] }, - "execution_count": 11, + "execution_count": 12, "metadata": {}, "output_type": "execute_result" } @@ -3442,7 +3436,7 @@ }, { "cell_type": "code", - "execution_count": 12, + "execution_count": 13, "metadata": {}, "outputs": [ { @@ -3455,7 +3449,7 @@ { "data": { "text/html": [ - "
SVC(C=0.007250294080496579, degree=2, max_iter=3000, probability=True)
In a Jupyter environment, please rerun this cell to show the HTML representation or trust the notebook.
On GitHub, the HTML representation is unable to render, please try loading this page with nbviewer.org.
" + "
KNeighborsClassifier(n_jobs=1, n_neighbors=1, p=3, weights='distance')
In a Jupyter environment, please rerun this cell to show the HTML representation or trust the notebook.
On GitHub, the HTML representation is unable to render, please try loading this page with nbviewer.org.
" ], "text/plain": [ - "SVC(C=0.007250294080496579, degree=2, max_iter=3000, probability=True)" + "KNeighborsClassifier(n_jobs=1, n_neighbors=1, p=3, weights='distance')" ] }, - "execution_count": 12, + "execution_count": 13, "metadata": {}, "output_type": "execute_result" } @@ -3886,7 +3880,7 @@ }, { "cell_type": "code", - "execution_count": 13, + "execution_count": 14, "metadata": {}, "outputs": [ { @@ -3899,7 +3893,7 @@ { "data": { "text/html": [ - "
Pipeline(steps=[('selectfwe', SelectFwe(alpha=0.0004402567631974485)),\n",
-       "                ('rbfsampler',\n",
-       "                 RBFSampler(gamma=0.5507862784926447, n_components=4)),\n",
-       "                ('multinomialnb', MultinomialNB(alpha=0.019703201853925403))])
In a Jupyter environment, please rerun this cell to show the HTML representation or trust the notebook.
On GitHub, the HTML representation is unable to render, please try loading this page with nbviewer.org.
" + "
Pipeline(steps=[('selectpercentile',\n",
+       "                 SelectPercentile(percentile=88.46562641339226)),\n",
+       "                ('columnonehotencoder', ColumnOneHotEncoder()),\n",
+       "                ('lineardiscriminantanalysis',\n",
+       "                 LinearDiscriminantAnalysis(shrinkage=0.40980175929055784,\n",
+       "                                            solver='eigen'))])
In a Jupyter environment, please rerun this cell to show the HTML representation or trust the notebook.
On GitHub, the HTML representation is unable to render, please try loading this page with nbviewer.org.
" ], "text/plain": [ - "Pipeline(steps=[('selectfwe', SelectFwe(alpha=0.0004402567631974485)),\n", - " ('rbfsampler',\n", - " RBFSampler(gamma=0.5507862784926447, n_components=4)),\n", - " ('multinomialnb', MultinomialNB(alpha=0.019703201853925403))])" + "Pipeline(steps=[('selectpercentile',\n", + " SelectPercentile(percentile=88.46562641339226)),\n", + " ('columnonehotencoder', ColumnOneHotEncoder()),\n", + " ('lineardiscriminantanalysis',\n", + " LinearDiscriminantAnalysis(shrinkage=0.40980175929055784,\n", + " solver='eigen'))])" ] }, - "execution_count": 13, + "execution_count": 14, "metadata": {}, "output_type": "execute_result" } @@ -4337,7 +4337,7 @@ }, { "cell_type": "code", - "execution_count": 14, + "execution_count": 15, "metadata": {}, "outputs": [ { @@ -4350,7 +4350,7 @@ { "data": { "text/html": [ - "
Pipeline(steps=[('selectpercentile',\n",
-       "                 SelectPercentile(percentile=1.0089148758394795)),\n",
-       "                ('nystroem',\n",
-       "                 Nystroem(gamma=0.2371171340711561, kernel='cosine',\n",
-       "                          n_components=73)),\n",
-       "                ('xgbclassifier',\n",
-       "                 XGBClassifier(base_score=None, booster=None, callbacks=None,\n",
-       "                               colsample_bylevel=None, colsample_bynode=None,\n",
-       "                               colsample_bytree=None, device=None,\n",
-       "                               early_stopping_rounds=None,\n",
-       "                               enab...\n",
-       "                               feature_types=None, gamma=None, grow_policy=None,\n",
-       "                               importance_type=None,\n",
-       "                               interaction_constraints=None,\n",
-       "                               learning_rate=0.003591562007988768, max_bin=None,\n",
-       "                               max_cat_threshold=None, max_cat_to_onehot=None,\n",
-       "                               max_delta_step=None, max_depth=8,\n",
-       "                               max_leaves=None, min_child_weight=1, missing=nan,\n",
-       "                               monotone_constraints=None, multi_strategy=None,\n",
-       "                               n_estimators=100, n_jobs=1,\n",
-       "                               num_parallel_tree=None, random_state=None, ...))])
In a Jupyter environment, please rerun this cell to show the HTML representation or trust the notebook.
On GitHub, the HTML representation is unable to render, please try loading this page with nbviewer.org.
" + "
Pipeline(steps=[('selectpercentile',\n",
+       "                 SelectPercentile(percentile=90.16586548452537)),\n",
+       "                ('binarizer', Binarizer(threshold=0.5722157107778224)),\n",
+       "                ('randomforestclassifier',\n",
+       "                 RandomForestClassifier(bootstrap=False, max_features=76,\n",
+       "                                        min_samples_leaf=4,\n",
+       "                                        min_samples_split=14,\n",
+       "                                        n_estimators=128))])
In a Jupyter environment, please rerun this cell to show the HTML representation or trust the notebook.
On GitHub, the HTML representation is unable to render, please try loading this page with nbviewer.org.
" ], "text/plain": [ "Pipeline(steps=[('selectpercentile',\n", - " SelectPercentile(percentile=1.0089148758394795)),\n", - " ('nystroem',\n", - " Nystroem(gamma=0.2371171340711561, kernel='cosine',\n", - " n_components=73)),\n", - " ('xgbclassifier',\n", - " XGBClassifier(base_score=None, booster=None, callbacks=None,\n", - " colsample_bylevel=None, colsample_bynode=None,\n", - " colsample_bytree=None, device=None,\n", - " early_stopping_rounds=None,\n", - " enab...\n", - " feature_types=None, gamma=None, grow_policy=None,\n", - " importance_type=None,\n", - " interaction_constraints=None,\n", - " learning_rate=0.003591562007988768, max_bin=None,\n", - " max_cat_threshold=None, max_cat_to_onehot=None,\n", - " max_delta_step=None, max_depth=8,\n", - " max_leaves=None, min_child_weight=1, missing=nan,\n", - " monotone_constraints=None, multi_strategy=None,\n", - " n_estimators=100, n_jobs=1,\n", - " num_parallel_tree=None, random_state=None, ...))])" + " SelectPercentile(percentile=90.16586548452537)),\n", + " ('binarizer', Binarizer(threshold=0.5722157107778224)),\n", + " ('randomforestclassifier',\n", + " RandomForestClassifier(bootstrap=False, max_features=76,\n", + " min_samples_leaf=4,\n", + " min_samples_split=14,\n", + " n_estimators=128))])" ] }, - "execution_count": 14, + "execution_count": 15, "metadata": {}, "output_type": "execute_result" } @@ -4851,20 +4803,24 @@ }, { "cell_type": "code", - "execution_count": 15, + "execution_count": 16, "metadata": {}, "outputs": [ { "name": "stderr", "output_type": "stream", "text": [ - "Generation: 100%|██████████| 5/5 [00:05<00:00, 1.16s/it]\n" + "Generation: 100%|██████████| 5/5 [00:07<00:00, 1.55s/it]\n", + "/home/ribeirop/miniconda3/envs/tpot2env/lib/python3.10/site-packages/sklearn/preprocessing/_data.py:2762: UserWarning: n_quantiles (1063) is greater than the total number of samples (1000). 
n_quantiles is set to n_samples.\n", + " warnings.warn(\n", + "/home/ribeirop/miniconda3/envs/tpot2env/lib/python3.10/site-packages/sklearn/preprocessing/_data.py:2762: UserWarning: n_quantiles (1063) is greater than the total number of samples (1000). n_quantiles is set to n_samples.\n", + " warnings.warn(\n" ] }, { "data": { "text/html": [ - "
TPOTEstimator(classification=True, generations=5, max_eval_time_seconds=300,\n",
+       "
TPOTEstimator(classification=True, generations=5, max_eval_time_seconds=300,\n",
        "              population_size=10, scorers=['roc_auc'], scorers_weights=[1],\n",
-       "              search_space=<tpot2.search_spaces.pipelines.graph.GraphPipeline object at 0x7544c5ab8f40>,\n",
-       "              verbose=2)
In a Jupyter environment, please rerun this cell to show the HTML representation or trust the notebook.
On GitHub, the HTML representation is unable to render, please try loading this page with nbviewer.org.
FeatureSetSelector(name='group_one', sel_subset=['a', 'b', 'c'])
[('DecisionTreeClassifier_1', 'RBFSampler_1'), ('DecisionTreeClassifier_1', 'QuantileTransformer_1'), ('DecisionTreeClassifier_1', 'ColumnOneHotEncoder_1'), ('ColumnOneHotEncoder_1', 'ZeroCount_1'), ('QuantileTransformer_1', 'ColumnOneHotEncoder_1'), ('QuantileTransformer_1', 'PowerTransformer_2'), ('RBFSampler_1', 'QuantileTransformer_2'), ('PowerTransformer_1', 'Binarizer_1'), ('Binarizer_1', 'QuantileTransformer_2'), ('ZeroCount_1', 'PowerTransformer_1'), ('PowerTransformer_2', 'ZeroCount_1')]
" ], "text/plain": [ "Pipeline(steps=[('featuresetselector',\n", " FeatureSetSelector(name='group_one',\n", " sel_subset=['a', 'b', 'c'])),\n", " ('graphpipeline',\n", - " GraphPipeline(graph=))])" + " GraphPipeline(graph=))])" ] }, "execution_count": 4, @@ -815,18 +819,24 @@ "name": "stderr", "output_type": "stream", "text": [ - "Generation: 0%| | 0/5 [00:00" ] @@ -903,18 +913,14 @@ "name": "stderr", "output_type": "stream", "text": [ - "Generation: 0%| | 0/5 [00:00" ] @@ -96,20 +90,17 @@ }, { "cell_type": "code", - "execution_count": 8, + "execution_count": 2, "metadata": {}, "outputs": [ { "name": "stdout", "output_type": "stream", "text": [ - "LogisticRegression_1 : LogisticRegression(C=0.28751652817028706, class_weight='balanced',\n", - " max_iter=1000, n_jobs=1, solver='liblinear')\n", - "FeatureSetSelector_1 : FeatureSetSelector(name='93', sel_subset=[93])\n", - "FeatureSetSelector_2 : FeatureSetSelector(name='25', sel_subset=[25])\n", - "LETransformer_1 : LETransformer()\n", - "LTTransformer_1 : LTTransformer()\n", - "MinTransformer_1 : MinTransformer()\n" + "LogisticRegression_1 : LogisticRegression(C=1.7363936958422204, class_weight='balanced', max_iter=1000,\n", + " n_jobs=1, solver='saga')\n", + "AddTransformer_1 : AddTransformer()\n", + "FeatureSetSelector_1 : FeatureSetSelector(name='84', sel_subset=[84])\n" ] } ], @@ -121,12 +112,12 @@ }, { "cell_type": "code", - "execution_count": 9, + "execution_count": 3, "metadata": {}, "outputs": [ { "data": { - "image/png": "", + "image/png": "", "text/plain": [ "
" ] @@ -156,38 +147,26 @@ }, { "cell_type": "code", - "execution_count": 12, + "execution_count": 4, "metadata": {}, "outputs": [ { "name": "stderr", "output_type": "stream", "text": [ - "/home/ribeirop/miniconda3/envs/tpot2env/lib/python3.10/site-packages/distributed/node.py:182: UserWarning: Port 8787 is already in use.\n", - "Perhaps you already have a cluster running?\n", - "Hosting the HTTP server on port 37681 instead\n", - " warnings.warn(\n", - "/home/ribeirop/common/Projects/TPOT_Dev/tpot2/tpot2/population.py:204: FutureWarning: Setting an item of incompatible dtype is deprecated and will raise an error in a future version of pandas. Value '['INVALID', 'INVALID', 'INVALID', 'INVALID', 'INVALID', 'INVALID', 'INVALID', 'INVALID', 'INVALID', 'INVALID']' has dtype incompatible with float64, please explicitly cast to a compatible dtype first.\n", - " self.evaluated_individuals.loc[key,column_names] = data\n", - "/home/ribeirop/common/Projects/TPOT_Dev/tpot2/tpot2/population.py:204: FutureWarning: Setting an item of incompatible dtype is deprecated and will raise an error in a future version of pandas. Value '['INVALID', 'INVALID', 'INVALID', 'INVALID', 'INVALID', 'INVALID', 'INVALID', 'INVALID', 'INVALID', 'INVALID']' has dtype incompatible with float64, please explicitly cast to a compatible dtype first.\n", - " self.evaluated_individuals.loc[key,column_names] = data\n", - "/home/ribeirop/common/Projects/TPOT_Dev/tpot2/tpot2/population.py:381: FutureWarning: Setting an item of incompatible dtype is deprecated and will raise an error in a future version of pandas. 
Value 'ind_crossover' has dtype incompatible with float64, please explicitly cast to a compatible dtype first.\n", - " self.evaluated_individuals.at[new_child.unique_id(),\"Variation_Function\"] = var_op\n", - "Generation: 100%|██████████| 20/20 [00:05<00:00, 3.52it/s]\n", - "/home/ribeirop/miniconda3/envs/tpot2env/lib/python3.10/site-packages/sklearn/linear_model/_stochastic_gradient.py:1575: ConvergenceWarning: Maximum number of iteration reached before convergence. Consider increasing max_iter to improve the fit.\n", - " warnings.warn(\n" + "Generation: 100%|██████████| 20/20 [00:09<00:00, 2.09it/s]\n" ] }, { "name": "stdout", "output_type": "stream", "text": [ - "-6120.015400135764\n" + "-4348.811587281301\n" ] }, { "data": { - "image/png": "", + "image/png": "", "text/plain": [ "
" ] @@ -206,7 +185,7 @@ "\n", "graph_search_space = tpot2.search_spaces.pipelines.GraphPipeline(\n", " root_search_space= tpot2.config.get_search_space(\"SGDRegressor\"),\n", - " leaf_search_space = tpot2.search_spaces.nodes.FSSNode(subsets=n_features), \n", + " leaf_search_space = tpot2.search_spaces.nodes.FSSNode(subsets=X_train.shape[1]), \n", " inner_search_space = tpot2.config.get_search_space([\"arithmatic\"]),\n", " max_size = 10,\n", ")\n", @@ -219,7 +198,7 @@ " n_jobs=32,\n", " classification=False,\n", " search_space = graph_search_space ,\n", - " verbose=1,\n", + " verbose=2,\n", " )\n", "\n", "\n", @@ -231,16 +210,25 @@ }, { "cell_type": "code", - "execution_count": 13, + "execution_count": 5, "metadata": {}, "outputs": [ { "name": "stdout", "output_type": "stream", "text": [ - "SGDRegressor_1 : SGDRegressor()\n", - "FeatureSetSelector_1 : FeatureSetSelector(name='7', sel_subset=[7])\n", - "FeatureSetSelector_2 : FeatureSetSelector(name='7', sel_subset=[7])\n" + "SGDRegressor_1 : SGDRegressor(alpha=6.014583593220849e-05, epsilon=2.109266488257155e-05,\n", + " eta0=0.06363149574923024, l1_ratio=2.519434640584705e-06,\n", + " learning_rate='constant', loss='squared_epsilon_insensitive',\n", + " penalty='elasticnet')\n", + "FeatureSetSelector_1 : FeatureSetSelector(name='8', sel_subset=[8])\n", + "FeatureSetSelector_2 : FeatureSetSelector(name='2', sel_subset=[2])\n", + "FeatureSetSelector_3 : FeatureSetSelector(name='9', sel_subset=[9])\n", + "GTTransformer_1 : GTTransformer()\n", + "LTTransformer_1 : LTTransformer()\n", + "LTTransformer_2 : LTTransformer()\n", + "FeatureSetSelector_4 : FeatureSetSelector(name='1', sel_subset=[1])\n", + "MaxTransformer_1 : MaxTransformer()\n" ] } ], @@ -252,12 +240,12 @@ }, { "cell_type": "code", - "execution_count": 14, + "execution_count": 6, "metadata": {}, "outputs": [ { "data": { - "image/png": 
"iVBORw0KGgoAAAANSUhEUgAAAlAAAAGwCAYAAABmTltaAAAAOXRFWHRTb2Z0d2FyZQBNYXRwbG90bGliIHZlcnNpb24zLjguMywgaHR0cHM6Ly9tYXRwbG90bGliLm9yZy/H5lhTAAAACXBIWXMAAA9hAAAPYQGoP6dpAAA/i0lEQVR4nO3de1yUZf7/8feAAqKAWoAYJKh51lBIxE4eKNJS2WrLQyWZpqaZghW2v0TJPOYhT9nmpq52wDLdWpU0TNuUVUMxLTXL8wHMVQFNUeD+/dGj+Tahxj0OMIOv5+NxP2qu67rv+dxX5rwf933NPRbDMAwBAACg1NwqugAAAABXQ4ACAAAwiQAFAABgEgEKAADAJAIUAACASQQoAAAAkwhQAAAAJlWp6AIqo+LiYh0/flw+Pj6yWCwVXQ4AACgFwzCUn5+vunXrys3t2teYCFBl4Pjx4woJCanoMgAAgB2OHDmi4ODga44hQJUBHx8fSb/+B/D19a3gagAAQGnk5eUpJCTE+jl+LQSoMvDbbTtfX18CFAAALqY0y29YRA4AAGASAQoAAMAkAhQAAIBJBCgAAACTCFAAAAAmEaAAAABMIkABAACYRIACAAAwiQAFAABgEk8iBwAALqOo2NCWA6d1Mv+iAny81Dasttzd/vzJ4Y5GgAIAAC4hbdcJjf3se53IvWhtC/LzUnK3ZnqgRVC51sItPAAA4PTSdp3Q4CXbbMKTJGXnXtTgJduUtutEudZDgAIAAE6tqNjQ2M++l3GFvt/axn72vYqKrzSibBCgAACAU9ty4HSJK0+/Z0g6kXtRWw6cLreaCFAAAMCpncy/eniyZ5wjEKAAAIBTC/Dxcug4RyBAAQAAp9Y2rLaC/Lx0tYcVWPTrt/HahtUut5oIUAAAwKm5u1mU3K2ZJJUIUb+9Tu7WrFyfB0WAAgAATu+BFkF664k2quNne5uujp+X3nqiTbk/B4oHaQIAAJfwQIsg3desDk8iBwAAMMPdzaLoBjdVdBncwgMAADCLAAUAAGASAQoAAMAkAhQAAIBJBCgAAACTCFAAAAAmEaAAAABMIkABAACYRIACAAAwiQAFAABgEgEKAADAJAIUAACASQQoAAAAkwhQAAAAJhGgAAAATCJAAQAAmESAAgAAMIkABQAAYBIBCgAAwCQCFAAAgEkEKAAAAJMIUAAAACYRoAAAAEwiQAEAAJhEgAIAADDJ5QJUQUGBwsPDZbFYlJWVZW0fM2aMLBZLia169erWMQsXLizR7+XlZXN8wzA0evRoBQUFqVq1aoqJidG+ffvK6/QAAIALcLkA9dJLL6lu3bol2keOHKkTJ07YbM2aNdNf//pXm3G+vr42Yw4dOmTTP3nyZM2cOVPz5s3T5s2bVb16dcXGxurixYtlel4AAMB1VKnoAsxYvXq11qxZo2XLlmn16tU2fTVq1FCNGjWsr3fs2KHvv/9e8+bNsxlnsVhUp06dKx7fMAzNmDFD/+///T/16NFDkvTPf/5TgYGBWrFihXr27OngMwIAAK7IZa5A5eTkaMCAAVq8eLG8vb3/dPz8+fPVqFEj3X333Tbt586dU7169RQSEqIePXrou+++s/YdOHBA2dnZiomJsbb5+fkpKipKGRkZV32vgoIC5eXl2WwAAKDycokAZRiG4uPjNWjQIEVGRv7p+IsXL+q9997TM888Y9PeuHFjvfvuu/rXv/6lJUuWqLi4WO3bt9fRo0clSdnZ2ZKkwMBAm/0CAwOtfVcyYcIE+fn5WbeQkBCzpwgAAFxIhQaopKSkKy78/v22Z88ezZo1S/n5+Ro1alSpjrt8+XLl5+erb9++Nu3R0dF66qmnFB4ernvvvVeffPKJ/P399fbbb1/XeYwaNUq5ubnW7ciRI9d1PAAA4NwqdA1UYmKi4uPjrzmmfv36WrdunTIyMuTp6WnTFxkZqT59+mjRokU27fPnz9dDDz1U4krSH1WtWlWtW7fWjz/+KEnWtVE5OTk
KCgqyjsvJyVF4ePhVj+Pp6VmiNgAAUHlVaIDy9/eXv7//n46bOXOmxo0bZ319/PhxxcbGKjU1VVFRUTZjDxw4oC+//FKffvrpnx63qKhIO3fuVNeuXSVJYWFhqlOnjtLT062BKS8vT5s3b9bgwYNNnBkAAKjMXOJbeLfeeqvN69++bdegQQMFBwfb9L377rsKCgpSly5dShwnJSVF7dq1U8OGDXX27FlNmTJFhw4dUv/+/SX9+g294cOHa9y4cbrtttsUFhamV199VXXr1lVcXFzZnBwAAHA5LhGgSqu4uFgLFy5UfHy83N3dS/SfOXNGAwYMUHZ2tmrVqqWIiAht2rRJzZo1s4556aWXdP78eT377LM6e/as7rrrLqWlpZV44CYAALhxWQzDMCq6iMomLy9Pfn5+ys3Nla+vb0WXAwAASsHM57dLPMYAAADAmRCgAAAATCJAAQAAmESAAgAAMIkABQAAYBIBCgAAwCQCFAAAgEkEKAAAAJMIUAAAACYRoAAAAEwiQAEAAJhEgAIAADCJAAUAAGASAQoAAMAkAhQAAIBJBCgAAACTCFAAAAAmEaAAAABMIkABAACYRIACAAAwiQAFAABgEgEKAADAJAIUAACASQQoAAAAkwhQAAAAJhGgAAAATCJAAQAAmESAAgAAMIkABQAAYBIBCgAAwCQCFAAAgEkEKAAAAJMIUAAAACYRoAAAAEwiQAEAAJhEgAIAADCJAAUAAGASAQoAAMAkAhQAAIBJBCgAAACTTAeowsJCpaSk6OjRo2VRDwAAgNMzHaCqVKmiKVOmqLCwsCzqAQAAcHp23cLr1KmTNmzY4OhaAAAAXIJdAapLly5KSkrSyJEj9cEHH+jTTz+12cpSQUGBwsPDZbFYlJWVZdP3+eefq127dvLx8ZG/v78eeeQRHTx40GbM+vXr1aZNG3l6eqphw4ZauHBhifeYM2eOQkND5eXlpaioKG3ZsqXsTggAALgci2EYhtmd3NyunrssFouKioquq6hreeGFF7Rv3z6tXr1a27dvV3h4uCTpwIEDatq0qRISEvTMM88oNzdXI0aMUH5+vrZt22Yd06JFCw0aNEj9+/dXenq6hg8frpUrVyo2NlaSlJqaqqeeekrz5s1TVFSUZsyYoY8++kh79+5VQEBAqWrMy8uTn5+fcnNz5evrWybzAAAAHMvM57ddAaqirF69WgkJCVq2bJmaN29uE6A+/vhj9erVSwUFBdaA99lnn6lHjx4qKChQ1apV9fLLL2vlypXatWuX9Zg9e/bU2bNnlZaWJkmKiorSHXfcodmzZ0uSiouLFRISoueff15JSUmlqpMABQCA6zHz+e0yjzHIycnRgAEDtHjxYnl7e5foj4iIkJubmxYsWKCioiLl5uZq8eLFiomJUdWqVSVJGRkZiomJsdkvNjZWGRkZkqRLly4pMzPTZoybm5tiYmKsY66koKBAeXl5NhsAAKi87A5QGzZsULdu3dSwYUM1bNhQ3bt313/+8x9H1mZlGIbi4+M1aNAgRUZGXnFMWFiY1qxZo1deeUWenp6qWbOmjh49qqVLl1rHZGdnKzAw0Ga/wMBA5eXl6cKFCzp16pSKioquOCY7O/uq9U2YMEF+fn7WLSQk5DrOFgAAODu7AtSSJUsUExMjb29vDRs2TMOGDVO1atXUuXNnvf/++6U+TlJSkiwWyzW3PXv2aNasWcrPz9eoUaOueqzs7GwNGDBAffv21datW7VhwwZ5eHjo0UcfVVnfpRw1apRyc3Ot25EjR8r0/QAAQMWqYs9Or7/+uiZPnqwRI0ZY24YNG6Zp06bptddeU+/evUt1nMTERMXHx19zTP369bVu3TplZGTI09PTpi8yMlJ9+vTRokWLNGfOHPn5+Wny5MnW/iVLligkJESbN29Wu3btVKdOHeXk5NgcIycnR76+vqpWrZrc3d3l7u5+xTF16tS5ao2enp4lagMAAJWXXQFq//796tatW4n27t2765V
XXin1cfz9/eXv7/+n42bOnKlx48ZZXx8/flyxsbFKTU1VVFSUJOmXX34p8e1Ad3d3Sb8uBJek6OhorVq1ymbM2rVrFR0dLUny8PBQRESE0tPTFRcXZ903PT1dQ4cOLfV5AQCAys2uW3ghISFKT08v0f7FF1+UyfqfW2+9VS1atLBujRo1kiQ1aNBAwcHBkqQHH3xQW7duVUpKivbt26dt27bp6aefVr169dS6dWtJ0qBBg7R//3699NJL2rNnj+bOnaulS5faXElLSEjQO++8o0WLFmn37t0aPHiwzp8/r6efftrh5wUAAFyTXVegEhMTNWzYMGVlZal9+/aSpI0bN2rhwoV68803HVpgaXXq1Envv/++Jk+erMmTJ8vb21vR0dFKS0tTtWrVJP260HzlypUaMWKE3nzzTQUHB2v+/PnWZ0BJ0uOPP66ff/5Zo0ePVnZ2tsLDw5WWllZiYTkAALhx2f0cqOXLl2vq1KnavXu3JKlp06Z68cUX1aNHD4cW6Ip4DhQAAK7HzOe36StQhYWFGj9+vPr166evv/7a7iIBAABclek1UFWqVNHkyZNVWFhYFvUAAAA4PbsWkXfu3FkbNmxwdC0AAAAuwa5F5F26dFFSUpJ27typiIgIVa9e3aa/e/fuDikOAADAGdm1iPyPz1uyOaDFoqKiousqytWxiBwAANdTpovIpf97MCUAAMCNyPQaqMuXL6tKlSratWtXWdQDAADg9EwHqKpVq+rWW2+94W/TAQCAG5dd38L729/+pldeeUWnT592dD0AAABOz641ULNnz9aPP/6ounXrql69eiW+hbdt2zaHFAcAAOCM7ApQcXFxDi4DAADAddj9W3i4Oh5jAACA6zHz+W3XGihJOnv2rObPn69Ro0ZZ10Jt27ZNx44ds/eQAAAALsGuW3jffvutYmJi5Ofnp4MHD2rAgAGqXbu2PvnkEx0+fFj//Oc/HV0nAACA07DrClRCQoLi4+O1b98+eXl5Wdu7du2qr776ymHFAQAAOCO7AtTWrVs1cODAEu233HKLsrOzr7soAAAAZ2ZXgPL09FReXl6J9h9++EH+/v7XXRQAAIAzsytAde/eXSkpKbp8+bKkX39A+PDhw3r55Zf1yCOPOLRAAAAAZ2NXgJo6darOnTungIAAXbhwQffee68aNmwoHx8fvf76646uEQAAwKnY9S08Pz8/rV27Vhs3btSOHTt07tw5tWnTRjExMY6uDwAAwOmU6YM0W7ZsqVWrVikkJKSs3sIp8SBNAABcT7k8SLM0Dh48aF0nBQAAUFmUaYACAACojAhQAAAAJhGgAAAATCJAAQAAmESAAgAAMKlMA9Tbb7+twMDAsnwLAACAclfqB2nOnDmz1AcdNmyYJKl3797mKwIAAHBypX6QZlhYmM3rn3/+Wb/88otq1qwpSTp79qy8vb0VEBCg/fv3O7xQV8KDNAEAcD1l8iDNAwcOWLfXX39d4eHh2r17t06fPq3Tp09r9+7datOmjV577bXrPgEAAABnZtdPuTRo0EAff/yxWrdubdOemZmpRx99VAcOHHBYga6IK1AAALieMv8plxMnTqiwsLBEe1FRkXJycuw5JAAAgMuwK0B17txZAwcO1LZt26xtmZmZGjx4sGJiYhxWHAAAgDOyK0C9++67qlOnjiIjI+Xp6SlPT0+1bdtWgYGBmj9/vqNrBAAAcCqlfozB7/n7+2vVqlX64YcftGfPHklSkyZN1KhRI4cWBwAA4IzsClC/CQ0NlWEYatCggapUua5DAQAAuAy7buH98ssveuaZZ+Tt7a3mzZvr8OHDkqTnn39eEydOdGiBAAAAzsauADVq1Cjt2LFD69evl5eXl7U9JiZGqampDisOAADAGdl1323FihVKTU1Vu3btZLFYrO3NmzfXTz/95LDiAAAAnJFdV6B+/vlnBQQElGg/f/68TaACAACojOwKUJGRkVq5cqX19W+haf78+YqOjnZMZQAAAE7Krlt
448ePV5cuXfT999+rsLBQb775pr7//ntt2rRJGzZscHSNAAAATsWuK1B33XWXduzYocLCQrVs2VJr1qxRQECAMjIyFBER4egabRQUFCg8PFwWi0VZWVk2fZ9//rnatWsnHx8f+fv765FHHtHBgwet/evXr5fFYimxZWdn2xxnzpw5Cg0NlZeXl6KiorRly5YyPScAAOBaTAeoy5cvq1+/frJYLHrnnXe0ZcsWff/991qyZIlatmxZFjXaeOmll1S3bt0S7QcOHFCPHj3UqVMnZWVl6fPPP9epU6f08MMPlxi7d+9enThxwrr9fj1XamqqEhISlJycrG3btun2229XbGysTp48WabnBQAAXIfpAFW1alUtW7asLGr5U6tXr9aaNWv0xhtvlOjLzMxUUVGRxo0bpwYNGqhNmzYaOXKksrKydPnyZZuxAQEBqlOnjnVzc/u/aZg2bZoGDBigp59+Ws2aNdO8efPk7e2td99996p1FRQUKC8vz2YDAACVl1238OLi4rRixQoHl3JtOTk5GjBggBYvXixvb+8S/REREXJzc9OCBQtUVFSk3NxcLV68WDExMapatarN2PDwcAUFBem+++7Txo0bre2XLl1SZmamzQ8iu7m5KSYmRhkZGVetbcKECfLz87NuISEhDjhjAADgrOxaRH7bbbcpJSVFGzduVEREhKpXr27TP2zYMIcU9xvDMBQfH69BgwYpMjLSZl3Tb8LCwrRmzRo99thjGjhwoIqKihQdHa1Vq1ZZxwQFBWnevHmKjIxUQUGB5s+frw4dOmjz5s1q06aNTp06paKiIgUGBtocOzAw0Pqbf1cyatQoJSQkWF/n5eURogAAqMTsClD/+Mc/VLNmTWVmZiozM9Omz2KxlDpAJSUladKkSdccs3v3bq1Zs0b5+fkaNWrUVcdlZ2drwIAB6tu3r3r16qX8/HyNHj1ajz76qNauXSuLxaLGjRurcePG1n3at2+vn376SdOnT9fixYtLVfOVeHp6ytPT0+79AQCAa7ErQB04cMAhb56YmKj4+Phrjqlfv77WrVunjIyMEiElMjJSffr00aJFizRnzhz5+flp8uTJ1v4lS5YoJCREmzdvVrt27a54/LZt2+rrr7+WJN18881yd3dXTk6OzZicnBzVqVPHjjMEAACVkV0BylH8/f3l7+//p+NmzpypcePGWV8fP35csbGxSk1NVVRUlKRff+D494vBJcnd3V2SVFxcfNVjZ2VlKSgoSJLk4eGhiIgIpaenKy4uzrpvenq6hg4daurcAABA5WV3gDp69Kg+/fRTHT58WJcuXbLpmzZt2nUX9nu33nqrzesaNWpIkho0aKDg4GBJ0oMPPqjp06crJSXFegvvlVdeUb169dS6dWtJ0owZMxQWFqbmzZvr4sWLmj9/vtatW6c1a9ZYj52QkKC+ffsqMjJSbdu21YwZM3T+/Hk9/fTTDj0nAADguuwKUOnp6erevbvq16+vPXv2qEWLFjp48KAMw1CbNm0cXWOpdOrUSe+//74mT56syZMny9vbW9HR0UpLS1O1atUk/fotu8TERB07dkze3t5q1aqVvvjiC3Xs2NF6nMcff1w///yzRo8erezsbIWHhystLa3EwnIAAHDjshiGYZjdqW3bturSpYvGjh0rHx8f7dixQwEBAerTp48eeOABDR48uCxqdRl5eXny8/NTbm6ufH19K7ocAABQCmY+v+16DtTu3bv11FNPSZKqVKmiCxcuqEaNGkpJSfnTb9UBAAC4OrsCVPXq1a3rnoKCgvTTTz9Z+06dOuWYygAAAJyUXWug2rVrp6+//lpNmzZV165dlZiYqJ07d+qTTz656uMCAAAAKgu7AtS0adN07tw5SdLYsWN17tw5paam6rbbbnP4N/AAAACcjV2LyHFtLCIHAMD1lPkicgAAgBuZXbfw3NzcZLFYrtpfVFRkd0EAAADOzq4AtXz5cpvXly9f1vbt27Vo0SKNHTvWIYUBAAA
4K4eugXr//feVmpqqf/3rX446pEtiDRQAAK6nwtZAtWvXTunp6Y48JAAAgNNxWIC6cOGCZs6cqVtuucVRhwQAAHBKdq2BqlWrls0icsMwlJ+fL29vby1ZssRhxQEAADgjuwLU9OnTbQKUm5ub/P39FRUVpVq1ajmsOAAAAGdkV4CKj493cBkAAACuw64A9e2335Z6bKtWrex5CwAAAKdlV4AKDw+/5oM0pV/XRVksFh6qCQAAKh27voX3ySefKCwsTHPnztX27du1fft2zZ07Vw0aNNCyZcu0f/9+HThwQPv373d0vQAAABXOritQ48eP18yZM9W1a1drW6tWrRQSEqJXX31VmZmZDisQAADA2dh1BWrnzp0KCwsr0R4WFqbvv//+uosCAABwZnYFqKZNm2rChAm6dOmSte3SpUuaMGGCmjZt6rDiAAAAnJFdt/DmzZunbt26KTg42Potu2+//VYWi0WfffaZQwsEAABwNnb/mPD58+f13nvvac+ePZJ+vSrVu3dvVa9e3aEFuiJ+TBgAANdj5vPbritQklS9enU9++yz9u4OAADgsuxaA7Vo0SKtXLnS+vqll15SzZo11b59ex06dMhhxQEAADgjuwLU+PHjVa1aNUlSRkaGZs+ercmTJ+vmm2/WiBEjHFogAACAs7HrFt6RI0fUsGFDSdKKFSv06KOP6tlnn9Wdd96pDh06OLI+AAAAp2PXFagaNWrof//7nyRpzZo1uu+++yRJXl5eunDhguOqAwAAcEJ2XYG677771L9/f7Vu3Vo//PCD9Ynk3333nUJDQx1ZHwAAgNOx6wrUnDlzFB0drZ9//lnLli3TTTfdJEnKzMxUr169HFogAACAs7H7OVCl8dxzzyklJUU333xzWb2FU+I5UAAAuB4zn992XYEqrSVLligvL68s3wIAAKDclWmAKsOLWwAAABWmTAMUAABAZUSAAgAAMIkABQAAYBIBCgAAwKQyDVBPPPEEX+MHAACVjl1PIpeks2fPasuWLTp58qSKi4tt+p566ilJ0ltvvXV91QEAADghuwLUZ599pj59+ujcuXPy9fWVxWKx9lksFmuAAgAAqIzsuoWXmJiofv366dy5czp79qzOnDlj3U6fPu3oGgEAAJyKXQHq2LFjGjZsmLy9vR1dDwAAgNOzK0DFxsbqm2++cXQtAAAALsGuNVAPPvigXnzxRX3//fdq2bKlqlatatPfvXt3hxQHAADgjOy6AjVgwAAdOXJEKSkp+utf/6q4uDjr9pe//MXRNdooKChQeHi4LBaLsrKybPqWLl2q8PBweXt7q169epoyZUqJ/devX682bdrI09NTDRs21MKFC0uMmTNnjkJDQ+Xl5aWoqCht2bKljM4GAAC4IrsCVHFx8VW3oqIiR9do46WXXlLdunVLtK9evVp9+vTRoEGDtGvXLs2dO1fTp0/X7NmzrWMOHDigBx98UB07dlRWVpaGDx+u/v376/PPP7eOSU1NVUJCgpKTk7Vt2zbdfvvtio2N1cmTJ8v0vAAAgOuwGIZhVHQRpbV69WolJCRo2bJlat68ubZv367w8HBJUu/evXX58mV99NFH1vGzZs3S5MmTdfjwYVksFr388stauXKldu3aZR3Ts2dPnT17VmlpaZKkqKgo3XHHHdbgVVxcrJCQED3//PNKSkq6Yl0FBQUqKCiwvs7Ly1NISIhyc3N5kCgAAC4iLy9Pfn5+pfr8tvtBmufPn9eGDRt0+PBhXbp0yaZv2LBh9h72qnJycjRgwACtWLHiit/+KygoKNFerVo1HT16VIcOHVJoaKgyMjIUExNjMyY2NlbDhw+XJF26dEmZmZkaNWqUtd/NzU0xMTHKyMi4am0TJkzQ2LFjr+PsAACAK7ErQG3fvl1du3bVL7/8ovPnz6t27do6deqUvL29FRAQ4PAAZRiG4uPjNWjQIEVGRurgwYMlxsTGxmrEiBGKj49Xx44d9eOPP2rq1KmSpBMnTig0NFT
Z2dkKDAy02S8wMFB5eXm6cOGCzpw5o6KioiuO2bNnz1XrGzVqlBISEqyvf7sCBQAAKie71kCNGDFC3bp105kzZ1StWjX997//1aFDhxQREaE33nij1MdJSkqSxWK55rZnzx7NmjVL+fn5NleG/mjAgAEaOnSoHnroIXl4eKhdu3bq2bPnryfpVra/mezp6SlfX1+bDQAAVF52XYHKysrS22+/LTc3N7m7u6ugoED169fX5MmT1bdvXz388MOlOk5iYqLi4+OvOaZ+/fpat26dMjIy5OnpadMXGRmpPn36aNGiRbJYLJo0aZLGjx+v7Oxs+fv7Kz093XoMSapTp45ycnJsjpGTkyNfX19Vq1ZN7u7ucnd3v+KYOnXqlOqcAABA5WdXgKpatar1qk5AQIAOHz6spk2bys/PT0eOHCn1cfz9/eXv7/+n42bOnKlx48ZZXx8/flyxsbFKTU1VVFSUzVh3d3fdcsstkqQPPvhA0dHR1veIjo7WqlWrbMavXbtW0dHRkiQPDw9FREQoPT1dcXFxkn5dRJ6enq6hQ4eW+rwAAEDlZleAat26tbZu3arbbrtN9957r0aPHq1Tp05p8eLFatGihaNr1K233mrzukaNGpKkBg0aKDg4WJJ06tQpffzxx+rQoYMuXryoBQsW6KOPPtKGDRus+w0aNEizZ8/WSy+9pH79+mndunVaunSpVq5caR2TkJCgvn37KjIyUm3bttWMGTN0/vx5Pf300w4/LwAA4JrsClDjx49Xfn6+JOn111/XU089pcGDB+u2227Tu+++69ACzVi0aJFGjhwpwzAUHR2t9evXq23bttb+sLAwrVy5UiNGjNCbb76p4OBgzZ8/X7GxsdYxjz/+uH7++WeNHj1a2dnZCg8PV1paWomF5QAA4MblUs+BchVmniMBAACcg5nPb7u/nlZYWKgvvvhCb7/9tvVq1PHjx3Xu3Dl7DwkAAOAS7LqFd+jQIT3wwAM6fPiwCgoKdN9998nHx0eTJk1SQUGB5s2b5+g6AQAAnIZdV6BeeOEFRUZGWp8D9Zu//OUv1kcHAAAAVFZ2XYH6z3/+o02bNsnDw8OmPTQ0VMeOHXNIYQAAAM7KritQxcXFKioqKtF+9OhR+fj4XHdRAAAAzsyuAHX//fdrxowZ1tcWi0Xnzp1TcnKyunbt6qjaAAAAnJJdjzE4evSoYmNjZRiG9u3bp8jISO3bt08333yzvvrqKwUEBJRFrS6DxxgAAOB6zHx+2/0cqMLCQn344Yf69ttvde7cObVp00Z9+vSxWVR+oyJAAQDgesx8ftu1iFySqlSpoieeeMLe3QEAAFyW3QHq+PHj+vrrr3Xy5EkVFxfb9A0bNuy6CwMAAHBWdgWohQsXauDAgfLw8NBNN90ki8Vi7bNYLAQoAABQqdm1BiokJESDBg3SqFGj5OZm96/BVFqsgQIAwPWU+W/h/fLLL+rZsyfhCQAA3JDsSkDPPPOMPvroI0fXAgAA4BLsuoVXVFSkhx56SBcuXFDLli1VtWpVm/5p06Y5rEBXxC08AABcT5k/xmDChAn6/PPP1bhxY0kqsYgcAACgMrMrQE2dOlXvvvuu4uPjHVwOAACA87NrDZSnp6fuvPNOR9cCAADgEuwKUC+88IJmzZrl6FoAAABcgl238LZs2aJ169bp3//+t5o3b15iEfknn3zikOIAAACckV0BqmbNmnr44YcdXQsAAIBLsCtALViwoFTjNm7cqMjISHl6etrzNgAAAE6pTB8l3qVLFx07dqws3wIAAKDclWmAsuMZnQAAAE6PH7MDAAAwiQAFAABgEgEKAADApDINUPwuHgAAqIxYRA4AAGCSXc+BKq38/PyyPDwAAECFsCtAtW7d+oq35ywWi7y8vNSwYUPFx8erY8eO110gAACAs7HrFt4DDzyg/fv3q3r16urYsaM6duyoGjVq6KefftIdd9yhEydOKCYmRv/6178cXS8AAECFs+sK1KlTp5SYmKh
XX33Vpn3cuHE6dOiQ1qxZo+TkZL322mvq0aOHQwoFAABwFhbDjpXefn5+yszMVMOGDW3af/zxR0VERCg3N1d79uzRHXfccUOug8rLy5Ofn59yc3Pl6+tb0eUAAIBSMPP5bdctPC8vL23atKlE+6ZNm+Tl5SVJKi4utv47AABAZWLXLbznn39egwYNUmZmpu644w5J0tatWzV//ny98sorkqTPP/9c4eHhDisUAADAWdh1C0+S3nvvPc2ePVt79+6VJDVu3FjPP/+8evfuLUm6cOGC9Vt5Nxpu4QEA4HrMfH7bHaBwdQQoAABcT5mvgZKks2fPWm/ZnT59WpK0bds2HTt2zN5DAgAAuAS71kB9++23iomJkZ+fnw4ePKj+/furdu3a+uSTT3T48GH985//dHSdAAAATsOuK1AJCQmKj4/Xvn37bNY4de3aVV999ZXDigMAAHBGdgWorVu3auDAgSXab7nlFmVnZ193UQAAAM7MrgDl6empvLy8Eu0//PCD/P39r7uoaykoKFB4eLgsFouysrJs+pYuXarw8HB5e3urXr16mjJlik3/+vXrZbFYSmx/DH1z5sxRaGiovLy8FBUVpS1btpTpOQEAANdiV4Dq3r27UlJSdPnyZUm//ojw4cOH9fLLL+uRRx5xaIF/9NJLL6lu3bol2levXq0+ffpo0KBB2rVrl+bOnavp06dr9uzZJcbu3btXJ06csG4BAQHWvtTUVCUkJCg5OVnbtm3T7bffrtjYWJ08ebJMzwsAALgQww5nz541YmJijJo1axru7u5GSEiIUbVqVePuu+82zp07Z88hS2XVqlVGkyZNjO+++86QZGzfvt3a16tXL+PRRx+1GT9z5kwjODjYKC4uNgzDML788ktDknHmzJmrvkfbtm2NIUOGWF8XFRUZdevWNSZMmFDqOnNzcw1JRm5ubqn3AQAAFcvM57dd38Lz8/PT2rVrtXHjRu3YsUPnzp1TmzZtFBMT48hsZyMnJ0cDBgzQihUr5O3tXaK/oKCgRHu1atV09OhRHTp0SKGhodb28PBwFRQUqEWLFhozZozuvPNOSdKlS5eUmZmpUaNGWce6ubkpJiZGGRkZV62toKBABQUF1tdXur0JAAAqD7ufA5Wenq6VK1dq27Zt2rNnj95//33169dP/fr1c2R9kiTDMBQfH69BgwYpMjLyimNiY2P1ySefKD09XcXFxfrhhx80depUSdKJEyckSUFBQZo3b56WLVumZcuWKSQkRB06dNC2bdskSadOnVJRUZECAwNtjh0YGHjNxfETJkyQn5+fdQsJCXHEaQMAACdlV4AaO3as7r//fqWnp+vUqVM6c+aMzVZaSUlJV1zU/fttz549mjVrlvLz822uDP3RgAEDNHToUD300EPy8PBQu3bt1LNnz19P0u3X02zcuLEGDhyoiIgItW/fXu+++67at2+v6dOn2zMNVqNGjVJubq51O3LkyHUdDwAAODe7buHNmzdPCxcu1JNPPnldb56YmKj4+Phrjqlfv77WrVunjIwMeXp62vRFRkaqT58+WrRokSwWiyZNmqTx48crOztb/v7+Sk9Ptx7jatq2bauvv/5aknTzzTfL3d1dOTk5NmNycnJUp06dqx7D09OzRG0AAKDysitAXbp0Se3bt7/uN/f39y/VYw9mzpypcePGWV8fP35csbGxSk1NVVRUlM1Yd3d33XLLLZKkDz74QNHR0dd8j6ysLAUFBUmSPDw8FBERofT0dMXFxUmSiouLlZ6erqFDh5o9PQAAUEnZFaD69++v999/X6+++qqj67miW2+91eZ1jRo1JEkNGjRQcHCwpF/XL3388cfq0KGDLl68qAULFuijjz7Shg0brPvNmDFDYWFhat68uS5evKj58+dr3bp1WrNmjXVMQkKC+vbtq8jISLVt21YzZszQ+fPn9fTTT5fDmQIAAFdgV4C6ePGi/v73v+uLL75Qq1atVLVqVZv+adOmOaQ4sxYtWqS
RI0fKMAxFR0dr/fr1atu2rbX/0qVLSkxM1LFjx+Tt7a1WrVrpiy++UMeOHa1jHn/8cf38888aPXq0srOzFR4errS0tBILywEAwI3LYhiGYXan3weOEge0WLRu3brrKsrV5eXlyc/PT7m5ufL19a3ocgAAQCmY+fy26wrUl19+aVdhAAAAlYHdz4ECAAC4URGgAAAATCJAAQAAmESAAgAAMIkABQAAYBIBCgAAwCQCFAAAgEkEKAAAAJMIUAAAACYRoAAAAEwiQAEAAJhEgAIAADCJAAUAAGASAQoAAMAkAhQAAIBJBCgAAACTCFAAAAAmEaAAAABMIkABAACYRIACAAAwiQAFAABgEgEKAADAJAIUAACASQQoAAAAkwhQAAAAJhGgAAAATCJAAQAAmESAAgAAMIkABQAAYBIBCgAAwCQCFAAAgEkEKAAAAJMIUAAAACYRoAAAAEwiQAEAAJhEgAIAADCJAAUAAGASAQoAAMAkAhQAAIBJBCgAAACTCFAAAAAmEaAAAABMcpkAFRoaKovFYrNNnDjRZsy3336ru+++W15eXgoJCdHkyZNLHOejjz5SkyZN5OXlpZYtW2rVqlU2/YZhaPTo0QoKClK1atUUExOjffv2lem5AQAA1+IyAUqSUlJSdOLECev2/PPPW/vy8vJ0//33q169esrMzNSUKVM0ZswY/f3vf7eO2bRpk3r16qVnnnlG27dvV1xcnOLi4rRr1y7rmMmTJ2vmzJmaN2+eNm/erOrVqys2NlYXL14s13MFAADOy2IYhlHRRZRGaGiohg8fruHDh1+x/6233tLf/vY3ZWdny8PDQ5KUlJSkFStWaM+ePZKkxx9/XOfPn9e///1v637t2rVTeHi45s2bJ8MwVLduXSUmJmrkyJGSpNzcXAUGBmrhwoXq2bNnqWrNy8uTn5+fcnNz5evrex1nDQAAyouZz2+XugI1ceJE3XTTTWrdurWmTJmiwsJCa19GRobuuecea3iSpNjYWO3du1dnzpyxjomJibE5ZmxsrDIyMiRJBw4cUHZ2ts0YPz8/RUVFWcdcSUFBgfLy8mw2AABQeVWp6AJKa9iwYWrTpo1q166tTZs2adSoUTpx4oSmTZsmScrOzlZYWJjNPoGBgda+WrVqKTs729r2+zHZ2dnWcb/f70pjrmTChAkaO3bs9Z0gAABwGRV6BSopKanEwvA/br/dfktISFCHDh3UqlUrDRo0SFOnTtWsWbNUUFBQkacgSRo1apRyc3Ot25EjRyq6JAAAUIYq9ApUYmKi4uPjrzmmfv36V2yPiopSYWGhDh48qMaNG6tOnTrKycmxGfPb6zp16lj/eaUxv+//rS0oKMhmTHh4+FVr9PT0lKen5zXPAwAAVB4VGqD8/f3l7+9v175ZWVlyc3NTQECAJCk6Olp/+9vfdPnyZVWtWlWStHbtWjVu3Fi1atWyjklPT7dZiL527VpFR0dLksLCwlSnTh2lp6dbA1NeXp42b96swYMH23mWAACgsnGJReQZGRmaMWOGduzYof379+u9997TiBEj9MQTT1jDUe/eveXh4aFnnnlG3333nVJTU/Xmm28qISHBepwXXnhBaWlpmjp1qvbs2aMxY8bom2++0dChQyVJFotFw4cP17hx4/Tpp59q586deuqpp1S3bl3FxcVVxKkDAABnZLiAzMxMIyoqyvDz8zO8vLyMpk2bGuPHjzcuXrxoM27Hjh3GXXfdZXh6ehq33HKLMXHixBLHWrp0qdGoUSPDw8PDaN68ubFy5Uqb/uLiYuPVV181AgMDDU9PT6Nz587G3r17TdWbm5trSDJyc3PNnywAAKgQZj6/XeY5UK6E50ABAOB6Ku1zoAAAAJwBAQoAAMAkAhQAAIBJBCgAAACTCFAAAAAmEaAAAABMIkABAACYRIACAAAwiQAFAABgEgEKAADAJAIUAACASQQoAAAAkwhQAAAAJhGgAAAATCJAAQAAmESAAgAAMIkABQAAYBI
BCgAAwCQCFAAAgElVKroAlF5RsaEtB07rZP5FBfh4qW1Ybbm7WSq6LAAAbjgEKBeRtuuExn72vU7kXrS2Bfl5KblbMz3QIqgCKwMA4MbDLTwXkLbrhAYv2WYTniQpO/eiBi/ZprRdJyqoMgAAbkwEKCdXVGxo7Gffy7hC329tYz/7XkXFVxoBAADKAgHKyW05cLrElaffMySdyL2oLQdOl19RAADc4AhQTu5k/tXDkz3jAADA9SNAObkAHy+HjgMAANePAOXk2obVVpCfl672sAKLfv02Xtuw2uVZFgAANzQClJNzd7MouVszSSoRon57ndytGc+DAgCgHBGgXMADLYL01hNtVMfP9jZdHT8vvfVEG54DBQBAOeNBmi7igRZBuq9ZHZ5EDgCAEyBAuRB3N4uiG9xU0WUAAHDD4xYeAACASQQoAAAAkwhQAAAAJhGgAAAATCJAAQAAmESAAgAAMIkABQAAYBIBCgAAwCQCFAAAgEk8ibwMGIYhScrLy6vgSgAAQGn99rn92+f4tRCgykB+fr4kKSQkpIIrAQAAZuXn58vPz++aYyxGaWIWTCkuLtbx48fl4+Mji8WxP/abl5enkJAQHTlyRL6+vg49Nv4P81w+mOfywTyXD+a5/JTVXBuGofz8fNWtW1dubtde5cQVqDLg5uam4ODgMn0PX19f/gctB8xz+WCeywfzXD6Y5/JTFnP9Z1eefsMicgAAAJMIUAAAACYRoFyMp6enkpOT5enpWdGlVGrMc/lgnssH81w+mOfy4wxzzSJyAAAAk7gCBQAAYBIBCgAAwCQCFAAAgEkEKAAAAJMIUE5kwoQJuuOOO+Tj46OAgADFxcVp7969f7rfRx99pCZNmsjLy0stW7bUqlWryqFa12XPPL/zzju6++67VatWLdWqVUsxMTHasmVLOVXsmuz98/ybDz/8UBaLRXFxcWVXZCVg7zyfPXtWQ4YMUVBQkDw9PdWoUSP+7rgGe+d5xowZaty4sapVq6aQkBCNGDFCFy9eLIeKXddbb72lVq1aWR+SGR0drdWrV19zn4r4HCRAOZENGzZoyJAh+u9//6u1a9fq8uXLuv/++3X+/Pmr7rNp0yb16tVLzzzzjLZv3664uDjFxcVp165d5Vi5a7FnntevX69evXrpyy+/VEZGhkJCQnT//ffr2LFj5Vi5a7Fnnn9z8OBBjRw5UnfffXc5VOra7JnnS5cu6b777tPBgwf18ccfa+/evXrnnXd0yy23lGPlrsWeeX7//feVlJSk5ORk7d69W//4xz+UmpqqV155pRwrdz3BwcGaOHGiMjMz9c0336hTp07q0aOHvvvuuyuOr7DPQQNO6+TJk4YkY8OGDVcd89hjjxkPPvigTVtUVJQxcODAsi6v0ijNPP9RYWGh4ePjYyxatKgMK6tcSjvPhYWFRvv27Y358+cbffv2NXr06FE+BVYSpZnnt956y6hfv75x6dKlcqyscinNPA8ZMsTo1KmTTVtCQoJx5513lnV5lU6tWrWM+fPnX7Gvoj4HuQLlxHJzcyVJtWvXvuqYjIwMxcTE2LTFxsYqIyOjTGurTEozz3/0yy+/6PLly6b2udGVdp5TUlIUEBCgZ555pjzKqnRKM8+ffvqpoqOjNWTIEAUGBqpFixYaP368ioqKyqtMl1eaeW7fvr0yMzOtt/v379+vVatWqWvXruVSY2VQVFSkDz/8UOfPn1d0dPQVx1TU5yA/JuykiouLNXz4cN15551q0aLFVcdlZ2crMDDQpi0wMFDZ2dllXWKlUNp5/qOXX35ZdevWLfE/La6stPP89ddf6x//+IeysrLKr7hKpLTzvH//fq1bt059+vTRqlWr9OOPP+q5557T5cuXlZycXI4Vu6bSznPv3r116tQp3XXXXTIMQ4WFhRo0aBC38Eph586dio6O1sWLF1WjRg0tX75czZo1u+LYivocJEA5qSFDhmjXrl36+uuvK7qUSs2eeZ44caI+/PBDrV+
/Xl5eXmVYXeVRmnnOz8/Xk08+qXfeeUc333xzOVZXeZT2z3NxcbECAgL097//Xe7u7oqIiNCxY8c0ZcoUAlQplHae169fr/Hjx2vu3LmKiorSjz/+qBdeeEGvvfaaXn311XKq1jU1btxYWVlZys3N1ccff6y+fftqw4YNVw1RFaJMbxDCLkOGDDGCg4ON/fv3/+nYkJAQY/r06TZto0ePNlq1alVG1VUeZub5N1OmTDH8/PyMrVu3lmFllUtp53n79u2GJMPd3d26WSwWw2KxGO7u7saPP/5YThW7JjN/nu+55x6jc+fONm2rVq0yJBkFBQVlVWKlYGae77rrLmPkyJE2bYsXLzaqVatmFBUVlVWJlVLnzp2NZ5999op9FfU5yBooJ2IYhoYOHarly5dr3bp1CgsL+9N9oqOjlZ6ebtO2du3aq94rhn3zLEmTJ0/Wa6+9prS0NEVGRpZxla7P7Dw3adJEO3fuVFZWlnXr3r27OnbsqKysLIWEhJRT5a7Fnj/Pd955p3788UcVFxdb23744QcFBQXJw8OjLMt1WfbM8y+//CI3N9uPWXd3d+vxUHrFxcUqKCi4Yl+FfQ6WaTyDKYMHDzb8/PyM9evXGydOnLBuv/zyi3XMk08+aSQlJVlfb9y40ahSpYrxxhtvGLt37zaSk5ONqlWrGjt37qyIU3AJ9szzxIkTDQ8PD+Pjjz+22Sc/P78iTsEl2DPPf8S38P6cPfN8+PBhw8fHxxg6dKixd+9e49///rcREBBgjBs3riJOwSXYM8/JycmGj4+P8cEHHxj79+831qxZYzRo0MB47LHHKuIUXEZSUpKxYcMG48CBA8a3335rJCUlGRaLxVizZo1hGM7zOUiAciKSrrgtWLDAOubee+81+vbta7Pf0qVLjUaNGhkeHh5G8+bNjZUrV5Zv4S7GnnmuV6/eFfdJTk4u9/pdhb1/nn+PAPXn7J3nTZs2GVFRUYanp6dRv3594/XXXzcKCwvLt3gXYs88X7582RgzZozRoEEDw8vLywgJCTGee+4548yZM+Vevyvp16+fUa9ePcPDw8Pw9/c3OnfubA1PhuE8n4MWw+A6IgAAgBmsgQIAADCJAAUAAGASAQoAAMAkAhQAAIBJBCgAAACTCFAAAAAmEaAAAABMIkABAACYRIAC4PIOHjwoi8WirKysii7Fas+ePWrXrp28vLwUHh5eITWEhoZqxowZFfLeQGVHgAJw3eLj42WxWDRx4kSb9hUrVshisVRQVRUrOTlZ1atX1969e0v80OlvmDfAdRGgADiEl5eXJk2apDNnzlR0KQ5z6dIlu/f96aefdNddd6levXq66aabrjquMs4bcCMgQAFwiJiYGNWpU0cTJky46pgxY8aUuJ01Y8YMhYaGWl/Hx8crLi5O48ePV2BgoGrWrKmUlBQVFhbqxRdfVO3atRUcHKwFCxaUOP6ePXvUvn17eXl5qUWLFtqwYYNN/65du9SlSxfVqFFDgYGBevLJJ3Xq1Clrf4cOHTR06FANHz5cN998s2JjY694HsXFxUpJSVFwcLA8PT0VHh6utLQ0a7/FYlFmZqZSUlJksVg0ZsyY65o3SVq2bJmaN28uT09PhYaGaurUqTb9J0+eVLdu3VStWjWFhYXpvffeK3GMs2fPqn///vL395evr686deqkHTt2WPt37Nihjh07ysfHR76+voqIiNA333xzzbqAGxUBCoBDuLu7a/z48Zo1a5aOHj16Xcdat26djh8/rq+++krTpk1TcnKyHnroIdWqVUubN2/WoEGDNHDgwBLv8+KLLyoxMVHbt29XdHS0unXrpv/973+Sfg0PnTp1UuvWrfXNN98oLS1NOTk5euyxx2yOsWjRInl4eGjjxo2aN2/eFet78803NXXqVL3xxhv69ttvFRsbq+7du2vfvn2SpBMnTqh58+ZKTEzUiRMnNHLkyKuea2nmLTMzU4899ph69uypnTt3asyYMXr11Ve1cOFC65j
4+HgdOXJEX375pT7++GPNnTtXJ0+etDnOX//6V508eVKrV69WZmam2rRpo86dO+v06dOSpD59+ig4OFhbt25VZmamkpKSVLVq1avWDtzQDAC4Tn379jV69OhhGIZhtGvXzujXr59hGIaxfPly4/d/zSQnJxu33367zb7Tp0836tWrZ3OsevXqGUVFRda2xo0bG3fffbf1dWFhoVG9enXjgw8+MAzDMA4cOGBIMiZOnGgdc/nyZSM4ONiYNGmSYRiG8dprrxn333+/zXsfOXLEkGTs3bvXMAzDuPfee43WrVv/6fnWrVvXeP31123a7rjjDuO5556zvr799tuN5OTkax6ntPPWu3dv47777rPZ98UXXzSaNWtmGIZh7N2715BkbNmyxdq/e/duQ5Ixffp0wzAM4z//+Y/h6+trXLx40eY4DRo0MN5++23DMAzDx8fHWLhw4Z+cPQDDMAyuQAFwqEmTJmnRokXavXu33cdo3ry53Nz+76+nwMBAtWzZ0vra3d1dN910U4krLNHR0dZ/r1KliiIjI6117NixQ19++aVq1Khh3Zo0aSLp1/VKv4mIiLhmbXl5eTp+/LjuvPNOm/Y777zzus75WvO2e/fuK77fvn37VFRUpN27d6tKlSo2tTdp0kQ1a9a0vt6xY4fOnTunm266yWYODhw4YD3/hIQE9e/fXzExMZo4caLNvACwRYAC4FD33HOPYmNjNWrUqBJ9bm5uMgzDpu3y5cslxv3xtpHFYrliW3FxcanrOnfunLp166asrCybbd++fbrnnnus46pXr17qYzrStebNEc6dO6egoKAS57937169+OKLkn5do/bdd9/pwQcf1Lp169SsWTMtX768TOoBXF2Vii4AQOUzceJEhYeHq3Hjxjbt/v7+ys7OlmEY1q/pO/LZTf/973+tYaiwsFCZmZkaOnSoJKlNmzZatmyZQkNDVaWK/X/1+fr6qm7dutq4caPuvfdea/vGjRvVtm3b66r/avPWtGlTbdy40aZt48aNatSokdzd3dWkSRPr+d5xxx2SpL179+rs2bPW8W3atFF2draqVKlis2j/jxo1aqRGjRppxIgR6tWrlxYsWKC//OUv13VeQGXEFSgADteyZUv16dNHM2fOtGnv0KGDfv75Z02ePFk//fST5syZo9WrVzvsfefMmaPly5drz549GjJkiM6cOaN+/fpJkoYMGaLTp0+rV69e2rp1q3766Sd9/vnnevrpp1VUVGTqfV588UVNmjRJqamp2rt3r5KSkpSVlaUXXnjhuuq/2rwlJiYqPT1dr732mn744QctWrRIs2fPti5Ob9y4sR544AENHDhQmzdvVmZmpvr3769q1apZjxETE6Po6GjFxcVpzZo1OnjwoDZt2qS//e1v+uabb3ThwgUNHTpU69ev16FDh7Rx40Zt3bpVTZs2va5zAiorAhSAMpGSklLiFlvTpk01d+5czZkzR7fffru2bNlyzW+omTVx4kRNnDhRt99+u77++mt9+umnuvnmmyXJetWoqKhI999/v1q2bKnhw4erZs2aNuutSmPYsGFKSEhQYmKiWrZsqbS0NH366ae67bbbrvscrjRvbdq00dKlS/Xhhx+qRYsWGj16tFJSUhQfH28ds2DBAtWtW1f33nuvHn74YT377LMKCAiw9lssFq1atUr33HOPnn76aTVq1Eg9e/bUoUOHFBgYKHd3d/3vf//TU089pUaNGumxxx5Tly5dNHbs2Os+J6Ayshh/XJAAAACAa+IKFAAAgEkEKAAAAJMIUAAAACYRoAAAAEwiQAEAAJhEgAIAADCJAAUAAGASAQoAAMAkAhQAAIBJBCgAAACTCFAAAAAm/X9rvXat8Ec1MAAAAABJRU5ErkJggg==", + "image/png": "", "text/plain": [ "
" ] diff --git a/Tutorial/5_Genetic_Feature_Selection.ipynb b/Tutorial/5_Genetic_Feature_Selection.ipynb index a9afcf4b..d062c5b4 100644 --- a/Tutorial/5_Genetic_Feature_Selection.ipynb +++ b/Tutorial/5_Genetic_Feature_Selection.ipynb @@ -18,14 +18,18 @@ "name": "stderr", "output_type": "stream", "text": [ - "Generation: 100%|██████████| 5/5 [00:08<00:00, 1.66s/it]\n" + "/home/ribeirop/miniconda3/envs/tpot2env/lib/python3.10/site-packages/distributed/node.py:182: UserWarning: Port 8787 is already in use.\n", + "Perhaps you already have a cluster running?\n", + "Hosting the HTTP server on port 35727 instead\n", + " warnings.warn(\n", + "Generation: 100%|██████████| 5/5 [04:07<00:00, 49.49s/it]\n" ] }, { "name": "stdout", "output_type": "stream", "text": [ - "0.9377587991718427\n" + "0.9554814292129066\n" ] } ], @@ -478,62 +482,62 @@ " background-color: var(--sklearn-color-fitted-level-3);\n", "}\n", "
Pipeline(steps=[('maskselector',\n",
-       "                 MaskSelector(mask=array([False,  True, False, False, False,  True,  True, False, False,\n",
-       "        True, False,  True,  True, False, False, False,  True, False,\n",
-       "       False, False, False,  True, False, False, False, False, False,\n",
-       "        True, False, False, False,  True,  True,  True, False,  True,\n",
-       "        True,  True,  True,  True, False,  True,  True, False, False,\n",
-       "       False, False,  True, False,  True, False, False, Fa...\n",
-       "        True,  True,  True, False, False,  True,  True, False, False,\n",
-       "        True, False, False, False, False, False,  True, False,  True,\n",
-       "       False, False,  True, False, False, False,  True,  True,  True,\n",
-       "       False, False, False,  True, False, False,  True, False, False,\n",
-       "       False,  True, False, False, False,  True,  True, False, False,\n",
+       "                 MaskSelector(mask=array([ True,  True, False,  True,  True,  True,  True,  True,  True,\n",
+       "        True, False,  True,  True,  True,  True,  True,  True,  True,\n",
+       "        True,  True, False,  True,  True,  True,  True,  True,  True,\n",
+       "        True,  True,  True, False, False, False,  True, False,  True,\n",
+       "        True,  True, False, False, False, False,  True, False,  True,\n",
+       "       False,  True, False,  True,  True,  True,  True,  True,  True,\n",
+       "       False,  True,  True,  True,  True, False, False,  True,  True,\n",
+       "        True, False, False,  True,  True, False, False, False, False,\n",
+       "        True,  True, False,  True,  True,  True, False,  True,  True,\n",
+       "        True, False,  True,  True,  True, False,  True,  True,  True,\n",
+       "        True,  True,  True,  True,  True, False, False, False,  True,\n",
        "        True]))),\n",
        "                ('graphpipeline',\n",
-       "                 GraphPipeline(graph=<networkx.classes.digraph.DiGraph object at 0x763168650dc0>))])
In a Jupyter environment, please rerun this cell to show the HTML representation or trust the notebook.
On GitHub, the HTML representation is unable to render, please try loading this page with nbviewer.org.
MaskSelector(mask=array([ True,  True, False,  True,  True,  True,  True,  True,  True,\n",
+       "        True, False,  True,  True,  True,  True,  True,  True,  True,\n",
+       "        True,  True, False,  True,  True,  True,  True,  True,  True,\n",
+       "        True,  True,  True, False, False, False,  True, False,  True,\n",
+       "        True,  True, False, False, False, False,  True, False,  True,\n",
+       "       False,  True, False,  True,  True,  True,  True,  True,  True,\n",
+       "       False,  True,  True,  True,  True, False, False,  True,  True,\n",
+       "        True, False, False,  True,  True, False, False, False, False,\n",
+       "        True,  True, False,  True,  True,  True, False,  True,  True,\n",
+       "        True, False,  True,  True,  True, False,  True,  True,  True,\n",
+       "        True,  True,  True,  True,  True, False, False, False,  True,\n",
+       "        True]))
[('KNeighborsClassifier_1', 'PassKBinsDiscretizer_1'), ('KNeighborsClassifier_1', 'ColumnOneHotEncoder_1'), ('ColumnOneHotEncoder_1', 'PCA_1')]
" ], "text/plain": [ "Pipeline(steps=[('maskselector',\n", - " MaskSelector(mask=array([False, True, False, False, False, True, True, False, False,\n", - " True, False, True, True, False, False, False, True, False,\n", - " False, False, False, True, False, False, False, False, False,\n", - " True, False, False, False, True, True, True, False, True,\n", - " True, True, True, True, False, True, True, False, False,\n", - " False, False, True, False, True, False, False, Fa...\n", - " True, True, True, False, False, True, True, False, False,\n", - " True, False, False, False, False, False, True, False, True,\n", - " False, False, True, False, False, False, True, True, True,\n", - " False, False, False, True, False, False, True, False, False,\n", - " False, True, False, False, False, True, True, False, False,\n", + " MaskSelector(mask=array([ True, True, False, True, True, True, True, True, True,\n", + " True, False, True, True, True, True, True, True, True,\n", + " True, True, False, True, True, True, True, True, True,\n", + " True, True, True, False, False, False, True, False, True,\n", + " True, True, False, False, False, False, True, False, True,\n", + " False, True, False, True, True, True, True, True, True,\n", + " False, True, True, True, True, False, False, True, True,\n", + " True, False, False, True, True, False, False, False, False,\n", + " True, True, False, True, True, True, False, True, True,\n", + " True, False, True, True, True, False, True, True, True,\n", + " True, True, True, True, True, False, False, False, True,\n", " True]))),\n", " ('graphpipeline',\n", - " GraphPipeline(graph=))])" + " GraphPipeline(graph=))])" ] }, "execution_count": 2, @@ -545,6 +549,38 @@ "est.fitted_pipeline_" ] }, + { + "cell_type": "code", + "execution_count": 6, + "metadata": {}, + "outputs": [ + { + "data": { + "text/plain": [ + "" + ] + }, + "execution_count": 6, + "metadata": {}, + "output_type": "execute_result" + }, + { + "data": { + "image/png": 
"iVBORw0KGgoAAAANSUhEUgAAAi8AAAA2CAYAAAAPknk+AAAAOXRFWHRTb2Z0d2FyZQBNYXRwbG90bGliIHZlcnNpb24zLjcuNSwgaHR0cHM6Ly9tYXRwbG90bGliLm9yZy/xnp5ZAAAACXBIWXMAAA9hAAAPYQGoP6dpAAAM5UlEQVR4nO3dfVBUZfsH8O+6yy4QAbbALiAraDi82aTgItIoMzKjxowvNU7MkCKWSsEE0WSOpU4WQeM/WVmRjWIjaDmjWdZYDr5MJonKiFqElj7gFC8a4a5p6m/3+v3R404bqPi4b2f5fmbuGbnPffZc51xn1mv23OcclYgIiIiIiBRimLcDICIiIrobLF6IiIhIUVi8EBERkaKweCEiIiJFYfFCREREisLihYiIiBSFxQsREREpCosXIiIiUhQWL0RERKQoLF6IiIhIUdxWvPT29qKgoAChoaEIDw/HU089hcuXL992nZycHKhUKqdWXFzsrhCJiIhIgVTuerfRjBkz0NnZiZqaGty4cQNFRUWYMGEC6uvrb7lOTk4OxowZg9WrVzv6goODERoa6o4QiYiISIHc8stLa2srdu/ejY8++giZmZloaWmBxWLBli1bMH78eDQ1Nd1y3eDgYHz77bfIyclBfHw8srOz8dVXX7kjTCIiIlIgjTs+tLGxEeHh4cjIyMAnn3yCiooKrFu3DkuWLEFERASmTZuGtrY2REVF9Vu3trYWa9euhcFgwLx586DX6zF79mw0NzcjLS2t3/hr167h2rVrjr/tdjt6e3uh1+uhUqncsXtERETkYiICq9WKmJgYDBt2h99WxA0qKytlzJgxIiJiNpulpKREREQiIyPl3XfflZiYGKmqquq3Xk1NjUyePFkmT54smzdvltjYWJkzZ45kZmbKkiVLBtzWqlWrBAAbGxsbGxubH7Tz58/fsc64qzkvy5Ytw5tvvnnbMa2trdi+fTs2bdqEkydPIjg4GAsXLsQ333yD9vZ2xMfHIyUlBRqNBjt37uy3vl6vR29vr1OfWq1GamoqWlpa+o3/9y8vly5dgslkQntzPEJDbl+5zRkztl/fjtMnb7uOJ/l6fIPhD/sADLwfgzXQ/rr6uNxLfAO5l5hdPc7dPBHHYPPjiXNgsNvwxDk12O26+ri48vN8KTZPcPV34T9ZLtsxcvx/0NfXh7CwsNuOvavLRi+88AIWLFhw2zGjRo2C0WhET08PLl68CJvNhtraWrz33ntYsmQJEhMT0dDQgJSUlAHXv3TpEoKCgnD27FlcuXIFo0ePxvz58/Hll18OKsabl4pCQ4Yh9P7bFy8aVUC/vjut40m+Ht9g+MM+AAPvx2ANtL+uPi73Et9A7iVmV49zN0/EMdj8eOIcGOw2PHFODXa7rj4urvw8X4rNE1z9XTiQwUz5uKviJTIyEpGRkXccl5WVhb6+Ppw4cQIAMHPmTJhMJogINmzYgOTkZFy4cOGW66tUKhiNRnz33XcAgOjo6FuOraqqwquvvno3u0FEREQK5pYJu8nJyZg+fTqWLl0KAAgKCkJpaSny8/MxYsQIREVFwWq1IikpCR9//DHMZjN++eUX1NfX47777oPVakVUVJRj4m1vby+MRuOA26qoqMDTTz/t+NtisSA1NRWWy/Y7xvl/cqNfn8V65/U8xdfjGwx/2Adg4P0YrIH219XH5V7iG8i9xOzqce7miTgGmx9PnAOD3YYnzqnBbtfVx8WVn+dLsXmCq78LnZb/9//tQc1m+R/n5N7R77//LrNmzRIAolarpaioSKxWq9hsNgkJCZHo6GgBIPv27RMRkY6ODpk8ebJoNBpRqVQSFxcn+fn5Mn36dFGr1VJQUDDgdjhhl42NjY2NzX+ayyfs3q3ffvsNsbGxCAgIwPr162E2m/HWW29h06ZNSElJQXNzM+bPn4/Y2FhUVVUBAA4dOoQpU6aguroaeXl5qKurw+uvv47Fixejpqam3zZud6u01WpFXFwczp8/zwf
deZnFYmEufARz4TuYC9/CfHiX3MWt0m65bHRTREQE1Go1CgsLsXLlSnR1deHhhx/G1KlTodH8vemOjg6nICdNmoT6+nq88sorWL58ORITE5GdnQ2LxTLgNnQ6HXQ6nVNfeHg4gH9M3g0N5YnoI5gL38Fc+A7mwrcwH95zp7uMbnJr8aLVapGeno7AwEC0t7cD+PuXEZPJhNLSUgDA/v37+603d+5czJ07FwBgs9mQmpoKs9nszlCJiIhIIdxavAB/T6gtLCxERkaG47LRn3/+iaKiIgDod9lo9erVmDhxIh588EH09fVhzZo1aG9vd5qUS0REREOX24uXJ554AhcuXHC6bLR7924YDAYA/S8b/fHHH1i0aBG6urowfPhwpKen49ChQ7d8Lszt6HQ6rFq1qt9lJfI85sJ3MBe+g7nwLcyHcrh1wi4RERGRq/n2o/yIiIiI/oXFCxERESkKixciIiJSFBYvREREpCh+W7ysW7cO8fHxCAwMRGZmJpqamrwdkt+rqqrChAkTcP/99yMqKgqzZ89GW1ub05i//voLJSUl0Ov1CAkJweOPP47u7m4vRTx0VFdXQ6VSoby83NHHXHjOr7/+iieffBJ6vR5BQUEYO3Ysjh496lguIli5ciWio6MRFBSE3NxcnDlzxosR+y+bzYYVK1YgISEBQUFBGD16NF577TWn9+kwHwrgivcY+ZqtW7eKVquVDRs2yA8//CCLFi2S8PBw6e7u9nZofm3atGmyceNGOXXqlBw/flweffRRMZlMcvnyZceY4uJiiYuLk4aGBjl69KhMnDhRJk2a5MWo/V9TU5PEx8fLQw89JGVlZY5+5sIzent7ZeTIkbJgwQI5fPiwnD17Vr7++mv5+eefHWOqq6slLCxMPvvsM2lpaZGZM2dKQkKCXL161YuR+6fKykrR6/Wya9cuOXfunGzbtk1CQkJk7dq1jjHMh+/zy+LFbDZLSUmJ42+bzSYxMTFSVVXlxaiGnp6eHgEgBw4cEBGRvr4+CQgIkG3btjnGtLa2CgBpbGz0Vph+zWq1SmJiouzZs0emTJniKF6YC8956aWX5JFHHrnlcrvdLkajUdasWePo6+vrE51OJ1u2bPFEiENKXl6eLFy40Knvsccec7z8l/lQBr+7bHT9+nUcO3YMubm5jr5hw4YhNzcXjY2NXoxs6Ll06RIA4IEHHgAAHDt2DDdu3HDKTVJSEkwmE3PjJiUlJcjLy3M65gBz4Umff/45MjIyMHfuXERFRWHcuHFYv369Y/m5c+fQ1dXllIuwsDBkZmYyF24wadIkNDQ04PTp0wCAlpYWHDx4EDNmzADAfCiF25+w62kXL16EzWZzPMH3JoPBgJ9++slLUQ09drsd5eXlyM7ORlpaGgCgq6sLWq3W8eLMmwwGA7q6urwQpX/bunUrmpubceTIkX7LmAvPOXv2LN5//31UVFRg+fLlOHLkCJ577jlotVoUFhY6jvdA31nMhestW7YMFosFSUlJUKvVsNlsqKysREFBAQAwHwrhd8UL+YaSkhKcOnUKBw8e9HYoQ9L58+dRVlaGPXv2IDAw0NvhDGl2ux0ZGRl44403AADjxo3DqVOn8MEHH6CwsNDL0Q09n376Kerq6lBfX4/U1FQcP34c5eXliImJYT4UxO8uG0VERECtVve7a6K7uxtGo9FLUQ0tpaWl2LVrF/bt24cRI0Y4+o1GI65fv46+vj6n8cyN6x07dgw9PT0YP348NBoNNBoNDhw4gLfffhsajQYGg4G58JDo6Oh+72ZLTk5GR0cHADiON7+zPOPFF1/EsmXLkJ+fj7Fjx2LevHl4/vnnHS8HZj6Uwe+KF61Wi/T0dDQ0NDj67HY7GhoakJWV5cXI/J+IoLS0FDt27MDevXuRkJDgtDw9PR0BAQFOuWlra0NHRwdz42JTp07FyZMncfz4cUfLyMhAQUGB49/MhWdkZ2f3e2TA6dOnMXLkSABAQkICjEajUy4sFgsOHz7
MXLjBlStXnF4GDABqtRp2ux0A86EY3p4x7A5bt24VnU4ntbW18uOPP8rixYslPDxcurq6vB2aX3vmmWckLCxM9u/fL52dnY525coVx5ji4mIxmUyyd+9eOXr0qGRlZUlWVpYXox46/nm3kQhz4SlNTU2i0WiksrJSzpw5I3V1dRIcHCybN292jKmurpbw8HDZuXOnnDhxQmbNmsVbc92ksLBQYmNjHbdKb9++XSIiImTp0qWOMcyH7/PL4kVE5J133hGTySRarVbMZrN8//333g7J7wEYsG3cuNEx5urVq/Lss8/K8OHDJTg4WObMmSOdnZ3eC3oI+Xfxwlx4zhdffCFpaWmi0+kkKSlJPvzwQ6fldrtdVqxYIQaDQXQ6nUydOlXa2tq8FK1/s1gsUlZWJiaTSQIDA2XUqFHy8ssvy7Vr1xxjmA/fpxL5x2MFiYiIiHyc3815ISIiIv/G4oWIiIgUhcULERERKQqLFyIiIlIUFi9ERESkKCxeiIiISFFYvBAREZGisHghIiIiRWHxQkRERIrC4oWIiIgUhcULERERKQqLFyIiIlKU/wd957LJvw2LtQAAAABJRU5ErkJggg==", + "text/plain": [ + "
" + ] + }, + "metadata": {}, + "output_type": "display_data" + } + ], + "source": [ + "import matplotlib.pyplot as plt\n", + "\n", + "plt.imshow([est.fitted_pipeline_.steps[0][1].mask])" + ] + }, { "cell_type": "code", "execution_count": 3, @@ -552,7 +588,7 @@ "outputs": [ { "data": { - "image/png": "", + "image/png": "iVBORw0KGgoAAAANSUhEUgAAAnYAAAHWCAYAAAD6oMSKAAAAOXRFWHRTb2Z0d2FyZQBNYXRwbG90bGliIHZlcnNpb24zLjcuNSwgaHR0cHM6Ly9tYXRwbG90bGliLm9yZy/xnp5ZAAAACXBIWXMAAA9hAAAPYQGoP6dpAABYGklEQVR4nO3deUBU9foG8GcWtmETRBZBUBE3FgG3FDW6UWmmV9PStFIzyyyXa24pYBcQ99K8mlYu6S/LMkuvWprL1St41UBBcAEVQRBQGBZh2Gb5/ZFOTTKGCnNmhufzl768M+dFi/P4/c45R6TRaDQgIiIiIpMnFnoAIiIiImocDHZEREREZoLBjoiIiMhMMNgRERERmQkGOyIiIiIzwWBHREREZCYY7IiIiIjMBIMdERERkZlgsCMiIiIyEwx2RERERGaCwY6IiIjITDDYEREREZkJBjsiIiIiM8FgR0RERGQmGOyIiIiIzASDHREREZGZYLAjIiIiMhMMdkRERERmgsGOiIiIyEww2BERERGZCQY7IiIiIjPBYEdERERkJhjsiIiIiMwEgx0RERGRmWCwIyIiIjITDHZEREREZoLBjoiIiMhMMNgRERERmQmp0AMQETUmlUoFuVyOwsJCFBYW4nZBAWqqqqBWqSCWSGBlY4NW7u5wc3ODm5sbnJ2dIZFIhB6biKhRiDQajUboIYiIHldJSQlSUlJwPjkZ1ZWV0CiVsKuqgqNcDgulEmKNBmqRCHVSKcqcnVFhYwORVAprW1sEhoaiW7ducHJyEvrbICJ6LAx2RGTSbt68icQTJ5CVmQkLhQLeOTfgIZfDsbISFiqV3tfVSSQos7VFvrMzcrzboE4mQzs/P4T17w8PDw8DfgdERI2HwY6ITJJSqURCQgLOJCTArqgIHbJz4FVUBIla/dDvpRKLkevigis+3qhwcUHPsDCEhYVBKuWnVYjItDDYEZHJKSgowL49e1CSm4fOmZnwy8uDuBF+lKlFImR6euKSnx+cvTzx/NChcHd3b4SJiYgMg8GOiExKdnY2ftixA7Kb+eh+8SIcFIpGP0a5TIakLl2gaN0aw0e9DB8fn0Y/BhFRU2CwIyKTkZ2dje+//hots3PQ68IFSB9h27WhlGIxTvl3hdzbGyNeeYXhjohMAu9jR0QmoaCgAD/s2AHn7Bw8kZ7epKEOAKRqNfqkpcM5Jwc/7PgWBQUFTXo8IqLGwGBHREZPqVRi3549kN3MR+8LFxrl83QNIdZo0Dv9Amzyb2L/nj1QKpUGOS4R0aNisCMio5eQkICS3Dx0v3ixyVfq/kyqVqP7hYuQ5+UhMTHRoMcmInpYDHZEZNRu3ryJMwkJ6JyZ2SQXSjSEo0KBThmZOH3iBPLz8wWZgYioIRjsiMioJZ44AbuiIvjl5Qk6R8e8PNgVFSHhxAlB5yAiehAGOyIyWiUlJcjKzESH7ByDfa5OH7FGA9/sHGRlZKCkpETQWYiI9GGwIyKjlZKSAguFAl5FRUKPAgBoU1QEqUKB1NRUoUchIqoXgx0RGSWVSoXzycnwzrnxSI8JawoStRo+N24gNSkJqgc8h5aISCgMdkRklORyOaorK+Ehlws9ig6P4t/mkhvZXEREAMAnXBORUSosLIRGqUSLigqdepcT/4WfrS1UGg18bWRY2rEjbCQSFNTUIPb
aVVyqrISjVAovK2tE+/rCxdISADAvIwMZikrsCg554HHX5eRgR2EBqlQqnH6iz31fd6yshEapRGFhIVq1atV43zARUSPgih0RGaXCwkLYVVXdd986e6kUe0JCsS+0OyzEInxdkA+NRoN3LlxAuJMzDvfoiV3BIXitdWvI6+oAALVqNU6VlaJWrUZOddUDj9vPyQnfdQvW+3ULlQp2VVUoLCx87O+RiKixMdgRkVG6XVAAx7/Y7uzh4IicqmoklpVCJhHjJXd37dd6Ojqio60tAOBESQl6ODhicKtW2H/7wRdiBNnbw/XuKp8+DvIS3OYjxojICDHYEZFRqqmqgsUDHuGl1GhwvESOjrYyXFUo4G9np7d3f9FtDHJxwWCXVthfdPuxZ7NUKlFbXf3Y70NE1NgY7IjIKKlVqnrvXXdHqcTQs8l48dxZtLayxkg393pe/bsatRqny8rQz8kJ3jY2kIpEuPaYT7AQa9RQ8bmxRGSEePEEERklsUQCtUh0X/3eZ+z+yNdGhoNFxfW+z3/kcpQrlXgu6VcAQIVKhf1Ft/Get88jz6YWiSGR8scnERkfrtgRkVGysrFBXQPDU98WLVChUmLXHy5o+LWsDBmVldhfdBsrOnXG0Z69cLRnL3wfHIz9j3nD41qpFJbW1o/1HkRETYHBjoiMUit3d5Q5OzeoVyQSYV2XrviluBhP/3oGzycnYVv+TdhKJPhfaSn6tWih7fW2toEEImRUVtb7Xquyr6P/6VMoVyrR//QpbMrLva+n3NkJrdwfvAVMRCQEkUYj8AMYiYjqkZaWhv3ffYcXjh2HhRE95aFOIsHeJwfg+ZdeQkBAgNDjEBHp4IodERklNzc3iKRSlN29ZYmxKLO1hUgqhZubm9CjEBHdh5/+JSKj5OzsDGtbW+Q7O8OlvLzR3//Dq1eQ/Kf3nd22Hfo7OT3wdfktf5vLuYHbxEREhsRgR0RGSSKRIDA0FOeKi9E1JweSPz2B4nF96NvhoV+jEouR3aYNQrt3h0QiadR5iIgaA7diichodevWDXUyGXJdXJrk/cvvlONmfj5u3b6Fugbcl+6GiwuUMhmCgoKaZB4iosfFYEdERsvJyQnt/Pxwxce73nvaPY46pRIVFRUANFAqlZDL5VA/4FoytUiEqz7eaNexI5z+YruWiEgoDHZEZNTC+vdHhYsLMj09m/Q4KpUS5Q/4LF+GpycqXFwQ1q9fk85BRPQ4GOyIyKh5eHigZ1gYLvn5oVwma7T3tZBKYWlppVNTKCpRXVNzX2+ZTIbLHf3Qq18/eHh4NNoMRESNjcGOiIxeWFgYnLw8kdSlC5Tixvux1aJFC4hEuu9XWlqqsyWrFIuR1LULnD090bdv30Y7NhFRU2CwIyKjJ5VKMXjoUChat8Yp/66N9nk7qUQCBwcHnZparUJZWdlvvxaJcMq/K6o8WuP5oUMh5fNhicjIMdgRkUlwd3fH8FEvQ+7tjZMB/o22cmcrk8HKSve5r1VVClTW1uJkgD/k3t4YPupluPMRYkRkAvhIMSIyKdnZ2fhhx7eQ3byJ7hcvwkGheOz3VKlUuHX7NjSa3+6VV+nggMs9ekLTvh1GvPIKfHx8HvsYRESGwGBHRCanoKAA+/bsQUluHjpnZsIvLw/ix/xRpqiqgrysFDc7dkRm587Ik8tRVVeHbdu2QdTIt1ohImoqDHZEZJKUSiUSEhJwJiEBdkVF8M3OQZuiokd6QoVKLMYNFxeku7mi0MYGCWfOIDExESqVCl9//TVGjx7dBN8BEVHjY7AjIpN28+ZNJCYkICsjA1KFAj43bsCjWA7HykpYqFR6X1cnkaDM1hb5LZ2R3aYNlDIZPNq0QeyiRcjIyND2OTk5IT09nbc5ISKTwGBHRGahpKQEqampSE1KQnVlJTRKJeyqquAgL4GlUgmxRg21SIxaqRTlzk6osLGBSCqFta0tgrp3R1BQEJycnPDtt99i1KhROu/9wgsvYM+ePdySJSKjx2B
HRGZFpVJBLpejsLAQhYWFuF1QgNrqaqiUSkikUlhaW6OVuzvc3Nzg5uYGZ2dnSCQSnfcYNWoUvv32W53apk2bMGHCBEN+K0RED43BjojoT4qKihAQEIDCwkJtzcHBAefPn4e3t7eAkxERPRjvY0dE9CcuLi747LPPdGrl5eWYOHEi+G9hIjJmDHZERPUYOnQoxo0bp1M7dOgQ1q9fL9BERER/jVuxRER6lJaWIiAgAHl5edqaTCZDamoqfH19BZyMiKh+XLEjItKjRYsW2LRpk05NoVBgwoQJUD3gVipEREJhsCMieoBnn30Wb7/9tk7tv//9L1avXi3QRERE+nErlojoL9y5cwfdunVDVlaWtmZlZYWzZ8+iS5cuAk5GRKSLK3ZERH/B3t4emzdv1qnV1NRg3LhxUCqVAk1FRHQ/BjsiogZ48sknMWPGDJ3amTNnsGzZMmEGIiKqB7diiYgaqKqqCsHBwTrPkrWwsMCZM2fQrVs3AScjIvoNV+yIiBrIxsYGX375JcTi33901tXVYdy4caitrRVwMiKi3zDYERE9hCeeeAJz5szRqaWkpCA2NlagiYiIfsetWCKih1RTU4MePXogLS1NW5NIJDh58iR69uwp4GRE1Nwx2BERPYKzZ8+iV69eOlfFdunSBcnJybC2thZwMiJqzrgVS0T0CEJCQhAVFaVTu3jx4n01IiJD4oodEdEjqqurQ58+fZCUlKStiUQiHD9+HP369RNwMiJqrhjsiIgeQ3p6OkJDQ3WuivX19UVKSgpsbW0FnIyImiNuxRIRPQZ/f3/ExcXp1K5evYq5c+cKNBERNWdcsSMiekwqlQoDBgxAYmKiTv3QoUN4+umnBZqKiJojBjsiokaQmZmJbt26oaqqSlvz9vZGamoqHB0dBZyMiJoTbsUSETUCPz8/LF26VKeWk5ODmTNnCjQRETVHXLEjImokarUaEREROHr0qE597969GDx4sEBTEVFzwmBHRNSIrl+/jsDAQFRUVGhr7u7uSE9Ph7Ozs4CTEVFzwK1YIqJG1LZtW3z88cc6tYKCAkydOlWgiYioOeGKHRFRI9NoNBg8eDB++uknnfrOnTsxYsQIgaYiouaAwY6IqAnk5eUhICAApaWl2pqLiwvS09Ph6uoq3GBEZNa4FUtE1AQ8PT2xZs0anVpRUREmT54M/nuaiJoKgx0RURMZO3Yshg8frlP74YcfsH37doEmIiJzx61YIqImdOvWLfj7+6OoqEhba9GiBdLS0uDp6SngZERkjrhiR0TUhFxdXfHpp5/q1EpLSzFp0iRuyRJRo2OwIyJqYiNHjsQrr7yiU/vpp5+wceNGgSYiInPFrVgiIgOQy+Xw9/dHQUGBtmZnZ4fz58+jbdu2wg1GRGaFK3ZERAbg7OyML774QqdWUVGBN954A2q1WqCpiMjcMNgRERnI4MGD8cYbb+jUjh49inXr1gk0ERGZG27FEhEZUFlZGQIDA3Hjxg1tzcbGBikpKfDz8xNwMiIyB1yxIyIyIEdHR2zatEmnVlVVhfHjx0OlUgk0FRGZCwY7IiIDi4iIwJQpU3RqiYmJ+OijjwSaiIjMBbdiiYgEUFFRgeDgYFy9elVbs7S0RHJyMvz9/QWcjIhMGVfsiIgEYGdnhy1btkAkEmlrtbW1GDduHOrq6gScjIhMGYMdEZFA+vXrh5kzZ+rUkpKSsGTJEoEmIiJTx61YIiIBVVVVITQ0FJcuXdLWpFIpTp8+jZCQEAEnIyJTxBU7IiIB2djY4Msvv4REItHWlEolxo0bh5qaGgEnIyJTxGBHRCSwXr16Yd68eTq18+fP45///KdAExGRqeJWLBGREaitrUXPnj2RmpqqrYnFYiQmJqJ3794CTkZEpoTBjojISKSkpKBnz546V8V27NgRZ8+ehUwmE3AyIjIV3IolIjIS3bp1w8KFC3VqGRkZWLBggUATEZGp4YodEZERUSqV6Nu3L86cOaOtiUQiHD16FE8++aS
AkxGRKWCwIyIyMhcvXkRISIjOVbHt2rVDamoq7OzsBJyMiIwdt2KJiIxMly5dEB8fr1PLysrC7NmzBZqIiEwFV+yIiIyQSqVCeHg4Tpw4oVM/cOAAnn32WYGmIiJjx2BHRGSkrl69iqCgICgUCm3Ny8sL58+fR4sWLYQbjIiMFrdiiYiMlK+vL5YvX65Ty83NxYwZM4QZiIiMHlfsiIiMmFqtxnPPPYdDhw7p1Hfv3o2hQ4cKNBURGSsGOyIiI5eTk4PAwECUl5dra25ubkhPT0fLli0FnIyIjA23YomIjJy3tzdWrVqlUyssLMS7774rzEBEZLS4YkdEZAI0Gg2GDh2KvXv36tR37NiBl19+WaCpiMjYMNgREZmI/Px8+Pv7o6SkRFtr2bIl0tLS4O7uLuBkRGQsuBVLRGQiPDw8sHbtWp1acXEx3n77bfDf6EQEMNgREZmU0aNHY+TIkTq1PXv2YNu2bQJNRETGhFuxREQm5vbt2/D398ft27e1NUdHR6SlpcHLy0vAyYhIaFyxIyIyMa1atcJnn32mUysrK8PEiRO5JUvUzDHYERGZoGHDhuHVV1/VqR08ePC+wEdEzQu3YomITFRJSQkCAgJw8+ZNbc3W1hapqalo3769gJMRkVC4YkdEZKKcnJywceNGnVplZSXeeOMNqNVqgaYiIiEx2BERmbCBAwdi0qRJOrVjx45hzZo1Ak1ERELiViwRkYm7c+cOAgMDkZ2dra1ZW1vj3Llz6NSpk4CTEZGhccWOiMjE2dvbY/PmzTq16upqjB8/HkqlUqCpiEgIDHZERGbgqaeewtSpU3Vq//vf/7BixQqBJiIiIXArlojITCgUCgQHByMzM1Nbs7S0xK+//orAwEABJyMiQ+GKHRGRmZDJZNiyZQvE4t9/tNfW1mLcuHGora0VcDIiMhQGOyIiM9K3b1/MmjVLp3b27FnEx8cLNBERGRK3YomIzEx1dTV69OiB9PR0bU0ikeDUqVPo3r27gJMRUVPjih0RkZmxtrbGl19+CYlEoq2pVCqMGzcO1dXVAk5GRE2NwY6IyAx1794dkZGROrX09HQsXLhQoImIyBC4FUtEZKbq6urQu3dvnD17VlsTiUQ4ceIE+vbtK+BkRNRUGOyIiMzY+fPn0aNHD52rYjt06IBz587B1tZWwMmIqClwK5aIyIwFBgbin//8p07typUr+OCDDwSaiIiaElfsiIjMnFKpRP/+/fG///1Pp37kyBE89dRTAk1FRE2BwY6IqBm4fPkygoODda6K9fHxQWpqKhwcHAScjIgaE7diiYiagU6dOmHJkiU6tezs7PtuZkxEpo0rdkREzYRarcbf/vY3HDt2TKe+f/9+DBo0SKCpiKgxMdgRETUjWVlZCAwMRGVlpbbWunVrpKWlwcnJScDJiKgxcCuWiKgZadeuHVauXKlTu3nzJqZNmybQRETUmLhiR0TUzGg0GgwcOBAHDx7Uqe/atQvDhw8XaCoiagwMdkREzVBubi4CAgJQVlamrbVq1Qrp6elo1aqVgJMR0ePgViwRUTPk5eWFTz75RKd2+/ZtvPPOO+C/94lMF1fsiIiaKY1Gg2HDhmHPnj069e3bt+OVV14RaCoiehwMdkREzVhBQQECAgJQXFysrTk5OSE9PR0eHh4CTkZEj4JbsUREzZi7uzvWrVunUyspKcGkSZO4JUtkghjsiIiauZdffhmjRo3Sqe3btw9btmwRZiAiemTciiUiIhQXF8Pf3x+FhYXamr29PdLS0uDt7S3gZET0MLhiR0REaNmyJT777DOd2p07dzBx4kSo1WqBpiKih8VgR0REAIChQ4di3LhxOrVDhw5h/fr1Ak1ERA+LW7FERKRVWlqKwMBA5ObmamsymQypqanw9fUVcDIiagiu2BERkVaLFi2wceNGnZpCocD48eOhUqkEmoqIGorBjoiIdDz77LOYPHmyTu3EiRNYvXq1QBMRUUNxK5aIiO5TUVGBoKAgZGV
laWtWVlY4e/YsunTpIuBkRPQgXLEjIqL72NnZYfPmzRCJRNpaTU0Nxo0bB6VSKeBkRPQgDHZERFSvJ598EtOnT9epnTlzBkuXLhVoIiL6K9yKJSIivaqqqhASEoLLly9raxYWFjhz5gy6desm4GREVB+u2BERkV42Njb48ssvIRb/frqoq6vD66+/jtraWgEnI6L6MNgREdED9e7dG3PnztWppaamIjY2VqCJiEgfbsUSEdFfqqmpQc+ePXH+/HltTSKR4OTJk+jZs6eAkxHRHzHYERFRg5w9exa9evXSuSq2S5cuSEpKgo2NjYCTEdE93IolIqIGCQkJQVRUlE7t4sWL99WISDhcsSMiogarq6tDnz59kJSUpK2JRCIcP34c/fr1E3AyIgIY7IiI6CGlp6cjNDRU56pYX19fpKSkwNbWVsDJiIhbsURE9FD8/f0RFxenU7t69ep9V84SkeFxxY6IiB6aSqXCgAEDkJiYqFM/dOgQnn76aYGmIiIGOyIieiSZmZno1q0bqqqqtDVvb2+kpqbC0dFRwMmImi9uxRIR0SPx8/O777mxOTk5mDlzpkATERFX7IiI6JGp1WpERETg6NGjOvW9e/di8ODBAk1F1Hwx2BER0WO5fv06AgMDUVFRoa25u7sjPT0dzs7OAk5G1PxwK5aIiB5L27Zt8fHHH+vUCgoKMHXqVIEmImq+uGJHRESPTaPRYPDgwfjpp5906jt37sSIESMEmoqo+WGwIyKiRpGXl4eAgACUlpZqay4uLkhPT4erq6twgxE1I9yKJSKiRuHp6Yk1a9bo1IqKijB58mRwDYHIMBjsiIio0YwdOxbDhw/Xqf3www/Yvn27QBMRNS/ciiUiokZ169Yt+Pv7o6ioSFtr0aIF0tLS4OnpKeBkROaPK3ZERNSoXF1d8emnn+rUSktLMWnSJG7JEjUxBjsiImp0I0eOxCuvvKJT++mnn7Bx40aBJiJqHrgVS0RETUIul8Pf3x8FBQXamp2dHc6fP4+2bdsKNxiRGeOKHRERNQlnZ2d88cUXOrWKigq88cYbUKvVAk1FZN4Y7IiIqMkMHjwYb7zxhk7t6NGjWLdunUATEZk3bsUSEVGTKisrQ2BgIG7cuKGt2djYICUlBX5+fgJORmR+uGJHRERNytHREZs2bdKpVVVVYfz48VCpVAJNRWSeGOyIiKjJRUREYMqUKTq1xMREfPTRRwJNRGSeuBVLREQGUVFRgeDgYFy9elVbs7S0RHJyMvz9/QWcjMh8cMWOiIgMws7ODlu2bIFIJNLWamtrMW7cONTV1Qk4GZH5YLAjIiKD6devH2bOnKlTS0pKwpIlSwSaiMi8cCuWiIgMqqqqCqGhobh06ZK2JpVKcfr0aYSEhAg4GZHp44odEREZlI2NDb788ktIJBJtTalUYty4caipqRFwMiLTx2BHREQG16tXL8ybN0+ndv78efzzn/8UaCIi88CtWCIiEkRtbS169uyJ1NRUbU0sFiMxMRG9e/cWcDIi08VgR0REgklJSUHPnj11rort1KkTzp49CxsbGwEnIzJN3IolIiLBdOvWDQsXLtSpXb58GQsWLBBoIiLTxhU7IiISlFKpRN++fXHmzBltTSQS4ejRo3jyyScFnIzI9DDYERGR4C5evIiQkBCdq2LbtWuH1NRU2NnZCTgZkWnhViwREQmuS5cuiI+P16llZWVh9uzZAk1EZJq4YkdEREZBpVIhPDwcJ06c0KkfOHAAzz77rEBTEZkWBjsiIjIaV69eRVBQEBQKhbbm5eWF8+fPo0WLFsINRmQiuBVLRERGw9fXF8uXL9ep5ebmYsaMGcIMRGRiuGJHRERGRa1W47nnnsOhQ4d06rt378bQoUOhUql0HkdGRL/jih0RERkVsViMjRs3wsHBQac+adIkTJkyBS1atIC3t/d9n8UjIq7YERGRkdq8eTPeeOMNvV8PDg7G2bNnDTgRkfFjsCMiIqOk0WgwePBg/PTTT/V
+XSQSoaqqClZWVlCpVJDL5SgsLERhYSFuFxSgpqoKapUKYokEVjY2aOXuDjc3N7i5ucHZ2ZnbuWSWpEIPQEREVJ/8/Hxcu3ZN79c1Gg3S09NRXl6O88nJqK6shEaphF1VFRzlctgolRBrNFCLRKiTSnHZ2RlJNjYQSaWwtrVFYGgounXrBicnJwN+V0RNiyt2RERklEaPHo0dO3bU+zV3d3f069sXIYGBkNXVwTvnBjzkcjhWVsJCpdL7nnUSCcpsbZHv7Iwc7zaok8nQzs8PYf37w8PDo6m+FSKD4YodEREZpaKiovtqEokEffv2RVjPnnCpqEDnX5Pge+cOJGp1g97TQqWCS3k5XMrL0TUnB7kuLrhSXIyvrlxBz7AwhIWFQSrlqZFMF1fsiIjIKB06dAhDhgxBdXU1AMDV1RVDBw+Gp5MT/C5dQuuMDNjZyNDC0fGxjqMWiZDp6YlLfn5w9vLE80OHwt3dvTG+BSKDY7AjIiKjdeXKFcydOxe//vorXh42DB4KBbokJUFWXg4AkEikcHN1bZRjlctkSOrSBYrWrTF81Mvw8fFplPclMiQGOyIiMmrZ2dn4eutWOF69ik4nT0Lyh8/QNWawAwClWIxT/l0h9/bGiFdeYbgjk8MbFBMRkdEqKCjADzt2wP1mPp66lgVnO3sAIu3X7ezsGvV4UrUafdLS4ZyTgx92fIuCgoJGfX+ipsZgR0RERkmpVGLfnj2Q3cxH7wsXINFoYCuTwcPdHU5OznB1dYOtTNboxxVrNOidfgE2+Texf88eKJXKRj8GUVNhsCMiIqOUkJCAktw8dL94EdI/XPUqEolgY20NaRPeYFiqVqP7hYuQ5+UhMTGxyY5D1NgY7IiIyOjcvHkTZxIS0DkzEw4KhSAzOCoU6JSRidMnTiA/P1+QGYgeFoMdEREZncQTJ2BXVAS/vDxB5+iYlwe7oiIknDgh6BxEDcVgR0RERqWkpARZmZnokJ0DscA3bhBrNPDNzkFWRgZKSkoEnYWoIRjsiIjIqKSkpMBCoYBXPU+eEEKboiJIFQqkpqYKPQrRX2KwIyIio6FSqXA+ORneOTca/JiwpiZRq+Fz4wZSk5KgesBzaImMAYMdEVEzkZubixdffBG+vr7o0aMHXnrpJRQWFtbb++GHH+Jf//pXk86j0WiwcOFCdOjQAX5+fhg8eDBSU1NRXVkJD7n8od/v1dRUZFRWan9/VF6MuRmXH/iaX4qLkFX1+8UZczMu429nzmDo2WQMPZuMqRcvAAA8iuWorqyE/BHm0mf8+PHYu3fvY7/P8OHD4eTkhJEjRzbCVGTqGOyIiJoBjUaDv//97xg8eDCuXr2KX3/9FdOmTcPt27cFm+mTTz5BcnIy0tLSkJmZiTFjxmDs2LFQ19WhRUWFQWY4VFyM61VVOrUo3/bYExKKPSGhWNOlKwDAsbISGqVSbxA2BLWeFczp06dj69atBp6GjBWDHRFRM3D48GHY2dlh4sSJ2lr//v3h6+uL1157DUFBQejVqxfOnTt332vDw8ORlpYGAEhLS0N4eDiA31b13njjDfTr1w/t2rXDzz//jHfeeQddu3bFq6++qn29i4sLZs2ahcDAQDz99NOovLuqtnz5cnzyySewtrYGAIwdOxYWFhbIu3QJBQoFhiQnY07GZQxM+hXTL13EvSdgnr9zB2NTUzD87Fm8nZ6O0rq6v/z+5XV1eDs9HUOSk/Bqaipyq6uRcqccR+RyxF69hqFnk1FcW6v39ZEXL+CnvXvx0ksvwc/PD8eOHQPw202Up0+fjsDAQAQFBeHbb78FAGzbtg2BgYEICAjA8uXLte/z4YcfolOnTvjb3/6mExIPHDiAPn36ICQkBK+++ipq787SsmVLvPfeewgMDERGRka9s4WHh8Pe3v4v/wyoeWCwIyJqBi5cuIDQ0ND76mvXroW9vT1SU1PxySefYNy4cQ/1vtnZ2Th27Bj+7//+DyNHjsSECROQnp6Oa9eu4ez
ZswCA4uJiDBw4EOfPn4enpyd27dqF8vJyKBQKtGvXTuf9PFu3RumNGwCAa1UKvOXlhZ9Cu6O4tg6/lpejTq3GkqxrWNulK34ICcEzLVtiQ+4N7eunXrqo3UaNvXpNW1+Tk40ejg74d2h3vOLhgbhrV9HN3gF/c3bWrtC1tLQEAG3QG3o2GfHXrmrfo66sHAsjI7FhwwbExMQAAD777DPI5XKkpKQgNTUVzzzzDPLy8vDhhx/i2LFj+PXXX/H1118jKSkJZ86cwb59+5CamoqvvvoKJ0+eBAAUFRVh+fLlOHLkCM6ePYv27dvj888/BwDI5XIMGjQI58+fR+fOnR/q74aaJ6nQAxARkXBOnDiBOXPmAACeeOIJVFVVoaysrMGvf/755yGRSBAYGAh7e3v06tULABAQEIDr168jJCQEdnZ2iIiIAAB0794d169f1/t+KqVSe9FEOxsbdJDZAgC62tkir6YajlIpLlVW4vW087/1azTo8IfHiq3p3AUdbX97zVF5MX6+e2VtUnk5Jnf1/21mFxcs+kNg+7Mo3/Z4yrnlffXebbxQW12t8z0cOnQIM2fOhFj82zqJk5MTjh07hqeffhrOzs4AgJEjR+LEiRPQaDQYPnw4rKys4OHhgb/97W8AgP/9739ITU1Fnz59AAA1NTUYPHgwAMDGxkb7a6KGYLAjImoGunTpgl27dj3Sa6VSqfbzXTU1NTpfs7KyAgCIxWLtr+/9/t4VpH+sSyQSqFQqODg4QCaT4fr162jbtq3269ezs9HbxwcoLYOl+PdNJbFIBLUGUAPoameHbYFBj/S93CN6hNdYisW/Bc+738MjHVd0/5HVajUGDx6MzZs33/c1WRM8C5fMG7diiYiagYiICJSXl2PLli3a2okTJ9CjRw9s374dAHD69GnIZDI4OjrqvNbHx0f72btHDYf1ef/99zF9+nRtWPz6669RU1MD/9at9b6mvY0N8mtqkFZxBwBQq1bjagMeOdbdwQF7714o8nNxEYLufibNViJBZQNDmhoiSKS66yERERH47LPPtMG3pKQEvXr1wuHDh1FSUoKamhrs2rUL/fv3R79+/fDjjz+itrYWBQUFOHr0KACgT58+OHr0KLKzswEA5eXlyMrKatBMRH/GFTsiomZAJBLhxx9/xLRp0xAbGwtra2sEBARg2bJlWLBgAYKCgmBtbV3vqtHMmTMxatQorF69Wrt92BimT58OuVwOf39/iEQidOjQAe//4x9Q3r1Qoz6WYjFWde6MuGvXUKlUQQ0NprTxhu9frGxN9fbBvIwM/HirEI5SCyzp2BEAMLhVK0RmZuKz3Fxs9g8A8Ntn7D6+G7IcpVLt6qBSIoHl3Qs97nnrrbdw6dIlBAYGQiqVIjIyEi+99BIWLlyIAQMGQKPRYNy4cdrPNw4aNAiBgYHw9PTEE088AQBo1aoVPv/8c4wYMQK1tbUQi8VYtWrVfZ8/1CciIgIpKSmorKyEl5cXvvvuO+22LjU/Io1G4Oe1EBER3XX48GFcPnAAz5z8331fq1MqoaishEqthq2tLazuXuxgKL/0eQKdnnsOTz/9tEGPS/QwuGJHRERGw83NDUk2NqiTSGBxd4u0TqnEnTt3UF39+/3maqqr4ermBonYMJ8oqpNIUGFjAzc3N4Mcj+hRMdgREZHRcHNzg0gqRZmtLRyKi1FRcQfV1dX39WmggUqlhERsmFW7MltbiKRSQYNd796977t45fDhw2jZ8v4reKn5YrAjIiKj4ezsDEgkuGpjgzZF+p+KYWFhCQuphcHmym/pDGtbW+0tTIRw6tQpwY5NpoNXxRIRkVFITEzE4MGDsffAAVz3bA1VPdusIpEY9nb2aNmyZb23DmkKKrEY2W3aIKh7d0gkEoMck+hRMdgREZGgjh07hoiICISFheHAgQNISUmBwsICxV5e2h6xSAx7ewe4ubnB3t4eYgOFOgC44eICpUyGoKDHu3cekSFwK5aIiAxOo9HgyJE
jiImJwfHjx3W+VlZWhsysLLT084NrXh4cZLaQ2doaNMzdoxaJcNXHG+06doSTk5PBj0/0sLhiR0REBqPRaHDgwAH069cPERER94W6ey5cuoQqV1eUh4bCzs5OkFAHABmenqhwcUFYv36CHJ/oYTHYERFRk9NoNNi3bx+eeOIJDBw4EImJifX2tW7dGqtWrcKZM2fQPyICl/06olygx2qVyWS43NEPvfr1g4eHhyAzED0sBjsiImoyGo0Gu3fvRo8ePfDCCy/g9OnT9fZ5eXlh7dq1uHr1KqZPnw6ZTIawsDA4eXkiqUsXKA10v7p7lGIxkrp2gbOnJ/r27WvQYxM9DgY7IiJqdGq1Gjt37kRISAiGDRuG5OTkevt8fHywYcMGXLlyBVOmTIH1Hx7ZJZVKMXjoUChat8Yp/65QG2g7Vi0S4ZR/V1R5tMbzQ4dCKuXH0cl08JFiRETUaFQqFb777jvExcUhPT1db1/79u2xYMECvPbaa7CwePD96LKzs/H911/DOScHvdMvQKpWN/bYWkqxGKf8u0Lu7Y0Rr7wCHx+fJjsWUVNgsCMiosemVCrxzTffIC4uDpcvX9bb5+fnh8jISIwZM+ahVsKys7Pxw45vIbt5E90vXoSDQtEYY+sok8mQ1LULqjxaY/iolxnqyCQx2BER0SOrq6vDV199hUWLFuHKlSt6+7p06YLIyEiMGjXqkW/yW1BQgH179qAkNw+dMzPhl5cHcSOcwtQiETI8PXG5ox+cPT3x/NChcHd3f+z3JRICgx0RET202tpabN26FfHx8cjKytLbFxAQgKioKIwYMaJRntqgVCqRkJCAMwkJsCsqgm92DtoUFUHyCNuzKrEYN1xccNXHGxUuLujVrx/69u3Lz9SRSWOwIyKiBqupqcHmzZuxePFi5OTk6O0LDg5GdHQ0/v73v0PcBFe03rx5E4kJCcjKyIBUoYDPjRvwKJbDsbISFiqV3tfVSSQos7VFfktnZLdpA6VMhnYdOyKMtzQhM8FgR0REf6mqqgpffPEFli5diry8PL19PXr0QHR0NF544QWDPMu1pKQEqampSE1KQnVlJTRKJeyqquAgL4GlUgmxRg21SIxaqRTlzk6osLGBSCqFta0tgrp3R1BQEJ8oQWaFwY6IiPRSKBTYsGEDli1bhoKCAr19vXv3xsKFCzFw4ECDBLo/U6lUkMvlKCwsRGFhIW4XFKC2uhoqpRISqRSW1tZo5e4ONzc3uLm5wdnZuVG2homMDYMdERHdp6KiAp9++ilWrFiBW7du6e3r168foqOjERERIUigIyJd/IQoERFplZeXY+3atVi5ciWKi4v19oWHhyM6Ohrh4eEMdERGhMGOiIhQWlqKTz75BKtWrUJJSYnevmeeeQZRUVHo37+/AacjooZisCMiasbkcjlWrVqF1atXo7y8XG/foEGDEBUVhT59+hhwOiJ6WAx2RETNUFFRET766COsWbMGFRUVevuGDBmCqKgo9OzZ04DTEdGjYrAjImpGCgsLsXLlSqxbtw6VlZV6+4YPH46oqCiEhIQYcDoielwMdkREzUB+fj6WL1+O9evXo6qqqt4ekUiEkSNHIjIyEkFBQQaekIgaA4MdEZEZy83NxdKlS/H555+jpqam3h6xWIzRo0djwYIF6Nq1q4EnJKLGxGBHRGSGsrOzsWTJEmzatAm1tbX19kgkEowdOxbz589Hp06dDDwhETUFBjsiIjNy7do1LF68GFu2bIFSqay3RyqV4vXXX8cHH3yADh06GHhCImpKDHZERGYgMzMT8fHx2LZtG1QqVb09FhYWmDBhAubNm4d27doZeEIiMgQGOyIiE3bp0iUsWrQI27dvh1qtrrfH0tISb775JubOnQtvb28DT0hEhsRgR0RkgtLT0xEXF4cdO3ZA3yO/ra2t8dZbb2HOnDnw9PQ08IREJAQGOyIiE5KSkoLY2Fh8//33entsbGwwZcoUzJo1C+7u7gacjoiExmBHRGQ
CkpKSEBsbi927d+vtsbW1xXvvvYeZM2fC1dXVgNMRkbFgsCMiMmKnTp1CbGws9u3bp7fH3t4e06ZNw4wZM+Di4mLA6YjI2DDYEREZoYSEBMTGxuLAgQN6exwdHTFjxgxMnz4dTk5OBpyOiIwVgx0RkRE5duwYYmJicOTIEb09Tk5OmDlzJqZOnQpHR0cDTkdExo7BjohIYBqNBkeOHEFMTAyOHz+ut8/FxQWzZs3ClClTYG9vb8AJichUMNgREQlEo9Hg4MGDiImJQWJiot4+V1dXzJkzB5MnT4atra0BJyQiU8NgR0RkYBqNBvv370dMTAxOnz6tt8/DwwNz587FpEmTIJPJDDghEZkqBjsiIgPRaDTYs2cPYmJikJycrLfPy8sL8+bNw8SJE2FtbW3ACYnI1DHYERE1MbVajV27diEuLg4pKSl6+3x8fPDBBx9g/PjxsLKyMuCERGQuGOyIiJqISqXCd999h7i4OKSnp+vta9++PRYsWIDXXnsNFhYWBpyQiMwNgx0RUSNTKpX45ptvEBcXh8uXL+vt8/PzQ2RkJMaMGQOplD+Oiejx8ScJEVEjqaurw1dffYVFixbhypUrevu6dOmCyMhIjBo1ChKJxIATEpG5Y7AjInpMtbW12Lp1K+Lj45GVlaW3LyAgAFFRURgxYgQDHRE1CQY7IqJHVFNTg82bN2Px4sXIycnR2xccHIyoqCgMGzYMYrHYgBMSUXPDYEdE9JCqqqrwxRdfYOnSpcjLy9Pb16NHD0RHR+OFF16ASCQy4IRE1Fwx2BERNZBCocCGDRuwbNkyFBQU6O3r3bs3Fi5ciIEDBzLQEZFBMdgREf2FiooKfPrpp1ixYgVu3bqlty8sLAwLFy5EREQEAx0RCYLBjohIj/LycqxduxYrV65EcXGx3r7w8HBER0cjPDycgY6IBMVgR0T0J6Wlpfjkk0+watUqlJSU6O2LiIhAVFQUBgwYYMDpiIj0Y7AjIrpLLpdj1apVWL16NcrLy/X2DRo0CFFRUejTp48BpyMi+msMdkTU7BUVFeGjjz7CmjVrUFFRobdvyJAhiIqKQs+ePQ04HRFRwzHYEVGzVVhYiJUrV2LdunWorKzU2zd8+HBERkYiNDTUgNMRET08Bjsianby8/OxfPlyrF+/HlVVVfX2iEQijBw5EpGRkQgKCjLwhEREj4bBjoiajdzcXCxduhSff/45ampq6u0Ri8UYNWoUFixYAH9/fwNPSET0eBjsiMjsZWdnY8mSJdi0aRNqa2vr7RGLxXj11Vcxf/58dOrUycATEhE1DgY7IjJb165dw+LFi7FlyxYolcp6e6RSKV5//XV88MEH6NChg4EnJCJqXAx2RGR2MjMzER8fj23btkGlUtXbY2FhgQkTJmDevHlo166dgSckImoaDHZEZDYuXbqERYsWYfv27VCr1fX2WFpa4s0338TcuXPh7e1t4AmJiJoWgx0Rmby0tDTExcXh22+/hUajqbfH2toab731FubMmQNPT08DT0hEZBgMdkRkslJSUhAbG4vvv/9eb4+NjQ3eeecdzJo1Cx4eHgacjojI8BjsiMjkJCUlITY2Frt379bbY2tri/feew8zZ86Eq6urAacjIhIOgx0RmYxTp04hNjYW+/bt09tjb2+PadOmYcaMGXBxcTHgdEREwmOwIyKjl5CQgNjYWBw4cEBvj6OjI2bMmIHp06fDycnJgNMRERkPBjsiMlrHjh1DTEwMjhw5orfHyckJM2fOxNSpU+Ho6GjA6YiIjA+DHREZFY1GgyNHjiAmJgbHjx/X2+fi4oL3338f7777Luzt7Q04IRGR8WKwIyKjoNFocPDgQcTExCAxMVFvn6urK2bPno3JkyfDzs7OgBMSERk/BjsiEpRGo8H+/fsRExOD06dP6+3z8PDA3LlzMWnSJMhkMgNOSERkOhjsiEgQGo0Ge/bsQUxMDJKTk/X2eXl5Yd68eZg4cSKsra0NOCERkelhsCMig1K
r1di1axfi4uKQkpKit8/b2xvz58/H+PHjYWVlZcAJiYhMF4MdERmESqXCd999h7i4OKSnp+vta9++PebPn4/XXnsNlpaWBpyQiMj0MdgRUZNSKpX45ptvEBcXh8uXL+vt8/Pzw4IFCzBmzBhYWFgYcEIiIvPBYEdETaKurg5fffUVFi1ahCtXrujt69y5MyIjIzFq1ChIpfyRRET0OPhTlIgaVW1tLbZu3Yr4+HhkZWXp7QsICEBUVBRGjBgBiURiwAmJiMwXgx0RNYqamhps2rQJS5YsQU5Ojt6+bt26ITo6GsOGDYNYLDbghERE5o/BjogeS1VVFb744gssXboUeXl5evu6d++O6OhoDBkyBCKRyIATEhE1Hwx2RPRIFAoFNmzYgGXLlqGgoEBvX+/evREdHY1BgwYx0BERNTEGOyJ6KBUVFfj000+xYsUK3Lp1S29fWFgYFi5ciIiICAY6IiIDYbAjogYpLy/H2rVrsXLlShQXF+vtCw8PR3R0NMLDwxnoiIgMjMGOiB6otLQUn3zyCVatWoWSkhK9fREREYiKisKAAQMMOB0REf0Rgx0R1Usul2PVqlVYvXo1ysvL9fYNHDgQUVFR6Nu3rwGnIyKi+jDYEZGOoqIifPTRR1izZg0qKir09g0ZMgSRkZHo1auXAacjIqIHYbAjIgBAYWEhVq5ciXXr1qGyslJv3/DhwxEZGYnQ0FADTkdERA3BYEfUzOXn52P58uVYv349qqqq6u0RiUQYOXIkIiMjERQUZOAJiYiooRjsiJqp3NxcLF26FJ9//jlqamrq7RGJRBg9ejQWLFgAf39/A09IREQPi8GOqJnJzs7GkiVLsGnTJtTW1tbbIxaLMXbsWMyfPx+dO3c28IRERPSoGOyImolr165h8eLF2LJlC5RKZb09EokEr7/+OubPn48OHToYeEIiInpcDHZEZi4zMxPx8fHYtm0bVCpVvT0WFhaYMGEC5s2bh3bt2hl4QiIiaiwMdkRm6tKlS1i0aBG2b98OtVpdb4+lpSXefPNNzJ07F97e3gaekIiIGhuDHZGZSUtLQ1xcHL799ltoNJp6e6ytrfHWW29hzpw58PT0NPCERETUVBjsiMxESkoKYmNj8f333+vtsbGxweTJkzF79mx4eHgYcDoiIjIEBjsiE5eUlITY2Fjs3r1bb4+trS3effddvP/++3B1dTXgdEREZEgMdkQm6tSpU4iNjcW+ffv09tjb22PatGmYMWMGXFxcDDgdEREJgcGOyMQkJCQgNjYWBw4c0Nvj6OiIGTNmYNq0aXB2djbgdEREJCQGOyITcezYMcTExODIkSN6e5ycnDBz5kxMnToVjo6OBpyOiIiMAYMdkRHTaDQ4cuQIYmJicPz4cb19Li4ueP/99zFlyhQ4ODgYcEIiIjImDHZERkij0eDgwYOIiYlBYmKi3j5XV1fMnj0bkydPhp2dnQEnJCIiY8RgR2RENBoN9u/fj5iYGJw+fVpvn4eHB+bMmYO33noLMpnMgBMSEZExY7AjMgIajQZ79uxBTEwMkpOT9fZ5eXlh3rx5mDhxIqytrQ04IRERmQIGOyIBqdVq7Nq1C3FxcUhJSdHb5+3tjfnz52P8+PGwsrIy4IRERGRKGOyIBKBSqfDdd98hLi4O6enpevvat2+P+fPn47XXXoOlpaUBJyQiIlPEYEdkQEqlEt988w3i4uJw+fJlvX1+fn5YsGABxowZAwsLCwNOSEREpozBjsgA6urq8NVXX2HRokW4cuWK3r7OnTsjMjISo0aNglTK/z2JiOjh8MxB1IRqa2uxdetWxMfHIysrS29fQEAAoqKiMGLECEgkEgNOSERE5oTBjqgJ1NTUYPPmzVi8eDFycnL09nXr1g3R0dEYNmwYxGKxASckIiJzxGBH1IiqqqrwxRdfYOnSpcjLy9Pb1717d0RHR2PIkCEQiUQGnJCIiMwZgx1RI1AoFNiwYQOWLVuGgoICvX29e/dGdHQ0Bg0
axEBHRESNjsGO6DFUVFTg008/xYoVK3Dr1i29fWFhYVi4cCEiIiIY6IiIqMkw2BE9gvLycqxduxYrV65EcXGx3r7w8HBER0cjPDycgY6IiJocgx3RQygtLcWaNWvw8ccfo6SkRG9fREQEoqKiMGDAAANOR0REzR2DHVEDyOVyrF69GqtXr0ZZWZnevoEDByIqKgp9+/Y14HRERES/YbAjeoCioiJ89NFH+Ne//oU7d+7o7RsyZAgiIyPRq1cvA05HRESki8GOqB6FhYVYuXIl1q1bh8rKSr19w4cPR2RkJEJDQw04HRERUf0Y7Ij+ID8/H8uXL8f69etRVVVVb49IJMLIkSMRGRmJoKAgA09IRESkH4MdEYDc3FwsW7YMn332GWpqaurtEYlEGD16NBYsWAB/f38DT0hERPTXGOyoWcvOzsbSpUuxceNG1NbW1tsjFosxduxYzJ8/H507dzbwhERERA3HYEfN0rVr17B48WJs2bIFSqWy3h6JRILXX38d8+fPR4cOHQw8IRER0cNjsKNmJTMzE/Hx8di2bRtUKlW9PRYWFpgwYQLmzZuHdu3aGXhCIiKiR8dgR83CpUuXsGjRImzfvh1qtbreHktLS7z55puYO3cuvL29DTwhERHR42OwI7OWnp6OuLg47NixAxqNpt4ea2trvPXWW5gzZw48PT0NPCEREVHjYbAjs5SSkoK4uDjs3LlTb4+NjQ3eeecdzJo1Cx4eHgacjoiIqGkw2JFZSUpKQmxsLHbv3q23x9bWFu+99x5mzpwJV1dXA05HRETUtBjsyCycPn0aMTEx2Ldvn94ee3t7TJs2DTNmzICLi4sBpyMiIjIMBjsyaYmJiYiJicGBAwf09jg6OmLGjBmYPn06nJycDDgdERGRYTHYkUk6fvw4YmJicPjwYb09Tk5OmDlzJqZOnQpHR0cDTkdERCQMBjsyGRqNBkePHkVMTAyOHTumt8/FxQXvv/8+3n33Xdjb2xtwQiIiImEx2JHR02g0+OWXXxATE4OEhAS9fa6urpg9ezYmT54MOzs7A05IRERkHBjsyGhpNBrs378fMTExOH36tN4+Dw8PzJ07F5MmTYJMJjPghERERMaFwY6MjkajwZ49exATE4Pk5GS9fV5eXpg3bx4mTpwIa2trA05IRERknBjsyGio1Wr88MMPiI2NRUpKit4+b29vzJ8/H+PHj4eVlZUBJyQiIjJuDHYkOJVKhZ07dyI2Nhbp6el6+9q3b4/58+fjtddeg6WlpQEnJCIiMg0MdiQYpVKJb775BosWLcKlS5f09vn5+SEyMhJjxoyBVMr/ZImIiPThWZIMrq6uDl999RUWLVqEK1eu6O3r3LkzoqKiMGrUKEgkEgNOSEREZJoY7MhgamtrsXXrVsTHxyMrK0tvX0BAAKKiojBixAgGOiIioofAYEdNrqamBps3b8bixYuRk5Ojt69bt26Ijo7GsGHDIBaLDTghERGReWCwoyZTXV2NL774AkuWLEFeXp7evu7duyM6OhpDhgyBSCQy4IRERETmhcGOGp1CocCGDRuwfPly5Ofn6+3r3bs3Fi5ciIEDBzLQERERNQIGO2o0FRUV+PTTT7FixQrcunVLb19YWBgWLlyIiIgIBjoiIqJGxGBHj628vBxr167FypUrUVxcrLcvPDwc0dHRCA8PZ6AjIiJqAgx29MhKS0uxZs0afPzxxygpKdHbFxERgaioKAwYMMCA0xERETU/DHb00ORyOVavXo3Vq1ejrKxMb9/AgQMRFRWFvn37GnA6IiKi5ovBjhqsqKgIH3/8MdasWYM7d+7o7RsyZAgiIyPRq1cvA05HREREDHb0lwoLC7Fy5UqsW7cOlZWVevuGDx+OyMhIhIaGGnA6IiIiuofBjvTKz8/H8uXLsX79elRVVdXbIxKJMHLkSERGRiIoKMjAExIREdEfMdjRfXJzc7Fs2TJ89tlnqKmpqbdHLBZj1KhRWLBgAfz9/Q08IREREdW
HwY60cnJysGTJEmzcuBG1tbX19ojFYrz66quYP38+OnXqZOAJiYiI6EEY7AhZWVlYvHgxtmzZgrq6unp7pFIpXn/9dXzwwQfo0KGDgSckIiKihmCwa8YyMzMRHx+Pbdu2QaVS1dtjYWGBCRMmYN68eWjXrp2BJyQiIqKHwWDXDF26dAmLFi3C9u3boVar6+2xtLTEm2++iblz58Lb29vAExIREdGjYLBrRtLT0xEXF4cdO3ZAo9HU22NtbY233noLc+bMgaenp4EnJCIiosfBYNcMpKSkIC4uDjt37tTbY2Njg3feeQezZs2Ch4eHAacjIiKixsJgZ8aSk5MRGxuLH3/8UW+Pra0t3nvvPcycOROurq6GG46IiIgaHYOdGTp9+jRiY2Oxd+9evT329vaYNm0aZsyYARcXFwNOR0RERE2Fwc6MJCYmIiYmBgcOHNDb4+joiBkzZmD69OlwcnIy4HRERETU1BjszMDx48cRExODw4cP6+1xcnLCzJkzMXXqVDg6OhpwOiIiIjIUBjsTpdFocPToUcTExODYsWN6+1xcXDBr1ixMmTIF9vb2BpyQiIiIDI3BzsRoNBr88ssviImJQUJCgt4+V1dXzJkzB5MnT4atra0BJyQiIiKhMNiZCI1Gg59++gkxMTE4deqU3j4PDw/MnTsXkyZNgkwmM+CEREREJDQGOyOn0WiwZ88exMbGIikpSW+fl5cX5s2bh4kTJ8La2tqAExIREZGxYLAzUmq1Gj/88ANiY2ORkpKit8/HxwcffPABxo8fDysrKwNOSERERMaGwc7IqFQq7Ny5E7GxsUhPT9fb1759eyxYsACvvfYaLCwsDDghERERGSsGOyOhVCqxY8cOxMXF4dKlS3r7/Pz8EBkZiTFjxkAq5V8fERER/a5ZJAOVSgW5XI7CwkIUFhbidkEBaqqqoFapIJZIYGVjg1bu7nBzc4ObmxucnZ0hkUgMMltdXR22b9+ORYsWITMzU29f586dERUVhVGjRhlsNiIiImNmzOd3oYg0Go1G6CGaSklJCVJSUnA+ORnVlZXQKJWwq6qCo1wOC6USYo0GapEIdVIpypydUWFjA5FUCmtbWwSGhqJbt25N9nSG2tpabNu2DfHx8bh27ZrevoCAAERFRWHEiBFm/x8jERFRQxjz+V1oZhnsbt68icQTJ5CVmQkLhQLeOTfgIZfDsbISFiqV3tfVSSQos7VFvrMzcrzboE4mQzs/P4T17w8PD49Gma2mpgabN2/G4sWLkZOTo7cvODgYUVFRGDZsGMRicaMcm4iIyJQZ8/ndWJhVsFMqlUhISMCZhATYFRWhQ3YOvIqKIFGrH/q9VGIxcl1ccMXHGxUuLugZFoawsLBH/lxbdXU1vvjiCyxZsgR5eXl6+3r06IHo6Gi88MILEIlEj3QsIiIic2LM53djYzbBrqCgAPv27EFJbh46Z2bCLy8P4kb41tQiETI9PXHJzw/OXp54fuhQuLu7N/j1CoUCn332GZYtW4b8/Hy9fb1798bChQsxcOBABjoiIqK7jPX8bqzMIthlZ2fjhx07ILuZj+4XL8JBoWj0Y5TLZEjq0gWK1q0xfNTL8PHxAfDbf3AHDhxAYGAgQkNDtf0VFRVYv349li9fjlu3bul937CwMCxcuBAREREMdERERH8g5PndVJl8sMvOzsb3X3+Nltk56HXhAqSPsCzbUEqxGKf8u0Lu7Y0Rr7yC2tpa9O/fH4WFhQCAHTt2YNCgQVi7di1WrlyJoqIive8VHh6O6OhohIeHM9ARERH9iZDnd1MOdyYd7AoKCvDN1q1okXUdfdLTG2Vp9q+oRSKcDPBHiU9b7Nr7b5w4cUL7NQcHB4jFYpSWlup9fUREBKKiojBgwIAmn5WIiMgUCXl+L23bDqNff81kt2VN9nJLpVKJfXv2QHYzH70vXDDIXzoAiDUa9E6/AEn2dXTu0EHnFiTl5eV6Q92gQYOQmJiIX375haGOiIhID6HP7zb
5N7F/zx4olUqDHLexmWywS0hIQEluHrpfvNiky7P1qa2sRIf//Q+ezs7o27fvA3uHDBmC06dPY//+/ejTp4+BJiQiIjJNQp7fpWo1ul+4CHleHhITEw167MZiksHu5s2bOJOQgM6ZmU3yQcoHqa2rQ2lpKWzLy+F36RLCevasd7l2+PDhSEpKwp49e9CzZ0+DzkhERGSKhDy/3+OoUKBTRiZOnzjxwLtZGCuTDHaJJ07ArqgIfg+4H1xT0AAoLi6++yugdUYGXCoqEPanVTtPT0/s3LlT5ypZIiIiejChzu9/1jEvD3ZFRUj4w+foTYXJBbuSkhJkZWaiQ3aOwfbd71HW1UGj+X1ZWKzRoM2VK+jYrh0cHR219by8PFy/ft2gsxEREZkyIc/vfybWaOCbnYOsjAyUlJQIOsvDMrlgl5KSAguFAl4PuJVIU5FaWADQvTWJy40bkCmV6Natm7bm6uoKLy8vA09HRERkuoQ8v9enTVERpAoFUlNThR7loZjU8zNUKhXOJyfDO+fGIz1G5HGJALi0bImy8nJoNBpIJGKIRCK0zc1Dv969YWdnBw8PD/zjH/+ApaWlwecjIiIyRUKf3+sjUavhc+MGUpOS0K9fP527YBizh1qx27p1K0JCQlBSUoLx48ejXbt22suB09LSEB4e/sDX79mzBx9//PEDez788EP861//uq/+n//8B8OGDUN1ZSU85PKHGfuB7iiVmJeRgb+dOYMXz53FxPQ0ZFUpcKq0FFMvXriv39LSEq1cXODaqhVaOreEs5MzfKuq0MLeHmPHjsW1a9fwyiuv4JtvvtG+5tdff8Xs2bMBALdv30bv3r0REhKCY8eOYezYsY/9PZw+fRo9evSAhYUF9u7d+9jvR0RE5k8qlSI4OBgBAQF46aWXoGiEixWuX7+OHj16aH8fGxuLZ555BjU1NQgPD0fnzp0RHBwMf39/bN++HcBvF0y8/PLLj3R+f+rMaQxJTsLg5CS8kJyEDTduQHV3G/dwcTE2P+Zn9TyK5aiurIT8T3OdO3cOBw8e1P5+/fr12LFjx2MdS5+4uDh4e3s3uL/BK3a7du3C0qVLcfToUTg5OQH47V4zX3/9NV577bUGvcfQoUMbPFh9ampqoFEq0aKi4qFep9ZoINbzdIe5GRnoZCvD4R49IBKJkFFZiaLauga/twaAVXExFHcqEB8fj/T0dADA2LFj8dRTT8HNzQ09evTQ/od++PBh9OzZUxten3zyyQYfS6VS1fsvhtatW2Pjxo1YuXJlg9+LiIiatxYtWuDcuXMAfjtnrV+/HjNnzmy091+9ejV+/vlnHDx4EFZWVgCAnTt3IiAgAAUFBQgODsaYMWPQunVr/POf/8T+77576PM7AHzTLRi2EglK6urw/uVLqFCp8H7btni6ZcvHml+t0cCxshIapRKFhYVo1aqV9mvnzp1DWloann32WQDA5MmTH+tYgP5z/HPPPYeJEyc2+H0aHOzmzZuHw4cPw9XVVVubMWMGli9fjldfffW+4ebMmYPjx4+jtrYWc+bMwdixY7FlyxakpaVhxYoVyMjIwJgxY1BXV4enn34ax48fx6+//grgtz+wAQMGIDc3F/Hx8Rg9ejQAoKioCFu2bsX6wkL8zdkZc9u1BwD8eKsQX+TmQgNguKsb3vTyQm51NSZfSEcHmQwXKyvxfbdgTL90CYW1NQCAue3ao421NS5VVuJfXbpoH+vV0dYWAHDqDzcaPldejvisa6hVq2ErkWBZx05obW2N47cKsfh6NkTQoPrqVXQKCtQGO7VajV69euG7777DlStXsG3bNkybNg0zZ85ETU0Njh07hrVr1+Ldd9/F7t27oVKpsHTpUpw+fRp1dXV466238Pe//x07d+7E4cOHUVZWBkdHR3z66af1/v3Y29ujsrISBQUFuHbtWkP/WomIqJlSq9Xa80WXLl2QnJyM77//HosWLUJtbS1sbW2xYsUKeHp64uTJk4iJiYFYLIZ
UKsXu3btx+fJlzJ49G+q7W6dbtmxBdXU1ampqsGzZMmzduhVfffWV9rGb1dXVuHHjBmQyGa5fvw5ra2tcu3YNubm5mDBhAmYOH44f8vJwvLQE5UolcqtrMNrdDRNae6Jao7nvHN7/7iLTPU4WFvhnBz8MP3sWM3188MOtW8hQVGJeu/bYe/sW/pWTAwuRGF7WVvi0qz8qlEp8ePUqLldWABAh2tcXHlZWOtnhh+AQHNi/Hxu/+QbW1taYM2cORo8ejejoaFRXV+PQoUOIj4/H6dOn4eLighdffBHPP/+89s/3/Pnz0Gg0uHr1KqZMmYLi4mI4ODhg06ZNaNu2LcLDwxEcHIwTJ07gvffew/jx4+/7e3rYW6Y1ONjt27cPbdq00al16tQJnTp1wu7du9GhQwdtfePGjfDw8MCZM2dQVVWFJ554AgMHDtR57YwZMxAZGYlhw4YhMjJS52tXr17F4cOHkZOTg+eee04b7C5cuIClw4bh2dw8vH4+FadKS+FjY4M1OTn4vlswbCQSjEo5hydaOKKF1AJXFQqs6NQZnW1tcaCoCC0spNgYEACNRoNKlQqnysrQ2dZW72rePR1kMnwd1A0SkQiHi4ux7kYOpjo4YlNeLqa0dEYPmQzngoNx6k9JOycnB71799b+/ueff9b+Wi6Xa1frfH197zvmzJkz6/2XU329f7Rr164Hfp2IiOieP59TvvrqK53f63tSUn3noj+e7+bOnQsACA4O1um5F3r+/D7OTk6wuHkT5eVluFBejg1eXlBpNHj9xg08K5HgpEIBO5EIG0O7a8/h9WljbQ0AKK7T3Xlbf+MG1nf1R1sbG9y5+xGydTduoLWVFVZ06gT13fcsUyp1ssM3BflwdXbGpOho/P3FF7V5JiYmRrtQBfz2kSjgtx20e6ugUVFRiIiIAABMmTIFGzZsQNu2bXHkyBHMnj0b3333HQDAwsJCu7DVGBr8Gbv/+7//q7f+wQcfYPHixTq1gwcP4osvvkBwcDD69OmDsrKy+1aRkpKS8Pe//x0AMGrUKJ2vvfDCC7CwsICvr6/OI7o6+PrCw9oaUpEIA11ckFRejvMVd9DHsQVaWFjASizGcy4uSCorBwC0tbFB57srcB1tZThTVoZlWVk4d+cO7KQNv26kTKnEexcvYHByElZcz8LlO3egVqsQYG2Nz4qL8X1pKVRVVbDmBRNEREQPTSQSQVpbCwDoYWMDmVgMe4kELSUSlKhUaGdhgV/Ly7G0AedwDe6/VUqogwOirmTi24IC7VcTS0swxsMDACAWiWB/9z3/mB0SSkpw/NIlzP3gA715pj4///wz/vOf/2DZsmWoqKjAf//7XwwbNgzBwcH4xz/+gbw/fPbvpZdeatCfUUM1ON3s3r0bbdq0wZtvvqlTDw0NhZOTEw4fPqytqdVqbNiw4b7Pj93bpvwr9/bi6/PHe9v8xUIbbP6wgtbORobdIaE4KpdjcdY1DGnliv5OTrisqHzgZ/AAYHVONp50dsZodw9kVFZi9qWLAICxTk7oLZPhpEKB+KNH8dzgwQ36/oiIiOh3IpEIortbuhZ/OB+LRSKoNBq0sbTERm9vXJBKtefw11q3vu99cqurIRaJ0NLCQqf+T98OOHfnDo7I5Xjx3FnsDdH/AIE/Zgc1gLeeeALeL76ICW+/ra0/KM/k5OTgH//4Bw4dOgSpVAq1Wg03NzftSt6fyWQyve/1KBq8Yrd//37Ex8dj3759931t/vz52uVIAHj22Wexbt06qO4ulaalpWl/fU9oaCj+/e9/A4B2OfKvXLl6FbcVCig1GhwsKkZ3BwcE2dnjZFkpypR1qFWr8UtxMXr84WbB9xTW1EAmkeBFNzeMa+2Ji5UVaGtjg44yW6y9kQPN3cCYWVmJX8vKdF5boVTBzfK3sLnrViHEEgmkUiny6urQwcoKrzk5wcPBAaXl5Q36PoiIiOh3Go0GGrH+SFKi1sDdyUnnHP5npXV1WHj1CsZ6eGg
/N3/PjepqhDg4YKaPDyxEIpQqlejbwgnb7z4yTK3RaLdo/6hfCyccyMiA6O5s9/KMvb097ty5c19/bW0tRo8ejTVr1sDT0xMA4ODgADc3N23mUalUSEtLa+CfzMNr8Ipd69atsXfvXjz33HP3fY5rwIABOpfiTpo0CVlZWQgJCYFarYaHhwd++uknndd8/PHHePXVVxEVFYX+/fvDwcHhL2fo4OuLT0+exHK5HH9zdkYvxxYAgPfaeGNsaqr24gl/OzvkVlfrvDZDocDSrGsQi0SwFosR7+cHAFjS0Q+Lrl3D07/+CplEDHcrK0S290VhTc3v34+XF+ZmZGB19nX0d3KGCIBrK1f8KyMDpwoKIYIGHl5esP/T9/DEE0/g3//+N5KSkrB+/Xps374d27Ztw4ULF7B48WJkZ2djzJgxSEhIgEqlwsKFC3Hw4EGo1Wq4u7vjxx9/xNdff63t1+f8+fMYPnw4SktLYWNjA19fX/znP//5yz9PIiJqvtq0aYMbN27o1E6dOoVJkybB1tYWzzzzDA4fPoyEhAT84x//wPHjxyGRSBAaGoq1a9fio48+wjfffAOpVApvb29s2bIFRUVF2vMaABw/fhyTJk3CgQMH8Pbbb6OwsBBWVlaoq6vDe++9hzfeeAPZ2dl4ftAgWLZogRaOLWCnUMDD/bctUouCQri2csX16mrMvXTxvnM4AIxOOQc1frvX7FBXV0z0vP8BAUuyspBTXQUNgGdausDdygpT2rTBwqtX8EJyEsQiEaLb+8L9TzuGL7u743R5OaJjY7Fi9WptnnnqqaewZMkShISEYNGiRdr+kydP4ty5c5g1a5a2du7cOWzfvh2TJ09GZGQk6urq8PbbbyMgIKBBf09RUVHYvHkzcnNzG9Qv0miEeW6HQqGAjY0NRCIRli9fjsLCQp1Vv/ocPnwYlw8cwDMn/2egKRumtq4WP/fogf0XL+LIkSMAfttOzs/P194ahoiIiOpnrOd3APilzxPo9NxzePrpp4UepUEEe/LE6dOnMWPGDKhUKnh5eWHr1q1/+Ro3Nzck2digTiKBhZ4rYoQgsraBqmVLvPvuu2jdujVu376NWbNmMdQRERE1gLGe3+skElTY2MDNzU3oURpMsGAXHh6u94OE+ri5uUEklaLM1hYuRvR5tjJbW4ikUvTv3x8vvvhikx3nwIED2kvI7wkLC8PatWub7JhERERNzdjP74YIdu+++652C/uepUuX4rnnnnuo9zGpZ8U6OzvD2tYW+c7ORvUXn9/yt7mcnZ2b9DjPPffcQ/8FExERGbvmfn4H0GiLNA/1rFihSSQSBIaGIse7DVQPuHrGkFRiMbLbtEFQ9+4m84BgIiIiY8Lze+Mxjj+9h9CtWzfUyWTIdXERehQAwA0XFyhlMgQFBQk9ChERkcni+b1xmFywc3JyQjs/P1zx8Yb6r+5Q3MTUIhGu+nijXceOvFCCiIjoMfD83jhMLtgBQFj//qhwcUHm3Zv/CSXD0xMVLi4I69dP0DmIiIjMAc/vj88kg52Hhwd6hoXhkp8fyhv5URwNVSaT4XJHP/Tq1w8ed581R0RERI+O5/fHZ5LBDvjtNh9OXp5I6tIFSgN/0FIpFiOpaxc4e3qib9++Bj02ERGROeP5/fGYbLCTSqUYPHQoFK1b45R/V4Ptx6tFIpzy74oqj9Z4fuhQSKUmdccYIiIio8bz++Mx2WAHAO7u7hg+6mXIvb1xMsC/yZO9UizGyQB/yL29MXzUy3B3d2/S4xERETVHPL8/OsGeFduYsrOz8cOObyG7eRPdL16Eg0LR6Mcok8mQ1LULqjxaY/iol+Hj49PoxyAiIqLf8fz+8Mwi2AFAQUEB9u3Zg5LcPHTOzIRfXh7EjfCtqUUiZHh64nJHPzh7euL5oUNNOskTERGZEp7fH47ZBDsAUCqVSEhIwJmEBNgVFcE3OwdtioogUasf+r1UYjFuuLjgqo83Klxc0KtfP/Tt29dk99yJiIh
MFc/vDWdWwe6emzdvIjEhAVkZGZAqFPC5cQMexXI4VlbCQqXS+7o6iQRltrbIb+mM7DZtoJTJ0K5jR4SZ6CXPRERE5oTn979mlsHunpKSEqSmpiI1KQnVlZXQKJWwq6qCg7wElkolxBo11CIxaqVSlDs7ocLGBiKpFNa2tgjq3h1BQUEmd8dpIiIic8fzu35mHezuUalUkMvlKCwsRGFhIW4XFKC2uhoqpRISqRSW1tZo5e4ONzc3uLm5wdnZ2aQe+EtERNQc8fx+v2YR7IiIiIiaA5O+jx0RERER/Y7BjoiIiMhMMNgRERERmQkGOyIiIiIzwWBHREREZCYY7IiIiIjMBIMdERERkZlgsCMiIiIyEwx2RERERGaCwY6IiIjITDDYEREREZkJBjsiIiIiM8FgR0RERGQmGOyIiIiIzASDHREREZGZYLAjIiIiMhMMdkRERERmgsGOiIiIyEww2BERERGZCQY7IiIiIjPBYEdERERkJhjsiIiIiMwEgx0RERGRmWCwIyIiIjITDHZEREREZoLBjoiIiMhMMNgRERERmYn/B9eWMQC9SuXtAAAAAElFTkSuQmCC", "text/plain": [ "
" ] diff --git a/Tutorial/6_GraphPipeline.ipynb b/Tutorial/6_GraphPipeline.ipynb index 320c68f1..810c6890 100644 --- a/Tutorial/6_GraphPipeline.ipynb +++ b/Tutorial/6_GraphPipeline.ipynb @@ -19,7 +19,7 @@ "outputs": [ { "data": { - "image/png": "", + "image/png": "", "text/plain": [ "
" ] @@ -107,7 +107,7 @@ "name": "python", "nbconvert_exporter": "python", "pygments_lexer": "ipython3", - "version": "3.10.13" + "version": "3.10.14" }, "orig_nbformat": 4, "vscode": { diff --git a/Tutorial/7_dask_parallelization.ipynb b/Tutorial/7_dask_parallelization.ipynb index 4769c491..0a68448f 100644 --- a/Tutorial/7_dask_parallelization.ipynb +++ b/Tutorial/7_dask_parallelization.ipynb @@ -33,14 +33,91 @@ "name": "stderr", "output_type": "stream", "text": [ - "Evaluations: : 242it [02:01, 1.99it/s]\n" + "Generation: 20%|██ | 1/5 [00:01<00:07, 1.93s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ - "0.9995194086144522\n" + "Generation: 1\n", + "Best roc_auc_score score: 0.9976190476190476\n" + ] + }, + { + "name": "stderr", + "output_type": "stream", + "text": [ + "Generation: 40%|████ | 2/5 [00:04<00:06, 2.12s/it]" + ] + }, + { + "name": "stdout", + "output_type": "stream", + "text": [ + "Generation: 2\n", + "Best roc_auc_score score: 0.9976984126984128\n" + ] + }, + { + "name": "stderr", + "output_type": "stream", + "text": [ + "Generation: 60%|██████ | 3/5 [00:09<00:07, 3.80s/it]" + ] + }, + { + "name": "stdout", + "output_type": "stream", + "text": [ + "Generation: 3\n", + "Best roc_auc_score score: 1.0\n" + ] + }, + { + "name": "stderr", + "output_type": "stream", + "text": [ + "Generation: 80%|████████ | 4/5 [00:15<00:04, 4.46s/it]" + ] + }, + { + "name": "stdout", + "output_type": "stream", + "text": [ + "Generation: 4\n", + "Best roc_auc_score score: 1.0\n" + ] + }, + { + "name": "stderr", + "output_type": "stream", + "text": [ + "Generation: 100%|██████████| 5/5 [00:24<00:00, 4.99s/it]" + ] + }, + { + "name": "stdout", + "output_type": "stream", + "text": [ + "Generation: 5\n", + "Best roc_auc_score score: 1.0\n" + ] + }, + { + "name": "stderr", + "output_type": "stream", + "text": [ + "\n", + "/home/ribeirop/miniconda3/envs/tpot2env/lib/python3.10/site-packages/sklearn/linear_model/_sag.py:350: ConvergenceWarning: The max_iter was 
reached which means the coef_ did not converge\n", + " warnings.warn(\n" + ] + }, + { + "name": "stdout", + "output_type": "stream", + "text": [ + "1.0\n" ] } ], @@ -55,7 +132,7 @@ "\n", "if __name__==\"__main__\":\n", " scorer = sklearn.metrics.get_scorer('roc_auc_ovr')\n", - " X, y = sklearn.datasets.load_digits(return_X_y=True)\n", + " X, y = sklearn.datasets.load_iris(return_X_y=True)\n", " X_train, X_test, y_train, y_test = sklearn.model_selection.train_test_split(X, y, train_size=0.75, test_size=0.25)\n", " \n", " graph_search_space = tpot2.search_spaces.pipelines.GraphPipeline(\n", @@ -66,7 +143,7 @@ " )\n", "\n", " est = tpot2.TPOTEstimator(\n", - " scorers = [\"roc_auc\"],\n", + " scorers = [\"roc_auc_ovr\"],\n", " scorers_weights = [1],\n", " classification = True,\n", " cv = 5,\n", @@ -74,7 +151,7 @@ " population_size= 10,\n", " generations = 5,\n", " max_eval_time_seconds = 60*5,\n", - " verbose = 2,\n", + " verbose = 3,\n", " )\n", " \n", " \n", @@ -105,14 +182,20 @@ "name": "stderr", "output_type": "stream", "text": [ - "Evaluations: : 224it [02:00, 1.86it/s]\n" + "Generation: 100%|██████████| 5/5 [00:11<00:00, 2.24s/it]\n", + "/home/ribeirop/miniconda3/envs/tpot2env/lib/python3.10/site-packages/sklearn/decomposition/_fastica.py:595: UserWarning: n_components is too large: it will be set to 8\n", + " warnings.warn(\n", + "/home/ribeirop/miniconda3/envs/tpot2env/lib/python3.10/site-packages/sklearn/decomposition/_fastica.py:128: ConvergenceWarning: FastICA did not converge. 
Consider increasing tolerance or the maximum number of iterations.\n", + " warnings.warn(\n", + "/home/ribeirop/miniconda3/envs/tpot2env/lib/python3.10/site-packages/sklearn/linear_model/_sag.py:350: ConvergenceWarning: The max_iter was reached which means the coef_ did not converge\n", + " warnings.warn(\n" ] }, { "name": "stdout", "output_type": "stream", "text": [ - "0.9996005895289903\n" + "1.0\n" ] } ], @@ -122,7 +205,7 @@ "import sklearn.datasets\n", "import numpy as np\n", "scorer = sklearn.metrics.get_scorer('roc_auc_ovr')\n", - "X, y = sklearn.datasets.load_digits(return_X_y=True)\n", + "X, y = sklearn.datasets.load_iris(return_X_y=True)\n", "X_train, X_test, y_train, y_test = sklearn.model_selection.train_test_split(X, y, train_size=0.75, test_size=0.25)\n", "\n", "\n", @@ -134,7 +217,7 @@ " )\n", "\n", "est = tpot2.TPOTEstimator(\n", - " scorers = [\"roc_auc\"],\n", + " scorers = [\"roc_auc_ovr\"],\n", " scorers_weights = [1],\n", " classification = True,\n", " cv = 5,\n", @@ -233,23 +316,16 @@ "name": "stderr", "output_type": "stream", "text": [ - "Evaluations: : 119it [02:01, 1.02s/it]\n" + "Generation: 100%|██████████| 5/5 [00:13<00:00, 2.62s/it]\n", + "/home/ribeirop/miniconda3/envs/tpot2env/lib/python3.10/site-packages/sklearn/linear_model/_sag.py:350: ConvergenceWarning: The max_iter was reached which means the coef_ did not converge\n", + " warnings.warn(\n" ] }, { "name": "stdout", "output_type": "stream", "text": [ - "0.9988827327847432\n" - ] - }, - { - "name": "stderr", - "output_type": "stream", - "text": [ - "2023-08-23 13:49:06,747 - distributed.nanny - WARNING - Worker process still alive after 3.1999992370605472 seconds, killing\n", - "2023-08-23 13:49:06,748 - distributed.nanny - WARNING - Worker process still alive after 3.199999694824219 seconds, killing\n", - "2023-08-23 13:49:06,748 - distributed.nanny - WARNING - Worker process still alive after 3.199999694824219 seconds, killing\n" + "1.0\n" ] } ], @@ -263,7 +339,7 @@ "\n", "est = 
tpot2.TPOTEstimator(\n", " client = client,\n", - " scorers = [\"roc_auc\"],\n", + " scorers = [\"roc_auc_ovr\"],\n", " scorers_weights = [1],\n", " classification = True,\n", " cv = 5,\n", @@ -297,29 +373,23 @@ }, { "cell_type": "code", - "execution_count": 6, + "execution_count": 7, "metadata": {}, "outputs": [ { "name": "stderr", "output_type": "stream", "text": [ - "Evaluations: : 132it [02:00, 1.10it/s]\n" + "Generation: 100%|██████████| 5/5 [00:16<00:00, 3.33s/it]\n", + "/home/ribeirop/miniconda3/envs/tpot2env/lib/python3.10/site-packages/sklearn/linear_model/_sag.py:350: ConvergenceWarning: The max_iter was reached which means the coef_ did not converge\n", + " warnings.warn(\n" ] }, { "name": "stdout", "output_type": "stream", "text": [ - "0.999973663151898\n" - ] - }, - { - "name": "stderr", - "output_type": "stream", - "text": [ - "2023-08-23 13:51:14,527 - distributed.nanny - WARNING - Worker process still alive after 3.199999694824219 seconds, killing\n", - "2023-08-23 13:51:14,528 - distributed.nanny - WARNING - Worker process still alive after 3.19999984741211 seconds, killing\n" + "1.0\n" ] } ], @@ -331,7 +401,7 @@ "import numpy as np\n", "\n", "scorer = sklearn.metrics.get_scorer('roc_auc_ovr')\n", - "X, y = sklearn.datasets.load_digits(return_X_y=True)\n", + "X, y = sklearn.datasets.load_iris(return_X_y=True)\n", "X_train, X_test, y_train, y_test = sklearn.model_selection.train_test_split(X, y, train_size=0.75, test_size=0.25)\n", "\n", "\n", @@ -352,7 +422,7 @@ "\n", " est = tpot2.TPOTEstimator(\n", " client = client,\n", - " scorers = [\"roc_auc\"],\n", + " scorers = [\"roc_auc_ovr\"],\n", " scorers_weights = [1],\n", " classification = True,\n", " cv = 5,\n", @@ -385,17 +455,9 @@ }, { "cell_type": "code", - "execution_count": 1, + "execution_count": null, "metadata": {}, - "outputs": [ - { - "name": "stdout", - "output_type": "stream", - "text": [ - "Sun Grid Engine is not installed. 
This example requires Sun Grid Engine to be installed.\n" - ] - } - ], + "outputs": [], "source": [ "from dask.distributed import Client, LocalCluster\n", "import sklearn\n", @@ -474,7 +536,7 @@ "name": "python", "nbconvert_exporter": "python", "pygments_lexer": "ipython3", - "version": "3.10.11" + "version": "3.10.14" }, "orig_nbformat": 4, "vscode": { diff --git a/Tutorial/8_SH_and_early_termination.ipynb b/Tutorial/8_SH_and_early_termination.ipynb index 8b6c2e49..26f08e49 100644 --- a/Tutorial/8_SH_and_early_termination.ipynb +++ b/Tutorial/8_SH_and_early_termination.ipynb @@ -41,20 +41,9 @@ }, { "cell_type": "code", - "execution_count": 1, + "execution_count": null, "metadata": {}, - "outputs": [ - { - "data": { - "image/png": "", - "text/plain": [ - "
" - ] - }, - "metadata": {}, - "output_type": "display_data" - } - ], + "outputs": [], "source": [ "import matplotlib.pyplot as plt\n", "import tpot2\n", @@ -88,87 +77,9 @@ }, { "cell_type": "code", - "execution_count": 2, + "execution_count": null, "metadata": {}, - "outputs": [ - { - "name": "stderr", - "output_type": "stream", - "text": [ - "2023-06-14 11:49:45,920 - distributed.diskutils - INFO - Found stale lock file and directory '/tmp/dask-worker-space/worker-q6nay1zr', purging\n", - "2023-06-14 11:49:45,921 - distributed.diskutils - INFO - Found stale lock file and directory '/tmp/dask-worker-space/worker-wni1q2fv', purging\n", - "2023-06-14 11:49:45,921 - distributed.diskutils - INFO - Found stale lock file and directory '/tmp/dask-worker-space/worker-kunoeg91', purging\n", - "2023-06-14 11:49:45,921 - distributed.diskutils - INFO - Found stale lock file and directory '/tmp/dask-worker-space/worker-40sr99dr', purging\n", - "2023-06-14 11:49:45,922 - distributed.diskutils - INFO - Found stale lock file and directory '/tmp/dask-worker-space/worker-_b9njy2q', purging\n", - "2023-06-14 11:49:45,922 - distributed.diskutils - INFO - Found stale lock file and directory '/tmp/dask-worker-space/worker-qft6b6eq', purging\n", - "2023-06-14 11:49:45,922 - distributed.diskutils - INFO - Found stale lock file and directory '/tmp/dask-worker-space/worker-cgnqe8s_', purging\n", - "2023-06-14 11:49:45,922 - distributed.diskutils - INFO - Found stale lock file and directory '/tmp/dask-worker-space/worker-mcu4ugbz', purging\n", - "2023-06-14 11:49:45,923 - distributed.diskutils - INFO - Found stale lock file and directory '/tmp/dask-worker-space/worker-za145tll', purging\n", - "2023-06-14 11:49:45,923 - distributed.diskutils - INFO - Found stale lock file and directory '/tmp/dask-worker-space/worker-3qdbpmh_', purging\n", - "2023-06-14 11:49:45,923 - distributed.diskutils - INFO - Found stale lock file and directory '/tmp/dask-worker-space/worker-54ch2nwd', purging\n", - 
"2023-06-14 11:49:45,923 - distributed.diskutils - INFO - Found stale lock file and directory '/tmp/dask-worker-space/worker-0zc92jfw', purging\n", - "2023-06-14 11:49:45,923 - distributed.diskutils - INFO - Found stale lock file and directory '/tmp/dask-worker-space/worker-ub9p6598', purging\n", - "2023-06-14 11:49:45,924 - distributed.diskutils - INFO - Found stale lock file and directory '/tmp/dask-worker-space/worker-8peu6bbu', purging\n", - "2023-06-14 11:49:45,924 - distributed.diskutils - INFO - Found stale lock file and directory '/tmp/dask-worker-space/worker-1qp5dr29', purging\n", - "2023-06-14 11:49:45,924 - distributed.diskutils - INFO - Found stale lock file and directory '/tmp/dask-worker-space/worker-hfh3inka', purging\n", - "2023-06-14 11:49:45,924 - distributed.diskutils - INFO - Found stale lock file and directory '/tmp/dask-worker-space/worker-b1yl5oa1', purging\n", - "2023-06-14 11:49:45,924 - distributed.diskutils - INFO - Found stale lock file and directory '/tmp/dask-worker-space/worker-epp_nuw_', purging\n", - "2023-06-14 11:49:45,925 - distributed.diskutils - INFO - Found stale lock file and directory '/tmp/dask-worker-space/worker-q1qdqc8g', purging\n", - "2023-06-14 11:49:45,925 - distributed.diskutils - INFO - Found stale lock file and directory '/tmp/dask-worker-space/worker-ek1b28f4', purging\n", - "2023-06-14 11:49:45,925 - distributed.diskutils - INFO - Found stale lock file and directory '/tmp/dask-worker-space/worker-1806jovl', purging\n", - "2023-06-14 11:49:45,925 - distributed.diskutils - INFO - Found stale lock file and directory '/tmp/dask-worker-space/worker-p0cuouft', purging\n", - "2023-06-14 11:49:45,925 - distributed.diskutils - INFO - Found stale lock file and directory '/tmp/dask-worker-space/worker-wh0g6edf', purging\n", - "2023-06-14 11:49:45,926 - distributed.diskutils - INFO - Found stale lock file and directory '/tmp/dask-worker-space/worker-1o1ws1of', purging\n", - "2023-06-14 11:49:45,926 - distributed.diskutils 
- INFO - Found stale lock file and directory '/tmp/dask-worker-space/worker-_zh96wch', purging\n", - "2023-06-14 11:49:45,926 - distributed.diskutils - INFO - Found stale lock file and directory '/tmp/dask-worker-space/worker-wd9vzw4h', purging\n", - "2023-06-14 11:49:45,926 - distributed.diskutils - INFO - Found stale lock file and directory '/tmp/dask-worker-space/worker-jy7obwb9', purging\n", - "2023-06-14 11:49:45,926 - distributed.diskutils - INFO - Found stale lock file and directory '/tmp/dask-worker-space/worker-f6ildiiw', purging\n", - "2023-06-14 11:49:45,927 - distributed.diskutils - INFO - Found stale lock file and directory '/tmp/dask-worker-space/worker-4ddayasf', purging\n", - "2023-06-14 11:49:45,927 - distributed.diskutils - INFO - Found stale lock file and directory '/tmp/dask-worker-space/worker-fn6vfz6t', purging\n", - "2023-06-14 11:49:45,927 - distributed.diskutils - INFO - Found stale lock file and directory '/tmp/dask-worker-space/worker-eyc403bk', purging\n", - "2023-06-14 11:49:45,927 - distributed.diskutils - INFO - Found stale lock file and directory '/tmp/dask-worker-space/worker-fr7a5y2z', purging\n", - "2023-06-14 11:49:45,927 - distributed.diskutils - INFO - Found stale lock file and directory '/tmp/dask-worker-space/worker-9kejqh6s', purging\n", - "2023-06-14 11:49:45,927 - distributed.diskutils - INFO - Found stale lock file and directory '/tmp/dask-worker-space/worker-_xaoujzg', purging\n", - "2023-06-14 11:49:45,928 - distributed.diskutils - INFO - Found stale lock file and directory '/tmp/dask-worker-space/worker-zimc_s51', purging\n", - "2023-06-14 11:49:45,928 - distributed.diskutils - INFO - Found stale lock file and directory '/tmp/dask-worker-space/worker-vtsv2zit', purging\n", - "2023-06-14 11:49:45,928 - distributed.diskutils - INFO - Found stale lock file and directory '/tmp/dask-worker-space/worker-hj0s47vd', purging\n", - "2023-06-14 11:49:45,928 - distributed.diskutils - INFO - Found stale lock file and directory 
'/tmp/dask-worker-space/worker-dpti5p3r', purging\n", - "2023-06-14 11:49:45,928 - distributed.diskutils - INFO - Found stale lock file and directory '/tmp/dask-worker-space/worker-4cplddft', purging\n", - "2023-06-14 11:49:45,929 - distributed.diskutils - INFO - Found stale lock file and directory '/tmp/dask-worker-space/worker-poszaeet', purging\n", - "2023-06-14 11:49:45,929 - distributed.diskutils - INFO - Found stale lock file and directory '/tmp/dask-worker-space/worker-cjx6kkgn', purging\n", - "2023-06-14 11:49:45,929 - distributed.diskutils - INFO - Found stale lock file and directory '/tmp/dask-worker-space/worker-u096a9iq', purging\n", - "2023-06-14 11:49:45,929 - distributed.diskutils - INFO - Found stale lock file and directory '/tmp/dask-worker-space/worker-0k3omqwi', purging\n", - "2023-06-14 11:49:45,929 - distributed.diskutils - INFO - Found stale lock file and directory '/tmp/dask-worker-space/worker-zk0s6ywn', purging\n", - "2023-06-14 11:49:45,930 - distributed.diskutils - INFO - Found stale lock file and directory '/tmp/dask-worker-space/worker-bwn757sx', purging\n", - "2023-06-14 11:49:45,930 - distributed.diskutils - INFO - Found stale lock file and directory '/tmp/dask-worker-space/worker-2nu35fgy', purging\n", - "2023-06-14 11:49:45,930 - distributed.diskutils - INFO - Found stale lock file and directory '/tmp/dask-worker-space/worker-w6b4di6m', purging\n", - "2023-06-14 11:49:45,930 - distributed.diskutils - INFO - Found stale lock file and directory '/tmp/dask-worker-space/worker-asj0iobm', purging\n", - "2023-06-14 11:49:45,930 - distributed.diskutils - INFO - Found stale lock file and directory '/tmp/dask-worker-space/worker-gxxzxsyi', purging\n", - "2023-06-14 11:49:45,931 - distributed.diskutils - INFO - Found stale lock file and directory '/tmp/dask-worker-space/worker-qa8099ky', purging\n", - "2023-06-14 11:49:45,931 - distributed.diskutils - INFO - Found stale lock file and directory '/tmp/dask-worker-space/worker-_uypy41h', 
purging\n", - "2023-06-14 11:49:45,931 - distributed.diskutils - INFO - Found stale lock file and directory '/tmp/dask-worker-space/worker-a4ujlka7', purging\n", - "2023-06-14 11:49:45,931 - distributed.diskutils - INFO - Found stale lock file and directory '/tmp/dask-worker-space/worker-dwhz05x3', purging\n", - "2023-06-14 11:49:45,931 - distributed.diskutils - INFO - Found stale lock file and directory '/tmp/dask-worker-space/worker-kgug_o6d', purging\n", - "2023-06-14 11:49:45,932 - distributed.diskutils - INFO - Found stale lock file and directory '/tmp/dask-worker-space/worker-rnbpw5ka', purging\n", - "2023-06-14 11:49:45,932 - distributed.diskutils - INFO - Found stale lock file and directory '/tmp/dask-worker-space/worker-i52qfiid', purging\n", - "2023-06-14 11:49:45,932 - distributed.diskutils - INFO - Found stale lock file and directory '/tmp/dask-worker-space/worker-_5el2wab', purging\n", - "2023-06-14 11:49:45,932 - distributed.diskutils - INFO - Found stale lock file and directory '/tmp/dask-worker-space/worker-mqhhdxip', purging\n", - "2023-06-14 11:49:45,932 - distributed.diskutils - INFO - Found stale lock file and directory '/tmp/dask-worker-space/worker-i6xplvqh', purging\n", - "2023-06-14 11:49:45,933 - distributed.diskutils - INFO - Found stale lock file and directory '/tmp/dask-worker-space/worker-_dmc4eb5', purging\n", - "2023-06-14 11:49:45,933 - distributed.diskutils - INFO - Found stale lock file and directory '/tmp/dask-worker-space/worker-mok5p0dw', purging\n", - "2023-06-14 11:49:45,933 - distributed.diskutils - INFO - Found stale lock file and directory '/tmp/dask-worker-space/worker-ugwiqoc3', purging\n", - "2023-06-14 11:49:45,933 - distributed.diskutils - INFO - Found stale lock file and directory '/tmp/dask-worker-space/worker-e97he6cf', purging\n", - "2023-06-14 11:49:45,933 - distributed.diskutils - INFO - Found stale lock file and directory '/tmp/dask-worker-space/worker-an5jredd', purging\n" - ] - }, - { - "name": "stdout", - 
"output_type": "stream", - "text": [ - "total time: 17.05474090576172\n" - ] - } - ], + "outputs": [], "source": [ "# A Graph pipeline starting with at least one selector as a leaf, potentially followed by a series\n", "# of stacking classifiers or transformers, and ending with a classifier. The graph will have at most 15 nodes and a max depth of 6.\n", @@ -194,14 +105,14 @@ " )\n", "\n", "est = tpot2.TPOTEstimator(\n", - " scorers = [\"roc_auc\"],\n", + " scorers = [\"roc_auc_ovr\"],\n", " scorers_weights = [1],\n", " classification = True,\n", " cv = 5,\n", " search_space = graph_search_space,\n", " generations = 50,\n", " max_eval_time_seconds = 60*5,\n", - " verbose = 2,\n", + " verbose = 3,\n", "\n", "\n", " population_size=population_size,\n", @@ -211,6 +122,7 @@ " \n", " budget_range = budget_range,\n", " generations_until_end_budget=generations_until_end_budget,\n", + " n_jobs=30,\n", " )\n", "\n", "\n", @@ -256,20 +168,9 @@ }, { "cell_type": "code", - "execution_count": 3, + "execution_count": null, "metadata": {}, - "outputs": [ - { - "data": { - "image/png": "", - "text/plain": [ - "
" - ] - }, - "metadata": {}, - "output_type": "display_data" - } - ], + "outputs": [], "source": [ "import matplotlib.pyplot as plt\n", "import tpot2\n", @@ -291,17 +192,9 @@ }, { "cell_type": "code", - "execution_count": 4, + "execution_count": null, "metadata": {}, - "outputs": [ - { - "name": "stdout", - "output_type": "stream", - "text": [ - "total time: 23.97980833053589\n" - ] - } - ], + "outputs": [], "source": [ "graph_search_space = tpot2.search_spaces.pipelines.GraphPipeline(\n", " root_search_space= tpot2.config.get_search_space([\"KNeighborsClassifier\", \"LogisticRegression\", \"DecisionTreeClassifier\"]),\n", @@ -335,20 +228,9 @@ }, { "cell_type": "code", - "execution_count": 5, + "execution_count": null, "metadata": {}, - "outputs": [ - { - "data": { - "image/png": "", - "text/plain": [ - "
" - ] - }, - "metadata": {}, - "output_type": "display_data" - } - ], + "outputs": [], "source": [ "import matplotlib.pyplot as plt\n", "import tpot2\n", @@ -370,17 +252,9 @@ }, { "cell_type": "code", - "execution_count": 6, + "execution_count": null, "metadata": {}, - "outputs": [ - { - "name": "stdout", - "output_type": "stream", - "text": [ - "total time: 23.03678798675537\n" - ] - } - ], + "outputs": [], "source": [ "\n", "\n", @@ -415,20 +289,9 @@ }, { "cell_type": "code", - "execution_count": 7, + "execution_count": null, "metadata": {}, - "outputs": [ - { - "data": { - "text/plain": [ - "array([1.2, 3.4, 1. ])" - ] - }, - "execution_count": 7, - "metadata": {}, - "output_type": "execute_result" - } - ], + "outputs": [], "source": [ "import math\n", "np.array([1.2,3.4,1])" @@ -436,25 +299,9 @@ }, { "cell_type": "code", - "execution_count": 8, + "execution_count": null, "metadata": {}, - "outputs": [ - { - "name": "stderr", - "output_type": "stream", - "text": [ - "/home/ribeirop/miniconda3/envs/tpot2env/lib/python3.10/site-packages/sklearn/neural_network/_multilayer_perceptron.py:686: ConvergenceWarning: Stochastic Optimizer: Maximum iterations (200) reached and the optimization hasn't converged yet.\n", - " warnings.warn(\n" - ] - }, - { - "name": "stdout", - "output_type": "stream", - "text": [ - "total time: 36.7981653213501\n" - ] - } - ], + "outputs": [], "source": [ "est = tpot2.TPOTEstimator( \n", " generations=5,\n", @@ -504,7 +351,7 @@ "name": "python", "nbconvert_exporter": "python", "pygments_lexer": "ipython3", - "version": "3.10.11" + "version": "3.10.14" }, "orig_nbformat": 4, "vscode": { diff --git a/Tutorial/9_Genetic_Algorithm_Overview.ipynb b/Tutorial/9_Genetic_Algorithm_Overview.ipynb index 3abfd1e6..9c931057 100644 --- a/Tutorial/9_Genetic_Algorithm_Overview.ipynb +++ b/Tutorial/9_Genetic_Algorithm_Overview.ipynb @@ -16,14 +16,14 @@ }, { "cell_type": "code", - "execution_count": 1, + "execution_count": 3, "metadata": {}, "outputs": [ { 
"name": "stderr", "output_type": "stream", "text": [ - "Generation: 100%|██████████| 100/100 [04:05<00:00, 2.46s/it]\n" + "Generation: 100%|██████████| 100/100 [03:43<00:00, 2.23s/it]\n" ] } ], @@ -35,7 +35,7 @@ "import matplotlib.pyplot as plt\n", "from dask.distributed import Client, LocalCluster\n", "\n", - "class SubsetSelector(tpot2.individual_representations.BaseIndividual):\n", + "class SubsetSelector(tpot2.individual.BaseIndividual):\n", " def __init__( self,\n", " values,\n", " initial_set = None,\n", @@ -59,7 +59,7 @@ " self.crossover_list = [self._crossover_swap]\n", " \n", "\n", - " def mutate(self, rng_=None):\n", + " def mutate(self, rng=None):\n", " mutation_list_copy = self.mutation_list.copy()\n", " random.shuffle(mutation_list_copy)\n", " for func in mutation_list_copy:\n", @@ -67,7 +67,7 @@ " return True\n", " return False\n", "\n", - " def crossover(self, ind2, rng_=None):\n", + " def crossover(self, ind2, rng=None):\n", " crossover_list_copy = self.crossover_list.copy()\n", " random.shuffle(crossover_list_copy)\n", " for func in crossover_list_copy:\n", @@ -146,15 +146,15 @@ }, { "cell_type": "code", - "execution_count": 2, + "execution_count": 4, "metadata": {}, "outputs": [ { "name": "stdout", "output_type": "stream", "text": [ - "best subset {0, 65, 2, 1, 38, 71, 40, 75, 44, 15, 48, 16, 85, 59, 60, 62}\n", - "Best value 2056.0, weight 48.6142482308331\n", + "best subset {6, 10, 11, 17, 24, 25, 29, 30, 32, 37, 43, 45, 49, 50, 53, 57, 59, 66, 67, 76, 82, 91, 99}\n", + "Best value 2891.0, weight 49.1782782587545\n", "\n", "All results\n" ] @@ -189,73 +189,79 @@ " Generation\n", " Submitted Timestamp\n", " Completed Timestamp\n", + " Eval Error\n", " Pareto_Front\n", " \n", " \n", " \n", " \n", " 0\n", - " (16,)\n", - " 75.0\n", - " 2.054788\n", + " (24,)\n", + " 145.0\n", + " 2.946778\n", " NaN\n", " NaN\n", - " <__main__.SubsetSelector object at 0x7faf86cfc...\n", + " <__main__.SubsetSelector object at 0x735f11b77...\n", " 0.0\n", - " 
1.708121e+09\n", - " 1.708121e+09\n", + " 1.719625e+09\n", + " 1.719625e+09\n", + " None\n", " NaN\n", " \n", " \n", " 1\n", - " (13,)\n", - " 11.0\n", - " 4.466691\n", + " (99,)\n", + " 4.0\n", + " 3.345873\n", " NaN\n", " NaN\n", - " <__main__.SubsetSelector object at 0x7faf86635...\n", + " <__main__.SubsetSelector object at 0x735f11b77...\n", " 0.0\n", - " 1.708121e+09\n", - " 1.708121e+09\n", + " 1.719625e+09\n", + " 1.719625e+09\n", + " None\n", " NaN\n", " \n", " \n", " 2\n", - " (41,)\n", - " 50.0\n", - " 6.249590\n", + " (28,)\n", + " 62.0\n", + " 6.965614\n", " NaN\n", " NaN\n", - " <__main__.SubsetSelector object at 0x7faf84e87...\n", + " <__main__.SubsetSelector object at 0x735f11b77...\n", " 0.0\n", - " 1.708121e+09\n", - " 1.708121e+09\n", + " 1.719625e+09\n", + " 1.719625e+09\n", + " None\n", " NaN\n", " \n", " \n", " 3\n", - " (40,)\n", - " 35.0\n", - " 0.992726\n", + " (86,)\n", + " 108.0\n", + " 4.322944\n", " NaN\n", " NaN\n", - " <__main__.SubsetSelector object at 0x7faf83fdf...\n", + " <__main__.SubsetSelector object at 0x735f12734...\n", " 0.0\n", - " 1.708121e+09\n", - " 1.708121e+09\n", + " 1.719625e+09\n", + " 1.719625e+09\n", + " None\n", " NaN\n", " \n", " \n", " 4\n", - " (77,)\n", - " 0.0\n", - " 1.475988\n", + " (36,)\n", + " 148.0\n", + " 1.660910\n", " NaN\n", " NaN\n", - " <__main__.SubsetSelector object at 0x7faf83ff1...\n", + " <__main__.SubsetSelector object at 0x735f12734...\n", " 0.0\n", - " 1.708121e+09\n", - " 1.708121e+09\n", + " 1.719625e+09\n", + " 1.719625e+09\n", + " None\n", " NaN\n", " \n", " \n", @@ -270,147 +276,140 @@ " ...\n", " ...\n", " ...\n", + " ...\n", " \n", " \n", " 9995\n", - " (0, 1, 5, 15, 60, 62, 65, 75, 83, 85)\n", - " 1323.0\n", - " 17.180098\n", - " ((0, 5, 15, 60, 62, 65, 75, 83, 85), (0, 5, 15...\n", + " (0, 10, 11, 17, 24, 30, 32, 37, 43, 45, 49, 53...\n", + " 0.0\n", + " 58.255546\n", + " ((0, 10, 11, 17, 24, 30, 32, 37, 43, 45, 49, 5...\n", " ind_mutate\n", - " <__main__.SubsetSelector object at 
0x7faf695e5...\n", + " <__main__.SubsetSelector object at 0x735f04c5d...\n", " 99.0\n", - " 1.708121e+09\n", - " 1.708121e+09\n", - " 1.0\n", + " 1.719625e+09\n", + " 1.719625e+09\n", + " None\n", + " NaN\n", " \n", " \n", " 9996\n", - " (0, 8, 15, 60, 62, 65, 75, 96)\n", - " 916.0\n", - " 18.695221\n", - " ((0, 15, 39, 40, 60, 62, 65, 75, 85), (0, 15, ...\n", - " ind_mutate , ind_mutate , ind_crossover\n", - " <__main__.SubsetSelector object at 0x7faf69fbf...\n", + " (10, 29, 48, 76)\n", + " 516.0\n", + " 10.002974\n", + " ((10, 17, 29, 76), (10, 17, 29, 76))\n", + " ind_mutate\n", + " <__main__.SubsetSelector object at 0x735f04c5e...\n", " 99.0\n", - " 1.708121e+09\n", - " 1.708121e+09\n", + " 1.719625e+09\n", + " 1.719625e+09\n", + " None\n", " NaN\n", " \n", " \n", " 9997\n", - " (0, 15, 57, 62, 65, 75, 85, 86, 92)\n", - " 967.0\n", - " 15.581100\n", - " ((0, 15, 60, 62, 65, 75, 85, 86), (0, 15, 60, ...\n", + " (2, 10, 17, 25, 29, 43, 50, 53, 68, 76)\n", + " 1101.0\n", + " 5.322915\n", + " ((2, 10, 17, 25, 29, 43, 50, 53, 76), (2, 10, ...\n", " ind_mutate\n", - " <__main__.SubsetSelector object at 0x7faf6b05a...\n", + " <__main__.SubsetSelector object at 0x735f04c5e...\n", " 99.0\n", - " 1.708121e+09\n", - " 1.708121e+09\n", + " 1.719625e+09\n", + " 1.719625e+09\n", + " None\n", " NaN\n", " \n", " \n", " 9998\n", - " (0, 15, 21, 65, 75, 76)\n", - " 878.0\n", - " 18.495023\n", - " ((0, 15, 60, 65, 75), (0, 15, 60, 65, 75))\n", + " (2, 10, 17, 20, 25, 29, 43, 76)\n", + " 910.0\n", + " 11.552131\n", + " ((2, 10, 17, 25, 29, 43, 76), (2, 10, 17, 25, ...\n", " ind_mutate\n", - " <__main__.SubsetSelector object at 0x7faf5eec0...\n", + " <__main__.SubsetSelector object at 0x735f04c5e...\n", " 99.0\n", - " 1.708121e+09\n", - " 1.708121e+09\n", + " 1.719625e+09\n", + " 1.719625e+09\n", + " None\n", " NaN\n", " \n", " \n", " 9999\n", - " (0, 15, 39, 65, 75, 83, 85, 92)\n", - " 1054.0\n", - " 14.423653\n", - " ((0, 2, 15, 39, 60, 65, 75, 83, 85), (0, 15, 3...\n", - " 
ind_mutate , ind_mutate , ind_crossover\n", - " <__main__.SubsetSelector object at 0x7faf6b36b...\n", + " (0, 10, 11, 17, 25, 29, 43, 49, 50, 53, 57, 59...\n", + " 1967.0\n", + " 19.677724\n", + " ((0, 10, 11, 17, 25, 29, 30, 43, 49, 50, 53, 5...\n", + " ind_mutate\n", + " <__main__.SubsetSelector object at 0x735f04c5e...\n", " 99.0\n", - " 1.708121e+09\n", - " 1.708121e+09\n", + " 1.719625e+09\n", + " 1.719625e+09\n", + " None\n", " NaN\n", " \n", " \n", "\n", - "

10000 rows × 10 columns

\n", + "

10000 rows × 11 columns

\n", "" ], "text/plain": [ - " Selected Index Value Weight \\\n", - "0 (16,) 75.0 2.054788 \n", - "1 (13,) 11.0 4.466691 \n", - "2 (41,) 50.0 6.249590 \n", - "3 (40,) 35.0 0.992726 \n", - "4 (77,) 0.0 1.475988 \n", - "... ... ... ... \n", - "9995 (0, 1, 5, 15, 60, 62, 65, 75, 83, 85) 1323.0 17.180098 \n", - "9996 (0, 8, 15, 60, 62, 65, 75, 96) 916.0 18.695221 \n", - "9997 (0, 15, 57, 62, 65, 75, 85, 86, 92) 967.0 15.581100 \n", - "9998 (0, 15, 21, 65, 75, 76) 878.0 18.495023 \n", - "9999 (0, 15, 39, 65, 75, 83, 85, 92) 1054.0 14.423653 \n", + " Selected Index Value Weight \\\n", + "0 (24,) 145.0 2.946778 \n", + "1 (99,) 4.0 3.345873 \n", + "2 (28,) 62.0 6.965614 \n", + "3 (86,) 108.0 4.322944 \n", + "4 (36,) 148.0 1.660910 \n", + "... ... ... ... \n", + "9995 (0, 10, 11, 17, 24, 30, 32, 37, 43, 45, 49, 53... 0.0 58.255546 \n", + "9996 (10, 29, 48, 76) 516.0 10.002974 \n", + "9997 (2, 10, 17, 25, 29, 43, 50, 53, 68, 76) 1101.0 5.322915 \n", + "9998 (2, 10, 17, 20, 25, 29, 43, 76) 910.0 11.552131 \n", + "9999 (0, 10, 11, 17, 25, 29, 43, 49, 50, 53, 57, 59... 1967.0 19.677724 \n", "\n", - " Parents \\\n", - "0 NaN \n", - "1 NaN \n", - "2 NaN \n", - "3 NaN \n", - "4 NaN \n", - "... ... \n", - "9995 ((0, 5, 15, 60, 62, 65, 75, 83, 85), (0, 5, 15... \n", - "9996 ((0, 15, 39, 40, 60, 62, 65, 75, 85), (0, 15, ... \n", - "9997 ((0, 15, 60, 62, 65, 75, 85, 86), (0, 15, 60, ... \n", - "9998 ((0, 15, 60, 65, 75), (0, 15, 60, 65, 75)) \n", - "9999 ((0, 2, 15, 39, 60, 65, 75, 83, 85), (0, 15, 3... \n", - "\n", - " Variation_Function \\\n", - "0 NaN \n", - "1 NaN \n", - "2 NaN \n", - "3 NaN \n", - "4 NaN \n", - "... ... \n", - "9995 ind_mutate \n", - "9996 ind_mutate , ind_mutate , ind_crossover \n", - "9997 ind_mutate \n", - "9998 ind_mutate \n", - "9999 ind_mutate , ind_mutate , ind_crossover \n", + " Parents Variation_Function \\\n", + "0 NaN NaN \n", + "1 NaN NaN \n", + "2 NaN NaN \n", + "3 NaN NaN \n", + "4 NaN NaN \n", + "... ... ... 
\n", + "9995 ((0, 10, 11, 17, 24, 30, 32, 37, 43, 45, 49, 5... ind_mutate \n", + "9996 ((10, 17, 29, 76), (10, 17, 29, 76)) ind_mutate \n", + "9997 ((2, 10, 17, 25, 29, 43, 50, 53, 76), (2, 10, ... ind_mutate \n", + "9998 ((2, 10, 17, 25, 29, 43, 76), (2, 10, 17, 25, ... ind_mutate \n", + "9999 ((0, 10, 11, 17, 25, 29, 30, 43, 49, 50, 53, 5... ind_mutate \n", "\n", " Individual Generation \\\n", - "0 <__main__.SubsetSelector object at 0x7faf86cfc... 0.0 \n", - "1 <__main__.SubsetSelector object at 0x7faf86635... 0.0 \n", - "2 <__main__.SubsetSelector object at 0x7faf84e87... 0.0 \n", - "3 <__main__.SubsetSelector object at 0x7faf83fdf... 0.0 \n", - "4 <__main__.SubsetSelector object at 0x7faf83ff1... 0.0 \n", + "0 <__main__.SubsetSelector object at 0x735f11b77... 0.0 \n", + "1 <__main__.SubsetSelector object at 0x735f11b77... 0.0 \n", + "2 <__main__.SubsetSelector object at 0x735f11b77... 0.0 \n", + "3 <__main__.SubsetSelector object at 0x735f12734... 0.0 \n", + "4 <__main__.SubsetSelector object at 0x735f12734... 0.0 \n", "... ... ... \n", - "9995 <__main__.SubsetSelector object at 0x7faf695e5... 99.0 \n", - "9996 <__main__.SubsetSelector object at 0x7faf69fbf... 99.0 \n", - "9997 <__main__.SubsetSelector object at 0x7faf6b05a... 99.0 \n", - "9998 <__main__.SubsetSelector object at 0x7faf5eec0... 99.0 \n", - "9999 <__main__.SubsetSelector object at 0x7faf6b36b... 99.0 \n", + "9995 <__main__.SubsetSelector object at 0x735f04c5d... 99.0 \n", + "9996 <__main__.SubsetSelector object at 0x735f04c5e... 99.0 \n", + "9997 <__main__.SubsetSelector object at 0x735f04c5e... 99.0 \n", + "9998 <__main__.SubsetSelector object at 0x735f04c5e... 99.0 \n", + "9999 <__main__.SubsetSelector object at 0x735f04c5e... 
99.0 \n", "\n", - " Submitted Timestamp Completed Timestamp Pareto_Front \n", - "0 1.708121e+09 1.708121e+09 NaN \n", - "1 1.708121e+09 1.708121e+09 NaN \n", - "2 1.708121e+09 1.708121e+09 NaN \n", - "3 1.708121e+09 1.708121e+09 NaN \n", - "4 1.708121e+09 1.708121e+09 NaN \n", - "... ... ... ... \n", - "9995 1.708121e+09 1.708121e+09 1.0 \n", - "9996 1.708121e+09 1.708121e+09 NaN \n", - "9997 1.708121e+09 1.708121e+09 NaN \n", - "9998 1.708121e+09 1.708121e+09 NaN \n", - "9999 1.708121e+09 1.708121e+09 NaN \n", + " Submitted Timestamp Completed Timestamp Eval Error Pareto_Front \n", + "0 1.719625e+09 1.719625e+09 None NaN \n", + "1 1.719625e+09 1.719625e+09 None NaN \n", + "2 1.719625e+09 1.719625e+09 None NaN \n", + "3 1.719625e+09 1.719625e+09 None NaN \n", + "4 1.719625e+09 1.719625e+09 None NaN \n", + "... ... ... ... ... \n", + "9995 1.719625e+09 1.719625e+09 None NaN \n", + "9996 1.719625e+09 1.719625e+09 None NaN \n", + "9997 1.719625e+09 1.719625e+09 None NaN \n", + "9998 1.719625e+09 1.719625e+09 None NaN \n", + "9999 1.719625e+09 1.719625e+09 None NaN \n", "\n", - "[10000 rows x 10 columns]" + "[10000 rows x 11 columns]" ] }, - "execution_count": 2, + "execution_count": 4, "metadata": {}, "output_type": "execute_result" } @@ -432,12 +431,12 @@ }, { "cell_type": "code", - "execution_count": 3, + "execution_count": 5, "metadata": {}, "outputs": [ { "data": { - "image/png": "", + "image/png": "", "text/plain": [ "
" ] @@ -487,7 +486,7 @@ "name": "python", "nbconvert_exporter": "python", "pygments_lexer": "ipython3", - "version": "3.11.7" + "version": "3.10.14" }, "orig_nbformat": 4, "vscode": { diff --git a/tpot2/config/regressors.py b/tpot2/config/regressors.py index 5186d135..d1b9343d 100644 --- a/tpot2/config/regressors.py +++ b/tpot2/config/regressors.py @@ -47,7 +47,7 @@ def get_SGDRegressor_ConfigurationSpace(random_state): l1_ratio = Float("l1_ratio", bounds=(1e-7, 1.0), log=True) penalty = Categorical("penalty", ["l1", "l2", "elasticnet"]) epsilon = Float("epsilon", bounds=(1e-5, 1e-1), log=True) - loss = Categorical("loss", ["squared_loss", "huber", "epsilon_insensitive", "squared_epsilon_insensitive",]) + loss = Categorical("loss", ['epsilon_insensitive', 'squared_epsilon_insensitive', 'huber', 'squared_error']) eta0 = Float("eta0", bounds=(1e-7, 1e-1), log=True) learning_rate = Categorical("learning_rate", ['optimal', 'invscaling', 'constant']) power_t = Float("power_t", bounds=(1e-5, 1.0), log=True) From 102cdd537cbf8d57d25ee65cf65cd6c832ac7fce Mon Sep 17 00:00:00 2001 From: perib Date: Mon, 8 Jul 2024 17:07:10 -0700 Subject: [PATCH 64/75] removed unused files, passed rng into default_rng --- .../nodes/estimator_node_custom_sampler.py | 59 ------- .../nodes/estimator_node_simple.py | 70 -------- .../nodes/genetic_feature_selection.py | 8 +- .../search_spaces/pipelines/dynamic_linear.py | 18 +- .../pipelines/dynamicunion copy.py | 165 ++++++++++++++++++ tpot2/search_spaces/pipelines/dynamicunion.py | 18 +- .../pipelines/genetic_sample_weight.py | 1 - .../pipelines/hierarchical_individual.py | 1 - tpot2/search_spaces/pipelines/sequential.py | 12 +- tpot2/search_spaces/pipelines/union.py | 10 +- 10 files changed, 198 insertions(+), 164 deletions(-) delete mode 100644 tpot2/search_spaces/nodes/estimator_node_custom_sampler.py delete mode 100644 tpot2/search_spaces/nodes/estimator_node_simple.py create mode 100644 tpot2/search_spaces/pipelines/dynamicunion copy.py 
delete mode 100644 tpot2/search_spaces/pipelines/genetic_sample_weight.py delete mode 100644 tpot2/search_spaces/pipelines/hierarchical_individual.py diff --git a/tpot2/search_spaces/nodes/estimator_node_custom_sampler.py b/tpot2/search_spaces/nodes/estimator_node_custom_sampler.py deleted file mode 100644 index 93a55a4e..00000000 --- a/tpot2/search_spaces/nodes/estimator_node_custom_sampler.py +++ /dev/null @@ -1,59 +0,0 @@ -# try https://automl.github.io/ConfigSpace/main/api/hyperparameters.html -import tpot2 -import numpy as np -import pandas as pd -import sklearn -from tpot2 import config -from typing import Generator, List, Tuple, Union -import random -from ..base import SklearnIndividual, SklearnIndividualGenerator, check_same_subclass -from ConfigSpace import ConfigurationSpace - - -class EstimatorNodeCustomIndividual(SklearnIndividual): - def __init__(self, method: type, - sample_func : callable, - rng=None) -> None: - super().__init__() - self.method = method - self.sample_func = sample_func - - self.hyperparameters = self.sample_func(rng) - - def mutate(self, rng=None): - rng = np.random.default_rng(rng) - self.hyperparameters = self.sample_func(rng) - return True - - def _crossover(self, other, rng=None): - rng = np.random.default_rng(rng) - if self.method != other.method: - return False - - #loop through hyperparameters, randomly swap items in self.hyperparameters with items in other.hyperparameters - for hyperparameter in self.space: - if rng.choice([True, False]): - if hyperparameter in other.hyperparameters: - self.hyperparameters[hyperparameter] = other.hyperparameters[hyperparameter] - - def export_pipeline(self, **kwargs): - return self.method(**self.hyperparameters) - - def unique_id(self): - #return a dictionary of the method and the hyperparameters - method_str = self.method.__name__ - params = list(self.hyperparameters.keys()) - params = sorted(params) - - id_str = f"{method_str}({', '.join([f'{param}={self.hyperparameters[param]}' for param 
in params])})" - - return id_str - -class EstimatorNodeCustom(SklearnIndividualGenerator): - def __init__(self, method : type, - sample_func: callable): - self.method = method - self.sample_func = sample_func - - def generate(self, rng=None): - return EstimatorNodeCustomIndividual(self.method, self.sample_func) \ No newline at end of file diff --git a/tpot2/search_spaces/nodes/estimator_node_simple.py b/tpot2/search_spaces/nodes/estimator_node_simple.py deleted file mode 100644 index 8063526a..00000000 --- a/tpot2/search_spaces/nodes/estimator_node_simple.py +++ /dev/null @@ -1,70 +0,0 @@ -# try https://automl.github.io/ConfigSpace/main/api/hyperparameters.html -import tpot2 -import numpy as np -import pandas as pd -import sklearn -from tpot2 import config -from typing import Generator, List, Tuple, Union -import random -from ..base import SklearnIndividual, SklearnIndividualGenerator - -class EstimatorNodeIndividual(SklearnIndividual): - def __init__(self, method, space ) -> None: - super().__init__() - self.method = method - self.space = space #a dictionary. keys are hyperparameters, values are the space of the hyperparameter. If list, then hyperparameter is categorical. If tuple, then hyperparameter is continuous. If single value, then hyperparameter is fixed. 
- - self._mutate_hyperparameters() - - def mutate(self, rng=None): - rng = np.random.default_rng(rng) - return self._mutate_hyperparameters(rng) - - def _mutate_hyperparameters(self, rng=None): - rng = np.random.default_rng(rng) - self.hyperparameters = {} - #sample new hyperparameters from the space - for hyperparameter in self.space: - hyperparameter_space = self.space[hyperparameter] - if isinstance(hyperparameter_space, list): - hp = rng.choice(hyperparameter_space) - elif isinstance(hyperparameter_space, tuple): - hp = rng.uniform(hyperparameter_space[0], hyperparameter_space[1]) - else: - hp = hyperparameter_space - - self.hyperparameters[hyperparameter] = hp - - return True - - def _crossover(self, other, rng=None): - rng = np.random.default_rng(rng) - if self.method != other.method: - return False - - #loop through hyperparameters, randomly swap items in self.hyperparameters with items in other.hyperparameters - for hyperparameter in self.space: - if rng.choice([True, False]): - if hyperparameter in other.hyperparameters: - self.hyperparameters[hyperparameter] = other.hyperparameters[hyperparameter] - - def export_pipeline(self, **kwargs): - return self.method(**self.hyperparameters) - - def unique_id(self): - #return a dictionary of the method and the hyperparameters - method_str = self.method.__name__ - params = list(self.hyperparameters.keys()) - params = sorted(params) - - id_str = f"{method_str}({', '.join([f'{param}={self.hyperparameters[param]}' for param in params])})" - - return id_str - -class EstimatorNode(SklearnIndividualGenerator): - def __init__(self, method, space): - self.method = method - self.space = space - - def generate(self, rng=None): - return EstimatorNodeIndividual(self.method, self.space) \ No newline at end of file diff --git a/tpot2/search_spaces/nodes/genetic_feature_selection.py b/tpot2/search_spaces/nodes/genetic_feature_selection.py index 0fe16586..9e36e666 100644 --- a/tpot2/search_spaces/nodes/genetic_feature_selection.py 
+++ b/tpot2/search_spaces/nodes/genetic_feature_selection.py @@ -31,16 +31,16 @@ def __init__( self, start_p=0.2, mutation_rate = 0.5, crossover_rate = 0.5, - mutation_rate_rate = 0, - crossover_rate_rate = 0, rng=None, ): self.start_p = start_p self.mutation_rate = mutation_rate self.crossover_rate = crossover_rate - self.mutation_rate_rate = mutation_rate_rate - self.crossover_rate_rate = crossover_rate_rate + self.mutation_rate_rate = 0 + self.crossover_rate_rate = 0 + + rng = np.random.default_rng(rng) diff --git a/tpot2/search_spaces/pipelines/dynamic_linear.py b/tpot2/search_spaces/pipelines/dynamic_linear.py index 6da90d81..79ccedef 100644 --- a/tpot2/search_spaces/pipelines/dynamic_linear.py +++ b/tpot2/search_spaces/pipelines/dynamic_linear.py @@ -26,7 +26,7 @@ def __init__(self, search_space : SklearnIndividualGenerator, max_length: int , self.pipeline = self._generate_pipeline(rng) def _generate_pipeline(self, rng=None): - rng = np.random.default_rng() + rng = np.random.default_rng(rng) pipeline = [] length = rng.integers(self.min_length, self.max_length) length = min(length, 3) @@ -37,7 +37,7 @@ def _generate_pipeline(self, rng=None): def mutate(self, rng=None): - rng = np.random.default_rng() + rng = np.random.default_rng(rng) options = [] if len(self.pipeline) > self.min_length: options.append(self._mutate_remove_node) @@ -48,19 +48,19 @@ def mutate(self, rng=None): return rng.choice(options)(rng) def _mutate_add_node(self, rng=None): - rng = np.random.default_rng() + rng = np.random.default_rng(rng) new_node = self.search_space.generate(rng) idx = rng.integers(len(self.pipeline)) self.pipeline.insert(idx, new_node) def _mutate_remove_node(self, rng=None): - rng = np.random.default_rng() + rng = np.random.default_rng(rng) idx = rng.integers(len(self.pipeline)) self.pipeline.pop(idx) def _mutate_step(self, rng=None): #choose a random step in the pipeline and mutate it - rng = np.random.default_rng() + rng = np.random.default_rng(rng) step = 
rng.choice(self.pipeline) return step.mutate(rng) @@ -68,7 +68,7 @@ def _mutate_step(self, rng=None): def _crossover(self, other, rng=None): #swap a random step in the pipeline with the corresponding step in the other pipeline - rng = np.random.default_rng() + rng = np.random.default_rng(rng) cx_funcs = [self._crossover_swap_random_steps, self._crossover_inner_step] rng.shuffle(cx_funcs) @@ -79,7 +79,7 @@ def _crossover(self, other, rng=None): return False def _crossover_swap_random_steps(self, other, rng): - rng = np.random.default_rng() + rng = np.random.default_rng(rng) max_steps = int(min(len(self.pipeline), len(other.pipeline))/2) max_steps = max(max_steps, 1) @@ -106,14 +106,14 @@ def _crossover_swap_step(self, other, rng): if len(self.pipeline) < 2: return False - rng = np.random.default_rng() + rng = np.random.default_rng(rng) idx = rng.integers(1,len(self.pipeline)) self.pipeline[idx], other.pipeline[idx] = other.pipeline[idx], self.pipeline[idx] return True def _crossover_inner_step(self, other, rng): - rng = np.random.default_rng() + rng = np.random.default_rng(rng) pipeline1_indexes= list(range(len(self.pipeline))) pipeline2_indexes= list(range(len(other.pipeline))) diff --git a/tpot2/search_spaces/pipelines/dynamicunion copy.py b/tpot2/search_spaces/pipelines/dynamicunion copy.py new file mode 100644 index 00000000..7951c25c --- /dev/null +++ b/tpot2/search_spaces/pipelines/dynamicunion copy.py @@ -0,0 +1,165 @@ +import tpot2 +import numpy as np +import pandas as pd +import sklearn +from tpot2 import config +from typing import Generator, List, Tuple, Union +import random +from ..base import SklearnIndividual, SklearnIndividualGenerator +from ..tuple_index import TupleIndex + +class DynamicUnionPipelineIndividual(SklearnIndividual): + """ + Takes in one search space. + Will produce a FeatureUnion of up to max_estimators number of steps. + The output of the FeatureUnion will the all of the steps concatenated together. 
+ + """ + + def __init__(self, search_space : SklearnIndividualGenerator, max_estimators=None, rng=None) -> None: + super().__init__() + self.search_space = search_space + + if max_estimators is None: + self.max_estimators = np.inf + else: + self.max_estimators = max_estimators + + self.pipeline = [] + + if self.max_estimators == np.inf: + init_max = 3 + else: + init_max = self.max_estimators + + rng = np.random.default_rng(rng) + + for _ in range(rng.integers(1, init_max)): + self.pipeline.append(self.search_space.generate(rng)) + + def mutate(self, rng=None): + rng = np.random.default_rng(rng) + mutation_funcs = [self._mutate_add_step, self._mutate_remove_step, self._mutate_replace_step, self._mutate_inner_step] + rng.shuffle(mutation_funcs) + for mutation_func in mutation_funcs: + if mutation_func(rng): + return True + + def _mutate_add_step(self, rng): + rng = np.random.default_rng(rng) + if len(self.pipeline) < self.max_estimators: + self.pipeline.append(self.search_space.generate(rng)) + return True + return False + + def _mutate_remove_step(self, rng): + rng = np.random.default_rng(rng) + if len(self.pipeline) > 1: + self.pipeline.pop(rng.integers(0, len(self.pipeline))) + return True + return False + + def _mutate_replace_step(self, rng): + rng = np.random.default_rng(rng) + idx = rng.integers(0, len(self.pipeline)) + self.pipeline[idx] = self.search_space.generate(rng) + return True + + #TODO mutate one step or multiple? 
+ def _mutate_inner_step(self, rng): + rng = np.random.default_rng(rng) + indexes = rng.random(len(self.pipeline)) < 0.5 + indexes = np.where(indexes)[0] + mutated = False + if len(indexes) > 0: + for idx in indexes: + if self.pipeline[idx].mutate(rng): + mutated = True + else: + mutated = self.pipeline[rng.integers(0, len(self.pipeline))].mutate(rng) + + return mutated + + + def _crossover(self, other, rng=None): + rng = np.random.default_rng(rng) + + cx_funcs = [self._crossover_swap_random_steps, self._crossover_inner_step] + rng.shuffle(cx_funcs) + for cx_func in cx_funcs: + if cx_func(other, rng): + return True + + return False + + def _crossover_swap_step(self, other, rng): + rng = np.random.default_rng(rng) + idx = rng.integers(1,len(self.pipeline)) + idx2 = rng.integers(1,len(other.pipeline)) + + self.pipeline[idx], other.pipeline[idx2] = other.pipeline[idx2], self.pipeline[idx] + # self.pipeline[idx] = other.pipeline[idx2] + return True + + def _crossover_swap_random_steps(self, other, rng): + rng = np.random.default_rng(rng) + + max_steps = int(min(len(self.pipeline), len(other.pipeline))/2) + max_steps = max(max_steps, 1) + + if max_steps == 1: + n_steps_to_swap = 1 + else: + n_steps_to_swap = rng.integers(1, max_steps) + + other_indexes_to_take = rng.choice(len(other.pipeline), n_steps_to_swap, replace=False) + self_indexes_to_replace = rng.choice(len(self.pipeline), n_steps_to_swap, replace=False) + + # self.pipeline[self_indexes_to_replace], other.pipeline[other_indexes_to_take] = other.pipeline[other_indexes_to_take], self.pipeline[self_indexes_to_replace] + + for self_idx, other_idx in zip(self_indexes_to_replace, other_indexes_to_take): + self.pipeline[self_idx], other.pipeline[other_idx] = other.pipeline[other_idx], self.pipeline[self_idx] + + return True + + + + def _crossover_inner_step(self, other, rng): + rng = np.random.default_rng(rng) + + #randomly select pairs of steps to crossover + indexes = list(range(1, len(self.pipeline))) + 
other_indexes = list(range(1, len(other.pipeline))) + #shuffle + rng.shuffle(indexes) + rng.shuffle(other_indexes) + + crossover_success = False + for idx, other_idx in zip(indexes, other_indexes): + if self.pipeline[idx].crossover(other.pipeline[other_idx], rng): + crossover_success = True + + return crossover_success + + def export_pipeline(self): + return sklearn.pipeline.make_union(*[step.export_pipeline() for step in self.pipeline]) + + def unique_id(self): + l = [step.unique_id() for step in self.pipeline] + # if all items are strings, then sort them + if all([isinstance(x, str) for x in l]): + l.sort() + l = ["FeatureUnion"] + l + return TupleIndex(tuple(l)) + + +class DynamicUnionPipeline(SklearnIndividualGenerator): + def __init__(self, search_spaces : List[SklearnIndividualGenerator] ) -> None: + """ + Takes in a list of search spaces. will produce a pipeline of Sequential length. Each step in the pipeline will correspond to the the search space provided in the same index. + """ + + self.search_spaces = search_spaces + + def generate(self, rng=None): + return DynamicUnionPipelineIndividual(self.search_spaces) \ No newline at end of file diff --git a/tpot2/search_spaces/pipelines/dynamicunion.py b/tpot2/search_spaces/pipelines/dynamicunion.py index 48fa9669..401c16ef 100644 --- a/tpot2/search_spaces/pipelines/dynamicunion.py +++ b/tpot2/search_spaces/pipelines/dynamicunion.py @@ -41,7 +41,7 @@ def __init__(self, search_space : SklearnIndividualGenerator, max_estimators=Non def mutate(self, rng=None): - rng = np.random.default_rng() + rng = np.random.default_rng(rng) mutation_funcs = [self._mutate_add_step, self._mutate_remove_step, self._mutate_replace_step, self._mutate_inner_step] rng.shuffle(mutation_funcs) for mutation_func in mutation_funcs: @@ -49,7 +49,7 @@ def mutate(self, rng=None): return True def _mutate_add_step(self, rng): - rng = np.random.default_rng() + rng = np.random.default_rng(rng) max_attempts = 10 if len(self.union_dict) < 
self.max_estimators: for _ in range(max_attempts): @@ -60,20 +60,20 @@ def _mutate_add_step(self, rng): return False def _mutate_remove_step(self, rng): - rng = np.random.default_rng() + rng = np.random.default_rng(rng) if len(self.union_dict) > 1: self.union_dict.pop( rng.choice(list(self.union_dict.keys()))) return True return False def _mutate_replace_step(self, rng): - rng = np.random.default_rng() + rng = np.random.default_rng(rng) changed = self._mutate_remove_step(rng) or self._mutate_add_step(rng) return changed #TODO mutate one step or multiple? def _mutate_inner_step(self, rng): - rng = np.random.default_rng() + rng = np.random.default_rng(rng) changed = False values = list(self.union_dict.values()) for step in values: @@ -86,7 +86,7 @@ def _mutate_inner_step(self, rng): def _crossover(self, other, rng=None): - rng = np.random.default_rng() + rng = np.random.default_rng(rng) cx_funcs = [self._crossover_swap_random_steps, self._crossover_inner_step] rng.shuffle(cx_funcs) @@ -97,7 +97,7 @@ def _crossover(self, other, rng=None): return False def _crossover_swap_step(self, other, rng): - rng = np.random.default_rng() + rng = np.random.default_rng(rng) changed = False self_step = rng.choice(list(self.union_dict.values())) @@ -118,7 +118,7 @@ def _crossover_swap_step(self, other, rng): def _crossover_swap_random_steps(self, other, rng): - rng = np.random.default_rng() + rng = np.random.default_rng(rng) self_values = list(self.union_dict.values()) other_values = list(other.union_dict.values()) @@ -137,7 +137,7 @@ def _crossover_swap_random_steps(self, other, rng): def _crossover_inner_step(self, other, rng): - rng = np.random.default_rng() + rng = np.random.default_rng(rng) changed = False self_values = list(self.union_dict.values()) diff --git a/tpot2/search_spaces/pipelines/genetic_sample_weight.py b/tpot2/search_spaces/pipelines/genetic_sample_weight.py deleted file mode 100644 index db731a85..00000000 --- 
a/tpot2/search_spaces/pipelines/genetic_sample_weight.py +++ /dev/null @@ -1 +0,0 @@ -from ..base import SklearnIndividual, SklearnIndividualGenerator \ No newline at end of file diff --git a/tpot2/search_spaces/pipelines/hierarchical_individual.py b/tpot2/search_spaces/pipelines/hierarchical_individual.py deleted file mode 100644 index db731a85..00000000 --- a/tpot2/search_spaces/pipelines/hierarchical_individual.py +++ /dev/null @@ -1 +0,0 @@ -from ..base import SklearnIndividual, SklearnIndividualGenerator \ No newline at end of file diff --git a/tpot2/search_spaces/pipelines/sequential.py b/tpot2/search_spaces/pipelines/sequential.py index da52222a..7a7e6a99 100644 --- a/tpot2/search_spaces/pipelines/sequential.py +++ b/tpot2/search_spaces/pipelines/sequential.py @@ -25,7 +25,7 @@ def __init__(self, search_spaces : List[SklearnIndividualGenerator], memory=None #TODO, mutate all steps or just one? def mutate(self, rng=None): - rng = np.random.default_rng() + rng = np.random.default_rng(rng) # mutated = False # for step in self.pipeline: @@ -43,7 +43,7 @@ def _crossover(self, other, rng=None): if len(self.pipeline) != len(other.pipeline): return False - rng = np.random.default_rng() + rng = np.random.default_rng(rng) cx_funcs = [self._crossover_swap_random_steps, self._crossover_swap_segment, self._crossover_inner_step] rng.shuffle(cx_funcs) @@ -58,7 +58,7 @@ def _crossover_swap_step(self, other, rng): return False - rng = np.random.default_rng() + rng = np.random.default_rng(rng) idx = rng.integers(1,len(self.pipeline)) self.pipeline[idx], other.pipeline[idx] = other.pipeline[idx], self.pipeline[idx] @@ -72,7 +72,7 @@ def _crossover_swap_random_steps(self, other, rng): if len(self.pipeline) < 2: return False - rng = np.random.default_rng() + rng = np.random.default_rng(rng) max_steps = int(min(len(self.pipeline), len(other.pipeline))/2) max_steps = max(max_steps, 1) @@ -97,7 +97,7 @@ def _crossover_swap_segment(self, other, rng): if len(self.pipeline) < 2: 
return False - rng = np.random.default_rng() + rng = np.random.default_rng(rng) idx = rng.integers(1,len(self.pipeline)) left = rng.choice([True, False]) @@ -109,7 +109,7 @@ def _crossover_swap_segment(self, other, rng): return True def _crossover_inner_step(self, other, rng): - rng = np.random.default_rng() + rng = np.random.default_rng(rng) # crossover_success = False # for idx in range(len(self.pipeline)): diff --git a/tpot2/search_spaces/pipelines/union.py b/tpot2/search_spaces/pipelines/union.py index 1e1a58c8..32f988e6 100644 --- a/tpot2/search_spaces/pipelines/union.py +++ b/tpot2/search_spaces/pipelines/union.py @@ -25,14 +25,14 @@ def __init__(self, search_spaces : List[SklearnIndividualGenerator], rng=None) - self.pipeline.append(space.generate(rng)) def mutate(self, rng=None): - rng = np.random.default_rng() + rng = np.random.default_rng(rng) step = rng.choice(self.pipeline) return step.mutate(rng) def _crossover(self, other, rng=None): #swap a random step in the pipeline with the corresponding step in the other pipeline - rng = np.random.default_rng() + rng = np.random.default_rng(rng) cx_funcs = [self._crossover_inner_step] rng.shuffle(cx_funcs) @@ -43,14 +43,14 @@ def _crossover(self, other, rng=None): return False def _crossover_swap_step(self, other, rng): - rng = np.random.default_rng() + rng = np.random.default_rng(rng) idx = rng.integers(1,len(self.pipeline)) self.pipeline[idx], other.pipeline[idx] = other.pipeline[idx], self.pipeline[idx] return True def _crossover_swap_random_steps(self, other, rng): - rng = np.random.default_rng() + rng = np.random.default_rng(rng) max_steps = int(min(len(self.pipeline), len(other.pipeline))/2) max_steps = max(max_steps, 1) @@ -71,7 +71,7 @@ def _crossover_swap_random_steps(self, other, rng): return True def _crossover_inner_step(self, other, rng): - rng = np.random.default_rng() + rng = np.random.default_rng(rng) crossover_success = False for idx in range(len(self.pipeline)): From 
2b59ec8c783cb87457cbad467bb32f6a98e5fcae Mon Sep 17 00:00:00 2001 From: perib Date: Mon, 8 Jul 2024 17:36:36 -0700 Subject: [PATCH 65/75] wrapper now crossover over hyperparametesr --- tpot2/search_spaces/pipelines/wrapper.py | 53 +++++++++++++++++++----- 1 file changed, 42 insertions(+), 11 deletions(-) diff --git a/tpot2/search_spaces/pipelines/wrapper.py b/tpot2/search_spaces/pipelines/wrapper.py index 1cd33bf3..7b49e182 100644 --- a/tpot2/search_spaces/pipelines/wrapper.py +++ b/tpot2/search_spaces/pipelines/wrapper.py @@ -9,6 +9,11 @@ from ConfigSpace import ConfigurationSpace from ..tuple_index import TupleIndex +NONE_SPECIAL_STRING = "" +TRUE_SPECIAL_STRING = "" +FALSE_SPECIAL_STRING = "" + + class WrapperPipelineIndividual(SklearnIndividual): def __init__( self, @@ -18,14 +23,7 @@ def __init__( hyperparameter_parser: callable = None, wrapped_param_name: str = None, rng=None) -> None: - - - super().__init__() - - - - self.method = method self.space = space @@ -33,7 +31,6 @@ def __init__( self.hyperparameters_parser = hyperparameter_parser self.wrapped_param_name = wrapped_param_name - rng = np.random.default_rng(rng) self.node = self.estimator_search_space.generate(rng) @@ -44,8 +41,7 @@ def __init__( self.space.seed(rng.integers(0, 2**32)) self.hyperparameters = dict(self.space.sample_configuration()) - - + self.check_hyperparameters_for_None() def mutate(self, rng=None): rng = np.random.default_rng(rng) @@ -60,14 +56,49 @@ def _mutate_hyperparameters(self, rng=None): rng = np.random.default_rng(rng) self.space.seed(rng.integers(0, 2**32)) self.hyperparameters = dict(self.space.sample_configuration()) + self.check_hyperparameters_for_None() return True def _mutate_node(self, rng=None): return self.node.mutate(rng) def _crossover(self, other, rng=None): - return self.node.crossover(other.node, rng) + if rng.choice([True, False]): + return self._crossover_hyperparameters(other, rng) + else: + self.estimator_search_space.crossover(other.estimator_search_space, 
rng) + + def _crossover_hyperparameters(self, other, rng=None): + if isinstance(self.space, dict): + return False + + rng = np.random.default_rng(rng) + if self.method != other.method: + return False + + #loop through hyperparameters, randomly swap items in self.hyperparameters with items in other.hyperparameters + for hyperparameter in self.space: + if rng.choice([True, False]): + if hyperparameter in other.hyperparameters: + self.hyperparameters[hyperparameter] = other.hyperparameters[hyperparameter] + + self.check_hyperparameters_for_None() + + return True + + def check_hyperparameters_for_None(self): + for key, value in self.hyperparameters.items(): + #if string + if isinstance(value, str): + if value == NONE_SPECIAL_STRING: + self.hyperparameters[key] = None + elif value == TRUE_SPECIAL_STRING: + self.hyperparameters[key] = True + elif value == FALSE_SPECIAL_STRING: + self.hyperparameters[key] = False + + def export_pipeline(self): if self.hyperparameters_parser is not None: From 1b63414fdae8898d837aa582806219e61ea2edaa Mon Sep 17 00:00:00 2001 From: perib Date: Mon, 8 Jul 2024 17:36:49 -0700 Subject: [PATCH 66/75] made naming clearer --- .../search_spaces/pipelines/dynamic_linear.py | 8 +- .../pipelines/dynamicunion copy.py | 165 ------------------ tpot2/search_spaces/pipelines/dynamicunion.py | 12 +- tpot2/search_spaces/pipelines/sequential.py | 8 +- tpot2/search_spaces/pipelines/union.py | 27 +-- 5 files changed, 17 insertions(+), 203 deletions(-) delete mode 100644 tpot2/search_spaces/pipelines/dynamicunion copy.py diff --git a/tpot2/search_spaces/pipelines/dynamic_linear.py b/tpot2/search_spaces/pipelines/dynamic_linear.py index 79ccedef..2ff2bf0b 100644 --- a/tpot2/search_spaces/pipelines/dynamic_linear.py +++ b/tpot2/search_spaces/pipelines/dynamic_linear.py @@ -69,7 +69,7 @@ def _crossover(self, other, rng=None): #swap a random step in the pipeline with the corresponding step in the other pipeline rng = np.random.default_rng(rng) - cx_funcs = 
[self._crossover_swap_random_steps, self._crossover_inner_step] + cx_funcs = [self._crossover_swap_multiple_nodes, self._crossover_node] rng.shuffle(cx_funcs) for cx_func in cx_funcs: @@ -78,7 +78,7 @@ def _crossover(self, other, rng=None): return False - def _crossover_swap_random_steps(self, other, rng): + def _crossover_swap_multiple_nodes(self, other, rng): rng = np.random.default_rng(rng) max_steps = int(min(len(self.pipeline), len(other.pipeline))/2) @@ -99,7 +99,7 @@ def _crossover_swap_random_steps(self, other, rng): return True - def _crossover_swap_step(self, other, rng): + def _crossover_swap_node(self, other, rng): if len(self.pipeline) != len(other.pipeline): return False @@ -112,7 +112,7 @@ def _crossover_swap_step(self, other, rng): self.pipeline[idx], other.pipeline[idx] = other.pipeline[idx], self.pipeline[idx] return True - def _crossover_inner_step(self, other, rng): + def _crossover_node(self, other, rng): rng = np.random.default_rng(rng) pipeline1_indexes= list(range(len(self.pipeline))) diff --git a/tpot2/search_spaces/pipelines/dynamicunion copy.py b/tpot2/search_spaces/pipelines/dynamicunion copy.py deleted file mode 100644 index 7951c25c..00000000 --- a/tpot2/search_spaces/pipelines/dynamicunion copy.py +++ /dev/null @@ -1,165 +0,0 @@ -import tpot2 -import numpy as np -import pandas as pd -import sklearn -from tpot2 import config -from typing import Generator, List, Tuple, Union -import random -from ..base import SklearnIndividual, SklearnIndividualGenerator -from ..tuple_index import TupleIndex - -class DynamicUnionPipelineIndividual(SklearnIndividual): - """ - Takes in one search space. - Will produce a FeatureUnion of up to max_estimators number of steps. - The output of the FeatureUnion will the all of the steps concatenated together. 
- - """ - - def __init__(self, search_space : SklearnIndividualGenerator, max_estimators=None, rng=None) -> None: - super().__init__() - self.search_space = search_space - - if max_estimators is None: - self.max_estimators = np.inf - else: - self.max_estimators = max_estimators - - self.pipeline = [] - - if self.max_estimators == np.inf: - init_max = 3 - else: - init_max = self.max_estimators - - rng = np.random.default_rng(rng) - - for _ in range(rng.integers(1, init_max)): - self.pipeline.append(self.search_space.generate(rng)) - - def mutate(self, rng=None): - rng = np.random.default_rng(rng) - mutation_funcs = [self._mutate_add_step, self._mutate_remove_step, self._mutate_replace_step, self._mutate_inner_step] - rng.shuffle(mutation_funcs) - for mutation_func in mutation_funcs: - if mutation_func(rng): - return True - - def _mutate_add_step(self, rng): - rng = np.random.default_rng(rng) - if len(self.pipeline) < self.max_estimators: - self.pipeline.append(self.search_space.generate(rng)) - return True - return False - - def _mutate_remove_step(self, rng): - rng = np.random.default_rng(rng) - if len(self.pipeline) > 1: - self.pipeline.pop(rng.integers(0, len(self.pipeline))) - return True - return False - - def _mutate_replace_step(self, rng): - rng = np.random.default_rng(rng) - idx = rng.integers(0, len(self.pipeline)) - self.pipeline[idx] = self.search_space.generate(rng) - return True - - #TODO mutate one step or multiple? 
- def _mutate_inner_step(self, rng): - rng = np.random.default_rng(rng) - indexes = rng.random(len(self.pipeline)) < 0.5 - indexes = np.where(indexes)[0] - mutated = False - if len(indexes) > 0: - for idx in indexes: - if self.pipeline[idx].mutate(rng): - mutated = True - else: - mutated = self.pipeline[rng.integers(0, len(self.pipeline))].mutate(rng) - - return mutated - - - def _crossover(self, other, rng=None): - rng = np.random.default_rng(rng) - - cx_funcs = [self._crossover_swap_random_steps, self._crossover_inner_step] - rng.shuffle(cx_funcs) - for cx_func in cx_funcs: - if cx_func(other, rng): - return True - - return False - - def _crossover_swap_step(self, other, rng): - rng = np.random.default_rng(rng) - idx = rng.integers(1,len(self.pipeline)) - idx2 = rng.integers(1,len(other.pipeline)) - - self.pipeline[idx], other.pipeline[idx2] = other.pipeline[idx2], self.pipeline[idx] - # self.pipeline[idx] = other.pipeline[idx2] - return True - - def _crossover_swap_random_steps(self, other, rng): - rng = np.random.default_rng(rng) - - max_steps = int(min(len(self.pipeline), len(other.pipeline))/2) - max_steps = max(max_steps, 1) - - if max_steps == 1: - n_steps_to_swap = 1 - else: - n_steps_to_swap = rng.integers(1, max_steps) - - other_indexes_to_take = rng.choice(len(other.pipeline), n_steps_to_swap, replace=False) - self_indexes_to_replace = rng.choice(len(self.pipeline), n_steps_to_swap, replace=False) - - # self.pipeline[self_indexes_to_replace], other.pipeline[other_indexes_to_take] = other.pipeline[other_indexes_to_take], self.pipeline[self_indexes_to_replace] - - for self_idx, other_idx in zip(self_indexes_to_replace, other_indexes_to_take): - self.pipeline[self_idx], other.pipeline[other_idx] = other.pipeline[other_idx], self.pipeline[self_idx] - - return True - - - - def _crossover_inner_step(self, other, rng): - rng = np.random.default_rng(rng) - - #randomly select pairs of steps to crossover - indexes = list(range(1, len(self.pipeline))) - 
other_indexes = list(range(1, len(other.pipeline))) - #shuffle - rng.shuffle(indexes) - rng.shuffle(other_indexes) - - crossover_success = False - for idx, other_idx in zip(indexes, other_indexes): - if self.pipeline[idx].crossover(other.pipeline[other_idx], rng): - crossover_success = True - - return crossover_success - - def export_pipeline(self): - return sklearn.pipeline.make_union(*[step.export_pipeline() for step in self.pipeline]) - - def unique_id(self): - l = [step.unique_id() for step in self.pipeline] - # if all items are strings, then sort them - if all([isinstance(x, str) for x in l]): - l.sort() - l = ["FeatureUnion"] + l - return TupleIndex(tuple(l)) - - -class DynamicUnionPipeline(SklearnIndividualGenerator): - def __init__(self, search_spaces : List[SklearnIndividualGenerator] ) -> None: - """ - Takes in a list of search spaces. will produce a pipeline of Sequential length. Each step in the pipeline will correspond to the the search space provided in the same index. - """ - - self.search_spaces = search_spaces - - def generate(self, rng=None): - return DynamicUnionPipelineIndividual(self.search_spaces) \ No newline at end of file diff --git a/tpot2/search_spaces/pipelines/dynamicunion.py b/tpot2/search_spaces/pipelines/dynamicunion.py index 401c16ef..01651c29 100644 --- a/tpot2/search_spaces/pipelines/dynamicunion.py +++ b/tpot2/search_spaces/pipelines/dynamicunion.py @@ -42,7 +42,7 @@ def __init__(self, search_space : SklearnIndividualGenerator, max_estimators=Non def mutate(self, rng=None): rng = np.random.default_rng(rng) - mutation_funcs = [self._mutate_add_step, self._mutate_remove_step, self._mutate_replace_step, self._mutate_inner_step] + mutation_funcs = [self._mutate_add_step, self._mutate_remove_step, self._mutate_replace_step, self._mutate_note] rng.shuffle(mutation_funcs) for mutation_func in mutation_funcs: if mutation_func(rng): @@ -72,7 +72,7 @@ def _mutate_replace_step(self, rng): return changed #TODO mutate one step or multiple? 
- def _mutate_inner_step(self, rng): + def _mutate_note(self, rng): rng = np.random.default_rng(rng) changed = False values = list(self.union_dict.values()) @@ -88,7 +88,7 @@ def _mutate_inner_step(self, rng): def _crossover(self, other, rng=None): rng = np.random.default_rng(rng) - cx_funcs = [self._crossover_swap_random_steps, self._crossover_inner_step] + cx_funcs = [self._crossover_swap_multiple_nodes, self._crossover_node] rng.shuffle(cx_funcs) for cx_func in cx_funcs: if cx_func(other, rng): @@ -96,7 +96,7 @@ def _crossover(self, other, rng=None): return False - def _crossover_swap_step(self, other, rng): + def _crossover_swap_node(self, other, rng): rng = np.random.default_rng(rng) changed = False @@ -117,7 +117,7 @@ def _crossover_swap_step(self, other, rng): - def _crossover_swap_random_steps(self, other, rng): + def _crossover_swap_multiple_nodes(self, other, rng): rng = np.random.default_rng(rng) self_values = list(self.union_dict.values()) other_values = list(other.union_dict.values()) @@ -136,7 +136,7 @@ def _crossover_swap_random_steps(self, other, rng): return True - def _crossover_inner_step(self, other, rng): + def _crossover_node(self, other, rng): rng = np.random.default_rng(rng) changed = False diff --git a/tpot2/search_spaces/pipelines/sequential.py b/tpot2/search_spaces/pipelines/sequential.py index 7a7e6a99..2fa15f9a 100644 --- a/tpot2/search_spaces/pipelines/sequential.py +++ b/tpot2/search_spaces/pipelines/sequential.py @@ -44,7 +44,7 @@ def _crossover(self, other, rng=None): return False rng = np.random.default_rng(rng) - cx_funcs = [self._crossover_swap_random_steps, self._crossover_swap_segment, self._crossover_inner_step] + cx_funcs = [self._crossover_swap_multiple_nodes, self._crossover_swap_segment, self._crossover_node] rng.shuffle(cx_funcs) for cx_func in cx_funcs: @@ -53,7 +53,7 @@ def _crossover(self, other, rng=None): return False - def _crossover_swap_step(self, other, rng): + def _crossover_swap_node(self, other, rng): if 
len(self.pipeline) != len(other.pipeline): return False @@ -64,7 +64,7 @@ def _crossover_swap_step(self, other, rng): self.pipeline[idx], other.pipeline[idx] = other.pipeline[idx], self.pipeline[idx] return True - def _crossover_swap_random_steps(self, other, rng): + def _crossover_swap_multiple_nodes(self, other, rng): if len(self.pipeline) != len(other.pipeline): return False @@ -108,7 +108,7 @@ def _crossover_swap_segment(self, other, rng): return True - def _crossover_inner_step(self, other, rng): + def _crossover_node(self, other, rng): rng = np.random.default_rng(rng) # crossover_success = False diff --git a/tpot2/search_spaces/pipelines/union.py b/tpot2/search_spaces/pipelines/union.py index 32f988e6..a9f8215a 100644 --- a/tpot2/search_spaces/pipelines/union.py +++ b/tpot2/search_spaces/pipelines/union.py @@ -34,7 +34,7 @@ def _crossover(self, other, rng=None): #swap a random step in the pipeline with the corresponding step in the other pipeline rng = np.random.default_rng(rng) - cx_funcs = [self._crossover_inner_step] + cx_funcs = [self._crossover_node, self._crossover_swap_node] rng.shuffle(cx_funcs) for cx_func in cx_funcs: if cx_func(other, rng): @@ -42,35 +42,14 @@ def _crossover(self, other, rng=None): return False - def _crossover_swap_step(self, other, rng): + def _crossover_swap_node(self, other, rng): rng = np.random.default_rng(rng) idx = rng.integers(1,len(self.pipeline)) self.pipeline[idx], other.pipeline[idx] = other.pipeline[idx], self.pipeline[idx] return True - - def _crossover_swap_random_steps(self, other, rng): - rng = np.random.default_rng(rng) - - max_steps = int(min(len(self.pipeline), len(other.pipeline))/2) - max_steps = max(max_steps, 1) - - if max_steps == 1: - n_steps_to_swap = 1 - else: - n_steps_to_swap = rng.integers(1, max_steps) - - other_indexes_to_take = rng.choice(len(other.pipeline), n_steps_to_swap, replace=False) - self_indexes_to_replace = rng.choice(len(self.pipeline), n_steps_to_swap, replace=False) - - # 
self.pipeline[self_indexes_to_replace], other.pipeline[other_indexes_to_take] = other.pipeline[other_indexes_to_take], self.pipeline[self_indexes_to_replace] - - for self_idx, other_idx in zip(self_indexes_to_replace, other_indexes_to_take): - self.pipeline[self_idx], other.pipeline[other_idx] = other.pipeline[other_idx], self.pipeline[self_idx] - - return True - def _crossover_inner_step(self, other, rng): + def _crossover_node(self, other, rng): rng = np.random.default_rng(rng) crossover_success = False From e3886513b627ce882ba0ad1fcf6d78f062752c77 Mon Sep 17 00:00:00 2001 From: perib Date: Mon, 8 Jul 2024 18:21:48 -0700 Subject: [PATCH 67/75] wrapper for crossover functions so that subclasses can use crossover instead of _crossover --- tpot2/search_spaces/base.py | 22 ++++++--- tpot2/search_spaces/nodes/estimator_node.py | 2 +- tpot2/search_spaces/nodes/fss_node.py | 2 +- .../nodes/genetic_feature_selection.py | 2 +- tpot2/search_spaces/pipelines/choice.py | 2 +- .../search_spaces/pipelines/dynamic_linear.py | 2 +- tpot2/search_spaces/pipelines/dynamicunion.py | 25 ++-------- tpot2/search_spaces/pipelines/graph.py | 46 +++++++++---------- tpot2/search_spaces/pipelines/sequential.py | 2 +- tpot2/search_spaces/pipelines/union.py | 2 +- tpot2/search_spaces/pipelines/wrapper.py | 2 +- 11 files changed, 48 insertions(+), 61 deletions(-) diff --git a/tpot2/search_spaces/base.py b/tpot2/search_spaces/base.py index 2977d491..3133057e 100644 --- a/tpot2/search_spaces/base.py +++ b/tpot2/search_spaces/base.py @@ -15,23 +15,31 @@ + class SklearnIndividual(tpot2.BaseIndividual): + def __init_subclass__(cls): + cls.crossover = cls.validate_same_type(cls.crossover) + + def __init__(self,) -> None: super().__init__() def mutate(self, rng=None): return - @final def crossover(self, other, rng=None, **kwargs): - if not isinstance(other, type(self)): - return False - return self._crossover(other, rng=rng, **kwargs) + return - @abstractmethod - def _crossover(self, other, 
rng=None): - return + @final + def validate_same_type(func): + + def wrapper(self, other, rng=None, **kwargs): + if not isinstance(other, type(self)): + return False + return func(self, other, rng=None, **kwargs) + + return wrapper def export_pipeline(self) -> BaseEstimator: return diff --git a/tpot2/search_spaces/nodes/estimator_node.py b/tpot2/search_spaces/nodes/estimator_node.py index 4724405e..50d698f3 100644 --- a/tpot2/search_spaces/nodes/estimator_node.py +++ b/tpot2/search_spaces/nodes/estimator_node.py @@ -60,7 +60,7 @@ def mutate(self, rng=None): self.check_hyperparameters_for_None() return True - def _crossover(self, other, rng=None): + def crossover(self, other, rng=None): if isinstance(self.space, dict): return False diff --git a/tpot2/search_spaces/nodes/fss_node.py b/tpot2/search_spaces/nodes/fss_node.py index 46aef024..4dda0d92 100644 --- a/tpot2/search_spaces/nodes/fss_node.py +++ b/tpot2/search_spaces/nodes/fss_node.py @@ -51,7 +51,7 @@ def mutate(self, rng=None): self.sel_subset = self.subset_dict[self.selected_subset_name] - def _crossover(self, other, rng=None): + def crossover(self, other, rng=None): self.selected_subset_name = other.selected_subset_name self.sel_subset = other.sel_subset diff --git a/tpot2/search_spaces/nodes/genetic_feature_selection.py b/tpot2/search_spaces/nodes/genetic_feature_selection.py index 9e36e666..f9c4892a 100644 --- a/tpot2/search_spaces/nodes/genetic_feature_selection.py +++ b/tpot2/search_spaces/nodes/genetic_feature_selection.py @@ -69,7 +69,7 @@ def mutate(self, rng=None): return rng.choice(self.mutation_list)(rng) - def _crossover(self, other, rng=None): + def crossover(self, other, rng=None): rng = np.random.default_rng(rng) if rng.uniform() < self.crossover_rate_rate: diff --git a/tpot2/search_spaces/pipelines/choice.py b/tpot2/search_spaces/pipelines/choice.py index ebe9c51c..25051aa0 100644 --- a/tpot2/search_spaces/pipelines/choice.py +++ b/tpot2/search_spaces/pipelines/choice.py @@ -29,7 +29,7 @@ def 
_mutate_select_new_node(self, rng=None): def _mutate_node(self, rng=None): return self.node.mutate(rng) - def _crossover(self, other, rng=None): + def crossover(self, other, rng=None): return self.node.crossover(other.node, rng) def export_pipeline(self): diff --git a/tpot2/search_spaces/pipelines/dynamic_linear.py b/tpot2/search_spaces/pipelines/dynamic_linear.py index 2ff2bf0b..528ec7c4 100644 --- a/tpot2/search_spaces/pipelines/dynamic_linear.py +++ b/tpot2/search_spaces/pipelines/dynamic_linear.py @@ -65,7 +65,7 @@ def _mutate_step(self, rng=None): return step.mutate(rng) - def _crossover(self, other, rng=None): + def crossover(self, other, rng=None): #swap a random step in the pipeline with the corresponding step in the other pipeline rng = np.random.default_rng(rng) diff --git a/tpot2/search_spaces/pipelines/dynamicunion.py b/tpot2/search_spaces/pipelines/dynamicunion.py index 01651c29..8d8772eb 100644 --- a/tpot2/search_spaces/pipelines/dynamicunion.py +++ b/tpot2/search_spaces/pipelines/dynamicunion.py @@ -85,7 +85,7 @@ def _mutate_note(self, rng): return changed - def _crossover(self, other, rng=None): + def crossover(self, other, rng=None): rng = np.random.default_rng(rng) cx_funcs = [self._crossover_swap_multiple_nodes, self._crossover_node] @@ -95,28 +95,8 @@ def _crossover(self, other, rng=None): return True return False - - def _crossover_swap_node(self, other, rng): - rng = np.random.default_rng(rng) - changed = False - - self_step = rng.choice(list(self.union_dict.values())) - other_step = rng.choice(list(other.union_dict.values())) - - if other_step.unique_id() in self.union_dict: - self.union_dict[other_step.unique_id()] = other_step - self.union_dict.pop(self_step.unique_id()) - changed = True - - if self_step.unique_id() in other.union_dict: - other.union_dict[self_step.unique_id()] = self_step - other.union_dict.pop(other_step.unique_id()) - return changed - - - - + def _crossover_swap_multiple_nodes(self, other, rng): rng = 
np.random.default_rng(rng) self_values = list(self.union_dict.values()) @@ -128,6 +108,7 @@ def _crossover_swap_multiple_nodes(self, other, rng): self_idx = rng.integers(0,len(self_values)) other_idx = rng.integers(0,len(other_values)) + #Note that this is not one-point-crossover since the sequence doesn't matter. this is just a quick way to swap multiple random items self_values[:self_idx], other_values[:other_idx] = other_values[:other_idx], self_values[:self_idx] self.union_dict = {step.unique_id(): step for step in self_values} diff --git a/tpot2/search_spaces/pipelines/graph.py b/tpot2/search_spaces/pipelines/graph.py index e3e49a1b..fc769b1c 100644 --- a/tpot2/search_spaces/pipelines/graph.py +++ b/tpot2/search_spaces/pipelines/graph.py @@ -111,35 +111,33 @@ def __init__( def mutate(self, rng=None): rng = np.random.default_rng(rng) + rng.shuffle(self.mutate_methods_list) + for mutate_method in self.mutate_methods_list: + if mutate_method(rng=rng): + + if self.merge_duplicated_nodes_toggle: + self._merge_duplicated_nodes() - for i in range(0,random.randint(1,15)): - rng.shuffle(self.mutate_methods_list) - for mutate_method in self.mutate_methods_list: - if mutate_method(rng=rng): - - if self.merge_duplicated_nodes_toggle: - self._merge_duplicated_nodes() - - if self.__debug: - print(mutate_method) + if self.__debug: + print(mutate_method) - if self.root not in self.graph.nodes: - print('lost root something went wrong with ', mutate_method) + if self.root not in self.graph.nodes: + print('lost root something went wrong with ', mutate_method) - if len(self.graph.predecessors(self.root)) > 0: - print('root has parents ', mutate_method) + if len(self.graph.predecessors(self.root)) > 0: + print('root has parents ', mutate_method) - if any([n in nx.ancestors(self.graph,n) for n in self.graph.nodes]): - print('a node is connecting to itself...') + if any([n in nx.ancestors(self.graph,n) for n in self.graph.nodes]): + print('a node is connecting to itself...') - if 
self.__debug: - try: - nx.find_cycle(self.graph) - print('something went wrong with ', mutate_method) - except: - pass + if self.__debug: + try: + nx.find_cycle(self.graph) + print('something went wrong with ', mutate_method) + except: + pass - self.graphkey = None + self.graphkey = None return False @@ -323,7 +321,7 @@ def _mutate_insert_bypass_node(self, rng=None): return False - def _crossover(self, ind2, rng=None): + def crossover(self, ind2, rng=None): ''' self is the first individual, ind2 is the second individual If crossover_same_depth, it will select graphindividuals at the same recursive depth. diff --git a/tpot2/search_spaces/pipelines/sequential.py b/tpot2/search_spaces/pipelines/sequential.py index 2fa15f9a..75bad8d2 100644 --- a/tpot2/search_spaces/pipelines/sequential.py +++ b/tpot2/search_spaces/pipelines/sequential.py @@ -38,7 +38,7 @@ def mutate(self, rng=None): return step.mutate(rng) - def _crossover(self, other, rng=None): + def crossover(self, other, rng=None): #swap a random step in the pipeline with the corresponding step in the other pipeline if len(self.pipeline) != len(other.pipeline): return False diff --git a/tpot2/search_spaces/pipelines/union.py b/tpot2/search_spaces/pipelines/union.py index a9f8215a..811ef38b 100644 --- a/tpot2/search_spaces/pipelines/union.py +++ b/tpot2/search_spaces/pipelines/union.py @@ -30,7 +30,7 @@ def mutate(self, rng=None): return step.mutate(rng) - def _crossover(self, other, rng=None): + def crossover(self, other, rng=None): #swap a random step in the pipeline with the corresponding step in the other pipeline rng = np.random.default_rng(rng) diff --git a/tpot2/search_spaces/pipelines/wrapper.py b/tpot2/search_spaces/pipelines/wrapper.py index 7b49e182..df504a89 100644 --- a/tpot2/search_spaces/pipelines/wrapper.py +++ b/tpot2/search_spaces/pipelines/wrapper.py @@ -62,7 +62,7 @@ def _mutate_hyperparameters(self, rng=None): def _mutate_node(self, rng=None): return self.node.mutate(rng) - def _crossover(self, 
other, rng=None): + def crossover(self, other, rng=None): if rng.choice([True, False]): return self._crossover_hyperparameters(other, rng) else: From a250ed2efc2ef671f63401baf61ca652bce273d4 Mon Sep 17 00:00:00 2001 From: perib Date: Mon, 8 Jul 2024 18:25:16 -0700 Subject: [PATCH 68/75] tpot estimator documentation edit --- tpot2/tpot_estimator/estimator.py | 35 +++++++++++++++++-------------- 1 file changed, 19 insertions(+), 16 deletions(-) diff --git a/tpot2/tpot_estimator/estimator.py b/tpot2/tpot_estimator/estimator.py index f4c0e954..9bc33e8a 100644 --- a/tpot2/tpot_estimator/estimator.py +++ b/tpot2/tpot_estimator/estimator.py @@ -112,7 +112,7 @@ def __init__(self, Parameters ---------- - default_search_space : (String, tpot2.search_spaces.SklearnIndividualGenerator) + search_space : (String, tpot2.search_spaces.SklearnIndividualGenerator) - String : The default search space to use for the optimization. This can be either "linear" or "graph". If "linear", will use the default linear pipeline search space. If "graph", will use the default graph pipeline search space. - SklearnIndividualGenerator : The search space to use for the optimization. This should be an instance of a SklearnIndividualGenerator. The search space to use for the optimization. This should be an instance of a SklearnIndividualGenerator. @@ -145,6 +145,7 @@ def __init__(self, bigger_is_better : bool, default=True If True, the objective function is maximized. If False, the objective function is minimized. Use negative weights to reverse the direction. + cross_val_predict_cv : int, default=0 Number of folds to use for the cross_val_predict function for inner classifiers and regressors. Estimators will still be fit on the full dataset, but the following node will get the outputs from cross_val_predict. @@ -152,20 +153,6 @@ def __init__(self, - >=2 : When fitting pipelines with inner classifiers or regressors, they will still be fit on the full dataset. 
However, the output to the next node will come from cross_val_predict with the specified number of folds. - categorical_features: list or None - Categorical columns to inpute and/or one hot encode during the preprocessing step. Used only if preprocessing is not False. - - None : If None, TPOT2 will automatically use object columns in pandas dataframes as objects for one hot encoding in preprocessing. - - List of categorical features. If X is a dataframe, this should be a list of column names. If X is a numpy array, this should be a list of column indices - - subsets : str or list, default=None - Sets the subsets that the FeatureSetSeletor will select from if set as an option in one of the configuration dictionaries. - - str : If a string, it is assumed to be a path to a csv file with the subsets. - The first column is assumed to be the name of the subset and the remaining columns are the features in the subset. - - list or np.ndarray : If a list or np.ndarray, it is assumed to be a list of subsets. - - None : If None, each column will be treated as a subset. One column will be selected per subset. - If subsets is None, each column will be treated as a subset. One column will be selected per subset. - - memory: Memory object or string, default=None If supplied, pipeline will cache each transformer after calling fit. This feature is used to avoid computing the fit transformers within a pipeline if the parameters @@ -180,7 +167,20 @@ def __init__(self, TPOT uses the instance of joblib.Memory for memory caching, and TPOT does NOT clean the caching directory up upon shutdown. - None: - TPOT does not use memory caching. + TPOT does not use memory caching. + + categorical_features: list or None + Categorical columns to inpute and/or one hot encode during the preprocessing step. Used only if preprocessing is not False. + - None : If None, TPOT2 will automatically use object columns in pandas dataframes as objects for one hot encoding in preprocessing. 
+ - List of categorical features. If X is a dataframe, this should be a list of column names. If X is a numpy array, this should be a list of column indices + + subsets : str or list, default=None + Sets the subsets that the FeatureSetSeletor will select from if set as an option in one of the configuration dictionaries. + - str : If a string, it is assumed to be a path to a csv file with the subsets. + The first column is assumed to be the name of the subset and the remaining columns are the features in the subset. + - list or np.ndarray : If a list or np.ndarray, it is assumed to be a list of subsets. + - None : If None, each column will be treated as a subset. One column will be selected per subset. + If subsets is None, each column will be treated as a subset. One column will be selected per subset. preprocessing : bool or BaseEstimator/Pipeline, EXPERIMENTAL @@ -329,6 +329,9 @@ def __init__(self, >=5. full warnings trace 6. evaluations progress bar. (Temporary: This used to be 2. Currently, using evaluation progress bar may prevent some instances were we terminate a generation early due to it reaching max_time_seconds in the middle of a generation OR a pipeline failed to be terminated normally and we need to manually terminate it.) + scatter : bool, default=True + If True, will scatter the data to the dask workers. If False, will not scatter the data. This can be useful for debugging. + random_state : int, None, default=None A seed for reproducability of experiments. 
This value will be passed to numpy.random.default_rng() to create an instnce of the genrator to pass to other classes From 6905e6f91b6b86d52a0334a32c06d9462e23ada6 Mon Sep 17 00:00:00 2001 From: perib Date: Mon, 8 Jul 2024 19:12:00 -0700 Subject: [PATCH 69/75] update documentation --- tpot2/tpot_estimator/estimator.py | 48 ++++++++++--------- .../tpot_estimator/templates/tpottemplates.py | 8 ++-- 2 files changed, 30 insertions(+), 26 deletions(-) diff --git a/tpot2/tpot_estimator/estimator.py b/tpot2/tpot_estimator/estimator.py index 9bc33e8a..bb1f3b3f 100644 --- a/tpot2/tpot_estimator/estimator.py +++ b/tpot2/tpot_estimator/estimator.py @@ -184,8 +184,8 @@ def __init__(self, preprocessing : bool or BaseEstimator/Pipeline, EXPERIMENTAL - A pipeline that will be used to preprocess the data before CV. - - bool : If True, will use a default preprocessing pipeline. + A pipeline that will be used to preprocess the data before CV. Note that the parameters for these steps are not optimized. Add them to the search space to be optimized. + - bool : If True, will use a default preprocessing pipeline which includes imputation followed by one hot encoding. - Pipeline : If an instance of a pipeline is given, will use that pipeline as the preprocessing pipeline. 
population_size : int, default=50 @@ -562,7 +562,7 @@ def fit(self, X, y): if self.categorical_features is not None: #if categorical features are specified, use those pipeline_steps.append(("impute_categorical", tpot2.builtin_modules.ColumnSimpleImputer(self.categorical_features, strategy='most_frequent'))) pipeline_steps.append(("impute_numeric", tpot2.builtin_modules.ColumnSimpleImputer("numeric", strategy='mean'))) - pipeline_steps.append(("impute_categorical", tpot2.builtin_modules.ColumnOneHotEncoder(self.categorical_features, strategy='most_frequent'))) + pipeline_steps.append(("ColumnOneHotEncoder", tpot2.builtin_modules.ColumnOneHotEncoder(self.categorical_features, strategy='most_frequent'))) else: if isinstance(X, pd.DataFrame): @@ -570,7 +570,7 @@ def fit(self, X, y): if len(categorical_columns) > 0: pipeline_steps.append(("impute_categorical", tpot2.builtin_modules.ColumnSimpleImputer("categorical", strategy='most_frequent'))) pipeline_steps.append(("impute_numeric", tpot2.builtin_modules.ColumnSimpleImputer("numeric", strategy='mean'))) - pipeline_steps.append(("impute_categorical", tpot2.builtin_modules.ColumnOneHotEncoder("categorical", strategy='most_frequent'))) + pipeline_steps.append(("ColumnOneHotEncoder", tpot2.builtin_modules.ColumnOneHotEncoder("categorical", strategy='most_frequent'))) else: pipeline_steps.append(("impute_numeric", tpot2.builtin_modules.ColumnSimpleImputer("all", strategy='mean'))) else: @@ -661,30 +661,34 @@ def objective_function(pipeline_individual, self._search_space = get_default_search_space(self.search_space, classification=True, inner_predictors=True, **get_search_space_params) - if check_empty_values(X): - from sklearn.experimental import enable_iterative_imputer + # TODO : Add check for empty values in X and if so, add imputation to the search space + # make this depend on self.preprocessing + # if check_empty_values(X): + # from sklearn.experimental import enable_iterative_imputer - from ConfigSpace import 
ConfigurationSpace - from ConfigSpace import ConfigurationSpace, Integer, Float, Categorical, Normal - iterative_imputer_cs = ConfigurationSpace( - space = { - 'n_nearest_features' : Categorical('n_nearest_features', [100]), - 'initial_strategy' : Categorical('initial_strategy', ['mean','median', 'most_frequent', ]), - 'add_indicator' : Categorical('add_indicator', [True, False]), - } - ) + # from ConfigSpace import ConfigurationSpace + # from ConfigSpace import ConfigurationSpace, Integer, Float, Categorical, Normal + # iterative_imputer_cs = ConfigurationSpace( + # space = { + # 'n_nearest_features' : Categorical('n_nearest_features', [100]), + # 'initial_strategy' : Categorical('initial_strategy', ['mean','median', 'most_frequent', ]), + # 'add_indicator' : Categorical('add_indicator', [True, False]), + # } + # ) - imputation_search = tpot2.search_spaces.pipelines.ChoicePipeline([ - tpot2.config.get_search_space("SimpleImputer"), - tpot2.search_spaces.nodes.EstimatorNode(sklearn.impute.IterativeImputer, iterative_imputer_cs) - ]) + # imputation_search = tpot2.search_spaces.pipelines.ChoicePipeline([ + # tpot2.config.get_search_space("SimpleImputer"), + # tpot2.search_spaces.nodes.EstimatorNode(sklearn.impute.IterativeImputer, iterative_imputer_cs) + # ]) - self.search_space_final = tpot2.search_spaces.pipelines.SequentialPipeline(search_spaces=[ imputation_search, self._search_space], memory="sklearn_pipeline_memory") - else: - self.search_space_final = self._search_space + # self.search_space_final = tpot2.search_spaces.pipelines.SequentialPipeline(search_spaces=[ imputation_search, self._search_space], memory="sklearn_pipeline_memory") + # else: + # self.search_space_final = self._search_space + + self.search_space_final = self._search_space def ind_generator(rng): rng = np.random.default_rng(rng) diff --git a/tpot2/tpot_estimator/templates/tpottemplates.py b/tpot2/tpot_estimator/templates/tpottemplates.py index d31dcc88..3871e6e1 100644 --- 
a/tpot2/tpot_estimator/templates/tpottemplates.py +++ b/tpot2/tpot_estimator/templates/tpottemplates.py @@ -104,8 +104,8 @@ def __init__( self, preprocessing : bool or BaseEstimator/Pipeline, EXPERIMENTAL - A pipeline that will be used to preprocess the data before CV. - - bool : If True, will use a default preprocessing pipeline. + A pipeline that will be used to preprocess the data before CV. Note that the parameters for these steps are not optimized. Add them to the search space to be optimized. + - bool : If True, will use a default preprocessing pipeline which includes imputation followed by one hot encoding. - Pipeline : If an instance of a pipeline is given, will use that pipeline as the preprocessing pipeline. max_time_seconds : float, default=float("inf") @@ -358,8 +358,8 @@ def __init__( self, preprocessing : bool or BaseEstimator/Pipeline, EXPERIMENTAL - A pipeline that will be used to preprocess the data before CV. - - bool : If True, will use a default preprocessing pipeline. + A pipeline that will be used to preprocess the data before CV. Note that the parameters for these steps are not optimized. Add them to the search space to be optimized. + - bool : If True, will use a default preprocessing pipeline which includes imputation followed by one hot encoding. - Pipeline : If an instance of a pipeline is given, will use that pipeline as the preprocessing pipeline. 
max_time_seconds : float, default=float("inf") From f7b4b270e72e8ac77fd6caa815a5d1ee3ed3266d Mon Sep 17 00:00:00 2001 From: perib Date: Tue, 9 Jul 2024 13:45:26 -0700 Subject: [PATCH 70/75] fixed imputation tutorial --- .../Example_Search_Spaces/imputation.ipynb | 507 +++++++++++++++++- 1 file changed, 481 insertions(+), 26 deletions(-) diff --git a/Tutorial/Example_Search_Spaces/imputation.ipynb b/Tutorial/Example_Search_Spaces/imputation.ipynb index 07532532..b6de7ef8 100644 --- a/Tutorial/Example_Search_Spaces/imputation.ipynb +++ b/Tutorial/Example_Search_Spaces/imputation.ipynb @@ -2,16 +2,32 @@ "cells": [ { "cell_type": "code", - "execution_count": null, + "execution_count": 5, "metadata": {}, - "outputs": [], + "outputs": [ + { + "data": { + "text/plain": [ + "Configuration(values={\n", + " 'add_indicator': False,\n", + " 'strategy': 'most_frequent',\n", + "})" + ] + }, + "execution_count": 5, + "metadata": {}, + "output_type": "execute_result" + } + ], "source": [ "from ConfigSpace import ConfigurationSpace\n", "from ConfigSpace import ConfigurationSpace, Integer, Float, Categorical, Normal\n", + "import tpot2\n", + "from sklearn.impute import SimpleImputer\n", "\n", "simple_imputer = ConfigurationSpace(\n", " space = {\n", - " 'strategy' : Categorical('strategy', [['mean','median',], ['most_frequent'] ]),\n", + " 'strategy' : Categorical('strategy', ['mean','median','most_frequent']),\n", " 'add_indicator' : Categorical('add_indicator', [True, False]), \n", " }\n", ")\n", @@ -21,43 +37,482 @@ }, { "cell_type": "code", - "execution_count": 11, + "execution_count": 6, "metadata": {}, "outputs": [ { "data": { + "text/html": [ + "
Pipeline(steps=[('simpleimputer',\n",
+       "                 SimpleImputer(add_indicator=True, strategy='median')),\n",
+       "                ('selectpercentile',\n",
+       "                 SelectPercentile(percentile=44.546578384975824)),\n",
+       "                ('featureagglomeration',\n",
+       "                 FeatureAgglomeration(linkage='complete', metric='cosine',\n",
+       "                                      n_clusters=102,\n",
+       "                                      pooling_func=<function median at 0x711a67539830>)),\n",
+       "                ('extratreesclassifier',\n",
+       "                 ExtraTreesClassifier(bootstrap=True, class_weight='balanced',\n",
+       "                                      max_features=0.9974817877523433,\n",
+       "                                      min_samples_leaf=8, min_samples_split=20,\n",
+       "                                      n_jobs=1))])
In a Jupyter environment, please rerun this cell to show the HTML representation or trust the notebook.
On GitHub, the HTML representation is unable to render, please try loading this page with nbviewer.org.
" + ], "text/plain": [ - "Configuration(values={\n", - " '2': 2,\n", - " 'a': 2,\n", - "})" + "Pipeline(steps=[('simpleimputer',\n", + " SimpleImputer(add_indicator=True, strategy='median')),\n", + " ('selectpercentile',\n", + " SelectPercentile(percentile=44.546578384975824)),\n", + " ('featureagglomeration',\n", + " FeatureAgglomeration(linkage='complete', metric='cosine',\n", + " n_clusters=102,\n", + " pooling_func=)),\n", + " ('extratreesclassifier',\n", + " ExtraTreesClassifier(bootstrap=True, class_weight='balanced',\n", + " max_features=0.9974817877523433,\n", + " min_samples_leaf=8, min_samples_split=20,\n", + " n_jobs=1))])" ] }, - "execution_count": 11, + "execution_count": 6, "metadata": {}, "output_type": "execute_result" } ], "source": [ - "from ConfigSpace import ConfigurationSpace, EqualsCondition\n", - "import ConfigSpace\n", - "\n", - "cs = ConfigurationSpace({\n", - "\n", - " \"1\": [1,2,3],\n", - " \"2\": ConfigSpace.Constant(\"2\", 2),\n", - "\n", - " \"a\": [1, 2, 3],\n", - "\n", - "})\n", + "imputation_node =tpot2.search_spaces.nodes.EstimatorNode(\n", + " method = SimpleImputer,\n", + " space = simple_imputer,\n", + ")\n", "\n", - "cond = EqualsCondition(cs['1'], cs['a'], 1)\n", - "cond2 = EqualsCondition(cs['2'], cs['a'], 2)\n", + "impute_classifier_space = tpot2.search_spaces.pipelines.SequentialPipeline([\n", + " imputation_node,\n", + " tpot2.config.get_search_space(\"selectors\"), \n", + " tpot2.config.get_search_space(\"transformers\"),\n", + " tpot2.config.get_search_space(\"classifiers\"),\n", + " \n", + "])\n", "\n", - "cs.add_condition(cond)\n", - "cs.add_condition(cond2)\n", "\n", - "cs.sample_configuration()" + "impute_classifier_space.generate().export_pipeline()" ] } ], @@ -77,7 +532,7 @@ "name": "python", "nbconvert_exporter": "python", "pygments_lexer": "ipython3", - "version": "3.10.13" + "version": "3.10.14" } }, "nbformat": 4, From 76f76faf4a59181813d361ef42aa8ae0be391d36 Mon Sep 17 00:00:00 2001 From: perib Date: Tue, 9 
Jul 2024 14:37:39 -0700 Subject: [PATCH 71/75] fix --- tpot2/search_spaces/pipelines/wrapper.py | 1 + 1 file changed, 1 insertion(+) diff --git a/tpot2/search_spaces/pipelines/wrapper.py b/tpot2/search_spaces/pipelines/wrapper.py index df504a89..2c1ad138 100644 --- a/tpot2/search_spaces/pipelines/wrapper.py +++ b/tpot2/search_spaces/pipelines/wrapper.py @@ -63,6 +63,7 @@ def _mutate_node(self, rng=None): return self.node.mutate(rng) def crossover(self, other, rng=None): + rng = np.random.default_rng(rng) if rng.choice([True, False]): return self._crossover_hyperparameters(other, rng) else: From 334ca58319d767aecd097232c09ddc6596ca659a Mon Sep 17 00:00:00 2001 From: perib Date: Tue, 9 Jul 2024 14:41:36 -0700 Subject: [PATCH 72/75] wrap fix --- tpot2/search_spaces/pipelines/wrapper.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/tpot2/search_spaces/pipelines/wrapper.py b/tpot2/search_spaces/pipelines/wrapper.py index 2c1ad138..d61bc5f3 100644 --- a/tpot2/search_spaces/pipelines/wrapper.py +++ b/tpot2/search_spaces/pipelines/wrapper.py @@ -67,7 +67,7 @@ def crossover(self, other, rng=None): if rng.choice([True, False]): return self._crossover_hyperparameters(other, rng) else: - self.estimator_search_space.crossover(other.estimator_search_space, rng) + self.node.crossover(other.estimator_search_space, rng) def _crossover_hyperparameters(self, other, rng=None): From 8c0379c4c2cfca3bf414cd51174505ce5bde8561 Mon Sep 17 00:00:00 2001 From: perib Date: Tue, 9 Jul 2024 17:46:10 -0700 Subject: [PATCH 73/75] added check for infinite max_eval_time_seconds --- tpot2/evolvers/steady_state_evolver.py | 19 ++++++++++--------- tpot2/utils/eval_utils.py | 2 +- 2 files changed, 11 insertions(+), 10 deletions(-) diff --git a/tpot2/evolvers/steady_state_evolver.py b/tpot2/evolvers/steady_state_evolver.py index eecc2b29..e0b9b593 100644 --- a/tpot2/evolvers/steady_state_evolver.py +++ b/tpot2/evolvers/steady_state_evolver.py @@ -299,17 +299,18 @@ def optimize(self): 
eval_error = "INVALID" else: #if future is not done - #check if the future has been running for too long, cancel the future - if time.time() - submitted_futures[completed_future]["time"] > self.max_eval_time_seconds*1.25: - completed_future.cancel() + if self.max_eval_time_seconds is not None: + #check if the future has been running for too long, cancel the future + if time.time() - submitted_futures[completed_future]["time"] > self.max_eval_time_seconds*1.25: + completed_future.cancel() - if self.verbose >= 4: - print(f'WARNING AN INDIVIDUAL TIMED OUT (Fallback): \n {submitted_futures[completed_future]} \n') + if self.verbose >= 4: + print(f'WARNING AN INDIVIDUAL TIMED OUT (Fallback): \n {submitted_futures[completed_future]} \n') - scores = [np.nan for _ in range(len(self.objective_names))] - eval_error = "TIMEOUT" - else: - continue #otherwise, continue to next future + scores = [np.nan for _ in range(len(self.objective_names))] + eval_error = "TIMEOUT" + else: + continue #otherwise, continue to next future diff --git a/tpot2/utils/eval_utils.py b/tpot2/utils/eval_utils.py index f37cb823..f8d4bd7f 100644 --- a/tpot2/utils/eval_utils.py +++ b/tpot2/utils/eval_utils.py @@ -218,7 +218,7 @@ def parallel_eval_objective_list2(individual_list, #check if the future has been running for too long, cancel the future - if time.time() - submitted_futures[completed_future]["time"] > max_eval_time_seconds*1.25: + if max_eval_time_seconds is not None and time.time() - submitted_futures[completed_future]["time"] > max_eval_time_seconds*1.25: completed_future.cancel() if verbose >= 4: From dc1fb8aa03c705c0110af292b4b8d473fef9559b Mon Sep 17 00:00:00 2001 From: Jay Moran Date: Wed, 10 Jul 2024 15:37:39 -0700 Subject: [PATCH 74/75] Print out package versions and make tests verbose --- tox.ini | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) diff --git a/tox.ini b/tox.ini index 4e250aef..f215d6d0 100644 --- a/tox.ini +++ b/tox.ini @@ -16,7 +16,8 @@ setenv = deps = 
-r{toxinidir}/requirements_dev.txt commands = - pytest --basetemp={envtmpdir} + pip freeze + pytest --basetemp={envtmpdir} -v [testenv:flake8] basepython = python3.10 @@ -27,4 +28,4 @@ commands = flake8 tpot2 basepython = python3.10 deps = -r{toxinidir}/requirements_dev.txt -commands = mypy tpot2 \ No newline at end of file +commands = mypy tpot2 From 76d3989e8cd6bc93144ef79a49fe6e6837be425e Mon Sep 17 00:00:00 2001 From: Jay Moran Date: Wed, 10 Jul 2024 16:59:13 -0700 Subject: [PATCH 75/75] Pin numpy version --- setup.py | 2 +- tox.ini | 3 +-- 2 files changed, 2 insertions(+), 3 deletions(-) diff --git a/setup.py b/setup.py index 27b4a474..0a404280 100644 --- a/setup.py +++ b/setup.py @@ -28,7 +28,7 @@ def calculate_version(): ''', zip_safe=True, - install_requires=['numpy>=1.26.4', + install_requires=['numpy==1.26.4', 'scipy>=1.3.1', 'scikit-learn>=1.3.0', 'update_checker>=0.16', diff --git a/tox.ini b/tox.ini index f215d6d0..7177d0a7 100644 --- a/tox.ini +++ b/tox.ini @@ -16,8 +16,7 @@ setenv = deps = -r{toxinidir}/requirements_dev.txt commands = - pip freeze - pytest --basetemp={envtmpdir} -v + pytest --basetemp={envtmpdir} [testenv:flake8] basepython = python3.10