diff --git a/Dockerfile b/Dockerfile index 010a5937..89b64af3 100644 --- a/Dockerfile +++ b/Dockerfile @@ -9,11 +9,7 @@ ENV JUPYTER_CONF_FILE /root/.jupyter/jupyter_notebook_config.py RUN echo "c.NotebookApp.allow_origin = '*'" > $JUPYTER_CONF_FILE RUN echo "c.NotebookApp.ip = '0.0.0.0'" >> $JUPYTER_CONF_FILE RUN mkdir -p /notebook -<<<<<<< HEAD COPY demo/*.ipynb /notebook/ COPY test/resources/datasets/*.csv /notebook/datasets/ -======= -COPY demo/DemoClassificationDisc.ipynb /notebook ->>>>>>> chore: move demos into demo/ directory WORKDIR /notebook -CMD jupyter notebook --allow-root --no-browser +CMD jupyter notebook --allow-root --no-browser \ No newline at end of file diff --git a/demo/DemoClassification.ipynb b/demo/DemoClassification.ipynb new file mode 100644 index 00000000..393649c5 --- /dev/null +++ b/demo/DemoClassification.ipynb @@ -0,0 +1,401 @@ +{ + "cells": [ + { + "cell_type": "markdown", + "source": [ + "# PSyKE's demo\n", + "\n", + "Some imports." + ], + "metadata": { + "collapsed": false + } + }, + { + "cell_type": "code", + "execution_count": 1, + "id": "6b710e7c", + "metadata": {}, + "outputs": [], + "source": [ + "from sklearn.model_selection import train_test_split\n", + "from sklearn.datasets import load_iris\n", + "import pandas as pd\n", + "\n", + "from sklearn.neighbors import KNeighborsClassifier\n", + "from sklearn.tree import DecisionTreeClassifier\n", + "from psyke.cart.predictor import CartPredictor\n", + "\n", + "from psyke import Extractor\n", + "from psyke.regression.strategy import AdaptiveStrategy\n", + "from psyke.regression import Grid, FeatureRanker, HyperCubeExtractor\n", + "from psyke.utils.logic import pretty_theory" + ] + }, + { + "cell_type": "markdown", + "source": [ + "Import iris dataset separating features and class." 
+ ], + "metadata": { + "collapsed": false, + "pycharm": { + "name": "#%% md\n" + } + } + }, + { + "cell_type": "code", + "execution_count": 2, + "id": "f8e46c49", + "metadata": {}, + "outputs": [], + "source": [ + "x, y = load_iris(return_X_y=True, as_frame=True)" + ] + }, + { + "cell_type": "markdown", + "source": [ + "Rename of the features." + ], + "metadata": { + "collapsed": false + } + }, + { + "cell_type": "code", + "execution_count": 3, + "id": "38d5afb0", + "metadata": {}, + "outputs": [], + "source": [ + "x.columns = ['SepalLength', 'SepalWidth', 'PetalLength', 'PetalWidth']" + ] + }, + { + "cell_type": "markdown", + "source": [ + "Replace integer indices with the corresponding string class." + ], + "metadata": { + "collapsed": false + } + }, + { + "cell_type": "code", + "execution_count": 4, + "id": "4f807185", + "metadata": {}, + "outputs": [ + { + "data": { + "text/plain": " target\n0 setosa\n1 setosa\n2 setosa\n3 setosa\n4 setosa\n.. ...\n145 versicolor\n146 versicolor\n147 versicolor\n148 versicolor\n149 versicolor\n\n[150 rows x 1 columns]", + "text/html": "
\n\n\n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n
target
0setosa
1setosa
2setosa
3setosa
4setosa
......
145versicolor
146versicolor
147versicolor
148versicolor
149versicolor
\n

150 rows × 1 columns

\n
" + }, + "execution_count": 4, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "y = pd.DataFrame(y).replace({\"target\": {0: 'setosa', 1: 'virginica', 2: 'versicolor'}})\n", + "y" + ] + }, + { + "cell_type": "markdown", + "source": [ + "The final dataset:" + ], + "metadata": { + "collapsed": false + } + }, + { + "cell_type": "code", + "execution_count": 5, + "id": "7ac49b4e", + "metadata": {}, + "outputs": [ + { + "data": { + "text/plain": " SepalLength SepalWidth PetalLength PetalWidth iris\n0 5.1 3.5 1.4 0.2 setosa\n1 4.9 3.0 1.4 0.2 setosa\n2 4.7 3.2 1.3 0.2 setosa\n3 4.6 3.1 1.5 0.2 setosa\n4 5.0 3.6 1.4 0.2 setosa\n.. ... ... ... ... ...\n145 6.7 3.0 5.2 2.3 versicolor\n146 6.3 2.5 5.0 1.9 versicolor\n147 6.5 3.0 5.2 2.0 versicolor\n148 6.2 3.4 5.4 2.3 versicolor\n149 5.9 3.0 5.1 1.8 versicolor\n\n[150 rows x 5 columns]", + "text/html": "
\n\n\n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n
SepalLengthSepalWidthPetalLengthPetalWidthiris
05.13.51.40.2setosa
14.93.01.40.2setosa
24.73.21.30.2setosa
34.63.11.50.2setosa
45.03.61.40.2setosa
..................
1456.73.05.22.3versicolor
1466.32.55.01.9versicolor
1476.53.05.22.0versicolor
1486.23.45.42.3versicolor
1495.93.05.11.8versicolor
\n

150 rows × 5 columns

\n
" + }, + "execution_count": 5, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "dataset = x.join(y)\n", + "dataset.columns = [*dataset.columns[:-1], 'iris']\n", + "dataset" + ] + }, + { + "cell_type": "markdown", + "source": [ + "Split between train and test set in a reproducible way." + ], + "metadata": { + "collapsed": false + } + }, + { + "cell_type": "code", + "execution_count": 6, + "id": "03fc5e2c", + "metadata": {}, + "outputs": [], + "source": [ + "train, test = train_test_split(dataset, test_size=0.5, random_state=0)" + ] + }, + { + "cell_type": "markdown", + "source": [ + "We use as predictor a KNN with K = 4 and we train it." + ], + "metadata": { + "collapsed": false + } + }, + { + "cell_type": "code", + "execution_count": 7, + "id": "aa8a3128", + "metadata": {}, + "outputs": [ + { + "data": { + "text/plain": "0.9733333333333334" + }, + "execution_count": 7, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "predictor = KNeighborsClassifier(n_neighbors=4)\n", + "predictor.fit(train.iloc[:, :-1], train.iloc[:, -1])\n", + "predictor.score(test.iloc[:, :-1], test.iloc[:, -1])" + ] + }, + { + "cell_type": "markdown", + "source": [ + "We create an extractor that uses the CART algorithm and we extract prolog rules from our trained KNN." 
+ ], + "metadata": { + "collapsed": false + } + }, + { + "cell_type": "code", + "execution_count": 8, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "CART performance (3 rules):\n", + "Accuracy = 0.92\n", + "Fidelity = 0.92\n", + "\n", + "\n", + "CART extracted rules:\n", + "\n", + "iris(PetalLength, PetalWidth, SepalLength, SepalWidth, setosa) :-\n", + " PetalWidth =< 0.75.\n", + "iris(PetalLength, PetalWidth, SepalLength, SepalWidth, virginica) :-\n", + " PetalWidth =< 1.55.\n", + "iris(PetalLength, PetalWidth, SepalLength, SepalWidth, versicolor).\n" + ] + } + ], + "source": [ + "DTClassifier = DecisionTreeClassifier().fit(train.iloc[:, :-1], predictor.predict(train.iloc[:, :-1]))\n", + "cart = Extractor.cart(CartPredictor(DTClassifier))\n", + "theory_from_cart = cart.extract(train)\n", + "print(f'CART performance ({cart.n_rules} rules):')\n", + "print(f'Accuracy = {cart.accuracy(test):.2f}')\n", + "print(f'Fidelity = {cart.accuracy(test, predictor):.2f}\\n')\n", + "print('\\nCART extracted rules:\\n\\n' + pretty_theory(theory_from_cart))" + ], + "metadata": { + "collapsed": false, + "pycharm": { + "name": "#%%\n" + } + } + }, + { + "cell_type": "markdown", + "source": [ + "We create a GridEx extractor to extract prolog rules from the same KNN." 
+ ], + "metadata": { + "collapsed": false + } + }, + { + "cell_type": "code", + "execution_count": 9, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "GridEx performance (3 rules):\n", + "Accuracy = 0.88\n", + "Accuracy fidelity = 0.91\n", + "\n", + "GridEx extracted rules:\n", + "\n", + "iris(PetalLength, PetalWidth, SepalLength, SepalWidth, setosa) :-\n", + " PetalLength in [1.199999, 1.912499].\n", + "iris(PetalLength, PetalWidth, SepalLength, SepalWidth, virginica) :-\n", + " PetalLength in [2.625, 4.7625].\n", + "iris(PetalLength, PetalWidth, SepalLength, SepalWidth, versicolor) :-\n", + " PetalLength in [4.7625, 6.900001].\n" + ] + } + ], + "source": [ + "ranked = FeatureRanker(x.columns).fit(predictor, x).rankings()\n", + "gridEx = Extractor.gridex(predictor, Grid(1, AdaptiveStrategy(ranked, [(0.85, 8)])), threshold=.1, min_examples=1)\n", + "theory_from_gridEx = gridEx.extract(train)\n", + "print('GridEx performance ({} rules):\\nAccuracy = {:.2f}\\nAccuracy fidelity = {:.2f}\\n'\n", + " .format(gridEx.n_rules, gridEx.accuracy(test), gridEx.accuracy(test, predictor)))\n", + "print('GridEx extracted rules:\\n\\n' + pretty_theory(theory_from_gridEx))" + ], + "metadata": { + "collapsed": false, + "pycharm": { + "name": "#%%\n" + } + } + }, + { + "cell_type": "markdown", + "source": [ + "We use CReEPy and CREAM cluster-based extractors to perform the extraction." 
+ ], + "metadata": { + "collapsed": false, + "pycharm": { + "name": "#%% md\n" + } + } + }, + { + "cell_type": "code", + "execution_count": 10, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "CReEPy performance (3 rules):\n", + "Accuracy = 0.79\n", + "Fidelity = 0.81\n", + "\n", + "CReEPy extracted rules:\n", + "\n", + "iris(PetalLength, PetalWidth, SepalLength, SepalWidth, virginica) :-\n", + " SepalLength in [4.899999, 6.700001], SepalWidth in [2.199999, 3.200001], PetalLength in [2.999999, 5.000001], PetalWidth in [0.999999, 1.800001].\n", + "iris(PetalLength, PetalWidth, SepalLength, SepalWidth, versicolor) :-\n", + " SepalLength in [4.899999, 7.700001], SepalWidth in [2.199999, 3.800001], PetalLength in [2.999999, 6.900001], PetalWidth in [0.999999, 2.500001].\n", + "iris(PetalLength, PetalWidth, SepalLength, SepalWidth, setosa) :-\n", + " SepalLength in [4.399999, 7.900001], SepalWidth in [2.199999, 4.100001], PetalLength in [1.199999, 6.900001], PetalWidth in [0.099999, 2.500001].\n" + ] + } + ], + "source": [ + "creepy = Extractor.creepy(predictor, depth=2, error_threshold=0.1, output=HyperCubeExtractor.Target.CLASSIFICATION)\n", + "theory_from_creepy = creepy.extract(train)\n", + "print('CReEPy performance ({} rules):\\nAccuracy = {:.2f}\\nFidelity = {:.2f}\\n'\n", + " .format(creepy.n_rules, creepy.accuracy(test), creepy.accuracy(test, predictor)))\n", + "print('CReEPy extracted rules:\\n\\n' + pretty_theory(theory_from_creepy))" + ], + "metadata": { + "collapsed": false, + "pycharm": { + "name": "#%%\n" + } + } + }, + { + "cell_type": "code", + "execution_count": 11, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "CREAM performance (3 rules):\n", + "Accuracy = 0.79\n", + "Fidelity = 0.81\n", + "\n", + "CREAM extracted rules:\n", + "\n", + "iris(PetalLength, PetalWidth, SepalLength, SepalWidth, virginica) :-\n", + " SepalLength in [4.899999, 6.700001], SepalWidth in [2.199999, 3.200001], 
PetalLength in [2.999999, 5.000001], PetalWidth in [0.999999, 1.800001].\n", + "iris(PetalLength, PetalWidth, SepalLength, SepalWidth, versicolor) :-\n", + " SepalLength in [4.899999, 7.700001], SepalWidth in [2.199999, 3.800001], PetalLength in [2.999999, 6.900001], PetalWidth in [0.999999, 2.500001].\n", + "iris(PetalLength, PetalWidth, SepalLength, SepalWidth, setosa) :-\n", + " SepalLength in [4.399999, 7.900001], SepalWidth in [2.199999, 4.100001], PetalLength in [1.199999, 6.900001], PetalWidth in [0.099999, 2.500001].\n" + ] + } + ], + "source": [ + "cream = Extractor.cream(predictor, depth=2, error_threshold=0.1, output=HyperCubeExtractor.Target.CLASSIFICATION)\n", + "theory_from_cream = cream.extract(train)\n", + "print('CREAM performance ({} rules):\\nAccuracy = {:.2f}\\nFidelity = {:.2f}\\n'\n", + " .format(cream.n_rules, cream.accuracy(test), cream.accuracy(test, predictor)))\n", + "print('CREAM extracted rules:\\n\\n' + pretty_theory(theory_from_cream))" + ], + "metadata": { + "collapsed": false, + "pycharm": { + "name": "#%%\n" + } + } + }, + { + "cell_type": "code", + "execution_count": 11, + "outputs": [], + "source": [], + "metadata": { + "collapsed": false, + "pycharm": { + "name": "#%%\n" + } + } + } + ], + "metadata": { + "kernelspec": { + "display_name": "Python 3 (ipykernel)", + "language": "python", + "name": "python3" + }, + "language_info": { + "codemirror_mode": { + "name": "ipython", + "version": 3 + }, + "file_extension": ".py", + "mimetype": "text/x-python", + "name": "python", + "nbconvert_exporter": "python", + "pygments_lexer": "ipython3", + "version": "3.9.7" + } + }, + "nbformat": 4, + "nbformat_minor": 5 +} \ No newline at end of file diff --git a/demo/DemoRegression.ipynb b/demo/DemoRegression.ipynb new file mode 100644 index 00000000..3d6a9f5e --- /dev/null +++ b/demo/DemoRegression.ipynb @@ -0,0 +1,352 @@ +{ + "cells": [ + { + "cell_type": "markdown", + "id": "f52126f3", + "metadata": {}, + "source": [ + "# PSyKE's demo for 
regression tasks\n", + "\n", + "Some imports." + ] + }, + { + "cell_type": "code", + "execution_count": 1, + "id": "6b710e7c", + "metadata": {}, + "outputs": [], + "source": [ + "from psyke import Extractor\n", + "from psyke.optimizer.pedro import PEDRO\n", + "from psyke.optimizer import Objective\n", + "from psyke.optimizer.crash import CRASH\n", + "from psyke.regression import HyperCubeExtractor\n", + "from sklearn.neighbors import KNeighborsRegressor\n", + "from psyke.utils.logic import pretty_theory\n", + "from sklearn.model_selection import train_test_split\n", + "from matplotlib import pyplot as plt\n", + "import pandas as pd" + ] + }, + { + "cell_type": "markdown", + "id": "d7c90ed2", + "metadata": { + "pycharm": { + "name": "#%% md\n" + } + }, + "source": [ + "Import an artificial dataset." + ] + }, + { + "cell_type": "code", + "execution_count": 7, + "id": "f8e46c49", + "metadata": {}, + "outputs": [], + "source": [ + "dataset = pd.read_csv(\"test/resources/datasets/df.csv\")\n", + "#dataset = pd.read_csv(\"test/resources/datasets/CCPP.csv\", sep=\";\", decimal=\",\")\n", + "#dataset" + ] + }, + { + "cell_type": "markdown", + "id": "d673b766", + "metadata": {}, + "source": [ + "Split between train and test set in a reproducible way." + ] + }, + { + "cell_type": "code", + "execution_count": 8, + "id": "03fc5e2c", + "metadata": {}, + "outputs": [], + "source": [ + "dataset = dataset[[\"X\", \"Y\", \"Z4\"]].dropna()\n", + "train, test = train_test_split(dataset, test_size=0.5, random_state=10)" + ] + }, + { + "cell_type": "markdown", + "id": "fa6754a0", + "metadata": {}, + "source": [ + "We use as predictor a KNN and we train it." 
+ ] + }, + { + "cell_type": "code", + "execution_count": 9, + "id": "bed764ca", + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "0.03688944007870007\n" + ] + } + ], + "source": [ + "predictor = KNeighborsRegressor(n_neighbors=3).fit(train.iloc[:, :-1], train.iloc[:, -1])\n", + "\n", + "print((abs(predictor.predict(test.iloc[:, :-1]).flatten() - test.iloc[:, -1])).mean())" + ] + }, + { + "cell_type": "markdown", + "id": "96835867", + "metadata": {}, + "source": [ + "We create several extractors that use CReEPy, CREAM, ITER, GridEx and GridREx algorithms to extract prolog rules from the predictor." + ] + }, + { + "cell_type": "code", + "execution_count": 10, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "CReEPy performance (4 rules):\n", + "MAE = 0.00\n", + "MAE fidelity = 0.04\n", + "\n", + "CReEPy extracted rules:\n", + "\n", + "'Z4'(X, Y, Z4) :-\n", + " X in [0.402684, 0.543625], Y in [0.416106, 0.597316], Z4 is -6.805486 + 54.455317 * X + -24.861292 * Y.\n", + "'Z4'(X, Y, Z4) :-\n", + " X in [0.402684, 0.597316], Y in [0.402684, 0.597316], Z4 is -6.6031 + 54.353719 * X + -25.132222 * Y.\n", + "'Z4'(X, Y, Z4) :-\n", + " X in [0.154361, 0.845639], Y in [0.154361, 0.845639], Z4 is 9.003897 + -11.99489 * X + 14.988029 * Y.\n", + "'Z4'(X, Y, Z4) :-\n", + " X in [-0.000001, 1.000001], Y in [-0.000001, 1.000001], Z4 is 1.999564 + 3.999876 * X + -2.999378 * Y.\n" + ] + } + ], + "source": [ + "creepy = Extractor.creepy(predictor, depth=3, error_threshold=0.02, output=HyperCubeExtractor.Target.REGRESSION)\n", + "theory_from_creepy = creepy.extract(train)\n", + "print('CReEPy performance ({} rules):\\nMAE = {:.2f}\\nMAE fidelity = {:.2f}\\n'\n", + " .format(creepy.n_rules, creepy.mae(test), creepy.mae(test, predictor)))\n", + "print('CReEPy extracted rules:\\n\\n' + pretty_theory(theory_from_creepy))" + ], + "metadata": { + "collapsed": false, + "pycharm": { + "name": "#%%\n" + } + } + }, + { + 
"cell_type": "code", + "execution_count": 11, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "CREAM performance (4 rules):\n", + "MAE = 2.39\n", + "MAE fidelity = 2.38\n", + "\n", + "CREAM extracted rules:\n", + "\n", + "'Z4'(X, Y, 8.051474) :-\n", + " X in [0.402684, 0.597316], Y in [0.402684, 0.597316].\n", + "'Z4'(X, Y, 10.458814) :-\n", + " X in [0.154361, 0.845639], Y in [0.154361, 0.845639].\n", + "'Z4'(X, Y, 0.931622) :-\n", + " X in [-0.000001, 0.496645], Y in [-0.000001, 1.000001].\n", + "'Z4'(X, Y, 4.024491) :-\n", + " X in [-0.000001, 1.000001], Y in [-0.000001, 1.000001].\n" + ] + } + ], + "source": [ + "cream = Extractor.cream(predictor, depth=2, error_threshold=0.02, output=HyperCubeExtractor.Target.CONSTANT)\n", + "theory_from_cream = cream.extract(train)\n", + "print('CREAM performance ({} rules):\\nMAE = {:.2f}\\nMAE fidelity = {:.2f}\\n'\n", + " .format(cream.n_rules, cream.mae(test), cream.mae(test, predictor)))\n", + "print('CREAM extracted rules:\\n\\n' + pretty_theory(theory_from_cream))" + ], + "metadata": { + "collapsed": false, + "pycharm": { + "name": "#%%\n" + } + } + }, + { + "cell_type": "code", + "execution_count": null, + "outputs": [], + "source": [ + "#f, (ax1, ax2) = plt.subplots(1, 2, sharey=True)\n", + "#plt.ylim((0, 1))\n", + "#ax1.scatter(test.X, test.Y, c=test.iloc[:, -1], s = 0.5, cmap=plt.cm.get_cmap('gist_rainbow'))\n", + "#ax1.set_xlim((0, 1))\n", + "#ax1.set_aspect(\"equal\")\n", + "#ax2.scatter(test.X, test.Y, c=predictor.predict(test.iloc[:, :-1]), s = 0.5, cmap=plt.cm.get_cmap('gist_rainbow'))\n", + "#ax2.set_xlim((0, 1))\n", + "#ax2.set_aspect(\"equal\")\n", + "#plt.show()\n", + "\n", + "crash = CRASH(predictor, train, max_depth=5, patience=2, readability_tradeoff=.5, algorithm=CRASH.Algorithm.CReEPy)\n", + "crash.search()\n", + "(_, _, depth, threshold) = crash.get_best()[0]\n", + "\n", + "creepy = Extractor.creepy(predictor, depth=depth, error_threshold=threshold, 
output=HyperCubeExtractor.Target.CONSTANT)\n", + "theory_from_creepy = creepy.extract(train)\n", + "print('CReEPy performance ({} rules):\\nMAE = {:.2f}\\nMAE fidelity = {:.2f}\\n'\n", + " .format(creepy.n_rules, creepy.mae(test), creepy.mae(test, predictor)))\n", + "#print('CReEPy extracted rules:\\n\\n' + pretty_theory(theory_from_cream))\n", + "\n", + "plt.scatter(test.X, test.Y, c=creepy.predict(test.iloc[:, :-1]), s=0.5, cmap=plt.cm.get_cmap('gist_rainbow'))\n", + "plt.xlim((0, 1))\n", + "plt.ylim((0, 1))\n", + "plt.gca().set_aspect(\"equal\")\n", + "plt.show()" + ], + "metadata": { + "collapsed": false, + "pycharm": { + "name": "#%%\n" + } + } + }, + { + "cell_type": "code", + "execution_count": null, + "outputs": [], + "source": [ + "#f, (ax1, ax2) = plt.subplots(1, 2, sharey=True)\n", + "#plt.ylim((0, 1))\n", + "#ax1.scatter(test.X, test.Y, c=test.iloc[:, -1], s = 0.5, cmap=plt.cm.get_cmap('gist_rainbow'))\n", + "#ax1.set_xlim((0, 1))\n", + "#ax1.set_aspect(\"equal\")\n", + "#ax2.scatter(test.X, test.Y, c=predictor.predict(test.iloc[:, :-1]), s = 0.5, cmap=plt.cm.get_cmap('gist_rainbow'))\n", + "#ax2.set_xlim((0, 1))\n", + "#ax2.set_aspect(\"equal\")\n", + "#plt.show()\n", + "\n", + "crash = CRASH(predictor, train, max_depth=5, patience=2, readability_tradeoff=.75, algorithm=CRASH.Algorithm.CREAM)\n", + "crash.search()\n", + "(_, _, depth, threshold) = crash.get_best()[0]\n", + "\n", + "cream = Extractor.cream(predictor, depth=depth, error_threshold=threshold, output=HyperCubeExtractor.Target.CONSTANT)\n", + "theory_from_cream = cream.extract(train)\n", + "print('CREAM performance ({} rules):\\nMAE = {:.2f}\\nMAE fidelity = {:.2f}\\n'\n", + " .format(cream.n_rules, cream.mae(test), cream.mae(test, predictor)))\n", + "#print('CREAM extracted rules:\\n\\n' + pretty_theory(theory_from_cream))\n", + "\n", + "plt.scatter(test.X, test.Y, c = cream.predict(test.iloc[:, :-1]), s = 0.5, cmap=plt.cm.get_cmap('gist_rainbow'))\n", + "plt.xlim((0, 1))\n", + 
"plt.ylim((0, 1))\n", + "plt.gca().set_aspect(\"equal\")\n", + "plt.show()" + ], + "metadata": { + "collapsed": false, + "pycharm": { + "name": "#%%\n" + } + } + }, + { + "cell_type": "code", + "execution_count": null, + "outputs": [], + "source": [ + "it = Extractor.iter(predictor, min_update=1.0 / 20, n_points=1, max_iterations=600,\n", + " min_examples=100, threshold=1.5)\n", + "theory_from_iter = it.extract(train)\n", + "print('ITER performance ({} rules):\\nMAE = {:.2f}\\nMAE fidelity = {:.2f}\\n'\n", + " .format(it.n_rules, it.mae(test), it.mae(test, predictor)))\n", + "print('ITER extracted rules:\\n\\n' + pretty_theory(theory_from_iter))" + ], + "metadata": { + "collapsed": false, + "pycharm": { + "name": "#%%\n" + } + } + }, + { + "cell_type": "code", + "execution_count": null, + "outputs": [], + "source": [ + "pedro = PEDRO(predictor, train, max_mae_increase=1.2, min_rule_decrease=0.9, readability_tradeoff=0.1,\n", + " max_depth=5, patience=3, algorithm=PEDRO.Algorithm.GRIDEX, objective=Objective.MODEL)\n", + "pedro.search()\n", + "(_, _, threshold, grid) = pedro.get_best()[0]\n", + "\n", + "gridEx = Extractor.gridex(predictor, grid, threshold=threshold)\n", + "theory_from_gridEx = gridEx.extract(train)\n", + "print('GridEx performance ({} rules):\\nMAE = {:.2f}\\nMAE fidelity = {:.2f}\\n'\n", + " .format(gridEx.n_rules, gridEx.mae(test), gridEx.mae(test, predictor)))\n", + "print('GridEx extracted rules:\\n\\n' + pretty_theory(theory_from_gridEx))" + ], + "metadata": { + "collapsed": false, + "pycharm": { + "name": "#%%\n" + } + } + }, + { + "cell_type": "code", + "execution_count": null, + "outputs": [], + "source": [ + "pedro = PEDRO(predictor, train, max_mae_increase=1.2, min_rule_decrease=0.9, readability_tradeoff=0.1,\n", + " max_depth=5, patience=3, algorithm=PEDRO.Algorithm.GRIDREX, objective=Objective.MODEL)\n", + "pedro.search()\n", + "(_, _, threshold, grid) = pedro.get_best()[0]\n", + "\n", + "gridREx = Extractor.gridrex(predictor, grid, 
threshold=threshold)\n", + "theory_from_gridREx = gridREx.extract(train)\n", + "print('GridREx performance ({} rules):\\nMAE = {:.2f}\\nMAE fidelity = {:.2f}\\n'\n", + " .format(gridREx.n_rules, gridREx.mae(test), gridREx.mae(test, predictor)))\n", + "print('GridREx extracted rules:\\n\\n' + pretty_theory(theory_from_gridREx))" + ], + "metadata": { + "collapsed": false, + "pycharm": { + "name": "#%%\n" + } + } + } + ], + "metadata": { + "kernelspec": { + "display_name": "Python 3", + "language": "python", + "name": "python3" + }, + "language_info": { + "codemirror_mode": { + "name": "ipython", + "version": 3 + }, + "file_extension": ".py", + "mimetype": "text/x-python", + "name": "python", + "nbconvert_exporter": "python", + "pygments_lexer": "ipython3", + "version": "3.9.7" + } + }, + "nbformat": 4, + "nbformat_minor": 5 +} \ No newline at end of file diff --git a/demo/StairwAIdemo.ipynb b/demo/StairwAIdemo.ipynb new file mode 100644 index 00000000..881a8fb1 --- /dev/null +++ b/demo/StairwAIdemo.ipynb @@ -0,0 +1,2080 @@ +{ + "cells": [ + { + "cell_type": "markdown", + "id": "f52126f3", + "metadata": {}, + "source": [ + "# PSyKE's demo for regression tasks\n", + "\n", + "Some imports." 
+ ] + }, + { + "cell_type": "code", + "execution_count": 1, + "id": "6b710e7c", + "metadata": {}, + "outputs": [], + "source": [ + "import pickle\n", + "import pandas as pd\n", + "import numpy as np\n", + "from sklearn.model_selection import train_test_split\n", + "from os.path import exists\n", + "from psyke import Extractor\n", + "from psyke.utils.logic import pretty_theory\n", + "from psyke.regression import FeatureRanker\n", + "from psyke.optimizer.pedro import PEDRO\n", + "from sklearn.neighbors import KNeighborsRegressor\n", + "from psyke.optimizer import Objective\n", + "from psyke.optimizer.crash import CRASH\n", + "\n", + "import warnings\n", + "warnings.simplefilter(\"ignore\")" + ] + }, + { + "cell_type": "code", + "execution_count": 2, + "outputs": [], + "source": [ + "algorithms = [\"contingency\", \"contingency\", \"anticipate\", \"anticipate\", \"contingency\", \"anticipate\"]\n", + "models = [\n", + " \"CONTINGENCY_no_input-memory_DecisionTree_MaxDepth10\",\n", + " \"CONTINGENCY_no_input-time_DecisionTree_MaxDepth10\",\n", + " \"ANTICIPATE_no_input-memory_DecisionTree_MaxDepth10\",\n", + " \"ANTICIPATE_no_input-time_DecisionTree_MaxDepth10\",\n", + " \"CONTINGENCY_input-cost_DecisionTree_MaxDepth15\",\n", + " \"ANTICIPATE_input-cost_DecisionTree_MaxDepth15\"\n", + "]\n", + "models = [\n", + " pickle.load(open(\"test/resources/datasets/models/\" + algorithm + \"/\" + path, 'rb'))\n", + " for path, algorithm in zip(models, algorithms)\n", + "]" + ], + "metadata": { + "collapsed": false, + "pycharm": { + "name": "#%%\n" + } + } + }, + { + "cell_type": "code", + "execution_count": 3, + "outputs": [], + "source": [ + "def process(algorithm):\n", + " df = pd.read_csv(\"test/resources/datasets/datasets/\" + algorithm + \"_trainDataset.csv\")\n", + "\n", + " # Removes header entries\n", + " df = df[df['sol(keuro)'] != 'sol(keuro)']\n", + "\n", + " # Fixed stuff which is always there\n", + " df['PV(kW)'] = df['PV(kW)'].map(lambda entry: 
entry[1:-1].split())\n", + " df['PV(kW)'] = df['PV(kW)'].map(lambda entry: list(np.float_(entry)))\n", + " df['Load(kW)'] = df['Load(kW)'].map(lambda entry: entry[1:-1].split())\n", + " df['Load(kW)'] = df['Load(kW)'].map(lambda entry: list(np.float_(entry)))\n", + "\n", + " X = pd.DataFrame()\n", + "\n", + " X['PV_mean'] = df['PV(kW)'].map(lambda entry: np.array(entry).mean())\n", + " X['PV_std'] = df['PV(kW)'].map(lambda entry: np.array(entry).std())\n", + " X['Load_mean'] = df['Load(kW)'].map(lambda entry: np.array(entry).mean())\n", + " X['Load_std'] = df['Load(kW)'].map(lambda entry: np.array(entry).std())\n", + " X['nScenarios'] = df['nScenarios']\n", + " X['cost'] = df['sol(keuro)']\n", + " X['time'] = df['time(sec)']\n", + " X['memo'] = df['memAvg(MB)']\n", + "\n", + " X.to_csv(\"test/resources/datasets/datasets/\" + algorithm + \".csv\", index = False)\n", + "\n", + " return X" + ], + "metadata": { + "collapsed": false, + "pycharm": { + "name": "#%%\n" + } + } + }, + { + "cell_type": "code", + "execution_count": 4, + "outputs": [], + "source": [ + "toRemove = [\n", + " ['PV_mean', 'PV_std', 'Load_mean', 'Load_std', 'time', 'cost'],\n", + " ['PV_mean', 'PV_std', 'Load_mean', 'Load_std', 'memo', 'cost'],\n", + " ['PV_mean', 'PV_std', 'Load_mean', 'Load_std', 'time', 'cost'],\n", + " ['PV_mean', 'PV_std', 'Load_mean', 'Load_std', 'memo', 'cost'],\n", + " [\"time\", \"memo\"],\n", + " [\"time\", \"memo\"]\n", + "]\n", + "\n", + "features = [\n", + " [\"nTraces\"],\n", + " [\"nTraces\"],\n", + " [\"nScenarios\"],\n", + " [\"nScenarios\"],\n", + " ['PV_mean', 'PV_std', 'Load_mean', 'Load_std', 'nTraces'],\n", + " ['PV_mean', 'PV_std', 'Load_mean', 'Load_std', 'nScenarios']\n", + "]\n", + "\n", + "targets = [\"memo\", \"time\", \"memo\", \"time\", \"cost\", \"cost\"]\n", + "\n", + "output = []" + ], + "metadata": { + "collapsed": false, + "pycharm": { + "name": "#%%\n" + } + } + }, + { + "cell_type": "code", + "execution_count": 5, + "outputs": [ + { + "name": 
"stdout", + "output_type": "stream", + "text": [ + "contingency memo 1\n", + "MAE = 3.64\n", + "Algorithm.CReEPy. Depth: 1. Threshold = 3.43. MAE = 2.29, 2 rules\n", + "Algorithm.CReEPy. Depth: 1. Threshold = 5.34. MAE = 2.25, 2 rules\n", + "Algorithm.CReEPy. Depth: 1. Threshold = 7.25. MAE = 2.25, 2 rules\n", + "\n", + "Algorithm.CReEPy. Depth: 2. Threshold = 3.43. MAE = 2.29, 2 rules\n", + "Algorithm.CReEPy. Depth: 2. Threshold = 5.34. MAE = 2.32, 2 rules\n", + "\n", + "Algorithm.CReEPy. Depth: 3. Threshold = 3.43. MAE = 2.25, 2 rules\n", + "Algorithm.CReEPy. Depth: 3. Threshold = 5.34. MAE = 2.30, 2 rules\n", + "\n", + "Algorithm.CReEPy. Depth: 4. Threshold = 3.43. MAE = 2.17, 3 rules\n", + "Algorithm.CReEPy. Depth: 4. Threshold = 5.34. MAE = 2.30, 2 rules\n", + "Algorithm.CReEPy. Depth: 4. Threshold = 7.25. MAE = 2.25, 2 rules\n", + "Algorithm.CReEPy. Depth: 4. Threshold = 9.16. MAE = 2.25, 2 rules\n", + "\n", + "**********************\n", + "*Best Algorithm.CReEPy*\n", + "**********************\n", + "MAE = 2.17, 3 rules\n", + "Threshold = 3.43\n", + "Depth = 4\n", + "\n", + "**********************\n", + "*Best MAE *\n", + "**********************\n", + "MAE = 2.17, 3 rules\n", + "Threshold = 3.43\n", + "Depth = 4\n", + "\n", + "**********************\n", + "*Best N rules*\n", + "**********************\n", + "MAE = 2.25, 2 rules\n", + "Threshold = 7.25\n", + "Depth = 4\n", + "\n", + "CReEPy performance (2 rules):\n", + "MAE = 4.65\n", + "MAE fidelity = 2.28\n", + "\n", + "Algorithm.CREAM. Depth: 1. Threshold = 3.43. MAE = 2.14, 2 rules\n", + "Algorithm.CREAM. Depth: 1. Threshold = 5.34. MAE = 2.18, 2 rules\n", + "\n", + "Algorithm.CREAM. Depth: 2. Threshold = 3.43. MAE = 2.14, 2 rules\n", + "Algorithm.CREAM. Depth: 2. Threshold = 5.34. 
MAE = 2.18, 2 rules\n", + "\n", + "**********************\n", + "*Best Algorithm.CREAM*\n", + "**********************\n", + "MAE = 2.14, 2 rules\n", + "Threshold = 3.43\n", + "Depth = 2\n", + "\n", + "**********************\n", + "*Best MAE *\n", + "**********************\n", + "MAE = 2.14, 2 rules\n", + "Threshold = 3.43\n", + "Depth = 2\n", + "\n", + "**********************\n", + "*Best N rules*\n", + "**********************\n", + "MAE = 2.18, 2 rules\n", + "Threshold = 5.34\n", + "Depth = 2\n", + "\n", + "CREAM performance (2 rules):\n", + "MAE = 4.63\n", + "MAE fidelity = 2.31\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "contingency time 1\n", + "MAE = 0.84\n", + "Algorithm.CReEPy. Depth: 1. Threshold = 0.74. MAE = 0.76, 2 rules\n", + "Algorithm.CReEPy. Depth: 1. Threshold = 1.15. MAE = 0.80, 2 rules\n", + "\n", + "Algorithm.CReEPy. Depth: 2. Threshold = 0.74. MAE = 0.74, 2 rules\n", + "Algorithm.CReEPy. Depth: 2. Threshold = 1.15. MAE = 0.82, 2 rules\n", + "\n", + "Algorithm.CReEPy. Depth: 3. Threshold = 0.74. MAE = 0.77, 4 rules\n", + "Algorithm.CReEPy. Depth: 3. Threshold = 1.15. MAE = 0.82, 2 rules\n", + "Algorithm.CReEPy. Depth: 3. Threshold = 1.56. MAE = 0.82, 2 rules\n", + "\n", + "Algorithm.CReEPy. Depth: 4. Threshold = 0.74. MAE = 0.74, 2 rules\n", + "Algorithm.CReEPy. Depth: 4. Threshold = 1.15. MAE = 0.74, 2 rules\n", + "\n", + "Algorithm.CReEPy. Depth: 5. Threshold = 0.74. MAE = 0.74, 2 rules\n", + "Algorithm.CReEPy. Depth: 5. Threshold = 1.15. 
MAE = 0.82, 2 rules\n", + "\n", + "**********************\n", + "*Best Algorithm.CReEPy*\n", + "**********************\n", + "MAE = 0.74, 2 rules\n", + "Threshold = 0.74\n", + "Depth = 5\n", + "\n", + "**********************\n", + "*Best MAE *\n", + "**********************\n", + "MAE = 0.74, 2 rules\n", + "Threshold = 0.74\n", + "Depth = 5\n", + "\n", + "**********************\n", + "*Best N rules*\n", + "**********************\n", + "MAE = 0.82, 2 rules\n", + "Threshold = 1.15\n", + "Depth = 5\n", + "\n", + "CReEPy performance (6 rules):\n", + "MAE = 0.97\n", + "MAE fidelity = 0.65\n", + "\n", + "Algorithm.CREAM. Depth: 1. Threshold = 0.74. MAE = 0.79, 2 rules\n", + "Algorithm.CREAM. Depth: 1. Threshold = 1.15. MAE = 0.81, 2 rules\n", + "\n", + "Algorithm.CREAM. Depth: 2. Threshold = 0.74. MAE = 0.78, 3 rules\n", + "Algorithm.CREAM. Depth: 2. Threshold = 1.15. MAE = 0.81, 2 rules\n", + "Algorithm.CREAM. Depth: 2. Threshold = 1.56. MAE = 0.81, 2 rules\n", + "Algorithm.CREAM. Depth: 2. Threshold = 1.98. MAE = 0.81, 2 rules\n", + "Algorithm.CREAM. Depth: 2. Threshold = 2.39. MAE = 0.81, 2 rules\n", + "Algorithm.CREAM. Depth: 2. Threshold = 2.80. MAE = 0.81, 2 rules\n", + "\n", + "**********************\n", + "*Best Algorithm.CREAM*\n", + "**********************\n", + "MAE = 0.78, 3 rules\n", + "Threshold = 0.74\n", + "Depth = 2\n", + "\n", + "**********************\n", + "*Best MAE *\n", + "**********************\n", + "MAE = 0.78, 3 rules\n", + "Threshold = 0.74\n", + "Depth = 2\n", + "\n", + "**********************\n", + "*Best N rules*\n", + "**********************\n", + "MAE = 0.81, 2 rules\n", + "Threshold = 2.80\n", + "Depth = 2\n", + "\n", + "CREAM performance (3 rules):\n", + "MAE = 1.03\n", + "MAE fidelity = 0.77\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "anticipate memo 1\n", + "MAE = 4.10\n", + "Algorithm.CReEPy. Depth: 1. Threshold = 3.75. MAE = 3.37, 2 rules\n", + "Algorithm.CReEPy. Depth: 1. Threshold = 5.83. 
MAE = 3.37, 2 rules\n", + "\n", + "Algorithm.CReEPy. Depth: 2. Threshold = 3.75. MAE = 3.37, 2 rules\n", + "Algorithm.CReEPy. Depth: 2. Threshold = 5.83. MAE = 4.01, 2 rules\n", + "\n", + "**********************\n", + "*Best Algorithm.CReEPy*\n", + "**********************\n", + "MAE = 3.37, 2 rules\n", + "Threshold = 3.75\n", + "Depth = 2\n", + "\n", + "**********************\n", + "*Best MAE *\n", + "**********************\n", + "MAE = 3.37, 2 rules\n", + "Threshold = 3.75\n", + "Depth = 2\n", + "\n", + "**********************\n", + "*Best N rules*\n", + "**********************\n", + "MAE = 4.01, 2 rules\n", + "Threshold = 5.83\n", + "Depth = 2\n", + "\n", + "CReEPy performance (2 rules):\n", + "MAE = 4.96\n", + "MAE fidelity = 3.40\n", + "\n", + "Algorithm.CREAM. Depth: 1. Threshold = 3.75. MAE = 4.07, 2 rules\n", + "Algorithm.CREAM. Depth: 1. Threshold = 5.83. MAE = 4.07, 2 rules\n", + "\n", + "Algorithm.CREAM. Depth: 2. Threshold = 3.75. MAE = 3.84, 3 rules\n", + "Algorithm.CREAM. Depth: 2. Threshold = 5.83. MAE = 4.07, 2 rules\n", + "Algorithm.CREAM. Depth: 2. Threshold = 7.91. MAE = 4.07, 2 rules\n", + "\n", + "**********************\n", + "*Best Algorithm.CREAM*\n", + "**********************\n", + "MAE = 3.84, 3 rules\n", + "Threshold = 3.75\n", + "Depth = 2\n", + "\n", + "**********************\n", + "*Best MAE *\n", + "**********************\n", + "MAE = 3.84, 3 rules\n", + "Threshold = 3.75\n", + "Depth = 2\n", + "\n", + "**********************\n", + "*Best N rules*\n", + "**********************\n", + "MAE = 4.07, 2 rules\n", + "Threshold = 5.83\n", + "Depth = 2\n", + "\n", + "CREAM performance (2 rules):\n", + "MAE = 4.96\n", + "MAE fidelity = 3.40\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "anticipate time 1\n", + "MAE = 8.30\n", + "Algorithm.CReEPy. Depth: 1. Threshold = 7.23. MAE = 8.19, 2 rules\n", + "Algorithm.CReEPy. Depth: 1. Threshold = 11.25. MAE = 9.41, 2 rules\n", + "\n", + "Algorithm.CReEPy. Depth: 2. Threshold = 7.23. 
MAE = 9.46, 3 rules\n", + "Algorithm.CReEPy. Depth: 2. Threshold = 11.25. MAE = 9.41, 2 rules\n", + "Algorithm.CReEPy. Depth: 2. Threshold = 15.27. MAE = 7.03, 2 rules\n", + "Algorithm.CReEPy. Depth: 2. Threshold = 19.29. MAE = 11.34, 2 rules\n", + "\n", + "Algorithm.CReEPy. Depth: 3. Threshold = 7.23. MAE = 11.65, 2 rules\n", + "Algorithm.CReEPy. Depth: 3. Threshold = 11.25. MAE = 7.17, 2 rules\n", + "Algorithm.CReEPy. Depth: 3. Threshold = 15.27. MAE = 10.95, 2 rules\n", + "\n", + "Algorithm.CReEPy. Depth: 4. Threshold = 7.23. MAE = 11.65, 2 rules\n", + "Algorithm.CReEPy. Depth: 4. Threshold = 11.25. MAE = 9.41, 2 rules\n", + "Algorithm.CReEPy. Depth: 4. Threshold = 15.27. MAE = 7.97, 2 rules\n", + "Algorithm.CReEPy. Depth: 4. Threshold = 19.29. MAE = 11.65, 2 rules\n", + "\n", + "Algorithm.CReEPy. Depth: 5. Threshold = 7.23. MAE = 9.16, 5 rules\n", + "Algorithm.CReEPy. Depth: 5. Threshold = 11.25. MAE = 11.65, 2 rules\n", + "\n", + "**********************\n", + "*Best Algorithm.CReEPy*\n", + "**********************\n", + "MAE = 7.03, 2 rules\n", + "Threshold = 15.27\n", + "Depth = 2\n", + "\n", + "**********************\n", + "*Best MAE *\n", + "**********************\n", + "MAE = 7.03, 2 rules\n", + "Threshold = 15.27\n", + "Depth = 2\n", + "\n", + "**********************\n", + "*Best N rules*\n", + "**********************\n", + "MAE = 11.65, 2 rules\n", + "Threshold = 19.29\n", + "Depth = 4\n", + "\n", + "CReEPy performance (2 rules):\n", + "MAE = 13.76\n", + "MAE fidelity = 10.72\n", + "\n", + "Algorithm.CREAM. Depth: 1. Threshold = 7.23. MAE = 12.60, 2 rules\n", + "Algorithm.CREAM. Depth: 1. Threshold = 11.25. MAE = 12.66, 2 rules\n", + "\n", + "Algorithm.CREAM. Depth: 2. Threshold = 7.23. MAE = 12.45, 3 rules\n", + "Algorithm.CREAM. Depth: 2. Threshold = 11.25. MAE = 11.95, 3 rules\n", + "Algorithm.CREAM. Depth: 2. Threshold = 15.27. MAE = 12.74, 2 rules\n", + "Algorithm.CREAM. Depth: 2. Threshold = 19.29. MAE = 12.66, 2 rules\n", + "Algorithm.CREAM. 
Depth: 2. Threshold = 23.30. MAE = 12.57, 2 rules\n", + "Algorithm.CREAM. Depth: 2. Threshold = 27.32. MAE = 12.57, 2 rules\n", + "\n", + "**********************\n", + "*Best Algorithm.CREAM*\n", + "**********************\n", + "MAE = 11.95, 3 rules\n", + "Threshold = 11.25\n", + "Depth = 2\n", + "\n", + "**********************\n", + "*Best MAE *\n", + "**********************\n", + "MAE = 11.95, 3 rules\n", + "Threshold = 11.25\n", + "Depth = 2\n", + "\n", + "**********************\n", + "*Best N rules*\n", + "**********************\n", + "MAE = 12.57, 2 rules\n", + "Threshold = 23.30\n", + "Depth = 2\n", + "\n", + "CREAM performance (3 rules):\n", + "MAE = 10.92\n", + "MAE fidelity = 6.81\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "contingency cost 5\n", + "MAE = 1.50\n", + "Algorithm.CReEPy. Depth: 1. Threshold = 0.50. MAE = 25.88, 2 rules\n", + "Algorithm.CReEPy. Depth: 1. Threshold = 0.78. MAE = 27.67, 2 rules\n", + "\n", + "Algorithm.CReEPy. Depth: 2. Threshold = 0.50. MAE = 27.32, 3 rules\n", + "Algorithm.CReEPy. Depth: 2. Threshold = 0.78. MAE = 25.65, 3 rules\n", + "Algorithm.CReEPy. Depth: 2. Threshold = 1.06. MAE = 26.93, 3 rules\n", + "\n", + "**********************\n", + "*Best Algorithm.CReEPy*\n", + "**********************\n", + "MAE = 25.65, 3 rules\n", + "Threshold = 0.78\n", + "Depth = 2\n", + "\n", + "**********************\n", + "*Best MAE *\n", + "**********************\n", + "MAE = 25.65, 3 rules\n", + "Threshold = 0.78\n", + "Depth = 2\n", + "\n", + "**********************\n", + "*Best N rules*\n", + "**********************\n", + "MAE = 27.67, 2 rules\n", + "Threshold = 0.78\n", + "Depth = 1\n", + "\n", + "CReEPy performance (3 rules):\n", + "MAE = 26.68\n", + "MAE fidelity = 26.88\n", + "\n", + "Algorithm.CREAM. Depth: 1. Threshold = 0.50. MAE = 29.46, 2 rules\n", + "Algorithm.CREAM. Depth: 1. Threshold = 0.78. MAE = 29.46, 2 rules\n", + "\n", + "Algorithm.CREAM. Depth: 2. Threshold = 0.50. MAE = 29.16, 3 rules\n", + "Algorithm.CREAM. 
Depth: 2. Threshold = 0.78. MAE = 29.19, 3 rules\n", + "\n", + "**********************\n", + "*Best Algorithm.CREAM*\n", + "**********************\n", + "MAE = 29.16, 3 rules\n", + "Threshold = 0.50\n", + "Depth = 2\n", + "\n", + "**********************\n", + "*Best MAE *\n", + "**********************\n", + "MAE = 29.16, 3 rules\n", + "Threshold = 0.50\n", + "Depth = 2\n", + "\n", + "**********************\n", + "*Best N rules*\n", + "**********************\n", + "MAE = 29.46, 2 rules\n", + "Threshold = 0.50\n", + "Depth = 1\n", + "\n", + "CREAM performance (4 rules):\n", + "MAE = 26.66\n", + "MAE fidelity = 26.85\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "anticipate cost 5\n", + "MAE = 0.41\n", + "Algorithm.CReEPy. Depth: 1. Threshold = 0.03. MAE = 1.77, 2 rules\n", + "Algorithm.CReEPy. Depth: 1. Threshold = 0.05. MAE = 1.81, 2 rules\n", + "\n", + "Algorithm.CReEPy. Depth: 2. Threshold = 0.03. MAE = 2.24, 3 rules\n", + "Algorithm.CReEPy. Depth: 2. Threshold = 0.05. MAE = 2.40, 3 rules\n", + "\n", + "Algorithm.CReEPy. Depth: 3. Threshold = 0.03. MAE = 2.36, 4 rules\n", + "Algorithm.CReEPy. Depth: 3. Threshold = 0.05. MAE = 2.39, 4 rules\n", + "\n", + "Algorithm.CReEPy. Depth: 4. Threshold = 0.03. MAE = 2.30, 5 rules\n", + "Algorithm.CReEPy. Depth: 4. Threshold = 0.05. MAE = 1.70, 5 rules\n", + "Algorithm.CReEPy. Depth: 4. Threshold = 0.07. MAE = 1.68, 5 rules\n", + "Algorithm.CReEPy. Depth: 4. Threshold = 0.09. MAE = 1.65, 5 rules\n", + "Algorithm.CReEPy. Depth: 4. Threshold = 0.11. 
MAE = 1.70, 5 rules\n", + "\n", + "**********************\n", + "*Best Algorithm.CReEPy*\n", + "**********************\n", + "MAE = 1.65, 5 rules\n", + "Threshold = 0.09\n", + "Depth = 4\n", + "\n", + "**********************\n", + "*Best MAE *\n", + "**********************\n", + "MAE = 1.65, 5 rules\n", + "Threshold = 0.09\n", + "Depth = 4\n", + "\n", + "**********************\n", + "*Best N rules*\n", + "**********************\n", + "MAE = 1.81, 2 rules\n", + "Threshold = 0.05\n", + "Depth = 1\n", + "\n", + "CReEPy performance (5 rules):\n", + "MAE = 1.43\n", + "MAE fidelity = 1.66\n", + "\n", + "Algorithm.CREAM. Depth: 1. Threshold = 0.03. MAE = 2.87, 2 rules\n", + "Algorithm.CREAM. Depth: 1. Threshold = 0.05. MAE = 2.90, 2 rules\n", + "\n", + "Algorithm.CREAM. Depth: 2. Threshold = 0.03. MAE = 2.65, 4 rules\n", + "Algorithm.CREAM. Depth: 2. Threshold = 0.05. MAE = 2.88, 3 rules\n", + "Algorithm.CREAM. Depth: 2. Threshold = 0.07. MAE = 2.82, 3 rules\n", + "Algorithm.CREAM. Depth: 2. Threshold = 0.09. MAE = 2.81, 3 rules\n", + "Algorithm.CREAM. Depth: 2. Threshold = 0.11. 
MAE = 2.82, 3 rules\n", + "\n", + "**********************\n", + "*Best Algorithm.CREAM*\n", + "**********************\n", + "MAE = 2.65, 4 rules\n", + "Threshold = 0.03\n", + "Depth = 2\n", + "\n", + "**********************\n", + "*Best MAE *\n", + "**********************\n", + "MAE = 2.65, 4 rules\n", + "Threshold = 0.03\n", + "Depth = 2\n", + "\n", + "**********************\n", + "*Best N rules*\n", + "**********************\n", + "MAE = 2.90, 2 rules\n", + "Threshold = 0.05\n", + "Depth = 1\n", + "\n", + "CREAM performance (4 rules):\n", + "MAE = 1.36\n", + "MAE fidelity = 1.46\n", + "\n", + "\n", + "\n", + "\n", + "\n" + ] + } + ], + "source": [ + "for rem, feat, target, algorithm, model in zip(toRemove, features, targets, algorithms, models):\n", + "\n", + " print(algorithm, target, len(feat))\n", + " name = f\"test/resources/datasets/datasets/{algorithm}.csv\"\n", + "\n", + " if not exists(name):\n", + " process(algorithm)\n", + "\n", + " dataset = pd.read_csv(name)\n", + "\n", + " dataset = dataset.drop(rem, axis = 1)\n", + "\n", + " train, test = train_test_split(dataset, test_size=0.2, random_state=10)\n", + " model.fit(train.iloc[:, :-1], train.iloc[:, -1])\n", + " E = abs(model.predict(test.iloc[:, :-1]) - test.iloc[:, -1])\n", + " print(\"MAE = {:.2f}\".format(E.mean()))\n", + " output.append((\"model\", algorithm, target, E.mean()))\n", + " #plt.plot(E, \".\")\n", + " #plt.show()\n", + "\n", + " #pedro = PEDRO(model, train, max_mae_increase=1.2, min_rule_decrease=0.9, readability_tradeoff=0.1,\n", + " # max_depth=5, patience=1, algorithm=PEDRO.Algorithm.GRIDREX, objective=Objective.MODEL)\n", + " #pedro.search()\n", + " #best = pedro.get_best()[0]\n", + "\n", + " #ranked = FeatureRanker(dataset.columns[:-1]).fit(model, dataset.iloc[:, :-1]).rankings()\n", + " #gridREx = Extractor.gridrex(model, best[3], threshold=best[2])\n", + " #theory_from_gridREx = gridREx.extract(train)\n", + " #print('GridREx performance ({} rules):\\nMAE = {:.2f}\\nMAE fidelity 
= {:.2f}\\n'\n", + " # .format(gridREx.n_rules, gridREx.mae(test), gridREx.mae(test, model)))\n", + " #print('GridREx extracted rules:\\n\\n' + pretty_theory(theory_from_gridREx))\n", + "\n", + " crash = CRASH(model, train, readability_tradeoff=0.1, max_depth=5, patience=1,\n", + " algorithm=CRASH.Algorithm.CReEPy, objective=Objective.MODEL)\n", + " crash.search()\n", + " best = crash.get_best()[0]\n", + "\n", + " creepy = Extractor.creepy(model, depth=best[2], error_threshold=best[3], constant=False)\n", + " theory_from_creepy = creepy.extract(train)\n", + " print('CReEPy performance ({} rules):\\nMAE = {:.2f}\\nMAE fidelity = {:.2f}\\n'\n", + " .format(creepy.n_rules, creepy.mae(test), creepy.mae(test, model)))\n", + "\n", + " crash = CRASH(model, train, readability_tradeoff=0.1, max_depth=5, patience=1,\n", + " algorithm=CRASH.Algorithm.CREAM, objective=Objective.MODEL)\n", + " crash.search()\n", + " best = crash.get_best()[0]\n", + "\n", + " cream = Extractor.cream(model, depth=best[2], error_threshold=best[3], constant=False)\n", + " theory_from_cream = cream.extract(train)\n", + " print('CREAM performance ({} rules):\\nMAE = {:.2f}\\nMAE fidelity = {:.2f}\\n'\n", + " .format(cream.n_rules, cream.mae(test), cream.mae(test, model)))\n", + "\n", + " print()\n", + " print()\n", + " print()\n", + " print()" + ], + "metadata": { + "collapsed": false, + "pycharm": { + "name": "#%%\n" + } + } + }, + { + "cell_type": "code", + "execution_count": 7, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "contingency memo 1\n", + "MAE = 2.82\n", + "Algorithm.CReEPy. Depth: 1. Threshold = 2.69. MAE = 4.75, 2 rules\n", + "Algorithm.CReEPy. Depth: 1. Threshold = 4.18. MAE = 4.83, 2 rules\n", + "\n", + "Algorithm.CReEPy. Depth: 2. Threshold = 2.69. MAE = 4.83, 2 rules\n", + "Algorithm.CReEPy. Depth: 2. Threshold = 4.18. MAE = 4.79, 2 rules\n", + "Algorithm.CReEPy. Depth: 2. Threshold = 5.68. MAE = 4.75, 2 rules\n", + "Algorithm.CReEPy. Depth: 2. 
Threshold = 7.17. MAE = 4.75, 2 rules\n", + "\n", + "Algorithm.CReEPy. Depth: 3. Threshold = 2.69. MAE = 4.81, 3 rules\n", + "Algorithm.CReEPy. Depth: 3. Threshold = 4.18. MAE = 4.79, 2 rules\n", + "Algorithm.CReEPy. Depth: 3. Threshold = 5.68. MAE = 4.81, 2 rules\n", + "\n", + "Algorithm.CReEPy. Depth: 4. Threshold = 2.69. MAE = 4.76, 3 rules\n", + "Algorithm.CReEPy. Depth: 4. Threshold = 4.18. MAE = 4.81, 2 rules\n", + "Algorithm.CReEPy. Depth: 4. Threshold = 5.68. MAE = 4.79, 3 rules\n", + "\n", + "**********************\n", + "*Best Algorithm.CReEPy*\n", + "**********************\n", + "MAE = 4.75, 2 rules\n", + "Threshold = 2.69\n", + "Depth = 1\n", + "\n", + "**********************\n", + "*Best MAE *\n", + "**********************\n", + "MAE = 4.75, 2 rules\n", + "Threshold = 2.69\n", + "Depth = 1\n", + "\n", + "**********************\n", + "*Best N rules*\n", + "**********************\n", + "MAE = 4.81, 2 rules\n", + "Threshold = 4.18\n", + "Depth = 4\n", + "\n", + "CReEPy performance (2 rules):\n", + "MAE = 4.61\n", + "MAE fidelity = 4.37\n", + "\n", + "Algorithm.CREAM. Depth: 1. Threshold = 2.69. MAE = 4.59, 2 rules\n", + "Algorithm.CREAM. Depth: 1. Threshold = 4.18. MAE = 4.59, 2 rules\n", + "\n", + "Algorithm.CREAM. Depth: 2. Threshold = 2.69. MAE = 4.40, 3 rules\n", + "Algorithm.CREAM. Depth: 2. Threshold = 4.18. MAE = 4.59, 2 rules\n", + "Algorithm.CREAM. Depth: 2. Threshold = 5.68. 
MAE = 4.59, 2 rules\n", + "\n", + "**********************\n", + "*Best Algorithm.CREAM*\n", + "**********************\n", + "MAE = 4.40, 3 rules\n", + "Threshold = 2.69\n", + "Depth = 2\n", + "\n", + "**********************\n", + "*Best MAE *\n", + "**********************\n", + "MAE = 4.40, 3 rules\n", + "Threshold = 2.69\n", + "Depth = 2\n", + "\n", + "**********************\n", + "*Best N rules*\n", + "**********************\n", + "MAE = 4.59, 2 rules\n", + "Threshold = 5.68\n", + "Depth = 2\n", + "\n", + "CREAM performance (4 rules):\n", + "MAE = 4.55\n", + "MAE fidelity = 3.93\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "contingency time 1\n", + "MAE = 1.02\n", + "Algorithm.CReEPy. Depth: 1. Threshold = 0.90. MAE = 1.16, 2 rules\n", + "Algorithm.CReEPy. Depth: 1. Threshold = 1.39. MAE = 1.19, 2 rules\n", + "\n", + "Algorithm.CReEPy. Depth: 2. Threshold = 0.90. MAE = 1.19, 3 rules\n", + "Algorithm.CReEPy. Depth: 2. Threshold = 1.39. MAE = 1.16, 2 rules\n", + "Algorithm.CReEPy. Depth: 2. Threshold = 1.89. MAE = 1.16, 2 rules\n", + "\n", + "**********************\n", + "*Best Algorithm.CReEPy*\n", + "**********************\n", + "MAE = 1.16, 2 rules\n", + "Threshold = 1.39\n", + "Depth = 2\n", + "\n", + "**********************\n", + "*Best MAE *\n", + "**********************\n", + "MAE = 1.16, 2 rules\n", + "Threshold = 1.39\n", + "Depth = 2\n", + "\n", + "**********************\n", + "*Best N rules*\n", + "**********************\n", + "MAE = 1.16, 2 rules\n", + "Threshold = 1.39\n", + "Depth = 2\n", + "\n", + "CReEPy performance (2 rules):\n", + "MAE = 1.10\n", + "MAE fidelity = 1.02\n", + "\n", + "Algorithm.CREAM. Depth: 1. Threshold = 0.90. MAE = 1.10, 2 rules\n", + "Algorithm.CREAM. Depth: 1. Threshold = 1.39. MAE = 1.07, 2 rules\n", + "Algorithm.CREAM. Depth: 1. Threshold = 1.89. MAE = 1.07, 2 rules\n", + "\n", + "Algorithm.CREAM. Depth: 2. Threshold = 0.90. MAE = 1.13, 3 rules\n", + "Algorithm.CREAM. Depth: 2. Threshold = 1.39. 
MAE = 1.07, 2 rules\n", + "Algorithm.CREAM. Depth: 2. Threshold = 1.89. MAE = 1.07, 2 rules\n", + "Algorithm.CREAM. Depth: 2. Threshold = 2.39. MAE = 1.10, 2 rules\n", + "\n", + "**********************\n", + "*Best Algorithm.CREAM*\n", + "**********************\n", + "MAE = 1.07, 2 rules\n", + "Threshold = 1.89\n", + "Depth = 2\n", + "\n", + "**********************\n", + "*Best MAE *\n", + "**********************\n", + "MAE = 1.07, 2 rules\n", + "Threshold = 1.89\n", + "Depth = 2\n", + "\n", + "**********************\n", + "*Best N rules*\n", + "**********************\n", + "MAE = 1.10, 2 rules\n", + "Threshold = 2.39\n", + "Depth = 2\n", + "\n", + "CREAM performance (2 rules):\n", + "MAE = 1.10\n", + "MAE fidelity = 1.02\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "anticipate memo 1\n", + "MAE = 4.91\n", + "Algorithm.CReEPy. Depth: 1. Threshold = 4.46. MAE = 5.14, 2 rules\n", + "Algorithm.CReEPy. Depth: 1. Threshold = 6.94. MAE = 5.14, 2 rules\n", + "\n", + "Algorithm.CReEPy. Depth: 2. Threshold = 4.46. MAE = 5.81, 2 rules\n", + "Algorithm.CReEPy. Depth: 2. Threshold = 6.94. MAE = 5.13, 2 rules\n", + "Algorithm.CReEPy. Depth: 2. Threshold = 9.42. MAE = 5.13, 2 rules\n", + "\n", + "Algorithm.CReEPy. Depth: 3. Threshold = 4.46. MAE = 5.15, 2 rules\n", + "Algorithm.CReEPy. Depth: 3. Threshold = 6.94. MAE = 5.93, 2 rules\n", + "\n", + "Algorithm.CReEPy. Depth: 4. Threshold = 4.46. MAE = 5.13, 2 rules\n", + "Algorithm.CReEPy. Depth: 4. Threshold = 6.94. MAE = 5.13, 2 rules\n", + "\n", + "Algorithm.CReEPy. Depth: 5. Threshold = 4.46. MAE = 5.14, 2 rules\n", + "Algorithm.CReEPy. Depth: 5. Threshold = 6.94. 
MAE = 5.90, 2 rules\n", + "\n", + "**********************\n", + "*Best Algorithm.CReEPy*\n", + "**********************\n", + "MAE = 5.13, 2 rules\n", + "Threshold = 4.46\n", + "Depth = 4\n", + "\n", + "**********************\n", + "*Best MAE *\n", + "**********************\n", + "MAE = 5.13, 2 rules\n", + "Threshold = 4.46\n", + "Depth = 4\n", + "\n", + "**********************\n", + "*Best N rules*\n", + "**********************\n", + "MAE = 5.90, 2 rules\n", + "Threshold = 6.94\n", + "Depth = 5\n", + "\n", + "CReEPy performance (2 rules):\n", + "MAE = 4.96\n", + "MAE fidelity = 4.57\n", + "\n", + "Algorithm.CREAM. Depth: 1. Threshold = 4.46. MAE = 5.67, 2 rules\n", + "Algorithm.CREAM. Depth: 1. Threshold = 6.94. MAE = 5.67, 2 rules\n", + "\n", + "Algorithm.CREAM. Depth: 2. Threshold = 4.46. MAE = 5.47, 3 rules\n", + "Algorithm.CREAM. Depth: 2. Threshold = 6.94. MAE = 5.67, 2 rules\n", + "Algorithm.CREAM. Depth: 2. Threshold = 9.42. MAE = 5.91, 2 rules\n", + "\n", + "**********************\n", + "*Best Algorithm.CREAM*\n", + "**********************\n", + "MAE = 5.47, 3 rules\n", + "Threshold = 4.46\n", + "Depth = 2\n", + "\n", + "**********************\n", + "*Best MAE *\n", + "**********************\n", + "MAE = 5.47, 3 rules\n", + "Threshold = 4.46\n", + "Depth = 2\n", + "\n", + "**********************\n", + "*Best N rules*\n", + "**********************\n", + "MAE = 5.91, 2 rules\n", + "Threshold = 9.42\n", + "Depth = 2\n", + "\n", + "CREAM performance (3 rules):\n", + "MAE = 5.03\n", + "MAE fidelity = 4.30\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "anticipate time 1\n", + "MAE = 9.34\n", + "Algorithm.CReEPy. Depth: 1. Threshold = 8.25. MAE = 12.46, 2 rules\n", + "Algorithm.CReEPy. Depth: 1. Threshold = 12.84. MAE = 14.29, 2 rules\n", + "\n", + "Algorithm.CReEPy. Depth: 2. Threshold = 8.25. MAE = 14.30, 3 rules\n", + "Algorithm.CReEPy. Depth: 2. Threshold = 12.84. MAE = 12.33, 3 rules\n", + "Algorithm.CReEPy. Depth: 2. Threshold = 17.42. 
MAE = 13.83, 2 rules\n", + "Algorithm.CReEPy. Depth: 2. Threshold = 22.01. MAE = 11.68, 2 rules\n", + "Algorithm.CReEPy. Depth: 2. Threshold = 26.59. MAE = 14.29, 2 rules\n", + "\n", + "Algorithm.CReEPy. Depth: 3. Threshold = 8.25. MAE = 12.38, 4 rules\n", + "Algorithm.CReEPy. Depth: 3. Threshold = 12.84. MAE = 14.29, 2 rules\n", + "Algorithm.CReEPy. Depth: 3. Threshold = 17.42. MAE = 12.46, 2 rules\n", + "Algorithm.CReEPy. Depth: 3. Threshold = 22.01. MAE = 12.46, 2 rules\n", + "\n", + "Algorithm.CReEPy. Depth: 4. Threshold = 8.25. MAE = 13.45, 2 rules\n", + "Algorithm.CReEPy. Depth: 4. Threshold = 12.84. MAE = 13.14, 2 rules\n", + "Algorithm.CReEPy. Depth: 4. Threshold = 17.42. MAE = 13.39, 2 rules\n", + "\n", + "Algorithm.CReEPy. Depth: 5. Threshold = 8.25. MAE = 12.36, 4 rules\n", + "Algorithm.CReEPy. Depth: 5. Threshold = 12.84. MAE = 14.29, 2 rules\n", + "Algorithm.CReEPy. Depth: 5. Threshold = 17.42. MAE = 14.29, 2 rules\n", + "\n", + "**********************\n", + "*Best Algorithm.CReEPy*\n", + "**********************\n", + "MAE = 11.68, 2 rules\n", + "Threshold = 22.01\n", + "Depth = 2\n", + "\n", + "**********************\n", + "*Best MAE *\n", + "**********************\n", + "MAE = 11.68, 2 rules\n", + "Threshold = 22.01\n", + "Depth = 2\n", + "\n", + "**********************\n", + "*Best N rules*\n", + "**********************\n", + "MAE = 14.29, 2 rules\n", + "Threshold = 12.84\n", + "Depth = 5\n", + "\n", + "CReEPy performance (2 rules):\n", + "MAE = 13.66\n", + "MAE fidelity = 11.29\n", + "\n", + "Algorithm.CREAM. Depth: 1. Threshold = 8.25. MAE = 14.62, 2 rules\n", + "Algorithm.CREAM. Depth: 1. Threshold = 12.84. MAE = 14.62, 2 rules\n", + "\n", + "Algorithm.CREAM. Depth: 2. Threshold = 8.25. MAE = 14.38, 3 rules\n", + "Algorithm.CREAM. Depth: 2. Threshold = 12.84. MAE = 14.67, 2 rules\n", + "Algorithm.CREAM. Depth: 2. Threshold = 17.42. 
MAE = 14.76, 2 rules\n", + "\n", + "**********************\n", + "*Best Algorithm.CREAM*\n", + "**********************\n", + "MAE = 14.38, 3 rules\n", + "Threshold = 8.25\n", + "Depth = 2\n", + "\n", + "**********************\n", + "*Best MAE *\n", + "**********************\n", + "MAE = 14.38, 3 rules\n", + "Threshold = 8.25\n", + "Depth = 2\n", + "\n", + "**********************\n", + "*Best N rules*\n", + "**********************\n", + "MAE = 14.76, 2 rules\n", + "Threshold = 17.42\n", + "Depth = 2\n", + "\n", + "CREAM performance (3 rules):\n", + "MAE = 11.01\n", + "MAE fidelity = 8.26\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "contingency cost 5\n", + "MAE = 2.07\n", + "Algorithm.CReEPy. Depth: 1. Threshold = 0.00. MAE = 26.89, 2 rules\n", + "Algorithm.CReEPy. Depth: 1. Threshold = 0.00. MAE = 27.65, 2 rules\n", + "\n", + "Algorithm.CReEPy. Depth: 2. Threshold = 0.00. MAE = 26.46, 3 rules\n", + "Algorithm.CReEPy. Depth: 2. Threshold = 0.00. MAE = 26.46, 3 rules\n", + "\n", + "**********************\n", + "*Best Algorithm.CReEPy*\n", + "**********************\n", + "MAE = 26.46, 3 rules\n", + "Threshold = 0.00\n", + "Depth = 2\n", + "\n", + "**********************\n", + "*Best MAE *\n", + "**********************\n", + "MAE = 26.46, 3 rules\n", + "Threshold = 0.00\n", + "Depth = 2\n", + "\n", + "**********************\n", + "*Best N rules*\n", + "**********************\n", + "MAE = 27.65, 2 rules\n", + "Threshold = 0.00\n", + "Depth = 1\n", + "\n", + "CReEPy performance (3 rules):\n", + "MAE = 26.68\n", + "MAE fidelity = 27.05\n", + "\n", + "Algorithm.CREAM. Depth: 1. Threshold = 0.00. MAE = 29.31, 2 rules\n", + "Algorithm.CREAM. Depth: 1. Threshold = 0.00. MAE = 29.31, 2 rules\n", + "\n", + "Algorithm.CREAM. Depth: 2. Threshold = 0.00. MAE = 28.66, 4 rules\n", + "Algorithm.CREAM. Depth: 2. Threshold = 0.00. 
MAE = 29.18, 4 rules\n", + "\n", + "**********************\n", + "*Best Algorithm.CREAM*\n", + "**********************\n", + "MAE = 28.66, 4 rules\n", + "Threshold = 0.00\n", + "Depth = 2\n", + "\n", + "**********************\n", + "*Best MAE *\n", + "**********************\n", + "MAE = 28.66, 4 rules\n", + "Threshold = 0.00\n", + "Depth = 2\n", + "\n", + "**********************\n", + "*Best N rules*\n", + "**********************\n", + "MAE = 29.31, 2 rules\n", + "Threshold = 0.00\n", + "Depth = 1\n", + "\n", + "CREAM performance (4 rules):\n", + "MAE = 26.71\n", + "MAE fidelity = 27.07\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "anticipate cost 5\n", + "MAE = 0.50\n", + "Algorithm.CReEPy. Depth: 1. Threshold = 0.00. MAE = 2.44, 2 rules\n", + "Algorithm.CReEPy. Depth: 1. Threshold = 0.00. MAE = 2.39, 2 rules\n", + "Algorithm.CReEPy. Depth: 1. Threshold = 0.00. MAE = 1.79, 2 rules\n", + "Algorithm.CReEPy. Depth: 1. Threshold = 0.00. MAE = 2.41, 2 rules\n", + "\n", + "Algorithm.CReEPy. Depth: 2. Threshold = 0.00. MAE = 1.70, 3 rules\n", + "Algorithm.CReEPy. Depth: 2. Threshold = 0.00. MAE = 2.35, 3 rules\n", + "\n", + "**********************\n", + "*Best Algorithm.CReEPy*\n", + "**********************\n", + "MAE = 1.70, 3 rules\n", + "Threshold = 0.00\n", + "Depth = 2\n", + "\n", + "**********************\n", + "*Best MAE *\n", + "**********************\n", + "MAE = 1.70, 3 rules\n", + "Threshold = 0.00\n", + "Depth = 2\n", + "\n", + "**********************\n", + "*Best N rules*\n", + "**********************\n", + "MAE = 2.41, 2 rules\n", + "Threshold = 0.00\n", + "Depth = 1\n", + "\n", + "CReEPy performance (3 rules):\n", + "MAE = 1.99\n", + "MAE fidelity = 2.12\n", + "\n", + "Algorithm.CREAM. Depth: 1. Threshold = 0.00. MAE = 2.86, 2 rules\n", + "Algorithm.CREAM. Depth: 1. Threshold = 0.00. MAE = 2.83, 2 rules\n", + "Algorithm.CREAM. Depth: 1. Threshold = 0.00. MAE = 2.86, 2 rules\n", + "\n", + "Algorithm.CREAM. Depth: 2. Threshold = 0.00. 
MAE = 2.89, 4 rules\n", + "Algorithm.CREAM. Depth: 2. Threshold = 0.00. MAE = 2.80, 4 rules\n", + "Algorithm.CREAM. Depth: 2. Threshold = 0.00. MAE = 2.65, 4 rules\n", + "Algorithm.CREAM. Depth: 2. Threshold = 0.00. MAE = 2.81, 4 rules\n", + "\n", + "**********************\n", + "*Best Algorithm.CREAM*\n", + "**********************\n", + "MAE = 2.65, 4 rules\n", + "Threshold = 0.00\n", + "Depth = 2\n", + "\n", + "**********************\n", + "*Best MAE *\n", + "**********************\n", + "MAE = 2.65, 4 rules\n", + "Threshold = 0.00\n", + "Depth = 2\n", + "\n", + "**********************\n", + "*Best N rules*\n", + "**********************\n", + "MAE = 2.86, 2 rules\n", + "Threshold = 0.00\n", + "Depth = 1\n", + "\n", + "CREAM performance (4 rules):\n", + "MAE = 1.36\n", + "MAE fidelity = 1.53\n", + "\n", + "\n", + "\n", + "\n", + "\n" + ] + } + ], + "source": [ + "for rem, feat, target, algorithm in zip(toRemove, features, targets, algorithms):\n", + "\n", + " print(algorithm, target, len(feat))\n", + " name = f\"test/resources/datasets/datasets/{algorithm}.csv\"\n", + "\n", + " if not exists(name):\n", + " process(algorithm)\n", + "\n", + " dataset = pd.read_csv(name)\n", + "\n", + " dataset = dataset.drop(rem, axis = 1)\n", + "\n", + " train, test = train_test_split(dataset, test_size=0.2, random_state=10)\n", + " model = KNeighborsRegressor(n_neighbors=1).fit(train.iloc[:, :-1], train.iloc[:, -1])\n", + " E = abs(model.predict(test.iloc[:, :-1]) - test.iloc[:, -1])\n", + " print(\"MAE = {:.2f}\".format(E.mean()))\n", + " output.append((\"model\", algorithm, target, E.mean()))\n", + "\n", + " crash = CRASH(model, train, readability_tradeoff=0.1, max_depth=5, patience=1,\n", + " algorithm=CRASH.Algorithm.CReEPy, objective=Objective.DATA)\n", + " crash.search()\n", + " best = crash.get_best()[0]\n", + "\n", + " creepy = Extractor.creepy(model, depth=best[2], error_threshold=best[3], constant=False)\n", + " theory_from_creepy = creepy.extract(train)\n", + " 
print('CReEPy performance ({} rules):\\nMAE = {:.2f}\\nMAE fidelity = {:.2f}\\n'\n", + " .format(creepy.n_rules, creepy.mae(test), creepy.mae(test, model)))\n", + "\n", + " crash = CRASH(model, train, readability_tradeoff=0.1, max_depth=5, patience=1,\n", + " algorithm=CRASH.Algorithm.CREAM, objective=Objective.DATA)\n", + " crash.search()\n", + " best = crash.get_best()[0]\n", + "\n", + " cream = Extractor.cream(model, depth=best[2], error_threshold=best[3], constant=False)\n", + " theory_from_cream = cream.extract(train)\n", + " print('CREAM performance ({} rules):\\nMAE = {:.2f}\\nMAE fidelity = {:.2f}\\n'\n", + " .format(cream.n_rules, cream.mae(test), cream.mae(test, model)))\n", + "\n", + " print()\n", + " print()\n", + " print()\n", + " print()" + ], + "metadata": { + "collapsed": false, + "pycharm": { + "name": "#%%\n" + } + } + }, + { + "cell_type": "code", + "execution_count": 9, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "contingency memo 1\n", + "skip\n", + "contingency time 1\n", + "skip\n", + "anticipate memo 1\n", + "skip\n", + "anticipate time 1\n", + "skip\n", + "contingency cost 5\n", + "MAE = 1.50\n", + "Algorithm.CReEPy. Depth: 1. Threshold = 0.50. MAE = 25.63, 2 rules\n", + "Algorithm.CReEPy. Depth: 1. Threshold = 0.78. MAE = 27.54, 2 rules\n", + "Algorithm.CReEPy. Depth: 1. Threshold = 27.54. MAE = 27.67, 2 rules\n", + "\n", + "Algorithm.CReEPy. Depth: 2. Threshold = 0.50. MAE = 26.13, 3 rules\n", + "Algorithm.CReEPy. Depth: 2. Threshold = 0.78. MAE = 26.24, 3 rules\n", + "Algorithm.CReEPy. Depth: 2. Threshold = 26.24. MAE = 23.74, 3 rules\n", + "Algorithm.CReEPy. Depth: 2. Threshold = 51.69. MAE = 27.39, 2 rules\n", + "Algorithm.CReEPy. Depth: 2. Threshold = 77.14. MAE = 28.03, 2 rules\n", + "\n", + "Algorithm.CReEPy. Depth: 3. Threshold = 0.50. MAE = 24.22, 4 rules\n", + "Algorithm.CReEPy. Depth: 3. Threshold = 0.78. MAE = 23.26, 4 rules\n", + "Algorithm.CReEPy. Depth: 3. Threshold = 1.06. 
MAE = 23.67, 4 rules\n", + "Algorithm.CReEPy. Depth: 3. Threshold = 23.67. MAE = 25.08, 4 rules\n", + "\n", + "Algorithm.CReEPy. Depth: 4. Threshold = 0.50. MAE = 23.93, 5 rules\n", + "Algorithm.CReEPy. Depth: 4. Threshold = 0.78. MAE = 23.02, 5 rules\n", + "Algorithm.CReEPy. Depth: 4. Threshold = 1.06. MAE = 23.50, 5 rules\n", + "Algorithm.CReEPy. Depth: 4. Threshold = 23.50. MAE = 25.68, 3 rules\n", + "Algorithm.CReEPy. Depth: 4. Threshold = 45.93. MAE = 26.90, 2 rules\n", + "Algorithm.CReEPy. Depth: 4. Threshold = 68.36. MAE = 27.50, 2 rules\n", + "\n", + "Algorithm.CReEPy. Depth: 5. Threshold = 0.50. MAE = 23.02, 6 rules\n", + "Algorithm.CReEPy. Depth: 5. Threshold = 0.78. MAE = 22.76, 6 rules\n", + "Algorithm.CReEPy. Depth: 5. Threshold = 1.06. MAE = 23.45, 6 rules\n", + "Algorithm.CReEPy. Depth: 5. Threshold = 23.45. MAE = 23.00, 6 rules\n", + "Algorithm.CReEPy. Depth: 5. Threshold = 45.84. MAE = 27.50, 2 rules\n", + "Algorithm.CReEPy. Depth: 5. Threshold = 68.23. MAE = 28.12, 2 rules\n", + "\n", + "**********************\n", + "*Best Algorithm.CReEPy*\n", + "**********************\n", + "MAE = 23.02, 5 rules\n", + "Threshold = 0.78\n", + "Depth = 4\n", + "\n", + "**********************\n", + "*Best MAE *\n", + "**********************\n", + "MAE = 22.76, 6 rules\n", + "Threshold = 0.78\n", + "Depth = 5\n", + "\n", + "**********************\n", + "*Best N rules*\n", + "**********************\n", + "MAE = 27.50, 2 rules\n", + "Threshold = 45.84\n", + "Depth = 5\n", + "\n", + "CReEPy performance (5 rules):\n", + "MAE = 21.28\n", + "MAE fidelity = 21.36\n", + "\n", + "\n", + "Algorithm.CReEPy. Depth: 1. Threshold = 0.00. MAE = 25.62, 2 rules\n", + "Algorithm.CReEPy. Depth: 1. Threshold = 0.00. MAE = 26.89, 2 rules\n", + "Algorithm.CReEPy. Depth: 1. Threshold = 26.89. MAE = 27.48, 2 rules\n", + "\n", + "Algorithm.CReEPy. Depth: 2. Threshold = 0.00. MAE = 26.56, 3 rules\n", + "Algorithm.CReEPy. Depth: 2. Threshold = 0.00. 
MAE = 25.63, 3 rules\n", + "Algorithm.CReEPy. Depth: 2. Threshold = 0.00. MAE = 24.52, 3 rules\n", + "Algorithm.CReEPy. Depth: 2. Threshold = 0.00. MAE = 26.68, 3 rules\n", + "Algorithm.CReEPy. Depth: 2. Threshold = 26.68. MAE = 26.14, 3 rules\n", + "Algorithm.CReEPy. Depth: 2. Threshold = 53.37. MAE = 27.42, 2 rules\n", + "Algorithm.CReEPy. Depth: 2. Threshold = 80.05. MAE = 27.54, 2 rules\n", + "\n", + "Algorithm.CReEPy. Depth: 3. Threshold = 0.00. MAE = 24.15, 4 rules\n", + "Algorithm.CReEPy. Depth: 3. Threshold = 0.00. MAE = 24.65, 4 rules\n", + "Algorithm.CReEPy. Depth: 3. Threshold = 24.65. MAE = 26.10, 4 rules\n", + "\n", + "Algorithm.CReEPy. Depth: 4. Threshold = 0.00. MAE = 23.24, 5 rules\n", + "Algorithm.CReEPy. Depth: 4. Threshold = 0.00. MAE = 24.15, 5 rules\n", + "Algorithm.CReEPy. Depth: 4. Threshold = 24.15. MAE = 25.62, 2 rules\n", + "Algorithm.CReEPy. Depth: 4. Threshold = 48.31. MAE = 26.52, 2 rules\n", + "\n", + "Algorithm.CReEPy. Depth: 5. Threshold = 0.00. MAE = 23.50, 6 rules\n", + "Algorithm.CReEPy. Depth: 5. Threshold = 0.00. MAE = 24.71, 6 rules\n", + "Algorithm.CReEPy. Depth: 5. Threshold = 24.71. MAE = 24.40, 3 rules\n", + "Algorithm.CReEPy. Depth: 5. Threshold = 49.41. MAE = 27.48, 2 rules\n", + "Algorithm.CReEPy. Depth: 5. Threshold = 74.12. MAE = 26.89, 2 rules\n", + "Algorithm.CReEPy. Depth: 5. Threshold = 98.83. 
MAE = 27.42, 2 rules\n", + "\n", + "**********************\n", + "*Best Algorithm.CReEPy*\n", + "**********************\n", + "MAE = 23.24, 5 rules\n", + "Threshold = 0.00\n", + "Depth = 4\n", + "\n", + "**********************\n", + "*Best MAE *\n", + "**********************\n", + "MAE = 23.24, 5 rules\n", + "Threshold = 0.00\n", + "Depth = 4\n", + "\n", + "**********************\n", + "*Best N rules*\n", + "**********************\n", + "MAE = 27.42, 2 rules\n", + "Threshold = 98.83\n", + "Depth = 5\n", + "\n", + "CReEPy performance (5 rules):\n", + "MAE = 25.50\n", + "MAE fidelity = 25.84\n", + "\n", + "\n", + "anticipate cost 5\n", + "MAE = 0.41\n", + "Algorithm.CReEPy. Depth: 1. Threshold = 0.03. MAE = 2.70, 2 rules\n", + "Algorithm.CReEPy. Depth: 1. Threshold = 0.05. MAE = 2.37, 2 rules\n", + "Algorithm.CReEPy. Depth: 1. Threshold = 0.07. MAE = 1.96, 2 rules\n", + "Algorithm.CReEPy. Depth: 1. Threshold = 0.09. MAE = 2.43, 2 rules\n", + "Algorithm.CReEPy. Depth: 1. Threshold = 2.43. MAE = 1.77, 2 rules\n", + "Algorithm.CReEPy. Depth: 1. Threshold = 4.77. MAE = 2.39, 2 rules\n", + "\n", + "Algorithm.CReEPy. Depth: 2. Threshold = 0.03. MAE = 2.42, 3 rules\n", + "Algorithm.CReEPy. Depth: 2. Threshold = 0.05. MAE = 2.19, 3 rules\n", + "Algorithm.CReEPy. Depth: 2. Threshold = 0.07. MAE = 2.25, 3 rules\n", + "Algorithm.CReEPy. Depth: 2. Threshold = 2.25. MAE = 2.40, 2 rules\n", + "Algorithm.CReEPy. Depth: 2. Threshold = 4.42. MAE = 1.77, 2 rules\n", + "Algorithm.CReEPy. Depth: 2. Threshold = 6.59. 
MAE = 2.36, 2 rules\n", + "\n", + "**********************\n", + "*Best Algorithm.CReEPy*\n", + "**********************\n", + "MAE = 1.77, 2 rules\n", + "Threshold = 4.42\n", + "Depth = 2\n", + "\n", + "**********************\n", + "*Best MAE *\n", + "**********************\n", + "MAE = 1.77, 2 rules\n", + "Threshold = 4.42\n", + "Depth = 2\n", + "\n", + "**********************\n", + "*Best N rules*\n", + "**********************\n", + "MAE = 2.36, 2 rules\n", + "Threshold = 6.59\n", + "Depth = 2\n", + "\n", + "CReEPy performance (2 rules):\n", + "MAE = 2.08\n", + "MAE fidelity = 2.16\n", + "\n", + "\n", + "Algorithm.CReEPy. Depth: 1. Threshold = 0.00. MAE = 2.36, 2 rules\n", + "Algorithm.CReEPy. Depth: 1. Threshold = 0.00. MAE = 2.36, 2 rules\n", + "Algorithm.CReEPy. Depth: 1. Threshold = 2.36. MAE = 1.79, 2 rules\n", + "Algorithm.CReEPy. Depth: 1. Threshold = 4.73. MAE = 2.69, 2 rules\n", + "\n", + "Algorithm.CReEPy. Depth: 2. Threshold = 0.00. MAE = 2.35, 3 rules\n", + "Algorithm.CReEPy. Depth: 2. Threshold = 0.00. MAE = 2.62, 3 rules\n", + "Algorithm.CReEPy. Depth: 2. Threshold = 2.62. MAE = 2.36, 2 rules\n", + "Algorithm.CReEPy. Depth: 2. Threshold = 5.23. MAE = 1.77, 2 rules\n", + "Algorithm.CReEPy. Depth: 2. Threshold = 7.85. MAE = 2.38, 2 rules\n", + "\n", + "Algorithm.CReEPy. Depth: 3. Threshold = 0.00. MAE = 2.35, 4 rules\n", + "Algorithm.CReEPy. Depth: 3. Threshold = 0.00. MAE = 2.38, 4 rules\n", + "Algorithm.CReEPy. Depth: 3. Threshold = 2.38. MAE = 1.77, 2 rules\n", + "Algorithm.CReEPy. Depth: 3. Threshold = 4.77. 
MAE = 1.77, 2 rules\n", + "\n", + "**********************\n", + "*Best Algorithm.CReEPy*\n", + "**********************\n", + "MAE = 1.77, 2 rules\n", + "Threshold = 2.38\n", + "Depth = 3\n", + "\n", + "**********************\n", + "*Best MAE *\n", + "**********************\n", + "MAE = 1.77, 2 rules\n", + "Threshold = 2.38\n", + "Depth = 3\n", + "\n", + "**********************\n", + "*Best N rules*\n", + "**********************\n", + "MAE = 1.77, 2 rules\n", + "Threshold = 2.38\n", + "Depth = 3\n", + "\n", + "CReEPy performance (2 rules):\n", + "MAE = 2.08\n", + "MAE fidelity = 2.19\n", + "\n", + "\n" + ] + } + ], + "source": [ + "for i, (rem, feat, target, algorithm, model) in enumerate(zip(toRemove, features, targets, algorithms, models)):\n", + "\n", + " print(algorithm, target, len(feat))\n", + "\n", + " if i < 4:\n", + " print(\"skip\")\n", + " continue\n", + "\n", + " name = f\"test/resources/datasets/datasets/{algorithm}.csv\"\n", + "\n", + " if not exists(name):\n", + " process(algorithm)\n", + "\n", + " dataset = pd.read_csv(name)\n", + "\n", + " dataset = dataset.drop(rem, axis = 1)\n", + "\n", + " train, test = train_test_split(dataset, test_size=0.2, random_state=10)\n", + " model.fit(train.iloc[:, :-1], train.iloc[:, -1])\n", + " E = abs(model.predict(test.iloc[:, :-1]) - test.iloc[:, -1])\n", + " print(\"MAE = {:.2f}\".format(E.mean()))\n", + " output.append((\"model\", algorithm, target, E.mean()))\n", + "\n", + " crash = CRASH(model, train, readability_tradeoff=0.5, max_depth=5, patience=2,\n", + " algorithm=CRASH.Algorithm.CReEPy, objective=Objective.MODEL)\n", + " crash.search()\n", + " best = crash.get_best()[0]\n", + "\n", + " creepy = Extractor.creepy(model, depth=best[2], error_threshold=best[3], constant=False)\n", + " theory_from_creepy = creepy.extract(train)\n", + " print('CReEPy performance ({} rules):\\nMAE = {:.2f}\\nMAE fidelity = {:.2f}\\n'\n", + " .format(creepy.n_rules, creepy.mae(test), creepy.mae(test, model)))\n", + "\n", + " 
print()\n", + "\n", + " model = KNeighborsRegressor(n_neighbors=1).fit(train.iloc[:, :-1], train.iloc[:, -1])\n", + " crash = CRASH(model, train, readability_tradeoff=0.5, max_depth=5, patience=2,\n", + " algorithm=CRASH.Algorithm.CReEPy, objective=Objective.DATA)\n", + " crash.search()\n", + " best = crash.get_best()[0]\n", + "\n", + " creepy = Extractor.creepy(model, depth=best[2], error_threshold=best[3], constant=False)\n", + " theory_from_creepy = creepy.extract(train)\n", + " print('CReEPy performance ({} rules):\\nMAE = {:.2f}\\nMAE fidelity = {:.2f}\\n'\n", + " .format(creepy.n_rules, creepy.mae(test), creepy.mae(test, model)))\n", + "\n", + " print()" + ], + "metadata": { + "collapsed": false, + "pycharm": { + "name": "#%%\n" + } + } + }, + { + "cell_type": "code", + "execution_count": 5, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "contingency memo 1\n", + "skip\n", + "contingency time 1\n", + "skip\n", + "anticipate memo 1\n", + "skip\n", + "anticipate time 1\n", + "skip\n", + "contingency cost 5\n", + "MAE = 1.50\n", + "Algorithm.CREAM. Depth: 1. Threshold = 0.00. MAE = 29.47, 2 rules\n", + "Algorithm.CREAM. Depth: 1. Threshold = 0.00. MAE = 29.37, 2 rules\n", + "Algorithm.CREAM. Depth: 1. Threshold = 0.00. MAE = 29.40, 2 rules\n", + "Algorithm.CREAM. Depth: 1. Threshold = 29.40. MAE = 29.27, 2 rules\n", + "Algorithm.CREAM. Depth: 1. Threshold = 58.80. MAE = 29.31, 2 rules\n", + "\n", + "Algorithm.CREAM. Depth: 2. Threshold = 0.00. MAE = 29.21, 4 rules\n", + "Algorithm.CREAM. Depth: 2. Threshold = 0.00. MAE = 29.14, 4 rules\n", + "Algorithm.CREAM. Depth: 2. Threshold = 0.00. MAE = 29.21, 4 rules\n", + "Algorithm.CREAM. Depth: 2. Threshold = 29.21. MAE = 28.99, 3 rules\n", + "Algorithm.CREAM. Depth: 2. Threshold = 58.41. MAE = 29.37, 2 rules\n", + "Algorithm.CREAM. Depth: 2. Threshold = 87.62. MAE = 29.37, 2 rules\n", + "Algorithm.CREAM. Depth: 2. Threshold = 116.83. MAE = 29.31, 2 rules\n", + "Algorithm.CREAM. 
Depth: 2. Threshold = 146.04. MAE = 29.47, 2 rules\n", + "\n", + "Algorithm.CREAM. Depth: 3. Threshold = 0.00. MAE = 28.22, 8 rules\n", + "Algorithm.CREAM. Depth: 3. Threshold = 0.00. MAE = 29.21, 8 rules\n", + "Algorithm.CREAM. Depth: 3. Threshold = 29.21. MAE = 29.01, 4 rules\n", + "Algorithm.CREAM. Depth: 3. Threshold = 58.43. MAE = 29.47, 2 rules\n", + "Algorithm.CREAM. Depth: 3. Threshold = 87.64. MAE = 29.47, 2 rules\n", + "\n", + "Algorithm.CREAM. Depth: 4. Threshold = 0.00. MAE = 28.36, 16 rules\n", + "Algorithm.CREAM. Depth: 4. Threshold = 0.00. MAE = 27.78, 16 rules\n", + "Algorithm.CREAM. Depth: 4. Threshold = 27.78. MAE = 28.92, 5 rules\n", + "Algorithm.CREAM. Depth: 4. Threshold = 55.55. MAE = 29.31, 2 rules\n", + "Algorithm.CREAM. Depth: 4. Threshold = 83.33. MAE = 29.39, 2 rules\n", + "\n", + "Algorithm.CREAM. Depth: 5. Threshold = 0.00. MAE = 28.63, 26 rules\n", + "Algorithm.CREAM. Depth: 5. Threshold = 0.00. MAE = 28.82, 22 rules\n", + "Algorithm.CREAM. Depth: 5. Threshold = 0.00. MAE = 27.51, 21 rules\n", + "Algorithm.CREAM. Depth: 5. Threshold = 27.51. MAE = 28.28, 6 rules\n", + "Algorithm.CREAM. Depth: 5. Threshold = 55.03. MAE = 29.48, 2 rules\n", + "Algorithm.CREAM. Depth: 5. Threshold = 82.54. MAE = 29.37, 2 rules\n", + "Algorithm.CREAM. Depth: 5. Threshold = 110.06. MAE = 28.26, 2 rules\n", + "Algorithm.CREAM. Depth: 5. Threshold = 137.57. 
MAE = 29.24, 2 rules\n", + "\n", + "**********************\n", + "*Best Algorithm.CREAM*\n", + "**********************\n", + "MAE = 28.26, 2 rules\n", + "Threshold = 110.06\n", + "Depth = 5\n", + "\n", + "**********************\n", + "*Best MAE *\n", + "**********************\n", + "MAE = 27.51, 21 rules\n", + "Threshold = 0.00\n", + "Depth = 5\n", + "\n", + "**********************\n", + "*Best N rules*\n", + "**********************\n", + "MAE = 29.24, 2 rules\n", + "Threshold = 137.57\n", + "Depth = 5\n", + "\n", + "CReEPy performance (2 rules):\n", + "MAE = 27.93\n", + "MAE fidelity = 28.32\n", + "\n", + "\n", + "anticipate cost 5\n", + "skip\n" + ] + } + ], + "source": [ + "for i, (rem, feat, target, algorithm, model) in enumerate(zip(toRemove, features, targets, algorithms, models)):\n", + "\n", + " print(algorithm, target, len(feat))\n", + "\n", + " if i != 4:\n", + " print(\"skip\")\n", + " continue\n", + "\n", + " name = f\"test/resources/datasets/datasets/{algorithm}.csv\"\n", + "\n", + " if not exists(name):\n", + " process(algorithm)\n", + "\n", + " dataset = pd.read_csv(name)\n", + "\n", + " dataset = dataset.drop(rem, axis = 1)\n", + "\n", + " train, test = train_test_split(dataset, test_size=0.2, random_state=10)\n", + " model.fit(train.iloc[:, :-1], train.iloc[:, -1])\n", + " E = abs(model.predict(test.iloc[:, :-1]) - test.iloc[:, -1])\n", + " print(\"MAE = {:.2f}\".format(E.mean()))\n", + " output.append((\"model\", algorithm, target, E.mean()))\n", + "\n", + " #crash = CRASH(model, train, readability_tradeoff=0.5, max_depth=5, patience=2,\n", + " # algorithm=CRASH.Algorithm.CREAM, objective=Objective.MODEL)\n", + " #crash.search()\n", + " #best = crash.get_best()[0]\n", + "\n", + " #cream = Extractor.creepy(model, depth=best[2], error_threshold=best[3], constant=False)\n", + " #theory_from_cream = cream.extract(train)\n", + " #print('CReEPy performance ({} rules):\\nMAE = {:.2f}\\nMAE fidelity = {:.2f}\\n'\n", + " # .format(cream.n_rules, 
cream.mae(test), cream.mae(test, model)))\n", + "\n", + " #print()\n", + "\n", + " model = KNeighborsRegressor(n_neighbors=1).fit(train.iloc[:, :-1], train.iloc[:, -1])\n", + " crash = CRASH(model, train, readability_tradeoff=0.5, max_depth=5, patience=2,\n", + " algorithm=CRASH.Algorithm.CREAM, objective=Objective.DATA)\n", + " crash.search()\n", + " best = crash.get_best()[0]\n", + "\n", + " cream = Extractor.cream(model, depth=best[2], error_threshold=best[3], constant=False)  # build CREAM, matching the algorithm searched above\n", + " theory_from_cream = cream.extract(train)\n", + " print('CREAM performance ({} rules):\\nMAE = {:.2f}\\nMAE fidelity = {:.2f}\\n'\n", + " .format(cream.n_rules, cream.mae(test), cream.mae(test, model)))\n", + "\n", + " print()" + ], + "metadata": { + "collapsed": false, + "pycharm": { + "name": "#%%\n" + } + } + }, + { + "cell_type": "code", + "execution_count": 25, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "The slowest run took 113.80 times longer than the fastest. This could mean that an intermediate result is being cached.\n", + "50 µs ± 137 µs per loop (mean ± std. dev. 
of 100 runs, 1 loop each)\n" + ] + } + ], + "source": [ + "%timeit -r 100 -n 1 print(5, end=\"\\r\")" + ], + "metadata": { + "collapsed": false, + "pycharm": { + "name": "#%%\n" + } + } + }, + { + "cell_type": "code", + "execution_count": 103, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "contingency memo 1 ['PV_mean', 'PV_std', 'Load_mean', 'Load_std'] ['time', 'cost']\n", + "\n", + "5 variables\n", + "\n", + "100 instances\n", + "0.03 +- 0.01\n", + "\n", + "500 instances\n", + "0.04 +- 0.01\n", + "\n", + "1000 instances\n", + "0.08 +- 0.02\n", + "\n", + "2000 instances\n", + "0.24 +- 0.04\n", + "\n", + "4000 instances\n", + "0.40 +- 0.07\n", + "\n", + "7000 instances\n", + "1.49 +- 0.16\n", + "\n", + "10000 instances\n", + "3.24 +- 0.48\n", + "\n", + "\n", + "4 variables\n", + "\n", + "100 instances\n", + "0.03 +- 0.01\n", + "\n", + "500 instances\n", + "0.04 +- 0.01\n", + "\n", + "1000 instances\n", + "0.08 +- 0.01\n", + "\n", + "2000 instances\n", + "0.19 +- 0.02\n", + "\n", + "4000 instances\n", + "0.44 +- 0.10\n", + "\n", + "7000 instances\n", + "1.18 +- 0.06\n", + "\n", + "10000 instances\n", + "3.27 +- 0.34\n", + "\n", + "\n", + "3 variables\n", + "\n", + "100 instances\n", + "0.03 +- 0.01\n", + "\n", + "500 instances\n", + "0.05 +- 0.01\n", + "\n", + "1000 instances\n", + "0.06 +- 0.01\n", + "\n", + "2000 instances\n", + "0.15 +- 0.01\n", + "\n", + "4000 instances\n", + "0.35 +- 0.03\n", + "\n", + "7000 instances\n", + "0.96 +- 0.05\n", + "\n", + "10000 instances\n", + "2.01 +- 0.18\n", + "\n", + "\n", + "2 variables\n", + "\n", + "100 instances\n", + "0.03 +- 0.00\n", + "\n", + "500 instances\n", + "0.04 +- 0.01\n", + "\n", + "1000 instances\n", + "0.07 +- 0.01\n", + "\n", + "2000 instances\n", + "0.12 +- 0.02\n", + "\n", + "4000 instances\n", + "0.32 +- 0.07\n", + "\n", + "7000 instances\n", + "0.66 +- 0.04\n", + "\n", + "10000 instances\n", + "2.09 +- 0.24\n", + "\n", + "\n", + "1 variables\n", + "\n", + "100 
instances\n", + "0.05 +- 0.01\n", + "\n", + "500 instances\n", + "0.06 +- 0.01\n", + "\n", + "1000 instances\n", + "0.38 +- 0.18\n", + "\n", + "2000 instances\n", + "0.81 +- 0.11\n", + "\n", + "4000 instances\n", + "1.17 +- 0.08\n", + "\n", + "7000 instances\n", + "7.75 +- 0.50\n", + "\n", + "10000 instances\n", + "4.50 +- 0.29\n", + "\n", + "\n", + "{5: [0.03287119388580322, 0.041033308506011966, 0.08207057476043701, 0.24315330743789673, 0.40391945362091064, 1.488190951347351, 3.2419833016395567], 4: [0.02859572172164917, 0.042196600437164306, 0.07671485900878906, 0.18936739206314088, 0.4387374997138977, 1.181975963115692, 3.269099836349487], 3: [0.027770025730133055, 0.0483594012260437, 0.06041494607925415, 0.15415239572525025, 0.3471947169303894, 0.9557713294029235, 2.00965603351593], 2: [0.03156923770904541, 0.043448188304901124, 0.06604753017425537, 0.1234019160270691, 0.3216898012161255, 0.6594726300239563, 2.088407206535339], 1: [0.054462478160858155, 0.06058593988418579, 0.37771976232528687, 0.8130515933036804, 1.1683255887031556, 7.751835277080536, 4.501545391082764]}\n", + "{5: [0.008006568310036771, 0.010832056515099364, 0.017278037467434816, 0.04436329902541149, 0.07084922210433292, 0.16235953798011757, 0.47879549725950094], 4: [0.006010070122448614, 0.007395213923417343, 0.012944601559750614, 0.018297218049809154, 0.09872410923183107, 0.0566825482202447, 0.3407695658478622], 3: [0.006643890546785536, 0.010349164269489706, 0.00623181652795555, 0.007351175705842842, 0.0283532494275511, 0.04679988236936149, 0.17987806095635428], 2: [0.003937103959197547, 0.01014291744697181, 0.00619447504987264, 0.019691113374270007, 0.07097996098355641, 0.040935159311804685, 0.24153349831790663], 1: [0.00812968064374636, 0.006694817921617686, 0.17816417920507002, 0.1053105435784018, 0.07591906174957001, 0.5019025282675496, 0.2873544931115201]}\n" + ] + } + ], + "source": [ + "import time\n", + "\n", + "i = 0\n", + "\n", + "rem, feat, target, algorithm, model = 
toRemove[i], features[i], targets[i], algorithms[i], models[i]\n", + "\n", + "print(algorithm, target, len(feat), rem[:-2], rem[-2:])\n", + "print()\n", + "name = f\"test/resources/datasets/datasets/{algorithm}.csv\"\n", + "\n", + "if not exists(name):\n", + " process(algorithm)\n", + "\n", + "dataset = pd.read_csv(name).drop(rem[-2 :], axis = 1)\n", + "\n", + "m = {}\n", + "e = {}\n", + "\n", + "for r in range(-1, len(rem) - 2):\n", + " if r >= 0:\n", + " dataset = dataset.drop([rem[r]], axis = 1)\n", + "\n", + " train, test = train_test_split(dataset, test_size=0.1, random_state=10)\n", + " model.fit(train.iloc[:, :-1], train.iloc[:, -1])\n", + "\n", + " print(f\"{len(dataset.columns) - 1} variables\\n\")\n", + " dat = []\n", + " for j in [100, 500, 1000, 2000, 4000, 7000, 10000]:\n", + " print(j, \"instances\")\n", + " res = []\n", + " for i in range(100):\n", + " print(i, end=\"\\r\")\n", + " t0 = time.perf_counter()  # monotonic high-resolution clock for interval timing\n", + " creepy = Extractor.creepy(model, depth=1, error_threshold=.8, constant=False)\n", + " theory_from_creepy = creepy.extract(train.iloc[:j, :])\n", + " t1 = time.perf_counter()\n", + " res.append(t1 - t0)\n", + " res = np.array(res)\n", + " print(f'{np.mean(res):.2f} +- {np.std(res):.2f}')\n", + " print()\n", + " dat.append((np.mean(res), np.std(res)))\n", + " m[len(dataset.columns) - 1] = [d[0] for d in dat]\n", + " e[len(dataset.columns) - 1] = [d[1] for d in dat]\n", + " print()\n", + "print(m)\n", + "print(e)" + ], + "metadata": { + "collapsed": false, + "pycharm": { + "name": "#%%\n" + } + } + }, + { + "cell_type": "code", + "execution_count": 104, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "contingency memo 1 ['PV_mean', 'PV_std', 'Load_mean', 'Load_std'] ['time', 'cost']\n", + "\n", + "5 variables\n", + "\n", + "100 instances\n", + "0.04 +- 0.01\n", + "\n", + "500 instances\n", + "0.05 +- 0.01\n", + "\n", + "1000 instances\n", + "0.11 +- 0.02\n", + "\n", + "2000 instances\n", + "0.25 +- 0.04\n", + "\n", + "4000 
instances\n", + "0.43 +- 0.09\n", + "\n", + "7000 instances\n", + "1.53 +- 0.22\n", + "\n", + "10000 instances\n", + "3.14 +- 0.44\n", + "\n", + "\n", + "4 variables\n", + "\n", + "100 instances\n", + "0.04 +- 0.01\n", + "\n", + "500 instances\n", + "0.05 +- 0.01\n", + "\n", + "1000 instances\n", + "0.08 +- 0.00\n", + "\n", + "2000 instances\n", + "0.19 +- 0.02\n", + "\n", + "4000 instances\n", + "0.40 +- 0.01\n", + "\n", + "7000 instances\n", + "1.21 +- 0.10\n", + "\n", + "10000 instances\n", + "3.15 +- 0.21\n", + "\n", + "\n", + "3 variables\n", + "\n", + "100 instances\n", + "0.04 +- 0.01\n", + "\n", + "500 instances\n", + "0.05 +- 0.01\n", + "\n", + "1000 instances\n", + "0.07 +- 0.01\n", + "\n", + "2000 instances\n", + "0.16 +- 0.01\n", + "\n", + "4000 instances\n", + "0.35 +- 0.02\n", + "\n", + "7000 instances\n", + "0.97 +- 0.08\n", + "\n", + "10000 instances\n", + "1.98 +- 0.17\n", + "\n", + "\n", + "2 variables\n", + "\n", + "100 instances\n", + "0.04 +- 0.01\n", + "\n", + "500 instances\n", + "0.06 +- 0.01\n", + "\n", + "1000 instances\n", + "0.08 +- 0.01\n", + "\n", + "2000 instances\n", + "0.11 +- 0.01\n", + "\n", + "4000 instances\n", + "0.33 +- 0.07\n", + "\n", + "7000 instances\n", + "0.64 +- 0.03\n", + "\n", + "10000 instances\n", + "2.01 +- 0.12\n", + "\n", + "\n", + "1 variables\n", + "\n", + "100 instances\n", + "0.05 +- 0.01\n", + "\n", + "500 instances\n", + "0.07 +- 0.01\n", + "\n", + "1000 instances\n", + "0.38 +- 0.18\n", + "\n", + "2000 instances\n", + "0.78 +- 0.03\n", + "\n", + "4000 instances\n", + "1.22 +- 0.16\n", + "\n", + "7000 instances\n", + "7.58 +- 0.35\n", + "\n", + "10000 instances\n", + "4.45 +- 0.24\n", + "\n", + "\n", + "{5: [0.042794463634490965, 0.05281662464141846, 0.10578654050827026, 0.2454042077064514, 0.4272336745262146, 1.5308331942558289, 3.1398912501335143], 4: [0.037742633819580075, 0.05019772291183472, 0.07981077432632447, 0.19199324607849122, 0.4008229851722717, 1.207040240764618, 3.1493691873550413], 3: 
[0.03636547803878784, 0.054166588783264163, 0.0737765073776245, 0.16370140075683592, 0.35039893388748167, 0.9726590657234192, 1.9785983300209045], 2: [0.03985038995742798, 0.056003010272979735, 0.07539872884750366, 0.11321240425109863, 0.32898611307144165, 0.6414580917358399, 2.012137541770935], 1: [0.05446675777435303, 0.0706281042098999, 0.37666619300842286, 0.777180597782135, 1.2234806847572326, 7.578078618049622, 4.447733290195465]}\n", + "{5: [0.006960809869187141, 0.007676563695357571, 0.0217224658216331, 0.04145347891457142, 0.09026192145722545, 0.22107988902352302, 0.4350317521126616], 4: [0.007603805854676417, 0.006809748933163486, 0.004490720164146021, 0.017805997294028073, 0.009527117999078386, 0.10359012897907922, 0.21151193401795604], 3: [0.0074533542868429345, 0.007757262205622548, 0.011480113435289822, 0.007219566419900182, 0.020423586767389295, 0.08343571324580469, 0.17491113373962613], 2: [0.00786624021155995, 0.012563408894036322, 0.008872308138793765, 0.0061607795200352014, 0.07073490739624513, 0.028163153334897523, 0.12469774042959099], 1: [0.00790054982045218, 0.009733123608675343, 0.17951258136789902, 0.03204691544868753, 0.16452532889211433, 0.3474080984680701, 0.23790970889358112]}\n" + ] + } + ], + "source": [ + "import time\n", + "\n", + "i = 0\n", + "\n", + "rem, feat, target, algorithm, model = toRemove[i], features[i], targets[i], algorithms[i], models[i]\n", + "\n", + "print(algorithm, target, len(feat), rem[:-2], rem[-2:])\n", + "print()\n", + "name = f\"test/resources/datasets/datasets/{algorithm}.csv\"\n", + "\n", + "if not exists(name):\n", + " process(algorithm)\n", + "\n", + "dataset = pd.read_csv(name).drop(rem[-2 :], axis = 1)\n", + "\n", + "m = {}\n", + "e = {}\n", + "\n", + "for r in range(-1, len(rem) - 2):\n", + " if r >= 0:\n", + " dataset = dataset.drop([rem[r]], axis = 1)\n", + "\n", + " train, test = train_test_split(dataset, test_size=0.1, random_state=10)\n", + " model.fit(train.iloc[:, :-1], train.iloc[:, -1])\n", + 
"\n", + " print(f\"{len(dataset.columns) - 1} variables\\n\")\n", + " dat = []\n", + " for j in [100, 500, 1000, 2000, 4000, 7000, 10000]:\n", + " print(j, \"instances\")\n", + " res = []\n", + " for i in range(100):\n", + " print(i, end=\"\\r\")\n", + " t0 = time.perf_counter()  # monotonic high-resolution clock for interval timing\n", + " cream = Extractor.cream(model, depth=1, error_threshold=.8, constant=False)\n", + " theory_from_cream = cream.extract(train.iloc[:j, :])\n", + " t1 = time.perf_counter()\n", + " res.append(t1 - t0)\n", + " res = np.array(res)\n", + " print(f'{np.mean(res):.2f} +- {np.std(res):.2f}')\n", + " print()\n", + " dat.append((np.mean(res), np.std(res)))\n", + " m[len(dataset.columns) - 1] = [d[0] for d in dat]\n", + " e[len(dataset.columns) - 1] = [d[1] for d in dat]\n", + " print()\n", + "print(m)\n", + "print(e)" + ], + "metadata": { + "collapsed": false, + "pycharm": { + "name": "#%%\n" + } + } + }, + { + "cell_type": "code", + "execution_count": null, + "outputs": [], + "source": [ + "while True:\n", + " #pedro = PEDRO(model, train, max_mae_increase=1.2, min_rule_decrease=0.9, readability_tradeoff=0.1,\n", + " # max_depth=5, patience=1, algorithm=PEDRO.Algorithm.GRIDREX, objective=Objective.MODEL)\n", + " #pedro.search()\n", + " #best = pedro.get_best()[0]\n", + "\n", + " #ranked = FeatureRanker(dataset.columns[:-1]).fit(model, dataset.iloc[:, :-1]).rankings()\n", + " #gridREx = Extractor.gridrex(model, best[3], threshold=best[2])\n", + " #theory_from_gridREx = gridREx.extract(train)\n", + " #print('GridREx performance ({} rules):\\nMAE = {:.2f}\\nMAE fidelity = {:.2f}\\n'\n", + " # .format(gridREx.n_rules, gridREx.mae(test), gridREx.mae(test, model)))\n", + " #print('GridREx extracted rules:\\n\\n' + pretty_theory(theory_from_gridREx))\n", + "\n", + " crash = CRASH(model, train, readability_tradeoff=0.1, max_depth=5, patience=1,\n", + " algorithm=CRASH.Algorithm.CREAM, objective=Objective.MODEL)\n", + " crash.search()\n", + " best = crash.get_best()[0]\n", + "\n", + " cream = Extractor.cream(model, 
depth=best[2], error_threshold=best[3], constant=False)\n", + " theory_from_cream = cream.extract(train)\n", + " print('CREAM performance ({} rules):\\nMAE = {:.2f}\\nMAE fidelity = {:.2f}\\n'\n", + " .format(cream.n_rules, cream.mae(test), cream.mae(test, model)))\n", + "\n", + " print()\n", + " print()\n", + " print()\n" + ], + "metadata": { + "collapsed": false, + "pycharm": { + "name": "#%%\n" + } + } + }, + { + "cell_type": "code", + "execution_count": null, + "outputs": [], + "source": [], + "metadata": { + "collapsed": false, + "pycharm": { + "name": "#%%\n" + } + } + } + ], + "metadata": { + "kernelspec": { + "display_name": "Python 3", + "language": "python", + "name": "python3" + }, + "language_info": { + "codemirror_mode": { + "name": "ipython", + "version": 3 + }, + "file_extension": ".py", + "mimetype": "text/x-python", + "name": "python", + "nbconvert_exporter": "python", + "pygments_lexer": "ipython3", + "version": "3.9.7" + } + }, + "nbformat": 4, + "nbformat_minor": 5 +} \ No newline at end of file