diff --git a/Dockerfile b/Dockerfile
index 010a5937..89b64af3 100644
--- a/Dockerfile
+++ b/Dockerfile
@@ -9,11 +9,7 @@ ENV JUPYTER_CONF_FILE /root/.jupyter/jupyter_notebook_config.py
RUN echo "c.NotebookApp.allow_origin = '*'" > $JUPYTER_CONF_FILE
RUN echo "c.NotebookApp.ip = '0.0.0.0'" >> $JUPYTER_CONF_FILE
RUN mkdir -p /notebook
-<<<<<<< HEAD
COPY demo/*.ipynb /notebook/
COPY test/resources/datasets/*.csv /notebook/datasets/
-=======
-COPY demo/DemoClassificationDisc.ipynb /notebook
->>>>>>> chore: move demos into demo/ directory
WORKDIR /notebook
-CMD jupyter notebook --allow-root --no-browser
+CMD jupyter notebook --allow-root --no-browser
\ No newline at end of file
diff --git a/demo/DemoClassification.ipynb b/demo/DemoClassification.ipynb
new file mode 100644
index 00000000..393649c5
--- /dev/null
+++ b/demo/DemoClassification.ipynb
@@ -0,0 +1,401 @@
+{
+ "cells": [
+ {
+ "cell_type": "markdown",
+ "source": [
+ "# PSyKE's demo\n",
+ "\n",
+ "Some imports."
+ ],
+ "metadata": {
+ "collapsed": false
+ }
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 1,
+ "id": "6b710e7c",
+ "metadata": {},
+ "outputs": [],
+ "source": [
+ "from sklearn.model_selection import train_test_split\n",
+ "from sklearn.datasets import load_iris\n",
+ "import pandas as pd\n",
+ "\n",
+ "from sklearn.neighbors import KNeighborsClassifier\n",
+ "from sklearn.tree import DecisionTreeClassifier\n",
+ "from psyke.cart.predictor import CartPredictor\n",
+ "\n",
+ "from psyke import Extractor\n",
+ "from psyke.regression.strategy import AdaptiveStrategy\n",
+ "from psyke.regression import Grid, FeatureRanker, HyperCubeExtractor\n",
+ "from psyke.utils.logic import pretty_theory"
+ ]
+ },
+ {
+ "cell_type": "markdown",
+ "source": [
+ "Import the iris dataset, separating the features from the class."
+ ],
+ "metadata": {
+ "collapsed": false,
+ "pycharm": {
+ "name": "#%% md\n"
+ }
+ }
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 2,
+ "id": "f8e46c49",
+ "metadata": {},
+ "outputs": [],
+ "source": [
+ "x, y = load_iris(return_X_y=True, as_frame=True)"
+ ]
+ },
+ {
+ "cell_type": "markdown",
+ "source": [
+ "Rename the features."
+ ],
+ "metadata": {
+ "collapsed": false
+ }
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 3,
+ "id": "38d5afb0",
+ "metadata": {},
+ "outputs": [],
+ "source": [
+ "x.columns = ['SepalLength', 'SepalWidth', 'PetalLength', 'PetalWidth']"
+ ]
+ },
+ {
+ "cell_type": "markdown",
+ "source": [
+ "Replace integer indices with the corresponding string class."
+ ],
+ "metadata": {
+ "collapsed": false
+ }
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 4,
+ "id": "4f807185",
+ "metadata": {},
+ "outputs": [
+ {
+ "data": {
+ "text/plain": " target\n0 setosa\n1 setosa\n2 setosa\n3 setosa\n4 setosa\n.. ...\n145 versicolor\n146 versicolor\n147 versicolor\n148 versicolor\n149 versicolor\n\n[150 rows x 1 columns]",
+ "text/html": "
\n\n
\n \n \n | \n target | \n
\n \n \n \n 0 | \n setosa | \n
\n \n 1 | \n setosa | \n
\n \n 2 | \n setosa | \n
\n \n 3 | \n setosa | \n
\n \n 4 | \n setosa | \n
\n \n ... | \n ... | \n
\n \n 145 | \n versicolor | \n
\n \n 146 | \n versicolor | \n
\n \n 147 | \n versicolor | \n
\n \n 148 | \n versicolor | \n
\n \n 149 | \n versicolor | \n
\n \n
\n
150 rows × 1 columns
\n
"
+ },
+ "execution_count": 4,
+ "metadata": {},
+ "output_type": "execute_result"
+ }
+ ],
+ "source": [
+ "y = pd.DataFrame(y).replace({\"target\": {0: 'setosa', 1: 'versicolor', 2: 'virginica'}})  # same order as load_iris target_names\n",
+ "y"
+ ]
+ },
+ {
+ "cell_type": "markdown",
+ "source": [
+ "The final dataset:"
+ ],
+ "metadata": {
+ "collapsed": false
+ }
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 5,
+ "id": "7ac49b4e",
+ "metadata": {},
+ "outputs": [
+ {
+ "data": {
+ "text/plain": " SepalLength SepalWidth PetalLength PetalWidth iris\n0 5.1 3.5 1.4 0.2 setosa\n1 4.9 3.0 1.4 0.2 setosa\n2 4.7 3.2 1.3 0.2 setosa\n3 4.6 3.1 1.5 0.2 setosa\n4 5.0 3.6 1.4 0.2 setosa\n.. ... ... ... ... ...\n145 6.7 3.0 5.2 2.3 versicolor\n146 6.3 2.5 5.0 1.9 versicolor\n147 6.5 3.0 5.2 2.0 versicolor\n148 6.2 3.4 5.4 2.3 versicolor\n149 5.9 3.0 5.1 1.8 versicolor\n\n[150 rows x 5 columns]",
+ "text/html": "\n\n
\n \n \n | \n SepalLength | \n SepalWidth | \n PetalLength | \n PetalWidth | \n iris | \n
\n \n \n \n 0 | \n 5.1 | \n 3.5 | \n 1.4 | \n 0.2 | \n setosa | \n
\n \n 1 | \n 4.9 | \n 3.0 | \n 1.4 | \n 0.2 | \n setosa | \n
\n \n 2 | \n 4.7 | \n 3.2 | \n 1.3 | \n 0.2 | \n setosa | \n
\n \n 3 | \n 4.6 | \n 3.1 | \n 1.5 | \n 0.2 | \n setosa | \n
\n \n 4 | \n 5.0 | \n 3.6 | \n 1.4 | \n 0.2 | \n setosa | \n
\n \n ... | \n ... | \n ... | \n ... | \n ... | \n ... | \n
\n \n 145 | \n 6.7 | \n 3.0 | \n 5.2 | \n 2.3 | \n versicolor | \n
\n \n 146 | \n 6.3 | \n 2.5 | \n 5.0 | \n 1.9 | \n versicolor | \n
\n \n 147 | \n 6.5 | \n 3.0 | \n 5.2 | \n 2.0 | \n versicolor | \n
\n \n 148 | \n 6.2 | \n 3.4 | \n 5.4 | \n 2.3 | \n versicolor | \n
\n \n 149 | \n 5.9 | \n 3.0 | \n 5.1 | \n 1.8 | \n versicolor | \n
\n \n
\n
150 rows × 5 columns
\n
"
+ },
+ "execution_count": 5,
+ "metadata": {},
+ "output_type": "execute_result"
+ }
+ ],
+ "source": [
+ "dataset = x.join(y)\n",
+ "dataset.columns = [*dataset.columns[:-1], 'iris']\n",
+ "dataset"
+ ]
+ },
+ {
+ "cell_type": "markdown",
+ "source": [
+ "Split between train and test set in a reproducible way."
+ ],
+ "metadata": {
+ "collapsed": false
+ }
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 6,
+ "id": "03fc5e2c",
+ "metadata": {},
+ "outputs": [],
+ "source": [
+ "train, test = train_test_split(dataset, test_size=0.5, random_state=0)"
+ ]
+ },
+ {
+ "cell_type": "markdown",
+ "source": [
+ "We use as predictor a KNN with K = 4 and we train it."
+ ],
+ "metadata": {
+ "collapsed": false
+ }
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 7,
+ "id": "aa8a3128",
+ "metadata": {},
+ "outputs": [
+ {
+ "data": {
+ "text/plain": "0.9733333333333334"
+ },
+ "execution_count": 7,
+ "metadata": {},
+ "output_type": "execute_result"
+ }
+ ],
+ "source": [
+ "predictor = KNeighborsClassifier(n_neighbors=4)\n",
+ "predictor.fit(train.iloc[:, :-1], train.iloc[:, -1])\n",
+ "predictor.score(test.iloc[:, :-1], test.iloc[:, -1])"
+ ]
+ },
+ {
+ "cell_type": "markdown",
+ "source": [
+ "We create an extractor that uses the CART algorithm and we extract prolog rules from our trained KNN."
+ ],
+ "metadata": {
+ "collapsed": false
+ }
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 8,
+ "outputs": [
+ {
+ "name": "stdout",
+ "output_type": "stream",
+ "text": [
+ "CART performance (3 rules):\n",
+ "Accuracy = 0.92\n",
+ "Fidelity = 0.92\n",
+ "\n",
+ "\n",
+ "CART extracted rules:\n",
+ "\n",
+ "iris(PetalLength, PetalWidth, SepalLength, SepalWidth, setosa) :-\n",
+ " PetalWidth =< 0.75.\n",
+ "iris(PetalLength, PetalWidth, SepalLength, SepalWidth, virginica) :-\n",
+ " PetalWidth =< 1.55.\n",
+ "iris(PetalLength, PetalWidth, SepalLength, SepalWidth, versicolor).\n"
+ ]
+ }
+ ],
+ "source": [
+ "DTClassifier = DecisionTreeClassifier().fit(train.iloc[:, :-1], predictor.predict(train.iloc[:, :-1]))\n",
+ "cart = Extractor.cart(CartPredictor(DTClassifier))\n",
+ "theory_from_cart = cart.extract(train)\n",
+ "print(f'CART performance ({cart.n_rules} rules):')\n",
+ "print(f'Accuracy = {cart.accuracy(test):.2f}')\n",
+ "print(f'Fidelity = {cart.accuracy(test, predictor):.2f}\\n')\n",
+ "print('\\nCART extracted rules:\\n\\n' + pretty_theory(theory_from_cart))"
+ ],
+ "metadata": {
+ "collapsed": false,
+ "pycharm": {
+ "name": "#%%\n"
+ }
+ }
+ },
+ {
+ "cell_type": "markdown",
+ "source": [
+ "We create a GridEx extractor to extract prolog rules from the same KNN."
+ ],
+ "metadata": {
+ "collapsed": false
+ }
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 9,
+ "outputs": [
+ {
+ "name": "stdout",
+ "output_type": "stream",
+ "text": [
+ "GridEx performance (3 rules):\n",
+ "Accuracy = 0.88\n",
+ "Accuracy fidelity = 0.91\n",
+ "\n",
+ "GridEx extracted rules:\n",
+ "\n",
+ "iris(PetalLength, PetalWidth, SepalLength, SepalWidth, setosa) :-\n",
+ " PetalLength in [1.199999, 1.912499].\n",
+ "iris(PetalLength, PetalWidth, SepalLength, SepalWidth, virginica) :-\n",
+ " PetalLength in [2.625, 4.7625].\n",
+ "iris(PetalLength, PetalWidth, SepalLength, SepalWidth, versicolor) :-\n",
+ " PetalLength in [4.7625, 6.900001].\n"
+ ]
+ }
+ ],
+ "source": [
+ "ranked = FeatureRanker(x.columns).fit(predictor, x).rankings()\n",
+ "gridEx = Extractor.gridex(predictor, Grid(1, AdaptiveStrategy(ranked, [(0.85, 8)])), threshold=.1, min_examples=1)\n",
+ "theory_from_gridEx = gridEx.extract(train)\n",
+ "print('GridEx performance ({} rules):\\nAccuracy = {:.2f}\\nAccuracy fidelity = {:.2f}\\n'\n",
+ " .format(gridEx.n_rules, gridEx.accuracy(test), gridEx.accuracy(test, predictor)))\n",
+ "print('GridEx extracted rules:\\n\\n' + pretty_theory(theory_from_gridEx))"
+ ],
+ "metadata": {
+ "collapsed": false,
+ "pycharm": {
+ "name": "#%%\n"
+ }
+ }
+ },
+ {
+ "cell_type": "markdown",
+ "source": [
+ "We use CReEPy and CREAM cluster-based extractors to perform the extraction."
+ ],
+ "metadata": {
+ "collapsed": false,
+ "pycharm": {
+ "name": "#%% md\n"
+ }
+ }
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 10,
+ "outputs": [
+ {
+ "name": "stdout",
+ "output_type": "stream",
+ "text": [
+ "CReEPy performance (3 rules):\n",
+ "Accuracy = 0.79\n",
+ "Fidelity = 0.81\n",
+ "\n",
+ "CReEPy extracted rules:\n",
+ "\n",
+ "iris(PetalLength, PetalWidth, SepalLength, SepalWidth, virginica) :-\n",
+ " SepalLength in [4.899999, 6.700001], SepalWidth in [2.199999, 3.200001], PetalLength in [2.999999, 5.000001], PetalWidth in [0.999999, 1.800001].\n",
+ "iris(PetalLength, PetalWidth, SepalLength, SepalWidth, versicolor) :-\n",
+ " SepalLength in [4.899999, 7.700001], SepalWidth in [2.199999, 3.800001], PetalLength in [2.999999, 6.900001], PetalWidth in [0.999999, 2.500001].\n",
+ "iris(PetalLength, PetalWidth, SepalLength, SepalWidth, setosa) :-\n",
+ " SepalLength in [4.399999, 7.900001], SepalWidth in [2.199999, 4.100001], PetalLength in [1.199999, 6.900001], PetalWidth in [0.099999, 2.500001].\n"
+ ]
+ }
+ ],
+ "source": [
+ "creepy = Extractor.creepy(predictor, depth=2, error_threshold=0.1, output=HyperCubeExtractor.Target.CLASSIFICATION)\n",
+ "theory_from_creepy = creepy.extract(train)\n",
+ "print('CReEPy performance ({} rules):\\nAccuracy = {:.2f}\\nFidelity = {:.2f}\\n'\n",
+ " .format(creepy.n_rules, creepy.accuracy(test), creepy.accuracy(test, predictor)))\n",
+ "print('CReEPy extracted rules:\\n\\n' + pretty_theory(theory_from_creepy))"
+ ],
+ "metadata": {
+ "collapsed": false,
+ "pycharm": {
+ "name": "#%%\n"
+ }
+ }
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 11,
+ "outputs": [
+ {
+ "name": "stdout",
+ "output_type": "stream",
+ "text": [
+ "CREAM performance (3 rules):\n",
+ "Accuracy = 0.79\n",
+ "Fidelity = 0.81\n",
+ "\n",
+ "CREAM extracted rules:\n",
+ "\n",
+ "iris(PetalLength, PetalWidth, SepalLength, SepalWidth, virginica) :-\n",
+ " SepalLength in [4.899999, 6.700001], SepalWidth in [2.199999, 3.200001], PetalLength in [2.999999, 5.000001], PetalWidth in [0.999999, 1.800001].\n",
+ "iris(PetalLength, PetalWidth, SepalLength, SepalWidth, versicolor) :-\n",
+ " SepalLength in [4.899999, 7.700001], SepalWidth in [2.199999, 3.800001], PetalLength in [2.999999, 6.900001], PetalWidth in [0.999999, 2.500001].\n",
+ "iris(PetalLength, PetalWidth, SepalLength, SepalWidth, setosa) :-\n",
+ " SepalLength in [4.399999, 7.900001], SepalWidth in [2.199999, 4.100001], PetalLength in [1.199999, 6.900001], PetalWidth in [0.099999, 2.500001].\n"
+ ]
+ }
+ ],
+ "source": [
+ "cream = Extractor.cream(predictor, depth=2, error_threshold=0.1, output=HyperCubeExtractor.Target.CLASSIFICATION)\n",
+ "theory_from_cream = cream.extract(train)\n",
+ "print('CREAM performance ({} rules):\\nAccuracy = {:.2f}\\nFidelity = {:.2f}\\n'\n",
+ " .format(cream.n_rules, cream.accuracy(test), cream.accuracy(test, predictor)))\n",
+ "print('CREAM extracted rules:\\n\\n' + pretty_theory(theory_from_cream))"
+ ],
+ "metadata": {
+ "collapsed": false,
+ "pycharm": {
+ "name": "#%%\n"
+ }
+ }
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 11,
+ "outputs": [],
+ "source": [],
+ "metadata": {
+ "collapsed": false,
+ "pycharm": {
+ "name": "#%%\n"
+ }
+ }
+ }
+ ],
+ "metadata": {
+ "kernelspec": {
+ "display_name": "Python 3 (ipykernel)",
+ "language": "python",
+ "name": "python3"
+ },
+ "language_info": {
+ "codemirror_mode": {
+ "name": "ipython",
+ "version": 3
+ },
+ "file_extension": ".py",
+ "mimetype": "text/x-python",
+ "name": "python",
+ "nbconvert_exporter": "python",
+ "pygments_lexer": "ipython3",
+ "version": "3.9.7"
+ }
+ },
+ "nbformat": 4,
+ "nbformat_minor": 5
+}
\ No newline at end of file
diff --git a/demo/DemoRegression.ipynb b/demo/DemoRegression.ipynb
new file mode 100644
index 00000000..3d6a9f5e
--- /dev/null
+++ b/demo/DemoRegression.ipynb
@@ -0,0 +1,352 @@
+{
+ "cells": [
+ {
+ "cell_type": "markdown",
+ "id": "f52126f3",
+ "metadata": {},
+ "source": [
+ "# PSyKE's demo for regression tasks\n",
+ "\n",
+ "Some imports."
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 1,
+ "id": "6b710e7c",
+ "metadata": {},
+ "outputs": [],
+ "source": [
+ "from psyke import Extractor\n",
+ "from psyke.optimizer.pedro import PEDRO\n",
+ "from psyke.optimizer import Objective\n",
+ "from psyke.optimizer.crash import CRASH\n",
+ "from psyke.regression import HyperCubeExtractor\n",
+ "from sklearn.neighbors import KNeighborsRegressor\n",
+ "from psyke.utils.logic import pretty_theory\n",
+ "from sklearn.model_selection import train_test_split\n",
+ "from matplotlib import pyplot as plt\n",
+ "import pandas as pd"
+ ]
+ },
+ {
+ "cell_type": "markdown",
+ "id": "d7c90ed2",
+ "metadata": {
+ "pycharm": {
+ "name": "#%% md\n"
+ }
+ },
+ "source": [
+ "Import an artificial dataset."
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 7,
+ "id": "f8e46c49",
+ "metadata": {},
+ "outputs": [],
+ "source": [
+ "dataset = pd.read_csv(\"test/resources/datasets/df.csv\")\n",
+ "#dataset = pd.read_csv(\"test/resources/datasets/CCPP.csv\", sep=\";\", decimal=\",\")\n",
+ "#dataset"
+ ]
+ },
+ {
+ "cell_type": "markdown",
+ "id": "d673b766",
+ "metadata": {},
+ "source": [
+ "Split between train and test set in a reproducible way."
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 8,
+ "id": "03fc5e2c",
+ "metadata": {},
+ "outputs": [],
+ "source": [
+ "dataset = dataset[[\"X\", \"Y\", \"Z4\"]].dropna()\n",
+ "train, test = train_test_split(dataset, test_size=0.5, random_state=10)"
+ ]
+ },
+ {
+ "cell_type": "markdown",
+ "id": "fa6754a0",
+ "metadata": {},
+ "source": [
+ "We use as predictor a KNN and we train it."
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 9,
+ "id": "bed764ca",
+ "metadata": {},
+ "outputs": [
+ {
+ "name": "stdout",
+ "output_type": "stream",
+ "text": [
+ "0.03688944007870007\n"
+ ]
+ }
+ ],
+ "source": [
+ "predictor = KNeighborsRegressor(n_neighbors=3).fit(train.iloc[:, :-1], train.iloc[:, -1])\n",
+ "\n",
+ "print((abs(predictor.predict(test.iloc[:, :-1]).flatten() - test.iloc[:, -1])).mean())"
+ ]
+ },
+ {
+ "cell_type": "markdown",
+ "id": "96835867",
+ "metadata": {},
+ "source": [
+ "We create several extractors that use the CReEPy, CREAM, ITER, GridEx and GridREx algorithms to extract prolog rules from the predictor."
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 10,
+ "outputs": [
+ {
+ "name": "stdout",
+ "output_type": "stream",
+ "text": [
+ "CReEPy performance (4 rules):\n",
+ "MAE = 0.00\n",
+ "MAE fidelity = 0.04\n",
+ "\n",
+ "CReEPy extracted rules:\n",
+ "\n",
+ "'Z4'(X, Y, Z4) :-\n",
+ " X in [0.402684, 0.543625], Y in [0.416106, 0.597316], Z4 is -6.805486 + 54.455317 * X + -24.861292 * Y.\n",
+ "'Z4'(X, Y, Z4) :-\n",
+ " X in [0.402684, 0.597316], Y in [0.402684, 0.597316], Z4 is -6.6031 + 54.353719 * X + -25.132222 * Y.\n",
+ "'Z4'(X, Y, Z4) :-\n",
+ " X in [0.154361, 0.845639], Y in [0.154361, 0.845639], Z4 is 9.003897 + -11.99489 * X + 14.988029 * Y.\n",
+ "'Z4'(X, Y, Z4) :-\n",
+ " X in [-0.000001, 1.000001], Y in [-0.000001, 1.000001], Z4 is 1.999564 + 3.999876 * X + -2.999378 * Y.\n"
+ ]
+ }
+ ],
+ "source": [
+ "creepy = Extractor.creepy(predictor, depth=3, error_threshold=0.02, output=HyperCubeExtractor.Target.REGRESSION)\n",
+ "theory_from_creepy = creepy.extract(train)\n",
+ "print('CReEPy performance ({} rules):\\nMAE = {:.2f}\\nMAE fidelity = {:.2f}\\n'\n",
+ " .format(creepy.n_rules, creepy.mae(test), creepy.mae(test, predictor)))\n",
+ "print('CReEPy extracted rules:\\n\\n' + pretty_theory(theory_from_creepy))"
+ ],
+ "metadata": {
+ "collapsed": false,
+ "pycharm": {
+ "name": "#%%\n"
+ }
+ }
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 11,
+ "outputs": [
+ {
+ "name": "stdout",
+ "output_type": "stream",
+ "text": [
+ "CREAM performance (4 rules):\n",
+ "MAE = 2.39\n",
+ "MAE fidelity = 2.38\n",
+ "\n",
+ "CREAM extracted rules:\n",
+ "\n",
+ "'Z4'(X, Y, 8.051474) :-\n",
+ " X in [0.402684, 0.597316], Y in [0.402684, 0.597316].\n",
+ "'Z4'(X, Y, 10.458814) :-\n",
+ " X in [0.154361, 0.845639], Y in [0.154361, 0.845639].\n",
+ "'Z4'(X, Y, 0.931622) :-\n",
+ " X in [-0.000001, 0.496645], Y in [-0.000001, 1.000001].\n",
+ "'Z4'(X, Y, 4.024491) :-\n",
+ " X in [-0.000001, 1.000001], Y in [-0.000001, 1.000001].\n"
+ ]
+ }
+ ],
+ "source": [
+ "cream = Extractor.cream(predictor, depth=2, error_threshold=0.02, output=HyperCubeExtractor.Target.CONSTANT)\n",
+ "theory_from_cream = cream.extract(train)\n",
+ "print('CREAM performance ({} rules):\\nMAE = {:.2f}\\nMAE fidelity = {:.2f}\\n'\n",
+ " .format(cream.n_rules, cream.mae(test), cream.mae(test, predictor)))\n",
+ "print('CREAM extracted rules:\\n\\n' + pretty_theory(theory_from_cream))"
+ ],
+ "metadata": {
+ "collapsed": false,
+ "pycharm": {
+ "name": "#%%\n"
+ }
+ }
+ },
+ {
+ "cell_type": "code",
+ "execution_count": null,
+ "outputs": [],
+ "source": [
+ "#f, (ax1, ax2) = plt.subplots(1, 2, sharey=True)\n",
+ "#plt.ylim((0, 1))\n",
+ "#ax1.scatter(test.X, test.Y, c=test.iloc[:, -1], s = 0.5, cmap=plt.cm.get_cmap('gist_rainbow'))\n",
+ "#ax1.set_xlim((0, 1))\n",
+ "#ax1.set_aspect(\"equal\")\n",
+ "#ax2.scatter(test.X, test.Y, c=predictor.predict(test.iloc[:, :-1]), s = 0.5, cmap=plt.cm.get_cmap('gist_rainbow'))\n",
+ "#ax2.set_xlim((0, 1))\n",
+ "#ax2.set_aspect(\"equal\")\n",
+ "#plt.show()\n",
+ "\n",
+ "crash = CRASH(predictor, train, max_depth=5, patience=2, readability_tradeoff=.5, algorithm=CRASH.Algorithm.CReEPy)\n",
+ "crash.search()\n",
+ "(_, _, depth, threshold) = crash.get_best()[0]\n",
+ "\n",
+ "creepy = Extractor.creepy(predictor, depth=depth, error_threshold=threshold, output=HyperCubeExtractor.Target.CONSTANT)\n",
+ "theory_from_creepy = creepy.extract(train)\n",
+ "print('CReEPy performance ({} rules):\\nMAE = {:.2f}\\nMAE fidelity = {:.2f}\\n'\n",
+ " .format(creepy.n_rules, creepy.mae(test), creepy.mae(test, predictor)))\n",
+ "#print('CReEPy extracted rules:\\n\\n' + pretty_theory(theory_from_cream))\n",
+ "\n",
+ "plt.scatter(test.X, test.Y, c=creepy.predict(test.iloc[:, :-1]), s=0.5, cmap=plt.cm.get_cmap('gist_rainbow'))\n",
+ "plt.xlim((0, 1))\n",
+ "plt.ylim((0, 1))\n",
+ "plt.gca().set_aspect(\"equal\")\n",
+ "plt.show()"
+ ],
+ "metadata": {
+ "collapsed": false,
+ "pycharm": {
+ "name": "#%%\n"
+ }
+ }
+ },
+ {
+ "cell_type": "code",
+ "execution_count": null,
+ "outputs": [],
+ "source": [
+ "#f, (ax1, ax2) = plt.subplots(1, 2, sharey=True)\n",
+ "#plt.ylim((0, 1))\n",
+ "#ax1.scatter(test.X, test.Y, c=test.iloc[:, -1], s = 0.5, cmap=plt.cm.get_cmap('gist_rainbow'))\n",
+ "#ax1.set_xlim((0, 1))\n",
+ "#ax1.set_aspect(\"equal\")\n",
+ "#ax2.scatter(test.X, test.Y, c=predictor.predict(test.iloc[:, :-1]), s = 0.5, cmap=plt.cm.get_cmap('gist_rainbow'))\n",
+ "#ax2.set_xlim((0, 1))\n",
+ "#ax2.set_aspect(\"equal\")\n",
+ "#plt.show()\n",
+ "\n",
+ "crash = CRASH(predictor, train, max_depth=5, patience=2, readability_tradeoff=.75, algorithm=CRASH.Algorithm.CREAM)\n",
+ "crash.search()\n",
+ "(_, _, depth, threshold) = crash.get_best()[0]\n",
+ "\n",
+ "cream = Extractor.cream(predictor, depth=depth, error_threshold=threshold, output=HyperCubeExtractor.Target.CONSTANT)\n",
+ "theory_from_cream = cream.extract(train)\n",
+ "print('CREAM performance ({} rules):\\nMAE = {:.2f}\\nMAE fidelity = {:.2f}\\n'\n",
+ " .format(cream.n_rules, cream.mae(test), cream.mae(test, predictor)))\n",
+ "#print('CREAM extracted rules:\\n\\n' + pretty_theory(theory_from_cream))\n",
+ "\n",
+ "plt.scatter(test.X, test.Y, c = cream.predict(test.iloc[:, :-1]), s = 0.5, cmap=plt.cm.get_cmap('gist_rainbow'))\n",
+ "plt.xlim((0, 1))\n",
+ "plt.ylim((0, 1))\n",
+ "plt.gca().set_aspect(\"equal\")\n",
+ "plt.show()"
+ ],
+ "metadata": {
+ "collapsed": false,
+ "pycharm": {
+ "name": "#%%\n"
+ }
+ }
+ },
+ {
+ "cell_type": "code",
+ "execution_count": null,
+ "outputs": [],
+ "source": [
+ "it = Extractor.iter(predictor, min_update=1.0 / 20, n_points=1, max_iterations=600,\n",
+ " min_examples=100, threshold=1.5)\n",
+ "theory_from_iter = it.extract(train)\n",
+ "print('ITER performance ({} rules):\\nMAE = {:.2f}\\nMAE fidelity = {:.2f}\\n'\n",
+ " .format(it.n_rules, it.mae(test), it.mae(test, predictor)))\n",
+ "print('ITER extracted rules:\\n\\n' + pretty_theory(theory_from_iter))"
+ ],
+ "metadata": {
+ "collapsed": false,
+ "pycharm": {
+ "name": "#%%\n"
+ }
+ }
+ },
+ {
+ "cell_type": "code",
+ "execution_count": null,
+ "outputs": [],
+ "source": [
+ "pedro = PEDRO(predictor, train, max_mae_increase=1.2, min_rule_decrease=0.9, readability_tradeoff=0.1,\n",
+ " max_depth=5, patience=3, algorithm=PEDRO.Algorithm.GRIDEX, objective=Objective.MODEL)\n",
+ "pedro.search()\n",
+ "(_, _, threshold, grid) = pedro.get_best()[0]\n",
+ "\n",
+ "gridEx = Extractor.gridex(predictor, grid, threshold=threshold)\n",
+ "theory_from_gridEx = gridEx.extract(train)\n",
+ "print('GridEx performance ({} rules):\\nMAE = {:.2f}\\nMAE fidelity = {:.2f}\\n'\n",
+ " .format(gridEx.n_rules, gridEx.mae(test), gridEx.mae(test, predictor)))\n",
+ "print('GridEx extracted rules:\\n\\n' + pretty_theory(theory_from_gridEx))"
+ ],
+ "metadata": {
+ "collapsed": false,
+ "pycharm": {
+ "name": "#%%\n"
+ }
+ }
+ },
+ {
+ "cell_type": "code",
+ "execution_count": null,
+ "outputs": [],
+ "source": [
+ "pedro = PEDRO(predictor, train, max_mae_increase=1.2, min_rule_decrease=0.9, readability_tradeoff=0.1,\n",
+ " max_depth=5, patience=3, algorithm=PEDRO.Algorithm.GRIDREX, objective=Objective.MODEL)\n",
+ "pedro.search()\n",
+ "(_, _, threshold, grid) = pedro.get_best()[0]\n",
+ "\n",
+ "gridREx = Extractor.gridrex(predictor, grid, threshold=threshold)\n",
+ "theory_from_gridREx = gridREx.extract(train)\n",
+ "print('GridREx performance ({} rules):\\nMAE = {:.2f}\\nMAE fidelity = {:.2f}\\n'\n",
+ " .format(gridREx.n_rules, gridREx.mae(test), gridREx.mae(test, predictor)))\n",
+ "print('GridREx extracted rules:\\n\\n' + pretty_theory(theory_from_gridREx))"
+ ],
+ "metadata": {
+ "collapsed": false,
+ "pycharm": {
+ "name": "#%%\n"
+ }
+ }
+ }
+ ],
+ "metadata": {
+ "kernelspec": {
+ "display_name": "Python 3",
+ "language": "python",
+ "name": "python3"
+ },
+ "language_info": {
+ "codemirror_mode": {
+ "name": "ipython",
+ "version": 3
+ },
+ "file_extension": ".py",
+ "mimetype": "text/x-python",
+ "name": "python",
+ "nbconvert_exporter": "python",
+ "pygments_lexer": "ipython3",
+ "version": "3.9.7"
+ }
+ },
+ "nbformat": 4,
+ "nbformat_minor": 5
+}
\ No newline at end of file
diff --git a/demo/StairwAIdemo.ipynb b/demo/StairwAIdemo.ipynb
new file mode 100644
index 00000000..881a8fb1
--- /dev/null
+++ b/demo/StairwAIdemo.ipynb
@@ -0,0 +1,2080 @@
+{
+ "cells": [
+ {
+ "cell_type": "markdown",
+ "id": "f52126f3",
+ "metadata": {},
+ "source": [
+ "# PSyKE's demo for regression tasks\n",
+ "\n",
+ "Some imports."
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 1,
+ "id": "6b710e7c",
+ "metadata": {},
+ "outputs": [],
+ "source": [
+ "import pickle\n",
+ "import pandas as pd\n",
+ "import numpy as np\n",
+ "from sklearn.model_selection import train_test_split\n",
+ "from os.path import exists\n",
+ "from psyke import Extractor\n",
+ "from psyke.utils.logic import pretty_theory\n",
+ "from psyke.regression import FeatureRanker\n",
+ "from psyke.optimizer.pedro import PEDRO\n",
+ "from sklearn.neighbors import KNeighborsRegressor\n",
+ "from psyke.optimizer import Objective\n",
+ "from psyke.optimizer.crash import CRASH\n",
+ "\n",
+ "import warnings\n",
+ "warnings.simplefilter(\"ignore\")"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 2,
+ "outputs": [],
+ "source": [
+ "algorithms = [\"contingency\", \"contingency\", \"anticipate\", \"anticipate\", \"contingency\", \"anticipate\"]\n",
+ "models = [\n",
+ " \"CONTINGENCY_no_input-memory_DecisionTree_MaxDepth10\",\n",
+ " \"CONTINGENCY_no_input-time_DecisionTree_MaxDepth10\",\n",
+ " \"ANTICIPATE_no_input-memory_DecisionTree_MaxDepth10\",\n",
+ " \"ANTICIPATE_no_input-time_DecisionTree_MaxDepth10\",\n",
+ " \"CONTINGENCY_input-cost_DecisionTree_MaxDepth15\",\n",
+ " \"ANTICIPATE_input-cost_DecisionTree_MaxDepth15\"\n",
+ "]\n",
+ "models = [\n",
+ " pickle.load(open(\"test/resources/datasets/models/\" + algorithm + \"/\" + path, 'rb'))\n",
+ " for path, algorithm in zip(models, algorithms)\n",
+ "]"
+ ],
+ "metadata": {
+ "collapsed": false,
+ "pycharm": {
+ "name": "#%%\n"
+ }
+ }
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 3,
+ "outputs": [],
+ "source": [
+ "def process(algorithm):\n",
+ "    \"\"\"Condense '<algorithm>_trainDataset.csv' into one feature row per entry, save it as '<algorithm>.csv' and return it.\"\"\"\n",
+ "    df = pd.read_csv(\"test/resources/datasets/datasets/\" + algorithm + \"_trainDataset.csv\")\n",
+ "\n",
+ "    # Removes header entries (rows whose 'sol(keuro)' cell holds the column name itself)\n",
+ "    df = df[df['sol(keuro)'] != 'sol(keuro)']\n",
+ "\n",
+ "    def to_floats(entry):\n",
+ "        # Drop the first and last character (presumably the enclosing brackets) and split on whitespace.\n",
+ "        # np.float_ was removed in NumPy 2.0, so parse with an explicit float dtype instead.\n",
+ "        return np.array(entry[1:-1].split(), dtype=float)\n",
+ "\n",
+ "    pv, load = df['PV(kW)'].map(to_floats), df['Load(kW)'].map(to_floats)\n",
+ "\n",
+ "    X = pd.DataFrame()\n",
+ "    X['PV_mean'] = pv.map(lambda values: values.mean())\n",
+ "    X['PV_std'] = pv.map(lambda values: values.std())\n",
+ "    X['Load_mean'] = load.map(lambda values: values.mean())\n",
+ "    X['Load_std'] = load.map(lambda values: values.std())\n",
+ "    X['nScenarios'] = df['nScenarios']\n",
+ "    X['cost'] = df['sol(keuro)']\n",
+ "    X['time'] = df['time(sec)']\n",
+ "    X['memo'] = df['memAvg(MB)']\n",
+ "\n",
+ "    X.to_csv(\"test/resources/datasets/datasets/\" + algorithm + \".csv\", index=False)\n",
+ "    return X"
+ ],
+ "metadata": {
+ "collapsed": false,
+ "pycharm": {
+ "name": "#%%\n"
+ }
+ }
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 4,
+ "outputs": [],
+ "source": [
+ "toRemove = [\n",
+ " ['PV_mean', 'PV_std', 'Load_mean', 'Load_std', 'time', 'cost'],\n",
+ " ['PV_mean', 'PV_std', 'Load_mean', 'Load_std', 'memo', 'cost'],\n",
+ " ['PV_mean', 'PV_std', 'Load_mean', 'Load_std', 'time', 'cost'],\n",
+ " ['PV_mean', 'PV_std', 'Load_mean', 'Load_std', 'memo', 'cost'],\n",
+ " [\"time\", \"memo\"],\n",
+ " [\"time\", \"memo\"]\n",
+ "]\n",
+ "\n",
+ "features = [\n",
+ " [\"nTraces\"],\n",
+ " [\"nTraces\"],\n",
+ " [\"nScenarios\"],\n",
+ " [\"nScenarios\"],\n",
+ " ['PV_mean', 'PV_std', 'Load_mean', 'Load_std', 'nTraces'],\n",
+ " ['PV_mean', 'PV_std', 'Load_mean', 'Load_std', 'nScenarios']\n",
+ "]\n",
+ "\n",
+ "targets = [\"memo\", \"time\", \"memo\", \"time\", \"cost\", \"cost\"]\n",
+ "\n",
+ "output = []"
+ ],
+ "metadata": {
+ "collapsed": false,
+ "pycharm": {
+ "name": "#%%\n"
+ }
+ }
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 5,
+ "outputs": [
+ {
+ "name": "stdout",
+ "output_type": "stream",
+ "text": [
+ "contingency memo 1\n",
+ "MAE = 3.64\n",
+ "Algorithm.CReEPy. Depth: 1. Threshold = 3.43. MAE = 2.29, 2 rules\n",
+ "Algorithm.CReEPy. Depth: 1. Threshold = 5.34. MAE = 2.25, 2 rules\n",
+ "Algorithm.CReEPy. Depth: 1. Threshold = 7.25. MAE = 2.25, 2 rules\n",
+ "\n",
+ "Algorithm.CReEPy. Depth: 2. Threshold = 3.43. MAE = 2.29, 2 rules\n",
+ "Algorithm.CReEPy. Depth: 2. Threshold = 5.34. MAE = 2.32, 2 rules\n",
+ "\n",
+ "Algorithm.CReEPy. Depth: 3. Threshold = 3.43. MAE = 2.25, 2 rules\n",
+ "Algorithm.CReEPy. Depth: 3. Threshold = 5.34. MAE = 2.30, 2 rules\n",
+ "\n",
+ "Algorithm.CReEPy. Depth: 4. Threshold = 3.43. MAE = 2.17, 3 rules\n",
+ "Algorithm.CReEPy. Depth: 4. Threshold = 5.34. MAE = 2.30, 2 rules\n",
+ "Algorithm.CReEPy. Depth: 4. Threshold = 7.25. MAE = 2.25, 2 rules\n",
+ "Algorithm.CReEPy. Depth: 4. Threshold = 9.16. MAE = 2.25, 2 rules\n",
+ "\n",
+ "**********************\n",
+ "*Best Algorithm.CReEPy*\n",
+ "**********************\n",
+ "MAE = 2.17, 3 rules\n",
+ "Threshold = 3.43\n",
+ "Depth = 4\n",
+ "\n",
+ "**********************\n",
+ "*Best MAE *\n",
+ "**********************\n",
+ "MAE = 2.17, 3 rules\n",
+ "Threshold = 3.43\n",
+ "Depth = 4\n",
+ "\n",
+ "**********************\n",
+ "*Best N rules*\n",
+ "**********************\n",
+ "MAE = 2.25, 2 rules\n",
+ "Threshold = 7.25\n",
+ "Depth = 4\n",
+ "\n",
+ "CReEPy performance (2 rules):\n",
+ "MAE = 4.65\n",
+ "MAE fidelity = 2.28\n",
+ "\n",
+ "Algorithm.CREAM. Depth: 1. Threshold = 3.43. MAE = 2.14, 2 rules\n",
+ "Algorithm.CREAM. Depth: 1. Threshold = 5.34. MAE = 2.18, 2 rules\n",
+ "\n",
+ "Algorithm.CREAM. Depth: 2. Threshold = 3.43. MAE = 2.14, 2 rules\n",
+ "Algorithm.CREAM. Depth: 2. Threshold = 5.34. MAE = 2.18, 2 rules\n",
+ "\n",
+ "**********************\n",
+ "*Best Algorithm.CREAM*\n",
+ "**********************\n",
+ "MAE = 2.14, 2 rules\n",
+ "Threshold = 3.43\n",
+ "Depth = 2\n",
+ "\n",
+ "**********************\n",
+ "*Best MAE *\n",
+ "**********************\n",
+ "MAE = 2.14, 2 rules\n",
+ "Threshold = 3.43\n",
+ "Depth = 2\n",
+ "\n",
+ "**********************\n",
+ "*Best N rules*\n",
+ "**********************\n",
+ "MAE = 2.18, 2 rules\n",
+ "Threshold = 5.34\n",
+ "Depth = 2\n",
+ "\n",
+ "CREAM performance (2 rules):\n",
+ "MAE = 4.63\n",
+ "MAE fidelity = 2.31\n",
+ "\n",
+ "\n",
+ "\n",
+ "\n",
+ "\n",
+ "contingency time 1\n",
+ "MAE = 0.84\n",
+ "Algorithm.CReEPy. Depth: 1. Threshold = 0.74. MAE = 0.76, 2 rules\n",
+ "Algorithm.CReEPy. Depth: 1. Threshold = 1.15. MAE = 0.80, 2 rules\n",
+ "\n",
+ "Algorithm.CReEPy. Depth: 2. Threshold = 0.74. MAE = 0.74, 2 rules\n",
+ "Algorithm.CReEPy. Depth: 2. Threshold = 1.15. MAE = 0.82, 2 rules\n",
+ "\n",
+ "Algorithm.CReEPy. Depth: 3. Threshold = 0.74. MAE = 0.77, 4 rules\n",
+ "Algorithm.CReEPy. Depth: 3. Threshold = 1.15. MAE = 0.82, 2 rules\n",
+ "Algorithm.CReEPy. Depth: 3. Threshold = 1.56. MAE = 0.82, 2 rules\n",
+ "\n",
+ "Algorithm.CReEPy. Depth: 4. Threshold = 0.74. MAE = 0.74, 2 rules\n",
+ "Algorithm.CReEPy. Depth: 4. Threshold = 1.15. MAE = 0.74, 2 rules\n",
+ "\n",
+ "Algorithm.CReEPy. Depth: 5. Threshold = 0.74. MAE = 0.74, 2 rules\n",
+ "Algorithm.CReEPy. Depth: 5. Threshold = 1.15. MAE = 0.82, 2 rules\n",
+ "\n",
+ "**********************\n",
+ "*Best Algorithm.CReEPy*\n",
+ "**********************\n",
+ "MAE = 0.74, 2 rules\n",
+ "Threshold = 0.74\n",
+ "Depth = 5\n",
+ "\n",
+ "**********************\n",
+ "*Best MAE *\n",
+ "**********************\n",
+ "MAE = 0.74, 2 rules\n",
+ "Threshold = 0.74\n",
+ "Depth = 5\n",
+ "\n",
+ "**********************\n",
+ "*Best N rules*\n",
+ "**********************\n",
+ "MAE = 0.82, 2 rules\n",
+ "Threshold = 1.15\n",
+ "Depth = 5\n",
+ "\n",
+ "CReEPy performance (6 rules):\n",
+ "MAE = 0.97\n",
+ "MAE fidelity = 0.65\n",
+ "\n",
+ "Algorithm.CREAM. Depth: 1. Threshold = 0.74. MAE = 0.79, 2 rules\n",
+ "Algorithm.CREAM. Depth: 1. Threshold = 1.15. MAE = 0.81, 2 rules\n",
+ "\n",
+ "Algorithm.CREAM. Depth: 2. Threshold = 0.74. MAE = 0.78, 3 rules\n",
+ "Algorithm.CREAM. Depth: 2. Threshold = 1.15. MAE = 0.81, 2 rules\n",
+ "Algorithm.CREAM. Depth: 2. Threshold = 1.56. MAE = 0.81, 2 rules\n",
+ "Algorithm.CREAM. Depth: 2. Threshold = 1.98. MAE = 0.81, 2 rules\n",
+ "Algorithm.CREAM. Depth: 2. Threshold = 2.39. MAE = 0.81, 2 rules\n",
+ "Algorithm.CREAM. Depth: 2. Threshold = 2.80. MAE = 0.81, 2 rules\n",
+ "\n",
+ "**********************\n",
+ "*Best Algorithm.CREAM*\n",
+ "**********************\n",
+ "MAE = 0.78, 3 rules\n",
+ "Threshold = 0.74\n",
+ "Depth = 2\n",
+ "\n",
+ "**********************\n",
+ "*Best MAE *\n",
+ "**********************\n",
+ "MAE = 0.78, 3 rules\n",
+ "Threshold = 0.74\n",
+ "Depth = 2\n",
+ "\n",
+ "**********************\n",
+ "*Best N rules*\n",
+ "**********************\n",
+ "MAE = 0.81, 2 rules\n",
+ "Threshold = 2.80\n",
+ "Depth = 2\n",
+ "\n",
+ "CREAM performance (3 rules):\n",
+ "MAE = 1.03\n",
+ "MAE fidelity = 0.77\n",
+ "\n",
+ "\n",
+ "\n",
+ "\n",
+ "\n",
+ "anticipate memo 1\n",
+ "MAE = 4.10\n",
+ "Algorithm.CReEPy. Depth: 1. Threshold = 3.75. MAE = 3.37, 2 rules\n",
+ "Algorithm.CReEPy. Depth: 1. Threshold = 5.83. MAE = 3.37, 2 rules\n",
+ "\n",
+ "Algorithm.CReEPy. Depth: 2. Threshold = 3.75. MAE = 3.37, 2 rules\n",
+ "Algorithm.CReEPy. Depth: 2. Threshold = 5.83. MAE = 4.01, 2 rules\n",
+ "\n",
+ "**********************\n",
+ "*Best Algorithm.CReEPy*\n",
+ "**********************\n",
+ "MAE = 3.37, 2 rules\n",
+ "Threshold = 3.75\n",
+ "Depth = 2\n",
+ "\n",
+ "**********************\n",
+ "*Best MAE *\n",
+ "**********************\n",
+ "MAE = 3.37, 2 rules\n",
+ "Threshold = 3.75\n",
+ "Depth = 2\n",
+ "\n",
+ "**********************\n",
+ "*Best N rules*\n",
+ "**********************\n",
+ "MAE = 4.01, 2 rules\n",
+ "Threshold = 5.83\n",
+ "Depth = 2\n",
+ "\n",
+ "CReEPy performance (2 rules):\n",
+ "MAE = 4.96\n",
+ "MAE fidelity = 3.40\n",
+ "\n",
+ "Algorithm.CREAM. Depth: 1. Threshold = 3.75. MAE = 4.07, 2 rules\n",
+ "Algorithm.CREAM. Depth: 1. Threshold = 5.83. MAE = 4.07, 2 rules\n",
+ "\n",
+ "Algorithm.CREAM. Depth: 2. Threshold = 3.75. MAE = 3.84, 3 rules\n",
+ "Algorithm.CREAM. Depth: 2. Threshold = 5.83. MAE = 4.07, 2 rules\n",
+ "Algorithm.CREAM. Depth: 2. Threshold = 7.91. MAE = 4.07, 2 rules\n",
+ "\n",
+ "**********************\n",
+ "*Best Algorithm.CREAM*\n",
+ "**********************\n",
+ "MAE = 3.84, 3 rules\n",
+ "Threshold = 3.75\n",
+ "Depth = 2\n",
+ "\n",
+ "**********************\n",
+ "*Best MAE *\n",
+ "**********************\n",
+ "MAE = 3.84, 3 rules\n",
+ "Threshold = 3.75\n",
+ "Depth = 2\n",
+ "\n",
+ "**********************\n",
+ "*Best N rules*\n",
+ "**********************\n",
+ "MAE = 4.07, 2 rules\n",
+ "Threshold = 5.83\n",
+ "Depth = 2\n",
+ "\n",
+ "CREAM performance (2 rules):\n",
+ "MAE = 4.96\n",
+ "MAE fidelity = 3.40\n",
+ "\n",
+ "\n",
+ "\n",
+ "\n",
+ "\n",
+ "anticipate time 1\n",
+ "MAE = 8.30\n",
+ "Algorithm.CReEPy. Depth: 1. Threshold = 7.23. MAE = 8.19, 2 rules\n",
+ "Algorithm.CReEPy. Depth: 1. Threshold = 11.25. MAE = 9.41, 2 rules\n",
+ "\n",
+ "Algorithm.CReEPy. Depth: 2. Threshold = 7.23. MAE = 9.46, 3 rules\n",
+ "Algorithm.CReEPy. Depth: 2. Threshold = 11.25. MAE = 9.41, 2 rules\n",
+ "Algorithm.CReEPy. Depth: 2. Threshold = 15.27. MAE = 7.03, 2 rules\n",
+ "Algorithm.CReEPy. Depth: 2. Threshold = 19.29. MAE = 11.34, 2 rules\n",
+ "\n",
+ "Algorithm.CReEPy. Depth: 3. Threshold = 7.23. MAE = 11.65, 2 rules\n",
+ "Algorithm.CReEPy. Depth: 3. Threshold = 11.25. MAE = 7.17, 2 rules\n",
+ "Algorithm.CReEPy. Depth: 3. Threshold = 15.27. MAE = 10.95, 2 rules\n",
+ "\n",
+ "Algorithm.CReEPy. Depth: 4. Threshold = 7.23. MAE = 11.65, 2 rules\n",
+ "Algorithm.CReEPy. Depth: 4. Threshold = 11.25. MAE = 9.41, 2 rules\n",
+ "Algorithm.CReEPy. Depth: 4. Threshold = 15.27. MAE = 7.97, 2 rules\n",
+ "Algorithm.CReEPy. Depth: 4. Threshold = 19.29. MAE = 11.65, 2 rules\n",
+ "\n",
+ "Algorithm.CReEPy. Depth: 5. Threshold = 7.23. MAE = 9.16, 5 rules\n",
+ "Algorithm.CReEPy. Depth: 5. Threshold = 11.25. MAE = 11.65, 2 rules\n",
+ "\n",
+ "**********************\n",
+ "*Best Algorithm.CReEPy*\n",
+ "**********************\n",
+ "MAE = 7.03, 2 rules\n",
+ "Threshold = 15.27\n",
+ "Depth = 2\n",
+ "\n",
+ "**********************\n",
+ "*Best MAE *\n",
+ "**********************\n",
+ "MAE = 7.03, 2 rules\n",
+ "Threshold = 15.27\n",
+ "Depth = 2\n",
+ "\n",
+ "**********************\n",
+ "*Best N rules*\n",
+ "**********************\n",
+ "MAE = 11.65, 2 rules\n",
+ "Threshold = 19.29\n",
+ "Depth = 4\n",
+ "\n",
+ "CReEPy performance (2 rules):\n",
+ "MAE = 13.76\n",
+ "MAE fidelity = 10.72\n",
+ "\n",
+ "Algorithm.CREAM. Depth: 1. Threshold = 7.23. MAE = 12.60, 2 rules\n",
+ "Algorithm.CREAM. Depth: 1. Threshold = 11.25. MAE = 12.66, 2 rules\n",
+ "\n",
+ "Algorithm.CREAM. Depth: 2. Threshold = 7.23. MAE = 12.45, 3 rules\n",
+ "Algorithm.CREAM. Depth: 2. Threshold = 11.25. MAE = 11.95, 3 rules\n",
+ "Algorithm.CREAM. Depth: 2. Threshold = 15.27. MAE = 12.74, 2 rules\n",
+ "Algorithm.CREAM. Depth: 2. Threshold = 19.29. MAE = 12.66, 2 rules\n",
+ "Algorithm.CREAM. Depth: 2. Threshold = 23.30. MAE = 12.57, 2 rules\n",
+ "Algorithm.CREAM. Depth: 2. Threshold = 27.32. MAE = 12.57, 2 rules\n",
+ "\n",
+ "**********************\n",
+ "*Best Algorithm.CREAM*\n",
+ "**********************\n",
+ "MAE = 11.95, 3 rules\n",
+ "Threshold = 11.25\n",
+ "Depth = 2\n",
+ "\n",
+ "**********************\n",
+ "*Best MAE *\n",
+ "**********************\n",
+ "MAE = 11.95, 3 rules\n",
+ "Threshold = 11.25\n",
+ "Depth = 2\n",
+ "\n",
+ "**********************\n",
+ "*Best N rules*\n",
+ "**********************\n",
+ "MAE = 12.57, 2 rules\n",
+ "Threshold = 23.30\n",
+ "Depth = 2\n",
+ "\n",
+ "CREAM performance (3 rules):\n",
+ "MAE = 10.92\n",
+ "MAE fidelity = 6.81\n",
+ "\n",
+ "\n",
+ "\n",
+ "\n",
+ "\n",
+ "contingency cost 5\n",
+ "MAE = 1.50\n",
+ "Algorithm.CReEPy. Depth: 1. Threshold = 0.50. MAE = 25.88, 2 rules\n",
+ "Algorithm.CReEPy. Depth: 1. Threshold = 0.78. MAE = 27.67, 2 rules\n",
+ "\n",
+ "Algorithm.CReEPy. Depth: 2. Threshold = 0.50. MAE = 27.32, 3 rules\n",
+ "Algorithm.CReEPy. Depth: 2. Threshold = 0.78. MAE = 25.65, 3 rules\n",
+ "Algorithm.CReEPy. Depth: 2. Threshold = 1.06. MAE = 26.93, 3 rules\n",
+ "\n",
+ "**********************\n",
+ "*Best Algorithm.CReEPy*\n",
+ "**********************\n",
+ "MAE = 25.65, 3 rules\n",
+ "Threshold = 0.78\n",
+ "Depth = 2\n",
+ "\n",
+ "**********************\n",
+ "*Best MAE *\n",
+ "**********************\n",
+ "MAE = 25.65, 3 rules\n",
+ "Threshold = 0.78\n",
+ "Depth = 2\n",
+ "\n",
+ "**********************\n",
+ "*Best N rules*\n",
+ "**********************\n",
+ "MAE = 27.67, 2 rules\n",
+ "Threshold = 0.78\n",
+ "Depth = 1\n",
+ "\n",
+ "CReEPy performance (3 rules):\n",
+ "MAE = 26.68\n",
+ "MAE fidelity = 26.88\n",
+ "\n",
+ "Algorithm.CREAM. Depth: 1. Threshold = 0.50. MAE = 29.46, 2 rules\n",
+ "Algorithm.CREAM. Depth: 1. Threshold = 0.78. MAE = 29.46, 2 rules\n",
+ "\n",
+ "Algorithm.CREAM. Depth: 2. Threshold = 0.50. MAE = 29.16, 3 rules\n",
+ "Algorithm.CREAM. Depth: 2. Threshold = 0.78. MAE = 29.19, 3 rules\n",
+ "\n",
+ "**********************\n",
+ "*Best Algorithm.CREAM*\n",
+ "**********************\n",
+ "MAE = 29.16, 3 rules\n",
+ "Threshold = 0.50\n",
+ "Depth = 2\n",
+ "\n",
+ "**********************\n",
+ "*Best MAE *\n",
+ "**********************\n",
+ "MAE = 29.16, 3 rules\n",
+ "Threshold = 0.50\n",
+ "Depth = 2\n",
+ "\n",
+ "**********************\n",
+ "*Best N rules*\n",
+ "**********************\n",
+ "MAE = 29.46, 2 rules\n",
+ "Threshold = 0.50\n",
+ "Depth = 1\n",
+ "\n",
+ "CREAM performance (4 rules):\n",
+ "MAE = 26.66\n",
+ "MAE fidelity = 26.85\n",
+ "\n",
+ "\n",
+ "\n",
+ "\n",
+ "\n",
+ "anticipate cost 5\n",
+ "MAE = 0.41\n",
+ "Algorithm.CReEPy. Depth: 1. Threshold = 0.03. MAE = 1.77, 2 rules\n",
+ "Algorithm.CReEPy. Depth: 1. Threshold = 0.05. MAE = 1.81, 2 rules\n",
+ "\n",
+ "Algorithm.CReEPy. Depth: 2. Threshold = 0.03. MAE = 2.24, 3 rules\n",
+ "Algorithm.CReEPy. Depth: 2. Threshold = 0.05. MAE = 2.40, 3 rules\n",
+ "\n",
+ "Algorithm.CReEPy. Depth: 3. Threshold = 0.03. MAE = 2.36, 4 rules\n",
+ "Algorithm.CReEPy. Depth: 3. Threshold = 0.05. MAE = 2.39, 4 rules\n",
+ "\n",
+ "Algorithm.CReEPy. Depth: 4. Threshold = 0.03. MAE = 2.30, 5 rules\n",
+ "Algorithm.CReEPy. Depth: 4. Threshold = 0.05. MAE = 1.70, 5 rules\n",
+ "Algorithm.CReEPy. Depth: 4. Threshold = 0.07. MAE = 1.68, 5 rules\n",
+ "Algorithm.CReEPy. Depth: 4. Threshold = 0.09. MAE = 1.65, 5 rules\n",
+ "Algorithm.CReEPy. Depth: 4. Threshold = 0.11. MAE = 1.70, 5 rules\n",
+ "\n",
+ "**********************\n",
+ "*Best Algorithm.CReEPy*\n",
+ "**********************\n",
+ "MAE = 1.65, 5 rules\n",
+ "Threshold = 0.09\n",
+ "Depth = 4\n",
+ "\n",
+ "**********************\n",
+ "*Best MAE *\n",
+ "**********************\n",
+ "MAE = 1.65, 5 rules\n",
+ "Threshold = 0.09\n",
+ "Depth = 4\n",
+ "\n",
+ "**********************\n",
+ "*Best N rules*\n",
+ "**********************\n",
+ "MAE = 1.81, 2 rules\n",
+ "Threshold = 0.05\n",
+ "Depth = 1\n",
+ "\n",
+ "CReEPy performance (5 rules):\n",
+ "MAE = 1.43\n",
+ "MAE fidelity = 1.66\n",
+ "\n",
+ "Algorithm.CREAM. Depth: 1. Threshold = 0.03. MAE = 2.87, 2 rules\n",
+ "Algorithm.CREAM. Depth: 1. Threshold = 0.05. MAE = 2.90, 2 rules\n",
+ "\n",
+ "Algorithm.CREAM. Depth: 2. Threshold = 0.03. MAE = 2.65, 4 rules\n",
+ "Algorithm.CREAM. Depth: 2. Threshold = 0.05. MAE = 2.88, 3 rules\n",
+ "Algorithm.CREAM. Depth: 2. Threshold = 0.07. MAE = 2.82, 3 rules\n",
+ "Algorithm.CREAM. Depth: 2. Threshold = 0.09. MAE = 2.81, 3 rules\n",
+ "Algorithm.CREAM. Depth: 2. Threshold = 0.11. MAE = 2.82, 3 rules\n",
+ "\n",
+ "**********************\n",
+ "*Best Algorithm.CREAM*\n",
+ "**********************\n",
+ "MAE = 2.65, 4 rules\n",
+ "Threshold = 0.03\n",
+ "Depth = 2\n",
+ "\n",
+ "**********************\n",
+ "*Best MAE *\n",
+ "**********************\n",
+ "MAE = 2.65, 4 rules\n",
+ "Threshold = 0.03\n",
+ "Depth = 2\n",
+ "\n",
+ "**********************\n",
+ "*Best N rules*\n",
+ "**********************\n",
+ "MAE = 2.90, 2 rules\n",
+ "Threshold = 0.05\n",
+ "Depth = 1\n",
+ "\n",
+ "CREAM performance (4 rules):\n",
+ "MAE = 1.36\n",
+ "MAE fidelity = 1.46\n",
+ "\n",
+ "\n",
+ "\n",
+ "\n",
+ "\n"
+ ]
+ }
+ ],
+ "source": [
+ "# Model-objective experiment: for every configuration, fit its paired regressor, print the\n",
+ "# test MAE, then tune CReEPy and CREAM through CRASH and print each extractor's scores.\n",
+ "for rem, feat, target, algorithm, model in zip(toRemove, features, targets, algorithms, models):\n",
+ "    print(algorithm, target, len(feat))\n",
+ "\n",
+ "    # Read the per-algorithm CSV; process() is called first when the file is missing\n",
+ "    # (presumably it generates the CSV -- confirm against its definition).\n",
+ "    name = f\"test/resources/datasets/datasets/{algorithm}.csv\"\n",
+ "    if not exists(name):\n",
+ "        process(algorithm)\n",
+ "    dataset = pd.read_csv(name).drop(rem, axis=1)\n",
+ "\n",
+ "    # 80/20 split with a fixed seed; the last column is used as the regression target.\n",
+ "    train, test = train_test_split(dataset, test_size=0.2, random_state=10)\n",
+ "    model.fit(train.iloc[:, :-1], train.iloc[:, -1])\n",
+ "\n",
+ "    # Per-sample absolute error of the fitted model on the held-out rows.\n",
+ "    E = abs(test.iloc[:, -1] - model.predict(test.iloc[:, :-1]))\n",
+ "    print(\"MAE = {:.2f}\".format(E.mean()))\n",
+ "    output.append((\"model\", algorithm, target, E.mean()))\n",
+ "\n",
+ "    # Disabled: plot of the per-sample errors.\n",
+ "    #plt.plot(E, \".\")\n",
+ "    #plt.show()\n",
+ "\n",
+ "    # Disabled alternative kept for reference: GridREx tuned through PEDRO.\n",
+ "    #pedro = PEDRO(model, train, max_mae_increase=1.2, min_rule_decrease=0.9, readability_tradeoff=0.1,\n",
+ "    #              max_depth=5, patience=1, algorithm=PEDRO.Algorithm.GRIDREX, objective=Objective.MODEL)\n",
+ "    #pedro.search()\n",
+ "    #best = pedro.get_best()[0]\n",
+ "\n",
+ "    #ranked = FeatureRanker(dataset.columns[:-1]).fit(model, dataset.iloc[:, :-1]).rankings()\n",
+ "    #gridREx = Extractor.gridrex(model, best[3], threshold=best[2])\n",
+ "    #theory_from_gridREx = gridREx.extract(train)\n",
+ "    #print('GridREx performance ({} rules):\\nMAE = {:.2f}\\nMAE fidelity = {:.2f}\\n'\n",
+ "    #      .format(gridREx.n_rules, gridREx.mae(test), gridREx.mae(test, model)))\n",
+ "    #print('GridREx extracted rules:\\n\\n' + pretty_theory(theory_from_gridREx))\n",
+ "\n",
+ "    # CReEPy: let CRASH search the settings, then extract again with the first best entry;\n",
+ "    # best[2] and best[3] are passed on as depth and error threshold.\n",
+ "    crash = CRASH(\n",
+ "        model, train,\n",
+ "        readability_tradeoff=0.1, max_depth=5, patience=1,\n",
+ "        algorithm=CRASH.Algorithm.CReEPy, objective=Objective.MODEL,\n",
+ "    )\n",
+ "    crash.search()\n",
+ "    best = crash.get_best()[0]\n",
+ "    creepy = Extractor.creepy(model, depth=best[2], error_threshold=best[3], constant=False)\n",
+ "    theory_from_creepy = creepy.extract(train)\n",
+ "    print('CReEPy performance ({} rules):\\nMAE = {:.2f}\\nMAE fidelity = {:.2f}\\n'.format(\n",
+ "        creepy.n_rules, creepy.mae(test), creepy.mae(test, model)))\n",
+ "\n",
+ "    # CREAM: same procedure with the CREAM algorithm.\n",
+ "    crash = CRASH(\n",
+ "        model, train,\n",
+ "        readability_tradeoff=0.1, max_depth=5, patience=1,\n",
+ "        algorithm=CRASH.Algorithm.CREAM, objective=Objective.MODEL,\n",
+ "    )\n",
+ "    crash.search()\n",
+ "    best = crash.get_best()[0]\n",
+ "    cream = Extractor.cream(model, depth=best[2], error_threshold=best[3], constant=False)\n",
+ "    theory_from_cream = cream.extract(train)\n",
+ "    print('CREAM performance ({} rules):\\nMAE = {:.2f}\\nMAE fidelity = {:.2f}\\n'.format(\n",
+ "        cream.n_rules, cream.mae(test), cream.mae(test, model)))\n",
+ "\n",
+ "    # Four empty lines separate consecutive experiments in the log.\n",
+ "    print(\"\\n\" * 3)"
+ ],
+ "metadata": {
+ "collapsed": false,
+ "pycharm": {
+ "name": "#%%\n"
+ }
+ }
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 7,
+ "outputs": [
+ {
+ "name": "stdout",
+ "output_type": "stream",
+ "text": [
+ "contingency memo 1\n",
+ "MAE = 2.82\n",
+ "Algorithm.CReEPy. Depth: 1. Threshold = 2.69. MAE = 4.75, 2 rules\n",
+ "Algorithm.CReEPy. Depth: 1. Threshold = 4.18. MAE = 4.83, 2 rules\n",
+ "\n",
+ "Algorithm.CReEPy. Depth: 2. Threshold = 2.69. MAE = 4.83, 2 rules\n",
+ "Algorithm.CReEPy. Depth: 2. Threshold = 4.18. MAE = 4.79, 2 rules\n",
+ "Algorithm.CReEPy. Depth: 2. Threshold = 5.68. MAE = 4.75, 2 rules\n",
+ "Algorithm.CReEPy. Depth: 2. Threshold = 7.17. MAE = 4.75, 2 rules\n",
+ "\n",
+ "Algorithm.CReEPy. Depth: 3. Threshold = 2.69. MAE = 4.81, 3 rules\n",
+ "Algorithm.CReEPy. Depth: 3. Threshold = 4.18. MAE = 4.79, 2 rules\n",
+ "Algorithm.CReEPy. Depth: 3. Threshold = 5.68. MAE = 4.81, 2 rules\n",
+ "\n",
+ "Algorithm.CReEPy. Depth: 4. Threshold = 2.69. MAE = 4.76, 3 rules\n",
+ "Algorithm.CReEPy. Depth: 4. Threshold = 4.18. MAE = 4.81, 2 rules\n",
+ "Algorithm.CReEPy. Depth: 4. Threshold = 5.68. MAE = 4.79, 3 rules\n",
+ "\n",
+ "**********************\n",
+ "*Best Algorithm.CReEPy*\n",
+ "**********************\n",
+ "MAE = 4.75, 2 rules\n",
+ "Threshold = 2.69\n",
+ "Depth = 1\n",
+ "\n",
+ "**********************\n",
+ "*Best MAE *\n",
+ "**********************\n",
+ "MAE = 4.75, 2 rules\n",
+ "Threshold = 2.69\n",
+ "Depth = 1\n",
+ "\n",
+ "**********************\n",
+ "*Best N rules*\n",
+ "**********************\n",
+ "MAE = 4.81, 2 rules\n",
+ "Threshold = 4.18\n",
+ "Depth = 4\n",
+ "\n",
+ "CReEPy performance (2 rules):\n",
+ "MAE = 4.61\n",
+ "MAE fidelity = 4.37\n",
+ "\n",
+ "Algorithm.CREAM. Depth: 1. Threshold = 2.69. MAE = 4.59, 2 rules\n",
+ "Algorithm.CREAM. Depth: 1. Threshold = 4.18. MAE = 4.59, 2 rules\n",
+ "\n",
+ "Algorithm.CREAM. Depth: 2. Threshold = 2.69. MAE = 4.40, 3 rules\n",
+ "Algorithm.CREAM. Depth: 2. Threshold = 4.18. MAE = 4.59, 2 rules\n",
+ "Algorithm.CREAM. Depth: 2. Threshold = 5.68. MAE = 4.59, 2 rules\n",
+ "\n",
+ "**********************\n",
+ "*Best Algorithm.CREAM*\n",
+ "**********************\n",
+ "MAE = 4.40, 3 rules\n",
+ "Threshold = 2.69\n",
+ "Depth = 2\n",
+ "\n",
+ "**********************\n",
+ "*Best MAE *\n",
+ "**********************\n",
+ "MAE = 4.40, 3 rules\n",
+ "Threshold = 2.69\n",
+ "Depth = 2\n",
+ "\n",
+ "**********************\n",
+ "*Best N rules*\n",
+ "**********************\n",
+ "MAE = 4.59, 2 rules\n",
+ "Threshold = 5.68\n",
+ "Depth = 2\n",
+ "\n",
+ "CREAM performance (4 rules):\n",
+ "MAE = 4.55\n",
+ "MAE fidelity = 3.93\n",
+ "\n",
+ "\n",
+ "\n",
+ "\n",
+ "\n",
+ "contingency time 1\n",
+ "MAE = 1.02\n",
+ "Algorithm.CReEPy. Depth: 1. Threshold = 0.90. MAE = 1.16, 2 rules\n",
+ "Algorithm.CReEPy. Depth: 1. Threshold = 1.39. MAE = 1.19, 2 rules\n",
+ "\n",
+ "Algorithm.CReEPy. Depth: 2. Threshold = 0.90. MAE = 1.19, 3 rules\n",
+ "Algorithm.CReEPy. Depth: 2. Threshold = 1.39. MAE = 1.16, 2 rules\n",
+ "Algorithm.CReEPy. Depth: 2. Threshold = 1.89. MAE = 1.16, 2 rules\n",
+ "\n",
+ "**********************\n",
+ "*Best Algorithm.CReEPy*\n",
+ "**********************\n",
+ "MAE = 1.16, 2 rules\n",
+ "Threshold = 1.39\n",
+ "Depth = 2\n",
+ "\n",
+ "**********************\n",
+ "*Best MAE *\n",
+ "**********************\n",
+ "MAE = 1.16, 2 rules\n",
+ "Threshold = 1.39\n",
+ "Depth = 2\n",
+ "\n",
+ "**********************\n",
+ "*Best N rules*\n",
+ "**********************\n",
+ "MAE = 1.16, 2 rules\n",
+ "Threshold = 1.39\n",
+ "Depth = 2\n",
+ "\n",
+ "CReEPy performance (2 rules):\n",
+ "MAE = 1.10\n",
+ "MAE fidelity = 1.02\n",
+ "\n",
+ "Algorithm.CREAM. Depth: 1. Threshold = 0.90. MAE = 1.10, 2 rules\n",
+ "Algorithm.CREAM. Depth: 1. Threshold = 1.39. MAE = 1.07, 2 rules\n",
+ "Algorithm.CREAM. Depth: 1. Threshold = 1.89. MAE = 1.07, 2 rules\n",
+ "\n",
+ "Algorithm.CREAM. Depth: 2. Threshold = 0.90. MAE = 1.13, 3 rules\n",
+ "Algorithm.CREAM. Depth: 2. Threshold = 1.39. MAE = 1.07, 2 rules\n",
+ "Algorithm.CREAM. Depth: 2. Threshold = 1.89. MAE = 1.07, 2 rules\n",
+ "Algorithm.CREAM. Depth: 2. Threshold = 2.39. MAE = 1.10, 2 rules\n",
+ "\n",
+ "**********************\n",
+ "*Best Algorithm.CREAM*\n",
+ "**********************\n",
+ "MAE = 1.07, 2 rules\n",
+ "Threshold = 1.89\n",
+ "Depth = 2\n",
+ "\n",
+ "**********************\n",
+ "*Best MAE *\n",
+ "**********************\n",
+ "MAE = 1.07, 2 rules\n",
+ "Threshold = 1.89\n",
+ "Depth = 2\n",
+ "\n",
+ "**********************\n",
+ "*Best N rules*\n",
+ "**********************\n",
+ "MAE = 1.10, 2 rules\n",
+ "Threshold = 2.39\n",
+ "Depth = 2\n",
+ "\n",
+ "CREAM performance (2 rules):\n",
+ "MAE = 1.10\n",
+ "MAE fidelity = 1.02\n",
+ "\n",
+ "\n",
+ "\n",
+ "\n",
+ "\n",
+ "anticipate memo 1\n",
+ "MAE = 4.91\n",
+ "Algorithm.CReEPy. Depth: 1. Threshold = 4.46. MAE = 5.14, 2 rules\n",
+ "Algorithm.CReEPy. Depth: 1. Threshold = 6.94. MAE = 5.14, 2 rules\n",
+ "\n",
+ "Algorithm.CReEPy. Depth: 2. Threshold = 4.46. MAE = 5.81, 2 rules\n",
+ "Algorithm.CReEPy. Depth: 2. Threshold = 6.94. MAE = 5.13, 2 rules\n",
+ "Algorithm.CReEPy. Depth: 2. Threshold = 9.42. MAE = 5.13, 2 rules\n",
+ "\n",
+ "Algorithm.CReEPy. Depth: 3. Threshold = 4.46. MAE = 5.15, 2 rules\n",
+ "Algorithm.CReEPy. Depth: 3. Threshold = 6.94. MAE = 5.93, 2 rules\n",
+ "\n",
+ "Algorithm.CReEPy. Depth: 4. Threshold = 4.46. MAE = 5.13, 2 rules\n",
+ "Algorithm.CReEPy. Depth: 4. Threshold = 6.94. MAE = 5.13, 2 rules\n",
+ "\n",
+ "Algorithm.CReEPy. Depth: 5. Threshold = 4.46. MAE = 5.14, 2 rules\n",
+ "Algorithm.CReEPy. Depth: 5. Threshold = 6.94. MAE = 5.90, 2 rules\n",
+ "\n",
+ "**********************\n",
+ "*Best Algorithm.CReEPy*\n",
+ "**********************\n",
+ "MAE = 5.13, 2 rules\n",
+ "Threshold = 4.46\n",
+ "Depth = 4\n",
+ "\n",
+ "**********************\n",
+ "*Best MAE *\n",
+ "**********************\n",
+ "MAE = 5.13, 2 rules\n",
+ "Threshold = 4.46\n",
+ "Depth = 4\n",
+ "\n",
+ "**********************\n",
+ "*Best N rules*\n",
+ "**********************\n",
+ "MAE = 5.90, 2 rules\n",
+ "Threshold = 6.94\n",
+ "Depth = 5\n",
+ "\n",
+ "CReEPy performance (2 rules):\n",
+ "MAE = 4.96\n",
+ "MAE fidelity = 4.57\n",
+ "\n",
+ "Algorithm.CREAM. Depth: 1. Threshold = 4.46. MAE = 5.67, 2 rules\n",
+ "Algorithm.CREAM. Depth: 1. Threshold = 6.94. MAE = 5.67, 2 rules\n",
+ "\n",
+ "Algorithm.CREAM. Depth: 2. Threshold = 4.46. MAE = 5.47, 3 rules\n",
+ "Algorithm.CREAM. Depth: 2. Threshold = 6.94. MAE = 5.67, 2 rules\n",
+ "Algorithm.CREAM. Depth: 2. Threshold = 9.42. MAE = 5.91, 2 rules\n",
+ "\n",
+ "**********************\n",
+ "*Best Algorithm.CREAM*\n",
+ "**********************\n",
+ "MAE = 5.47, 3 rules\n",
+ "Threshold = 4.46\n",
+ "Depth = 2\n",
+ "\n",
+ "**********************\n",
+ "*Best MAE *\n",
+ "**********************\n",
+ "MAE = 5.47, 3 rules\n",
+ "Threshold = 4.46\n",
+ "Depth = 2\n",
+ "\n",
+ "**********************\n",
+ "*Best N rules*\n",
+ "**********************\n",
+ "MAE = 5.91, 2 rules\n",
+ "Threshold = 9.42\n",
+ "Depth = 2\n",
+ "\n",
+ "CREAM performance (3 rules):\n",
+ "MAE = 5.03\n",
+ "MAE fidelity = 4.30\n",
+ "\n",
+ "\n",
+ "\n",
+ "\n",
+ "\n",
+ "anticipate time 1\n",
+ "MAE = 9.34\n",
+ "Algorithm.CReEPy. Depth: 1. Threshold = 8.25. MAE = 12.46, 2 rules\n",
+ "Algorithm.CReEPy. Depth: 1. Threshold = 12.84. MAE = 14.29, 2 rules\n",
+ "\n",
+ "Algorithm.CReEPy. Depth: 2. Threshold = 8.25. MAE = 14.30, 3 rules\n",
+ "Algorithm.CReEPy. Depth: 2. Threshold = 12.84. MAE = 12.33, 3 rules\n",
+ "Algorithm.CReEPy. Depth: 2. Threshold = 17.42. MAE = 13.83, 2 rules\n",
+ "Algorithm.CReEPy. Depth: 2. Threshold = 22.01. MAE = 11.68, 2 rules\n",
+ "Algorithm.CReEPy. Depth: 2. Threshold = 26.59. MAE = 14.29, 2 rules\n",
+ "\n",
+ "Algorithm.CReEPy. Depth: 3. Threshold = 8.25. MAE = 12.38, 4 rules\n",
+ "Algorithm.CReEPy. Depth: 3. Threshold = 12.84. MAE = 14.29, 2 rules\n",
+ "Algorithm.CReEPy. Depth: 3. Threshold = 17.42. MAE = 12.46, 2 rules\n",
+ "Algorithm.CReEPy. Depth: 3. Threshold = 22.01. MAE = 12.46, 2 rules\n",
+ "\n",
+ "Algorithm.CReEPy. Depth: 4. Threshold = 8.25. MAE = 13.45, 2 rules\n",
+ "Algorithm.CReEPy. Depth: 4. Threshold = 12.84. MAE = 13.14, 2 rules\n",
+ "Algorithm.CReEPy. Depth: 4. Threshold = 17.42. MAE = 13.39, 2 rules\n",
+ "\n",
+ "Algorithm.CReEPy. Depth: 5. Threshold = 8.25. MAE = 12.36, 4 rules\n",
+ "Algorithm.CReEPy. Depth: 5. Threshold = 12.84. MAE = 14.29, 2 rules\n",
+ "Algorithm.CReEPy. Depth: 5. Threshold = 17.42. MAE = 14.29, 2 rules\n",
+ "\n",
+ "**********************\n",
+ "*Best Algorithm.CReEPy*\n",
+ "**********************\n",
+ "MAE = 11.68, 2 rules\n",
+ "Threshold = 22.01\n",
+ "Depth = 2\n",
+ "\n",
+ "**********************\n",
+ "*Best MAE *\n",
+ "**********************\n",
+ "MAE = 11.68, 2 rules\n",
+ "Threshold = 22.01\n",
+ "Depth = 2\n",
+ "\n",
+ "**********************\n",
+ "*Best N rules*\n",
+ "**********************\n",
+ "MAE = 14.29, 2 rules\n",
+ "Threshold = 12.84\n",
+ "Depth = 5\n",
+ "\n",
+ "CReEPy performance (2 rules):\n",
+ "MAE = 13.66\n",
+ "MAE fidelity = 11.29\n",
+ "\n",
+ "Algorithm.CREAM. Depth: 1. Threshold = 8.25. MAE = 14.62, 2 rules\n",
+ "Algorithm.CREAM. Depth: 1. Threshold = 12.84. MAE = 14.62, 2 rules\n",
+ "\n",
+ "Algorithm.CREAM. Depth: 2. Threshold = 8.25. MAE = 14.38, 3 rules\n",
+ "Algorithm.CREAM. Depth: 2. Threshold = 12.84. MAE = 14.67, 2 rules\n",
+ "Algorithm.CREAM. Depth: 2. Threshold = 17.42. MAE = 14.76, 2 rules\n",
+ "\n",
+ "**********************\n",
+ "*Best Algorithm.CREAM*\n",
+ "**********************\n",
+ "MAE = 14.38, 3 rules\n",
+ "Threshold = 8.25\n",
+ "Depth = 2\n",
+ "\n",
+ "**********************\n",
+ "*Best MAE *\n",
+ "**********************\n",
+ "MAE = 14.38, 3 rules\n",
+ "Threshold = 8.25\n",
+ "Depth = 2\n",
+ "\n",
+ "**********************\n",
+ "*Best N rules*\n",
+ "**********************\n",
+ "MAE = 14.76, 2 rules\n",
+ "Threshold = 17.42\n",
+ "Depth = 2\n",
+ "\n",
+ "CREAM performance (3 rules):\n",
+ "MAE = 11.01\n",
+ "MAE fidelity = 8.26\n",
+ "\n",
+ "\n",
+ "\n",
+ "\n",
+ "\n",
+ "contingency cost 5\n",
+ "MAE = 2.07\n",
+ "Algorithm.CReEPy. Depth: 1. Threshold = 0.00. MAE = 26.89, 2 rules\n",
+ "Algorithm.CReEPy. Depth: 1. Threshold = 0.00. MAE = 27.65, 2 rules\n",
+ "\n",
+ "Algorithm.CReEPy. Depth: 2. Threshold = 0.00. MAE = 26.46, 3 rules\n",
+ "Algorithm.CReEPy. Depth: 2. Threshold = 0.00. MAE = 26.46, 3 rules\n",
+ "\n",
+ "**********************\n",
+ "*Best Algorithm.CReEPy*\n",
+ "**********************\n",
+ "MAE = 26.46, 3 rules\n",
+ "Threshold = 0.00\n",
+ "Depth = 2\n",
+ "\n",
+ "**********************\n",
+ "*Best MAE *\n",
+ "**********************\n",
+ "MAE = 26.46, 3 rules\n",
+ "Threshold = 0.00\n",
+ "Depth = 2\n",
+ "\n",
+ "**********************\n",
+ "*Best N rules*\n",
+ "**********************\n",
+ "MAE = 27.65, 2 rules\n",
+ "Threshold = 0.00\n",
+ "Depth = 1\n",
+ "\n",
+ "CReEPy performance (3 rules):\n",
+ "MAE = 26.68\n",
+ "MAE fidelity = 27.05\n",
+ "\n",
+ "Algorithm.CREAM. Depth: 1. Threshold = 0.00. MAE = 29.31, 2 rules\n",
+ "Algorithm.CREAM. Depth: 1. Threshold = 0.00. MAE = 29.31, 2 rules\n",
+ "\n",
+ "Algorithm.CREAM. Depth: 2. Threshold = 0.00. MAE = 28.66, 4 rules\n",
+ "Algorithm.CREAM. Depth: 2. Threshold = 0.00. MAE = 29.18, 4 rules\n",
+ "\n",
+ "**********************\n",
+ "*Best Algorithm.CREAM*\n",
+ "**********************\n",
+ "MAE = 28.66, 4 rules\n",
+ "Threshold = 0.00\n",
+ "Depth = 2\n",
+ "\n",
+ "**********************\n",
+ "*Best MAE *\n",
+ "**********************\n",
+ "MAE = 28.66, 4 rules\n",
+ "Threshold = 0.00\n",
+ "Depth = 2\n",
+ "\n",
+ "**********************\n",
+ "*Best N rules*\n",
+ "**********************\n",
+ "MAE = 29.31, 2 rules\n",
+ "Threshold = 0.00\n",
+ "Depth = 1\n",
+ "\n",
+ "CREAM performance (4 rules):\n",
+ "MAE = 26.71\n",
+ "MAE fidelity = 27.07\n",
+ "\n",
+ "\n",
+ "\n",
+ "\n",
+ "\n",
+ "anticipate cost 5\n",
+ "MAE = 0.50\n",
+ "Algorithm.CReEPy. Depth: 1. Threshold = 0.00. MAE = 2.44, 2 rules\n",
+ "Algorithm.CReEPy. Depth: 1. Threshold = 0.00. MAE = 2.39, 2 rules\n",
+ "Algorithm.CReEPy. Depth: 1. Threshold = 0.00. MAE = 1.79, 2 rules\n",
+ "Algorithm.CReEPy. Depth: 1. Threshold = 0.00. MAE = 2.41, 2 rules\n",
+ "\n",
+ "Algorithm.CReEPy. Depth: 2. Threshold = 0.00. MAE = 1.70, 3 rules\n",
+ "Algorithm.CReEPy. Depth: 2. Threshold = 0.00. MAE = 2.35, 3 rules\n",
+ "\n",
+ "**********************\n",
+ "*Best Algorithm.CReEPy*\n",
+ "**********************\n",
+ "MAE = 1.70, 3 rules\n",
+ "Threshold = 0.00\n",
+ "Depth = 2\n",
+ "\n",
+ "**********************\n",
+ "*Best MAE *\n",
+ "**********************\n",
+ "MAE = 1.70, 3 rules\n",
+ "Threshold = 0.00\n",
+ "Depth = 2\n",
+ "\n",
+ "**********************\n",
+ "*Best N rules*\n",
+ "**********************\n",
+ "MAE = 2.41, 2 rules\n",
+ "Threshold = 0.00\n",
+ "Depth = 1\n",
+ "\n",
+ "CReEPy performance (3 rules):\n",
+ "MAE = 1.99\n",
+ "MAE fidelity = 2.12\n",
+ "\n",
+ "Algorithm.CREAM. Depth: 1. Threshold = 0.00. MAE = 2.86, 2 rules\n",
+ "Algorithm.CREAM. Depth: 1. Threshold = 0.00. MAE = 2.83, 2 rules\n",
+ "Algorithm.CREAM. Depth: 1. Threshold = 0.00. MAE = 2.86, 2 rules\n",
+ "\n",
+ "Algorithm.CREAM. Depth: 2. Threshold = 0.00. MAE = 2.89, 4 rules\n",
+ "Algorithm.CREAM. Depth: 2. Threshold = 0.00. MAE = 2.80, 4 rules\n",
+ "Algorithm.CREAM. Depth: 2. Threshold = 0.00. MAE = 2.65, 4 rules\n",
+ "Algorithm.CREAM. Depth: 2. Threshold = 0.00. MAE = 2.81, 4 rules\n",
+ "\n",
+ "**********************\n",
+ "*Best Algorithm.CREAM*\n",
+ "**********************\n",
+ "MAE = 2.65, 4 rules\n",
+ "Threshold = 0.00\n",
+ "Depth = 2\n",
+ "\n",
+ "**********************\n",
+ "*Best MAE *\n",
+ "**********************\n",
+ "MAE = 2.65, 4 rules\n",
+ "Threshold = 0.00\n",
+ "Depth = 2\n",
+ "\n",
+ "**********************\n",
+ "*Best N rules*\n",
+ "**********************\n",
+ "MAE = 2.86, 2 rules\n",
+ "Threshold = 0.00\n",
+ "Depth = 1\n",
+ "\n",
+ "CREAM performance (4 rules):\n",
+ "MAE = 1.36\n",
+ "MAE fidelity = 1.53\n",
+ "\n",
+ "\n",
+ "\n",
+ "\n",
+ "\n"
+ ]
+ }
+ ],
+ "source": [
+ "# Data-objective experiment: for every configuration, fit a 1-nearest-neighbour regressor,\n",
+ "# print its test MAE, then tune CReEPy and CREAM through CRASH with Objective.DATA.\n",
+ "for rem, feat, target, algorithm in zip(toRemove, features, targets, algorithms):\n",
+ "    print(algorithm, target, len(feat))\n",
+ "\n",
+ "    # Read the per-algorithm CSV; process() is called first when the file is missing\n",
+ "    # (presumably it generates the CSV -- confirm against its definition).\n",
+ "    name = f\"test/resources/datasets/datasets/{algorithm}.csv\"\n",
+ "    if not exists(name):\n",
+ "        process(algorithm)\n",
+ "    dataset = pd.read_csv(name).drop(rem, axis=1)\n",
+ "\n",
+ "    # 80/20 split with a fixed seed; the last column is used as the regression target.\n",
+ "    train, test = train_test_split(dataset, test_size=0.2, random_state=10)\n",
+ "    model = KNeighborsRegressor(n_neighbors=1)\n",
+ "    model.fit(train.iloc[:, :-1], train.iloc[:, -1])\n",
+ "\n",
+ "    # Per-sample absolute error of the fitted model on the held-out rows.\n",
+ "    E = abs(test.iloc[:, -1] - model.predict(test.iloc[:, :-1]))\n",
+ "    print(\"MAE = {:.2f}\".format(E.mean()))\n",
+ "    output.append((\"model\", algorithm, target, E.mean()))\n",
+ "\n",
+ "    # CReEPy: let CRASH search the settings, then extract again with the first best entry;\n",
+ "    # best[2] and best[3] are passed on as depth and error threshold.\n",
+ "    crash = CRASH(\n",
+ "        model, train,\n",
+ "        readability_tradeoff=0.1, max_depth=5, patience=1,\n",
+ "        algorithm=CRASH.Algorithm.CReEPy, objective=Objective.DATA,\n",
+ "    )\n",
+ "    crash.search()\n",
+ "    best = crash.get_best()[0]\n",
+ "    creepy = Extractor.creepy(model, depth=best[2], error_threshold=best[3], constant=False)\n",
+ "    theory_from_creepy = creepy.extract(train)\n",
+ "    print('CReEPy performance ({} rules):\\nMAE = {:.2f}\\nMAE fidelity = {:.2f}\\n'.format(\n",
+ "        creepy.n_rules, creepy.mae(test), creepy.mae(test, model)))\n",
+ "\n",
+ "    # CREAM: same procedure with the CREAM algorithm.\n",
+ "    crash = CRASH(\n",
+ "        model, train,\n",
+ "        readability_tradeoff=0.1, max_depth=5, patience=1,\n",
+ "        algorithm=CRASH.Algorithm.CREAM, objective=Objective.DATA,\n",
+ "    )\n",
+ "    crash.search()\n",
+ "    best = crash.get_best()[0]\n",
+ "    cream = Extractor.cream(model, depth=best[2], error_threshold=best[3], constant=False)\n",
+ "    theory_from_cream = cream.extract(train)\n",
+ "    print('CREAM performance ({} rules):\\nMAE = {:.2f}\\nMAE fidelity = {:.2f}\\n'.format(\n",
+ "        cream.n_rules, cream.mae(test), cream.mae(test, model)))\n",
+ "\n",
+ "    # Four empty lines separate consecutive experiments in the log.\n",
+ "    print(\"\\n\" * 3)"
+ ],
+ "metadata": {
+ "collapsed": false,
+ "pycharm": {
+ "name": "#%%\n"
+ }
+ }
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 9,
+ "outputs": [
+ {
+ "name": "stdout",
+ "output_type": "stream",
+ "text": [
+ "contingency memo 1\n",
+ "skip\n",
+ "contingency time 1\n",
+ "skip\n",
+ "anticipate memo 1\n",
+ "skip\n",
+ "anticipate time 1\n",
+ "skip\n",
+ "contingency cost 5\n",
+ "MAE = 1.50\n",
+ "Algorithm.CReEPy. Depth: 1. Threshold = 0.50. MAE = 25.63, 2 rules\n",
+ "Algorithm.CReEPy. Depth: 1. Threshold = 0.78. MAE = 27.54, 2 rules\n",
+ "Algorithm.CReEPy. Depth: 1. Threshold = 27.54. MAE = 27.67, 2 rules\n",
+ "\n",
+ "Algorithm.CReEPy. Depth: 2. Threshold = 0.50. MAE = 26.13, 3 rules\n",
+ "Algorithm.CReEPy. Depth: 2. Threshold = 0.78. MAE = 26.24, 3 rules\n",
+ "Algorithm.CReEPy. Depth: 2. Threshold = 26.24. MAE = 23.74, 3 rules\n",
+ "Algorithm.CReEPy. Depth: 2. Threshold = 51.69. MAE = 27.39, 2 rules\n",
+ "Algorithm.CReEPy. Depth: 2. Threshold = 77.14. MAE = 28.03, 2 rules\n",
+ "\n",
+ "Algorithm.CReEPy. Depth: 3. Threshold = 0.50. MAE = 24.22, 4 rules\n",
+ "Algorithm.CReEPy. Depth: 3. Threshold = 0.78. MAE = 23.26, 4 rules\n",
+ "Algorithm.CReEPy. Depth: 3. Threshold = 1.06. MAE = 23.67, 4 rules\n",
+ "Algorithm.CReEPy. Depth: 3. Threshold = 23.67. MAE = 25.08, 4 rules\n",
+ "\n",
+ "Algorithm.CReEPy. Depth: 4. Threshold = 0.50. MAE = 23.93, 5 rules\n",
+ "Algorithm.CReEPy. Depth: 4. Threshold = 0.78. MAE = 23.02, 5 rules\n",
+ "Algorithm.CReEPy. Depth: 4. Threshold = 1.06. MAE = 23.50, 5 rules\n",
+ "Algorithm.CReEPy. Depth: 4. Threshold = 23.50. MAE = 25.68, 3 rules\n",
+ "Algorithm.CReEPy. Depth: 4. Threshold = 45.93. MAE = 26.90, 2 rules\n",
+ "Algorithm.CReEPy. Depth: 4. Threshold = 68.36. MAE = 27.50, 2 rules\n",
+ "\n",
+ "Algorithm.CReEPy. Depth: 5. Threshold = 0.50. MAE = 23.02, 6 rules\n",
+ "Algorithm.CReEPy. Depth: 5. Threshold = 0.78. MAE = 22.76, 6 rules\n",
+ "Algorithm.CReEPy. Depth: 5. Threshold = 1.06. MAE = 23.45, 6 rules\n",
+ "Algorithm.CReEPy. Depth: 5. Threshold = 23.45. MAE = 23.00, 6 rules\n",
+ "Algorithm.CReEPy. Depth: 5. Threshold = 45.84. MAE = 27.50, 2 rules\n",
+ "Algorithm.CReEPy. Depth: 5. Threshold = 68.23. MAE = 28.12, 2 rules\n",
+ "\n",
+ "**********************\n",
+ "*Best Algorithm.CReEPy*\n",
+ "**********************\n",
+ "MAE = 23.02, 5 rules\n",
+ "Threshold = 0.78\n",
+ "Depth = 4\n",
+ "\n",
+ "**********************\n",
+ "*Best MAE *\n",
+ "**********************\n",
+ "MAE = 22.76, 6 rules\n",
+ "Threshold = 0.78\n",
+ "Depth = 5\n",
+ "\n",
+ "**********************\n",
+ "*Best N rules*\n",
+ "**********************\n",
+ "MAE = 27.50, 2 rules\n",
+ "Threshold = 45.84\n",
+ "Depth = 5\n",
+ "\n",
+ "CReEPy performance (5 rules):\n",
+ "MAE = 21.28\n",
+ "MAE fidelity = 21.36\n",
+ "\n",
+ "\n",
+ "Algorithm.CReEPy. Depth: 1. Threshold = 0.00. MAE = 25.62, 2 rules\n",
+ "Algorithm.CReEPy. Depth: 1. Threshold = 0.00. MAE = 26.89, 2 rules\n",
+ "Algorithm.CReEPy. Depth: 1. Threshold = 26.89. MAE = 27.48, 2 rules\n",
+ "\n",
+ "Algorithm.CReEPy. Depth: 2. Threshold = 0.00. MAE = 26.56, 3 rules\n",
+ "Algorithm.CReEPy. Depth: 2. Threshold = 0.00. MAE = 25.63, 3 rules\n",
+ "Algorithm.CReEPy. Depth: 2. Threshold = 0.00. MAE = 24.52, 3 rules\n",
+ "Algorithm.CReEPy. Depth: 2. Threshold = 0.00. MAE = 26.68, 3 rules\n",
+ "Algorithm.CReEPy. Depth: 2. Threshold = 26.68. MAE = 26.14, 3 rules\n",
+ "Algorithm.CReEPy. Depth: 2. Threshold = 53.37. MAE = 27.42, 2 rules\n",
+ "Algorithm.CReEPy. Depth: 2. Threshold = 80.05. MAE = 27.54, 2 rules\n",
+ "\n",
+ "Algorithm.CReEPy. Depth: 3. Threshold = 0.00. MAE = 24.15, 4 rules\n",
+ "Algorithm.CReEPy. Depth: 3. Threshold = 0.00. MAE = 24.65, 4 rules\n",
+ "Algorithm.CReEPy. Depth: 3. Threshold = 24.65. MAE = 26.10, 4 rules\n",
+ "\n",
+ "Algorithm.CReEPy. Depth: 4. Threshold = 0.00. MAE = 23.24, 5 rules\n",
+ "Algorithm.CReEPy. Depth: 4. Threshold = 0.00. MAE = 24.15, 5 rules\n",
+ "Algorithm.CReEPy. Depth: 4. Threshold = 24.15. MAE = 25.62, 2 rules\n",
+ "Algorithm.CReEPy. Depth: 4. Threshold = 48.31. MAE = 26.52, 2 rules\n",
+ "\n",
+ "Algorithm.CReEPy. Depth: 5. Threshold = 0.00. MAE = 23.50, 6 rules\n",
+ "Algorithm.CReEPy. Depth: 5. Threshold = 0.00. MAE = 24.71, 6 rules\n",
+ "Algorithm.CReEPy. Depth: 5. Threshold = 24.71. MAE = 24.40, 3 rules\n",
+ "Algorithm.CReEPy. Depth: 5. Threshold = 49.41. MAE = 27.48, 2 rules\n",
+ "Algorithm.CReEPy. Depth: 5. Threshold = 74.12. MAE = 26.89, 2 rules\n",
+ "Algorithm.CReEPy. Depth: 5. Threshold = 98.83. MAE = 27.42, 2 rules\n",
+ "\n",
+ "**********************\n",
+ "*Best Algorithm.CReEPy*\n",
+ "**********************\n",
+ "MAE = 23.24, 5 rules\n",
+ "Threshold = 0.00\n",
+ "Depth = 4\n",
+ "\n",
+ "**********************\n",
+ "*Best MAE *\n",
+ "**********************\n",
+ "MAE = 23.24, 5 rules\n",
+ "Threshold = 0.00\n",
+ "Depth = 4\n",
+ "\n",
+ "**********************\n",
+ "*Best N rules*\n",
+ "**********************\n",
+ "MAE = 27.42, 2 rules\n",
+ "Threshold = 98.83\n",
+ "Depth = 5\n",
+ "\n",
+ "CReEPy performance (5 rules):\n",
+ "MAE = 25.50\n",
+ "MAE fidelity = 25.84\n",
+ "\n",
+ "\n",
+ "anticipate cost 5\n",
+ "MAE = 0.41\n",
+ "Algorithm.CReEPy. Depth: 1. Threshold = 0.03. MAE = 2.70, 2 rules\n",
+ "Algorithm.CReEPy. Depth: 1. Threshold = 0.05. MAE = 2.37, 2 rules\n",
+ "Algorithm.CReEPy. Depth: 1. Threshold = 0.07. MAE = 1.96, 2 rules\n",
+ "Algorithm.CReEPy. Depth: 1. Threshold = 0.09. MAE = 2.43, 2 rules\n",
+ "Algorithm.CReEPy. Depth: 1. Threshold = 2.43. MAE = 1.77, 2 rules\n",
+ "Algorithm.CReEPy. Depth: 1. Threshold = 4.77. MAE = 2.39, 2 rules\n",
+ "\n",
+ "Algorithm.CReEPy. Depth: 2. Threshold = 0.03. MAE = 2.42, 3 rules\n",
+ "Algorithm.CReEPy. Depth: 2. Threshold = 0.05. MAE = 2.19, 3 rules\n",
+ "Algorithm.CReEPy. Depth: 2. Threshold = 0.07. MAE = 2.25, 3 rules\n",
+ "Algorithm.CReEPy. Depth: 2. Threshold = 2.25. MAE = 2.40, 2 rules\n",
+ "Algorithm.CReEPy. Depth: 2. Threshold = 4.42. MAE = 1.77, 2 rules\n",
+ "Algorithm.CReEPy. Depth: 2. Threshold = 6.59. MAE = 2.36, 2 rules\n",
+ "\n",
+ "**********************\n",
+ "*Best Algorithm.CReEPy*\n",
+ "**********************\n",
+ "MAE = 1.77, 2 rules\n",
+ "Threshold = 4.42\n",
+ "Depth = 2\n",
+ "\n",
+ "**********************\n",
+ "*Best MAE *\n",
+ "**********************\n",
+ "MAE = 1.77, 2 rules\n",
+ "Threshold = 4.42\n",
+ "Depth = 2\n",
+ "\n",
+ "**********************\n",
+ "*Best N rules*\n",
+ "**********************\n",
+ "MAE = 2.36, 2 rules\n",
+ "Threshold = 6.59\n",
+ "Depth = 2\n",
+ "\n",
+ "CReEPy performance (2 rules):\n",
+ "MAE = 2.08\n",
+ "MAE fidelity = 2.16\n",
+ "\n",
+ "\n",
+ "Algorithm.CReEPy. Depth: 1. Threshold = 0.00. MAE = 2.36, 2 rules\n",
+ "Algorithm.CReEPy. Depth: 1. Threshold = 0.00. MAE = 2.36, 2 rules\n",
+ "Algorithm.CReEPy. Depth: 1. Threshold = 2.36. MAE = 1.79, 2 rules\n",
+ "Algorithm.CReEPy. Depth: 1. Threshold = 4.73. MAE = 2.69, 2 rules\n",
+ "\n",
+ "Algorithm.CReEPy. Depth: 2. Threshold = 0.00. MAE = 2.35, 3 rules\n",
+ "Algorithm.CReEPy. Depth: 2. Threshold = 0.00. MAE = 2.62, 3 rules\n",
+ "Algorithm.CReEPy. Depth: 2. Threshold = 2.62. MAE = 2.36, 2 rules\n",
+ "Algorithm.CReEPy. Depth: 2. Threshold = 5.23. MAE = 1.77, 2 rules\n",
+ "Algorithm.CReEPy. Depth: 2. Threshold = 7.85. MAE = 2.38, 2 rules\n",
+ "\n",
+ "Algorithm.CReEPy. Depth: 3. Threshold = 0.00. MAE = 2.35, 4 rules\n",
+ "Algorithm.CReEPy. Depth: 3. Threshold = 0.00. MAE = 2.38, 4 rules\n",
+ "Algorithm.CReEPy. Depth: 3. Threshold = 2.38. MAE = 1.77, 2 rules\n",
+ "Algorithm.CReEPy. Depth: 3. Threshold = 4.77. MAE = 1.77, 2 rules\n",
+ "\n",
+ "**********************\n",
+ "*Best Algorithm.CReEPy*\n",
+ "**********************\n",
+ "MAE = 1.77, 2 rules\n",
+ "Threshold = 2.38\n",
+ "Depth = 3\n",
+ "\n",
+ "**********************\n",
+ "*Best MAE *\n",
+ "**********************\n",
+ "MAE = 1.77, 2 rules\n",
+ "Threshold = 2.38\n",
+ "Depth = 3\n",
+ "\n",
+ "**********************\n",
+ "*Best N rules*\n",
+ "**********************\n",
+ "MAE = 1.77, 2 rules\n",
+ "Threshold = 2.38\n",
+ "Depth = 3\n",
+ "\n",
+ "CReEPy performance (2 rules):\n",
+ "MAE = 2.08\n",
+ "MAE fidelity = 2.19\n",
+ "\n",
+ "\n"
+ ]
+ }
+ ],
+ "source": [
+ "for i, (rem, feat, target, algorithm, model) in enumerate(zip(toRemove, features, targets, algorithms, models)):\n",
+ "\n",
+ "    print(algorithm, target, len(feat))\n",
+ "\n",
+ "    # Only the configurations from index 4 onwards are processed by this cell.\n",
+ "    if i < 4:\n",
+ "        print(\"skip\")\n",
+ "        continue\n",
+ "\n",
+ "    # Generate the CSV on first use, then load it and discard the columns listed in `rem`.\n",
+ "    name = f\"test/resources/datasets/datasets/{algorithm}.csv\"\n",
+ "\n",
+ "    if not exists(name):\n",
+ "        process(algorithm)\n",
+ "    dataset = pd.read_csv(name).drop(rem, axis=1)\n",
+ "\n",
+ "    # fit() receives the last column as the target and all preceding columns as inputs.\n",
+ "    train, test = train_test_split(dataset, test_size=0.2, random_state=10)\n",
+ "    train_x, train_y = train.iloc[:, :-1], train.iloc[:, -1]\n",
+ "    model.fit(train_x, train_y)\n",
+ "    E = abs(model.predict(test.iloc[:, :-1]) - test.iloc[:, -1])\n",
+ "    mean_abs_error = E.mean()\n",
+ "    print(\"MAE = {:.2f}\".format(mean_abs_error))\n",
+ "    output.append((\"model\", algorithm, target, mean_abs_error))\n",
+ "\n",
+ "    # Two search-and-extract passes: first on the model fitted above with Objective.MODEL,\n",
+ "    # then on a 1-nearest-neighbour regressor fitted on the same split with Objective.DATA.\n",
+ "    for objective, swap_in_knn in ((Objective.MODEL, False), (Objective.DATA, True)):\n",
+ "        if swap_in_knn:\n",
+ "            model = KNeighborsRegressor(n_neighbors=1).fit(train_x, train_y)\n",
+ "\n",
+ "        # Run the CRASH search configured for the CReEPy algorithm.\n",
+ "        crash = CRASH(model, train, readability_tradeoff=0.5, max_depth=5, patience=2,\n",
+ "                      algorithm=CRASH.Algorithm.CReEPy, objective=objective)\n",
+ "        crash.search()\n",
+ "        best = crash.get_best()[0]\n",
+ "\n",
+ "        # best[2] is passed on as the depth and best[3] as the error threshold.\n",
+ "        creepy = Extractor.creepy(model, depth=best[2], error_threshold=best[3], constant=False)\n",
+ "        theory_from_creepy = creepy.extract(train)\n",
+ "        rules_count = creepy.n_rules\n",
+ "        test_mae = creepy.mae(test)\n",
+ "        fidelity_mae = creepy.mae(test, model)\n",
+ "        print('CReEPy performance ({} rules):\\nMAE = {:.2f}\\nMAE fidelity = {:.2f}\\n'\n",
+ "              .format(rules_count, test_mae, fidelity_mae))\n",
+ "\n",
+ "        print()"
+ ],
+ "metadata": {
+ "collapsed": false,
+ "pycharm": {
+ "name": "#%%\n"
+ }
+ }
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 5,
+ "outputs": [
+ {
+ "name": "stdout",
+ "output_type": "stream",
+ "text": [
+ "contingency memo 1\n",
+ "skip\n",
+ "contingency time 1\n",
+ "skip\n",
+ "anticipate memo 1\n",
+ "skip\n",
+ "anticipate time 1\n",
+ "skip\n",
+ "contingency cost 5\n",
+ "MAE = 1.50\n",
+ "Algorithm.CREAM. Depth: 1. Threshold = 0.00. MAE = 29.47, 2 rules\n",
+ "Algorithm.CREAM. Depth: 1. Threshold = 0.00. MAE = 29.37, 2 rules\n",
+ "Algorithm.CREAM. Depth: 1. Threshold = 0.00. MAE = 29.40, 2 rules\n",
+ "Algorithm.CREAM. Depth: 1. Threshold = 29.40. MAE = 29.27, 2 rules\n",
+ "Algorithm.CREAM. Depth: 1. Threshold = 58.80. MAE = 29.31, 2 rules\n",
+ "\n",
+ "Algorithm.CREAM. Depth: 2. Threshold = 0.00. MAE = 29.21, 4 rules\n",
+ "Algorithm.CREAM. Depth: 2. Threshold = 0.00. MAE = 29.14, 4 rules\n",
+ "Algorithm.CREAM. Depth: 2. Threshold = 0.00. MAE = 29.21, 4 rules\n",
+ "Algorithm.CREAM. Depth: 2. Threshold = 29.21. MAE = 28.99, 3 rules\n",
+ "Algorithm.CREAM. Depth: 2. Threshold = 58.41. MAE = 29.37, 2 rules\n",
+ "Algorithm.CREAM. Depth: 2. Threshold = 87.62. MAE = 29.37, 2 rules\n",
+ "Algorithm.CREAM. Depth: 2. Threshold = 116.83. MAE = 29.31, 2 rules\n",
+ "Algorithm.CREAM. Depth: 2. Threshold = 146.04. MAE = 29.47, 2 rules\n",
+ "\n",
+ "Algorithm.CREAM. Depth: 3. Threshold = 0.00. MAE = 28.22, 8 rules\n",
+ "Algorithm.CREAM. Depth: 3. Threshold = 0.00. MAE = 29.21, 8 rules\n",
+ "Algorithm.CREAM. Depth: 3. Threshold = 29.21. MAE = 29.01, 4 rules\n",
+ "Algorithm.CREAM. Depth: 3. Threshold = 58.43. MAE = 29.47, 2 rules\n",
+ "Algorithm.CREAM. Depth: 3. Threshold = 87.64. MAE = 29.47, 2 rules\n",
+ "\n",
+ "Algorithm.CREAM. Depth: 4. Threshold = 0.00. MAE = 28.36, 16 rules\n",
+ "Algorithm.CREAM. Depth: 4. Threshold = 0.00. MAE = 27.78, 16 rules\n",
+ "Algorithm.CREAM. Depth: 4. Threshold = 27.78. MAE = 28.92, 5 rules\n",
+ "Algorithm.CREAM. Depth: 4. Threshold = 55.55. MAE = 29.31, 2 rules\n",
+ "Algorithm.CREAM. Depth: 4. Threshold = 83.33. MAE = 29.39, 2 rules\n",
+ "\n",
+ "Algorithm.CREAM. Depth: 5. Threshold = 0.00. MAE = 28.63, 26 rules\n",
+ "Algorithm.CREAM. Depth: 5. Threshold = 0.00. MAE = 28.82, 22 rules\n",
+ "Algorithm.CREAM. Depth: 5. Threshold = 0.00. MAE = 27.51, 21 rules\n",
+ "Algorithm.CREAM. Depth: 5. Threshold = 27.51. MAE = 28.28, 6 rules\n",
+ "Algorithm.CREAM. Depth: 5. Threshold = 55.03. MAE = 29.48, 2 rules\n",
+ "Algorithm.CREAM. Depth: 5. Threshold = 82.54. MAE = 29.37, 2 rules\n",
+ "Algorithm.CREAM. Depth: 5. Threshold = 110.06. MAE = 28.26, 2 rules\n",
+ "Algorithm.CREAM. Depth: 5. Threshold = 137.57. MAE = 29.24, 2 rules\n",
+ "\n",
+ "**********************\n",
+ "*Best Algorithm.CREAM*\n",
+ "**********************\n",
+ "MAE = 28.26, 2 rules\n",
+ "Threshold = 110.06\n",
+ "Depth = 5\n",
+ "\n",
+ "**********************\n",
+ "*Best MAE *\n",
+ "**********************\n",
+ "MAE = 27.51, 21 rules\n",
+ "Threshold = 0.00\n",
+ "Depth = 5\n",
+ "\n",
+ "**********************\n",
+ "*Best N rules*\n",
+ "**********************\n",
+ "MAE = 29.24, 2 rules\n",
+ "Threshold = 137.57\n",
+ "Depth = 5\n",
+ "\n",
+ "CReEPy performance (2 rules):\n",
+ "MAE = 27.93\n",
+ "MAE fidelity = 28.32\n",
+ "\n",
+ "\n",
+ "anticipate cost 5\n",
+ "skip\n"
+ ]
+ }
+ ],
+ "source": [
+ "for i, (rem, feat, target, algorithm, model) in enumerate(zip(toRemove, features, targets, algorithms, models)):\n",
+ "\n",
+ "    print(algorithm, target, len(feat))\n",
+ "    # Only the configuration at index 4 is processed by this cell.\n",
+ "    if i != 4:\n",
+ "        print(\"skip\")\n",
+ "        continue\n",
+ "\n",
+ "    name = f\"test/resources/datasets/datasets/{algorithm}.csv\"\n",
+ "    # Generate the dataset CSV on first use.\n",
+ "    if not exists(name):\n",
+ "        process(algorithm)\n",
+ "\n",
+ "    dataset = pd.read_csv(name)\n",
+ "    # Discard the columns listed in `rem` before splitting.\n",
+ "    dataset = dataset.drop(rem, axis = 1)\n",
+ "    # fit() receives the last column as the target and all preceding columns as inputs.\n",
+ "    train, test = train_test_split(dataset, test_size=0.2, random_state=10)\n",
+ "    model.fit(train.iloc[:, :-1], train.iloc[:, -1])\n",
+ "    E = abs(model.predict(test.iloc[:, :-1]) - test.iloc[:, -1])\n",
+ "    print(\"MAE = {:.2f}\".format(E.mean()))\n",
+ "    output.append((\"model\", algorithm, target, E.mean()))\n",
+ "    # Disabled pass: the same search and extraction on the model fitted above (Objective.MODEL).\n",
+ "    #crash = CRASH(model, train, readability_tradeoff=0.5, max_depth=5, patience=2,\n",
+ "    #              algorithm=CRASH.Algorithm.CREAM, objective=Objective.MODEL)\n",
+ "    #crash.search()\n",
+ "    #best = crash.get_best()[0]\n",
+ "\n",
+ "    #cream = Extractor.cream(model, depth=best[2], error_threshold=best[3], constant=False)\n",
+ "    #theory_from_cream = cream.extract(train)\n",
+ "    #print('CREAM performance ({} rules):\\nMAE = {:.2f}\\nMAE fidelity = {:.2f}\\n'\n",
+ "    #      .format(cream.n_rules, cream.mae(test), cream.mae(test, model)))\n",
+ "\n",
+ "    #print()\n",
+ "    # Active pass: a 1-nearest-neighbour regressor on the same split, searched with Objective.DATA.\n",
+ "    model = KNeighborsRegressor(n_neighbors=1).fit(train.iloc[:, :-1], train.iloc[:, -1])\n",
+ "    crash = CRASH(model, train, readability_tradeoff=0.5, max_depth=5, patience=2,\n",
+ "                  algorithm=CRASH.Algorithm.CREAM, objective=Objective.DATA)\n",
+ "    crash.search()\n",
+ "    best = crash.get_best()[0]\n",
+ "    # Extract with CREAM, the algorithm the search above was run for; best[2] is the depth, best[3] the threshold.\n",
+ "    cream = Extractor.cream(model, depth=best[2], error_threshold=best[3], constant=False)\n",
+ "    theory_from_cream = cream.extract(train)\n",
+ "    print('CREAM performance ({} rules):\\nMAE = {:.2f}\\nMAE fidelity = {:.2f}\\n'\n",
+ "          .format(cream.n_rules, cream.mae(test), cream.mae(test, model)))\n",
+ "\n",
+ "    print()"
+ ],
+ "metadata": {
+ "collapsed": false,
+ "pycharm": {
+ "name": "#%%\n"
+ }
+ }
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 25,
+ "outputs": [
+ {
+ "name": "stdout",
+ "output_type": "stream",
+ "text": [
+ "The slowest run took 113.80 times longer than the fastest. This could mean that an intermediate result is being cached.\n",
+ "50 µs ± 137 µs per loop (mean ± std. dev. of 100 runs, 1 loop each)\n"
+ ]
+ }
+ ],
+ "source": [
+ "%timeit -r 100 -n 1 print(5, end=\"\\r\")"
+ ],
+ "metadata": {
+ "collapsed": false,
+ "pycharm": {
+ "name": "#%%\n"
+ }
+ }
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 103,
+ "outputs": [
+ {
+ "name": "stdout",
+ "output_type": "stream",
+ "text": [
+ "contingency memo 1 ['PV_mean', 'PV_std', 'Load_mean', 'Load_std'] ['time', 'cost']\n",
+ "\n",
+ "5 variables\n",
+ "\n",
+ "100 instances\n",
+ "0.03 +- 0.01\n",
+ "\n",
+ "500 instances\n",
+ "0.04 +- 0.01\n",
+ "\n",
+ "1000 instances\n",
+ "0.08 +- 0.02\n",
+ "\n",
+ "2000 instances\n",
+ "0.24 +- 0.04\n",
+ "\n",
+ "4000 instances\n",
+ "0.40 +- 0.07\n",
+ "\n",
+ "7000 instances\n",
+ "1.49 +- 0.16\n",
+ "\n",
+ "10000 instances\n",
+ "3.24 +- 0.48\n",
+ "\n",
+ "\n",
+ "4 variables\n",
+ "\n",
+ "100 instances\n",
+ "0.03 +- 0.01\n",
+ "\n",
+ "500 instances\n",
+ "0.04 +- 0.01\n",
+ "\n",
+ "1000 instances\n",
+ "0.08 +- 0.01\n",
+ "\n",
+ "2000 instances\n",
+ "0.19 +- 0.02\n",
+ "\n",
+ "4000 instances\n",
+ "0.44 +- 0.10\n",
+ "\n",
+ "7000 instances\n",
+ "1.18 +- 0.06\n",
+ "\n",
+ "10000 instances\n",
+ "3.27 +- 0.34\n",
+ "\n",
+ "\n",
+ "3 variables\n",
+ "\n",
+ "100 instances\n",
+ "0.03 +- 0.01\n",
+ "\n",
+ "500 instances\n",
+ "0.05 +- 0.01\n",
+ "\n",
+ "1000 instances\n",
+ "0.06 +- 0.01\n",
+ "\n",
+ "2000 instances\n",
+ "0.15 +- 0.01\n",
+ "\n",
+ "4000 instances\n",
+ "0.35 +- 0.03\n",
+ "\n",
+ "7000 instances\n",
+ "0.96 +- 0.05\n",
+ "\n",
+ "10000 instances\n",
+ "2.01 +- 0.18\n",
+ "\n",
+ "\n",
+ "2 variables\n",
+ "\n",
+ "100 instances\n",
+ "0.03 +- 0.00\n",
+ "\n",
+ "500 instances\n",
+ "0.04 +- 0.01\n",
+ "\n",
+ "1000 instances\n",
+ "0.07 +- 0.01\n",
+ "\n",
+ "2000 instances\n",
+ "0.12 +- 0.02\n",
+ "\n",
+ "4000 instances\n",
+ "0.32 +- 0.07\n",
+ "\n",
+ "7000 instances\n",
+ "0.66 +- 0.04\n",
+ "\n",
+ "10000 instances\n",
+ "2.09 +- 0.24\n",
+ "\n",
+ "\n",
+ "1 variables\n",
+ "\n",
+ "100 instances\n",
+ "0.05 +- 0.01\n",
+ "\n",
+ "500 instances\n",
+ "0.06 +- 0.01\n",
+ "\n",
+ "1000 instances\n",
+ "0.38 +- 0.18\n",
+ "\n",
+ "2000 instances\n",
+ "0.81 +- 0.11\n",
+ "\n",
+ "4000 instances\n",
+ "1.17 +- 0.08\n",
+ "\n",
+ "7000 instances\n",
+ "7.75 +- 0.50\n",
+ "\n",
+ "10000 instances\n",
+ "4.50 +- 0.29\n",
+ "\n",
+ "\n",
+ "{5: [0.03287119388580322, 0.041033308506011966, 0.08207057476043701, 0.24315330743789673, 0.40391945362091064, 1.488190951347351, 3.2419833016395567], 4: [0.02859572172164917, 0.042196600437164306, 0.07671485900878906, 0.18936739206314088, 0.4387374997138977, 1.181975963115692, 3.269099836349487], 3: [0.027770025730133055, 0.0483594012260437, 0.06041494607925415, 0.15415239572525025, 0.3471947169303894, 0.9557713294029235, 2.00965603351593], 2: [0.03156923770904541, 0.043448188304901124, 0.06604753017425537, 0.1234019160270691, 0.3216898012161255, 0.6594726300239563, 2.088407206535339], 1: [0.054462478160858155, 0.06058593988418579, 0.37771976232528687, 0.8130515933036804, 1.1683255887031556, 7.751835277080536, 4.501545391082764]}\n",
+ "{5: [0.008006568310036771, 0.010832056515099364, 0.017278037467434816, 0.04436329902541149, 0.07084922210433292, 0.16235953798011757, 0.47879549725950094], 4: [0.006010070122448614, 0.007395213923417343, 0.012944601559750614, 0.018297218049809154, 0.09872410923183107, 0.0566825482202447, 0.3407695658478622], 3: [0.006643890546785536, 0.010349164269489706, 0.00623181652795555, 0.007351175705842842, 0.0283532494275511, 0.04679988236936149, 0.17987806095635428], 2: [0.003937103959197547, 0.01014291744697181, 0.00619447504987264, 0.019691113374270007, 0.07097996098355641, 0.040935159311804685, 0.24153349831790663], 1: [0.00812968064374636, 0.006694817921617686, 0.17816417920507002, 0.1053105435784018, 0.07591906174957001, 0.5019025282675496, 0.2873544931115201]}\n"
+ ]
+ }
+ ],
+ "source": [
+ "import time\n",
+ "\n",
+ "# Index into toRemove/features/targets/algorithms/models selecting the configuration to benchmark.\n",
+ "i = 0\n",
+ "rem, feat, target, algorithm, model = toRemove[i], features[i], targets[i], algorithms[i], models[i]\n",
+ "print(algorithm, target, len(feat), rem[:-2], rem[-2:])\n",
+ "print()\n",
+ "name = f\"test/resources/datasets/datasets/{algorithm}.csv\"\n",
+ "if not exists(name):\n",
+ "    process(algorithm)\n",
+ "\n",
+ "# The last two entries of `rem` are dropped up front; the others are removed one per round below.\n",
+ "dataset = pd.read_csv(name).drop(rem[-2:], axis=1)\n",
+ "\n",
+ "m = {}  # number of input variables -> mean extraction time for each sample size\n",
+ "e = {}  # number of input variables -> standard deviation of those times\n",
+ "\n",
+ "for r in range(-1, len(rem) - 2):\n",
+ "    if r >= 0:  # r == -1 is the first round, run before any extra column is removed\n",
+ "        dataset = dataset.drop([rem[r]], axis=1)\n",
+ "\n",
+ "    train, test = train_test_split(dataset, test_size=0.1, random_state=10)\n",
+ "    model.fit(train.iloc[:, :-1], train.iloc[:, -1])\n",
+ "    n_variables = len(dataset.columns) - 1  # every column except the last one passed to fit() as target\n",
+ "    print(f\"{n_variables} variables\\n\")\n",
+ "    dat = []\n",
+ "    for j in [100, 500, 1000, 2000, 4000, 7000, 10000]:\n",
+ "        print(j, \"instances\")\n",
+ "        res = []\n",
+ "        # The counter is named `run`, not `i`, so the configuration index set above is not overwritten.\n",
+ "        for run in range(100):\n",
+ "            print(run, end=\"\\r\")\n",
+ "            # perf_counter() is a monotonic, high-resolution clock intended for measuring intervals.\n",
+ "            t0 = time.perf_counter()\n",
+ "            creepy = Extractor.creepy(model, depth=1, error_threshold=.8, constant=False)\n",
+ "            theory_from_creepy = creepy.extract(train.iloc[:j, :])\n",
+ "            res.append(time.perf_counter() - t0)\n",
+ "        res = np.array(res)\n",
+ "        print(f'{np.mean(res):.2f} +- {np.std(res):.2f}')\n",
+ "        print()\n",
+ "        dat.append((np.mean(res), np.std(res)))\n",
+ "    m[n_variables] = [d[0] for d in dat]\n",
+ "    e[n_variables] = [d[1] for d in dat]\n",
+ "    print()\n",
+ "print(m)\n",
+ "print(e)"
+ ],
+ "metadata": {
+ "collapsed": false,
+ "pycharm": {
+ "name": "#%%\n"
+ }
+ }
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 104,
+ "outputs": [
+ {
+ "name": "stdout",
+ "output_type": "stream",
+ "text": [
+ "contingency memo 1 ['PV_mean', 'PV_std', 'Load_mean', 'Load_std'] ['time', 'cost']\n",
+ "\n",
+ "5 variables\n",
+ "\n",
+ "100 instances\n",
+ "0.04 +- 0.01\n",
+ "\n",
+ "500 instances\n",
+ "0.05 +- 0.01\n",
+ "\n",
+ "1000 instances\n",
+ "0.11 +- 0.02\n",
+ "\n",
+ "2000 instances\n",
+ "0.25 +- 0.04\n",
+ "\n",
+ "4000 instances\n",
+ "0.43 +- 0.09\n",
+ "\n",
+ "7000 instances\n",
+ "1.53 +- 0.22\n",
+ "\n",
+ "10000 instances\n",
+ "3.14 +- 0.44\n",
+ "\n",
+ "\n",
+ "4 variables\n",
+ "\n",
+ "100 instances\n",
+ "0.04 +- 0.01\n",
+ "\n",
+ "500 instances\n",
+ "0.05 +- 0.01\n",
+ "\n",
+ "1000 instances\n",
+ "0.08 +- 0.00\n",
+ "\n",
+ "2000 instances\n",
+ "0.19 +- 0.02\n",
+ "\n",
+ "4000 instances\n",
+ "0.40 +- 0.01\n",
+ "\n",
+ "7000 instances\n",
+ "1.21 +- 0.10\n",
+ "\n",
+ "10000 instances\n",
+ "3.15 +- 0.21\n",
+ "\n",
+ "\n",
+ "3 variables\n",
+ "\n",
+ "100 instances\n",
+ "0.04 +- 0.01\n",
+ "\n",
+ "500 instances\n",
+ "0.05 +- 0.01\n",
+ "\n",
+ "1000 instances\n",
+ "0.07 +- 0.01\n",
+ "\n",
+ "2000 instances\n",
+ "0.16 +- 0.01\n",
+ "\n",
+ "4000 instances\n",
+ "0.35 +- 0.02\n",
+ "\n",
+ "7000 instances\n",
+ "0.97 +- 0.08\n",
+ "\n",
+ "10000 instances\n",
+ "1.98 +- 0.17\n",
+ "\n",
+ "\n",
+ "2 variables\n",
+ "\n",
+ "100 instances\n",
+ "0.04 +- 0.01\n",
+ "\n",
+ "500 instances\n",
+ "0.06 +- 0.01\n",
+ "\n",
+ "1000 instances\n",
+ "0.08 +- 0.01\n",
+ "\n",
+ "2000 instances\n",
+ "0.11 +- 0.01\n",
+ "\n",
+ "4000 instances\n",
+ "0.33 +- 0.07\n",
+ "\n",
+ "7000 instances\n",
+ "0.64 +- 0.03\n",
+ "\n",
+ "10000 instances\n",
+ "2.01 +- 0.12\n",
+ "\n",
+ "\n",
+ "1 variables\n",
+ "\n",
+ "100 instances\n",
+ "0.05 +- 0.01\n",
+ "\n",
+ "500 instances\n",
+ "0.07 +- 0.01\n",
+ "\n",
+ "1000 instances\n",
+ "0.38 +- 0.18\n",
+ "\n",
+ "2000 instances\n",
+ "0.78 +- 0.03\n",
+ "\n",
+ "4000 instances\n",
+ "1.22 +- 0.16\n",
+ "\n",
+ "7000 instances\n",
+ "7.58 +- 0.35\n",
+ "\n",
+ "10000 instances\n",
+ "4.45 +- 0.24\n",
+ "\n",
+ "\n",
+ "{5: [0.042794463634490965, 0.05281662464141846, 0.10578654050827026, 0.2454042077064514, 0.4272336745262146, 1.5308331942558289, 3.1398912501335143], 4: [0.037742633819580075, 0.05019772291183472, 0.07981077432632447, 0.19199324607849122, 0.4008229851722717, 1.207040240764618, 3.1493691873550413], 3: [0.03636547803878784, 0.054166588783264163, 0.0737765073776245, 0.16370140075683592, 0.35039893388748167, 0.9726590657234192, 1.9785983300209045], 2: [0.03985038995742798, 0.056003010272979735, 0.07539872884750366, 0.11321240425109863, 0.32898611307144165, 0.6414580917358399, 2.012137541770935], 1: [0.05446675777435303, 0.0706281042098999, 0.37666619300842286, 0.777180597782135, 1.2234806847572326, 7.578078618049622, 4.447733290195465]}\n",
+ "{5: [0.006960809869187141, 0.007676563695357571, 0.0217224658216331, 0.04145347891457142, 0.09026192145722545, 0.22107988902352302, 0.4350317521126616], 4: [0.007603805854676417, 0.006809748933163486, 0.004490720164146021, 0.017805997294028073, 0.009527117999078386, 0.10359012897907922, 0.21151193401795604], 3: [0.0074533542868429345, 0.007757262205622548, 0.011480113435289822, 0.007219566419900182, 0.020423586767389295, 0.08343571324580469, 0.17491113373962613], 2: [0.00786624021155995, 0.012563408894036322, 0.008872308138793765, 0.0061607795200352014, 0.07073490739624513, 0.028163153334897523, 0.12469774042959099], 1: [0.00790054982045218, 0.009733123608675343, 0.17951258136789902, 0.03204691544868753, 0.16452532889211433, 0.3474080984680701, 0.23790970889358112]}\n"
+ ]
+ }
+ ],
+ "source": [
+ "import time\n",
+ "\n",
+ "# Index into toRemove/features/targets/algorithms/models selecting the configuration to benchmark.\n",
+ "i = 0\n",
+ "rem, feat, target, algorithm, model = toRemove[i], features[i], targets[i], algorithms[i], models[i]\n",
+ "print(algorithm, target, len(feat), rem[:-2], rem[-2:])\n",
+ "print()\n",
+ "name = f\"test/resources/datasets/datasets/{algorithm}.csv\"\n",
+ "if not exists(name):\n",
+ "    process(algorithm)\n",
+ "\n",
+ "# The last two entries of `rem` are dropped up front; the others are removed one per round below.\n",
+ "dataset = pd.read_csv(name).drop(rem[-2:], axis=1)\n",
+ "\n",
+ "m = {}  # number of input variables -> mean extraction time for each sample size\n",
+ "e = {}  # number of input variables -> standard deviation of those times\n",
+ "\n",
+ "for r in range(-1, len(rem) - 2):\n",
+ "    if r >= 0:  # r == -1 is the first round, run before any extra column is removed\n",
+ "        dataset = dataset.drop([rem[r]], axis=1)\n",
+ "\n",
+ "    train, test = train_test_split(dataset, test_size=0.1, random_state=10)\n",
+ "    model.fit(train.iloc[:, :-1], train.iloc[:, -1])\n",
+ "    n_variables = len(dataset.columns) - 1  # every column except the last one passed to fit() as target\n",
+ "    print(f\"{n_variables} variables\\n\")\n",
+ "    dat = []\n",
+ "    for j in [100, 500, 1000, 2000, 4000, 7000, 10000]:\n",
+ "        print(j, \"instances\")\n",
+ "        res = []\n",
+ "        # The counter is named `run`, not `i`, so the configuration index set above is not overwritten.\n",
+ "        for run in range(100):\n",
+ "            print(run, end=\"\\r\")\n",
+ "            # perf_counter() is a monotonic, high-resolution clock intended for measuring intervals.\n",
+ "            t0 = time.perf_counter()\n",
+ "            cream = Extractor.cream(model, depth=1, error_threshold=.8, constant=False)\n",
+ "            theory_from_cream = cream.extract(train.iloc[:j, :])\n",
+ "            res.append(time.perf_counter() - t0)\n",
+ "        res = np.array(res)\n",
+ "        print(f'{np.mean(res):.2f} +- {np.std(res):.2f}')\n",
+ "        print()\n",
+ "        dat.append((np.mean(res), np.std(res)))\n",
+ "    m[n_variables] = [d[0] for d in dat]\n",
+ "    e[n_variables] = [d[1] for d in dat]\n",
+ "    print()\n",
+ "print(m)\n",
+ "print(e)"
+ ],
+ "metadata": {
+ "collapsed": false,
+ "pycharm": {
+ "name": "#%%\n"
+ }
+ }
+ },
+ {
+ "cell_type": "code",
+ "execution_count": null,
+ "outputs": [],
+ "source": [
+ "# Bounded repetition count so the cell terminates under Restart & Run All; raise it for more samples.\n",
+ "N_REPEATS = 5\n",
+ "for _ in range(N_REPEATS):\n",
+ "    #pedro = PEDRO(model, train, max_mae_increase=1.2, min_rule_decrease=0.9, readability_tradeoff=0.1,\n",
+ "    #              max_depth=5, patience=1, algorithm=PEDRO.Algorithm.GRIDREX, objective=Objective.MODEL)\n",
+ "    #pedro.search()\n",
+ "    #best = pedro.get_best()[0]\n",
+ "\n",
+ "    #ranked = FeatureRanker(dataset.columns[:-1]).fit(model, dataset.iloc[:, :-1]).rankings()\n",
+ "    #gridREx = Extractor.gridrex(model, best[3], threshold=best[2])\n",
+ "    #theory_from_gridREx = gridREx.extract(train)\n",
+ "    #print('GridREx performance ({} rules):\\nMAE = {:.2f}\\nMAE fidelity = {:.2f}\\n'\n",
+ "    #      .format(gridREx.n_rules, gridREx.mae(test), gridREx.mae(test, model)))\n",
+ "    #print('GridREx extracted rules:\\n\\n' + pretty_theory(theory_from_gridREx))\n",
+ "    # `model`, `train` and `test` are not defined here; they come from the state left by earlier cells.\n",
+ "    crash = CRASH(model, train, readability_tradeoff=0.1, max_depth=5, patience=1,\n",
+ "                  algorithm=CRASH.Algorithm.CREAM, objective=Objective.MODEL)\n",
+ "    crash.search()\n",
+ "    best = crash.get_best()[0]\n",
+ "\n",
+ "    cream = Extractor.cream(model, depth=best[2], error_threshold=best[3], constant=False)\n",
+ "    theory_from_cream = cream.extract(train)\n",
+ "    print('CREAM performance ({} rules):\\nMAE = {:.2f}\\nMAE fidelity = {:.2f}\\n'\n",
+ "          .format(cream.n_rules, cream.mae(test), cream.mae(test, model)))\n",
+ "    # Three blank lines between repetitions (two explicit newlines plus print's own).\n",
+ "    print(\"\\n\\n\")\n"
+ ],
+ "metadata": {
+ "collapsed": false,
+ "pycharm": {
+ "name": "#%%\n"
+ }
+ }
+ },
+ {
+ "cell_type": "code",
+ "execution_count": null,
+ "outputs": [],
+ "source": [],
+ "metadata": {
+ "collapsed": false,
+ "pycharm": {
+ "name": "#%%\n"
+ }
+ }
+ }
+ ],
+ "metadata": {
+ "kernelspec": {
+ "display_name": "Python 3",
+ "language": "python",
+ "name": "python3"
+ },
+ "language_info": {
+ "codemirror_mode": {
+ "name": "ipython",
+ "version": 3
+ },
+ "file_extension": ".py",
+ "mimetype": "text/x-python",
+ "name": "python",
+ "nbconvert_exporter": "python",
+ "pygments_lexer": "ipython3",
+ "version": "3.9.7"
+ }
+ },
+ "nbformat": 4,
+ "nbformat_minor": 5
+}
\ No newline at end of file