From e2f8c2c1a8dfd59741355b22ab64570093264cd2 Mon Sep 17 00:00:00 2001 From: Federico Sabbatini Date: Sun, 3 Jul 2022 16:25:48 +0200 Subject: [PATCH] feat!: added gui --- DemoClassification.ipynb | 76 +--- gui.py | 250 +++++++++++ psyke.kv | 0 psyke/__init__.py | 4 +- psyke/cart/__init__.py | 4 +- psyke/classification/real/__init__.py | 7 +- psyke/clustering/__init__.py | 6 +- psyke/clustering/creepy/__init__.py | 4 +- psyke/gui/__init__.py | 452 ++++++++++++++++++++ psyke/regression/__init__.py | 12 +- psyke/regression/gridex/__init__.py | 6 +- requirements.txt | 3 +- test/psyke/classification/real/test_real.py | 2 +- test/psyke/regression/gridex/test_gridex.py | 10 +- test/psyke/regression/iter/test_iter.py | 4 +- 15 files changed, 752 insertions(+), 88 deletions(-) create mode 100644 gui.py create mode 100644 psyke.kv create mode 100644 psyke/gui/__init__.py diff --git a/DemoClassification.ipynb b/DemoClassification.ipynb index e74c000d..a1f92e8a 100644 --- a/DemoClassification.ipynb +++ b/DemoClassification.ipynb @@ -245,21 +245,21 @@ "execution_count": 9, "outputs": [ { - "name": "stdout", - "output_type": "stream", - "text": [ - "GridEx performance (3 rules):\n", - "Accuracy = 0.88\n", - "Accuracy fidelity = 0.91\n", - "\n", - "GridEx extracted rules:\n", - "\n", - "iris(PetalLength, PetalWidth, SepalLength, SepalWidth, setosa) :-\n", - " PetalLength in [1.19, 1.91].\n", - "iris(PetalLength, PetalWidth, SepalLength, SepalWidth, virginica) :-\n", - " PetalLength in [2.62, 4.76].\n", - "iris(PetalLength, PetalWidth, SepalLength, SepalWidth, versicolor) :-\n", - " PetalLength in [4.76, 6.90].\n" + "ename": "TypeError", + "evalue": "'<' not supported between instances of 'NoneType' and 'str'", + "output_type": "error", + "traceback": [ + "\u001B[1;31m---------------------------------------------------------------------------\u001B[0m", + "\u001B[1;31mTypeError\u001B[0m Traceback (most recent call last)", + "\u001B[1;32m~\\AppData\\Local\\Temp/ipykernel_15448/549355453.py\u001B[0m in \u001B[0;36m\u001B[1;34m\u001B[0m\n\u001B[0;32m 3\u001B[0m \u001B[0mtheory_from_gridEx\u001B[0m \u001B[1;33m=\u001B[0m \u001B[0mgridEx\u001B[0m\u001B[1;33m.\u001B[0m\u001B[0mextract\u001B[0m\u001B[1;33m(\u001B[0m\u001B[0mtrain\u001B[0m\u001B[1;33m)\u001B[0m\u001B[1;33m\u001B[0m\u001B[1;33m\u001B[0m\u001B[0m\n\u001B[0;32m 4\u001B[0m print('GridEx performance ({} rules):\\nAccuracy = {:.2f}\\nAccuracy fidelity = {:.2f}\\n'\n\u001B[1;32m----> 5\u001B[1;33m .format(gridEx.n_rules, gridEx.accuracy(test), gridEx.accuracy(test, predictor)))\n\u001B[0m\u001B[0;32m 6\u001B[0m \u001B[0mprint\u001B[0m\u001B[1;33m(\u001B[0m\u001B[1;34m'GridEx extracted rules:\\n\\n'\u001B[0m \u001B[1;33m+\u001B[0m \u001B[0mpretty_theory\u001B[0m\u001B[1;33m(\u001B[0m\u001B[0mtheory_from_gridEx\u001B[0m\u001B[1;33m)\u001B[0m\u001B[1;33m)\u001B[0m\u001B[1;33m\u001B[0m\u001B[1;33m\u001B[0m\u001B[0m\n", + "\u001B[1;32m~\\Desktop\\psyke\\psyke-python\\psyke\\__init__.py\u001B[0m in \u001B[0;36maccuracy\u001B[1;34m(self, dataframe, predictor)\u001B[0m\n\u001B[0;32m 100\u001B[0m \"\"\"\n\u001B[0;32m 101\u001B[0m \u001B[0mpredictions\u001B[0m \u001B[1;33m=\u001B[0m \u001B[0mnp\u001B[0m\u001B[1;33m.\u001B[0m\u001B[0marray\u001B[0m\u001B[1;33m(\u001B[0m\u001B[0mself\u001B[0m\u001B[1;33m.\u001B[0m\u001B[0mpredict\u001B[0m\u001B[1;33m(\u001B[0m\u001B[0mdataframe\u001B[0m\u001B[1;33m.\u001B[0m\u001B[0miloc\u001B[0m\u001B[1;33m[\u001B[0m\u001B[1;33m:\u001B[0m\u001B[1;33m,\u001B[0m \u001B[1;33m:\u001B[0m\u001B[1;33m-\u001B[0m\u001B[1;36m1\u001B[0m\u001B[1;33m]\u001B[0m\u001B[1;33m)\u001B[0m\u001B[1;33m)\u001B[0m\u001B[1;33m\u001B[0m\u001B[1;33m\u001B[0m\u001B[0m\n\u001B[1;32m--> 102\u001B[1;33m return accuracy_score(dataframe.iloc[:, -1] if predictor is None else\n\u001B[0m\u001B[0;32m 103\u001B[0m \u001B[0mpredictor\u001B[0m\u001B[1;33m.\u001B[0m\u001B[0mpredict\u001B[0m\u001B[1;33m(\u001B[0m\u001B[0mdataframe\u001B[0m\u001B[1;33m.\u001B[0m\u001B[0miloc\u001B[0m\u001B[1;33m[\u001B[0m\u001B[1;33m:\u001B[0m\u001B[1;33m,\u001B[0m \u001B[1;33m:\u001B[0m\u001B[1;33m-\u001B[0m\u001B[1;36m1\u001B[0m\u001B[1;33m]\u001B[0m\u001B[1;33m)\u001B[0m\u001B[1;33m.\u001B[0m\u001B[0mflatten\u001B[0m\u001B[1;33m(\u001B[0m\u001B[1;33m)\u001B[0m\u001B[1;33m,\u001B[0m\u001B[1;33m\u001B[0m\u001B[1;33m\u001B[0m\u001B[0m\n\u001B[0;32m 104\u001B[0m predictions)\n", + "\u001B[1;32m~\\Anaconda3\\lib\\site-packages\\sklearn\\metrics\\_classification.py\u001B[0m in \u001B[0;36maccuracy_score\u001B[1;34m(y_true, y_pred, normalize, sample_weight)\u001B[0m\n\u001B[0;32m 209\u001B[0m \u001B[1;33m\u001B[0m\u001B[0m\n\u001B[0;32m 210\u001B[0m \u001B[1;31m# Compute accuracy for each possible representation\u001B[0m\u001B[1;33m\u001B[0m\u001B[1;33m\u001B[0m\u001B[0m\n\u001B[1;32m--> 211\u001B[1;33m \u001B[0my_type\u001B[0m\u001B[1;33m,\u001B[0m \u001B[0my_true\u001B[0m\u001B[1;33m,\u001B[0m \u001B[0my_pred\u001B[0m \u001B[1;33m=\u001B[0m \u001B[0m_check_targets\u001B[0m\u001B[1;33m(\u001B[0m\u001B[0my_true\u001B[0m\u001B[1;33m,\u001B[0m \u001B[0my_pred\u001B[0m\u001B[1;33m)\u001B[0m\u001B[1;33m\u001B[0m\u001B[1;33m\u001B[0m\u001B[0m\n\u001B[0m\u001B[0;32m 212\u001B[0m \u001B[0mcheck_consistent_length\u001B[0m\u001B[1;33m(\u001B[0m\u001B[0my_true\u001B[0m\u001B[1;33m,\u001B[0m \u001B[0my_pred\u001B[0m\u001B[1;33m,\u001B[0m \u001B[0msample_weight\u001B[0m\u001B[1;33m)\u001B[0m\u001B[1;33m\u001B[0m\u001B[1;33m\u001B[0m\u001B[0m\n\u001B[0;32m 213\u001B[0m \u001B[1;32mif\u001B[0m \u001B[0my_type\u001B[0m\u001B[1;33m.\u001B[0m\u001B[0mstartswith\u001B[0m\u001B[1;33m(\u001B[0m\u001B[1;34m\"multilabel\"\u001B[0m\u001B[1;33m)\u001B[0m\u001B[1;33m:\u001B[0m\u001B[1;33m\u001B[0m\u001B[1;33m\u001B[0m\u001B[0m\n", + "\u001B[1;32m~\\Anaconda3\\lib\\site-packages\\sklearn\\metrics\\_classification.py\u001B[0m in \u001B[0;36m_check_targets\u001B[1;34m(y_true, y_pred)\u001B[0m\n\u001B[0;32m 84\u001B[0m \u001B[0mcheck_consistent_length\u001B[0m\u001B[1;33m(\u001B[0m\u001B[0my_true\u001B[0m\u001B[1;33m,\u001B[0m \u001B[0my_pred\u001B[0m\u001B[1;33m)\u001B[0m\u001B[1;33m\u001B[0m\u001B[1;33m\u001B[0m\u001B[0m\n\u001B[0;32m 85\u001B[0m \u001B[0mtype_true\u001B[0m \u001B[1;33m=\u001B[0m \u001B[0mtype_of_target\u001B[0m\u001B[1;33m(\u001B[0m\u001B[0my_true\u001B[0m\u001B[1;33m)\u001B[0m\u001B[1;33m\u001B[0m\u001B[1;33m\u001B[0m\u001B[0m\n\u001B[1;32m---> 86\u001B[1;33m \u001B[0mtype_pred\u001B[0m \u001B[1;33m=\u001B[0m \u001B[0mtype_of_target\u001B[0m\u001B[1;33m(\u001B[0m\u001B[0my_pred\u001B[0m\u001B[1;33m)\u001B[0m\u001B[1;33m\u001B[0m\u001B[1;33m\u001B[0m\u001B[0m\n\u001B[0m\u001B[0;32m 87\u001B[0m \u001B[1;33m\u001B[0m\u001B[0m\n\u001B[0;32m 88\u001B[0m \u001B[0my_type\u001B[0m \u001B[1;33m=\u001B[0m \u001B[1;33m{\u001B[0m\u001B[0mtype_true\u001B[0m\u001B[1;33m,\u001B[0m \u001B[0mtype_pred\u001B[0m\u001B[1;33m}\u001B[0m\u001B[1;33m\u001B[0m\u001B[1;33m\u001B[0m\u001B[0m\n", + "\u001B[1;32m~\\Anaconda3\\lib\\site-packages\\sklearn\\utils\\multiclass.py\u001B[0m in \u001B[0;36mtype_of_target\u001B[1;34m(y)\u001B[0m\n\u001B[0;32m 325\u001B[0m \u001B[1;32mreturn\u001B[0m \u001B[1;34m\"continuous\"\u001B[0m \u001B[1;33m+\u001B[0m \u001B[0msuffix\u001B[0m\u001B[1;33m\u001B[0m\u001B[1;33m\u001B[0m\u001B[0m\n\u001B[0;32m 326\u001B[0m \u001B[1;33m\u001B[0m\u001B[0m\n\u001B[1;32m--> 327\u001B[1;33m \u001B[1;32mif\u001B[0m \u001B[1;33m(\u001B[0m\u001B[0mlen\u001B[0m\u001B[1;33m(\u001B[0m\u001B[0mnp\u001B[0m\u001B[1;33m.\u001B[0m\u001B[0munique\u001B[0m\u001B[1;33m(\u001B[0m\u001B[0my\u001B[0m\u001B[1;33m)\u001B[0m\u001B[1;33m)\u001B[0m \u001B[1;33m>\u001B[0m \u001B[1;36m2\u001B[0m\u001B[1;33m)\u001B[0m \u001B[1;32mor\u001B[0m \u001B[1;33m(\u001B[0m\u001B[0my\u001B[0m\u001B[1;33m.\u001B[0m\u001B[0mndim\u001B[0m \u001B[1;33m>=\u001B[0m \u001B[1;36m2\u001B[0m \u001B[1;32mand\u001B[0m \u001B[0mlen\u001B[0m\u001B[1;33m(\u001B[0m\u001B[0my\u001B[0m\u001B[1;33m[\u001B[0m\u001B[1;36m0\u001B[0m\u001B[1;33m]\u001B[0m\u001B[1;33m)\u001B[0m \u001B[1;33m>\u001B[0m \u001B[1;36m1\u001B[0m\u001B[1;33m)\u001B[0m\u001B[1;33m:\u001B[0m\u001B[1;33m\u001B[0m\u001B[1;33m\u001B[0m\u001B[0m\n\u001B[0m\u001B[0;32m 328\u001B[0m \u001B[1;32mreturn\u001B[0m \u001B[1;34m\"multiclass\"\u001B[0m \u001B[1;33m+\u001B[0m \u001B[0msuffix\u001B[0m \u001B[1;31m# [1, 2, 3] or [[1., 2., 3]] or [[1, 2]]\u001B[0m\u001B[1;33m\u001B[0m\u001B[1;33m\u001B[0m\u001B[0m\n\u001B[0;32m 329\u001B[0m \u001B[1;32melse\u001B[0m\u001B[1;33m:\u001B[0m\u001B[1;33m\u001B[0m\u001B[1;33m\u001B[0m\u001B[0m\n", + "\u001B[1;32m<__array_function__ internals>\u001B[0m in \u001B[0;36munique\u001B[1;34m(*args, **kwargs)\u001B[0m\n", + "\u001B[1;32m~\\Anaconda3\\lib\\site-packages\\numpy\\lib\\arraysetops.py\u001B[0m in \u001B[0;36munique\u001B[1;34m(ar, return_index, return_inverse, return_counts, axis)\u001B[0m\n\u001B[0;32m 260\u001B[0m \u001B[0mar\u001B[0m \u001B[1;33m=\u001B[0m \u001B[0mnp\u001B[0m\u001B[1;33m.\u001B[0m\u001B[0masanyarray\u001B[0m\u001B[1;33m(\u001B[0m\u001B[0mar\u001B[0m\u001B[1;33m)\u001B[0m\u001B[1;33m\u001B[0m\u001B[1;33m\u001B[0m\u001B[0m\n\u001B[0;32m 261\u001B[0m \u001B[1;32mif\u001B[0m \u001B[0maxis\u001B[0m \u001B[1;32mis\u001B[0m \u001B[1;32mNone\u001B[0m\u001B[1;33m:\u001B[0m\u001B[1;33m\u001B[0m\u001B[1;33m\u001B[0m\u001B[0m\n\u001B[1;32m--> 262\u001B[1;33m \u001B[0mret\u001B[0m \u001B[1;33m=\u001B[0m \u001B[0m_unique1d\u001B[0m\u001B[1;33m(\u001B[0m\u001B[0mar\u001B[0m\u001B[1;33m,\u001B[0m \u001B[0mreturn_index\u001B[0m\u001B[1;33m,\u001B[0m \u001B[0mreturn_inverse\u001B[0m\u001B[1;33m,\u001B[0m \u001B[0mreturn_counts\u001B[0m\u001B[1;33m)\u001B[0m\u001B[1;33m\u001B[0m\u001B[1;33m\u001B[0m\u001B[0m\n\u001B[0m\u001B[0;32m 263\u001B[0m \u001B[1;32mreturn\u001B[0m \u001B[0m_unpack_tuple\u001B[0m\u001B[1;33m(\u001B[0m\u001B[0mret\u001B[0m\u001B[1;33m)\u001B[0m\u001B[1;33m\u001B[0m\u001B[1;33m\u001B[0m\u001B[0m\n\u001B[0;32m 264\u001B[0m \u001B[1;33m\u001B[0m\u001B[0m\n", + "\u001B[1;32m~\\Anaconda3\\lib\\site-packages\\numpy\\lib\\arraysetops.py\u001B[0m in \u001B[0;36m_unique1d\u001B[1;34m(ar, return_index, return_inverse, return_counts)\u001B[0m\n\u001B[0;32m 321\u001B[0m \u001B[0maux\u001B[0m \u001B[1;33m=\u001B[0m \u001B[0mar\u001B[0m\u001B[1;33m[\u001B[0m\u001B[0mperm\u001B[0m\u001B[1;33m]\u001B[0m\u001B[1;33m\u001B[0m\u001B[1;33m\u001B[0m\u001B[0m\n\u001B[0;32m 322\u001B[0m \u001B[1;32melse\u001B[0m\u001B[1;33m:\u001B[0m\u001B[1;33m\u001B[0m\u001B[1;33m\u001B[0m\u001B[0m\n\u001B[1;32m--> 323\u001B[1;33m \u001B[0mar\u001B[0m\u001B[1;33m.\u001B[0m\u001B[0msort\u001B[0m\u001B[1;33m(\u001B[0m\u001B[1;33m)\u001B[0m\u001B[1;33m\u001B[0m\u001B[1;33m\u001B[0m\u001B[0m\n\u001B[0m\u001B[0;32m 324\u001B[0m \u001B[0maux\u001B[0m \u001B[1;33m=\u001B[0m \u001B[0mar\u001B[0m\u001B[1;33m\u001B[0m\u001B[1;33m\u001B[0m\u001B[0m\n\u001B[0;32m 325\u001B[0m \u001B[0mmask\u001B[0m \u001B[1;33m=\u001B[0m \u001B[0mnp\u001B[0m\u001B[1;33m.\u001B[0m\u001B[0mempty\u001B[0m\u001B[1;33m(\u001B[0m\u001B[0maux\u001B[0m\u001B[1;33m.\u001B[0m\u001B[0mshape\u001B[0m\u001B[1;33m,\u001B[0m \u001B[0mdtype\u001B[0m\u001B[1;33m=\u001B[0m\u001B[0mnp\u001B[0m\u001B[1;33m.\u001B[0m\u001B[0mbool_\u001B[0m\u001B[1;33m)\u001B[0m\u001B[1;33m\u001B[0m\u001B[1;33m\u001B[0m\u001B[0m\n", + "\u001B[1;31mTypeError\u001B[0m: '<' not supported between instances of 'NoneType' and 'str'" ] } ], @@ -292,27 +292,8 @@ }, { "cell_type": "code", - "execution_count": 10, - "outputs": [ - { - "name": "stdout", - "output_type": "stream", - "text": [ - "CReEPy performance (3 rules):\n", - "Accuracy = 0.79\n", - "Fidelity = 0.81\n", - "\n", - "CReEPy extracted rules:\n", - "\n", - "iris(PetalLength, PetalWidth, SepalLength, SepalWidth, virginica) :-\n", - " SepalLength in [4.89, 6.70], SepalWidth in [2.19, 3.20], PetalLength in [2.99, 5.00], PetalWidth in [0.99, 1.80].\n", - "iris(PetalLength, PetalWidth, SepalLength, SepalWidth, versicolor) :-\n", - " SepalLength in [4.89, 7.70], SepalWidth in [2.19, 3.80], PetalLength in [2.99, 6.90], PetalWidth in [0.99, 2.50].\n", - "iris(PetalLength, PetalWidth, SepalLength, SepalWidth, setosa) :-\n", - " SepalLength in [4.39, 7.90], SepalWidth in [2.19, 4.10], PetalLength in [1.19, 6.90], PetalWidth in [0.09, 2.50].\n" - ] - } - ], + "execution_count": null, + "outputs": [], "source": [ "creepy = Extractor.creepy(predictor, depth=2, error_threshold=0.1, output=HyperCubeExtractor.Target.CLASSIFICATION)\n", "theory_from_creepy = creepy.extract(train)\n", @@ -329,27 +310,8 @@ }, { "cell_type": "code", - "execution_count": 11, - "outputs": [ - { - "name": "stdout", - "output_type": "stream", - "text": [ - "CREAM performance (3 rules):\n", - "Accuracy = 0.79\n", - "Fidelity = 0.81\n", - "\n", - "CREAM extracted rules:\n", - "\n", - "iris(PetalLength, PetalWidth, SepalLength, SepalWidth, virginica) :-\n", - " SepalLength in [4.89, 6.70], SepalWidth in [2.19, 3.20], PetalLength in [2.99, 5.00], PetalWidth in [0.99, 1.80].\n", - "iris(PetalLength, PetalWidth, SepalLength, SepalWidth, versicolor) :-\n", - " SepalLength in [4.89, 7.70], SepalWidth in [2.19, 3.80], PetalLength in [2.99, 6.90], PetalWidth in [0.99, 2.50].\n", - "iris(PetalLength, PetalWidth, SepalLength, SepalWidth, setosa) :-\n", - " SepalLength in [4.39, 7.90], SepalWidth in [2.19, 4.10], PetalLength in [1.19, 6.90], PetalWidth in [0.09, 2.50].\n" - ] - } - ], + "execution_count": null, + "outputs": [], "source": [ "cream = Extractor.cream(predictor, depth=2, error_threshold=0.1, output=HyperCubeExtractor.Target.CLASSIFICATION)\n", "theory_from_cream = cream.extract(train)\n", diff --git a/gui.py b/gui.py new file mode 100644 index 00000000..2fd38385 --- /dev/null +++ b/gui.py @@ -0,0 +1,250 @@ +import kivy +from kivy.app import App +from kivy.core.window import Window +from kivy.uix.boxlayout import BoxLayout +from kivy.uix.button import Button +from kivy.uix.gridlayout import GridLayout +from kivy.uix.label import Label +from kivy.uix.screenmanager import ScreenManager, Screen + +from sklearn.datasets import fetch_california_housing, load_iris, load_wine +from sklearn.model_selection import train_test_split +from sklearn.neighbors import KNeighborsClassifier, KNeighborsRegressor +from sklearn.tree import DecisionTreeClassifier, DecisionTreeRegressor + +from psyke import Extractor +from psyke.cart import CartPredictor +from psyke.clustering import ClusterExtractor +from psyke.gui import TaskBox, DataBox, DataInfoBox, PredictorParameterBox, PredictorInfoBox, PredictorBox, \ + ExtractorBox, ExtractorParameterBox, ExtractorInfoBox, ExtractorPerformanceInfoBox, PredictorPerformanceInfoBox, \ + TitleBox, VerticalBoxLayout, HorizontalBoxLayout, DataManipulationBox +from psyke.regression import Grid, FixedStrategy +from psyke.utils.logic import pretty_theory + +import re + +kivy.require('2.1.0') # replace with your current kivy version ! + +Window.top = 50 +Window.left = 10 +Window.size = (1400, 750) + +CLASSIFICATION_DATA = ['Iris', 'Wine'] + +REGRESSION_DATA = ['House', 'Artificial10'] + +MODELS = ['KNN', 'DT'] + +CLASSIFICATION_EXTRACTORS = ['REAL', 'Trepan', 'CART', 'GridEx', 'CReEPy', 'CREAM'] + +REGRESSION_EXTRACTORS = ['Iter', 'CART', 'GridEx', 'GridREx', 'CReEPy', 'CREAM'] + + +class PSyKEApp(App): + + def build(self): + return PSyKEScreenManager() + + +class PSyKEScreenManager(ScreenManager): + + def __init__(self, **kwargs): + super().__init__(**kwargs) + self.main_screen = Screen(name="main_screen") + self.wait_screen = Screen(name="wait_screen") + self.theory_screen = Screen(name="theory_screen") + + self.add_widget(self.main_screen) + self.add_widget(self.wait_screen) + self.add_widget(self.theory_screen) + + self.main_screen.add_widget(PSyKEMainScreen(self)) + wait_box = BoxLayout() + wait_box.add_widget(Label(text='Please wait...')) + self.wait_screen.add_widget(wait_box) + self.concrete_theory_screen = PSyKETheoryScreen(self.main) + self.theory_screen.add_widget(self.concrete_theory_screen) + + def wait(self): + self.current = "wait_screen" + + def main(self, widget=None): + self.current = "main_screen" + + def show_theory(self, theory): + self.concrete_theory_screen.set_theory(theory) + self.current = 'theory_screen' + + +class PSyKETheoryScreen(VerticalBoxLayout): + + def __init__(self, action, **kwargs): + super().__init__(**kwargs) + self.back_to_main = action + + def set_theory(self, theory): + self.clear_widgets() + text = pretty_theory(theory) + self.add_widget(Label(text='Extracted theory:\n\n' + re.sub(r"\.\n", ".\n\n", text))) + button = Button(text='Back', size_hint_y=None, height=30) + button.bind(on_press=self.back_to_main) + self.add_widget(button) + #self.add_widget(Label()) + + +class PSyKEMainScreen(GridLayout): + + def __init__(self, manager, **kwargs): + super().__init__(**kwargs) + self.cols = 4 + self.manager = manager + self.data = None + self.train = None + self.test = None + self.task = 'C' + self.model_kind = 'KNN' + self.model = None + self.extractor_kind = {'C': 'REAL', 'R': 'Iter'} + self.extractor = None + self.theory = None + self.taskBox = TaskBox(self) + self.dataBox = DataBox(self, CLASSIFICATION_DATA, REGRESSION_DATA) + self.extractorParamBox = ExtractorParameterBox(self) + self.dataInfoBox = DataInfoBox() + self.dataManipulationBox = DataManipulationBox(self) + self.predictorBox = PredictorBox(self, MODELS) + self.predictorParamBox = PredictorParameterBox(self) + self.predictorInfoBox = PredictorInfoBox() + self.predictorPerformanceInfoBox = PredictorPerformanceInfoBox() + self.extractorBox = ExtractorBox(self, CLASSIFICATION_EXTRACTORS, REGRESSION_EXTRACTORS) + self.extractorInfoBox = ExtractorInfoBox() + self.extractorPerformanceInfoBox = ExtractorPerformanceInfoBox(action=self.show_theory) + self.widgets = [ + self.taskBox, self.dataBox, self.dataInfoBox, self.dataManipulationBox, + self.predictorBox, self.predictorParamBox, self.predictorInfoBox, self.predictorPerformanceInfoBox, + self.extractorBox, self.extractorParamBox, self.extractorInfoBox, self.extractorPerformanceInfoBox + ] + + for widget in self.widgets: + self.add_widget(widget) + + def select_task(self, task): + self.task = task + self.dataBox.reset() + self.dataManipulationBox.reset() + # self.predictorBox.reset(MODELS) + self.select_model() + self.extractorBox.reset(self.task) + self.select_extractor() + self.predictorParamBox.disable() + self.extractorParamBox.disable() + + def select_model(self): + self.model_kind = self.predictorBox.model + self.predictorParamBox.reset() + self.extractorParamBox.disable() + + def select_extractor(self): + self.extractor_kind[self.task] = self.extractorBox.extractor[self.task] + self.extractorParamBox.reset() + + def select_dataset(self, widget): + dataset = self.dataBox.dataset[self.task] + print(f'Loading {dataset}... ', end='') + if dataset == 'Iris': + x, y = load_iris(return_X_y=True, as_frame=True) + self.data = (x, y.replace({0: 'setosa', 1: 'virginica', 2: 'versicolor'})) + elif dataset == 'Wine': + self.data = load_wine(return_X_y=True, as_frame=True) + elif dataset == "House": + self.data = fetch_california_housing(return_X_y=True, as_frame=True) + else: + ... + print('Done') + self.data = self.data[0].join(self.data[1]) + self.dataInfoBox.reset(dataset, self.data) + self.dataManipulationBox.reset() + self.predictorParamBox.enable() + + def train_model(self, widget): + print(f'Training {self.model_kind}... ', end='') + params = self.predictorParamBox.params + if self.task == 'C': + if self.model_kind == 'KNN': + self.model = KNeighborsClassifier(n_neighbors=params.get('neighbors', 5)) + elif self.model_kind == 'DT': + self.model = DecisionTreeClassifier(max_depth=params.get('depth'), max_leaf_nodes=params.get('leaves')) + else: + ... + else: + if self.model_kind == 'KNN': + self.model = KNeighborsRegressor(n_neighbors=params.get('neighbors', 5)) + elif self.model_kind == 'DT': + self.model = DecisionTreeRegressor(max_depth=params.get('depth'), max_leaf_nodes=params.get('leaves')) + else: + ... + self.train, self.test = train_test_split(self.data, test_size=params.get('test', .5)) + self.model.fit(self.train.iloc[:, :-1], self.train.iloc[:, -1]) + print('Done') + self.predictorInfoBox.reset(self.model) + self.predictorPerformanceInfoBox.reset(self.model, self.test, params.get('test', .5)) + self.extractorParamBox.enable() + + def train_extractor(self, widget): + extractor = self.extractor_kind[self.task] + print(f'Extracting rules from {self.model_kind} with {self.extractor_kind[self.task]}... ', end='') + params = self.extractorParamBox.params + print(params) + if extractor == 'GridEx': + self.extractor = Extractor.gridex( + predictor=self.model, grid=Grid(params.get('depth', 2), FixedStrategy(params.get('splits', 2))), + min_examples=params.get('examples', 200), threshold=params.get('threshold', 0.1) + ) + elif extractor == 'GridREx': + self.extractor = Extractor.gridrex( + predictor=self.model, grid=Grid(params.get('depth', 2), FixedStrategy(params.get('splits', 2))), + min_examples=params.get('examples', 200), threshold=params.get('threshold', 0.1) + ) + elif extractor == 'Iter': + self.extractor = Extractor.iter( + predictor=self.model, min_update=params.get('min_update', .1), n_points=params.get('n_points', 1), + max_iterations=params.get('max_iter', 600), min_examples=params.get('examples', 250), + threshold=params.get('threshold', 0.1), fill_gaps=True + ) + elif extractor == 'Trepan': + self.extractor = Extractor.trepan( + predictor=self.model, discretization=None, min_examples=params.get('examples', 0), + max_depth=params.get('depth', 3), split_logic=None + ) + elif extractor == 'REAL': + self.extractor = Extractor.real(predictor=self.model, discretization=None) + elif extractor == 'CART': + self.extractor = Extractor.cart( + predictor=CartPredictor(self.model), simplify=params.get('simplify', True)) + elif extractor == 'CReEPy': + output = ClusterExtractor.Target.CLASSIFICATION if self.task == 'C' else \ + ClusterExtractor.Target.CONSTANT if params.get('output', False) else ClusterExtractor.Target.REGRESSION + self.extractor = Extractor.creepy( + predictor=self.model, depth=params.get('depth', 3), error_threshold=params.get('threshold', .1), + output=output, gauss_components=params.get('components', 10) + ) + elif extractor == 'CREAM': + output = ClusterExtractor.Target.CLASSIFICATION if self.task == 'C' else \ + ClusterExtractor.Target.CONSTANT if params.get('output', False) else ClusterExtractor.Target.REGRESSION + self.extractor = Extractor.cream( + self.model, depth=params.get('depth', 3), error_threshold=params.get('threshold', .1), + output=output, gauss_components=params.get('components', 10) + ) + else: + ... + print('Done') + self.theory = self.extractor.extract(self.train) + self.extractorInfoBox.reset(self.extractor, self.model) + self.extractorPerformanceInfoBox.reset(self.extractor, self.model, self.test) + + def show_theory(self, widget): + self.manager.show_theory(self.theory) + + +if __name__ == '__main__': + PSyKEApp().run() diff --git a/psyke.kv b/psyke.kv new file mode 100644 index 00000000..e69de29b diff --git a/psyke/__init__.py b/psyke/__init__.py index ceec8ccd..9a63ef89 100644 --- a/psyke/__init__.py +++ b/psyke/__init__.py @@ -153,7 +153,7 @@ def gridrex(predictor, grid, min_examples: int = 250, threshold: float = 0.1, return GridREx(predictor, grid, min_examples, threshold, seed) @staticmethod - def cream(predictor, depth: int, error_threshold: float, output, gauss_components: int = 2) -> Extractor: + def cream(predictor, depth: int, error_threshold: float, output, gauss_components: int = 10) -> Extractor: """ Creates a new CREAM extractor. """ @@ -161,7 +161,7 @@ def cream(predictor, depth: int, error_threshold: float, output, gauss_component return CREAM(predictor, depth, error_threshold, output, gauss_components) @staticmethod - def creepy(predictor, depth: int, error_threshold: float, output, gauss_components: int = 2) -> Extractor: + def creepy(predictor, depth: int, error_threshold: float, output, gauss_components: int = 10) -> Extractor: """ Creates a new CReEPy extractor. """ diff --git a/psyke/cart/__init__.py b/psyke/cart/__init__.py index 90676740..ccbf979d 100644 --- a/psyke/cart/__init__.py +++ b/psyke/cart/__init__.py @@ -13,7 +13,7 @@ class Cart(Extractor): def __init__(self, predictor: CartPredictor, discretization: Iterable[DiscreteFeature] = None, simplify: bool = True): super().__init__(predictor, discretization) - self.__simplify = simplify + self.simplify = simplify def __create_body(self, variables: dict[str, Var], constraints: LeafConstraints) -> Iterable[Struct]: results = [] @@ -29,7 +29,7 @@ def __create_body(self, variables: dict[str, Var], constraints: LeafConstraints) def __create_theory(self, data: pd.DataFrame) -> Theory: new_theory = mutable_theory() for name, value in self.predictor: - name = [(n[0], n[1]) for n in name if not self.__simplify or n[2]] + name = [(n[0], n[1]) for n in name if not self.simplify or n[2]] variables = create_variable_list(self.discretization, data) new_theory.assertZ( clause( diff --git a/psyke/classification/real/__init__.py b/psyke/classification/real/__init__.py index 6afecbbf..d9b7588d 100644 --- a/psyke/classification/real/__init__.py +++ b/psyke/classification/real/__init__.py @@ -86,9 +86,6 @@ def __remove_antecedent(self, samples: pd.DataFrame, predicate: str, rule: list[ if len(copies) > 1: rule[0].remove(predicate) return pd.concat([df for df in copies], ignore_index=True) - # if len(samples[np.array(self.predictor.predict(samples)) != "setosa"][samples["SepalLength_0"] == 1]) == 0 - # togliere SL0 - @lru_cache(maxsize=512) def __get_or_set(self, dataset: HashableDataFrame) -> IndexedRuleSet: @@ -97,7 +94,7 @@ def __get_or_set(self, dataset: HashableDataFrame) -> IndexedRuleSet: def __predict(self, sample: pd.Series): x = [index for index, rule in self.__ruleset.flatten() if self.__rule_from_example(sample) in rule] reverse_mapping = dict((v, k) for k, v in self.__output_mapping.items()) - return reverse_mapping[x[0]] if len(x) > 0 else -1 + return reverse_mapping[x[0]] if len(x) > 0 else None # def __remove_antecedents2(self, samples: pd.DataFrame, predicate: str, # mutable_predicates: list[str]) -> pd.DataFrame: @@ -130,4 +127,4 @@ def extract(self, dataframe: pd.DataFrame) -> Theory: return self.__create_theory(dataframe, self.__ruleset) def predict(self, dataframe) -> list: - return [self.__predict(data.transpose()) for _, data in dataframe.iterrows()] \ No newline at end of file + return [self.__predict(data.transpose()) for _, data in dataframe.iterrows()] diff --git a/psyke/clustering/__init__.py b/psyke/clustering/__init__.py index 91f30e2a..a89da060 100644 --- a/psyke/clustering/__init__.py +++ b/psyke/clustering/__init__.py @@ -15,14 +15,14 @@ def __init__(self, predictor, depth: int, error_threshold: float, self.depth = depth self.error_threshold = error_threshold self.gauss_components = gauss_components - self._output = output + self.output = output def extract(self, dataframe: pd.DataFrame) -> Theory: raise NotImplementedError('extract') def _default_cube(self) -> Union[ClosedCube, ClosedRegressionCube, ClosedClassificationCube]: - if self._output == ClusterExtractor.Target.CONSTANT: + if self.output == ClusterExtractor.Target.CONSTANT: return ClosedCube() - if self._output == ClusterExtractor.Target.REGRESSION: + if self.output == ClusterExtractor.Target.REGRESSION: return ClosedRegressionCube() return ClosedClassificationCube() diff --git a/psyke/clustering/creepy/__init__.py b/psyke/clustering/creepy/__init__.py index 23d5f2b9..81ec0924 100644 --- a/psyke/clustering/creepy/__init__.py +++ b/psyke/clustering/creepy/__init__.py @@ -49,12 +49,12 @@ def _create_cube(self, dataframe: pd.DataFrame, clusters: int) -> ClosedCube: dbscan_pred = DBSCAN(eps=select_dbscan_epsilon(data, clusters)).fit_predict(data.iloc[:, :-1]) return HyperCube.create_surrounding_cube( dataframe.iloc[np.where(dbscan_pred == Counter(dbscan_pred).most_common(1)[0][0])], - True, self._output + True, self.output ) def extract(self, dataframe: pd.DataFrame) -> Theory: self._hypercubes = \ - self._iterate(Node(dataframe, HyperCube.create_surrounding_cube(dataframe, True, self._output))) + self._iterate(Node(dataframe, HyperCube.create_surrounding_cube(dataframe, True, self.output))) return self._create_theory(dataframe) @staticmethod diff --git a/psyke/gui/__init__.py b/psyke/gui/__init__.py new file mode 100644 index 00000000..c8265d23 --- /dev/null +++ b/psyke/gui/__init__.py @@ -0,0 +1,452 @@ +import numpy as np +from kivy.uix.boxlayout import BoxLayout +from kivy.uix.button import Button +from kivy.uix.checkbox import CheckBox +from kivy.uix.gridlayout import GridLayout +from kivy.uix.textinput import TextInput +from kivy.uix.label import Label +from functools import partial + +from sklearn.base import ClassifierMixin, RegressorMixin +from sklearn.metrics import mean_absolute_error, mean_squared_error, r2_score, accuracy_score +from sklearn.neighbors._base import NeighborsBase +from sklearn.tree import BaseDecisionTree + +from psyke.cart import Cart +from psyke.classification.real import REAL +from psyke.classification.trepan import Trepan +from psyke.clustering import ClusterExtractor +from psyke.clustering.cream import CREAM +from psyke.clustering.creepy import CReEPy +from psyke.regression.gridex import GridEx +from psyke.regression.gridrex import GridREx +from psyke.regression.iter import ITER + + +def default_action(widget=None, value=None): + pass + + +def button_with_label(group: str, color: list[int, int, int], active: bool, + label: str, action=default_action) -> BoxLayout: + box = GridLayout(cols=2, padding=0) + button = CheckBox(group=group, size_hint_x=.047, size_hint_y=.047, color=color, active=active) + box.add_widget(button) + button.bind(active=action) + box.add_widget(Label(text=label)) + return box + + +def checkbox_with_label(color: list[int, int, int], active: bool, label: str, action=default_action) -> BoxLayout: + box = GridLayout(cols=3, size_hint_y=None, height=25, padding=10) + box.add_widget(Label(text=label)) + box.add_widget(Label()) + button = CheckBox(color=color, active=active) + button.bind(active=action) + box.add_widget(button) + box.add_widget(Label()) + return box + + +def text_with_label(label: str, text: str, filter:str, action) -> BoxLayout: + box = HorizontalBoxLayout(size_hint_y=None, height=30) + box.add_widget(Label(text=label)) + text = TextInput(text=text, input_filter=filter, multiline=False, size_hint_y=None, height=30) + text.bind(text=action) + box.add_widget(text) + return box + + +class VerticalBoxLayout(BoxLayout): + + def __init__(self, **kwargs): + super().__init__(**kwargs) + self.orientation = 'vertical' + + +class HorizontalBoxLayout(BoxLayout): + + def __init__(self, **kwargs): + super().__init__(**kwargs) + self.orientation = 'horizontal' + + +class TitleBox(HorizontalBoxLayout): + + def __init__(self, title: str, action=None, button_name=None, disabled=False, **kwargs): + super().__init__(**kwargs) + self.padding = 5 + self.spacing = 15 + self.size_hint_y = None + self.height = 40 + self.add_widget(Label(text=title)) + if action is not None: + self.button = Button(text=button_name, size_hint=(None, None), height=30, width=120, disabled=disabled) + self.button.bind(on_press=action) + self.add_widget(self.button) + + +class TaskBox(VerticalBoxLayout): + + def __init__(self, root, **kwargs): + super().__init__(**kwargs) + self.padding = 20 + self.root = root + self.add_widget(TitleBox('Select task')) + self.add_widget(Label()) + self.add_widget(button_with_label('task', [1, 1, 1], True, 'Classification', partial(self.set_task, 'C'))) + self.add_widget(button_with_label('task', [1, 1, 1], False, 'Regression', partial(self.set_task, 'R'))) + self.add_widget(Label()) + + def set_task(self, task, widget, value): + if value: + self.root.select_task(task) + + +class DataBox(VerticalBoxLayout): + + def __init__(self, root, classification: list[str], regression: list[str], **kwargs): + super().__init__(**kwargs) + self.padding = 20 + self.root = root + self.dataset = {'C': classification[0], 'R': regression[0]} + self.classification = VerticalBoxLayout() + self.regression = VerticalBoxLayout() + for (container, datasets, task) in \ + zip([self.classification, self.regression], [classification, regression], ['C', 'R']): + for i, dataset in enumerate(datasets): + container.add_widget(button_with_label(f'data_{task}', [1, 1, 1], i == 0, dataset, + partial(self.set_dataset, task, dataset))) + self.reset() + + def set_dataset(self, task, dataset, widget, value): + if value: + self.dataset[task] = dataset + + def reset(self): + self.clear_widgets() + self.add_widget(TitleBox('Select dataset', self.root.select_dataset, 'Load')) + self.add_widget(Label()) + self.add_widget(self.classification if self.root.task == 'C' else self.regression) + self.add_widget(Label()) + + +class PredictorBox(VerticalBoxLayout): + + def __init__(self, root, options, **kwargs): + super().__init__(**kwargs) + self.padding = 20 + self.model = options[0] + self.root = root + self.add_widget(TitleBox('Select predictor')) + self.add_widget(Label()) + for i, option in enumerate(options): + self.add_widget(button_with_label('predictor', [1, 1, 1], i == 0, option, + partial(self.set_model, option))) + self.add_widget(Label()) + + def set_model(self, model, widget, value): + if value: + self.model = model + self.root.select_model() + + +class ExtractorBox(VerticalBoxLayout): + + def __init__(self, root, classifiers: list[str], regressors: list[str], **kwargs): + super().__init__(**kwargs) + self.padding = 20 + #self.spacing = 20 + self.extractor = {'C': classifiers[0], 'R': regressors[0]} + self.root = root + self.classifiers = GridLayout(cols=2) + self.regressors = GridLayout(cols=2) + for (container, extractors, task) in \ + zip([self.classifiers, self.regressors], [classifiers, regressors], ['C', 'R']): + for i, extractor in enumerate(extractors): + container.add_widget(button_with_label(f'extractor_{task}', [1, 1, 1], i == 0, extractor, + partial(self.set_extractor, task, extractor))) + self.reset() + + def set_extractor(self, task, extractor, widget, value): + if value: + self.extractor[task] = extractor + self.root.select_extractor() + + def reset(self, task='C'): + self.clear_widgets() + self.add_widget(TitleBox('Select extractor', size_hint_y=.2)) + self.add_widget(Label(size_hint_y=None, height=30)) + self.add_widget(self.classifiers if task == 'C' else self.regressors) + + +class DataInfoBox(VerticalBoxLayout): + + def __init__(self, name=None, df=None, **kwargs): + super().__init__(**kwargs) + self.padding = 20 + self.label = Label() + self.add_widget(self.label) + self.reset(name, df) + + def reset(self, name, df): + self.label.text = 'No selected dataset' if name is None else \ + f'Dataset info\n\nDataset: {name}\nInput variables: {len(df.columns) - 1}\nInstances: {len(df)}' + if df is not None and isinstance(df.iloc[0, -1], str): + self.label.text += f'\nClasses: {len(np.unique(df.iloc[:, -1]))}' + + +class ParameterBox(VerticalBoxLayout): + + def __init__(self, root, title, **kwargs): + super().__init__(**kwargs) + self.padding = 20 + self.params = {} + self.root = root + self.titleBox = title + self.reset() + + def reset(self): + self.clear_widgets() + self.params = {} + self.add_widget(self.titleBox) + + def enable(self): + self.titleBox.button.disabled = False + + def disable(self): + self.titleBox.button.disabled = True + + def set_param(self, key, widget, value): + if value == '': + try: + del self.params[key] + except KeyError: + pass + else: + try: + self.params[key] = int(value) + except ValueError: + self.params[key] = float(value) + + def set_param_checkbox(self, key, widget, value): + self.params[key] = bool(value) + + +class PredictorParameterBox(ParameterBox): + + def __init__(self, root, disabled=True, **kwargs): + super().__init__(root, TitleBox('Predictor', root.train_model, 'Train', disabled), **kwargs) + + def reset(self): + super().reset() + self.add_widget(Label()) + self.add_widget(text_with_label('Test set', '', 'float', partial(self.set_param, 'test'))) + self.add_widget(Label()) + if self.root.model_kind == 'KNN': + self.add_widget(text_with_label('N neighbors', '', 'int', partial(self.set_param, 'neighbors'))) + elif self.root.model_kind == 'DT': + self.add_widget(text_with_label('Max depth', '', 'int', partial(self.set_param, 'depth'))) + self.add_widget(text_with_label('Max leaves', '', 'int', partial(self.set_param, 'leaves'))) + self.add_widget(Label()) + + +class ExtractorParameterBox(ParameterBox): + + def __init__(self, root, disabled=True, **kwargs): + super().__init__(root, TitleBox('Extractor', root.train_extractor, 'Fit', disabled), **kwargs) + self.simplify = checkbox_with_label([1, 1, 1], True, 'Simplify theory', + partial(self.set_param_checkbox, 'simplify')) + self.depth = text_with_label('Max depth', '', 'int', partial(self.set_param, 'depth')) + self.splits = text_with_label('Number of splits', '', 'int', partial(self.set_param, 'splits')) + self.examples = text_with_label('Min examples', '', 'int', partial(self.set_param, 'min_examples')) + self.threshold = text_with_label('Threshold', '', 'float', partial(self.set_param, 'threshold')) + self.output = checkbox_with_label([1, 1, 1], True, 'Constant output', + partial(self.set_param_checkbox, 'output')) + self.components = text_with_label('N components', '', 'int', partial(self.set_param, 'components')) + self.update = text_with_label('Min update', '', 'float', partial(self.set_param, 'min_update')) + self.points = text_with_label('Initial points', '', 'int', partial(self.set_param, 'n_points')) + self.iterations = text_with_label('Max iterations', '', 'int', partial(self.set_param, 'max_iter')) + + def reset(self): + super().reset() + self.add_widget(Label()) + extractor = self.root.extractor_kind[self.root.task] + if extractor == 'REAL': + self.add_widget(Label(text='No parameters required')) + elif extractor == 'CART': + self.add_widget(self.simplify) + else: + if extractor in ['GridEx', 'GridREx', 'Trepan', 'CReEPy', 'CREAM']: + self.add_widget(self.depth) + if extractor in ['GridEx', 'GridREx']: + self.add_widget(self.splits) + if extractor in ['GridEx', 'GridREx', 'Iter', 'Trepan']: + self.add_widget(self.examples) + if extractor in ['GridEx', 'GridREx', 'Iter', 'CReEPy', 'CREAM']: + self.add_widget(self.threshold) + if extractor in ['CReEPy', 'CREAM']: + if self.root.task == 'R': + self.add_widget(self.output) + self.add_widget(self.components) + if extractor == 'Iter': + self.add_widget(self.update) + self.add_widget(self.points) + self.add_widget(self.iterations) + self.add_widget(Label()) + + +class InfoBox(VerticalBoxLayout): + + def __init__(self, item=None, **kwargs): + super().__init__(**kwargs) + self.padding = 20 + self.label = Label(text='') + self.add_widget(Label()) + self.add_widget(self.label) + self.add_widget(Label()) + self.reset(item) + + +class PredictorInfoBox(InfoBox): + + def __init__(self, model=None, **kwargs): + super().__init__(model, **kwargs) + + def reset(self, model=None): + if model is None: + self.label.text = 'No trained predictor' + else: + self.label.text = 'Predictor info\n\n' + if isinstance(model, BaseDecisionTree): + self.label.text += f'Predictor: Decision Tree\nMax leaves: {model.max_leaf_nodes}\n' \ + f'Max depth: {model.max_depth}' + elif isinstance(model, NeighborsBase): + self.label.text += f'Predictor: KNN\nNeighbors: {model.n_neighbors}' + else: + ... + + +class PredictorPerformanceInfoBox(InfoBox): + + def __init__(self, model=None, **kwargs): + super().__init__(model, **kwargs) + + def reset(self, model=None, test=None, test_quota=None): + if model is None: + self.label.text = 'No trained predictor' + else: + self.label.text = f'Predictor Performance\n\nTest set: {test_quota}{" (%)" if test_quota < 1 else ""}\n\n' + true = test.iloc[:, -1] + predicted = model.predict(test.iloc[:, :-1]) + if isinstance(model, ClassifierMixin): + self.label.text += f'Accuracy: {accuracy_score(true, predicted):.2f}' + elif isinstance(model, RegressorMixin): + self.label.text += f'MAE: {mean_absolute_error(true, predicted):.2f}\n' \ + f'MSE: {mean_squared_error(true, predicted):.2f}\n' \ + f'R2: {r2_score(true, predicted):.2f}' + else: + ... + + +class ExtractorInfoBox(InfoBox): + + def __init__(self, extractor=None, **kwargs): + super().__init__(extractor, **kwargs) + + def reset(self, extractor=None, model=None): + if extractor is None: + self.label.text = 'No fitted extractor' + else: + self.label.text = 'Extractor info\n\n' + if isinstance(extractor, Trepan): + self.label.text += f'Extractor: Trepan\n' \ + f'Max depth: {extractor.max_depth}\nMin examples: {extractor.min_examples}' + elif isinstance(extractor, REAL): + self.label.text += f'Extractor: REAL' + elif isinstance(extractor, ITER): + self.label.text += f'Predictor: Iter\nMin Examples: {extractor.min_examples}\n' \ + f'Threshold: {extractor.threshold}\nMin update: {extractor.min_update}\n' \ + f'Initial points: {extractor.n_points}\nMax iterations: {extractor.max_iterations}' + elif isinstance(extractor, Cart): + self.label.text += f'Extractor: CART\nSimplify theory: {extractor.simplify}' + elif isinstance(extractor, GridEx) or isinstance(extractor, GridREx): + if isinstance(extractor, GridEx): + self.label.text += f'Extractor: GridEx\n' + else: + self.label.text += f'Extractor: GridREx\n' + self.label.text += f'Max depth: {extractor.grid.iterations}\n' \ + f'Number of splits: Fixed\nMin examples: {extractor.min_examples}\n' \ + f'Threshold: {extractor.threshold}' + elif isinstance(extractor, CReEPy) or isinstance(extractor, CREAM): + if isinstance(extractor, CReEPy): + self.label.text += f'Extractor: CReEPy\n' + else: + self.label.text += f'Extractor: CREAM\n' + self.label.text += f'Max depth: {extractor.depth}\nThreshold: {extractor.error_threshold}\n' + if isinstance(model, RegressorMixin): + self.label.text += f'Constant output: {extractor.output == ClusterExtractor.Target.CONSTANT}\n' + self.label.text += f'N components: {extractor.gauss_components}' + else: + ... + + +class ExtractorPerformanceInfoBox(InfoBox): + + def __init__(self, extractor=None, action=default_action(), **kwargs): + button_box = HorizontalBoxLayout() + button_box.add_widget(Label()) + self.button = Button(text='Show theory', size_hint_y=None, height=30, disabled=True) + self.button.bind(on_press=action) + button_box.add_widget(self.button) + button_box.add_widget(Label()) + super().__init__(extractor, **kwargs) + self.add_widget(Label()) + self.add_widget(button_box) + self.add_widget(Label()) + + def reset(self, extractor=None, model=None, test=None): + if extractor is None: + self.label.text = 'No fitted extractor' + self.button.disabled = True + else: + self.button.disabled = False + self.label.text = 'Extractor performance\n\n' + extracted = extractor.predict(test.iloc[:, :-1]) + idx = np.array([e is not None for e in extracted]) + extracted = extracted[idx] + true = test.iloc[idx, -1] + predicted = model.predict(test.iloc[idx, :-1]) + + self.label.text += f'Extracted rules: {extractor.n_rules}\n\n' + if isinstance(model, ClassifierMixin): + self.label.text += f'Accuracy: {accuracy_score(true, predicted):.2f} (model) ' \ + f'{accuracy_score(predicted, extracted):.2f} (fidelity)' + elif isinstance(model, RegressorMixin): + self.label.text += f'MAE: {mean_absolute_error(true, extracted):.2f} (model) ' \ + f'{mean_absolute_error(predicted, extracted):.2f} (fidelity)\n' \ + f'MSE: {mean_squared_error(true, extracted):.2f} (model) ' \ + f'{mean_squared_error(predicted, extracted):.2f} (fidelity)\n' \ + f'R2: {r2_score(true, extracted):.2f} (model) ' \ + f'{r2_score(predicted, extracted):.2f} (fidelity)' + else: + ... + + +class DataManipulationBox(VerticalBoxLayout): + + def __init__(self, root, **kwargs): + super().__init__(**kwargs) + self.padding = 20 + self.root = root + self.reset() + + def reset(self): + self.clear_widgets() + if self.root.data is None: + self.add_widget(Label(text='No selected dataset')) + else: + self.add_widget(Label(text='Options')) + self.add_widget(button_with_label('', [1, 1, 1], False, 'Discretise') if self.root.task == 'C' else + button_with_label('', [1, 1, 1], False, 'Normalise')) + self.add_widget(Label()) diff --git a/psyke/regression/__init__.py b/psyke/regression/__init__.py index 91c7a5f6..a1c2a1fe 100644 --- a/psyke/regression/__init__.py +++ b/psyke/regression/__init__.py @@ -25,7 +25,7 @@ class Target(Enum): def __init__(self, predictor): super().__init__(predictor) self._hypercubes = [] - self._output = HyperCubeExtractor.Target.CONSTANT + self.output = HyperCubeExtractor.Target.CONSTANT def extract(self, dataframe: pd.DataFrame) -> Theory: raise NotImplementedError('extract') @@ -33,17 +33,17 @@ def extract(self, dataframe: pd.DataFrame) -> Theory: def predict(self, dataframe: pd.DataFrame) -> Iterable: return np.array([self._predict(dict(row.to_dict())) for _, row in dataframe.iterrows()]) - def _predict(self, data: dict[str, float]) -> float: + def _predict(self, data: dict[str, float]) -> float | None: data = {k: v for k, v in data.items()} for cube in self._hypercubes: - if cube.__contains__(data): + if data in cube: return HyperCubeExtractor._get_cube_output(cube, data) - return np.nan + return None def _default_cube(self) -> HyperCube | RegressionCube | ClassificationCube: - if self._output == HyperCubeExtractor.Target.CONSTANT: + if self.output == HyperCubeExtractor.Target.CONSTANT: return HyperCube() - if self._output == HyperCubeExtractor.Target.REGRESSION: + if self.output == HyperCubeExtractor.Target.REGRESSION: return RegressionCube() return ClassificationCube() diff --git a/psyke/regression/gridex/__init__.py b/psyke/regression/gridex/__init__.py index 62f859f6..fad69b0c 100644 --- a/psyke/regression/gridex/__init__.py +++ b/psyke/regression/gridex/__init__.py @@ -28,8 +28,8 @@ def __init__(self, predictor, grid: Grid, min_examples: int, threshold: float, def extract(self, dataframe: pd.DataFrame) -> Theory: if isinstance(np.array(self.predictor.predict(dataframe.iloc[0:1, :-1])).flatten()[0], str): - self._output = HyperCubeExtractor.Target.CLASSIFICATION - surrounding = HyperCube.create_surrounding_cube(dataframe, output=self._output) + self.output = HyperCubeExtractor.Target.CLASSIFICATION + surrounding = HyperCube.create_surrounding_cube(dataframe, output=self.output) surrounding.init_std(2 * self.threshold) self._iterate(surrounding, dataframe) return self._create_theory(dataframe) @@ -100,7 +100,7 @@ def __evaluate_merge(self, not_in_cache: Iterable[HyperCube], merged_cube = cube.merge_along_dimension(other_cube, feature) merged_cube.update(dataframe, self.predictor) merge_cache[(cube, other_cube)] = merged_cube - return cube.output == other_cube.output if self._output == HyperCubeExtractor.Target.CLASSIFICATION else \ + return cube.output == other_cube.output if self.output == HyperCubeExtractor.Target.CLASSIFICATION else \ merge_cache[(cube, other_cube)].diversity < self.threshold def _merge(self, to_split: Iterable[HyperCube], dataframe: pd.DataFrame) -> Iterable[HyperCube]: diff --git a/requirements.txt b/requirements.txt index 5c2c179f..7ea8247d 100644 --- a/requirements.txt +++ b/requirements.txt @@ -9,4 +9,5 @@ onnxruntime~=1.9.0 parameterized>=0.8.1 protobuf~=3.19.0 setuptools~=57.0.0 -kneed>=0.7.0 \ No newline at end of file +kneed>=0.7.0 +kivy>=2.1.0 \ No newline at end of file diff --git a/test/psyke/classification/real/test_real.py b/test/psyke/classification/real/test_real.py index cac9124e..c4a95b57 100644 --- a/test/psyke/classification/real/test_real.py +++ b/test/psyke/classification/real/test_real.py @@ -24,7 +24,7 @@ def test_predict(self): substitutions = [solver.solveOnce(data_to_struct(data)) for _, data in self.test_set.iterrows()] index = self.test_set.shape[1] - 1 - expected = [str(query.solved_query.get_arg_at(index)) if query.is_yes else -1 for query in substitutions] + expected = [str(query.solved_query.get_arg_at(index)) if query.is_yes else None for query in substitutions] logger.info(predictions) logger.info(expected) diff --git a/test/psyke/regression/gridex/test_gridex.py b/test/psyke/regression/gridex/test_gridex.py index 52113f1b..13c4182e 100644 --- a/test/psyke/regression/gridex/test_gridex.py +++ b/test/psyke/regression/gridex/test_gridex.py @@ -32,6 +32,8 @@ def test_extract(self): def test_predict(self): predictions = self.extractor.predict(self.test_set.iloc[:, :-1]) + idx = [pred is not None for pred in predictions] + predictions = predictions[idx] # Handle both classification and regression. if not isinstance(predictions[0], str): @@ -40,7 +42,9 @@ def test_predict(self): solver = prolog_solver(static_kb=self.extracted_theory.assertZ(get_in_rule())) substitutions = [solver.solveOnce(data_to_struct(data)) for _, data in self.test_set.iterrows()] index = self.test_set.shape[1] - 1 - expected = [query.solved_query.get_arg_at(index) if query.is_yes else '-1' for query in substitutions] + expected = np.array( + [query.solved_query.get_arg_at(index) if query.is_yes else '-1' for query in substitutions] + )[idx] expected = [str(x) for x in expected] if isinstance(predictions[0], str) else \ [float(x) if isinstance(x, str) else float(x.value) for x in expected] @@ -49,11 +53,9 @@ def test_predict(self): In python nan == nan is always False so for this test we do not consider them. ''' if isinstance(predictions[0], str): - self.assertTrue(all([pred == exp if exp != "-1" else True for (pred, exp) in zip(predictions, expected)])) else: - idx = np.isnan(predictions) - self.assertTrue(max(abs(predictions[~idx] - np.array(expected)[~idx])) < get_default_precision()) + self.assertTrue(max(abs(predictions - expected)) < get_default_precision()) if __name__ == '__main__': diff --git a/test/psyke/regression/iter/test_iter.py b/test/psyke/regression/iter/test_iter.py index ff0fa702..58524430 100644 --- a/test/psyke/regression/iter/test_iter.py +++ b/test/psyke/regression/iter/test_iter.py @@ -38,8 +38,8 @@ def test_predict(self): ITER is not exhaustive so all entry's predictions that are not inside an hypercube are nan. In python nan == nan is always False so for this test we do not consider them. ''' - idx = np.isnan(predictions) - self.assertTrue(max(abs(predictions[~idx] - expected[~idx])) < get_default_precision()) + idx = [pred is not None for pred in predictions] + self.assertTrue(max(abs(predictions[idx] - expected[idx])) < get_default_precision()) if __name__ == '__main__':