Skip to content

Commit

Permalink
Merge remote-tracking branch 'origin/feature/gui' into feature/gui
Browse files (browse the repository at this point in the history)
# Conflicts:
#	demo/DemoClassification.ipynb
#	gui.py
#	psyke/__init__.py
#	psyke/clustering/__init__.py
#	psyke/clustering/exact/__init__.py
#	psyke/extraction/hypercubic/__init__.py
#	psyke/extraction/hypercubic/gridex/__init__.py
#	psyke/extraction/real/__init__.py
#	requirements.txt
#	test/psyke/classification/real/test_real.py
#	test/psyke/regression/gridex/test_gridex.py
#	test/psyke/regression/iter/test_iter.py
  • Loading branch information
sabbatinif committed Aug 10, 2022
2 parents 04bc402 + e2f8c2c commit 2f8c1c5
Show file tree
Hide file tree
Showing 12 changed files with 320 additions and 331 deletions.
249 changes: 49 additions & 200 deletions demo/DemoClassification.ipynb

Large diffs are not rendered by default.

2 changes: 1 addition & 1 deletion gui.py
Original file line number Diff line number Diff line change
Expand Up @@ -153,7 +153,7 @@ def select_dataset(self, widget):
print(f'Loading {dataset}... ', end='')
if dataset == 'Iris':
x, y = load_iris(return_X_y=True, as_frame=True)
self.data = (x, y.replace({0: 'setosa', 1: 'versicolor', 2: 'virginica'}))
self.data = (x, y.replace({0: 'setosa', 1: 'virginica', 2: 'versicolor'}))
elif dataset == 'Wine':
self.data = load_wine(return_X_y=True, as_frame=True)
elif dataset == "House":
Expand Down
75 changes: 28 additions & 47 deletions psyke/__init__.py
Original file line number Diff line number Diff line change
@@ -1,15 +1,16 @@
from __future__ import annotations

import numpy as np
import pandas as pd
from sklearn.metrics import mean_absolute_error, mean_squared_error, r2_score, f1_score, accuracy_score

import psyke
from psyke.schema import DiscreteFeature
from psyke.utils import get_default_random_seed
from tuprolog.theory import Theory
from typing import Iterable
import logging


logging.basicConfig(level=logging.DEBUG)
logger = logging.getLogger('psyke')

Expand Down Expand Up @@ -56,7 +57,7 @@ def mae(self, dataframe: pd.DataFrame, predictor=None) -> float:
:return: the mean absolute error (MAE) of the predictions.
"""
predictions = np.array(self.predict(dataframe.iloc[:, :-1]))
idx = [prediction is not None for prediction in predictions]
idx = ~np.isnan(predictions)
return mean_absolute_error(dataframe.iloc[idx, -1] if predictor is None else
predictor.predict(dataframe.iloc[idx, :-1]).flatten(),
predictions[idx])
Expand All @@ -70,7 +71,7 @@ def mse(self, dataframe: pd.DataFrame, predictor=None) -> float:
:return: the mean squared error (MSE) of the predictions.
"""
predictions = np.array(self.predict(dataframe.iloc[:, :-1]))
idx = [prediction is not None for prediction in predictions]
idx = ~np.isnan(predictions)
return mean_squared_error(dataframe.iloc[idx, -1] if predictor is None else
predictor.predict(dataframe.iloc[idx, :-1]).flatten(),
predictions[idx])
Expand All @@ -84,7 +85,7 @@ def r2(self, dataframe: pd.DataFrame, predictor=None) -> float:
:return: the R2 score of the predictions.
"""
predictions = np.array(self.predict(dataframe.iloc[:, :-1]))
idx = [prediction is not None for prediction in predictions]
idx = ~np.isnan(predictions)
return r2_score(dataframe.iloc[idx, -1] if predictor is None else
predictor.predict(dataframe.iloc[idx, :-1]).flatten(),
predictions[idx])
Expand All @@ -98,10 +99,9 @@ def accuracy(self, dataframe: pd.DataFrame, predictor=None) -> float:
:return: the accuracy classification score of the predictions.
"""
predictions = np.array(self.predict(dataframe.iloc[:, :-1]))
idx = [prediction is not None for prediction in predictions]
return accuracy_score(dataframe.iloc[idx, -1] if predictor is None else
predictor.predict(dataframe.iloc[idx, :-1]).flatten(),
predictions[idx])
return accuracy_score(dataframe.iloc[:, -1] if predictor is None else
predictor.predict(dataframe.iloc[:, :-1]).flatten(),
predictions)

def f1(self, dataframe: pd.DataFrame, predictor=None) -> float:
"""
Expand All @@ -112,43 +112,26 @@ def f1(self, dataframe: pd.DataFrame, predictor=None) -> float:
:return: the F1 score of the predictions.
"""
predictions = np.array(self.predict(dataframe.iloc[:, :-1]))
idx = [prediction is not None for prediction in predictions]
return f1_score(dataframe.iloc[idx, -1] if predictor is None else
predictor.predict(dataframe.iloc[idx, :-1]).flatten(),
predictions[idx])

@staticmethod
def exact(depth: int, error_threshold: float, output, gauss_components: int = 2):
"""
Creates a new ExACT instance.
"""
from psyke.clustering.exact import ExACT
return ExACT(depth, error_threshold, output, gauss_components)
return f1_score(dataframe.iloc[:, -1] if predictor is None else
predictor.predict(dataframe.iloc[:, :-1]).flatten(),
predictions)

@staticmethod
def cream(depth: int, error_threshold: float, output, gauss_components: int = 2):
"""
Creates a new CREAM instance.
"""
from psyke.clustering.cream import CREAM
return CREAM(depth, error_threshold, output, gauss_components)

@staticmethod
def cart(predictor, max_depth: int = 3, max_leaves: int = 3,
discretization: Iterable[DiscreteFeature] = None, simplify: bool = True) -> Extractor:
def cart(predictor: psyke.cart.CartPredictor, discretization: Iterable[DiscreteFeature] = None,
simplify: bool = True) -> Extractor:
"""
Creates a new Cart extractor.
"""
from psyke.extraction.cart import Cart
return Cart(predictor, max_depth, max_leaves, discretization=discretization, simplify=simplify)
from psyke.cart import Cart
return Cart(predictor, discretization, simplify)

@staticmethod
def iter(predictor, min_update: float = 0.1, n_points: int = 1, max_iterations: int = 600, min_examples: int = 250,
threshold: float = 0.1, fill_gaps: bool = True, seed: int = get_default_random_seed()) -> Extractor:
"""
Creates a new ITER extractor.
"""
from psyke.extraction.hypercubic.iter import ITER
from psyke.regression.iter import ITER
return ITER(predictor, min_update, n_points, max_iterations, min_examples, threshold, fill_gaps, seed)

@staticmethod
Expand All @@ -157,7 +140,7 @@ def gridex(predictor, grid, min_examples: int = 250, threshold: float = 0.1,
"""
Creates a new GridEx extractor.
"""
from psyke.extraction.hypercubic.gridex import GridEx
from psyke.regression.gridex import GridEx
return GridEx(predictor, grid, min_examples, threshold, seed)

@staticmethod
Expand All @@ -166,33 +149,31 @@ def gridrex(predictor, grid, min_examples: int = 250, threshold: float = 0.1,
"""
Creates a new GridREx extractor.
"""
from psyke.extraction.hypercubic.gridrex import GridREx
from psyke.regression.gridrex import GridREx
return GridREx(predictor, grid, min_examples, threshold, seed)

@staticmethod
def creepy(predictor, depth: int, error_threshold: float, output, gauss_components: int = 2,
ranks: [(str, float)] = [], ignore_threshold: float = 0.0) -> Extractor:
def cream(predictor, depth: int, error_threshold: float, output, gauss_components: int = 10) -> Extractor:
"""
Creates a new CReEPy extractor.
Creates a new CREAM extractor.
"""
from psyke.extraction.hypercubic.creepy import CReEPy
return CReEPy(predictor, depth, error_threshold, output, gauss_components, ranks, ignore_threshold)
from psyke.clustering.cream import CREAM
return CREAM(predictor, depth, error_threshold, output, gauss_components)

@staticmethod
def orchid(predictor, depth: int, error_threshold: float, output, gauss_components: int = 2,
ranks: [(str, float)] = [], ignore_threshold: float = 0.0) -> Extractor:
def creepy(predictor, depth: int, error_threshold: float, output, gauss_components: int = 10) -> Extractor:
"""
Creates a new ORCHiD extractor.
Creates a new CReEPy extractor.
"""
from psyke.extraction.hypercubic.orchid import ORCHiD
return ORCHiD(predictor, depth, error_threshold, output, gauss_components, ranks, ignore_threshold)
from psyke.clustering.creepy import CReEPy
return CReEPy(predictor, depth, error_threshold, output, gauss_components)

@staticmethod
def real(predictor, discretization=None) -> Extractor:
"""
Creates a new REAL extractor.
"""
from psyke.extraction.real import REAL
from psyke.classification.real import REAL
return REAL(predictor, [] if discretization is None else discretization)

@staticmethod
Expand All @@ -201,7 +182,7 @@ def trepan(predictor, discretization=None, min_examples: int = 0, max_depth: int
"""
Creates a new Trepan extractor.
"""
from psyke.extraction.trepan import Trepan, SplitLogic
from psyke.classification.trepan import Trepan, SplitLogic
if split_logic is None:
split_logic = SplitLogic.DEFAULT
return Trepan(predictor, [] if discretization is None else discretization, min_examples, max_depth, split_logic)
17 changes: 9 additions & 8 deletions psyke/clustering/__init__.py
Original file line number Diff line number Diff line change
Expand Up @@ -3,25 +3,26 @@
import pandas as pd
from tuprolog.theory import Theory

from psyke.extraction.hypercubic.hypercube import ClosedRegressionCube, ClosedClassificationCube, ClosedCube
from psyke.utils import Target
from psyke.regression import HyperCubeExtractor
from psyke.regression.hypercube import ClosedClassificationCube, ClosedCube, ClosedRegressionCube


class InterpretableClustering:
class ClusterExtractor(HyperCubeExtractor):

def __init__(self, depth: int, error_threshold: float, output: Target = Target.CONSTANT, gauss_components: int = 2):
def __init__(self, predictor, depth: int, error_threshold: float,
output: HyperCubeExtractor.Target = HyperCubeExtractor.Target.CONSTANT, gauss_components: int = 2):
super().__init__(predictor)
self.depth = depth
self.error_threshold = error_threshold
self.gauss_components = gauss_components
self._output = output
self._hypercubes = []
self.output = output

def extract(self, dataframe: pd.DataFrame) -> Theory:
    """
    Abstract extraction hook: this base implementation always raises.

    :param dataframe: the dataset the extraction would be run on.
    :return: never returns from this implementation.
    :raises NotImplementedError: always; concrete extractors override this method.
    """
    raise NotImplementedError('extract')

def _default_cube(self) -> Union[ClosedCube, ClosedRegressionCube, ClosedClassificationCube]:
if self._output == Target.CONSTANT:
if self.output == ClusterExtractor.Target.CONSTANT:
return ClosedCube()
if self._output == Target.REGRESSION:
if self.output == ClusterExtractor.Target.REGRESSION:
return ClosedRegressionCube()
return ClosedClassificationCube()
49 changes: 22 additions & 27 deletions psyke/clustering/exact/__init__.py
Original file line number Diff line number Diff line change
Expand Up @@ -6,23 +6,26 @@
import numpy as np
import pandas as pd
from sklearn.cluster import DBSCAN
from sklearn.neighbors import KNeighborsClassifier, KNeighborsRegressor

from psyke.clustering import InterpretableClustering
from psyke.extraction.hypercubic import Node, ClosedCube, HyperCube
from tuprolog.theory import Theory
from psyke.clustering import ClusterExtractor
from psyke.regression import Node, ClosedCube, HyperCube
from psyke.clustering.utils import select_gaussian_mixture, select_dbscan_epsilon
from psyke.utils import Target


class ExACT(InterpretableClustering):
class CReEPy(ClusterExtractor):
"""
Explanator implementing ExACT algorithm.
Explanator implementing CReEPy algorithm.
"""

def __init__(self, depth: int, error_threshold: float, output: Target = Target.CONSTANT, gauss_components: int = 5):
super().__init__(depth, error_threshold, output, gauss_components)
self._predictor = KNeighborsClassifier() if output == Target.CLASSIFICATION else KNeighborsRegressor()
self._predictor.n_neighbors = 1
def __init__(self, predictor, depth: int, error_threshold: float,
output: ClusterExtractor.Target = ClusterExtractor.Target.CONSTANT, gauss_components: int = 5):
super().__init__(predictor, depth, error_threshold, output, gauss_components)

def _split(self, right: ClosedCube, outer_cube: ClosedCube, data: pd.DataFrame, indices: np.ndarray):
    """
    Update two cubes from the two complementary parts of ``data``.

    :param right: cube updated in place with the rows selected by ``indices``.
    :param outer_cube: cube that is copied (not modified here) to build the left cube.
    :param data: dataset whose rows are partitioned by ``indices``.
    :param indices: row selector; assumed to be a boolean mask, since ``~indices``
        is used to pick the complementary rows -- TODO confirm against callers.
    :return: the pair ``(right, left)``.
    """
    right.update(data.iloc[indices], self.predictor)
    # Work on a copy so the caller's outer cube is left untouched by the update below.
    left = outer_cube.copy()
    left.update(data.iloc[~indices], self.predictor)
    return right, left

def __eligible_cubes(self, gauss_pred: np.ndarray, node: Node, clusters: int):
cubes = []
Expand All @@ -42,25 +45,17 @@ def _indices(cube: ClosedCube, data: pd.DataFrame) -> np.ndarray | None:
return indices

def _create_cube(self, dataframe: pd.DataFrame, clusters: int) -> ClosedCube:
data = ExACT._remove_string_label(dataframe)
data = CReEPy._remove_string_label(dataframe)
dbscan_pred = DBSCAN(eps=select_dbscan_epsilon(data, clusters)).fit_predict(data.iloc[:, :-1])
return HyperCube.create_surrounding_cube(
dataframe.iloc[np.where(dbscan_pred == Counter(dbscan_pred).most_common(1)[0][0])],
True, self._output
True, self.output
)

def extract(self, dataframe: pd.DataFrame) -> Iterable[HyperCube]:
self._predictor.fit(dataframe.iloc[:, :-1], dataframe.iloc[:, -1])
def extract(self, dataframe: pd.DataFrame) -> Theory:
self._hypercubes = \
self._iterate(Node(dataframe, HyperCube.create_surrounding_cube(dataframe, True, self._output)))
return list(self._hypercubes)

def print(self):
for cube in self._hypercubes:
print(f'Output is {cube.output} if:')
for feature in cube.dimensions:
lower, upper = cube[feature]
print(f' {feature} is in [{lower:.2f}, {upper:.2f}]')
self._iterate(Node(dataframe, HyperCube.create_surrounding_cube(dataframe, True, self.output)))
return self._create_theory(dataframe)

@staticmethod
def _remove_string_label(dataframe: pd.DataFrame):
Expand All @@ -73,7 +68,7 @@ def _iterate(self, surrounding: Node) -> Iterable[HyperCube]:
while len(to_split) > 0:
to_split.sort(reverse=True)
(_, depth, _, node) = to_split.pop()
data = ExACT._remove_string_label(node.dataframe)
data = CReEPy._remove_string_label(node.dataframe)
gauss_params = select_gaussian_mixture(data, self.gauss_components)
gauss_pred = gauss_params[2].predict(data)
cubes, indices = self.__eligible_cubes(gauss_pred, node, gauss_params[1])
Expand All @@ -83,9 +78,9 @@ def _iterate(self, surrounding: Node) -> Iterable[HyperCube]:
continue
_, _, _, indices, cube = max(cubes)

cube.update(node.dataframe[indices], self._predictor)
cube.update(node.dataframe[indices], self.predictor)
node.right = Node(node.dataframe[indices], cube)
node.cube.update(node.dataframe[~indices], self._predictor)
node.cube.update(node.dataframe[~indices], self.predictor)
node.left = Node(node.dataframe[~indices], node.cube)

if depth < self.depth and cube.diversity > self.error_threshold:
Expand Down
33 changes: 19 additions & 14 deletions psyke/extraction/hypercubic/__init__.py
Original file line number Diff line number Diff line change
@@ -1,4 +1,6 @@
from __future__ import annotations

from enum import Enum
from typing import Iterable
import numpy as np
import pandas as pd
Expand All @@ -7,18 +9,23 @@
from tuprolog.core import Var, Struct, clause
from tuprolog.theory import Theory, mutable_theory
from psyke import Extractor, logger
from psyke.extraction.hypercubic.hypercube import HyperCube, RegressionCube, ClassificationCube, ClosedCube
from psyke.regression.strategy import FixedStrategy, Strategy
from psyke.regression.utils import Limit, MinUpdate, ZippedDimension, Expansion
from psyke.utils.logic import create_variable_list, create_head, to_var
from psyke.utils import Target, get_int_precision
from psyke.extraction.hypercubic.strategy import Strategy, FixedStrategy
from psyke.regression.hypercube import HyperCube, ClosedCube, RegressionCube, ClosedRegressionCube, ClassificationCube


class HyperCubeExtractor(Extractor):

class Target(Enum):
    """
    Kind of output attached to the hypercubes built by an extractor.

    The extractor picks its default cube type from this value: ``CONSTANT``
    and ``REGRESSION`` each select a dedicated cube class, and any other
    member falls through to the classification cube.
    """

    # Plain integers on every member. A trailing comma (``NAME = 1,``) would
    # silently turn the value into the 1-tuple ``(1,)``, leaving the members
    # with inconsistent value types and breaking lookups such as ``Target(1)``.
    CLASSIFICATION = 1
    CONSTANT = 2
    REGRESSION = 3

def __init__(self, predictor):
super().__init__(predictor)
self._hypercubes = []
self._output = Target.CONSTANT
self.output = HyperCubeExtractor.Target.CONSTANT

def extract(self, dataframe: pd.DataFrame) -> Theory:
    """
    Abstract extraction hook: this base implementation always raises.

    :param dataframe: the dataset the extraction would be run on.
    :return: never returns from this implementation.
    :raises NotImplementedError: always; concrete extractors override this method.
    """
    raise NotImplementedError('extract')
Expand All @@ -29,17 +36,14 @@ def predict(self, dataframe: pd.DataFrame) -> Iterable:
def _predict(self, data: dict[str, float]) -> float | None:
data = {k: v for k, v in data.items()}
for cube in self._hypercubes:
if cube.__contains__(data):
if self._output == Target.CLASSIFICATION:
return HyperCubeExtractor._get_cube_output(cube, data)
else:
return round(HyperCubeExtractor._get_cube_output(cube, data), get_int_precision())
if data in cube:
return HyperCubeExtractor._get_cube_output(cube, data)
return None

def _default_cube(self) -> HyperCube | RegressionCube | ClassificationCube:
if self._output == Target.CONSTANT:
if self.output == HyperCubeExtractor.Target.CONSTANT:
return HyperCube()
if self._output == Target.REGRESSION:
if self.output == HyperCubeExtractor.Target.REGRESSION:
return RegressionCube()
return ClassificationCube()

Expand All @@ -49,7 +53,7 @@ def _get_cube_output(cube: HyperCube | RegressionCube, data: dict[str, float]) -
isinstance(cube, RegressionCube) else cube.output

@staticmethod
def _create_head(dataframe: pd.DataFrame, variables: list[Var], output: float | LinearRegression) -> Struct:
def __create_head(dataframe: pd.DataFrame, variables: list[Var], output: float | LinearRegression) -> Struct:
return create_head(dataframe.columns[-1], variables[:-1], output) \
if not isinstance(output, LinearRegression) else \
create_head(dataframe.columns[-1], variables[:-1], variables[-1])
Expand All @@ -59,13 +63,14 @@ def _ignore_dimensions(self) -> Iterable[str]:

def _create_theory(self, dataframe: pd.DataFrame) -> Theory:
new_theory = mutable_theory()
ignore_dimensions = self._ignore_dimensions()
for cube in self._hypercubes:
logger.info(cube.output)
logger.info(cube.dimensions)
variables = create_variable_list([], dataframe)
variables[dataframe.columns[-1]] = to_var(dataframe.columns[-1])
head = HyperCubeExtractor._create_head(dataframe, list(variables.values()), cube.output)
body = cube.body(variables, self._ignore_dimensions())
head = HyperCubeExtractor.__create_head(dataframe, list(variables.values()), cube.output)
body = cube.body(variables, ignore_dimensions)
new_theory.assertZ(clause(head, body))
return new_theory

Expand Down
Loading

0 comments on commit 2f8c1c5

Please sign in to comment.