
I added MLP probability #206

Closed
wants to merge 17 commits into from
1 change: 1 addition & 0 deletions .gitignore
@@ -1,5 +1,6 @@
# Byte-compiled / optimized / DLL files
__pycache__/
.idea/
*.py[cod]
*$py.class

5 changes: 4 additions & 1 deletion .idea/misc.xml
100755 → 100644


95 changes: 95 additions & 0 deletions eis_toolkit/deep_learning/mlp_function.py
@@ -0,0 +1,95 @@
from typing import Optional

import numpy as np
from sklearn.neural_network import MLPClassifier

from eis_toolkit.exceptions import InvalidArgumentTypeException, InvalidDatasetException
from eis_toolkit.model_performance_estimation.model_performance_estimation import performance_model_estimation


def train_evaluate_predict_with_mlp(
    dataset: np.ndarray,
    labels: np.ndarray,
    cross_validation_type: str,
    number_of_split: int,
    is_class_probability: bool = False,
    threshold_probability: Optional[float] = None,
    is_predict_full_map: bool = False,
    solver: str = "adam",
    alpha: float = 0.001,
    hidden_layer_sizes: tuple[int, int] = (16, 2),
    random_state: int = 0,
) -> np.ndarray:
"""
Do the training - evaluation - predictions steps with MLP.

Parameters:
dataset: Features data.
labels: Labels data.
cross_validation_type: selected cross validation method.
number_of_split: number of split to divide the dataset.
is_class_probability: if True the code return probability, otherwise it return class.
is_predict_full_map: if True the function will predict the full dataset otherwise predict only the te4st fold.
threshold_probability: works only if is_class_probability is True, is thresholds of probability.
solver: this is what in keras is called optimizer.
alpha: floating point represent regularization.
hidden_layer_sizes: It represents the number of neurons in the ith hidden layer.
random_state: random state for repeatability of results.
Copy link
Collaborator

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Remember to start docstring sentences with uppercase. There is also one typo

Return:
a numpy array with prediction (class if is_class_probability is set to false otherwise it return probability).
Raises:
InvalidDatasetException: When the dataset is None.
InvalidArgumentTypeException when the function try to make probability and the threshold is None.
"""

    # Two local variables to track the best fold
    best_score = 0.0
    best_handler_list = []

    if is_class_probability and threshold_probability is None:
        raise InvalidArgumentTypeException
Collaborator: I'd use here the InvalidParameterValueException and include a comment in the exception.


    if dataset is None or labels is None:
        raise InvalidDatasetException

    # Select the cross-validation method
    selected_cross_validation = performance_model_estimation(
        cross_validation_type=cross_validation_type, number_of_split=number_of_split
    )

    # Start the training process
    for fold_number, (train_index, test_index) in enumerate(selected_cross_validation.split(dataset, labels)):

        # Make an instance of the classifier
        classifier = MLPClassifier(
            solver=solver, alpha=alpha, hidden_layer_sizes=hidden_layer_sizes, random_state=random_state
        )

        # Train the classifier
        classifier.fit(dataset[train_index], labels[train_index])
        # Score on the held-out fold
        fold_score = classifier.score(dataset[test_index], labels[test_index])

        # Keep the classifier and test fold that score best
        if fold_number == 0 or fold_score > best_score:
            best_score = fold_score
            best_handler_list = [classifier, dataset[test_index]]

    # Unpack to named variables to avoid repeated indexing
    classifier = best_handler_list[0]

    if not is_predict_full_map:
        data = best_handler_list[1]
    else:
        data = dataset

    if not is_class_probability:
        # Predict classes
        prediction = classifier.predict(data)
    else:
        # Predict probabilities and apply the threshold
        prediction = classifier.predict_proba(data)
        prediction[prediction >= threshold_probability] = 1

    return prediction
Collaborator: I am thinking, should we do the predicting in another function and simply train a model here? We could return a dictionary with the best model, the best score and a list of fold performances, or something like that.
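The split suggested in the comment above can be sketched roughly as follows. This is a minimal standalone sketch, not eis_toolkit code: the function name `train_mlp_with_cv`, the fixed KFold splitter, and the toy data are my own assumptions; only the dictionary of best model, best score and per-fold scores follows the reviewer's suggestion.

```python
import numpy as np
from sklearn.model_selection import KFold
from sklearn.neural_network import MLPClassifier


def train_mlp_with_cv(dataset, labels, number_of_split=3, random_state=0):
    """Train one MLP per fold; return the best model plus per-fold scores."""
    fold_scores = []
    best_score = -1.0
    best_model = None
    splitter = KFold(n_splits=number_of_split, shuffle=True, random_state=random_state)
    for train_index, test_index in splitter.split(dataset, labels):
        model = MLPClassifier(
            solver="adam", alpha=0.001, hidden_layer_sizes=(16, 2),
            random_state=random_state, max_iter=500,
        )
        model.fit(dataset[train_index], labels[train_index])
        score = model.score(dataset[test_index], labels[test_index])
        fold_scores.append(score)
        if score > best_score:
            best_score = score
            best_model = model
    # Prediction would then live in a separate function taking result["best_model"]
    return {"best_model": best_model, "best_score": best_score, "fold_scores": fold_scores}


# Toy data: two well-separated clusters
rng = np.random.default_rng(0)
X = np.vstack([rng.normal(0, 0.3, (30, 2)), rng.normal(3, 0.3, (30, 2))])
y = np.array([0] * 30 + [1] * 30)
result = train_mlp_with_cv(X, y)
```

Keeping training and prediction separate would also let callers reuse the best model on new rasters without re-running cross-validation.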

12 changes: 12 additions & 0 deletions eis_toolkit/exceptions.py
@@ -72,3 +72,15 @@ class NonSquarePixelSizeException(Exception):

class NumericValueSignException(Exception):
"""Exception error class for numeric value sign exception."""


class InvalidCrossValidationSelected(Exception):
    """Exception raised when an invalid cross-validation method is selected."""


class InvalidNumberOfSplit(Exception):
    """Exception raised when the number of splits is incompatible."""
Collaborator (on lines +77 to +82): I would not add these exceptions but use the InvalidParameterValueException with a comment.



class InvalidDatasetException(Exception):
    """Exception raised when the dataset is None."""
@@ -0,0 +1,35 @@
from sklearn.model_selection import BaseCrossValidator, KFold, LeaveOneOut, StratifiedKFold

from eis_toolkit.exceptions import InvalidCrossValidationSelected, InvalidNumberOfSplit


def performance_model_estimation(
    cross_validation_type: str = "LOOCV", number_of_split: int = 5
) -> BaseCrossValidator:
"""
Evaluate the feature importance of a sklearn classifier or linear model.

Parameters:
cross_validation_type: Select cross validation (LOOCV, SKFOLD, KFOLD).
number_of_split: number used to split the dataset.
Copy link
Collaborator

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Could detail here what is required for the number_of_split so that exceptions are not raised.

Return:
Selected cross validation method
Raises:
InvalidCrossValidationSelected: When the cross validation method selected is not implemented.
InvalidNumberOfSplit: When the number of split is incompatible with the selected cross validation
"""

    if cross_validation_type is None:
        raise InvalidCrossValidationSelected

    if cross_validation_type != "LOOCV" and number_of_split <= 1:
        raise InvalidNumberOfSplit
Collaborator: This could be InvalidParameterValueException.

if cross_validation_type == "LOOCV":
return LeaveOneOut()
elif cross_validation_type == "KFOLD":
return KFold(n_splits=number_of_split, shuffle=True)
elif cross_validation_type == "SKFOLD":
return StratifiedKFold(n_splits=number_of_split, shuffle=True)
else:
raise InvalidCrossValidationSelected
Collaborator: This too could be InvalidParameterValueException.
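At usage level, the dispatch in this function amounts to mapping a name onto an sklearn splitter. A standalone sketch of that idea (the function name `select_cross_validation` is illustrative, and it raises a plain ValueError rather than the toolkit's exception):

```python
from sklearn.model_selection import KFold, LeaveOneOut, StratifiedKFold


def select_cross_validation(cross_validation_type="LOOCV", number_of_split=5):
    # Map the requested method name to an sklearn cross-validation splitter.
    if cross_validation_type == "LOOCV":
        return LeaveOneOut()
    elif cross_validation_type == "KFOLD":
        return KFold(n_splits=number_of_split, shuffle=True)
    elif cross_validation_type == "SKFOLD":
        return StratifiedKFold(n_splits=number_of_split, shuffle=True)
    raise ValueError(f"Unknown cross-validation type: {cross_validation_type}")


loocv = select_cross_validation("LOOCV")
kfold = select_cross_validation("KFOLD", number_of_split=4)
```

Any returned splitter then exposes the same `split(X, y)` generator, which is what the MLP training loop in this PR iterates over.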
