diff --git a/eis_toolkit/cli.py b/eis_toolkit/cli.py
index 090edbe2..006d6d4a 100644
--- a/eis_toolkit/cli.py
+++ b/eis_toolkit/cli.py
@@ -199,6 +199,53 @@ class NodataHandling(str, Enum):
     remove = "remove"
 
 
+class MLPActivationFunctions(str, Enum):
+    """MLP activation functions."""
+
+    relu = "relu"
+    linear = "linear"
+    sigmoid = "sigmoid"
+    tanh = "tanh"
+
+
+class MLPClassifierLastActivations(str, Enum):
+    """MLP classifier last activation functions."""
+
+    sigmoid = "sigmoid"
+    softmax = "softmax"
+
+
+# class MLPRegressorLastActivations(str, Enum):
+#     """MLP regressor last activation functions."""
+
+#     linear = "linear"
+
+
+class MLPOptimizers(str, Enum):
+    """MLP optimizers."""
+
+    adam = "adam"
+    adagrad = "adagrad"
+    rmsprop = "rmsprop"
+    sgd = "sgd"
+
+
+class MLPClassifierLossFunctions(str, Enum):
+    """MLP classifier loss functions."""
+
+    binary_crossentropy = "binary_crossentropy"
+    categorical_crossentropy = "categorical_crossentropy"
+
+
+class MLPRegressorLossFunctions(str, Enum):
+    """MLP regressor loss functions."""
+
+    mse = "mse"
+    mae = "mae"
+    hinge = "hinge"
+    huber = "huber"
+
+
 class FocalFilterMethod(str, Enum):
     """Focal filter methods."""
 
@@ -261,6 +308,22 @@ class ThresholdCriteria(str, Enum):
     outside = "outside"
 
 
+class KerasClassifierMetrics(str, Enum):
+    """Metrics available for Keras classifier models."""
+
+    accuracy = "accuracy"
+    precision = "precision"
+    recall = "recall"
+    categorical_crossentropy = "categorical_crossentropy"
+
+
+class KerasRegressorMetrics(str, Enum):
+    """Metrics available for Keras regressor models."""
+
+    mse = "mse"
+    mae = "mae"
+
+
 INPUT_FILE_OPTION = Annotated[
     Path,
     typer.Option(
@@ -2347,6 +2410,143 @@ def gradient_boosting_regressor_train_cli(
     typer.echo("Gradient boosting regressor training completed")
 
 
+# MLP CLASSIFIER
+@app.command()
+def mlp_classifier_train_cli(
+    input_rasters: INPUT_FILES_ARGUMENT,
+    target_labels: INPUT_FILE_OPTION,
+    output_file: OUTPUT_FILE_OPTION,
+    neurons: Annotated[List[int], typer.Option()],
+    activation: Annotated[MLPActivationFunctions, typer.Option(case_sensitive=False)] = MLPActivationFunctions.relu,
+    output_neurons: int = 1,
+    last_activation: Annotated[
+        MLPClassifierLastActivations, typer.Option(case_sensitive=False)
+    ] = MLPClassifierLastActivations.sigmoid,
+    epochs: int = 50,
+    batch_size: int = 32,
+    optimizer: Annotated[MLPOptimizers, typer.Option(case_sensitive=False)] = MLPOptimizers.adam,
+    learning_rate: float = 0.001,
+    loss_function: Annotated[
+        MLPClassifierLossFunctions, typer.Option(case_sensitive=False)
+    ] = MLPClassifierLossFunctions.binary_crossentropy,
+    dropout_rate: Optional[float] = None,
+    early_stopping: bool = True,
+    es_patience: int = 5,
+    validation_metrics: Annotated[List[KerasClassifierMetrics], typer.Option(case_sensitive=False)] = [
+        KerasClassifierMetrics.accuracy
+    ],
+    validation_split: float = 0.2,
+    random_state: Optional[int] = None,
+):
+    """Train and validate an MLP classifier model using Keras."""
+    from eis_toolkit.prediction.machine_learning_general import prepare_data_for_ml, save_model
+    from eis_toolkit.prediction.mlp import train_MLP_classifier
+
+    X, y, _, _ = prepare_data_for_ml(input_rasters, target_labels)
+
+    typer.echo("Progress: 30%")
+
+    # Train (and score) the model
+    model, metrics_dict = train_MLP_classifier(
+        X=X,
+        y=y,
+        neurons=neurons,
+        activation=get_enum_values(activation),
+        output_neurons=output_neurons,
+        last_activation=get_enum_values(last_activation),
+        epochs=epochs,
+        batch_size=batch_size,
+        optimizer=get_enum_values(optimizer),
+        learning_rate=learning_rate,
+        loss_function=get_enum_values(loss_function),
+        dropout_rate=dropout_rate,
+        early_stopping=early_stopping,
+        es_patience=es_patience,
+        metrics=get_enum_values(validation_metrics),
+        validation_split=validation_split,
+        random_state=random_state,
+    )
+
+    typer.echo("Progress: 80%")
+
+    save_model(model, output_file)
+
+    typer.echo("Progress: 90%")
+
+    json_str = json.dumps(metrics_dict)
+    typer.echo("Progress: 100%")
+    typer.echo(f"Results: {json_str}")
+
+    typer.echo("MLP classifier training completed.")
+
+
+# MLP REGRESSOR
+@app.command()
+def mlp_regressor_train_cli(
+    input_rasters: INPUT_FILES_ARGUMENT,
+    target_labels: INPUT_FILE_OPTION,
+    output_file: OUTPUT_FILE_OPTION,
+    neurons: Annotated[List[int], typer.Option()],
+    activation: Annotated[MLPActivationFunctions, typer.Option(case_sensitive=False)] = MLPActivationFunctions.relu,
+    output_neurons: int = 1,
+    epochs: int = 50,
+    batch_size: int = 32,
+    optimizer: Annotated[MLPOptimizers, typer.Option(case_sensitive=False)] = MLPOptimizers.adam,
+    learning_rate: float = 0.001,
+    loss_function: Annotated[
+        MLPRegressorLossFunctions, typer.Option(case_sensitive=False)
+    ] = MLPRegressorLossFunctions.mse,
+    dropout_rate: Optional[float] = None,
+    early_stopping: bool = True,
+    es_patience: int = 5,
+    validation_metrics: Annotated[List[KerasRegressorMetrics], typer.Option(case_sensitive=False)] = [
+        KerasRegressorMetrics.mse
+    ],
+    validation_split: float = 0.2,
+    random_state: Optional[int] = None,
+):
+    """Train and validate an MLP regressor model using Keras."""
+    from eis_toolkit.prediction.machine_learning_general import prepare_data_for_ml, save_model
+    from eis_toolkit.prediction.mlp import train_MLP_regressor
+
+    X, y, _, _ = prepare_data_for_ml(input_rasters, target_labels)
+
+    typer.echo("Progress: 30%")
+
+    # Train (and score) the model
+    model, metrics_dict = train_MLP_regressor(
+        X=X,
+        y=y,
+        neurons=neurons,
+        activation=get_enum_values(activation),
+        output_neurons=output_neurons,
+        last_activation="linear",
+        epochs=epochs,
+        batch_size=batch_size,
+        optimizer=get_enum_values(optimizer),
+        learning_rate=learning_rate,
+        loss_function=get_enum_values(loss_function),
+        dropout_rate=dropout_rate,
+        early_stopping=early_stopping,
+        es_patience=es_patience,
+        metrics=get_enum_values(validation_metrics),
+        validation_split=validation_split,
+        random_state=random_state,
+    )
+
+    typer.echo("Progress: 80%")
+
+    save_model(model, output_file)
+
+    typer.echo("Progress: 90%")
+
+    json_str = json.dumps(metrics_dict)
+    typer.echo("Progress: 100%")
+    typer.echo(f"Results: {json_str}")
+
+    typer.echo("MLP regressor training completed.")
+
+
 # TEST CLASSIFIER ML MODEL
 @app.command()
 def classifier_test_cli(
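# Reviewer note: the two commands above lean on `get_enum_values`, a helper defined
# earlier in cli.py and not part of this diff. A minimal sketch of the behavior the
# new options assume (a hypothetical re-implementation, not the toolkit's own code):
# Enum members, or lists of them, are unwrapped to their raw values before being
# handed to the training functions.

def get_enum_values(parameter):
    """Sketch only: unwrap an Enum member, or a list of members, to plain value(s)."""
    if isinstance(parameter, list):
        return [member.value for member in parameter]
    return parameter.value

# e.g. get_enum_values(MLPOptimizers.adam) == "adam"
#      get_enum_values([KerasClassifierMetrics.accuracy]) == ["accuracy"]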
diff --git a/eis_toolkit/prediction/machine_learning_general.py b/eis_toolkit/prediction/machine_learning_general.py
index 4c239df9..0d95cb61 100644
--- a/eis_toolkit/prediction/machine_learning_general.py
+++ b/eis_toolkit/prediction/machine_learning_general.py
@@ -12,6 +12,7 @@
 from scipy import sparse
 from sklearn.base import BaseEstimator
 from sklearn.model_selection import KFold, LeaveOneOut, StratifiedKFold, train_test_split
+from tensorflow import keras
 
 from eis_toolkit.evaluation.scoring import score_predictions
 from eis_toolkit.exceptions import (
@@ -31,7 +32,7 @@
 
 
 @beartype
-def save_model(model: BaseEstimator, path: Path) -> None:
+def save_model(model: Union[BaseEstimator, keras.Model], path: Path) -> None:
     """
-    Save a trained Sklearn model to a .joblib file.
+    Save a trained Sklearn or Keras model to a .joblib file.
@@ -43,7 +44,7 @@ def save_model(model: BaseEstimator, path: Path) -> None:
 
 
 @beartype
-def load_model(path: Path) -> BaseEstimator:
+def load_model(path: Path) -> Union[BaseEstimator, keras.Model]:
     """
-    Load a Sklearn model from a .joblib file.
+    Load a Sklearn or Keras model from a .joblib file.
diff --git a/eis_toolkit/prediction/machine_learning_predict.py b/eis_toolkit/prediction/machine_learning_predict.py
index 4a3c59e6..5bcebead 100644
--- a/eis_toolkit/prediction/machine_learning_predict.py
+++ b/eis_toolkit/prediction/machine_learning_predict.py
@@ -5,7 +5,7 @@
 from sklearn.base import BaseEstimator, is_classifier
 from tensorflow import keras
 
-from eis_toolkit.exceptions import InvalidModelTypeException
+from eis_toolkit.exceptions import InvalidDataShapeException, InvalidModelTypeException
 
 
 @beartype
@@ -77,9 +77,19 @@ def predict_regressor(
         Regression model prediction array.
 
     Raises:
-        InvalidModelTypeException: Input model is not a regressor model.
+        InvalidModelTypeException: Input model is not a regressor model or its type is not recognized.
+        InvalidDataShapeException: Input model does not have a single output unit.
     """
-    if is_classifier(model):
-        raise InvalidModelTypeException(f"Expected a regressor model: {type(model)}.")
+    if isinstance(model, BaseEstimator):
+        if is_classifier(model):
+            raise InvalidModelTypeException(f"Expected a regressor model: {type(model)}.")
+    elif isinstance(model, keras.Model):
+        if model.output_shape[-1] != 1:
+            raise InvalidDataShapeException(f"Expected a single output unit for a regressor model: {type(model)}.")
+    else:
+        raise InvalidModelTypeException(f"Model type not recognized: {type(model)}.")
+
+    result = model.predict(data)
+    if result.ndim == 2:
+        result = result.squeeze()
     return result
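# Reviewer note: a quick sanity sketch of the new Keras branch in predict_regressor
# (the toy model below is built here for illustration; it is not part of the diff).
# A single-output Keras regressor predicts with shape (n, 1); the squeeze added above
# brings that in line with the (n,) arrays sklearn regressors return.
import numpy as np
from tensorflow import keras

toy = keras.Sequential([keras.layers.Dense(1, input_shape=(4,))])
assert toy.output_shape[-1] == 1  # passes the new single-output-unit check

out = toy.predict(np.zeros((3, 4), dtype="float32"))
assert out.shape == (3, 1) and out.squeeze().shape == (3,)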
diff --git a/eis_toolkit/prediction/mlp.py b/eis_toolkit/prediction/mlp.py
index 6a6f7a88..c8a5f081 100644
--- a/eis_toolkit/prediction/mlp.py
+++ b/eis_toolkit/prediction/mlp.py
@@ -4,6 +4,7 @@
 from beartype import beartype
 from beartype.typing import Literal, Optional, Sequence, Tuple
 from tensorflow import keras
+from tensorflow.keras.metrics import CategoricalCrossentropy, MeanAbsoluteError, MeanSquaredError, Precision, Recall
 from tensorflow.keras.optimizers.legacy import SGD, Adagrad, Adam, RMSprop
 
 from eis_toolkit.exceptions import InvalidDataShapeException, InvalidParameterValueException
@@ -23,6 +24,23 @@ def _keras_optimizer(optimizer: str, **kwargs):
         raise InvalidParameterValueException(f"Unidentified optimizer: {optimizer}")
 
 
+def _keras_metric(metric_name: str):
+    if metric_name.lower() == "accuracy":
+        return "accuracy"
+    elif metric_name.lower() == "precision":
+        return Precision(name="precision")
+    elif metric_name.lower() == "recall":
+        return Recall(name="recall")
+    elif metric_name.lower() == "categorical_crossentropy":
+        return CategoricalCrossentropy(name="categorical_crossentropy")
+    elif metric_name.lower() == "mse":
+        return MeanSquaredError(name="mse")
+    elif metric_name.lower() == "mae":
+        return MeanAbsoluteError(name="mae")
+    else:
+        raise InvalidParameterValueException(f"Unsupported metric for Keras model: {metric_name}")
+
+
 def _check_MLP_inputs(
     neurons: Sequence[int],
     validation_split: Optional[float],
@@ -105,11 +123,11 @@ def train_MLP_classifier(
     dropout_rate: Optional[Number] = None,
     early_stopping: bool = True,
     es_patience: int = 5,
-    metrics: Optional[Sequence[Literal["accuracy", "precision", "recall", "f1_score"]]] = ["accuracy"],
+    metrics: Optional[Sequence[Literal["accuracy", "precision", "recall", "categorical_crossentropy"]]] = ["accuracy"],
     random_state: Optional[int] = None,
 ) -> Tuple[keras.Model, dict]:
     """
-    Train MLP (Multilayer Perceptron) using Keras.
+    Train MLP (Multilayer Perceptron) classifier using Keras.
 
     Creates a Sequential model with Dense NN layers. For each element in `neurons`, Dense layer with corresponding
     dimensionality/neurons is created with the specified activation function (`activation`). If `dropout_rate` is
@@ -184,7 +202,9 @@ def train_MLP_classifier(
     model.add(keras.layers.Dense(units=output_neurons, activation=last_activation))
 
     model.compile(
-        optimizer=_keras_optimizer(optimizer, learning_rate=learning_rate), loss=loss_function, metrics=metrics
+        optimizer=_keras_optimizer(optimizer, learning_rate=learning_rate),
+        loss=loss_function,
+        metrics=[_keras_metric(metric) for metric in metrics] if metrics is not None else None,
     )
 
     # 3. Train the model
@@ -222,11 +242,11 @@ def train_MLP_regressor(
     dropout_rate: Optional[Number] = None,
     early_stopping: bool = True,
     es_patience: int = 5,
-    metrics: Optional[Sequence[Literal["mse", "rmse", "mae"]]] = ["mse"],
+    metrics: Optional[Sequence[Literal["mse", "mae"]]] = ["mse"],
     random_state: Optional[int] = None,
 ) -> Tuple[keras.Model, dict]:
     """
-    Train MLP (Multilayer Perceptron) using Keras.
+    Train MLP (Multilayer Perceptron) regressor using Keras.
 
     Creates a Sequential model with Dense NN layers. For each element in `neurons`, Dense layer with corresponding
     dimensionality/neurons is created with the specified activation function (`activation`). If `dropout_rate` is
@@ -297,7 +317,9 @@ def train_MLP_regressor(
     model.add(keras.layers.Dense(units=output_neurons, activation=last_activation))
 
     model.compile(
-        optimizer=_keras_optimizer(optimizer, learning_rate=learning_rate), loss=loss_function, metrics=metrics
+        optimizer=_keras_optimizer(optimizer, learning_rate=learning_rate),
+        loss=loss_function,
+        metrics=[_keras_metric(metric) for metric in metrics] if metrics is not None else None,
    )
 
     # 3. Train the model
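# Reviewer note: an end-to-end smoke sketch of the new metric plumbing, using
# synthetic data and illustrative (not recommended) hyperparameters. It assumes only
# the signatures shown in this diff: train_MLP_classifier compiles the requested
# metrics through _keras_metric and scores them on the validation split.
import numpy as np
from eis_toolkit.prediction.mlp import train_MLP_classifier

rng = np.random.default_rng(42)
X = rng.random((200, 8)).astype("float32")
y = (X[:, 0] > 0.5).astype("int32")  # toy binary target

model, metrics_dict = train_MLP_classifier(
    X=X,
    y=y,
    neurons=[16, 8],
    output_neurons=1,
    last_activation="sigmoid",
    loss_function="binary_crossentropy",
    metrics=["accuracy", "precision"],
    epochs=2,
    batch_size=32,
    validation_split=0.2,
    random_state=42,
)
# metrics_dict should then carry "accuracy" and "precision" entries, matching what
# the mlp_classifier_train_cli command serializes as JSON.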