Skip to content

Commit

Permalink
Merge pull request #390 from GispoCoding/389-separate-predicting-and-…
Browse files Browse the repository at this point in the history
…testing-cli-functions-for-classifier-and-regressor-models

389 refactor ML model predicting and testing
  • Loading branch information
nmaarnio authored May 21, 2024
2 parents cca25dd + 493e6c6 commit e7354ca
Show file tree
Hide file tree
Showing 4 changed files with 159 additions and 74 deletions.
180 changes: 117 additions & 63 deletions eis_toolkit/cli.py
Original file line number Diff line number Diff line change
Expand Up @@ -2313,97 +2313,162 @@ def gradient_boosting_regressor_train_cli(
typer.echo("Gradient boosting regressor training completed")


# EVALUATE ML MODEL
# TEST CLASSIFIER ML MODEL
@app.command()
def classifier_test_cli(
    input_rasters: INPUT_FILES_ARGUMENT,
    target_labels: INPUT_FILE_OPTION,
    model_file: INPUT_FILE_OPTION,
    output_raster_probability: OUTPUT_FILE_OPTION,
    output_raster_classified: OUTPUT_FILE_OPTION,
    classification_threshold: float = 0.5,
    test_metrics: Annotated[List[ClassifierMetrics], typer.Option(case_sensitive=False)] = [
        ClassifierMetrics.accuracy
    ],
):
    """Test trained machine learning classifier model by predicting and scoring."""
    # Heavy dependencies are imported lazily so that loading the CLI module stays cheap.
    from eis_toolkit.evaluation.scoring import score_predictions
    from eis_toolkit.prediction.machine_learning_general import load_model, prepare_data_for_ml, reshape_predictions
    from eis_toolkit.prediction.machine_learning_predict import predict_classifier

    features, labels_true, reference_profile, nodata_mask = prepare_data_for_ml(input_rasters, target_labels)
    typer.echo("Progress: 30%")

    def _reshape(values):
        # Reshape model output using the reference raster's height/width and nodata mask.
        return reshape_predictions(values, reference_profile["height"], reference_profile["width"], nodata_mask)

    classifier = load_model(model_file)
    labels_pred, probabilities = predict_classifier(features, classifier, classification_threshold, True)
    probability_grid = _reshape(probabilities)
    label_grid = _reshape(labels_pred)

    # Scoring uses the flat predictions, not the reshaped grids.
    scores = score_predictions(labels_true, labels_pred, get_enum_values(test_metrics))
    typer.echo("Progress: 80%")

    # Both outputs are written as single-band float32 rasters based on the reference profile.
    out_profile = reference_profile.copy()
    out_profile.update(count=1, dtype=np.float32)

    outputs = (
        (output_raster_probability, probability_grid),
        (output_raster_classified, label_grid),
    )
    for raster_path, band in outputs:
        with rasterio.open(raster_path, "w", **out_profile) as dst:
            dst.write(band, 1)

    # Report each requested metric on its own line, framed by blank lines.
    typer.echo("\n")
    for metric_name, score in scores.items():
        typer.echo(f"{metric_name}: {score}")
    typer.echo("\n")

    typer.echo("Progress: 100%")
    summary = (
        "Testing classifier model completed, writing rasters to "
        f"{output_raster_probability} and {output_raster_classified}."
    )
    typer.echo(summary)

metrics_dict = score_predictions(y, predictions, validation_metrics)

# TEST REGRESSOR ML MODEL
@app.command()
def regressor_test_cli(
    input_rasters: INPUT_FILES_ARGUMENT,
    target_labels: INPUT_FILE_OPTION,
    model_file: INPUT_FILE_OPTION,
    output_raster: OUTPUT_FILE_OPTION,
    test_metrics: Annotated[List[RegressorMetrics], typer.Option(case_sensitive=False)] = [
        RegressorMetrics.mse
    ],
):
    """Test trained machine learning regressor model by predicting and scoring."""
    # Heavy dependencies are imported lazily so that loading the CLI module stays cheap.
    from eis_toolkit.evaluation.scoring import score_predictions
    from eis_toolkit.prediction.machine_learning_general import load_model, prepare_data_for_ml, reshape_predictions
    from eis_toolkit.prediction.machine_learning_predict import predict_regressor

    features, targets, reference_profile, nodata_mask = prepare_data_for_ml(input_rasters, target_labels)
    typer.echo("Progress: 30%")

    regressor = load_model(model_file)
    predicted = predict_regressor(features, regressor)
    height, width = reference_profile["height"], reference_profile["width"]
    predicted_grid = reshape_predictions(predicted, height, width, nodata_mask)

    # Scoring uses the flat predictions, not the reshaped grid.
    scores = score_predictions(targets, predicted, get_enum_values(test_metrics))
    typer.echo("Progress: 80%")

    # The output is a single-band float32 raster based on the reference profile.
    out_profile = reference_profile.copy()
    out_profile.update(count=1, dtype=np.float32)

    with rasterio.open(output_raster, "w", **out_profile) as dst:
        dst.write(predicted_grid, 1)

    # Report each requested metric on its own line, framed by blank lines.
    typer.echo("\n")
    for metric_name, score in scores.items():
        typer.echo(f"{metric_name}: {score}")
    typer.echo("\n")

    typer.echo("Progress: 100%\n")

    completion_message = f"Testing regressor model completed, writing raster to {output_raster}."
    typer.echo(completion_message)


# PREDICT WITH CLASSIFIER ML MODEL
@app.command()
def classifier_predict_cli(
    input_rasters: INPUT_FILES_ARGUMENT,
    model_file: INPUT_FILE_OPTION,
    output_raster_probability: OUTPUT_FILE_OPTION,
    output_raster_classified: OUTPUT_FILE_OPTION,
    classification_threshold: float = 0.5,
):
    """Predict with a trained machine learning classifier model."""
    # Heavy dependencies are imported lazily so that loading the CLI module stays cheap.
    from eis_toolkit.prediction.machine_learning_general import load_model, prepare_data_for_ml, reshape_predictions
    from eis_toolkit.prediction.machine_learning_predict import predict_classifier

    # No target labels are passed here, so the returned label array is ignored.
    features, _, reference_profile, nodata_mask = prepare_data_for_ml(input_rasters)

    typer.echo("Progress: 30%")

    def _reshape(values):
        # Reshape model output using the reference raster's height/width and nodata mask.
        return reshape_predictions(values, reference_profile["height"], reference_profile["width"], nodata_mask)

    classifier = load_model(model_file)
    labels_pred, probabilities = predict_classifier(features, classifier, classification_threshold, True)
    probability_grid = _reshape(probabilities)
    label_grid = _reshape(labels_pred)
    typer.echo("Progress: 80%")

    # Both outputs are written as single-band float32 rasters based on the reference profile.
    out_profile = reference_profile.copy()
    out_profile.update(count=1, dtype=np.float32)

    outputs = (
        (output_raster_probability, probability_grid),
        (output_raster_classified, label_grid),
    )
    for raster_path, band in outputs:
        with rasterio.open(raster_path, "w", **out_profile) as dst:
            dst.write(band, 1)

    typer.echo("Progress: 100%")
    summary = (
        "Predicting with classifier model completed, writing rasters to "
        f"{output_raster_probability} and {output_raster_classified}."
    )
    typer.echo(summary)


# PREDICT WITH REGRESSOR ML MODEL
@app.command()
def regressor_predict_cli(
input_rasters: INPUT_FILES_ARGUMENT,
model_file: INPUT_FILE_OPTION,
output_raster: OUTPUT_FILE_OPTION,
):
"""Predict with a trained machine learning regressor model."""
from eis_toolkit.prediction.machine_learning_general import load_model, prepare_data_for_ml, reshape_predictions
from eis_toolkit.prediction.machine_learning_predict import predict_regressor

X, _, reference_profile, nodata_mask = prepare_data_for_ml(input_rasters)

typer.echo("Progress: 30%")

model = load_model(model_file)
predictions = predict_regressor(X, model)
predictions_reshaped = reshape_predictions(
predictions, reference_profile["height"], reference_profile["width"], nodata_mask
)
Expand All @@ -2413,22 +2478,11 @@ def predict_with_trained_model_cli(
out_profile = reference_profile.copy()
out_profile.update({"count": 1, "dtype": np.float32})

if is_classifier(model):
directory = os.path.split(output_raster)[0]
name = os.path.splitext(os.path.basename(output_raster))[0]
labels_output = os.path.join(directory, name + "_labels" + ".tif")
probabilities_output = os.path.join(directory, name + "_probabilities" + ".tif")
for output_path, output_data in zip(
[labels_output, probabilities_output], [predictions_reshaped, probabilities_reshaped]
):
with rasterio.open(output_path, "w", **out_profile) as dst:
dst.write(output_data, 1)
else:
with rasterio.open(output_raster, "w", **out_profile) as dst:
dst.write(predictions_reshaped, 1)
with rasterio.open(output_raster, "w", **out_profile) as dst:
dst.write(predictions_reshaped, 1)

typer.echo("Progress: 100%")
typer.echo("Predicting completed")
typer.echo(f"Predicting with regressor model completed, writing raster to {output_raster}.")


# FUZZY OVERLAYS
Expand Down
4 changes: 4 additions & 0 deletions eis_toolkit/exceptions.py
Original file line number Diff line number Diff line change
Expand Up @@ -34,6 +34,10 @@ class InvalidDataShapeException(Exception):
"""Exception error for datasets with invalid shapes."""


class InvalidModelTypeException(Exception):
    """Exception error class for a model that is not of the type an operation expects."""


class InvalidParameterValueException(Exception):
"""Exception error class for invalid parameter values."""

Expand Down
45 changes: 36 additions & 9 deletions eis_toolkit/prediction/machine_learning_predict.py
Original file line number Diff line number Diff line change
Expand Up @@ -2,40 +2,62 @@
import pandas as pd
from beartype import beartype
from beartype.typing import Tuple, Union
from sklearn.base import BaseEstimator
from sklearn.base import BaseEstimator, is_classifier
from tensorflow import keras

from eis_toolkit.exceptions import InvalidModelTypeException


@beartype
def predict_classifier(
    data: Union[np.ndarray, pd.DataFrame],
    model: Union[BaseEstimator, keras.Model],
    classification_threshold: float = 0.5,
    include_probabilities: bool = True,
) -> Union[np.ndarray, Tuple[np.ndarray, np.ndarray]]:
    """
    Predict with a trained classifier model.

    Args:
        data: Data used to make predictions.
        model: Trained classifier. Can be any classifier model trained with
            EIS Toolkit (Sklearn and Keras models).
        classification_threshold: Threshold for classifying based on probabilities. Only used for
            binary classification. Defaults to 0.5.
        include_probabilities: If the probability array should be returned too. Defaults to True.

    Returns:
        Predicted labels as a one-dimensional float32 array and, optionally, the predicted
        probabilities as a float32 array. For binary classification the probabilities are
        one-dimensional (one value per sample); for multiclass classification they keep the
        two-dimensional (samples, classes) shape produced by the model.

    Raises:
        InvalidModelTypeException: Input model is not a classifier model or its type is not recognized.
    """
    if isinstance(model, keras.Model):
        probabilities = model.predict(data).astype(np.float32)
        # A single output column is treated as binary classification.
        is_binary = probabilities.shape[1] == 1
        if is_binary:
            # Index the class axis explicitly instead of calling squeeze(): squeeze() would
            # also drop the sample axis for a single-sample input and return a 0-d array.
            probabilities = probabilities[:, 0]
    elif isinstance(model, BaseEstimator):
        if not is_classifier(model):
            raise InvalidModelTypeException(f"Expected a classifier model: {type(model)}.")
        probabilities = model.predict_proba(data).astype(np.float32)
        # Two probability columns are treated as binary classification.
        is_binary = probabilities.shape[1] == 2
        if is_binary:
            # Keep only the second column (probability of the second class).
            probabilities = probabilities[:, 1]
    else:
        raise InvalidModelTypeException(f"Model type not recognized: {type(model)}.")

    if is_binary:
        labels = (probabilities >= classification_threshold).astype(np.float32)
    else:
        # NOTE(review): these are column indices of the probability array, not values looked up
        # from the model's class list — they match the class values only if the classes are
        # encoded as 0..k-1. Confirm against how training labels are encoded.
        labels = probabilities.argmax(axis=-1).astype(np.float32)

    if include_probabilities:
        return labels, probabilities
    return labels


@beartype
Expand All @@ -44,7 +66,7 @@ def predict_regressor(
model: Union[BaseEstimator, keras.Model],
) -> np.ndarray:
"""
Predict with a trained model.
Predict with a trained regressor model.
Args:
data: Data used to make predictions.
Expand All @@ -53,6 +75,11 @@ def predict_regressor(
Returns:
Regression model prediction array.
Raises:
InvalidModelTypeException: Input model is not a regressor model.
"""
if is_classifier(model):
raise InvalidModelTypeException(f"Expected a regressor model: {type(model)}.")
result = model.predict(data)
return result
4 changes: 2 additions & 2 deletions tests/prediction/machine_learning_general_test.py
Original file line number Diff line number Diff line change
Expand Up @@ -118,7 +118,7 @@ def test_evaluate_model_sklearn():
X_train, y_train, model=RF_MODEL, validation_method="none", metrics=CLF_METRICS, random_state=42
)

predictions = predict_classifier(X_test, model, include_probabilities=False)
predictions = predict_classifier(X_test, model, classification_threshold=0.5, include_probabilities=False)
accuracy = score_predictions(y_test, predictions, "accuracy")
np.testing.assert_equal(accuracy, 1.0)

Expand All @@ -131,7 +131,7 @@ def test_predict_classifier_sklearn():
X_train, y_train, model=RF_MODEL, validation_method="none", metrics=CLF_METRICS, random_state=42
)

predicted_labels, predicted_probabilities = predict_classifier(X_test, model, True)
predicted_labels, predicted_probabilities = predict_classifier(X_test, model, include_probabilities=True)
np.testing.assert_equal(len(predicted_labels), len(y_test))
np.testing.assert_equal(len(predicted_probabilities), len(y_test))

Expand Down

0 comments on commit e7354ca

Please sign in to comment.