diff --git a/.github/workflows/ci.yml b/.github/workflows/ci.yml
index cfc2c50..22f08d7 100644
--- a/.github/workflows/ci.yml
+++ b/.github/workflows/ci.yml
@@ -55,5 +55,5 @@ jobs:
     with:
       stack-name: ${{ matrix.stack-name }}
       python-version: ${{ matrix.python-version }}
-      ref-zenml: ${{ inputs.ref-zenml || 'develop' }}
+      ref-zenml: ${{ inputs.ref-zenml || 'feature/followup-run-metadata' }}
       ref-template: ${{ inputs.ref-template || github.ref }}
diff --git a/template/steps/data_preprocessor.py b/template/steps/data_preprocessor.py
index cd87063..43f5aba 100644
--- a/template/steps/data_preprocessor.py
+++ b/template/steps/data_preprocessor.py
@@ -7,7 +7,7 @@
 from sklearn.preprocessing import MinMaxScaler
 from typing_extensions import Annotated
 from utils.preprocess import ColumnsDropper, DataFrameCaster, NADropper
-from zenml import log_artifact_metadata, step
+from zenml import log_metadata, step
 
 
 @step
@@ -67,8 +67,9 @@ def data_preprocessor(
     dataset_tst = preprocess_pipeline.transform(dataset_tst)
 
     # Log metadata so we can load it in the inference pipeline
-    log_artifact_metadata(
-        artifact_name="preprocess_pipeline",
+    log_metadata(
         metadata={"random_state": random_state, "target": target},
+        artifact_name="preprocess_pipeline",
+        infer_artifact=True,
     )
     return dataset_trn, dataset_tst, preprocess_pipeline
diff --git a/template/steps/model_evaluator.py b/template/steps/model_evaluator.py
index 835d9ea..fe6c6a0 100644
--- a/template/steps/model_evaluator.py
+++ b/template/steps/model_evaluator.py
@@ -4,7 +4,9 @@
 
 import pandas as pd
 from sklearn.base import ClassifierMixin
-from zenml import log_artifact_metadata, step
+
+from zenml import log_metadata, step
+from zenml.client import Client
 from zenml.logger import get_logger
 
 logger = get_logger(__name__)
@@ -12,12 +14,12 @@
 
 @step
 def model_evaluator(
-    model: ClassifierMixin,
-    dataset_trn: pd.DataFrame,
-    dataset_tst: pd.DataFrame,
-    min_train_accuracy: float = 0.0,
-    min_test_accuracy: float = 0.0,
-    target: Optional[str] = "target",
+    model: ClassifierMixin,
+    dataset_trn: pd.DataFrame,
+    dataset_tst: pd.DataFrame,
+    min_train_accuracy: float = 0.0,
+    min_test_accuracy: float = 0.0,
+    target: Optional[str] = "target",
 ) -> float:
     """Evaluate a trained model.
 
@@ -63,24 +65,31 @@ def model_evaluator(
         dataset_tst.drop(columns=[target]),
         dataset_tst[target],
     )
-    logger.info(f"Train accuracy={trn_acc*100:.2f}%")
-    logger.info(f"Test accuracy={tst_acc*100:.2f}%")
+    logger.info(f"Train accuracy={trn_acc * 100:.2f}%")
+    logger.info(f"Test accuracy={tst_acc * 100:.2f}%")
 
     messages = []
     if trn_acc < min_train_accuracy:
         messages.append(
-            f"Train accuracy {trn_acc*100:.2f}% is below {min_train_accuracy*100:.2f}% !"
+            f"Train accuracy {trn_acc * 100:.2f}% is below {min_train_accuracy * 100:.2f}% !"
        )
     if tst_acc < min_test_accuracy:
         messages.append(
-            f"Test accuracy {tst_acc*100:.2f}% is below {min_test_accuracy*100:.2f}% !"
+            f"Test accuracy {tst_acc * 100:.2f}% is below {min_test_accuracy * 100:.2f}% !"
         )
     else:
         for message in messages:
             logger.warning(message)
 
-    log_artifact_metadata(
-        metadata={"train_accuracy": float(trn_acc), "test_accuracy": float(tst_acc)},
-        artifact_name="sklearn_classifier",
+    client = Client()
+    latest_classifier = client.get_artifact_version("sklearn_classifier")
+
+    log_metadata(
+        metadata={
+            "train_accuracy": float(trn_acc),
+            "test_accuracy": float(tst_acc)
+        },
+        artifact_version_id=latest_classifier.id
     )
+
     return float(tst_acc)