From 97ac29297ddeb465b199a7f89ba18d8be09a690d Mon Sep 17 00:00:00 2001 From: Baris Can Durak Date: Wed, 13 Nov 2024 20:19:11 +0100 Subject: [PATCH 1/7] removing the deprecated calls --- template/steps/data_preprocessor.py | 4 ++-- template/steps/model_evaluator.py | 4 ++-- 2 files changed, 4 insertions(+), 4 deletions(-) diff --git a/template/steps/data_preprocessor.py b/template/steps/data_preprocessor.py index cd87063..c4039a9 100644 --- a/template/steps/data_preprocessor.py +++ b/template/steps/data_preprocessor.py @@ -7,7 +7,7 @@ from sklearn.preprocessing import MinMaxScaler from typing_extensions import Annotated from utils.preprocess import ColumnsDropper, DataFrameCaster, NADropper -from zenml import log_artifact_metadata, step +from zenml import log_metadata, step @step @@ -67,7 +67,7 @@ def data_preprocessor( dataset_tst = preprocess_pipeline.transform(dataset_tst) # Log metadata so we can load it in the inference pipeline - log_artifact_metadata( + log_metadata( artifact_name="preprocess_pipeline", metadata={"random_state": random_state, "target": target}, ) diff --git a/template/steps/model_evaluator.py b/template/steps/model_evaluator.py index 835d9ea..95cc6be 100644 --- a/template/steps/model_evaluator.py +++ b/template/steps/model_evaluator.py @@ -4,7 +4,7 @@ import pandas as pd from sklearn.base import ClassifierMixin -from zenml import log_artifact_metadata, step +from zenml import log_metadata, step from zenml.logger import get_logger logger = get_logger(__name__) @@ -79,7 +79,7 @@ def model_evaluator( for message in messages: logger.warning(message) - log_artifact_metadata( + log_metadata( metadata={"train_accuracy": float(trn_acc), "test_accuracy": float(tst_acc)}, artifact_name="sklearn_classifier", ) From 223516c0600a56da1c594b8e134d497465c9c0b6 Mon Sep 17 00:00:00 2001 From: Baris Can Durak Date: Wed, 13 Nov 2024 20:25:13 +0100 Subject: [PATCH 2/7] correcting the ref --- .github/workflows/ci.yml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/.github/workflows/ci.yml b/.github/workflows/ci.yml index cfc2c50..22f08d7 100644 --- a/.github/workflows/ci.yml +++ b/.github/workflows/ci.yml @@ -55,5 +55,5 @@ jobs: with: stack-name: ${{ matrix.stack-name }} python-version: ${{ matrix.python-version }} - ref-zenml: ${{ inputs.ref-zenml || 'develop' }} + ref-zenml: ${{ inputs.ref-zenml || 'feature/followup-run-metadata' }} ref-template: ${{ inputs.ref-template || github.ref }} From 52bf387b7027687153c7aaa17c08f413a7c67081 Mon Sep 17 00:00:00 2001 From: Baris Can Durak Date: Wed, 13 Nov 2024 22:41:12 +0100 Subject: [PATCH 3/7] fixing the review comments --- template/steps/data_preprocessor.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/template/steps/data_preprocessor.py b/template/steps/data_preprocessor.py index c4039a9..fd8a2f0 100644 --- a/template/steps/data_preprocessor.py +++ b/template/steps/data_preprocessor.py @@ -68,7 +68,7 @@ def data_preprocessor( # Log metadata so we can load it in the inference pipeline log_metadata( - artifact_name="preprocess_pipeline", metadata={"random_state": random_state, "target": target}, + artifact_name="preprocess_pipeline", ) return dataset_trn, dataset_tst, preprocess_pipeline From b8d446735f4b385322ca4ab5ee1f738eafcc4d0e Mon Sep 17 00:00:00 2001 From: Baris Can Durak Date: Thu, 28 Nov 2024 14:32:57 +0100 Subject: [PATCH 4/7] fixing the steps --- template/steps/data_preprocessor.py | 1 + template/steps/model_evaluator.py | 1 + 2 files changed, 2 insertions(+) diff --git a/template/steps/data_preprocessor.py b/template/steps/data_preprocessor.py index fd8a2f0..43f5aba 100644 --- a/template/steps/data_preprocessor.py +++ b/template/steps/data_preprocessor.py @@ -70,5 +70,6 @@ def data_preprocessor( log_metadata( metadata={"random_state": random_state, "target": target}, artifact_name="preprocess_pipeline", + infer_artifact=True, ) return dataset_trn, dataset_tst, preprocess_pipeline diff --git a/template/steps/model_evaluator.py b/template/steps/model_evaluator.py index 95cc6be..ecd3ddd 100644 --- a/template/steps/model_evaluator.py +++ b/template/steps/model_evaluator.py @@ -82,5 +82,6 @@ def model_evaluator( log_metadata( metadata={"train_accuracy": float(trn_acc), "test_accuracy": float(tst_acc)}, artifact_name="sklearn_classifier", + infer_artifact=True, ) return float(tst_acc) From fb8cdb745249b19f081da62492aaa195d4464b23 Mon Sep 17 00:00:00 2001 From: Baris Can Durak Date: Thu, 28 Nov 2024 15:15:29 +0100 Subject: [PATCH 5/7] new way to fetch artifacts --- template/steps/model_evaluator.py | 11 +++++------ 1 file changed, 5 insertions(+), 6 deletions(-) diff --git a/template/steps/model_evaluator.py b/template/steps/model_evaluator.py index ecd3ddd..8ae51c0 100644 --- a/template/steps/model_evaluator.py +++ b/template/steps/model_evaluator.py @@ -4,7 +4,7 @@ import pandas as pd from sklearn.base import ClassifierMixin -from zenml import log_metadata, step +from zenml import log_, step, Client from zenml.logger import get_logger logger = get_logger(__name__) @@ -79,9 +79,8 @@ def model_evaluator( for message in messages: logger.warning(message) - log_metadata( - metadata={"train_accuracy": float(trn_acc), "test_accuracy": float(tst_acc)}, - artifact_name="sklearn_classifier", - infer_artifact=True, - ) + client = Client() + latest_classifier = client.get_artifact_version("sklearn_classifier") + log_metadata(metadata=metadata, artifact_version_id=latest_classifier.id) + return float(tst_acc) From 11243560ccfaf7bf06bc5fd2c7002e7b412486ca Mon Sep 17 00:00:00 2001 From: Baris Can Durak Date: Thu, 28 Nov 2024 15:21:54 +0100 Subject: [PATCH 6/7] fixed the errors --- template/steps/model_evaluator.py | 11 +++++++++-- 1 file changed, 9 insertions(+), 2 deletions(-) diff --git a/template/steps/model_evaluator.py b/template/steps/model_evaluator.py index 8ae51c0..0a1511c 100644 --- a/template/steps/model_evaluator.py +++ b/template/steps/model_evaluator.py @@ -4,7 +4,7 @@ import pandas as pd from sklearn.base import ClassifierMixin -from zenml import log_, step, Client +from zenml import log_metadata, step, Client from zenml.logger import get_logger logger = get_logger(__name__) @@ -81,6 +81,13 @@ def model_evaluator( client = Client() latest_classifier = client.get_artifact_version("sklearn_classifier") - log_metadata(metadata=metadata, artifact_version_id=latest_classifier.id) + + log_metadata( + metadata={ + "train_accuracy": float(trn_acc), + "test_accuracy": float(tst_acc) + }, + artifact_version_id=latest_classifier.id + ) return float(tst_acc) From b60e4416b9fcc5ac1d15051c20f8034432483041 Mon Sep 17 00:00:00 2001 From: Baris Can Durak Date: Thu, 28 Nov 2024 15:31:24 +0100 Subject: [PATCH 7/7] fixed imports --- template/steps/model_evaluator.py | 24 +++++++++++++----------- 1 file changed, 13 insertions(+), 11 deletions(-) diff --git a/template/steps/model_evaluator.py b/template/steps/model_evaluator.py index 0a1511c..fe6c6a0 100644 --- a/template/steps/model_evaluator.py +++ b/template/steps/model_evaluator.py @@ -4,7 +4,9 @@ import pandas as pd from sklearn.base import ClassifierMixin -from zenml import log_metadata, step, Client + +from zenml import log_metadata, step +from zenml.client import Client from zenml.logger import get_logger logger = get_logger(__name__) @@ -12,12 +14,12 @@ @step def model_evaluator( - model: ClassifierMixin, - dataset_trn: pd.DataFrame, - dataset_tst: pd.DataFrame, - min_train_accuracy: float = 0.0, - min_test_accuracy: float = 0.0, - target: Optional[str] = "target", + model: ClassifierMixin, + dataset_trn: pd.DataFrame, + dataset_tst: pd.DataFrame, + min_train_accuracy: float = 0.0, + min_test_accuracy: float = 0.0, + target: Optional[str] = "target", ) -> float: """Evaluate a trained model. @@ -63,17 +65,17 @@ def model_evaluator( dataset_tst.drop(columns=[target]), dataset_tst[target], ) - logger.info(f"Train accuracy={trn_acc*100:.2f}%") - logger.info(f"Test accuracy={tst_acc*100:.2f}%") + logger.info(f"Train accuracy={trn_acc * 100:.2f}%") + logger.info(f"Test accuracy={tst_acc * 100:.2f}%") messages = [] if trn_acc < min_train_accuracy: messages.append( - f"Train accuracy {trn_acc*100:.2f}% is below {min_train_accuracy*100:.2f}% !" + f"Train accuracy {trn_acc * 100:.2f}% is below {min_train_accuracy * 100:.2f}% !" ) if tst_acc < min_test_accuracy: messages.append( - f"Test accuracy {tst_acc*100:.2f}% is below {min_test_accuracy*100:.2f}% !" + f"Test accuracy {tst_acc * 100:.2f}% is below {min_test_accuracy * 100:.2f}% !" ) else: for message in messages: