Updating projects with the new zenml login flow #148

Open · wants to merge 10 commits into base: main
100 changes: 59 additions & 41 deletions classifier-e2e/README.md
@@ -11,58 +11,76 @@ pinned: false
license: apache-2.0
---

Check out the configuration reference at https://huggingface.co/docs/hub/spaces-config-reference
# ZenML MLOps Breast Cancer Classification Demo

# 📜 ZenML Stack Show Case
## 🌍 Project Overview

This project aims to demonstrate the power of stacks. The code in this
project assumes that you have quite a few stacks registered already.
This is a minimalistic MLOps project demonstrating how to put machine learning
workflows into production using ZenML. The project focuses on building a breast
cancer classification model with end-to-end ML pipeline management.

## default
* `default` Orchestrator
* `default` Artifact Store
### Key Features

```commandline
zenml stack set default
python run.py --training-pipeline
- 🔬 Feature engineering pipeline
- 🤖 Model training pipeline
- 🧪 Batch inference pipeline
- 📊 Artifact and model lineage tracking
- 🔗 Integration with Weights & Biases for experiment tracking

## 🚀 Installation

1. Clone the repository
2. Install requirements:
```bash
pip install -r requirements.txt
```
3. Install ZenML integrations:
```bash
zenml integration install sklearn xgboost wandb -y
zenml login
zenml init
```
4. You need to register a stack with a [Weights & Biases Experiment Tracker](https://docs.zenml.io/stack-components/experiment-trackers/wandb).

## 🧠 Project Structure

- `steps/`: Contains individual pipeline steps
- `pipelines/`: Pipeline definitions
- `run.py`: Main script to execute pipelines
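
For orientation, here is a minimal sketch of how a step and a pipeline are typically wired together in this layout; the step and pipeline names are hypothetical, not the project's actual code:

```python
# Hypothetical mini-pipeline; the real steps live in steps/ and the pipelines in pipelines/.
from zenml import pipeline, step


@step
def load_data() -> list:
    """Stand-in for the project's data loading step."""
    return [0.1, 0.2, 0.3]


@step
def train_model(data: list) -> float:
    """Stand-in for the project's training step; returns a fake score."""
    return 0.95


@pipeline
def demo_training_pipeline():
    data = load_data()
    train_model(data)


if __name__ == "__main__":
    # run.py dispatches the real pipelines; calling a pipeline runs it.
    demo_training_pipeline()
```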

## 🔍 Workflow and Execution

First, you need to set your stack:

```bash
zenml stack set stack-with-wandb
```

## local-sagemaker-step-operator-stack
* `default` Orchestrator
* `s3` Artifact Store
* `local` Image Builder
* `aws` Container Registry
* `Sagemaker` Step Operator
### 1. Data Loading and Feature Engineering

```commandline
zenml stack set local-sagemaker-step-operator-stack
zenml integration install aws -y
python run.py --training-pipeline
- Uses the Breast Cancer dataset from scikit-learn
- Splits data into training and inference sets
- Preprocesses data for model training

```bash
python run.py --feature-pipeline
```
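
In rough terms, the data handling amounts to something like the following standalone sketch (simplified; the real steps also preprocess the data and register the splits as ZenML artifacts):

```python
# Simplified sketch of the dataset loading and splitting; not the project's exact code.
from sklearn.datasets import load_breast_cancer
from sklearn.model_selection import train_test_split

raw_df = load_breast_cancer(as_frame=True).frame  # features plus a "target" column

# Hold out a slice to serve as "new" data for batch inference later.
working_df, inference_df = train_test_split(raw_df, test_size=0.2, random_state=42)

# Split the remainder into train/test sets for model development.
train_df, test_df = train_test_split(working_df, test_size=0.25, random_state=42)

print(f"train={len(train_df)} test={len(test_df)} inference={len(inference_df)}")
```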

## sagemaker-airflow-stack
* `Airflow` Orchestrator
* `s3` Artifact Store
* `local` Image Builder
* `aws` Container Registry
* `Sagemaker` Step Operator

```commandline
zenml stack set sagemaker-airflow-stack
zenml integration install airflow -y
pip install apache-airflow-providers-docker apache-airflow~=2.5.0
zenml stack up
### 2. Model Training

- Supports multiple model types (SGD, XGBoost)
- Evaluates and compares model performance
- Tracks model metrics with Weights & Biases

```bash
python run.py --training-pipeline
```
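
As a rough illustration of what the training pipeline does, the sketch below trains and compares the two supported model families outside of ZenML; the real steps additionally log metrics to Weights & Biases:

```python
# Standalone comparison of the two supported model families; illustrative only.
from sklearn.datasets import load_breast_cancer
from sklearn.linear_model import SGDClassifier
from sklearn.model_selection import train_test_split
from xgboost import XGBClassifier

X, y = load_breast_cancer(return_X_y=True)
X_train, X_test, y_train, y_test = train_test_split(X, y, random_state=42)

candidates = {
    "sgd": SGDClassifier(random_state=42),
    "xgboost": XGBClassifier(random_state=42),
}

scores = {}
for name, model in candidates.items():
    model.fit(X_train, y_train)
    scores[name] = model.score(X_test, y_test)  # accuracy on the held-out split

print(scores, "-> best:", max(scores, key=scores.get))
```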

## sagemaker-stack
* `Sagemaker` Orchestrator
* `s3` Artifact Store
* `local` Image Builder
* `aws` Container Registry
* `Sagemaker` Step Operator
### 3. Batch Inference

```commandline
zenml stack set sagemaker-stack
python run.py --training-pipeline
- Loads production model
- Generates predictions on new data

```bash
python run.py --inference-pipeline
```
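
A rough sketch of the inference logic, assuming the ZenML Model Control Plane API (`Model(...).get_artifact(...).load()`) and a model and artifact both named `breast_cancer_classifier`; the project's actual names and calls may differ:

```python
# Hedged sketch: the model name, version stage, artifact name, and input file are
# assumptions, not values taken from this repository.
import pandas as pd
from zenml import Model

production_model = Model(name="breast_cancer_classifier", version="production")

# Load the trained classifier artifact attached to the production model version.
classifier = production_model.get_artifact("breast_cancer_classifier").load()

new_data = pd.read_csv("new_records.csv")  # hypothetical batch of unseen rows
print(classifier.predict(new_data)[:10])
```
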
2 changes: 1 addition & 1 deletion classifier-e2e/requirements.txt
@@ -1,4 +1,4 @@
zenml[server]>=0.55.2
zenml[server]>=0.70.0
notebook
scikit-learn<1.3
s3fs>2022.3.0,<=2023.4.0
22 changes: 17 additions & 5 deletions classifier-e2e/run_full.ipynb
@@ -38,7 +38,7 @@
"source": [
"! pip3 install -r requirements.txt\n",
"! zenml integration install sklearn xgboost -y\n",
"! zenml connect --url https://1cf18d95-zenml.cloudinfra.zenml.io \n",
"! zenml login https://1cf18d95-zenml.cloudinfra.zenml.io \n",
"\n",
"import IPython\n",
"IPython.Application.instance().kernel.do_shutdown(restart=True)"
@@ -941,10 +941,17 @@
" .ravel()\n",
" .tolist(),\n",
" }\n",
" log_model_metadata(metadata={\"wandb_url\": wandb.run.url})\n",
" log_artifact_metadata(\n",
"\n",
" try:\n",
" if get_step_context().model:\n",
" log_metadata(metadata=metadata, infer_model=True)\n",
" except StepContextError:\n",
" # If a model is not configured, it is not able to log metadata\n",
" pass\n",
"\n",
" log_metadata(\n",
" metadata=metadata,\n",
" artifact_name=\"breast_cancer_classifier\",\n",
" artifact_version_id=get_step_context().inputs[\"model\"].id,\n",
" )\n",
"\n",
" wandb.log({\"train_accuracy\": metadata[\"train_accuracy\"]})\n",
@@ -1073,6 +1080,7 @@
{
"cell_type": "code",
"execution_count": null,
"id": "1e2130b9",
"metadata": {},
"outputs": [],
"source": [
@@ -1083,6 +1091,7 @@
{
"cell_type": "code",
"execution_count": null,
"id": "476cbf5c",
"metadata": {},
"outputs": [],
"source": [
@@ -1091,6 +1100,7 @@
},
{
"cell_type": "markdown",
"id": "75df10e7",
"metadata": {},
"source": [
"Now full run executed on local stack and experiment is tracked using Model Control Plane and Weights&Biases.\n",
@@ -1103,6 +1113,7 @@
{
"cell_type": "code",
"execution_count": null,
"id": "bfd6345f",
"metadata": {},
"outputs": [],
"source": [
@@ -1113,6 +1124,7 @@
{
"cell_type": "code",
"execution_count": null,
"id": "24358031",
"metadata": {},
"outputs": [],
"source": [
@@ -1136,7 +1148,7 @@
"name": "python",
"nbconvert_exporter": "python",
"pygments_lexer": "ipython3",
"version": "3.9.18"
"version": "3.11.3"
}
},
"nbformat": 4,
17 changes: 12 additions & 5 deletions classifier-e2e/run_skip_basics.ipynb
@@ -38,7 +38,7 @@
"source": [
"! pip3 install -r requirements.txt\n",
"! zenml integration install sklearn xgboost -y\n",
"! zenml connect --url https://1cf18d95-zenml.cloudinfra.zenml.io \n",
"! zenml login https://1cf18d95-zenml.cloudinfra.zenml.io \n",
"\n",
"import IPython\n",
"IPython.Application.instance().kernel.do_shutdown(restart=True)"
@@ -829,10 +829,17 @@
" .ravel()\n",
" .tolist(),\n",
" }\n",
" log_model_metadata(metadata={\"wandb_url\": wandb.run.url})\n",
" log_artifact_metadata(\n",
"\n",
" try:\n",
" if get_step_context().model:\n",
" log_metadata(metadata=metadata, infer_model=True)\n",
" except StepContextError:\n",
" # If a model is not configured, it is not able to log metadata\n",
" pass\n",
"\n",
" log_metadata(\n",
" metadata=metadata,\n",
" artifact_name=\"breast_cancer_classifier\",\n",
" artifact_version_id=get_step_context().inputs[\"model\"].id,\n",
" )\n",
"\n",
" wandb.log({\"train_accuracy\": metadata[\"train_accuracy\"]})\n",
@@ -1211,7 +1218,7 @@
"name": "python",
"nbconvert_exporter": "python",
"pygments_lexer": "ipython3",
"version": "3.9.18"
"version": "3.11.3"
}
},
"nbformat": 4,
6 changes: 5 additions & 1 deletion classifier-e2e/steps/deploy_endpoint.py
@@ -7,6 +7,7 @@
from utils.aws import get_aws_config
from utils.sagemaker_materializer import SagemakerPredictorMaterializer
from zenml import ArtifactConfig, get_step_context, log_artifact_metadata, step
from zenml.enums import ArtifactType


@step(
@@ -16,7 +17,10 @@
def deploy_endpoint() -> (
Annotated[
Predictor,
ArtifactConfig(name="sagemaker_endpoint", is_deployment_artifact=True),
ArtifactConfig(
name="sagemaker_endpoint",
artifact_type=ArtifactType.SERVICE
),
]
):
role, session, region = get_aws_config()
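
For context, the change above swaps the old boolean flags (`is_deployment_artifact` / `is_model_artifact`) for an explicit `artifact_type`. A minimal sketch of the annotation pattern with a hypothetical step (not this repository's code):

```python
# Minimal sketch of typing a step output via ArtifactConfig + ArtifactType.
# The step body is a placeholder; only the annotation pattern is the point.
from typing_extensions import Annotated

from zenml import ArtifactConfig, step
from zenml.enums import ArtifactType


@step
def train_dummy_model() -> Annotated[
    dict,
    ArtifactConfig(name="dummy_model", artifact_type=ArtifactType.MODEL),
]:
    """Pretend 'model' so the example stays dependency-free."""
    return {"weights": [0.1, 0.2, 0.3]}
```
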
35 changes: 16 additions & 19 deletions classifier-e2e/steps/model_evaluator.py
@@ -21,12 +21,7 @@
import wandb
from sklearn.base import ClassifierMixin
from sklearn.metrics import confusion_matrix
from zenml import (
get_step_context,
log_artifact_metadata,
log_model_metadata,
step,
)
from zenml import step, log_metadata, get_step_context
from zenml.client import Client
from zenml.exceptions import StepContextError
from zenml.logger import get_logger
Expand Down Expand Up @@ -60,12 +55,12 @@ def model_evaluator(
step to force the pipeline run to fail early and all subsequent steps to
be skipped.

This step is parameterized to configure the step independently of the step code,
before running it in a pipeline. In this example, the step can be configured
to use different values for the acceptable model performance thresholds and
to control whether the pipeline run should fail if the model performance
does not meet the minimum criteria. See the documentation for more
information:
This step is parameterized to configure the step independently of the step
code, before running it in a pipeline. In this example, the step can be
configured to use different values for the acceptable model performance
thresholds and to control whether the pipeline run should fail if the model
performance does not meet the minimum criteria. See the documentation for
more information:

https://docs.zenml.io/user-guide/advanced-guide/configure-steps-pipelines

@@ -89,17 +84,19 @@ def model_evaluator(
dataset_tst.drop(columns=[target]),
dataset_tst[target],
)
logger.info(f"Train accuracy={trn_acc*100:.2f}%")
logger.info(f"Test accuracy={tst_acc*100:.2f}%")
logger.info(f"Train accuracy={trn_acc * 100:.2f}%")
logger.info(f"Test accuracy={tst_acc * 100:.2f}%")

messages = []
if trn_acc < min_train_accuracy:
messages.append(
f"Train accuracy {trn_acc*100:.2f}% is below {min_train_accuracy*100:.2f}% !"
f"Train accuracy {trn_acc * 100:.2f}% is below "
f"{min_train_accuracy * 100:.2f}% !"
)
if tst_acc < min_test_accuracy:
messages.append(
f"Test accuracy {tst_acc*100:.2f}% is below {min_test_accuracy*100:.2f}% !"
f"Test accuracy {tst_acc * 100:.2f}% is below "
f"{min_test_accuracy * 100:.2f}% !"
)
else:
for message in messages:
@@ -115,14 +112,14 @@ def model_evaluator(
}
try:
if get_step_context().model:
log_model_metadata(metadata={"wandb_url": wandb.run.url})
log_metadata(metadata=metadata, infer_model=True)
except StepContextError:
# if model not configured not able to log metadata
pass

log_artifact_metadata(
log_metadata(
metadata=metadata,
artifact_name="breast_cancer_classifier",
artifact_version_id=get_step_context().inputs["model"].id,
)

wandb.log(
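
For reference, the diff above replaces the separate `log_model_metadata` / `log_artifact_metadata` calls with the unified `log_metadata` API. Below is a reduced sketch of that pattern, with made-up metric values and a hypothetical step signature; it only works when run inside a pipeline:

```python
# Reduced sketch of the metadata-logging pattern from the diff above; the
# metrics are fabricated and the step must run inside a ZenML pipeline.
from sklearn.base import ClassifierMixin
from zenml import get_step_context, log_metadata, step
from zenml.exceptions import StepContextError


@step
def evaluate(model: ClassifierMixin) -> None:
    metadata = {"train_accuracy": 0.97, "test_accuracy": 0.95}

    try:
        # Attach the metrics to the pipeline's model version, if one is configured.
        if get_step_context().model:
            log_metadata(metadata=metadata, infer_model=True)
    except StepContextError:
        # No model configured on the pipeline; skip model-level metadata.
        pass

    # Attach the same metrics to the specific artifact version received as input.
    log_metadata(
        metadata=metadata,
        artifact_name="breast_cancer_classifier",
        artifact_version_id=get_step_context().inputs["model"].id,
    )
```
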
7 changes: 5 additions & 2 deletions classifier-e2e/steps/model_trainer.py
@@ -13,7 +13,6 @@
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
#

from typing import Optional

@@ -23,6 +22,7 @@
from typing_extensions import Annotated
from utils.sagemaker_materializer import SagemakerMaterializer
from zenml import ArtifactConfig, step
from zenml.enums import ArtifactType
from zenml.logger import get_logger

logger = get_logger(__name__)
@@ -39,7 +39,10 @@
target: Optional[str] = "target",
) -> Annotated[
ClassifierMixin,
ArtifactConfig(name="breast_cancer_classifier", is_model_artifact=True),
ArtifactConfig(
name="breast_cancer_classifier",
artifact_tyoe=ArtifactType.MODEL,

[GitHub Actions / spell-check] Warning on line 44 in classifier-e2e/steps/model_trainer.py: "tyoe" should be "toe" or "toey" or "type".
),
]:
"""Configure and train a model on the training dataset.
