zenml-io · avishniakov · Feb 19, 2024 · Feb 8, 2024 · Feb 8, 2024 · Feb 8, 2024
diff --git a/.gitignore b/.gitignore
@@ -103,7 +103,7 @@ celerybeat.pid
 
 # Environments
 .env
-.venv
+.venv*
 env/
 venv/
 ENV/

diff --git a/stack-showcase/Dockerfile b/stack-showcase/Dockerfile
diff --git a/stack-showcase/_assets/cloud_mcp.png b/stack-showcase/_assets/cloud_mcp.png
diff --git a/stack-showcase/_assets/cloud_mcp_predictions.png b/stack-showcase/_assets/cloud_mcp_predictions.png
diff --git a/stack-showcase/_assets/cloud_mcp_screenshot.png b/stack-showcase/_assets/cloud_mcp_screenshot.png
diff --git a/stack-showcase/app.py b/stack-showcase/app.py
diff --git a/stack-showcase/configs/deployment.yaml b/stack-showcase/configs/deployment.yaml
diff --git a/stack-showcase/configs/feature_engineering.yaml b/stack-showcase/configs/feature_engineering.yaml
@@ -3,10 +3,8 @@ settings:
   docker:
     required_integrations:
       - sklearn
+    requirements:
+      - pyarrow
 
-# configuration of the Model Control Plane
-model:
-  name: breast_cancer_classifier
-  license: Apache 2.0
-  description: Classification of Breast Cancer Dataset.
-  tags: ["classification", "sklearn"]
+# pipeline configuration
+test_size: 0.35
diff --git a/stack-showcase/configs/inference.yaml b/stack-showcase/configs/inference.yaml
@@ -3,11 +3,10 @@ settings:
   docker:
     required_integrations:
       - sklearn
+    requirements:
+      - pyarrow
 
 # configuration of the Model Control Plane
 model:
-  name: breast_cancer_classifier
-  version: production
-  license: Apache 2.0
-  description: Classification of Breast Cancer Dataset.
-  tags: ["classification", "sklearn"]
+  name: "breast_cancer_classifier"
+  version: "production"
diff --git a/stack-showcase/configs/training_rf.yaml b/stack-showcase/configs/training_rf.yaml
@@ -0,0 +1,18 @@
+# environment configuration
+settings:
+  docker:
+    required_integrations:
+      - sklearn
+    requirements:
+      - pyarrow
+
+# configuration of the Model Control Plane
+model:
+  name: breast_cancer_classifier
+  license: Apache 2.0
+  description: A breast cancer classifier
+  tags: ["breast_cancer", "classifier","rf"]
+
+# Configure the pipeline
+parameters:
+  model_type: "rf"  # Choose between rf/sgd
diff --git a/stack-showcase/configs/training.yaml b/stack-showcase/configs/training_rf_sagemaker.yaml
@@ -5,18 +5,22 @@ settings:
       - sklearn
     requirements:
       - pyarrow
-      - huggingface_hub
 
 # configuration of the Model Control Plane
 model:
   name: breast_cancer_classifier
   license: Apache 2.0
-  description: Classification of Breast Cancer Dataset.
-  tags: ["classification", "sklearn"]
+  description: A breast cancer classifier
+  tags: ["breast_cancer", "classifier","rf"]
+
+# Configure the pipeline
+parameters:
+  model_type: "rf"  # Choose between rf/sgd
 
 steps:
   model_trainer:
+    step_operator: sagemaker-eu
     settings:
       step_operator.sagemaker:
         estimator_args: 
-          instance_type: "ml.m5.large"
+          instance_type : ml.m5.large
diff --git a/stack-showcase/configs/training_sgd.yaml b/stack-showcase/configs/training_sgd.yaml
@@ -0,0 +1,18 @@
+# environment configuration
+settings:
+  docker:
+    required_integrations:
+      - sklearn
+    requirements:
+      - pyarrow
+
+# configuration of the Model Control Plane
+model:
+  name: breast_cancer_classifier
+  license: Apache 2.0
+  description: A breast cancer classifier
+  tags: ["breast_cancer", "classifier","sgd"]
+
+# Configure the pipeline
+parameters:
+  model_type: "sgd"  # Choose between rf/sgd
diff --git a/stack-showcase/configs/training_sgd_sagemaker.yaml b/stack-showcase/configs/training_sgd_sagemaker.yaml
@@ -0,0 +1,26 @@
+# environment configuration
+settings:
+  docker:
+    required_integrations:
+      - sklearn
+    requirements:
+      - pyarrow
+
+# configuration of the Model Control Plane
+model:
+  name: breast_cancer_classifier
+  license: Apache 2.0
+  description: A breast cancer classifier
+  tags: ["breast_cancer", "classifier","sgd"]
+
+# Configure the pipeline
+parameters:
+  model_type: "sgd"  # Choose between rf/sgd
+
+steps:
+  model_trainer:
+    step_operator: sagemaker-eu
+    settings:
+      step_operator.sagemaker:
+        estimator_args: 
+          instance_type : ml.m5.large
diff --git a/stack-showcase/pipelines/__init__.py b/stack-showcase/pipelines/__init__.py
@@ -1,6 +1,20 @@
-# {% include 'template/license_header' %}
+# Apache Software License 2.0
+#
+# Copyright (c) ZenML GmbH 2024. All rights reserved.
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+#
 
 from .feature_engineering import feature_engineering
 from .inference import inference
-from .training import breast_cancer_training
-from .deployment import breast_cancer_deployment_pipeline
+from .training import training
diff --git a/stack-showcase/pipelines/deployment.py b/stack-showcase/pipelines/deployment.py
diff --git a/stack-showcase/pipelines/feature_engineering.py b/stack-showcase/pipelines/feature_engineering.py
@@ -1,13 +1,29 @@
-# {% include 'template/license_header' %}
+# Apache Software License 2.0
+#
+# Copyright (c) ZenML GmbH 2024. All rights reserved.
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+#
 
-import random
 from typing import List, Optional
+import random
 
 from steps import (
     data_loader,
     data_preprocessor,
     data_splitter,
 )
+
 from zenml import pipeline
 from zenml.logger import get_logger
 
@@ -21,6 +37,7 @@ def feature_engineering(
     normalize: Optional[bool] = None,
     drop_columns: Optional[List[str]] = None,
     target: Optional[str] = "target",
+    random_state: int = None,
 ):
     """
     Feature engineering pipeline.
@@ -34,11 +51,16 @@ def feature_engineering(
         normalize: If `True` dataset will be normalized with MinMaxScaler
         drop_columns: List of columns to drop from dataset
         target: Name of target column in dataset
+        random_state: Random state passed to the pipeline steps; drawn at random when `None`
+
+    Returns:
+        The processed datasets (dataset_trn, dataset_tst).
     """
-    ### ADD YOUR OWN CODE HERE - THIS IS JUST AN EXAMPLE ###
     # Link all the steps together by calling them and passing the output
     # of one step as the input of the next step.
-    raw_data = data_loader(random_state=random.randint(0, 100), target=target)
+    if random_state is None:
+        random_state = random.randint(0,1000)
+    raw_data = data_loader(random_state=random_state, target=target)
     dataset_trn, dataset_tst = data_splitter(
         dataset=raw_data,
         test_size=test_size,
@@ -50,5 +72,6 @@ def feature_engineering(
         normalize=normalize,
         drop_columns=drop_columns,
         target=target,
+        random_state=random_state,
     )
     return dataset_trn, dataset_tst
-Original file line number
+Diff line change
@@ Expand Up / @@ -103,7 +103,7 @@ celerybeat.pid @@
     # Environments
     .env
-    .venv
+    .venv*
     env/
     venv/
     ENV/
@@ Expand Down @@