Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Add deployment to llm-complete #160

Open
wants to merge 8 commits into
base: main
Choose a base branch
from
2 changes: 2 additions & 0 deletions llm-complete-guide/.env.local
Original file line number Diff line number Diff line change
@@ -0,0 +1,2 @@
MODELS=[{"name":"llm-complete-rag-webui","parameters":{"temperature":0.5,"max_new_tokens":1024},"endpoints":[{"type":"openai","baseURL":"http://localhost:3000/generate"}]}]

14 changes: 6 additions & 8 deletions llm-complete-guide/gh_action_rag.py
Original file line number Diff line number Diff line change
Expand Up @@ -21,12 +21,10 @@

import click
import yaml
from zenml.enums import PluginSubType

from pipelines.llm_index_and_evaluate import llm_index_and_evaluate
from zenml.client import Client
from zenml import Model
from zenml.exceptions import ZenKeyError
from zenml.client import Client
from zenml.enums import PluginSubType


@click.command(
Expand Down Expand Up @@ -89,7 +87,7 @@ def main(
zenml_model_name: Optional[str] = "zenml-docs-qa-rag",
zenml_model_version: Optional[str] = None,
):
"""
"""
Executes the pipeline to train a basic RAG model.

Args:
Expand All @@ -108,14 +106,14 @@ def main(
config = yaml.safe_load(file)

# Read the model version from a file in the root of the repo
# called "ZENML_VERSION.txt".
# called "ZENML_VERSION.txt".
if zenml_model_version == "staging":
postfix = "-rc0"
elif zenml_model_version == "production":
postfix = ""
else:
postfix = "-dev"

if Path("ZENML_VERSION.txt").exists():
with open("ZENML_VERSION.txt", "r") as file:
zenml_model_version = file.read().strip()
Expand Down Expand Up @@ -177,7 +175,7 @@ def main(
service_account_id=service_account_id,
auth_window=0,
flavor="builtin",
action_type=PluginSubType.PIPELINE_RUN
action_type=PluginSubType.PIPELINE_RUN,
).id
client.create_trigger(
name="Production Trigger LLM-Complete",
Expand Down
4 changes: 3 additions & 1 deletion llm-complete-guide/pipelines/__init__.py
Original file line number Diff line number Diff line change
Expand Up @@ -19,5 +19,7 @@
from pipelines.generate_chunk_questions import generate_chunk_questions
from pipelines.llm_basic_rag import llm_basic_rag
from pipelines.llm_eval import llm_eval
from pipelines.llm_index_and_evaluate import llm_index_and_evaluate
from pipelines.local_deployment import local_deployment
from pipelines.prod_deployment import production_deployment
from pipelines.rag_deployment import rag_deployment
from pipelines.llm_index_and_evaluate import llm_index_and_evaluate
1 change: 0 additions & 1 deletion llm-complete-guide/pipelines/finetune_embeddings.py
Original file line number Diff line number Diff line change
Expand Up @@ -12,7 +12,6 @@
# or implied. See the License for the specific language governing
# permissions and limitations under the License.

from constants import EMBEDDINGS_MODEL_NAME_ZENML
from steps.finetune_embeddings import (
evaluate_base_model,
evaluate_finetuned_model,
Expand Down
1 change: 0 additions & 1 deletion llm-complete-guide/pipelines/llm_basic_rag.py
Original file line number Diff line number Diff line change
Expand Up @@ -14,7 +14,6 @@
# See the License for the specific language governing permissions and
# limitations under the License.
#
from litellm import config_path

from steps.populate_index import (
generate_embeddings,
Expand Down
3 changes: 2 additions & 1 deletion llm-complete-guide/pipelines/llm_index_and_evaluate.py
Original file line number Diff line number Diff line change
Expand Up @@ -15,9 +15,10 @@
# limitations under the License.
#

from pipelines import llm_basic_rag, llm_eval
from zenml import pipeline

from pipelines import llm_basic_rag, llm_eval


@pipeline
def llm_index_and_evaluate() -> None:
Expand Down
9 changes: 9 additions & 0 deletions llm-complete-guide/pipelines/local_deployment.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,9 @@
from steps.bento_builder import bento_builder
from steps.bento_deployment import bento_deployment
from zenml import pipeline


@pipeline(enable_cache=False)
def local_deployment():
    """Deploy the RAG service locally via BentoML.

    Two-step pipeline: first builds a Bento bundle from the model
    (``bento_builder``), then deploys that bundle as a local BentoML
    service (``bento_deployment``).

    Caching is disabled so every run rebuilds and redeploys, ensuring the
    running service always reflects the latest model state.
    """
    # Package the model and serving code into a Bento bundle.
    bento = bento_builder()
    # Serve the freshly built bundle locally.
    bento_deployment(bento)
32 changes: 32 additions & 0 deletions llm-complete-guide/pipelines/prod_deployment.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,32 @@
# Apache Software License 2.0
#
# Copyright (c) ZenML GmbH 2024. All rights reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
#

from steps.bento_dockerizer import bento_dockerizer
from steps.k8s_deployment import k8s_deployment
from steps.visualize_chat import create_chat_interface
from zenml import pipeline


@pipeline(enable_cache=False)
def production_deployment():
    """Model deployment pipeline for production (Kubernetes).

    This pipeline deploys the trained model for future inference:
    it containerizes the BentoML service into a Docker image, deploys
    that image to a Kubernetes cluster, and finally creates a chat
    interface wired to the deployed endpoint.

    Caching is disabled so each run produces a fresh image and deployment.
    """
    # Build a Docker image from the Bento service bundle.
    bento_model_image = bento_dockerizer()
    # Roll the image out to Kubernetes; returns endpoint/deployment details.
    deployment_info = k8s_deployment(bento_model_image)
    # Expose a chat UI against the deployed inference endpoint.
    create_chat_interface(deployment_info)
52 changes: 42 additions & 10 deletions llm-complete-guide/run.py
Original file line number Diff line number Diff line change
Expand Up @@ -47,12 +47,14 @@
generate_synthetic_data,
llm_basic_rag,
llm_eval,
rag_deployment,
llm_index_and_evaluate,
local_deployment,
production_deployment,
rag_deployment,
)
from structures import Document
from zenml.materializers.materializer_registry import materializer_registry
from zenml import Model
from zenml.materializers.materializer_registry import materializer_registry

logger = get_logger(__name__)

Expand Down Expand Up @@ -95,6 +97,13 @@
default="gpt4",
help="The model to use for the completion.",
)
@click.option(
"--query-text",
"query_text",
required=False,
default=None,
help="The query text to use for the completion.",
)
@click.option(
"--zenml-model-name",
"zenml_model_name",
Expand Down Expand Up @@ -136,6 +145,12 @@
default=None,
help="Path to config",
)
@click.option(
"--env",
"env",
default="local",
help="The environment to use for the completion.",
)
def main(
pipeline: str,
query_text: Optional[str] = None,
Expand All @@ -146,6 +161,7 @@ def main(
use_argilla: bool = False,
use_reranker: bool = False,
config: Optional[str] = None,
env: str = "local",
):
"""Main entry point for the pipeline execution.

Expand All @@ -159,6 +175,7 @@ def main(
        use_argilla (bool): If True, Argilla annotations will be used
use_reranker (bool): If True, rerankers will be used
config (Optional[str]): Path to config file
env (str): The environment to use for the deployment (local, huggingface space, k8s etc.)
"""
pipeline_args = {"enable_cache": not no_cache}
embeddings_finetune_args = {
Expand All @@ -169,9 +186,9 @@ def main(
}
},
}

# Read the model version from a file in the root of the repo
# called "ZENML_VERSION.txt".
# called "ZENML_VERSION.txt".
if zenml_model_version == "staging":
postfix = "-rc0"
elif zenml_model_version == "production":
Expand All @@ -181,8 +198,10 @@ def main(

if Path("ZENML_VERSION.txt").exists():
with open("ZENML_VERSION.txt", "r") as file:
zenml_model_version = file.read().strip()
zenml_model_version += postfix
zenml_version = file.read().strip()
zenml_version += postfix
# zenml_model_version = file.read().strip()
# zenml_model_version += postfix
else:
raise RuntimeError(
"No model version file found. Please create a file called ZENML_VERSION.txt in the root of the repo with the model version."
Expand All @@ -191,7 +210,7 @@ def main(
# Create ZenML model
zenml_model = Model(
name=zenml_model_name,
version=zenml_model_version,
version=zenml_version,
license="Apache 2.0",
description="RAG application for ZenML docs",
tags=["rag", "finetuned", "chatbot"],
Expand Down Expand Up @@ -251,8 +270,19 @@ def main(
)()

elif pipeline == "deploy":
rag_deployment.with_options(model=zenml_model, **pipeline_args)()

zenml_model.version = zenml_model_version
if env == "local":
local_deployment.with_options(
model=zenml_model, config_path=config_path, **pipeline_args
)()
elif env == "huggingface":
rag_deployment.with_options(
model=zenml_model, config_path=config_path, **pipeline_args
)()
elif env == "k8s":
production_deployment.with_options(
model=zenml_model, config_path=config_path, **pipeline_args
)()
elif pipeline == "evaluation":
pipeline_args["enable_cache"] = False
llm_eval.with_options(model=zenml_model, config_path=config_path)()
Expand All @@ -264,7 +294,9 @@ def main(

elif pipeline == "embeddings":
finetune_embeddings.with_options(
model=zenml_model, config_path=config_path, **embeddings_finetune_args
model=zenml_model,
config_path=config_path,
**embeddings_finetune_args,
)()

elif pipeline == "chunks":
Expand Down
Loading
Loading