From 04c72cb3b063b88a6d99707f7a7312776ad7a3da Mon Sep 17 00:00:00 2001
From: PeterYang12
Date: Sun, 11 Aug 2024 22:47:04 -0700
Subject: [PATCH 1/2] GMC: Add GPU support for GMC.

Enable NVIDIA GPU support for GMC, including sequence and switch modes.
Note that switch mode may fail due to insufficient GPU memory.

Signed-off-by: PeterYang12
---
 .../config/samples/chatQnA_nv.yaml            |  68 ++++++++++
 .../config/samples/chatQnA_switch_nv.yaml     | 124 ++++++++++++++++++
 .../controller/gmconnector_controller.go      |   3 +
 microservices-connector/usage_guide.md        |   6 +-
 4 files changed, 200 insertions(+), 1 deletion(-)
 create mode 100644 microservices-connector/config/samples/chatQnA_nv.yaml
 create mode 100644 microservices-connector/config/samples/chatQnA_switch_nv.yaml

diff --git a/microservices-connector/config/samples/chatQnA_nv.yaml b/microservices-connector/config/samples/chatQnA_nv.yaml
new file mode 100644
index 00000000..ae8c0362
--- /dev/null
+++ b/microservices-connector/config/samples/chatQnA_nv.yaml
@@ -0,0 +1,68 @@
+# Copyright (C) 2024 Intel Corporation
+# SPDX-License-Identifier: Apache-2.0
+
+apiVersion: gmc.opea.io/v1alpha3
+kind: GMConnector
+metadata:
+  labels:
+    app.kubernetes.io/name: gmconnector
+    app.kubernetes.io/managed-by: kustomize
+    gmc/platform: nvidia
+  name: chatqa
+  namespace: chatqa
+spec:
+  routerConfig:
+    name: router
+    serviceName: router-service
+  nodes:
+    root:
+      routerType: Sequence
+      steps:
+      - name: Embedding
+        internalService:
+          serviceName: embedding-svc
+          config:
+            endpoint: /v1/embeddings
+            TEI_EMBEDDING_ENDPOINT: tei-embedding-svc
+      - name: TeiEmbedding
+        internalService:
+          serviceName: tei-embedding-svc
+          isDownstreamService: true
+      - name: Retriever
+        data: $response
+        internalService:
+          serviceName: retriever-svc
+          config:
+            endpoint: /v1/retrieval
+            REDIS_URL: redis-vector-db
+            TEI_EMBEDDING_ENDPOINT: tei-embedding-svc
+      - name: VectorDB
+        internalService:
+          serviceName: redis-vector-db
+          isDownstreamService: true
+      - name: Reranking
+        data: $response
+        internalService:
+          serviceName: reranking-svc
+          config:
+            endpoint: /v1/reranking
+            TEI_RERANKING_ENDPOINT: tei-reranking-svc
+      - name: TeiReranking
+        internalService:
+          serviceName: tei-reranking-svc
+          config:
+            endpoint: /rerank
+          isDownstreamService: true
+      - name: Llm
+        data: $response
+        internalService:
+          serviceName: llm-svc
+          config:
+            endpoint: /v1/chat/completions
+            TGI_LLM_ENDPOINT: tgi-service-m
+      - name: TgiNvidia
+        internalService:
+          serviceName: tgi-service-m
+          config:
+            endpoint: /generate
+          isDownstreamService: true
diff --git a/microservices-connector/config/samples/chatQnA_switch_nv.yaml b/microservices-connector/config/samples/chatQnA_switch_nv.yaml
new file mode 100644
index 00000000..dc2021b1
--- /dev/null
+++ b/microservices-connector/config/samples/chatQnA_switch_nv.yaml
@@ -0,0 +1,124 @@
+# Copyright (C) 2024 Intel Corporation
+# SPDX-License-Identifier: Apache-2.0
+
+apiVersion: gmc.opea.io/v1alpha3
+kind: GMConnector
+metadata:
+  labels:
+    app.kubernetes.io/name: gmconnector
+    app.kubernetes.io/managed-by: kustomize
+    gmc/platform: nvidia
+  name: switch
+  namespace: switch
+spec:
+  routerConfig:
+    name: router
+    serviceName: router-service
+  nodes:
+    root:
+      routerType: Sequence
+      steps:
+      - name: Embedding
+        nodeName: node1
+      - name: Reranking
+        data: $response
+        internalService:
+          serviceName: reranking-svc
+          config:
+            endpoint: /v1/reranking
+            TEI_RERANKING_ENDPOINT: tei-reranking-svc
+      - name: TeiReranking
+        internalService:
+          serviceName: tei-reranking-svc
+          config:
+            endpoint: /rerank
+          isDownstreamService: true
+      - name: Llm
+        data: $response
+        nodeName: node2
+    node1:
+      routerType: Switch
+      steps:
+      - name: Embedding
+        condition: embedding-model-id==large
+        internalService:
+          serviceName: embedding-svc-large
+          config:
+            endpoint: /v1/embeddings
+            TEI_EMBEDDING_ENDPOINT: tei-embedding-svc-bge15
+      - name: Embedding
+        condition: embedding-model-id==small
+        internalService:
+          serviceName: embedding-svc-small
+          config:
+            endpoint: /v1/embeddings
+            TEI_EMBEDDING_ENDPOINT: tei-embedding-svc-bge-small
+      - name: TeiEmbedding
+        internalService:
+          serviceName: tei-embedding-svc-bge15
+          config:
+            MODEL_ID: BAAI/bge-base-en-v1.5
+          isDownstreamService: true
+      - name: TeiEmbedding
+        internalService:
+          serviceName: tei-embedding-svc-bge-small
+          config:
+            MODEL_ID: BAAI/bge-small-en-v1.5
+          isDownstreamService: true
+      - name: Retriever
+        condition: embedding-model-id==large
+        data: $response
+        internalService:
+          serviceName: retriever-svc-large
+          config:
+            endpoint: /v1/retrieval
+            REDIS_URL: redis-vector-db-large
+            TEI_EMBEDDING_ENDPOINT: tei-embedding-svc-bge15
+      - name: Retriever
+        condition: embedding-model-id==small
+        data: $response
+        internalService:
+          serviceName: retriever-svc-small
+          config:
+            endpoint: /v1/retrieval
+            REDIS_URL: redis-vector-db-small
+            TEI_EMBEDDING_ENDPOINT: tei-embedding-svc-bge-small
+      - name: VectorDB
+        internalService:
+          serviceName: redis-vector-db-large
+          isDownstreamService: true
+      - name: VectorDB
+        internalService:
+          serviceName: redis-vector-db-small
+          isDownstreamService: true
+    node2:
+      routerType: Switch
+      steps:
+      - name: Llm
+        condition: model-id==intel
+        internalService:
+          serviceName: llm-svc-intel
+          config:
+            endpoint: /v1/chat/completions
+            TGI_LLM_ENDPOINT: tgi-service-intel
+      - name: Llm
+        condition: model-id==llama
+        internalService:
+          serviceName: llm-svc-llama
+          config:
+            endpoint: /v1/chat/completions
+            TGI_LLM_ENDPOINT: tgi-service-llama
+      - name: TgiNvidia
+        internalService:
+          serviceName: tgi-service-intel
+          config:
+            endpoint: /generate
+            MODEL_ID: Intel/neural-chat-7b-v3-3
+          isDownstreamService: true
+      - name: TgiNvidia
+        internalService:
+          serviceName: tgi-service-llama
+          config:
+            endpoint: /generate
+            MODEL_ID: bigscience/bloom-560m
+          isDownstreamService: true
diff --git a/microservices-connector/internal/controller/gmconnector_controller.go b/microservices-connector/internal/controller/gmconnector_controller.go
index b74839e6..4537baf0 100644
--- a/microservices-connector/internal/controller/gmconnector_controller.go
+++ b/microservices-connector/internal/controller/gmconnector_controller.go
@@ -45,12 +45,14 @@ const (
 	TeiReranking = "TeiReranking"
 	Tgi          = "Tgi"
 	TgiGaudi     = "TgiGaudi"
+	TgiNvidia    = "TgiNvidia"
 	Llm          = "Llm"
 	DocSum       = "DocSum"
 	Router       = "router"
 	DataPrep     = "DataPrep"
 	xeon         = "xeon"
 	gaudi        = "gaudi"
+	nvidia       = "nvidia"
 	WebRetriever = "WebRetriever"
 	yaml_dir     = "/tmp/microservices/yamls/"
 	Service      = "Service"
@@ -76,6 +78,7 @@ var yamlDict = map[string]string{
 	TeiReranking: yaml_dir + "teirerank.yaml",
 	Tgi:          yaml_dir + "tgi.yaml",
 	TgiGaudi:     yaml_dir + "tgi_gaudi.yaml",
+	TgiNvidia:    yaml_dir + "tgi_nv.yaml",
 	Llm:          yaml_dir + "llm-uservice.yaml",
 	DocSum:       yaml_dir + "docsum-llm-uservice.yaml",
 	Router:       yaml_dir + "gmc-router.yaml",
diff --git a/microservices-connector/usage_guide.md b/microservices-connector/usage_guide.md
index 0b57cbf1..452b9568 100644
--- a/microservices-connector/usage_guide.md
+++ b/microservices-connector/usage_guide.md
@@ -14,6 +14,10 @@ A sample for chatQnA can be found at
config/samples/chatQnA_xeon.yaml
 ```sh
 kubectl create ns chatqa
 kubectl apply -f $(pwd)/config/samples/chatQnA_xeon.yaml
+# To use Gaudi devive
+#kubectl apply -f $(pwd)/config/samples/chatQnA_gaudi.yaml
+# To use Nvidia GPU
+#kubectl apply -f $(pwd)/config/samples/chatQnA_nv.yaml
 ```

 **GMC will reconcile chatQnA custom resource and get all related components/services ready**
@@ -39,7 +43,7 @@ kubectl create deployment client-test -n chatqa --image=python:3.8.13 -- sleep infinity

 **Access the pipeline using the above URL from the client pod**
 ```bash
-export CLIENT_POD=$(kubectl get pod -l app=client-test -o jsonpath={.items..metadata.name})
+export CLIENT_POD=$(kubectl get pod -n chatqa -l app=client-test -o jsonpath={.items..metadata.name})
 export accessUrl=$(kubectl get gmc -n chatqa -o jsonpath="{.items[?(@.metadata.name=='chatqa')].status.accessUrl}")
 kubectl exec "$CLIENT_POD" -n chatqa -- curl $accessUrl -X POST -d '{"text":"What is the revenue of Nike in 2023?","parameters":{"max_new_tokens":17, "do_sample": true}}' -H 'Content-Type: application/json'
 ```

From 7e0476a283b5b3285ed00342a31b8dc75a7b7e66 Mon Sep 17 00:00:00 2001
From: "pre-commit-ci[bot]" <66853113+pre-commit-ci[bot]@users.noreply.github.com>
Date: Tue, 13 Aug 2024 01:12:29 +0000
Subject: [PATCH 2/2] [pre-commit.ci] auto fixes from pre-commit.com hooks

for more information, see https://pre-commit.ci
---
 microservices-connector/usage_guide.md | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/microservices-connector/usage_guide.md b/microservices-connector/usage_guide.md
index 452b9568..710faf2c 100644
--- a/microservices-connector/usage_guide.md
+++ b/microservices-connector/usage_guide.md
@@ -14,7 +14,7 @@ A sample for chatQnA can be found at config/samples/chatQnA_xeon.yaml
 ```sh
 kubectl create ns chatqa
 kubectl apply -f $(pwd)/config/samples/chatQnA_xeon.yaml
-# To use Gaudi devive
+# To use Gaudi device
 #kubectl apply -f $(pwd)/config/samples/chatQnA_gaudi.yaml
 # To use Nvidia GPU
 #kubectl apply -f $(pwd)/config/samples/chatQnA_nv.yaml
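
For reference, here is a sketch of how the switch-mode sample added by this patch could be exercised once applied. It is not part of the patch itself: it assumes the GMC router evaluates each step's `condition` (e.g. `model-id==intel`, `embedding-model-id==small`) against identically named fields in the JSON request body, and it reuses the client-pod pattern from usage_guide.md; the `switch` namespace and GMConnector name come from chatQnA_switch_nv.yaml.

```bash
# Deploy the switch sample and a client pod (same pattern as usage_guide.md)
kubectl create ns switch
kubectl apply -f $(pwd)/config/samples/chatQnA_switch_nv.yaml
kubectl create deployment client-test -n switch --image=python:3.8.13 -- sleep infinity

# Look up the client pod and the router access URL of the "switch" GMConnector
export CLIENT_POD=$(kubectl get pod -n switch -l app=client-test -o jsonpath={.items..metadata.name})
export accessUrl=$(kubectl get gmc -n switch -o jsonpath="{.items[?(@.metadata.name=='switch')].status.accessUrl}")

# Select the "small" embedding branch and the "intel" LLM branch; the
# model-id and embedding-model-id fields are assumed to drive the Switch
# conditions defined in chatQnA_switch_nv.yaml.
kubectl exec "$CLIENT_POD" -n switch -- curl $accessUrl -X POST \
  -d '{"text":"What is the revenue of Nike in 2023?","model-id":"intel","embedding-model-id":"small","parameters":{"max_new_tokens":17,"do_sample":true}}' \
  -H 'Content-Type: application/json'
```

Swapping in `"model-id":"llama"` or `"embedding-model-id":"large"` would route the same request through the alternative branches, which is also the quickest way to reproduce the GPU-memory exhaustion the commit message warns about.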