diff --git a/.github/workflows/_helm-e2e.yaml b/.github/workflows/_helm-e2e.yaml index 183d8148..b54e163f 100644 --- a/.github/workflows/_helm-e2e.yaml +++ b/.github/workflows/_helm-e2e.yaml @@ -79,7 +79,7 @@ jobs: # insert a prefix before opea/.*, the prefix is OPEA_IMAGE_REPO find . -name '*values.yaml' -type f -exec sed -i "s#repository: opea/*#repository: ${OPEA_IMAGE_REPO}opea/#g" {} \; # set OPEA image tag to ${{ inputs.tag }} - find . -name '*values.yaml' -type f -exec sed -i 's#tag: ""#tag: ${{ inputs.tag }}#g' {} \; + find . -name '*values.yaml' -type f -exec sed -i 's#tag: "latest"#tag: ${{ inputs.tag }}#g' {} \; # set huggingface token find . -name '*values.yaml' -type f -exec sed -i "s#insert-your-huggingface-token-here#${HFTOKEN}#g" {} \; # replace the mount dir "Volume: *" with "Volume: $CHART_MOUNT" diff --git a/.github/workflows/manual-freeze-tag.yaml b/.github/workflows/manual-freeze-tag.yaml index f7be30bc..2f2c23fc 100644 --- a/.github/workflows/manual-freeze-tag.yaml +++ b/.github/workflows/manual-freeze-tag.yaml @@ -6,24 +6,19 @@ name: Freeze helm release tag in helm charts on manual event on: workflow_dispatch: inputs: - oldappversion: - default: "v0.8" - description: "Old appVersion to be replaced" - required: true - type: string - newappversion: - default: "v0.9" - description: "New appVersion to replace" - required: true - type: string oldversion: - default: "0.8.0" - description: "Old version to be replaced" + default: "0.9.0" + description: "Old helm version to be replaced" required: true type: string newversion: - default: "0.9.0" - description: "New version to replace" + default: "1.0.0" + description: "New helm version to replace" + required: true + type: string + imageversion: + default: "v1.0" + description: "New image version to replace" required: true type: string @@ -46,11 +41,11 @@ jobs: - name: Run script env: - NEWTAG: ${{ inputs.newappversion }} + NEWTAG: ${{ inputs.imageversion }} run: | - find helm-charts/ -name 'Chart.yaml' 
-type f -exec sed -i "s#appVersion: \"${{ inputs.oldappversion }}\"#appVersion: \"${{ inputs.newappversion }}\"#g" {} \; + find helm-charts/ -name '*values.yaml' -type f -exec sed -i "s#tag: \"latest\"#tag: \"${NEWTAG}\"#g" {} \; find helm-charts/ -name 'Chart.yaml' -type f -exec sed -i "s#version: ${{ inputs.oldversion }}#version: ${{ inputs.newversion }}#g" {} \; - find microservices-connector/helm/ -name 'Chart.yaml' -type f -exec sed -i "s#appVersion: \"${{ inputs.oldappversion }}\"#appVersion: \"${{ inputs.newappversion }}\"#g" {} \; + find microservices-connector/helm/ -name '*values.yaml' -type f -exec sed -i "s#tag: \"latest\"#tag: \"${NEWTAG}\"#g" {} \; find microservices-connector/helm/ -name 'Chart.yaml' -type f -exec sed -i "s#version: ${{ inputs.oldversion }}#version: ${{ inputs.newversion }}#g" {} \; sed -i "s|opea/gmcrouter:latest|opea/gmcrouter:$NEWTAG|g" microservices-connector/config/gmcrouter/gmc-router.yaml sed -i "s|opea/gmcmanager:latest|opea/gmcmanager:$NEWTAG|g" microservices-connector/config/manager/gmc-manager.yaml diff --git a/.github/workflows/push-image-build.yaml b/.github/workflows/push-image-build.yaml index 8ed21b0e..80af7a7f 100644 --- a/.github/workflows/push-image-build.yaml +++ b/.github/workflows/push-image-build.yaml @@ -4,16 +4,16 @@ name: Upgrade GMC system on push event on: - # push: - # branches: ["main"] - # paths: - # - microservices-connector/** - # - "!microservices-connector/helm/**" - # - "!**.md" - # - "!**.txt" - # - "!**.png" - # - "!.**" - # - .github/workflows/gmc-on-push.yaml + push: + branches: ["main"] + paths: + - microservices-connector/** + - "!microservices-connector/helm/**" + - "!**.md" + - "!**.txt" + - "!**.png" + - "!.**" + - .github/workflows/gmc-on-push.yaml workflow_dispatch: concurrency: diff --git a/.github/workflows/scripts/e2e/gmc_xeon_test.sh b/.github/workflows/scripts/e2e/gmc_xeon_test.sh index 7d92fe71..39f1716e 100755 --- a/.github/workflows/scripts/e2e/gmc_xeon_test.sh +++ 
b/.github/workflows/scripts/e2e/gmc_xeon_test.sh @@ -21,6 +21,7 @@ MODIFY_STEP_NAMESPACE="${APP_NAMESPACE}-modstep" WEBHOOK_NAMESPACE="${APP_NAMESPACE}-webhook" function validate_gmc() { + mkdir -p ${LOG_PATH} echo "validate audio-qna" validate_audioqa @@ -152,7 +153,7 @@ function validate_audioqa() { accessUrl=$(kubectl get gmc -n $AUDIOQA_NAMESPACE -o jsonpath="{.items[?(@.metadata.name=='audioqa')].status.accessUrl}") byte_str=$(kubectl exec "$CLIENT_POD" -n $AUDIOQA_NAMESPACE -- curl $accessUrl -s -X POST -d '{"byte_str": "UklGRigAAABXQVZFZm10IBIAAAABAAEARKwAAIhYAQACABAAAABkYXRhAgAAAAEA", "parameters":{"max_new_tokens":64, "do_sample": true, "streaming":false}}' -H 'Content-Type: application/json' | jq .byte_str) if [ -z "$byte_str" ]; then - echo "audioqa failed, please check the the!" + echo "audioqa failed!" exit 1 fi echo "Audioqa response check succeed!" @@ -452,10 +453,14 @@ function validate_modify_config() { #change the model id of the step named "Tgi" in the codegen_xeon_mod.yaml yq -i '(.spec.nodes.root.steps[] | select ( .name == "Tgi")).internalService.config.MODEL_ID = "HuggingFaceH4/mistral-7b-grok"' $(pwd)/config/samples/CodeGen/codegen_xeon_mod.yaml kubectl apply -f $(pwd)/config/samples/CodeGen/codegen_xeon_mod.yaml - #you are supposed to see an error, it's a known issue, but it does not affect the tests - #https://github.com/opea-project/GenAIInfra/issues/314 - pods_count=$(kubectl get pods -n $MODIFY_STEP_NAMESPACE -o jsonpath='{.items[*].metadata.name}' | wc -w) + # Wait until all pods are ready + wait_until_all_pod_ready $MODIFY_STEP_NAMESPACE 300s + if [ $? -ne 0 ]; then + echo "Error: Some pods are not ready!" + exit 1 + fi + pods_count=$(kubectl get pods -n $MODIFY_STEP_NAMESPACE -o jsonpath='{.items[*].metadata.name}' | wc -w) check_gmc_status $MODIFY_STEP_NAMESPACE 'codegen' $pods_count 0 3 if [ $?
-ne 0 ]; then echo "GMC status is not as expected" diff --git a/.github/workflows/scripts/e2e/utils.sh b/.github/workflows/scripts/e2e/utils.sh index f78834eb..1c534c5d 100755 --- a/.github/workflows/scripts/e2e/utils.sh +++ b/.github/workflows/scripts/e2e/utils.sh @@ -54,7 +54,7 @@ function wait_until_all_pod_ready() { timeout=$2 echo "Wait for all pods in NS $namespace to be ready..." - pods=$(kubectl get pods -n $namespace --no-headers -o custom-columns=":metadata.name") + pods=$(kubectl get pods -n $namespace --no-headers | grep -v "Terminating" | awk '{print $1}') # Loop through each pod echo "$pods" | while read -r line; do pod_name=$line diff --git a/helm-charts/chatqna/Chart.yaml b/helm-charts/chatqna/Chart.yaml index aba726a5..28f8e07a 100644 --- a/helm-charts/chatqna/Chart.yaml +++ b/helm-charts/chatqna/Chart.yaml @@ -7,31 +7,31 @@ description: The Helm chart to deploy ChatQnA type: application dependencies: - name: tgi - version: 0.9.0 + version: 1.0.0 repository: "file://../common/tgi" - name: llm-uservice - version: 0.9.0 + version: 1.0.0 repository: "file://../common/llm-uservice" - name: tei - version: 0.9.0 + version: 1.0.0 repository: "file://../common/tei" - name: embedding-usvc - version: 0.9.0 + version: 1.0.0 repository: "file://../common/embedding-usvc" - name: teirerank - version: 0.9.0 + version: 1.0.0 repository: "file://../common/teirerank" - name: reranking-usvc - version: 0.9.0 + version: 1.0.0 repository: "file://../common/reranking-usvc" - name: redis-vector-db - version: 0.9.0 + version: 1.0.0 repository: "file://../common/redis-vector-db" - name: retriever-usvc - version: 0.9.0 + version: 1.0.0 repository: "file://../common/retriever-usvc" - name: data-prep - version: 0.9.0 + version: 1.0.0 repository: "file://../common/data-prep" -version: 0.9.0 -appVersion: "v0.9" +version: 1.0.0 +appVersion: "v1.0" diff --git a/helm-charts/chatqna/values.yaml b/helm-charts/chatqna/values.yaml index a7a115f9..11fa7ae9 100644 --- 
a/helm-charts/chatqna/values.yaml +++ b/helm-charts/chatqna/values.yaml @@ -11,7 +11,7 @@ image: repository: opea/chatqna pullPolicy: IfNotPresent # Overrides the image tag whose default is the chart appVersion. - tag: "" + tag: "latest" port: 8888 service: diff --git a/helm-charts/codegen/Chart.yaml b/helm-charts/codegen/Chart.yaml index 8c9ba31d..27bb80f5 100644 --- a/helm-charts/codegen/Chart.yaml +++ b/helm-charts/codegen/Chart.yaml @@ -7,10 +7,10 @@ description: The Helm chart to deploy CodeGen type: application dependencies: - name: tgi - version: 0.9.0 + version: 1.0.0 repository: "file://../common/tgi" - name: llm-uservice - version: 0.9.0 + version: 1.0.0 repository: "file://../common/llm-uservice" -version: 0.9.0 -appVersion: "v0.9" +version: 1.0.0 +appVersion: "v1.0" diff --git a/helm-charts/codegen/values.yaml b/helm-charts/codegen/values.yaml index 306c5f05..2e69e2ec 100644 --- a/helm-charts/codegen/values.yaml +++ b/helm-charts/codegen/values.yaml @@ -11,7 +11,7 @@ image: repository: opea/codegen pullPolicy: IfNotPresent # Overrides the image tag whose default is the chart appVersion. 
- tag: "" + tag: "latest" port: 7778 service: diff --git a/helm-charts/codetrans/Chart.yaml b/helm-charts/codetrans/Chart.yaml index 300683da..42228eb3 100644 --- a/helm-charts/codetrans/Chart.yaml +++ b/helm-charts/codetrans/Chart.yaml @@ -7,10 +7,10 @@ description: The Helm chart to deploy CodeTrans type: application dependencies: - name: tgi - version: 0.9.0 + version: 1.0.0 repository: "file://../common/tgi" - name: llm-uservice - version: 0.9.0 + version: 1.0.0 repository: "file://../common/llm-uservice" -version: 0.9.0 -appVersion: "v0.9" +version: 1.0.0 +appVersion: "v1.0" diff --git a/helm-charts/codetrans/values.yaml b/helm-charts/codetrans/values.yaml index 621c4a04..e06a05fb 100644 --- a/helm-charts/codetrans/values.yaml +++ b/helm-charts/codetrans/values.yaml @@ -12,7 +12,7 @@ image: repository: opea/codetrans pullPolicy: IfNotPresent # Overrides the image tag whose default is the chart appVersion. - tag: "" + tag: "latest" port: 7777 service: diff --git a/helm-charts/common/asr/Chart.yaml b/helm-charts/common/asr/Chart.yaml index 24894ce3..4cce9d6e 100644 --- a/helm-charts/common/asr/Chart.yaml +++ b/helm-charts/common/asr/Chart.yaml @@ -5,11 +5,11 @@ apiVersion: v2 name: asr description: The Helm chart for deploying asr as microservice type: application -version: 0.9.0 +version: 1.0.0 # The asr microservice server version -appVersion: "v0.9" +appVersion: "v1.0" dependencies: - name: whisper - version: 0.9.0 + version: 1.0.0 repository: file://../whisper condition: autodependency.enabled diff --git a/helm-charts/common/asr/values.yaml b/helm-charts/common/asr/values.yaml index f8597c91..d1167ff3 100644 --- a/helm-charts/common/asr/values.yaml +++ b/helm-charts/common/asr/values.yaml @@ -16,7 +16,7 @@ image: repository: opea/asr pullPolicy: IfNotPresent # Overrides the image tag whose default is the chart appVersion. 
- tag: "" + tag: "latest" imagePullSecrets: [] nameOverride: "" diff --git a/helm-charts/common/chatqna-ui/Chart.yaml b/helm-charts/common/chatqna-ui/Chart.yaml index 0c02ae6f..8fdc9fa0 100644 --- a/helm-charts/common/chatqna-ui/Chart.yaml +++ b/helm-charts/common/chatqna-ui/Chart.yaml @@ -5,5 +5,5 @@ apiVersion: v2 name: chatqna-ui description: A Helm chart to the UI for chatQnA workload type: application -version: 0.9.0 -appVersion: "v0.9" +version: 1.0.0 +appVersion: "v1.0" diff --git a/helm-charts/common/chatqna-ui/values.yaml b/helm-charts/common/chatqna-ui/values.yaml index bb28851f..d62fddec 100644 --- a/helm-charts/common/chatqna-ui/values.yaml +++ b/helm-charts/common/chatqna-ui/values.yaml @@ -11,7 +11,7 @@ image: repository: opea/chatqna-conversation-ui pullPolicy: IfNotPresent # Overrides the image tag whose default is the chart appVersion. - tag: "" + tag: "latest" imagePullSecrets: [] nameOverride: "" diff --git a/helm-charts/common/data-prep/Chart.yaml b/helm-charts/common/data-prep/Chart.yaml index 7c8f434f..a001cda9 100644 --- a/helm-charts/common/data-prep/Chart.yaml +++ b/helm-charts/common/data-prep/Chart.yaml @@ -5,15 +5,15 @@ apiVersion: v2 name: data-prep description: The Helm chart for deploying data prep as microservice type: application -version: 0.9.0 +version: 1.0.0 # The data prep microservice server version -appVersion: "v0.9" +appVersion: "v1.0" dependencies: - name: tei - version: 0.9.0 + version: 1.0.0 repository: file://../tei condition: autodependency.enabled - name: redis-vector-db - version: 0.9.0 + version: 1.0.0 repository: file://../redis-vector-db condition: autodependency.enabled diff --git a/helm-charts/common/data-prep/templates/configmap.yaml b/helm-charts/common/data-prep/templates/configmap.yaml index 34a111df..44b1559e 100644 --- a/helm-charts/common/data-prep/templates/configmap.yaml +++ b/helm-charts/common/data-prep/templates/configmap.yaml @@ -20,6 +20,8 @@ data: REDIS_URL: "redis://{{ .Release.Name 
}}-redis-vector-db:6379" {{- end }} INDEX_NAME: {{ .Values.INDEX_NAME | quote }} + KEY_INDEX_NAME: {{ .Values.KEY_INDEX_NAME | quote }} + SEARCH_BATCH_SIZE: {{ .Values.SEARCH_BATCH_SIZE | quote }} HUGGINGFACEHUB_API_TOKEN: {{ .Values.global.HUGGINGFACEHUB_API_TOKEN | quote}} HF_HOME: "/tmp/.cache/huggingface" {{- if .Values.global.HF_ENDPOINT }} diff --git a/helm-charts/common/data-prep/values.yaml b/helm-charts/common/data-prep/values.yaml index 458d7c0d..62f98b4d 100644 --- a/helm-charts/common/data-prep/values.yaml +++ b/helm-charts/common/data-prep/values.yaml @@ -14,7 +14,7 @@ image: repository: opea/dataprep-redis pullPolicy: IfNotPresent # Overrides the image tag whose default is the chart appVersion. - tag: "" + tag: "latest" imagePullSecrets: [] nameOverride: "" @@ -88,6 +88,8 @@ EMBED_MODEL: "" # redis DB service URL, e.g. redis://: REDIS_URL: "" INDEX_NAME: "rag-redis" +KEY_INDEX_NAME: "file-keys" +SEARCH_BATCH_SIZE: 10 global: http_proxy: "" diff --git a/helm-charts/common/embedding-usvc/Chart.yaml b/helm-charts/common/embedding-usvc/Chart.yaml index 30d21ec5..4738a13b 100644 --- a/helm-charts/common/embedding-usvc/Chart.yaml +++ b/helm-charts/common/embedding-usvc/Chart.yaml @@ -5,11 +5,11 @@ apiVersion: v2 name: embedding-usvc description: The Helm chart for deploying embedding as microservice type: application -version: 0.9.0 +version: 1.0.0 # The embedding microservice server version -appVersion: "v0.9" +appVersion: "v1.0" dependencies: - name: tei - version: 0.9.0 + version: 1.0.0 repository: file://../tei condition: autodependency.enabled diff --git a/helm-charts/common/embedding-usvc/values.yaml b/helm-charts/common/embedding-usvc/values.yaml index f3b1f9e8..f12df1af 100644 --- a/helm-charts/common/embedding-usvc/values.yaml +++ b/helm-charts/common/embedding-usvc/values.yaml @@ -15,7 +15,7 @@ image: repository: opea/embedding-tei pullPolicy: IfNotPresent # Overrides the image tag whose default is the chart appVersion. 
- tag: "" + tag: "latest" imagePullSecrets: [] nameOverride: "" diff --git a/helm-charts/common/llm-uservice/Chart.yaml b/helm-charts/common/llm-uservice/Chart.yaml index e587ae7d..77a78061 100644 --- a/helm-charts/common/llm-uservice/Chart.yaml +++ b/helm-charts/common/llm-uservice/Chart.yaml @@ -5,11 +5,11 @@ apiVersion: v2 name: llm-uservice description: The Helm chart for deploying llm as microservice type: application -version: 0.9.0 +version: 1.0.0 # The llm microservice server version -appVersion: "v0.9" +appVersion: "v1.0" dependencies: - name: tgi - version: 0.9.0 + version: 1.0.0 repository: file://../tgi condition: autodependency.enabled diff --git a/helm-charts/common/llm-uservice/values.yaml b/helm-charts/common/llm-uservice/values.yaml index 75972694..dd55dc93 100644 --- a/helm-charts/common/llm-uservice/values.yaml +++ b/helm-charts/common/llm-uservice/values.yaml @@ -15,7 +15,7 @@ image: repository: opea/llm-tgi pullPolicy: IfNotPresent # Overrides the image tag whose default is the chart appVersion. 
- tag: "" + tag: "latest" imagePullSecrets: [] nameOverride: "" diff --git a/helm-charts/common/redis-vector-db/Chart.yaml b/helm-charts/common/redis-vector-db/Chart.yaml index 62b1fa31..0e882b28 100644 --- a/helm-charts/common/redis-vector-db/Chart.yaml +++ b/helm-charts/common/redis-vector-db/Chart.yaml @@ -5,5 +5,5 @@ apiVersion: v2 name: redis-vector-db description: The Helm chart for Redis Vector DB type: application -version: 0.9.0 +version: 1.0.0 appVersion: "7.2.0-v9" diff --git a/helm-charts/common/reranking-usvc/Chart.yaml b/helm-charts/common/reranking-usvc/Chart.yaml index 27ab1857..e781a609 100644 --- a/helm-charts/common/reranking-usvc/Chart.yaml +++ b/helm-charts/common/reranking-usvc/Chart.yaml @@ -5,11 +5,11 @@ apiVersion: v2 name: reranking-usvc description: The Helm chart for deploying reranking as microservice type: application -version: 0.9.0 +version: 1.0.0 # The reranking microservice server version -appVersion: "v0.9" +appVersion: "v1.0" dependencies: - name: teirerank - version: 0.9.0 + version: 1.0.0 repository: file://../teirerank condition: autodependency.enabled diff --git a/helm-charts/common/reranking-usvc/values.yaml b/helm-charts/common/reranking-usvc/values.yaml index c011cf30..7fee759f 100644 --- a/helm-charts/common/reranking-usvc/values.yaml +++ b/helm-charts/common/reranking-usvc/values.yaml @@ -15,7 +15,7 @@ image: repository: opea/reranking-tei pullPolicy: IfNotPresent # Overrides the image tag whose default is the chart appVersion. 
- tag: "" + tag: "latest" imagePullSecrets: [] nameOverride: "" diff --git a/helm-charts/common/retriever-usvc/Chart.yaml b/helm-charts/common/retriever-usvc/Chart.yaml index 82b7b8ff..020eb26c 100644 --- a/helm-charts/common/retriever-usvc/Chart.yaml +++ b/helm-charts/common/retriever-usvc/Chart.yaml @@ -5,15 +5,15 @@ apiVersion: v2 name: retriever-usvc description: The Helm chart for deploying retriever as microservice type: application -version: 0.9.0 +version: 1.0.0 # The retriever microservice server version -appVersion: "v0.9" +appVersion: "v1.0" dependencies: - name: tei - version: 0.9.0 + version: 1.0.0 repository: file://../tei condition: autodependency.enabled - name: redis-vector-db - version: 0.9.0 + version: 1.0.0 repository: file://../redis-vector-db condition: autodependency.enabled diff --git a/helm-charts/common/retriever-usvc/values.yaml b/helm-charts/common/retriever-usvc/values.yaml index 16186a06..3128287e 100644 --- a/helm-charts/common/retriever-usvc/values.yaml +++ b/helm-charts/common/retriever-usvc/values.yaml @@ -20,7 +20,7 @@ image: repository: opea/retriever-redis pullPolicy: IfNotPresent # Overrides the image tag whose default is the chart appVersion. 
- tag: "" + tag: "latest" imagePullSecrets: [] nameOverride: "" diff --git a/helm-charts/common/speecht5/Chart.yaml b/helm-charts/common/speecht5/Chart.yaml index 333affac..4927c825 100644 --- a/helm-charts/common/speecht5/Chart.yaml +++ b/helm-charts/common/speecht5/Chart.yaml @@ -5,6 +5,6 @@ apiVersion: v2 name: speecht5 description: The Helm chart for deploying speecht5 as microservice type: application -version: 0.9.0 +version: 1.0.0 # The speecht5 microservice server version -appVersion: "v0.9" +appVersion: "v1.0" diff --git a/helm-charts/common/speecht5/gaudi-values.yaml b/helm-charts/common/speecht5/gaudi-values.yaml index a56c8232..aefd9f37 100644 --- a/helm-charts/common/speecht5/gaudi-values.yaml +++ b/helm-charts/common/speecht5/gaudi-values.yaml @@ -7,7 +7,7 @@ image: repository: opea/speecht5-gaudi - tag: "" + tag: "latest" resources: limits: diff --git a/helm-charts/common/speecht5/values.yaml b/helm-charts/common/speecht5/values.yaml index f6abcc0c..e0c23cbc 100644 --- a/helm-charts/common/speecht5/values.yaml +++ b/helm-charts/common/speecht5/values.yaml @@ -14,7 +14,7 @@ image: repository: opea/speecht5 pullPolicy: IfNotPresent # Overrides the image tag whose default is the chart appVersion. 
- tag: "" + tag: "latest" imagePullSecrets: [] nameOverride: "" diff --git a/helm-charts/common/tei/Chart.yaml b/helm-charts/common/tei/Chart.yaml index 336b8250..374df771 100644 --- a/helm-charts/common/tei/Chart.yaml +++ b/helm-charts/common/tei/Chart.yaml @@ -5,6 +5,6 @@ apiVersion: v2 name: tei description: The Helm chart for HuggingFace Text Embedding Inference Server type: application -version: 0.9.0 +version: 1.0.0 # The HF TEI version appVersion: "cpu-1.5" diff --git a/helm-charts/common/teirerank/Chart.yaml b/helm-charts/common/teirerank/Chart.yaml index e030b1fe..738fda86 100644 --- a/helm-charts/common/teirerank/Chart.yaml +++ b/helm-charts/common/teirerank/Chart.yaml @@ -5,6 +5,6 @@ apiVersion: v2 name: teirerank description: The Helm chart for HuggingFace Text Embedding Inference Server type: application -version: 0.9.0 +version: 1.0.0 # The HF TEI version appVersion: "cpu-1.5" diff --git a/helm-charts/common/tgi/Chart.yaml b/helm-charts/common/tgi/Chart.yaml index 48541e6d..7ab58f82 100644 --- a/helm-charts/common/tgi/Chart.yaml +++ b/helm-charts/common/tgi/Chart.yaml @@ -5,6 +5,6 @@ apiVersion: v2 name: tgi description: The Helm chart for HuggingFace Text Generation Inference Server type: application -version: 0.9.0 +version: 1.0.0 # The HF TGI version appVersion: "2.1.0" diff --git a/helm-charts/common/tts/Chart.yaml b/helm-charts/common/tts/Chart.yaml index 71ad351e..b82cf8db 100644 --- a/helm-charts/common/tts/Chart.yaml +++ b/helm-charts/common/tts/Chart.yaml @@ -5,12 +5,12 @@ apiVersion: v2 name: tts description: The Helm chart for deploying tts as microservice type: application -version: 0.9.0 +version: 1.0.0 # The tts microservice server version -appVersion: "v0.9" +appVersion: "v1.0" dependencies: - name: speecht5 - version: 0.9.0 + version: 1.0.0 repository: file://../speecht5 condition: autodependency.enabled diff --git a/helm-charts/common/tts/values.yaml b/helm-charts/common/tts/values.yaml index 56f5792c..6de99219 100644 --- 
a/helm-charts/common/tts/values.yaml +++ b/helm-charts/common/tts/values.yaml @@ -16,7 +16,7 @@ image: repository: opea/tts pullPolicy: IfNotPresent # Overrides the image tag whose default is the chart appVersion. - tag: "" + tag: "latest" imagePullSecrets: [] nameOverride: "" diff --git a/helm-charts/common/web-retriever/Chart.yaml b/helm-charts/common/web-retriever/Chart.yaml index e9b32741..4bc31da0 100644 --- a/helm-charts/common/web-retriever/Chart.yaml +++ b/helm-charts/common/web-retriever/Chart.yaml @@ -5,11 +5,11 @@ apiVersion: v2 name: web-retriever description: The Helm chart for deploying web retriever as microservice type: application -version: 0.9.0 +version: 1.0.0 # The web retriever microservice server version -appVersion: "v0.9" +appVersion: "v1.0" dependencies: - name: tei - version: 0.9.0 + version: 1.0.0 repository: file://../tei condition: autodependency.enabled diff --git a/helm-charts/common/web-retriever/values.yaml b/helm-charts/common/web-retriever/values.yaml index 156eb0db..99d4272a 100644 --- a/helm-charts/common/web-retriever/values.yaml +++ b/helm-charts/common/web-retriever/values.yaml @@ -18,7 +18,7 @@ image: repository: opea/web-retriever-chroma pullPolicy: IfNotPresent # Overrides the image tag whose default is the chart appVersion. 
- tag: "" + tag: "latest" imagePullSecrets: [] nameOverride: "" diff --git a/helm-charts/common/whisper/Chart.yaml b/helm-charts/common/whisper/Chart.yaml index a6a82296..5d8e5f36 100644 --- a/helm-charts/common/whisper/Chart.yaml +++ b/helm-charts/common/whisper/Chart.yaml @@ -5,6 +5,6 @@ apiVersion: v2 name: whisper description: The Helm chart for deploying whisper as microservice type: application -version: 0.9.0 +version: 1.0.0 # The whisper microservice server version -appVersion: "v0.9" +appVersion: "v1.0" diff --git a/helm-charts/common/whisper/gaudi-values.yaml b/helm-charts/common/whisper/gaudi-values.yaml index 781e99c8..fec919ad 100644 --- a/helm-charts/common/whisper/gaudi-values.yaml +++ b/helm-charts/common/whisper/gaudi-values.yaml @@ -7,7 +7,7 @@ image: repository: opea/whisper-gaudi - tag: "" + tag: "latest" resources: limits: diff --git a/helm-charts/common/whisper/values.yaml b/helm-charts/common/whisper/values.yaml index 2d72d886..62b6aa4a 100644 --- a/helm-charts/common/whisper/values.yaml +++ b/helm-charts/common/whisper/values.yaml @@ -13,7 +13,7 @@ image: repository: opea/whisper pullPolicy: IfNotPresent # Overrides the image tag whose default is the chart appVersion. 
- tag: "" + tag: "latest" imagePullSecrets: [] nameOverride: "" diff --git a/helm-charts/docsum/Chart.yaml b/helm-charts/docsum/Chart.yaml index a99a3136..f61b0902 100644 --- a/helm-charts/docsum/Chart.yaml +++ b/helm-charts/docsum/Chart.yaml @@ -7,10 +7,10 @@ description: The Helm chart to deploy DocSum type: application dependencies: - name: tgi - version: 0.9.0 + version: 1.0.0 repository: "file://../common/tgi" - name: llm-uservice - version: 0.9.0 + version: 1.0.0 repository: "file://../common/llm-uservice" -version: 0.9.0 -appVersion: "v0.9" +version: 1.0.0 +appVersion: "v1.0" diff --git a/helm-charts/docsum/values.yaml b/helm-charts/docsum/values.yaml index 4331d361..3ff0747b 100644 --- a/helm-charts/docsum/values.yaml +++ b/helm-charts/docsum/values.yaml @@ -12,7 +12,7 @@ image: repository: opea/docsum pullPolicy: IfNotPresent # Overrides the image tag whose default is the chart appVersion. - tag: "" + tag: "latest" port: 8888 service: diff --git a/helm-charts/update_manifests.sh b/helm-charts/update_manifests.sh index 2c1ad074..c0f7ae9c 100755 --- a/helm-charts/update_manifests.sh +++ b/helm-charts/update_manifests.sh @@ -16,7 +16,7 @@ function generate_yaml { outputdir=$2 local extraparams="" - if [[ $(grep -c 'tag: ""' ./common/$chart/values.yaml) != 0 ]]; then + if [[ $(grep -c 'tag: "latest"' ./common/$chart/values.yaml) != 0 ]]; then extraparams="--set image.tag=$NEWTAG" fi @@ -25,7 +25,7 @@ function generate_yaml { for f in `ls ./common/$chart/*-values.yaml 2>/dev/null `; do ext=$(basename $f | cut -d'-' -f1) extraparams="" - if [[ $(grep -c 'tag: ""' $f) != 0 ]]; then + if [[ $(grep -c 'tag: "latest"' $f) != 0 ]]; then extraparams="--set image.tag=$NEWTAG" fi helm template $chart ./common/$chart --skip-tests --values ${f} --set global.extraEnvConfig=extra-env-config,global.modelUseHostPath=$MODELPATH,noProbe=true $extraparams > ${outputdir}/${chart}_${ext}.yaml diff --git a/microservices-connector/README.md b/microservices-connector/README.md index 
a2ab11aa..e84de975 100644 --- a/microservices-connector/README.md +++ b/microservices-connector/README.md @@ -3,7 +3,8 @@ This repo defines the GenAI Microservice Connector(GMC) for OPEA projects. GMC can be used to compose and adjust GenAI pipelines dynamically on kubernetes. It can leverage the microservices provided by [GenAIComps](https://github.com/opea-project/GenAIComps) and external services to compose GenAI pipelines. External services might be running in a public cloud or on-prem by providing an URL and access details such as an API key and ensuring there is network connectivity. It also allows users to adjust the pipeline on the fly like switching to a different Large language Model(LLM), adding new functions into the chain(like adding guardrails),etc. GMC supports different types of steps in the pipeline, like sequential, parallel and conditional. -Please refer this [usage_guide](./usage_guide.md) for sample use cases. +Please refer to [usage_guide](./usage_guide.md) for sample use cases. +Please refer to [chatqna_use_cases](./config/samples/ChatQnA/use_cases.md) for more ChatQnA use cases. 
## Description diff --git a/microservices-connector/cmd/router/main.go b/microservices-connector/cmd/router/main.go index 73e8aecd..eacf84db 100644 --- a/microservices-connector/cmd/router/main.go +++ b/microservices-connector/cmd/router/main.go @@ -38,24 +38,32 @@ import ( flag "github.com/spf13/pflag" ) +const ( + BufferSize = 1024 + MaxGoroutines = 1024 + ServiceURL = "serviceUrl" + ServiceNode = "node" + DataPrep = "DataPrep" + Parameters = "parameters" +) + var ( jsonGraph = flag.String("graph-json", "", "serialized json graph def") log = logf.Log.WithName("GMCGraphRouter") mcGraph *mcv1alpha3.GMConnector defaultNodeName = "root" - Prefix = []byte("data: b'") - Suffix = []byte("'\n\n") - DONE = []byte("[DONE]") - Newline = []byte("\n") -) - -const ( - BufferSize = 1024 - ServiceURL = "serviceUrl" - ServiceNode = "node" - DataPrep = "DataPrep" - Parameters = "parameters" - Llm = "Llm" + semaphore = make(chan struct{}, MaxGoroutines) + transport = &http.Transport{ + MaxIdleConns: 1000, + MaxIdleConnsPerHost: 100, + IdleConnTimeout: 2 * time.Minute, + TLSHandshakeTimeout: time.Minute, + ExpectContinueTimeout: 30 * time.Second, + } + callClient = &http.Client{ + Transport: transport, + Timeout: 30 * time.Second, + } ) type EnsembleStepOutput struct { @@ -151,6 +159,9 @@ func callService( input []byte, headers http.Header, ) (io.ReadCloser, int, error) { + semaphore <- struct{}{} + defer func() { <-semaphore }() + defer timeTrack(time.Now(), "step", serviceUrl) log.Info("Entering callService", "url", serviceUrl) @@ -164,6 +175,7 @@ func callService( return nil, 400, err } } + req, err := http.NewRequest("POST", serviceUrl, bytes.NewBuffer(input)) if err != nil { log.Error(err, "An error occurred while preparing request object with serviceUrl.", "serviceUrl", serviceUrl) @@ -173,8 +185,8 @@ func callService( if val := req.Header.Get("Content-Type"); val == "" { req.Header.Add("Content-Type", "application/json") } - resp, err := http.DefaultClient.Do(req) + resp, err 
:= callClient.Do(req) if err != nil { log.Error(err, "An error has occurred while calling service", "service", serviceUrl) return nil, 500, err @@ -561,15 +573,6 @@ func mcGraphHandler(w http.ResponseWriter, req *http.Request) { break } - /*sliceBF := buffer[:n] - if !bytes.HasPrefix(sliceBF, DONE) { - sliceBF = bytes.TrimPrefix(sliceBF, Prefix) - sliceBF = bytes.TrimSuffix(sliceBF, Suffix) - } else { - sliceBF = bytes.Join([][]byte{Newline, sliceBF}, nil) - } - log.Info("[llm - chat_stream] chunk:", "Buffer", string(sliceBF))*/ - // Write the chunk to the ResponseWriter if _, err := w.Write(buffer[:n]); err != nil { log.Error(err, "failed to write to ResponseWriter") diff --git a/microservices-connector/config/manifests/asr.yaml b/microservices-connector/config/manifests/asr.yaml index 6985c84c..606c8d95 100644 --- a/microservices-connector/config/manifests/asr.yaml +++ b/microservices-connector/config/manifests/asr.yaml @@ -8,10 +8,10 @@ kind: ConfigMap metadata: name: asr-config labels: - helm.sh/chart: asr-0.9.0 + helm.sh/chart: asr-1.0.0 app.kubernetes.io/name: asr app.kubernetes.io/instance: asr - app.kubernetes.io/version: "v0.9" + app.kubernetes.io/version: "v1.0" app.kubernetes.io/managed-by: Helm data: ASR_ENDPOINT: "http://asr-whisper:7066" @@ -28,10 +28,10 @@ kind: Service metadata: name: asr labels: - helm.sh/chart: asr-0.9.0 + helm.sh/chart: asr-1.0.0 app.kubernetes.io/name: asr app.kubernetes.io/instance: asr - app.kubernetes.io/version: "v0.9" + app.kubernetes.io/version: "v1.0" app.kubernetes.io/managed-by: Helm spec: type: ClusterIP @@ -53,10 +53,10 @@ kind: Deployment metadata: name: asr labels: - helm.sh/chart: asr-0.9.0 + helm.sh/chart: asr-1.0.0 app.kubernetes.io/name: asr app.kubernetes.io/instance: asr - app.kubernetes.io/version: "v0.9" + app.kubernetes.io/version: "v1.0" app.kubernetes.io/managed-by: Helm spec: replicas: 1 diff --git a/microservices-connector/config/manifests/chatqna-ui.yaml 
b/microservices-connector/config/manifests/chatqna-ui.yaml index 2c6e75de..aff07171 100644 --- a/microservices-connector/config/manifests/chatqna-ui.yaml +++ b/microservices-connector/config/manifests/chatqna-ui.yaml @@ -8,10 +8,10 @@ kind: ConfigMap metadata: name: chatqna-ui-config labels: - helm.sh/chart: chatqna-ui-0.9.0 + helm.sh/chart: chatqna-ui-1.0.0 app.kubernetes.io/name: chatqna-ui app.kubernetes.io/instance: chatqna-ui - app.kubernetes.io/version: "v0.9" + app.kubernetes.io/version: "v1.0" app.kubernetes.io/managed-by: Helm data: http_proxy: "" @@ -33,10 +33,10 @@ kind: Service metadata: name: chatqna-ui labels: - helm.sh/chart: chatqna-ui-0.9.0 + helm.sh/chart: chatqna-ui-1.0.0 app.kubernetes.io/name: chatqna-ui app.kubernetes.io/instance: chatqna-ui - app.kubernetes.io/version: "v0.9" + app.kubernetes.io/version: "v1.0" app.kubernetes.io/managed-by: Helm spec: type: ClusterIP @@ -58,10 +58,10 @@ kind: Deployment metadata: name: chatqna-ui labels: - helm.sh/chart: chatqna-ui-0.9.0 + helm.sh/chart: chatqna-ui-1.0.0 app.kubernetes.io/name: chatqna-ui app.kubernetes.io/instance: chatqna-ui - app.kubernetes.io/version: "v0.9" + app.kubernetes.io/version: "v1.0" app.kubernetes.io/managed-by: Helm spec: replicas: 1 @@ -72,10 +72,10 @@ spec: template: metadata: labels: - helm.sh/chart: chatqna-ui-0.9.0 + helm.sh/chart: chatqna-ui-1.0.0 app.kubernetes.io/name: chatqna-ui app.kubernetes.io/instance: chatqna-ui - app.kubernetes.io/version: "v0.9" + app.kubernetes.io/version: "v1.0" app.kubernetes.io/managed-by: Helm spec: securityContext: diff --git a/microservices-connector/config/manifests/data-prep.yaml b/microservices-connector/config/manifests/data-prep.yaml index 668348d4..ea3da484 100644 --- a/microservices-connector/config/manifests/data-prep.yaml +++ b/microservices-connector/config/manifests/data-prep.yaml @@ -8,16 +8,18 @@ kind: ConfigMap metadata: name: data-prep-config labels: - helm.sh/chart: data-prep-0.9.0 + helm.sh/chart: data-prep-1.0.0 
app.kubernetes.io/name: data-prep app.kubernetes.io/instance: data-prep - app.kubernetes.io/version: "v0.9" + app.kubernetes.io/version: "v1.0" app.kubernetes.io/managed-by: Helm data: TEI_ENDPOINT: "http://data-prep-tei" EMBED_MODEL: "" REDIS_URL: "redis://data-prep-redis-vector-db:6379" INDEX_NAME: "rag-redis" + KEY_INDEX_NAME: "file-keys" + SEARCH_BATCH_SIZE: "10" HUGGINGFACEHUB_API_TOKEN: "insert-your-huggingface-token-here" HF_HOME: "/tmp/.cache/huggingface" http_proxy: "" @@ -36,10 +38,10 @@ kind: Service metadata: name: data-prep labels: - helm.sh/chart: data-prep-0.9.0 + helm.sh/chart: data-prep-1.0.0 app.kubernetes.io/name: data-prep app.kubernetes.io/instance: data-prep - app.kubernetes.io/version: "v0.9" + app.kubernetes.io/version: "v1.0" app.kubernetes.io/managed-by: Helm spec: type: ClusterIP @@ -61,10 +63,10 @@ kind: Deployment metadata: name: data-prep labels: - helm.sh/chart: data-prep-0.9.0 + helm.sh/chart: data-prep-1.0.0 app.kubernetes.io/name: data-prep app.kubernetes.io/instance: data-prep - app.kubernetes.io/version: "v0.9" + app.kubernetes.io/version: "v1.0" app.kubernetes.io/managed-by: Helm spec: replicas: 1 diff --git a/microservices-connector/config/manifests/docsum-llm-uservice.yaml b/microservices-connector/config/manifests/docsum-llm-uservice.yaml index 818d91e4..e69d8a06 100644 --- a/microservices-connector/config/manifests/docsum-llm-uservice.yaml +++ b/microservices-connector/config/manifests/docsum-llm-uservice.yaml @@ -8,10 +8,10 @@ kind: ConfigMap metadata: name: docsum-llm-uservice-config labels: - helm.sh/chart: llm-uservice-0.9.0 + helm.sh/chart: llm-uservice-1.0.0 app.kubernetes.io/name: llm-uservice app.kubernetes.io/instance: docsum - app.kubernetes.io/version: "v0.9" + app.kubernetes.io/version: "v1.0" app.kubernetes.io/managed-by: Helm data: TGI_LLM_ENDPOINT: "http://docsum-tgi" @@ -33,10 +33,10 @@ kind: Service metadata: name: docsum-llm-uservice labels: - helm.sh/chart: llm-uservice-0.9.0 + helm.sh/chart: 
llm-uservice-1.0.0 app.kubernetes.io/name: llm-uservice app.kubernetes.io/instance: docsum - app.kubernetes.io/version: "v0.9" + app.kubernetes.io/version: "v1.0" app.kubernetes.io/managed-by: Helm spec: type: ClusterIP @@ -58,10 +58,10 @@ kind: Deployment metadata: name: docsum-llm-uservice labels: - helm.sh/chart: llm-uservice-0.9.0 + helm.sh/chart: llm-uservice-1.0.0 app.kubernetes.io/name: llm-uservice app.kubernetes.io/instance: docsum - app.kubernetes.io/version: "v0.9" + app.kubernetes.io/version: "v1.0" app.kubernetes.io/managed-by: Helm spec: replicas: 1 diff --git a/microservices-connector/config/manifests/embedding-usvc.yaml b/microservices-connector/config/manifests/embedding-usvc.yaml index d29fdb2e..f0cf5c97 100644 --- a/microservices-connector/config/manifests/embedding-usvc.yaml +++ b/microservices-connector/config/manifests/embedding-usvc.yaml @@ -8,10 +8,10 @@ kind: ConfigMap metadata: name: embedding-usvc-config labels: - helm.sh/chart: embedding-usvc-0.9.0 + helm.sh/chart: embedding-usvc-1.0.0 app.kubernetes.io/name: embedding-usvc app.kubernetes.io/instance: embedding-usvc - app.kubernetes.io/version: "v0.9" + app.kubernetes.io/version: "v1.0" app.kubernetes.io/managed-by: Helm data: TEI_EMBEDDING_ENDPOINT: "http://embedding-usvc-tei" @@ -31,10 +31,10 @@ kind: Service metadata: name: embedding-usvc labels: - helm.sh/chart: embedding-usvc-0.9.0 + helm.sh/chart: embedding-usvc-1.0.0 app.kubernetes.io/name: embedding-usvc app.kubernetes.io/instance: embedding-usvc - app.kubernetes.io/version: "v0.9" + app.kubernetes.io/version: "v1.0" app.kubernetes.io/managed-by: Helm spec: type: ClusterIP @@ -56,10 +56,10 @@ kind: Deployment metadata: name: embedding-usvc labels: - helm.sh/chart: embedding-usvc-0.9.0 + helm.sh/chart: embedding-usvc-1.0.0 app.kubernetes.io/name: embedding-usvc app.kubernetes.io/instance: embedding-usvc - app.kubernetes.io/version: "v0.9" + app.kubernetes.io/version: "v1.0" app.kubernetes.io/managed-by: Helm spec: replicas: 1 diff 
--git a/microservices-connector/config/manifests/llm-uservice.yaml b/microservices-connector/config/manifests/llm-uservice.yaml index 11941f76..905185b4 100644 --- a/microservices-connector/config/manifests/llm-uservice.yaml +++ b/microservices-connector/config/manifests/llm-uservice.yaml @@ -8,10 +8,10 @@ kind: ConfigMap metadata: name: llm-uservice-config labels: - helm.sh/chart: llm-uservice-0.9.0 + helm.sh/chart: llm-uservice-1.0.0 app.kubernetes.io/name: llm-uservice app.kubernetes.io/instance: llm-uservice - app.kubernetes.io/version: "v0.9" + app.kubernetes.io/version: "v1.0" app.kubernetes.io/managed-by: Helm data: TGI_LLM_ENDPOINT: "http://llm-uservice-tgi" @@ -33,10 +33,10 @@ kind: Service metadata: name: llm-uservice labels: - helm.sh/chart: llm-uservice-0.9.0 + helm.sh/chart: llm-uservice-1.0.0 app.kubernetes.io/name: llm-uservice app.kubernetes.io/instance: llm-uservice - app.kubernetes.io/version: "v0.9" + app.kubernetes.io/version: "v1.0" app.kubernetes.io/managed-by: Helm spec: type: ClusterIP @@ -58,10 +58,10 @@ kind: Deployment metadata: name: llm-uservice labels: - helm.sh/chart: llm-uservice-0.9.0 + helm.sh/chart: llm-uservice-1.0.0 app.kubernetes.io/name: llm-uservice app.kubernetes.io/instance: llm-uservice - app.kubernetes.io/version: "v0.9" + app.kubernetes.io/version: "v1.0" app.kubernetes.io/managed-by: Helm spec: replicas: 1 diff --git a/microservices-connector/config/manifests/redis-vector-db.yaml b/microservices-connector/config/manifests/redis-vector-db.yaml index 0fdbf794..177831bb 100644 --- a/microservices-connector/config/manifests/redis-vector-db.yaml +++ b/microservices-connector/config/manifests/redis-vector-db.yaml @@ -8,7 +8,7 @@ kind: Service metadata: name: redis-vector-db labels: - helm.sh/chart: redis-vector-db-0.9.0 + helm.sh/chart: redis-vector-db-1.0.0 app.kubernetes.io/name: redis-vector-db app.kubernetes.io/instance: redis-vector-db app.kubernetes.io/version: "7.2.0-v9" @@ -37,7 +37,7 @@ kind: Deployment metadata: 
name: redis-vector-db labels: - helm.sh/chart: redis-vector-db-0.9.0 + helm.sh/chart: redis-vector-db-1.0.0 app.kubernetes.io/name: redis-vector-db app.kubernetes.io/instance: redis-vector-db app.kubernetes.io/version: "7.2.0-v9" diff --git a/microservices-connector/config/manifests/reranking-usvc.yaml b/microservices-connector/config/manifests/reranking-usvc.yaml index 6302634c..0ffc1bb8 100644 --- a/microservices-connector/config/manifests/reranking-usvc.yaml +++ b/microservices-connector/config/manifests/reranking-usvc.yaml @@ -8,10 +8,10 @@ kind: ConfigMap metadata: name: reranking-usvc-config labels: - helm.sh/chart: reranking-usvc-0.9.0 + helm.sh/chart: reranking-usvc-1.0.0 app.kubernetes.io/name: reranking-usvc app.kubernetes.io/instance: reranking-usvc - app.kubernetes.io/version: "v0.9" + app.kubernetes.io/version: "v1.0" app.kubernetes.io/managed-by: Helm data: TEI_RERANKING_ENDPOINT: "http://reranking-usvc-teirerank" @@ -31,10 +31,10 @@ kind: Service metadata: name: reranking-usvc labels: - helm.sh/chart: reranking-usvc-0.9.0 + helm.sh/chart: reranking-usvc-1.0.0 app.kubernetes.io/name: reranking-usvc app.kubernetes.io/instance: reranking-usvc - app.kubernetes.io/version: "v0.9" + app.kubernetes.io/version: "v1.0" app.kubernetes.io/managed-by: Helm spec: type: ClusterIP @@ -56,10 +56,10 @@ kind: Deployment metadata: name: reranking-usvc labels: - helm.sh/chart: reranking-usvc-0.9.0 + helm.sh/chart: reranking-usvc-1.0.0 app.kubernetes.io/name: reranking-usvc app.kubernetes.io/instance: reranking-usvc - app.kubernetes.io/version: "v0.9" + app.kubernetes.io/version: "v1.0" app.kubernetes.io/managed-by: Helm spec: replicas: 1 diff --git a/microservices-connector/config/manifests/retriever-usvc.yaml b/microservices-connector/config/manifests/retriever-usvc.yaml index 96d72166..91e18c8e 100644 --- a/microservices-connector/config/manifests/retriever-usvc.yaml +++ b/microservices-connector/config/manifests/retriever-usvc.yaml @@ -8,10 +8,10 @@ kind: ConfigMap 
metadata: name: retriever-usvc-config labels: - helm.sh/chart: retriever-usvc-0.9.0 + helm.sh/chart: retriever-usvc-1.0.0 app.kubernetes.io/name: retriever-usvc app.kubernetes.io/instance: retriever-usvc - app.kubernetes.io/version: "v0.9" + app.kubernetes.io/version: "v1.0" app.kubernetes.io/managed-by: Helm data: TEI_EMBEDDING_ENDPOINT: "http://retriever-usvc-tei" @@ -37,10 +37,10 @@ kind: Service metadata: name: retriever-usvc labels: - helm.sh/chart: retriever-usvc-0.9.0 + helm.sh/chart: retriever-usvc-1.0.0 app.kubernetes.io/name: retriever-usvc app.kubernetes.io/instance: retriever-usvc - app.kubernetes.io/version: "v0.9" + app.kubernetes.io/version: "v1.0" app.kubernetes.io/managed-by: Helm spec: type: ClusterIP @@ -62,10 +62,10 @@ kind: Deployment metadata: name: retriever-usvc labels: - helm.sh/chart: retriever-usvc-0.9.0 + helm.sh/chart: retriever-usvc-1.0.0 app.kubernetes.io/name: retriever-usvc app.kubernetes.io/instance: retriever-usvc - app.kubernetes.io/version: "v0.9" + app.kubernetes.io/version: "v1.0" app.kubernetes.io/managed-by: Helm spec: replicas: 1 diff --git a/microservices-connector/config/manifests/speecht5.yaml b/microservices-connector/config/manifests/speecht5.yaml index 0c19f022..5c5a73b0 100644 --- a/microservices-connector/config/manifests/speecht5.yaml +++ b/microservices-connector/config/manifests/speecht5.yaml @@ -8,10 +8,10 @@ kind: ConfigMap metadata: name: speecht5-config labels: - helm.sh/chart: speecht5-0.9.0 + helm.sh/chart: speecht5-1.0.0 app.kubernetes.io/name: speecht5 app.kubernetes.io/instance: speecht5 - app.kubernetes.io/version: "v0.9" + app.kubernetes.io/version: "v1.0" app.kubernetes.io/managed-by: Helm data: EASYOCR_MODULE_PATH: "/tmp/.EasyOCR" @@ -31,10 +31,10 @@ kind: Service metadata: name: speecht5 labels: - helm.sh/chart: speecht5-0.9.0 + helm.sh/chart: speecht5-1.0.0 app.kubernetes.io/name: speecht5 app.kubernetes.io/instance: speecht5 - app.kubernetes.io/version: "v0.9" + app.kubernetes.io/version: "v1.0" 
app.kubernetes.io/managed-by: Helm spec: type: ClusterIP @@ -56,10 +56,10 @@ kind: Deployment metadata: name: speecht5 labels: - helm.sh/chart: speecht5-0.9.0 + helm.sh/chart: speecht5-1.0.0 app.kubernetes.io/name: speecht5 app.kubernetes.io/instance: speecht5 - app.kubernetes.io/version: "v0.9" + app.kubernetes.io/version: "v1.0" app.kubernetes.io/managed-by: Helm spec: replicas: 1 diff --git a/microservices-connector/config/manifests/speecht5_gaudi.yaml b/microservices-connector/config/manifests/speecht5_gaudi.yaml index d06a9e45..0316a092 100644 --- a/microservices-connector/config/manifests/speecht5_gaudi.yaml +++ b/microservices-connector/config/manifests/speecht5_gaudi.yaml @@ -8,10 +8,10 @@ kind: ConfigMap metadata: name: speecht5-config labels: - helm.sh/chart: speecht5-0.9.0 + helm.sh/chart: speecht5-1.0.0 app.kubernetes.io/name: speecht5 app.kubernetes.io/instance: speecht5 - app.kubernetes.io/version: "v0.9" + app.kubernetes.io/version: "v1.0" app.kubernetes.io/managed-by: Helm data: EASYOCR_MODULE_PATH: "/tmp/.EasyOCR" @@ -31,10 +31,10 @@ kind: Service metadata: name: speecht5 labels: - helm.sh/chart: speecht5-0.9.0 + helm.sh/chart: speecht5-1.0.0 app.kubernetes.io/name: speecht5 app.kubernetes.io/instance: speecht5 - app.kubernetes.io/version: "v0.9" + app.kubernetes.io/version: "v1.0" app.kubernetes.io/managed-by: Helm spec: type: ClusterIP @@ -56,10 +56,10 @@ kind: Deployment metadata: name: speecht5 labels: - helm.sh/chart: speecht5-0.9.0 + helm.sh/chart: speecht5-1.0.0 app.kubernetes.io/name: speecht5 app.kubernetes.io/instance: speecht5 - app.kubernetes.io/version: "v0.9" + app.kubernetes.io/version: "v1.0" app.kubernetes.io/managed-by: Helm spec: replicas: 1 diff --git a/microservices-connector/config/manifests/tei.yaml b/microservices-connector/config/manifests/tei.yaml index bb341166..2889b4d3 100644 --- a/microservices-connector/config/manifests/tei.yaml +++ b/microservices-connector/config/manifests/tei.yaml @@ -8,7 +8,7 @@ kind: ConfigMap 
metadata: name: tei-config labels: - helm.sh/chart: tei-0.9.0 + helm.sh/chart: tei-1.0.0 app.kubernetes.io/name: tei app.kubernetes.io/instance: tei app.kubernetes.io/version: "cpu-1.5" @@ -33,7 +33,7 @@ kind: Service metadata: name: tei labels: - helm.sh/chart: tei-0.9.0 + helm.sh/chart: tei-1.0.0 app.kubernetes.io/name: tei app.kubernetes.io/instance: tei app.kubernetes.io/version: "cpu-1.5" @@ -58,7 +58,7 @@ kind: Deployment metadata: name: tei labels: - helm.sh/chart: tei-0.9.0 + helm.sh/chart: tei-1.0.0 app.kubernetes.io/name: tei app.kubernetes.io/instance: tei app.kubernetes.io/version: "cpu-1.5" diff --git a/microservices-connector/config/manifests/tei_gaudi.yaml b/microservices-connector/config/manifests/tei_gaudi.yaml index 7ce9ba51..0af2958e 100644 --- a/microservices-connector/config/manifests/tei_gaudi.yaml +++ b/microservices-connector/config/manifests/tei_gaudi.yaml @@ -8,7 +8,7 @@ kind: ConfigMap metadata: name: tei-config labels: - helm.sh/chart: tei-0.9.0 + helm.sh/chart: tei-1.0.0 app.kubernetes.io/name: tei app.kubernetes.io/instance: tei app.kubernetes.io/version: "cpu-1.5" @@ -33,7 +33,7 @@ kind: Service metadata: name: tei labels: - helm.sh/chart: tei-0.9.0 + helm.sh/chart: tei-1.0.0 app.kubernetes.io/name: tei app.kubernetes.io/instance: tei app.kubernetes.io/version: "cpu-1.5" @@ -58,7 +58,7 @@ kind: Deployment metadata: name: tei labels: - helm.sh/chart: tei-0.9.0 + helm.sh/chart: tei-1.0.0 app.kubernetes.io/name: tei app.kubernetes.io/instance: tei app.kubernetes.io/version: "cpu-1.5" diff --git a/microservices-connector/config/manifests/teirerank.yaml b/microservices-connector/config/manifests/teirerank.yaml index 26cf991b..e412ecdb 100644 --- a/microservices-connector/config/manifests/teirerank.yaml +++ b/microservices-connector/config/manifests/teirerank.yaml @@ -8,7 +8,7 @@ kind: ConfigMap metadata: name: teirerank-config labels: - helm.sh/chart: teirerank-0.9.0 + helm.sh/chart: teirerank-1.0.0 app.kubernetes.io/name: teirerank 
app.kubernetes.io/instance: teirerank app.kubernetes.io/version: "cpu-1.5" @@ -32,7 +32,7 @@ kind: Service metadata: name: teirerank labels: - helm.sh/chart: teirerank-0.9.0 + helm.sh/chart: teirerank-1.0.0 app.kubernetes.io/name: teirerank app.kubernetes.io/instance: teirerank app.kubernetes.io/version: "cpu-1.5" @@ -57,7 +57,7 @@ kind: Deployment metadata: name: teirerank labels: - helm.sh/chart: teirerank-0.9.0 + helm.sh/chart: teirerank-1.0.0 app.kubernetes.io/name: teirerank app.kubernetes.io/instance: teirerank app.kubernetes.io/version: "cpu-1.5" diff --git a/microservices-connector/config/manifests/tgi.yaml b/microservices-connector/config/manifests/tgi.yaml index 976d90fd..774bb0e5 100644 --- a/microservices-connector/config/manifests/tgi.yaml +++ b/microservices-connector/config/manifests/tgi.yaml @@ -8,7 +8,7 @@ kind: ConfigMap metadata: name: tgi-config labels: - helm.sh/chart: tgi-0.9.0 + helm.sh/chart: tgi-1.0.0 app.kubernetes.io/name: tgi app.kubernetes.io/instance: tgi app.kubernetes.io/version: "2.1.0" @@ -35,7 +35,7 @@ kind: Service metadata: name: tgi labels: - helm.sh/chart: tgi-0.9.0 + helm.sh/chart: tgi-1.0.0 app.kubernetes.io/name: tgi app.kubernetes.io/instance: tgi app.kubernetes.io/version: "2.1.0" @@ -60,7 +60,7 @@ kind: Deployment metadata: name: tgi labels: - helm.sh/chart: tgi-0.9.0 + helm.sh/chart: tgi-1.0.0 app.kubernetes.io/name: tgi app.kubernetes.io/instance: tgi app.kubernetes.io/version: "2.1.0" diff --git a/microservices-connector/config/manifests/tgi_gaudi.yaml b/microservices-connector/config/manifests/tgi_gaudi.yaml index 99910200..37f85e03 100644 --- a/microservices-connector/config/manifests/tgi_gaudi.yaml +++ b/microservices-connector/config/manifests/tgi_gaudi.yaml @@ -8,7 +8,7 @@ kind: ConfigMap metadata: name: tgi-config labels: - helm.sh/chart: tgi-0.9.0 + helm.sh/chart: tgi-1.0.0 app.kubernetes.io/name: tgi app.kubernetes.io/instance: tgi app.kubernetes.io/version: "2.1.0" @@ -36,7 +36,7 @@ kind: Service metadata: 
name: tgi labels: - helm.sh/chart: tgi-0.9.0 + helm.sh/chart: tgi-1.0.0 app.kubernetes.io/name: tgi app.kubernetes.io/instance: tgi app.kubernetes.io/version: "2.1.0" @@ -61,7 +61,7 @@ kind: Deployment metadata: name: tgi labels: - helm.sh/chart: tgi-0.9.0 + helm.sh/chart: tgi-1.0.0 app.kubernetes.io/name: tgi app.kubernetes.io/instance: tgi app.kubernetes.io/version: "2.1.0" diff --git a/microservices-connector/config/manifests/tgi_nv.yaml b/microservices-connector/config/manifests/tgi_nv.yaml index ef17bca9..7736fbee 100644 --- a/microservices-connector/config/manifests/tgi_nv.yaml +++ b/microservices-connector/config/manifests/tgi_nv.yaml @@ -8,7 +8,7 @@ kind: ConfigMap metadata: name: tgi-config labels: - helm.sh/chart: tgi-0.9.0 + helm.sh/chart: tgi-1.0.0 app.kubernetes.io/name: tgi app.kubernetes.io/instance: tgi app.kubernetes.io/version: "2.1.0" @@ -34,7 +34,7 @@ kind: Service metadata: name: tgi labels: - helm.sh/chart: tgi-0.9.0 + helm.sh/chart: tgi-1.0.0 app.kubernetes.io/name: tgi app.kubernetes.io/instance: tgi app.kubernetes.io/version: "2.1.0" @@ -59,7 +59,7 @@ kind: Deployment metadata: name: tgi labels: - helm.sh/chart: tgi-0.9.0 + helm.sh/chart: tgi-1.0.0 app.kubernetes.io/name: tgi app.kubernetes.io/instance: tgi app.kubernetes.io/version: "2.1.0" diff --git a/microservices-connector/config/manifests/tts.yaml b/microservices-connector/config/manifests/tts.yaml index d3aeff87..afba1c38 100644 --- a/microservices-connector/config/manifests/tts.yaml +++ b/microservices-connector/config/manifests/tts.yaml @@ -8,10 +8,10 @@ kind: ConfigMap metadata: name: tts-config labels: - helm.sh/chart: tts-0.9.0 + helm.sh/chart: tts-1.0.0 app.kubernetes.io/name: tts app.kubernetes.io/instance: tts - app.kubernetes.io/version: "v0.9" + app.kubernetes.io/version: "v1.0" app.kubernetes.io/managed-by: Helm data: TTS_ENDPOINT: "http://tts-speecht5:7055" @@ -28,10 +28,10 @@ kind: Service metadata: name: tts labels: - helm.sh/chart: tts-0.9.0 + helm.sh/chart: tts-1.0.0 
app.kubernetes.io/name: tts app.kubernetes.io/instance: tts - app.kubernetes.io/version: "v0.9" + app.kubernetes.io/version: "v1.0" app.kubernetes.io/managed-by: Helm spec: type: ClusterIP @@ -53,10 +53,10 @@ kind: Deployment metadata: name: tts labels: - helm.sh/chart: tts-0.9.0 + helm.sh/chart: tts-1.0.0 app.kubernetes.io/name: tts app.kubernetes.io/instance: tts - app.kubernetes.io/version: "v0.9" + app.kubernetes.io/version: "v1.0" app.kubernetes.io/managed-by: Helm spec: replicas: 1 diff --git a/microservices-connector/config/manifests/web-retriever.yaml b/microservices-connector/config/manifests/web-retriever.yaml index 4f3da27b..14000831 100644 --- a/microservices-connector/config/manifests/web-retriever.yaml +++ b/microservices-connector/config/manifests/web-retriever.yaml @@ -8,10 +8,10 @@ kind: ConfigMap metadata: name: web-retriever-config labels: - helm.sh/chart: web-retriever-0.9.0 + helm.sh/chart: web-retriever-1.0.0 app.kubernetes.io/name: web-retriever app.kubernetes.io/instance: web-retriever - app.kubernetes.io/version: "v0.9" + app.kubernetes.io/version: "v1.0" app.kubernetes.io/managed-by: Helm data: TEI_EMBEDDING_ENDPOINT: "http://web-retriever-tei" @@ -32,10 +32,10 @@ kind: Service metadata: name: web-retriever labels: - helm.sh/chart: web-retriever-0.9.0 + helm.sh/chart: web-retriever-1.0.0 app.kubernetes.io/name: web-retriever app.kubernetes.io/instance: web-retriever - app.kubernetes.io/version: "v0.9" + app.kubernetes.io/version: "v1.0" app.kubernetes.io/managed-by: Helm spec: type: ClusterIP @@ -57,10 +57,10 @@ kind: Deployment metadata: name: web-retriever labels: - helm.sh/chart: web-retriever-0.9.0 + helm.sh/chart: web-retriever-1.0.0 app.kubernetes.io/name: web-retriever app.kubernetes.io/instance: web-retriever - app.kubernetes.io/version: "v0.9" + app.kubernetes.io/version: "v1.0" app.kubernetes.io/managed-by: Helm spec: replicas: 1 diff --git a/microservices-connector/config/manifests/whisper.yaml 
b/microservices-connector/config/manifests/whisper.yaml index 1e914ea3..31a9b21b 100644 --- a/microservices-connector/config/manifests/whisper.yaml +++ b/microservices-connector/config/manifests/whisper.yaml @@ -8,10 +8,10 @@ kind: ConfigMap metadata: name: whisper-config labels: - helm.sh/chart: whisper-0.9.0 + helm.sh/chart: whisper-1.0.0 app.kubernetes.io/name: whisper app.kubernetes.io/instance: whisper - app.kubernetes.io/version: "v0.9" + app.kubernetes.io/version: "v1.0" app.kubernetes.io/managed-by: Helm data: EASYOCR_MODULE_PATH: "/tmp/.EasyOCR" @@ -31,10 +31,10 @@ kind: Service metadata: name: whisper labels: - helm.sh/chart: whisper-0.9.0 + helm.sh/chart: whisper-1.0.0 app.kubernetes.io/name: whisper app.kubernetes.io/instance: whisper - app.kubernetes.io/version: "v0.9" + app.kubernetes.io/version: "v1.0" app.kubernetes.io/managed-by: Helm spec: type: ClusterIP @@ -56,10 +56,10 @@ kind: Deployment metadata: name: whisper labels: - helm.sh/chart: whisper-0.9.0 + helm.sh/chart: whisper-1.0.0 app.kubernetes.io/name: whisper app.kubernetes.io/instance: whisper - app.kubernetes.io/version: "v0.9" + app.kubernetes.io/version: "v1.0" app.kubernetes.io/managed-by: Helm spec: replicas: 1 diff --git a/microservices-connector/config/manifests/whisper_gaudi.yaml b/microservices-connector/config/manifests/whisper_gaudi.yaml index ec2d8488..54aa98a2 100644 --- a/microservices-connector/config/manifests/whisper_gaudi.yaml +++ b/microservices-connector/config/manifests/whisper_gaudi.yaml @@ -8,10 +8,10 @@ kind: ConfigMap metadata: name: whisper-config labels: - helm.sh/chart: whisper-0.9.0 + helm.sh/chart: whisper-1.0.0 app.kubernetes.io/name: whisper app.kubernetes.io/instance: whisper - app.kubernetes.io/version: "v0.9" + app.kubernetes.io/version: "v1.0" app.kubernetes.io/managed-by: Helm data: EASYOCR_MODULE_PATH: "/tmp/.EasyOCR" @@ -31,10 +31,10 @@ kind: Service metadata: name: whisper labels: - helm.sh/chart: whisper-0.9.0 + helm.sh/chart: whisper-1.0.0 
app.kubernetes.io/name: whisper app.kubernetes.io/instance: whisper - app.kubernetes.io/version: "v0.9" + app.kubernetes.io/version: "v1.0" app.kubernetes.io/managed-by: Helm spec: type: ClusterIP @@ -56,10 +56,10 @@ kind: Deployment metadata: name: whisper labels: - helm.sh/chart: whisper-0.9.0 + helm.sh/chart: whisper-1.0.0 app.kubernetes.io/name: whisper app.kubernetes.io/instance: whisper - app.kubernetes.io/version: "v0.9" + app.kubernetes.io/version: "v1.0" app.kubernetes.io/managed-by: Helm spec: replicas: 1 diff --git a/microservices-connector/config/samples/ChatQnA/chatQnA_switch_xeon.yaml b/microservices-connector/config/samples/ChatQnA/chatQnA_switch_xeon.yaml index 05659991..8225454d 100644 --- a/microservices-connector/config/samples/ChatQnA/chatQnA_switch_xeon.yaml +++ b/microservices-connector/config/samples/ChatQnA/chatQnA_switch_xeon.yaml @@ -120,5 +120,5 @@ spec: serviceName: tgi-service-llama config: endpoint: /generate - MODEL_ID: HuggingFaceH4/mistral-7b-grok + MODEL_ID: openlm-research/open_llama_3b isDownstreamService: true diff --git a/microservices-connector/config/samples/ChatQnA/use_cases.md b/microservices-connector/config/samples/ChatQnA/use_cases.md new file mode 100644 index 00000000..e7f916b1 --- /dev/null +++ b/microservices-connector/config/samples/ChatQnA/use_cases.md @@ -0,0 +1,222 @@ +# ChatQnA Use Cases in Kubernetes Cluster via GMC + +This document outlines the deployment process for a ChatQnA application utilizing the [GenAIComps](https://github.com/opea-project/GenAIComps.git) microservice pipeline components on Intel Xeon server and Gaudi machines. + +The ChatQnA Service leverages a Kubernetes operator called genai-microservices-connector(GMC). GMC supports connecting microservices to create pipelines based on the specification in the pipeline yaml file in addition to allowing the user to dynamically control which model is used in a service such as an LLM or embedder. 
The underlying pipeline language also supports using external services that may be running in public or private cloud elsewhere. + +Install GMC in your Kubernetes cluster, if you have not already done so, by following the steps in Section "Getting Started" at [GMC Install](https://github.com/opea-project/GenAIInfra/tree/main/microservices-connector). Soon we will publish images to Docker Hub, at which point no builds will be required, simplifying the install. + +The ChatQnA application is defined as a Custom Resource (CR) file that the above GMC operator acts upon. It first checks if the microservices listed in the CR yaml file are running, if not starts them and then proceeds to connect them. When the ChatQnA RAG pipeline is ready, the service endpoint details are returned, letting you use the application. Should you use "kubectl get pods" commands you will see all the component microservices, in particular `embedding`, `retriever`, `rerank`, and `llm`. + +## Using prebuilt images + +The ChatQnA uses the below prebuilt images if you choose a Xeon deployment + +- embedding: opea/embedding-tei:latest +- retriever: opea/retriever-redis:latest +- reranking: opea/reranking-tei:latest +- llm: opea/llm-tgi:latest +- dataprep-redis: opea/dataprep-redis:latest +- tei_xeon_service: ghcr.io/huggingface/text-embeddings-inference:cpu-1.5 +- tei_embedding_service: ghcr.io/huggingface/text-embeddings-inference:cpu-1.5 +- tgi-service: ghcr.io/huggingface/text-generation-inference:sha-e4201f4-intel-cpu +- redis-vector-db: redis/redis-stack:7.2.0-v9 + +Should you desire to use the Gaudi accelerator, two alternate images are used for the embedding and llm services. 
+For Gaudi: + +- tei-embedding-service: opea/tei-gaudi:latest +- tgi-service: ghcr.io/huggingface/tgi-gaudi:1.2.1 + +> [NOTE] +> Please refer to [Xeon README](https://github.com/opea-project/GenAIExamples/blob/main/ChatQnA/docker/xeon/README.md) or [Gaudi README](https://github.com/opea-project/GenAIExamples/blob/main/ChatQnA/docker/gaudi/README.md) to build the OPEA images. These too will be available on Docker Hub soon to simplify use. + +## Deploy ChatQnA pipeline + +There are 3 use cases for ChatQnA example: + +- General ChatQnA with preset RAG data +- ChatQnA with data preparation which supports that the user can upload RAG data online via dataprep microservice +- ChatQnA supports multiple LLM models which can be switched in runtime + +### General ChatQnA with preset RAG data + +This involves deploying the ChatQnA custom resource. You can use `chatQnA_xeon.yaml` or if you have a Gaudi cluster, you could use `chatQnA_gaudi.yaml`. + +1. Create namespace and deploy application + + ```sh + kubectl create ns chatqa + kubectl apply -f $(pwd)/chatQnA_xeon.yaml + ``` + +2. GMC will reconcile the ChatQnA custom resource and get all related components/services ready. Check if the service up. + + ```sh + kubectl get service -n chatqa + ``` + +3. Retrieve the application access URL + + ```sh + kubectl get gmconnectors.gmc.opea.io -n chatqa + NAME URL READY AGE + chatqa http://router-service.chatqa.svc.cluster.local:8080 9/0/9 3m + ``` + +4. Deploy a client pod to test the application + + ```sh + kubectl create deployment client-test -n chatqa --image=python:3.8.13 -- sleep infinity + ``` + +5. 
Access the application using the above URL from the client pod + + ```sh + export CLIENT_POD=$(kubectl get pod -n chatqa -l app=client-test -o jsonpath={.items..metadata.name}) + export accessUrl=$(kubectl get gmc -n chatqa -o jsonpath="{.items[?(@.metadata.name=='chatqa')].status.accessUrl}") + kubectl exec "$CLIENT_POD" -n chatqa -- curl -s --no-buffer $accessUrl -X POST -d '{"text":"What is the revenue of Nike in 2023?","parameters":{"max_new_tokens":17, "do_sample": true}}' -H 'Content-Type: application/json' + ``` + +6. Perhaps you want to try another LLM model? Just modify the application custom resource to use another LLM model + + Should you, for instance, want to change the LLM model you are using in the ChatQnA pipeline, just edit the custom resource file. + For example, to use Llama-2-7b-chat-hf make the following edit: + + ```yaml + - name: Tgi + internalService: + serviceName: tgi-service-m + config: + LLM_MODEL_ID: Llama-2-7b-chat-hf + ``` + +7. Apply the change + + ``` + kubectl apply -f $(pwd)/chatQnA_xeon.yaml + ``` + +8. Check that the tgi-svc-deployment has been changed to use the new LLM Model + + ```sh + kubectl get deployment tgi-service-m-deployment -n chatqa -o jsonpath="{.spec.template.spec.containers[*].env[?(@.name=='LLM_MODEL_ID')].value}" + ``` + +9. Access the updated pipeline using the same URL from above using the client pod + + ```sh + kubectl exec "$CLIENT_POD" -n chatqa -- curl -s --no-buffer $accessUrl -X POST -d '{"text":"What are the key features of Intel Gaudi?","parameters":{"max_new_tokens":17, "do_sample": true}}' -H 'Content-Type: application/json' + ``` + +> [NOTE] + +You can remove your ChatQnA pipeline by executing standard Kubernetes kubectl commands to remove a custom resource. Verify it was removed by executing kubectl get pods in the chatqa namespace. + +### ChatQnA with data preparation + +This involves deploying the ChatQnA custom resource. 
You can use `chatQnA_dataprep_xeon.yaml` or if you have a Gaudi cluster, you could use `chatQnA_dataprep_gaudi.yaml`. + +1. Create namespace and deploy application + + ```sh + kubectl create ns chatqa + kubectl apply -f $(pwd)/chatQnA_dataprep_xeon.yaml + ``` + +2. GMC will reconcile the ChatQnA custom resource and get all related components/services ready. Check if the service up. + + ```sh + kubectl get service -n chatqa + ``` + +3. Retrieve the application access URL + + ```sh + kubectl get gmconnectors.gmc.opea.io -n chatqa + NAME URL READY AGE + chatqa http://router-service.chatqa.svc.cluster.local:8080 10/0/10 3m + ``` + +> [NOTE] + +Comparing with `General ChatQnA with preset RAG data`, there should be `10` microservices, the extra one is the microservice of `dataprep`. + +4. Deploy a client pod to test the application + + ```sh + kubectl create deployment client-test -n chatqa --image=python:3.8.13 -- sleep infinity + ``` + +5. Upload the RAG data from internet via microservice `dataprep` + + ```sh + export CLIENT_POD=$(kubectl get pod -n chatqa -l app=client-test -o jsonpath={.items..metadata.name}) + export accessUrl=$(kubectl get gmc -n chatqa -o jsonpath="{.items[?(@.metadata.name=='chatqa')].status.accessUrl}") + kubectl exec "$CLIENT_POD" -n chatqa -- curl -s --no-buffer "$accessUrl/dataprep" -F 'link_list=["https://raw.githubusercontent.com/opea-project/GenAIInfra/main/microservices-connector/test/data/gaudi.txt"]' -H "Content-Type: multipart/form-data" + ``` + +6. Access the application using the above URL from the client pod + + ```sh + kubectl exec "$CLIENT_POD" -n chatqa -- curl -s --no-buffer $accessUrl -X POST '{"text":"What are the key features of Intel Gaudi?","parameters":{"max_new_tokens":100, "do_sample": true}}' -H 'Content-Type: application/json' + ``` + +> [NOTE] + +You can remove your ChatQnA pipeline by executing standard Kubernetes kubectl commands to remove a custom resource. 
Verify it was removed by executing kubectl get pods in the chatqa namespace. + +### ChatQnA supports multiple LLM models + +This involves deploying the ChatQnA custom resource. You can use `chatQnA_switch_xeon.yaml` or if you have a Gaudi cluster, you could use `chatQnA_switch_gaudi.yaml`. Moreover, this use case contains 2 LLM models: `Intel/neural-chat-7b-v3-3` and `meta-llama/CodeLlama-7b-hf`. + +1. Create namespace and deploy application + + ```sh + kubectl create ns switch + kubectl apply -f $(pwd)/chatQnA_switch_xeon.yaml + ``` + +2. GMC will reconcile the ChatQnA custom resource and get all related components/services ready. Check if the service up. + + ```sh + kubectl get service -n switch + ``` + +3. Retrieve the application access URL + + ```sh + kubectl get gmconnectors.gmc.opea.io -n switch + NAME URL READY AGE + switch http://router-service.switch.svc.cluster.local:8080 15/0/15 83s + ``` + +> [NOTE] + +Comparing with `General ChatQnA with preset RAG data`, there should be `15` microservices, the extra are the microservices for different embedding models and LLM models. + +4. Deploy a client pod to test the application + + ```sh + kubectl create deployment client-test -n switch --image=python:3.8.13 -- sleep infinity + ``` + +5. Access the application using the above URL from the client pod by using LLM model `Intel/neural-chat-7b-v3-3` + + ```sh + export CLIENT_POD=$(kubectl get pod -n switch -l app=client-test -o jsonpath={.items..metadata.name}) + export accessUrl=$(kubectl get gmc -n switch -o jsonpath="{.items[?(@.metadata.name=='switch')].status.accessUrl}") + kubectl exec "$CLIENT_POD" -n switch -- curl -s --no-buffer $accessUrl -X POST -d '{"text":"What are the key features of Intel Gaudi?", "model-id":"intel", "embedding-model-id":"small", "parameters":{"max_new_tokens":50, "do_sample": true}}' -H 'Content-Type: application/json' + ``` + +6. 
Access the application using the above URL from the client pod by using LLM model `meta-llama/CodeLlama-7b-hf` + + ```sh + export CLIENT_POD=$(kubectl get pod -n switch -l app=client-test -o jsonpath={.items..metadata.name}) + export accessUrl=$(kubectl get gmc -n switch -o jsonpath="{.items[?(@.metadata.name=='switch')].status.accessUrl}") + kubectl exec "$CLIENT_POD" -n switch -- curl -s --no-buffer $accessUrl -X POST -d '{"text":"What are the key features of Intel Gaudi?", "model-id":"llama", "embedding-model-id":"small", "parameters":{"max_new_tokens":50, "do_sample": true}}' -H 'Content-Type: application/json' + ``` + +> [NOTE] + +Showing as above, user can switch the LLM models in runtime by changing the request body, such as adding `"model-id":"llama"` in request body to use the Llama model or changing it into `"model-id":"intel"` to use the Intel model. diff --git a/microservices-connector/helm/Chart.yaml b/microservices-connector/helm/Chart.yaml index 9fe8a063..7572a5e6 100644 --- a/microservices-connector/helm/Chart.yaml +++ b/microservices-connector/helm/Chart.yaml @@ -18,10 +18,12 @@ type: application # This is the chart version. This version number should be incremented each time you make changes # to the chart and its templates, including the app version. # Versions are expected to follow Semantic Versioning (https://semver.org/) -version: 0.9.0 +version: 1.0.0 # This is the version number of the application being deployed. This version number should be # incremented each time you make changes to the application. Versions are not expected to # follow Semantic Versioning. They should reflect the version the application is using. # It is recommended to use it with quotes. 
-appVersion: "v0.9" + +appVersion: "v1.0" + diff --git a/microservices-connector/helm/values.yaml b/microservices-connector/helm/values.yaml index 54a63b36..8fbdd918 100644 --- a/microservices-connector/helm/values.yaml +++ b/microservices-connector/helm/values.yaml @@ -11,7 +11,7 @@ image: repository: opea/gmcmanager pullPolicy: IfNotPresent # Overrides the image tag whose default is the chart appVersion. - tag: "" + tag: "latest" imagePullSecrets: [] nameOverride: "" diff --git a/scripts/nvidia/README.md b/scripts/nvidia/README.md index 3b03090a..82d498fc 100644 --- a/scripts/nvidia/README.md +++ b/scripts/nvidia/README.md @@ -1,25 +1,25 @@ -# QuickSatrt Guide +# NVIDIA GPU Quick-Start Guide Ver: 1.0 Last Update: 2024-Aug-21 Author: [PeterYang12](https://github.com/PeterYang12) E-mail: yuhan.yang@intel.com -This document is a quickstart guide for GenAIInfra deployment and test on NVIDIA GPU platform. +This document is a quick-start guide for GenAIInfra deployment and test on NVIDIA GPU platform. ## Prerequisite -GenAIInfra uses Kubernetes as the cloud native infrastructure. Please follow the steps below to prepare the Kubernetes environment. +GenAIInfra uses Kubernetes as the cloud native infrastructure. Follow these steps to prepare the Kubernetes environment. -#### Setup Kubernetes cluster +### Setup Kubernetes cluster -Please follow [Kubernetes official setup guide](https://github.com/opea-project/GenAIInfra?tab=readme-ov-file#setup-kubernetes-cluster) to setup Kubernetes. We recommend to use Kubernetes with version >= 1.27. +Follow the [Kubernetes official setup guide](https://kubernetes.io/docs/setup/) to setup Kubernetes. We recommend you use Kubernetes with version >= 1.27. -#### To run GenAIInfra on NVIDIA GPUs +### To run GenAIInfra on NVIDIA GPUs -To run the workloads on NVIDIA GPUs, please follow the steps. +To run the workloads on NVIDIA GPUs, follow these steps. -1. 
Please check the [support matrix](https://docs.nvidia.com/ai-enterprise/latest/product-support-matrix/index.html) to make sure that environment meets the requirements. +1. Check the [support matrix](https://docs.nvidia.com/ai-enterprise/latest/product-support-matrix/index.html) to make sure your environment meets the requirements. 2. [Install the NVIDIA GPU CUDA driver and software stack](https://developer.nvidia.com/cuda-downloads). @@ -28,15 +28,15 @@ To run the workloads on NVIDIA GPUs, please follow the steps. 4. [Install the NVIDIA GPU device plugin for Kubernetes](https://github.com/NVIDIA/k8s-device-plugin). 5. [Install helm](https://helm.sh/docs/intro/install/) -NOTE: Please make sure you configure the appropriate container runtime based on the type of container runtime you installed during Kubernetes setup. +NOTE: Make sure you configure the appropriate container runtime based on the type of container runtime you installed during Kubernetes setup. ## Usages -#### Use GenAI Microservices Connector (GMC) to deploy and adjust GenAIExamples on NVIDIA GPUs +### Use GenAI Microservices Connector (GMC) to deploy and adjust GenAIExamples on NVIDIA GPUs #### 1. Install the GMC Helm Chart -**_NOTE_**: Before installingGMC, please export your own huggingface tokens, Google API KEY and Google CSE ID. If you have pre-defined directory to save the models on you cluster hosts, please also set the path. +**_NOTE_**: Before installing GMC, export your own huggingface tokens, Google API KEY, and Google CSE ID. If you have a pre-defined directory to save the models on your cluster hosts, also set the path. ``` export YOUR_HF_TOKEN= @@ -45,21 +45,21 @@ export YOUR_GOOGLE_CSE_ID= export MOUNT_DIR= ``` -Here also provides a simple way to install GMC using helm chart `./install-gmc.sh` +Here is a simple way to install GMC using helm chart `./install-gmc.sh` > WARNING: the install-gmc.sh may fail due to OS distributions. 
-For more details, please refer to [GMC installation](https://github.com/opea-project/GenAIInfra/blob/main/microservices-connector/README.md) to get more details. +For more details, refer to [GMC installation](../../microservices-connector/README.md). #### 2.Use GMC to compose a ChatQnA Pipeline -Please refer to [Usage guide for GMC](https://github.com/opea-project/GenAIInfra/blob/main/microservices-connector/usage_guide.md) for more details. +Refer to [Usage guide for GMC](../../microservices-connector/usage_guide.md) for more details. Here provides a simple script `./gmc-chatqna-pipeline.sh` to use GMC to compose ChatQnA pipeline. #### 3. Test ChatQnA service -Please refer to [GMC ChatQnA test](https://github.com/opea-project/GenAIInfra/blob/main/microservices-connector/usage_guide.md#use-gmc-to-compose-a-chatqna-pipeline) +Refer to [GMC ChatQnA test](../../microservices-connector/usage_guide.md#use-gmc-to-compose-a-chatqna-pipeline) Here provides a simple way to test the service. `./gmc-chatqna-test.sh` #### 4. Delete ChatQnA and GMC @@ -71,4 +71,4 @@ kubectl delete ns chatqa ## FAQ and Troubleshooting -The scripts are only tested on baremental **Ubuntu22.04** with **NVIDIA H100**. Please report an issue if you meet any issue. +The scripts are only tested on bare metal **Ubuntu 22.04** with **NVIDIA H100**. Report an issue if you encounter any issues.