Align vllm settings with docker compose version (#554)
Align settings with PR opea-project/GenAIExamples#1061.
Make llm-uservice support both the tgi and vllm backends.

Signed-off-by: Dolpher Du <[email protected]>
yongfengdu authored Nov 14, 2024
1 parent 5058185 commit 823ce22
Showing 7 changed files with 46 additions and 8 deletions.
4 changes: 4 additions & 0 deletions helm-charts/common/llm-uservice/Chart.yaml
@@ -13,3 +13,7 @@ dependencies:
    version: 1.0.0
    repository: file://../tgi
    condition: tgi.enabled
  - name: vllm
    version: 1.0.0
    repository: file://../vllm
    condition: vllm.enabled
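
With both sub-charts declared behind condition flags, the backend is selected purely from values at install time; Helm only renders the dependency whose flag evaluates to true. A minimal sketch of the toggle (the values shown are illustrative, not part of this commit):

tgi:
  enabled: true    # render the bundled tgi sub-chart
vllm:
  enabled: false   # flip these two to render the bundled vllm sub-chart instead
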
12 changes: 10 additions & 2 deletions helm-charts/common/llm-uservice/templates/configmap.yaml
@@ -13,15 +13,23 @@ data:
  {{- else }}
  TGI_LLM_ENDPOINT: "http://{{ .Release.Name }}-tgi"
  {{- end }}
  {{- if .Values.vLLM_ENDPOINT }}
  vLLM_ENDPOINT: {{ .Values.vLLM_ENDPOINT | quote}}
  {{- else }}
  vLLM_ENDPOINT: "http://{{ .Release.Name }}-vllm"
  {{- end }}
  {{- if .Values.LLM_MODEL_ID }}
  LLM_MODEL: {{ .Values.LLM_MODEL_ID | quote}}
  {{- end }}
  HUGGINGFACEHUB_API_TOKEN: {{ .Values.global.HUGGINGFACEHUB_API_TOKEN | quote}}
  HF_HOME: "/tmp/.cache/huggingface"
  {{- if .Values.global.HF_ENDPOINT }}
  HF_ENDPOINT: {{ .Values.global.HF_ENDPOINT | quote}}
  {{- end }}
  http_proxy: {{ .Values.global.http_proxy | quote }}
  https_proxy: {{ .Values.global.https_proxy | quote }}
  {{- if and (not .Values.TGI_LLM_ENDPOINT) (or .Values.global.http_proxy .Values.global.https_proxy) }}
  no_proxy: "{{ .Release.Name }}-tgi,{{ .Values.global.no_proxy }}"
  {{- if or .Values.global.http_proxy .Values.global.https_proxy }}
  no_proxy: "{{ .Release.Name }}-tgi,{{ .Release.Name }}-vllm,{{ .Values.global.no_proxy }}"
  {{- else }}
  no_proxy: {{ .Values.global.no_proxy | quote }}
  {{- end }}
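
When neither endpoint override is set, the defaults above point the microservice at the sub-chart services named after the Helm release, and both service names are added to no_proxy whenever a proxy is configured. Assuming a hypothetical release named myllm and placeholder values, the rendered data section would look roughly like this sketch:

  TGI_LLM_ENDPOINT: "http://myllm-tgi"
  vLLM_ENDPOINT: "http://myllm-vllm"
  # LLM_MODEL is omitted because LLM_MODEL_ID defaults to an empty string.
  HUGGINGFACEHUB_API_TOKEN: "your-hf-token"
  HF_HOME: "/tmp/.cache/huggingface"
  http_proxy: "http://proxy.example.com:3128"
  https_proxy: "http://proxy.example.com:3128"
  no_proxy: "myllm-tgi,myllm-vllm,localhost"
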
6 changes: 6 additions & 0 deletions helm-charts/common/llm-uservice/values.yaml
@@ -7,9 +7,15 @@

tgi:
  enabled: false
vllm:
  enabled: false

replicaCount: 1
# For tgi
TGI_LLM_ENDPOINT: ""
# For vllm, set the LLM_MODEL_ID the same as vllm sub chart
vLLM_ENDPOINT: ""
LLM_MODEL_ID: ""

# Set it as a non-null string, such as true, if you want to enable logging facility,
# otherwise, keep it as "" to disable it.
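
Because vLLM_ENDPOINT (like TGI_LLM_ENDPOINT) overrides the in-cluster default shown in the configmap template, the microservice can also be pointed at a backend running outside this release, with both sub-charts left disabled. A hedged sketch; the hostname and model are examples only:

tgi:
  enabled: false
vllm:
  enabled: false
# Example external endpoint; not a chart default.
vLLM_ENDPOINT: "http://my-external-vllm:80"
# Keep this in sync with the model that endpoint actually serves.
LLM_MODEL_ID: Intel/neural-chat-7b-v3-3
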
17 changes: 17 additions & 0 deletions helm-charts/common/llm-uservice/vllm-values.yaml
@@ -0,0 +1,17 @@
# Copyright (C) 2024 Intel Corporation
# SPDX-License-Identifier: Apache-2.0

# Default values for llm-uservice.
# This is a YAML-formatted file.
# Declare variables to be passed into your templates.
tgi:
  enabled: false
vllm:
  enabled: true
  LLM_MODEL_ID: Intel/neural-chat-7b-v3-3

vLLM_ENDPOINT: ""
LLM_MODEL_ID: Intel/neural-chat-7b-v3-3
image:
  repository: opea/llm-vllm
  tag: "latest"
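
In Helm, values nested under a dependency's name are passed down to that sub-chart, so with this overlay the bundled vllm chart is rendered roughly as if its own values contained the sketch below (standard Helm behavior, not literal output of this commit). The top-level LLM_MODEL_ID, by contrast, feeds the LLM_MODEL entry of the llm-uservice ConfigMap shown earlier, which is why the overlay sets the model in both places.

# Effective values seen by the vllm sub-chart under this overlay (sketch).
enabled: true
LLM_MODEL_ID: Intel/neural-chat-7b-v3-3
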
10 changes: 5 additions & 5 deletions helm-charts/common/vllm/gaudi-values.yaml
@@ -6,14 +6,14 @@
# Declare variables to be passed into your templates.

image:
  repository: opea/llm-vllm-hpu
  repository: opea/vllm-hpu
  tag: "latest"

VLLM_CPU_KVCACHE_SPACE: "40"

# VLLM_CPU_KVCACHE_SPACE: "40"
OMPI_MCA_btl_vader_single_copy_mechanism: none
extraCmdArgs: ["--enforce-eager","--tensor-parallel-size","1","--block-size","128","--max-num-seqs","256","--max-seq_len-to-capture","2048"]
# Workaround for current HPU image with start command /bin/bash
# extraCmdArgs: ["--enforce-eager","--tensor-parallel-size","1","--block-size","128","--max-num-seqs","256","--max-seq_len-to-capture","2048"]
extraCmdArgs: ["/bin/bash","-c","python3 -m vllm.entrypoints.openai.api_server --enforce-eager --model Intel/neural-chat-7b-v3-3 --tensor-parallel-size 1 --host 0.0.0.0 --port 2080 --download-dir /data --block-size 128 --max-num-seqs 256 --max-seq_len-to-capture 2048"]
# extraCmdArgs: ["/bin/bash","-c","python3 -m vllm.entrypoints.openai.api_server --enforce-eager --model Intel/neural-chat-7b-v3-3 --tensor-parallel-size 1 --host 0.0.0.0 --port 2080 --download-dir /data --block-size 128 --max-num-seqs 256 --max-seq_len-to-capture 2048"]
resources:
  limits:
    habana.ai/gaudi: 1
3 changes: 3 additions & 0 deletions helm-charts/common/vllm/templates/configmap.yaml
@@ -23,3 +23,6 @@ data:
  {{- if .Values.VLLM_CPU_KVCACHE_SPACE }}
  VLLM_CPU_KVCACHE_SPACE: {{ .Values.VLLM_CPU_KVCACHE_SPACE | quote}}
  {{- end }}
  {{- if .Values.OMPI_MCA_btl_vader_single_copy_mechanism }}
  OMPI_MCA_btl_vader_single_copy_mechanism: {{ .Values.OMPI_MCA_btl_vader_single_copy_mechanism | quote}}
  {{- end }}
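
The new block mirrors the existing VLLM_CPU_KVCACHE_SPACE handling: the variable only lands in the rendered ConfigMap when a value is set. With the gaudi-values.yaml above, the rendered entry would be roughly this sketch:

  OMPI_MCA_btl_vader_single_copy_mechanism: "none"
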
2 changes: 1 addition & 1 deletion helm-charts/common/vllm/values.yaml
@@ -50,7 +50,7 @@ resources: {}
# cpu: 100m
# memory: 128Mi

extraCmdArgs: ["--enforce-eager","--dtype","auto"]
extraCmdArgs: ["--enforce-eager", "--dtype", "auto"]

livenessProbe:
  httpGet:
