diff --git a/helm-charts/codegen/README.md b/helm-charts/codegen/README.md
index 771edcab..c669f712 100644
--- a/helm-charts/codegen/README.md
+++ b/helm-charts/codegen/README.md
@@ -14,7 +14,7 @@ cd GenAIInfra/helm-charts/
 helm dependency update codegen
 export HFTOKEN="insert-your-huggingface-token-here"
 export MODELDIR="/mnt/opea-models"
-export MODELNAME="meta-llama/CodeLlama-7b-hf"
+export MODELNAME="Qwen/Qwen2.5-Coder-7B-Instruct"
 # To run on Xeon
 helm install codegen codegen --set global.HUGGINGFACEHUB_API_TOKEN=${HFTOKEN} --set global.modelUseHostPath=${MODELDIR} --set tgi.LLM_MODEL_ID=${MODELNAME}
 # To run on Gaudi
@@ -23,9 +23,7 @@ helm install codegen codegen --set global.HUGGINGFACEHUB_API_TOKEN=${HFTOKEN} --
 
 ### IMPORTANT NOTE
 
-1. To use model `meta-llama/CodeLlama-7b-hf`, you should first goto the [huggingface model card](https://huggingface.co/meta-llama/CodeLlama-7b-hf) to apply for the model access first. You need to make sure your huggingface token has at least read access to that model.
-
-2. Make sure your `MODELDIR` exists on the node where your workload is schedueled so you can cache the downloaded model for next time use. Otherwise, set `global.modelUseHostPath` to 'null' if you don't want to cache the model.
+1. Make sure your `MODELDIR` exists on the node where your workload is scheduled so you can cache the downloaded model for future use. If you don't want to cache the model, set `global.modelUseHostPath` to 'null'.
 
 ## Verify
 
@@ -58,8 +56,8 @@ Open a browser to access `http://:${port}` to play with the
 
 ## Values
 
-| Key              | Type   | Default                        | Description                                                               |
-| ---------------- | ------ | ------------------------------ | ------------------------------------------------------------------------- |
-| image.repository | string | `"opea/codegen"`               |                                                                           |
-| service.port     | string | `"7778"`                       |                                                                           |
-| tgi.LLM_MODEL_ID | string | `"meta-llama/CodeLlama-7b-hf"` | Models id from https://huggingface.co/, or predownloaded model directory  |
+| Key              | Type   | Default                            | Description                                                                 |
+| ---------------- | ------ | ---------------------------------- | --------------------------------------------------------------------------- |
+| image.repository | string | `"opea/codegen"`                   |                                                                             |
+| service.port     | string | `"7778"`                           |                                                                             |
+| tgi.LLM_MODEL_ID | string | `"Qwen/Qwen2.5-Coder-7B-Instruct"` | Model id from https://huggingface.co/, or a pre-downloaded model directory  |
diff --git a/helm-charts/codegen/values.yaml b/helm-charts/codegen/values.yaml
index 17f7355d..32212042 100644
--- a/helm-charts/codegen/values.yaml
+++ b/helm-charts/codegen/values.yaml
@@ -41,7 +41,7 @@ affinity: {}
 
 # To override values in subchart tgi
 tgi:
-  LLM_MODEL_ID: meta-llama/CodeLlama-7b-hf
+  LLM_MODEL_ID: Qwen/Qwen2.5-Coder-7B-Instruct
 
 codegen-ui:
   image:
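With the codegen chart now defaulting to the ungated `Qwen/Qwen2.5-Coder-7B-Instruct`, no access request is needed, but it is still worth confirming that TGI actually loaded the new default after `helm install`. A minimal sketch; the Service name `codegen-tgi` and port 80 are assumptions about how the tgi subchart is exposed, not values pinned by this diff (check `kubectl get svc` for the real names in your release):

```
# Service name and port are assumptions -- verify with `kubectl get svc` first.
kubectl port-forward svc/codegen-tgi 8080:80 &
# TGI's /info endpoint reports the model id it loaded at startup.
curl -s http://localhost:8080/info | grep model_id
# Expect: "model_id":"Qwen/Qwen2.5-Coder-7B-Instruct"
```
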
diff --git a/helm-charts/codetrans/README.md b/helm-charts/codetrans/README.md
index d1e64de1..eb014a7c 100644
--- a/helm-charts/codetrans/README.md
+++ b/helm-charts/codetrans/README.md
@@ -14,12 +14,18 @@ cd GenAIInfra/helm-charts/
 helm dependency update codetrans
 export HFTOKEN="insert-your-huggingface-token-here"
 export MODELDIR="/mnt/opea-models"
-export MODELNAME="HuggingFaceH4/mistral-7b-grok"
+export MODELNAME="mistralai/Mistral-7B-Instruct-v0.3"
 helm install codetrans codetrans --set global.HUGGINGFACEHUB_API_TOKEN=${HFTOKEN} --set global.modelUseHostPath=${MODELDIR} --set tgi.LLM_MODEL_ID=${MODELNAME}
 # To use Gaudi device
 # helm install codetrans codetrans --set global.HUGGINGFACEHUB_API_TOKEN=${HFTOKEN} --values codetrans/gaudi-values.yaml
 ```
+
+### IMPORTANT NOTE
+
+1. To use model `mistralai/Mistral-7B-Instruct-v0.3`, you should first go to the [huggingface model card](https://huggingface.co/mistralai/Mistral-7B-Instruct-v0.3) and apply for access. Make sure your huggingface token has at least read access to that model.
+
+2. Make sure your `MODELDIR` exists on the node where your workload is scheduled so you can cache the downloaded model for future use. If you don't want to cache the model, set `global.modelUseHostPath` to 'null'.
 
 ## Verify
 
 To verify the installation, run the command `kubectl get pod` to make sure all pods are running.
@@ -51,8 +57,8 @@ Open a browser to access `http://:${port}` to play with the
 
 ## Values
 
-| Key              | Type   | Default                           | Description                                                               |
-| ---------------- | ------ | --------------------------------- | ------------------------------------------------------------------------- |
-| image.repository | string | `"opea/codetrans"`                |                                                                           |
-| service.port     | string | `"7777"`                          |                                                                           |
-| tgi.LLM_MODEL_ID | string | `"HuggingFaceH4/mistral-7b-grok"` | Models id from https://huggingface.co/, or predownloaded model directory  |
+| Key              | Type   | Default                                | Description                                                                 |
+| ---------------- | ------ | -------------------------------------- | --------------------------------------------------------------------------- |
+| image.repository | string | `"opea/codetrans"`                     |                                                                             |
+| service.port     | string | `"7777"`                               |                                                                             |
+| tgi.LLM_MODEL_ID | string | `"mistralai/Mistral-7B-Instruct-v0.3"` | Model id from https://huggingface.co/, or a pre-downloaded model directory  |
diff --git a/helm-charts/codetrans/values.yaml b/helm-charts/codetrans/values.yaml
index 90f2cd48..d3df8a57 100644
--- a/helm-charts/codetrans/values.yaml
+++ b/helm-charts/codetrans/values.yaml
@@ -42,7 +42,7 @@ affinity: {}
 
 # To override values in subchart tgi
 tgi:
-  LLM_MODEL_ID: HuggingFaceH4/mistral-7b-grok
+  LLM_MODEL_ID: mistralai/Mistral-7B-Instruct-v0.3
 
 codetrans-ui:
   image:
diff --git a/microservices-connector/config/samples/CodeGen/codegen_gaudi.yaml b/microservices-connector/config/samples/CodeGen/codegen_gaudi.yaml
index 2e378205..d9a927e5 100644
--- a/microservices-connector/config/samples/CodeGen/codegen_gaudi.yaml
+++ b/microservices-connector/config/samples/CodeGen/codegen_gaudi.yaml
@@ -29,6 +29,6 @@ spec:
       internalService:
         serviceName: tgi-gaudi-svc
         config:
-          MODEL_ID: meta-llama/CodeLlama-7b-hf
+          MODEL_ID: Qwen/Qwen2.5-Coder-7B-Instruct
           endpoint: /generate
       isDownstreamService: true
diff --git a/microservices-connector/config/samples/CodeGen/codegen_xeon.yaml b/microservices-connector/config/samples/CodeGen/codegen_xeon.yaml
index dd1675ce..8dd3c2b5 100644
--- a/microservices-connector/config/samples/CodeGen/codegen_xeon.yaml
+++ b/microservices-connector/config/samples/CodeGen/codegen_xeon.yaml
@@ -29,6 +29,6 @@ spec:
       internalService:
         serviceName: tgi-service
         config:
-          MODEL_ID: meta-llama/CodeLlama-7b-hf
+          MODEL_ID: Qwen/Qwen2.5-Coder-7B-Instruct
           endpoint: /generate
       isDownstreamService: true
diff --git a/microservices-connector/config/samples/CodeTrans/codetrans_gaudi.yaml b/microservices-connector/config/samples/CodeTrans/codetrans_gaudi.yaml
index 5bc1bd5e..b61ffef3 100644
--- a/microservices-connector/config/samples/CodeTrans/codetrans_gaudi.yaml
+++ b/microservices-connector/config/samples/CodeTrans/codetrans_gaudi.yaml
@@ -29,6 +29,6 @@ spec:
       internalService:
         serviceName: tgi-gaudi-svc
         config:
-          MODEL_ID: HuggingFaceH4/mistral-7b-grok
+          MODEL_ID: mistralai/Mistral-7B-Instruct-v0.3
           endpoint: /generate
       isDownstreamService: true
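Unlike the Qwen default for codegen, `mistralai/Mistral-7B-Instruct-v0.3` is gated, so the CodeTrans chart and the samples here will only pull the model once the token has been granted access. A pre-flight sketch using the standard Hugging Face resolve URL, with `HFTOKEN` exported as in the README above (a 200 means the token has read access; a 401/403 means the request made on the model card is still pending):

```
# Pre-flight check: can this token read the gated Mistral repo?
curl -s -o /dev/null -w "%{http_code}\n" \
  -H "Authorization: Bearer ${HFTOKEN}" \
  https://huggingface.co/mistralai/Mistral-7B-Instruct-v0.3/resolve/main/config.json
```
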
diff --git a/microservices-connector/config/samples/CodeTrans/codetrans_xeon.yaml b/microservices-connector/config/samples/CodeTrans/codetrans_xeon.yaml
index 889a1d21..244e7eb5 100644
--- a/microservices-connector/config/samples/CodeTrans/codetrans_xeon.yaml
+++ b/microservices-connector/config/samples/CodeTrans/codetrans_xeon.yaml
@@ -29,6 +29,6 @@ spec:
       internalService:
         serviceName: tgi-service
         config:
-          MODEL_ID: HuggingFaceH4/mistral-7b-grok
+          MODEL_ID: mistralai/Mistral-7B-Instruct-v0.3
           endpoint: /generate
       isDownstreamService: true
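To confirm the new `MODEL_ID` takes effect end to end, a smoke-test sketch after applying the Xeon sample; the `tgi-service` name comes from the manifest itself, but the namespace and port 80 mapping are assumptions about the GMC deployment, not values pinned here:

```
kubectl apply -f microservices-connector/config/samples/CodeTrans/codetrans_xeon.yaml
kubectl get pod    # wait until all pods are Running
# Assumed port mapping -- adjust to what `kubectl get svc` reports.
kubectl port-forward svc/tgi-service 8080:80 &
curl -s http://localhost:8080/info | grep mistralai/Mistral-7B-Instruct-v0.3
```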