diff --git a/ChatQnA/benchmark/performance/kubernetes/intel/gaudi/README.md b/ChatQnA/benchmark/performance/kubernetes/intel/gaudi/README.md index 6497828fe..d667727f4 100644 --- a/ChatQnA/benchmark/performance/kubernetes/intel/gaudi/README.md +++ b/ChatQnA/benchmark/performance/kubernetes/intel/gaudi/README.md @@ -100,12 +100,12 @@ python deploy.py --uninstall #### Case 2: Baseline Deployment without Rerank ```bash -python deploy.py --hftoken $HFTOKEN --modeldir $MODELDIR --num-nodes 2 +python deploy.py --hf-token $HFTOKEN --model-dir $MODELDIR --num-nodes 2 ``` #### Case 3: Tuned Deployment with Rerank ```bash -python deploy.py --hftoken $HFTOKEN --modeldir $MODELDIR --num-nodes 2 --with-rerank --tuned +python deploy.py --hf-token $HFTOKEN --model-dir $MODELDIR --num-nodes 2 --with-rerank --tuned ``` ## Benchmark diff --git a/ChatQnA/benchmark/performance/kubernetes/intel/gaudi/generate_helm_values.py b/ChatQnA/benchmark/performance/kubernetes/intel/gaudi/generate_helm_values.py index bcaba6b3f..b28881800 100644 --- a/ChatQnA/benchmark/performance/kubernetes/intel/gaudi/generate_helm_values.py +++ b/ChatQnA/benchmark/performance/kubernetes/intel/gaudi/generate_helm_values.py @@ -99,7 +99,7 @@ def generate_helm_values(with_rerank, num_nodes, hf_token, model_dir, node_selec }, {"name": "teirerank", "resources": {"limits": {"habana.ai/gaudi": 1}}} if with_rerank else None, {"name": "tgi", "resources": {"limits": {"habana.ai/gaudi": 1}}}, - {"name": "retriever", "resources": {"requests": {"cpu": "8", "memory": "8000Mi"}}}, + {"name": "retriever-usvc", "resources": {"requests": {"cpu": "8", "memory": "8000Mi"}}}, ] # Filter out any None values directly as part of initialization