diff --git a/ChatQnA/benchmark/oob_no_wrapper/with_rerank/eight_gaudi/no_wrapper_oob_eight_gaudi_with_rerank.yaml b/ChatQnA/benchmark/oob_no_wrapper/with_rerank/eight_gaudi/no_wrapper_oob_eight_gaudi_with_rerank.yaml index 0e8ab7ff2..376b2f045 100644 --- a/ChatQnA/benchmark/oob_no_wrapper/with_rerank/eight_gaudi/no_wrapper_oob_eight_gaudi_with_rerank.yaml +++ b/ChatQnA/benchmark/oob_no_wrapper/with_rerank/eight_gaudi/no_wrapper_oob_eight_gaudi_with_rerank.yaml @@ -134,7 +134,7 @@ metadata: name: embedding-dependency-deploy namespace: default spec: - replicas: 1 + replicas: 8 selector: matchLabels: app: embedding-dependency-deploy @@ -223,10 +223,6 @@ spec: - '2048' - --max-total-tokens - '4096' - - --max-batch-total-tokens - - '65536' - - --max-batch-prefill-tokens - - '4096' env: - name: OMPI_MCA_btl_vader_single_copy_mechanism value: none diff --git a/ChatQnA/benchmark/oob_no_wrapper/with_rerank/four_gaudi/no_wrapper_oob_four_gaudi_with_rerank.yaml b/ChatQnA/benchmark/oob_no_wrapper/with_rerank/four_gaudi/no_wrapper_oob_four_gaudi_with_rerank.yaml index f95078778..05c2c9ee8 100644 --- a/ChatQnA/benchmark/oob_no_wrapper/with_rerank/four_gaudi/no_wrapper_oob_four_gaudi_with_rerank.yaml +++ b/ChatQnA/benchmark/oob_no_wrapper/with_rerank/four_gaudi/no_wrapper_oob_four_gaudi_with_rerank.yaml @@ -134,7 +134,7 @@ metadata: name: embedding-dependency-deploy namespace: default spec: - replicas: 1 + replicas: 4 selector: matchLabels: app: embedding-dependency-deploy @@ -223,10 +223,6 @@ spec: - '2048' - --max-total-tokens - '4096' - - --max-batch-total-tokens - - '65536' - - --max-batch-prefill-tokens - - '4096' env: - name: OMPI_MCA_btl_vader_single_copy_mechanism value: none diff --git a/ChatQnA/benchmark/oob_no_wrapper/with_rerank/single_gaudi/no_wrapper_oob_single_gaudi_with_rerank.yaml b/ChatQnA/benchmark/oob_no_wrapper/with_rerank/single_gaudi/no_wrapper_oob_single_gaudi_with_rerank.yaml index fcf1bd424..55e74889c 100644 --- a/ChatQnA/benchmark/oob_no_wrapper/with_rerank/single_gaudi/no_wrapper_oob_single_gaudi_with_rerank.yaml +++ b/ChatQnA/benchmark/oob_no_wrapper/with_rerank/single_gaudi/no_wrapper_oob_single_gaudi_with_rerank.yaml @@ -223,10 +223,6 @@ spec: - '2048' - --max-total-tokens - '4096' - - --max-batch-total-tokens - - '65536' - - --max-batch-prefill-tokens - - '4096' env: - name: OMPI_MCA_btl_vader_single_copy_mechanism value: none diff --git a/ChatQnA/benchmark/oob_no_wrapper/with_rerank/two_gaudi/no_wrapper_oob_two_gaudi_with_rerank.yaml b/ChatQnA/benchmark/oob_no_wrapper/with_rerank/two_gaudi/no_wrapper_oob_two_gaudi_with_rerank.yaml index 56fbf194c..0ab712ab8 100644 --- a/ChatQnA/benchmark/oob_no_wrapper/with_rerank/two_gaudi/no_wrapper_oob_two_gaudi_with_rerank.yaml +++ b/ChatQnA/benchmark/oob_no_wrapper/with_rerank/two_gaudi/no_wrapper_oob_two_gaudi_with_rerank.yaml @@ -134,7 +134,7 @@ metadata: name: embedding-dependency-deploy namespace: default spec: - replicas: 1 + replicas: 2 selector: matchLabels: app: embedding-dependency-deploy @@ -223,10 +223,6 @@ spec: - '2048' - --max-total-tokens - '4096' - - --max-batch-total-tokens - - '65536' - - --max-batch-prefill-tokens - - '4096' env: - name: OMPI_MCA_btl_vader_single_copy_mechanism value: none diff --git a/ChatQnA/benchmark/oob_no_wrapper/without_rerank/eight_gaudi/no_wrapper_oob_eight_gaudi_without_rerank.yaml b/ChatQnA/benchmark/oob_no_wrapper/without_rerank/eight_gaudi/no_wrapper_oob_eight_gaudi_without_rerank.yaml index 0d7c3388c..2795905ee 100644 --- a/ChatQnA/benchmark/oob_no_wrapper/without_rerank/eight_gaudi/no_wrapper_oob_eight_gaudi_without_rerank.yaml +++ b/ChatQnA/benchmark/oob_no_wrapper/without_rerank/eight_gaudi/no_wrapper_oob_eight_gaudi_without_rerank.yaml @@ -134,7 +134,7 @@ metadata: name: embedding-dependency-deploy namespace: default spec: - replicas: 1 + replicas: 8 selector: matchLabels: app: embedding-dependency-deploy @@ -223,10 +223,6 @@ spec: - '2048' - --max-total-tokens - '4096' - - --max-batch-total-tokens - - '65536' - - --max-batch-prefill-tokens - - '4096' env: - name: OMPI_MCA_btl_vader_single_copy_mechanism value: none diff --git a/ChatQnA/benchmark/oob_no_wrapper/without_rerank/four_gaudi/no_wrapper_oob_four_gaudi_without_rerank.yaml b/ChatQnA/benchmark/oob_no_wrapper/without_rerank/four_gaudi/no_wrapper_oob_four_gaudi_without_rerank.yaml index 1a8ff4992..67e6ba0ec 100644 --- a/ChatQnA/benchmark/oob_no_wrapper/without_rerank/four_gaudi/no_wrapper_oob_four_gaudi_without_rerank.yaml +++ b/ChatQnA/benchmark/oob_no_wrapper/without_rerank/four_gaudi/no_wrapper_oob_four_gaudi_without_rerank.yaml @@ -134,7 +134,7 @@ metadata: name: embedding-dependency-deploy namespace: default spec: - replicas: 1 + replicas: 4 selector: matchLabels: app: embedding-dependency-deploy @@ -223,10 +223,6 @@ spec: - '2048' - --max-total-tokens - '4096' - - --max-batch-total-tokens - - '65536' - - --max-batch-prefill-tokens - - '4096' env: - name: OMPI_MCA_btl_vader_single_copy_mechanism value: none diff --git a/ChatQnA/benchmark/oob_no_wrapper/without_rerank/single_gaudi/no_wrapper_oob_single_gaudi_without_rerank.yaml b/ChatQnA/benchmark/oob_no_wrapper/without_rerank/single_gaudi/no_wrapper_oob_single_gaudi_without_rerank.yaml index 4b5e034ae..ef0b96689 100644 --- a/ChatQnA/benchmark/oob_no_wrapper/without_rerank/single_gaudi/no_wrapper_oob_single_gaudi_without_rerank.yaml +++ b/ChatQnA/benchmark/oob_no_wrapper/without_rerank/single_gaudi/no_wrapper_oob_single_gaudi_without_rerank.yaml @@ -223,10 +223,6 @@ spec: - '2048' - --max-total-tokens - '4096' - - --max-batch-total-tokens - - '65536' - - --max-batch-prefill-tokens - - '4096' env: - name: OMPI_MCA_btl_vader_single_copy_mechanism value: none diff --git a/ChatQnA/benchmark/oob_no_wrapper/without_rerank/two_gaudi/no_wrapper_oob_two_gaudi_without_rerank.yaml b/ChatQnA/benchmark/oob_no_wrapper/without_rerank/two_gaudi/no_wrapper_oob_two_gaudi_without_rerank.yaml index 16e3020f1..0045f12cf 100644 --- a/ChatQnA/benchmark/oob_no_wrapper/without_rerank/two_gaudi/no_wrapper_oob_two_gaudi_without_rerank.yaml +++ b/ChatQnA/benchmark/oob_no_wrapper/without_rerank/two_gaudi/no_wrapper_oob_two_gaudi_without_rerank.yaml @@ -134,7 +134,7 @@ metadata: name: embedding-dependency-deploy namespace: default spec: - replicas: 1 + replicas: 2 selector: matchLabels: app: embedding-dependency-deploy @@ -223,10 +223,6 @@ spec: - '2048' - --max-total-tokens - '4096' - - --max-batch-total-tokens - - '65536' - - --max-batch-prefill-tokens - - '4096' env: - name: OMPI_MCA_btl_vader_single_copy_mechanism value: none diff --git a/ChatQnA/benchmark/tuned/with_rerank/four_gaudi/tuned_four_gaudi_with_rerank.yaml b/ChatQnA/benchmark/tuned/with_rerank/four_gaudi/tuned_four_gaudi_with_rerank.yaml index ddb1b7fc5..1158bada9 100644 --- a/ChatQnA/benchmark/tuned/with_rerank/four_gaudi/tuned_four_gaudi_with_rerank.yaml +++ b/ChatQnA/benchmark/tuned/with_rerank/four_gaudi/tuned_four_gaudi_with_rerank.yaml @@ -167,10 +167,10 @@ spec: - containerPort: 80 resources: limits: - cpu: 80 + cpu: 76 memory: 20000Mi requests: - cpu: 80 + cpu: 76 memory: 20000Mi volumeMounts: - mountPath: /data diff --git a/ChatQnA/benchmark/tuned/with_rerank/single_gaudi/tuned_single_gaudi_with_rerank.yaml b/ChatQnA/benchmark/tuned/with_rerank/single_gaudi/tuned_single_gaudi_with_rerank.yaml index dae895f52..e40977213 100644 --- a/ChatQnA/benchmark/tuned/with_rerank/single_gaudi/tuned_single_gaudi_with_rerank.yaml +++ b/ChatQnA/benchmark/tuned/with_rerank/single_gaudi/tuned_single_gaudi_with_rerank.yaml @@ -167,10 +167,10 @@ spec: - containerPort: 80 resources: limits: - cpu: 80 + cpu: 76 memory: 20000Mi requests: - cpu: 80 + cpu: 76 memory: 20000Mi volumeMounts: - mountPath: /data diff --git a/ChatQnA/benchmark/tuned/with_rerank/two_gaudi/tuned_two_gaudi_with_rerank.yaml b/ChatQnA/benchmark/tuned/with_rerank/two_gaudi/tuned_two_gaudi_with_rerank.yaml index ee10361c7..2a54e1ca6 100644 --- a/ChatQnA/benchmark/tuned/with_rerank/two_gaudi/tuned_two_gaudi_with_rerank.yaml +++ b/ChatQnA/benchmark/tuned/with_rerank/two_gaudi/tuned_two_gaudi_with_rerank.yaml @@ -167,10 +167,10 @@ spec: - containerPort: 80 resources: limits: - cpu: 80 + cpu: 76 memory: 20000Mi requests: - cpu: 80 + cpu: 76 memory: 20000Mi volumeMounts: - mountPath: /data diff --git a/ChatQnA/benchmark/tuned/without_rerank/four_gaudi/tuned_four_gaudi_without_rerank.yaml b/ChatQnA/benchmark/tuned/without_rerank/four_gaudi/tuned_four_gaudi_without_rerank.yaml index 10c0963a7..ad0d8ec55 100644 --- a/ChatQnA/benchmark/tuned/without_rerank/four_gaudi/tuned_four_gaudi_without_rerank.yaml +++ b/ChatQnA/benchmark/tuned/without_rerank/four_gaudi/tuned_four_gaudi_without_rerank.yaml @@ -216,10 +216,10 @@ spec: - containerPort: 80 resources: limits: - cpu: 80 + cpu: 76 memory: 20000Mi requests: - cpu: 80 + cpu: 76 memory: 20000Mi serviceAccountName: default volumes: diff --git a/ChatQnA/benchmark/tuned/without_rerank/single_gaudi/tuned_single_gaudi_without_rerank.yaml b/ChatQnA/benchmark/tuned/without_rerank/single_gaudi/tuned_single_gaudi_without_rerank.yaml index 1388453a2..0a2bdd525 100644 --- a/ChatQnA/benchmark/tuned/without_rerank/single_gaudi/tuned_single_gaudi_without_rerank.yaml +++ b/ChatQnA/benchmark/tuned/without_rerank/single_gaudi/tuned_single_gaudi_without_rerank.yaml @@ -216,10 +216,10 @@ spec: - containerPort: 80 resources: limits: - cpu: 80 + cpu: 76 memory: 20000Mi requests: - cpu: 80 + cpu: 76 memory: 20000Mi serviceAccountName: default volumes: diff --git a/ChatQnA/benchmark/tuned/without_rerank/two_gaudi/tuned_two_gaudi_without_rerank.yaml b/ChatQnA/benchmark/tuned/without_rerank/two_gaudi/tuned_two_gaudi_without_rerank.yaml index b38a50253..9a4554d9f 100644 --- a/ChatQnA/benchmark/tuned/without_rerank/two_gaudi/tuned_two_gaudi_without_rerank.yaml +++ b/ChatQnA/benchmark/tuned/without_rerank/two_gaudi/tuned_two_gaudi_without_rerank.yaml @@ -216,10 +216,10 @@ spec: - containerPort: 80 resources: limits: - cpu: 80 + cpu: 76 memory: 20000Mi requests: - cpu: 80 + cpu: 76 memory: 20000Mi serviceAccountName: default volumes: diff --git a/ChatQnA/benchmark/tuned_no_wrapper/with_rerank/eight_gaudi/no_wrapper_tuned_eight_gaudi_with_rerank.yaml b/ChatQnA/benchmark/tuned_no_wrapper/with_rerank/eight_gaudi/no_wrapper_tuned_eight_gaudi_with_rerank.yaml index c541964e9..4d28d6c06 100644 --- a/ChatQnA/benchmark/tuned_no_wrapper/with_rerank/eight_gaudi/no_wrapper_tuned_eight_gaudi_with_rerank.yaml +++ b/ChatQnA/benchmark/tuned_no_wrapper/with_rerank/eight_gaudi/no_wrapper_tuned_eight_gaudi_with_rerank.yaml @@ -167,10 +167,10 @@ spec: - containerPort: 80 resources: limits: - cpu: 80 + cpu: 76 memory: 20000Mi requests: - cpu: 80 + cpu: 76 memory: 20000Mi volumeMounts: - mountPath: /data diff --git a/ChatQnA/benchmark/tuned_no_wrapper/with_rerank/four_gaudi/no_wrapper_tuned_four_gaudi_with_rerank.yaml b/ChatQnA/benchmark/tuned_no_wrapper/with_rerank/four_gaudi/no_wrapper_tuned_four_gaudi_with_rerank.yaml index a4cf76e74..eb23bd381 100644 --- a/ChatQnA/benchmark/tuned_no_wrapper/with_rerank/four_gaudi/no_wrapper_tuned_four_gaudi_with_rerank.yaml +++ b/ChatQnA/benchmark/tuned_no_wrapper/with_rerank/four_gaudi/no_wrapper_tuned_four_gaudi_with_rerank.yaml @@ -167,10 +167,10 @@ spec: - containerPort: 80 resources: limits: - cpu: 80 + cpu: 76 memory: 20000Mi requests: - cpu: 80 + cpu: 76 memory: 20000Mi volumeMounts: - mountPath: /data diff --git a/ChatQnA/benchmark/tuned_no_wrapper/with_rerank/single_gaudi/no_wrapper_tuned_single_gaudi_with_rerank.yaml b/ChatQnA/benchmark/tuned_no_wrapper/with_rerank/single_gaudi/no_wrapper_tuned_single_gaudi_with_rerank.yaml index 9b64b1fbb..d1ccb5710 100644 --- a/ChatQnA/benchmark/tuned_no_wrapper/with_rerank/single_gaudi/no_wrapper_tuned_single_gaudi_with_rerank.yaml +++ b/ChatQnA/benchmark/tuned_no_wrapper/with_rerank/single_gaudi/no_wrapper_tuned_single_gaudi_with_rerank.yaml @@ -167,10 +167,10 @@ spec: - containerPort: 80 resources: limits: - cpu: 80 + cpu: 76 memory: 20000Mi requests: - cpu: 80 + cpu: 76 memory: 20000Mi volumeMounts: - mountPath: /data @@ -399,7 +399,7 @@ metadata: name: retriever-deploy namespace: default spec: - replicas: 1 + replicas: 2 selector: matchLabels: app: retriever-deploy diff --git a/ChatQnA/benchmark/tuned_no_wrapper/with_rerank/two_gaudi/no_wrapper_tuned_two_gaudi_with_rerank.yaml b/ChatQnA/benchmark/tuned_no_wrapper/with_rerank/two_gaudi/no_wrapper_tuned_two_gaudi_with_rerank.yaml index 9fbb3fa06..a4bc72759 100644 --- a/ChatQnA/benchmark/tuned_no_wrapper/with_rerank/two_gaudi/no_wrapper_tuned_two_gaudi_with_rerank.yaml +++ b/ChatQnA/benchmark/tuned_no_wrapper/with_rerank/two_gaudi/no_wrapper_tuned_two_gaudi_with_rerank.yaml @@ -167,10 +167,10 @@ spec: - containerPort: 80 resources: limits: - cpu: 80 + cpu: 76 memory: 20000Mi requests: - cpu: 80 + cpu: 76 memory: 20000Mi volumeMounts: - mountPath: /data diff --git a/ChatQnA/benchmark/tuned_no_wrapper/without_rerank/eight_gaudi/no_wrapper_tuned_eight_gaudi_without_rerank.yaml b/ChatQnA/benchmark/tuned_no_wrapper/without_rerank/eight_gaudi/no_wrapper_tuned_eight_gaudi_without_rerank.yaml index 36d9c4d2b..302b13136 100644 --- a/ChatQnA/benchmark/tuned_no_wrapper/without_rerank/eight_gaudi/no_wrapper_tuned_eight_gaudi_without_rerank.yaml +++ b/ChatQnA/benchmark/tuned_no_wrapper/without_rerank/eight_gaudi/no_wrapper_tuned_eight_gaudi_without_rerank.yaml @@ -29,7 +29,7 @@ metadata: name: chatqna-backend-server-deploy namespace: default spec: - replicas: 4 + replicas: 8 selector: matchLabels: app: chatqna-backend-server-deploy @@ -141,7 +141,7 @@ metadata: name: embedding-dependency-deploy namespace: default spec: - replicas: 4 + replicas: 8 selector: matchLabels: app: embedding-dependency-deploy @@ -167,10 +167,10 @@ spec: - containerPort: 80 resources: limits: - cpu: 80 + cpu: 76 memory: 20000Mi requests: - cpu: 80 + cpu: 76 memory: 20000Mi volumeMounts: - mountPath: /data @@ -313,7 +313,7 @@ metadata: name: retriever-deploy namespace: default spec: - replicas: 4 + replicas: 8 selector: matchLabels: app: retriever-deploy diff --git a/ChatQnA/benchmark/tuned_no_wrapper/without_rerank/four_gaudi/no_wrapper_tuned_four_gaudi_without_rerank.yaml b/ChatQnA/benchmark/tuned_no_wrapper/without_rerank/four_gaudi/no_wrapper_tuned_four_gaudi_without_rerank.yaml index 9c74e60dd..179905133 100644 --- a/ChatQnA/benchmark/tuned_no_wrapper/without_rerank/four_gaudi/no_wrapper_tuned_four_gaudi_without_rerank.yaml +++ b/ChatQnA/benchmark/tuned_no_wrapper/without_rerank/four_gaudi/no_wrapper_tuned_four_gaudi_without_rerank.yaml @@ -167,10 +167,10 @@ spec: - containerPort: 80 resources: limits: - cpu: 80 + cpu: 76 memory: 20000Mi requests: - cpu: 80 + cpu: 76 memory: 20000Mi volumeMounts: - mountPath: /data diff --git a/ChatQnA/benchmark/tuned_no_wrapper/without_rerank/single_gaudi/no_wrapper_tuned_single_gaudi_without_rerank.yaml b/ChatQnA/benchmark/tuned_no_wrapper/without_rerank/single_gaudi/no_wrapper_tuned_single_gaudi_without_rerank.yaml index 06c7321a5..8f35cf26b 100644 --- a/ChatQnA/benchmark/tuned_no_wrapper/without_rerank/single_gaudi/no_wrapper_tuned_single_gaudi_without_rerank.yaml +++ b/ChatQnA/benchmark/tuned_no_wrapper/without_rerank/single_gaudi/no_wrapper_tuned_single_gaudi_without_rerank.yaml @@ -167,10 +167,10 @@ spec: - containerPort: 80 resources: limits: - cpu: 80 + cpu: 76 memory: 20000Mi requests: - cpu: 80 + cpu: 76 memory: 20000Mi volumeMounts: - mountPath: /data diff --git a/ChatQnA/benchmark/tuned_no_wrapper/without_rerank/two_gaudi/no_wrapper_tuned_two_gaudi_without_rerank.yaml b/ChatQnA/benchmark/tuned_no_wrapper/without_rerank/two_gaudi/no_wrapper_tuned_two_gaudi_without_rerank.yaml index 7505e2a03..05fcce268 100644 --- a/ChatQnA/benchmark/tuned_no_wrapper/without_rerank/two_gaudi/no_wrapper_tuned_two_gaudi_without_rerank.yaml +++ b/ChatQnA/benchmark/tuned_no_wrapper/without_rerank/two_gaudi/no_wrapper_tuned_two_gaudi_without_rerank.yaml @@ -167,10 +167,10 @@ spec: - containerPort: 80 resources: limits: - cpu: 80 + cpu: 76 memory: 20000Mi requests: - cpu: 80 + cpu: 76 memory: 20000Mi volumeMounts: - mountPath: /data