Add HPA-enabled manifests

Signed-off-by: Alexey Fomenko <[email protected]>
opea-project · Aug 21, 2024 · ef2fe65 · ef2fe65
1 parent fc27c77
commit ef2fe65
Show file tree

Hide file tree

Showing 4 changed files with 661 additions and 0 deletions.
diff --git a/microservices-connector/config/HPA/customMetrics.yaml b/microservices-connector/config/HPA/customMetrics.yaml
@@ -0,0 +1,51 @@
+# Copyright (C) 2024 Intel Corporation
+# SPDX-License-Identifier: Apache-2.0
+
+apiVersion: v1
+data:
+  config.yaml: |
+      rules:
+      - seriesQuery: '{__name__="tgi_request_inference_duration_sum"}'
+        # Average request latency from TGI histograms, over 1 min
+        # (0.001 divider add is to make sure there's always a valid value)
+        metricsQuery: 'rate(tgi_request_inference_duration_sum{<<.LabelMatchers>>}[1m]) / (0.001+rate(tgi_request_inference_duration_count{<<.LabelMatchers>>}[1m]))'
+        name:
+          matches: ^tgi_request_inference_duration_sum
+          as: "tgi_request_latency"
+        resources:
+          # HPA needs both namespace + suitable object resource for its query paths:
+          # /apis/custom.metrics.k8s.io/v1beta1/namespaces/default/service/*/tgi_request_latency
+          # (pod is not suitable object type for matching as each instance has different name)
+          overrides:
+            namespace:
+              resource: namespace
+            service:
+              resource: service
+      - seriesQuery: '{__name__="te_request_inference_duration_sum",service="tei-reranking-svc"}'
+        # Average request latency from TEI histograms, over 1 min
+        metricsQuery: 'rate(te_request_inference_duration_sum{service="tei-reranking-svc",<<.LabelMatchers>>}[1m]) / (0.001+rate(te_request_inference_duration_count{service="tei-reranking-svc",<<.LabelMatchers>>}[1m]))'
+        name:
+          matches: ^te_request_inference_duration_sum
+          as: "reranking_request_latency"
+        resources:
+          overrides:
+            namespace:
+              resource: namespace
+            service:
+              resource: service
+      - seriesQuery: '{__name__="te_request_inference_duration_sum",service="tei-embedding-svc"}'
+        # Average request latency from TEI histograms, over 1 min
+        metricsQuery: 'rate(te_request_inference_duration_sum{service="tei-embedding-svc",<<.LabelMatchers>>}[1m]) / (0.001+rate(te_request_inference_duration_count{service="tei-embedding-svc",<<.LabelMatchers>>}[1m]))'
+        name:
+          matches: ^te_request_inference_duration_sum
+          as: "embedding_request_latency"
+        resources:
+          overrides:
+            namespace:
+              resource: namespace
+            service:
+              resource: service
+kind: ConfigMap
+metadata:
+  name: adapter-config
+  namespace: monitoring
diff --git a/microservices-connector/config/HPA/tei.yaml b/microservices-connector/config/HPA/tei.yaml
@@ -0,0 +1,205 @@
+---
+# Source: tei/templates/configmap.yaml
+# Copyright (C) 2024 Intel Corporation
+# SPDX-License-Identifier: Apache-2.0
+
+apiVersion: v1
+kind: ConfigMap
+metadata:
+  name: tei-config
+  labels:
+    helm.sh/chart: tei-0.8.0
+    app.kubernetes.io/name: tei
+    app.kubernetes.io/instance: tei
+    app.kubernetes.io/version: "cpu-1.5"
+    app.kubernetes.io/managed-by: Helm
+data:
+  MODEL_ID: "BAAI/bge-base-en-v1.5"
+  PORT: "2081"
+  http_proxy: ""
+  https_proxy: ""
+  no_proxy: ""
+  NUMBA_CACHE_DIR: "/tmp"
+  TRANSFORMERS_CACHE: "/tmp/transformers_cache"
+  HF_HOME: "/tmp/.cache/huggingface"
+  MAX_WARMUP_SEQUENCE_LENGTH: "512"
+---
+# Source: tei/templates/service.yaml
+# Copyright (C) 2024 Intel Corporation
+# SPDX-License-Identifier: Apache-2.0
+
+apiVersion: v1
+kind: Service
+metadata:
+  name: tei
+  labels:
+    helm.sh/chart: tei-0.8.0
+    app.kubernetes.io/name: tei
+    app.kubernetes.io/instance: tei
+    app.kubernetes.io/version: "cpu-1.5"
+    app.kubernetes.io/managed-by: Helm
+spec:
+  type: ClusterIP
+  ports:
+    - port: 80
+      targetPort: 2081
+      protocol: TCP
+      name: tei
+  selector:
+    app.kubernetes.io/name: tei
+    app.kubernetes.io/instance: tei
+---
+# Source: tei/templates/deployment.yaml
+# Copyright (C) 2024 Intel Corporation
+# SPDX-License-Identifier: Apache-2.0
+
+apiVersion: apps/v1
+kind: Deployment
+metadata:
+  name: tei
+  labels:
+    helm.sh/chart: tei-0.8.0
+    app.kubernetes.io/name: tei
+    app.kubernetes.io/instance: tei
+    app.kubernetes.io/version: "cpu-1.5"
+    app.kubernetes.io/managed-by: Helm
+spec:
+  # use explicit replica counts only of HorizontalPodAutoscaler is disabled
+  selector:
+    matchLabels:
+      app.kubernetes.io/name: tei
+      app.kubernetes.io/instance: tei
+  template:
+    metadata:
+      labels:
+        app.kubernetes.io/name: tei
+        app.kubernetes.io/instance: tei
+    spec:
+      securityContext:
+        {}
+      containers:
+        - name: tei
+          envFrom:
+            - configMapRef:
+                name: tei-config
+            - configMapRef:
+                name: extra-env-config
+                optional: true
+          securityContext:
+            {}
+          image: "ghcr.io/huggingface/text-embeddings-inference:cpu-1.5"
+          imagePullPolicy: IfNotPresent
+          args:
+            - "--auto-truncate"
+          volumeMounts:
+            - mountPath: /data
+              name: model-volume
+            - mountPath: /dev/shm
+              name: shm
+            - mountPath: /tmp
+              name: tmp
+          ports:
+            - name: http
+              containerPort: 2081
+              protocol: TCP
+          livenessProbe:
+            failureThreshold: 24
+            httpGet:
+              path: /health
+              port: http
+            initialDelaySeconds: 5
+            periodSeconds: 5
+          readinessProbe:
+            httpGet:
+              path: /health
+              port: http
+            initialDelaySeconds: 5
+            periodSeconds: 5
+          startupProbe:
+            failureThreshold: 120
+            httpGet:
+              path: /health
+              port: http
+            initialDelaySeconds: 5
+            periodSeconds: 5
+          resources:
+            {}
+      volumes:
+        - name: model-volume
+          hostPath:
+            path: /mnt/opea-models
+            type: Directory
+        - name: shm
+          emptyDir:
+            medium: Memory
+            sizeLimit: 1Gi
+        - name: tmp
+          emptyDir: {}
+      # extra time to finish processing buffered requests before HPA forcibly terminates pod
+      terminationGracePeriodSeconds: 60
+---
+# Source: tei/templates/horizontalPodAutoscaler.yaml
+# Copyright (C) 2024 Intel Corporation
+# SPDX-License-Identifier: Apache-2.0
+apiVersion: autoscaling/v2
+kind: HorizontalPodAutoscaler
+metadata:
+  name: tei
+spec:
+  scaleTargetRef:
+    apiVersion: apps/v1
+    kind: Deployment
+    name: tei
+  minReplicas: 1
+  maxReplicas: 2
+  metrics:
+  - type: Object
+    object:
+      metric:
+        # tei-embedding time metrics are in seconds
+        name: embedding_request_latency
+      describedObject:
+        apiVersion: v1
+        # get metric for named object of given type (in same namespace)
+        kind: Service
+        name: tei
+      target:
+        # embedding_request_latency is average for all TEI pods. To avoid replica fluctuations when
+        # TEI startup + request processing takes longer than HPA evaluation period, this uses
+        # "Value" (replicas = metric.value / target.value), instead of "averageValue" type:
+        #  https://kubernetes.io/docs/tasks/run-application/horizontal-pod-autoscale/#algorithm-details
+        type: Value
+        value: 4
+  behavior:
+    scaleDown:
+      stabilizationWindowSeconds: 180
+      policies:
+      - type: Percent
+        value: 25
+        periodSeconds: 15
+    scaleUp:
+      selectPolicy: Max
+      stabilizationWindowSeconds: 0
+      policies:
+      - type: Percent
+        value: 50
+        periodSeconds: 15
+      - type: Pods
+        value: 2
+        periodSeconds: 15
+---
+# Source: tei/templates/servicemonitor.yaml
+# Copyright (C) 2024 Intel Corporation
+# SPDX-License-Identifier: Apache-2.0
+apiVersion: monitoring.coreos.com/v1
+kind: ServiceMonitor
+metadata:
+  name: tei
+spec:
+  selector:
+    matchLabels:
+      app.kubernetes.io/name: tei
+  endpoints:
+  - interval: 4s
+    port: tei
+    scheme: http