-
Notifications
You must be signed in to change notification settings - Fork 60
Commit
This commit does not belong to any branch on this repository, and may belong to a fork outside of the repository.
Signed-off-by: Alexey Fomenko <[email protected]>
- Loading branch information
Showing
4 changed files
with
661 additions
and
0 deletions.
There are no files selected for viewing
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,51 @@ | ||
# Copyright (C) 2024 Intel Corporation | ||
# SPDX-License-Identifier: Apache-2.0 | ||
|
||
# ConfigMap "adapter-config" (namespace "monitoring"). Its config.yaml key holds three | ||
# metric rules: one for TGI and two for TEI (reranking and embedding). | ||
# Each rule divides the 1-minute rate of a *_duration_sum series by the 1-minute rate of | ||
# the matching *_duration_count series, and renames the result via name.matches / name.as. | ||
# NOTE(review): the rule syntax looks like prometheus-adapter configuration - confirm | ||
# against the adapter deployment that mounts this ConfigMap. | ||
# NOTE(review): the queries contain no explicit aggregation (no "sum/avg by"), so they | ||
# presumably return one series per scraped endpoint - verify with more than one replica. | ||
apiVersion: v1 | ||
data: | ||
config.yaml: | | ||
rules: | ||
- seriesQuery: '{__name__="tgi_request_inference_duration_sum"}' | ||
# Average request latency from TGI histograms, over 1 min | ||
# (0.001 is added to the divisor to make sure there's always a valid value, | ||
# e.g. when the count rate is 0) | ||
metricsQuery: 'rate(tgi_request_inference_duration_sum{<<.LabelMatchers>>}[1m]) / (0.001+rate(tgi_request_inference_duration_count{<<.LabelMatchers>>}[1m]))' | ||
name: | ||
matches: ^tgi_request_inference_duration_sum | ||
as: "tgi_request_latency" | ||
resources: | ||
# HPA needs both namespace + suitable object resource for its query paths: | ||
# /apis/custom.metrics.k8s.io/v1beta1/namespaces/default/service/*/tgi_request_latency | ||
# (pod is not suitable object type for matching as each instance has different name) | ||
overrides: | ||
namespace: | ||
resource: namespace | ||
service: | ||
resource: service | ||
- seriesQuery: '{__name__="te_request_inference_duration_sum",service="tei-reranking-svc"}' | ||
# Average request latency from TEI histograms for the reranking service, over 1 min | ||
# (same source series as the embedding rule; the hard-coded service label tells them apart) | ||
metricsQuery: 'rate(te_request_inference_duration_sum{service="tei-reranking-svc",<<.LabelMatchers>>}[1m]) / (0.001+rate(te_request_inference_duration_count{service="tei-reranking-svc",<<.LabelMatchers>>}[1m]))' | ||
name: | ||
matches: ^te_request_inference_duration_sum | ||
as: "reranking_request_latency" | ||
resources: | ||
overrides: | ||
namespace: | ||
resource: namespace | ||
service: | ||
resource: service | ||
- seriesQuery: '{__name__="te_request_inference_duration_sum",service="tei-embedding-svc"}' | ||
# Average request latency from TEI histograms for the embedding service, over 1 min | ||
# (same source series as the reranking rule; the hard-coded service label tells them apart) | ||
metricsQuery: 'rate(te_request_inference_duration_sum{service="tei-embedding-svc",<<.LabelMatchers>>}[1m]) / (0.001+rate(te_request_inference_duration_count{service="tei-embedding-svc",<<.LabelMatchers>>}[1m]))' | ||
name: | ||
matches: ^te_request_inference_duration_sum | ||
as: "embedding_request_latency" | ||
resources: | ||
overrides: | ||
namespace: | ||
resource: namespace | ||
service: | ||
resource: service | ||
kind: ConfigMap | ||
metadata: | ||
name: adapter-config | ||
namespace: monitoring |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,205 @@ | ||
--- | ||
# Source: tei/templates/configmap.yaml | ||
# Copyright (C) 2024 Intel Corporation | ||
# SPDX-License-Identifier: Apache-2.0 | ||
|
||
# ConfigMap "tei-config": key/value settings that the "tei" Deployment in this file | ||
# loads through envFrom/configMapRef. | ||
apiVersion: v1 | ||
kind: ConfigMap | ||
metadata: | ||
name: tei-config | ||
labels: | ||
helm.sh/chart: tei-0.8.0 | ||
app.kubernetes.io/name: tei | ||
app.kubernetes.io/instance: tei | ||
app.kubernetes.io/version: "cpu-1.5" | ||
app.kubernetes.io/managed-by: Helm | ||
data: | ||
MODEL_ID: "BAAI/bge-base-en-v1.5" | ||
# Same number as the Deployment's containerPort and the Service's targetPort | ||
PORT: "2081" | ||
# Proxy settings are present but set to empty strings | ||
http_proxy: "" | ||
https_proxy: "" | ||
no_proxy: "" | ||
# Cache locations all live under /tmp, which the Deployment mounts from the "tmp" emptyDir volume | ||
NUMBA_CACHE_DIR: "/tmp" | ||
TRANSFORMERS_CACHE: "/tmp/transformers_cache" | ||
HF_HOME: "/tmp/.cache/huggingface" | ||
MAX_WARMUP_SEQUENCE_LENGTH: "512" | ||
--- | ||
# Source: tei/templates/service.yaml | ||
# Copyright (C) 2024 Intel Corporation | ||
# SPDX-License-Identifier: Apache-2.0 | ||
|
||
# ClusterIP Service "tei": forwards port 80 to port 2081 on pods carrying the | ||
# app.kubernetes.io/name and app.kubernetes.io/instance labels listed under selector. | ||
apiVersion: v1 | ||
kind: Service | ||
metadata: | ||
name: tei | ||
labels: | ||
helm.sh/chart: tei-0.8.0 | ||
app.kubernetes.io/name: tei | ||
app.kubernetes.io/instance: tei | ||
app.kubernetes.io/version: "cpu-1.5" | ||
app.kubernetes.io/managed-by: Helm | ||
spec: | ||
type: ClusterIP | ||
ports: | ||
- port: 80 | ||
targetPort: 2081 | ||
protocol: TCP | ||
# The ServiceMonitor in this file refers to this port by the name "tei" | ||
name: tei | ||
selector: | ||
app.kubernetes.io/name: tei | ||
app.kubernetes.io/instance: tei | ||
--- | ||
# Source: tei/templates/deployment.yaml | ||
# Copyright (C) 2024 Intel Corporation | ||
# SPDX-License-Identifier: Apache-2.0 | ||
|
||
# Deployment "tei": runs the text-embeddings-inference container, configured from the | ||
# "tei-config" ConfigMap (plus an optional "extra-env-config" ConfigMap). | ||
apiVersion: apps/v1 | ||
kind: Deployment | ||
metadata: | ||
name: tei | ||
labels: | ||
helm.sh/chart: tei-0.8.0 | ||
app.kubernetes.io/name: tei | ||
app.kubernetes.io/instance: tei | ||
app.kubernetes.io/version: "cpu-1.5" | ||
app.kubernetes.io/managed-by: Helm | ||
spec: | ||
# use explicit replica counts only if HorizontalPodAutoscaler is disabled | ||
# (no replicas field is set in this manifest; a HorizontalPodAutoscaler targeting this Deployment is defined in this file) | ||
selector: | ||
matchLabels: | ||
app.kubernetes.io/name: tei | ||
app.kubernetes.io/instance: tei | ||
template: | ||
metadata: | ||
labels: | ||
app.kubernetes.io/name: tei | ||
app.kubernetes.io/instance: tei | ||
spec: | ||
securityContext: | ||
{} | ||
containers: | ||
- name: tei | ||
envFrom: | ||
- configMapRef: | ||
name: tei-config | ||
# Second ConfigMap is marked optional, so it does not have to exist | ||
- configMapRef: | ||
name: extra-env-config | ||
optional: true | ||
securityContext: | ||
{} | ||
image: "ghcr.io/huggingface/text-embeddings-inference:cpu-1.5" | ||
imagePullPolicy: IfNotPresent | ||
args: | ||
- "--auto-truncate" | ||
volumeMounts: | ||
- mountPath: /data | ||
name: model-volume | ||
- mountPath: /dev/shm | ||
name: shm | ||
- mountPath: /tmp | ||
name: tmp | ||
ports: | ||
- name: http | ||
containerPort: 2081 | ||
protocol: TCP | ||
# All three probes issue GET /health on the "http" port every 5 seconds | ||
# liveness: 24 failures x 5s period = 120s | ||
livenessProbe: | ||
failureThreshold: 24 | ||
httpGet: | ||
path: /health | ||
port: http | ||
initialDelaySeconds: 5 | ||
periodSeconds: 5 | ||
readinessProbe: | ||
httpGet: | ||
path: /health | ||
port: http | ||
initialDelaySeconds: 5 | ||
periodSeconds: 5 | ||
# startup: 120 failures x 5s period = 600s | ||
startupProbe: | ||
failureThreshold: 120 | ||
httpGet: | ||
path: /health | ||
port: http | ||
initialDelaySeconds: 5 | ||
periodSeconds: 5 | ||
resources: | ||
{} | ||
volumes: | ||
# Host directory mounted at /data; NOTE(review): presumably the model cache - confirm | ||
- name: model-volume | ||
hostPath: | ||
path: /mnt/opea-models | ||
type: Directory | ||
# Memory-backed emptyDir (limit 1Gi) mounted at /dev/shm | ||
- name: shm | ||
emptyDir: | ||
medium: Memory | ||
sizeLimit: 1Gi | ||
# Scratch emptyDir mounted at /tmp, where tei-config points the cache directories | ||
- name: tmp | ||
emptyDir: {} | ||
# extra time to finish processing buffered requests before HPA forcibly terminates pod | ||
terminationGracePeriodSeconds: 60 | ||
--- | ||
# Source: tei/templates/horizontalPodAutoscaler.yaml | ||
# Copyright (C) 2024 Intel Corporation | ||
# SPDX-License-Identifier: Apache-2.0 | ||
# HorizontalPodAutoscaler "tei": scales the "tei" Deployment between 1 and 2 replicas | ||
# based on the embedding_request_latency Object metric of Service "tei". | ||
apiVersion: autoscaling/v2 | ||
kind: HorizontalPodAutoscaler | ||
metadata: | ||
name: tei | ||
spec: | ||
scaleTargetRef: | ||
apiVersion: apps/v1 | ||
kind: Deployment | ||
name: tei | ||
minReplicas: 1 | ||
maxReplicas: 2 | ||
metrics: | ||
- type: Object | ||
object: | ||
metric: | ||
# tei-embedding time metrics are in seconds | ||
name: embedding_request_latency | ||
describedObject: | ||
apiVersion: v1 | ||
# get metric for named object of given type (in same namespace) | ||
# NOTE(review): the adapter-config rule producing embedding_request_latency hard-codes | ||
# service="tei-embedding-svc", while this object is named "tei" - confirm the Service | ||
# is renamed at deploy time, otherwise the metric may never match | ||
kind: Service | ||
name: tei | ||
target: | ||
# embedding_request_latency is average for all TEI pods. To avoid replica fluctuations when | ||
# TEI startup + request processing takes longer than HPA evaluation period, this uses | ||
# "Value" (replicas = metric.value / target.value), instead of "averageValue" type: | ||
# https://kubernetes.io/docs/tasks/run-application/horizontal-pod-autoscale/#algorithm-details | ||
type: Value | ||
value: 4 | ||
behavior: | ||
# Scale down: 180s stabilization window, one policy of 25 percent per 15s | ||
scaleDown: | ||
stabilizationWindowSeconds: 180 | ||
policies: | ||
- type: Percent | ||
value: 25 | ||
periodSeconds: 15 | ||
# Scale up: no stabilization window; selectPolicy Max across the two policies below | ||
# (50 percent or 2 pods per 15s) | ||
scaleUp: | ||
selectPolicy: Max | ||
stabilizationWindowSeconds: 0 | ||
policies: | ||
- type: Percent | ||
value: 50 | ||
periodSeconds: 15 | ||
- type: Pods | ||
value: 2 | ||
periodSeconds: 15 | ||
--- | ||
# Source: tei/templates/servicemonitor.yaml | ||
# Copyright (C) 2024 Intel Corporation | ||
# SPDX-License-Identifier: Apache-2.0 | ||
# ServiceMonitor "tei": selects Services labelled app.kubernetes.io/name=tei and | ||
# scrapes their port named "tei" over http every 4 seconds. | ||
apiVersion: monitoring.coreos.com/v1 | ||
kind: ServiceMonitor | ||
metadata: | ||
name: tei | ||
spec: | ||
selector: | ||
matchLabels: | ||
app.kubernetes.io/name: tei | ||
endpoints: | ||
# NOTE(review): no path is set here, so the scrape presumably uses the default path - confirm | ||
- interval: 4s | ||
# Matches the port name declared in the "tei" Service | ||
port: tei | ||
scheme: http | ||
Oops, something went wrong.