From 8700384a90fdbafea435ba6318db1d81242d4190 Mon Sep 17 00:00:00 2001 From: "Han Verstraete (OpenFaaS Ltd)" Date: Wed, 14 Aug 2024 11:33:20 +0200 Subject: [PATCH 1/2] Scrape metrics from functions directly Support scraping metrics from function pods directly. This allows users to provide metrics for custom scaling types. Signed-off-by: Han Verstraete (OpenFaaS Ltd) --- .../templates/prometheus-pro-cfg.yaml | 50 +++++++++++++++++-- 1 file changed, 47 insertions(+), 3 deletions(-) diff --git a/chart/openfaas/templates/prometheus-pro-cfg.yaml b/chart/openfaas/templates/prometheus-pro-cfg.yaml index 361a908aa..ed72c7ede 100644 --- a/chart/openfaas/templates/prometheus-pro-cfg.yaml +++ b/chart/openfaas/templates/prometheus-pro-cfg.yaml @@ -72,9 +72,7 @@ data: namespaces: names: - {{ .Release.Namespace }} -{{- if ne $functionNs (.Release.Namespace | toString) }} - - {{ $functionNs }} -{{- end }} + relabel_configs: - action: labelmap regex: __meta_kubernetes_pod_label_(.+) @@ -143,6 +141,52 @@ data: regex: '(.*);(.*)' replacement: '${1}.${2}' target_label: "function_name" + + - job_name: 'openfaas-function-pods' + scrape_interval: 15s + kubernetes_sd_configs: + - role: pod + # Unless .Values.clusterRole is set, limit pod discovery to the function namespace. NOTE(review): replaces "TODO: Insert namespace for role" - confirm nothing else is pending + {{- if not .Values.clusterRole }} + namespaces: + names: [ {{ $functionNs | quote }} ] + {{- end }} + relabel_configs: + # Only keep OpenFaaS function pods (pods that carry a faas_function label) + - source_labels: [__meta_kubernetes_pod_labelpresent_faas_function] + action: keep + regex: true + - source_labels: [__meta_kubernetes_pod_annotation_prometheus_io_scrape] + action: keep + regex: true + - source_labels: [__address__, __meta_kubernetes_pod_annotation_prometheus_io_port] + action: replace + regex: ([^:]+)(?::\d+)?;(\d+) + replacement: $1:$2 + target_label: __address__ + - action: replace + regex: (.+) + source_labels: [__meta_kubernetes_pod_annotation_prometheus_io_path] + target_label: __metrics_path__ + - source_labels: [__meta_kubernetes_namespace] + action: replace + target_label: 
kubernetes_namespace + - source_labels: [__meta_kubernetes_pod_name] + action: replace + target_label: kubernetes_pod_name + - action: replace + source_labels: + - kubernetes_pod_name + regex: '^([0-9a-zA-Z-]+)+(-[0-9a-zA-Z]+-[0-9a-zA-Z]+)$' + replacement: '$1' + target_label: deployment_name + + metric_relabel_configs: + - source_labels: [deployment_name, kubernetes_namespace] + separator: ";" + regex: '(.*);(.*)' + replacement: '${1}.${2}' + target_label: "function_name" prometheus-rules.yml: | groups: From 2dfa9e71bd410608896a88d71bf83b6bedc996f2 Mon Sep 17 00:00:00 2001 From: "Han Verstraete (OpenFaaS Ltd)" Date: Wed, 21 Aug 2024 10:49:56 +0200 Subject: [PATCH 2/2] Ceil load values for scaling in recording rule The load used to be ceiled by the autoscaler when it queried the load metric. To support custom load recording rules that do not need to be ceiled, ceiling is now done in the individual recording rules that need it. Signed-off-by: Han Verstraete (OpenFaaS Ltd) --- chart/openfaas/templates/prometheus-pro-cfg.yaml | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/chart/openfaas/templates/prometheus-pro-cfg.yaml b/chart/openfaas/templates/prometheus-pro-cfg.yaml index ed72c7ede..dd8dca275 100644 --- a/chart/openfaas/templates/prometheus-pro-cfg.yaml +++ b/chart/openfaas/templates/prometheus-pro-cfg.yaml @@ -193,17 +193,17 @@ data: - name: load rules: - record: job:function_current_load:sum - expr: sum by (function_name) ( rate( gateway_function_invocation_total{}[30s] ) ) and avg by 
(function_name) (gateway_service_target_load{scaling_type="rps"}) > 1 + expr: ceil(sum by (function_name) ( rate( gateway_function_invocation_total{}[30s] ) ) and avg by (function_name) (gateway_service_target_load{scaling_type="rps"}) > 1) labels: scaling_type: rps - record: job:function_current_load:sum - expr: sum by (function_name) ( max_over_time( gateway_function_invocation_inflight[45s:5s])) and on (function_name) avg by(function_name) (gateway_service_target_load{scaling_type="capacity"}) > bool 1 + expr: ceil(sum by (function_name) ( max_over_time( gateway_function_invocation_inflight[45s:5s])) and on (function_name) avg by(function_name) (gateway_service_target_load{scaling_type="capacity"}) > bool 1) labels: scaling_type: capacity - record: job:function_current_load:sum - expr: sum(irate ( pod_cpu_usage_seconds_total{}[1m])*1000) by (function_name) * on (function_name) avg by (function_name) (gateway_service_target_load{scaling_type="cpu"} > bool 1 ) + expr: ceil(sum(irate ( pod_cpu_usage_seconds_total{}[1m])*1000) by (function_name) * on (function_name) avg by (function_name) (gateway_service_target_load{scaling_type="cpu"} > bool 1 )) labels: scaling_type: cpu