diff --git a/chart/openfaas/templates/prometheus-pro-cfg.yaml b/chart/openfaas/templates/prometheus-pro-cfg.yaml index 361a908aa..dd8dca275 100644 --- a/chart/openfaas/templates/prometheus-pro-cfg.yaml +++ b/chart/openfaas/templates/prometheus-pro-cfg.yaml @@ -72,9 +72,7 @@ data: namespaces: names: - {{ .Release.Namespace }} -{{- if ne $functionNs (.Release.Namespace | toString) }} - - {{ $functionNs }} -{{- end }} + relabel_configs: - action: labelmap regex: __meta_kubernetes_pod_label_(.+) @@ -143,23 +141,69 @@ data: regex: '(.*);(.*)' replacement: '${1}.${2}' target_label: "function_name" + + - job_name: 'openfaas-function-pods' + scrape_interval: 15s + kubernetes_sd_configs: + - role: pod + # When .Values.clusterRole is not set, restrict pod discovery to the function namespace + {{- if not .Values.clusterRole }} + namespaces: + names: [ {{ $functionNs | quote }} ] + {{- end }} + relabel_configs: + # Only keep OpenFaaS function pods + - source_labels: [__meta_kubernetes_pod_labelpresent_faas_function] + action: keep + regex: true + - source_labels: [__meta_kubernetes_pod_annotation_prometheus_io_scrape] + action: keep + regex: true + - source_labels: [__address__, __meta_kubernetes_pod_annotation_prometheus_io_port] + action: replace + regex: ([^:]+)(?::\d+)?;(\d+) + replacement: $1:$2 + target_label: __address__ + - action: replace + regex: (.+) + source_labels: [__meta_kubernetes_pod_annotation_prometheus_io_path] + target_label: __metrics_path__ + - source_labels: [__meta_kubernetes_namespace] + action: replace + target_label: kubernetes_namespace + - source_labels: [__meta_kubernetes_pod_name] + action: replace + target_label: kubernetes_pod_name + - action: replace + source_labels: + - kubernetes_pod_name + regex: '^([0-9a-zA-Z-]+)+(-[0-9a-zA-Z]+-[0-9a-zA-Z]+)$' + replacement: '$1' + target_label: deployment_name + + metric_relabel_configs: + - source_labels: [deployment_name, kubernetes_namespace] + separator: ";" + regex: '(.*);(.*)' + replacement: '${1}.${2}' + target_label: "function_name" 
prometheus-rules.yml: | groups: - name: load rules: - record: job:function_current_load:sum - expr: sum by (function_name) ( rate( gateway_function_invocation_total{}[30s] ) ) and avg by (function_name) (gateway_service_target_load{scaling_type="rps"}) > 1 + expr: ceil(sum by (function_name) ( rate( gateway_function_invocation_total{}[30s] ) ) and avg by (function_name) (gateway_service_target_load{scaling_type="rps"}) > 1) labels: scaling_type: rps - record: job:function_current_load:sum - expr: sum by (function_name) ( max_over_time( gateway_function_invocation_inflight[45s:5s])) and on (function_name) avg by(function_name) (gateway_service_target_load{scaling_type="capacity"}) > bool 1 + expr: ceil(sum by (function_name) ( max_over_time( gateway_function_invocation_inflight[45s:5s])) and on (function_name) avg by(function_name) (gateway_service_target_load{scaling_type="capacity"}) > bool 1) labels: scaling_type: capacity - record: job:function_current_load:sum - expr: sum(irate ( pod_cpu_usage_seconds_total{}[1m])*1000) by (function_name) * on (function_name) avg by (function_name) (gateway_service_target_load{scaling_type="cpu"} > bool 1 ) + expr: ceil(sum(irate ( pod_cpu_usage_seconds_total{}[1m])*1000) by (function_name) * on (function_name) avg by (function_name) (gateway_service_target_load{scaling_type="cpu"} > bool 1 )) labels: scaling_type: cpu