From eece396b8b8cf069bba7871afa9890408616e0ed Mon Sep 17 00:00:00 2001 From: SuperQ Date: Sun, 15 Dec 2024 17:39:51 +0100 Subject: [PATCH] [kube-prometheus-stack] Feat: Improve cAdvisor metrics scrape Improve the collection of cAdvisor metrics by adjusting the scrape interval to match the kubelet hardcoded minimum housekeeping interval. * Set the cAdvisor and resource intervals to 10s by default. * Use the user-configured kubelet metrics interval if it is set. * Enforce honorTimestamps for cAdvisor and resource if timestamp staleness is enabled. Signed-off-by: SuperQ --- charts/kube-prometheus-stack/Chart.yaml | 2 +- .../exporters/kubelet/servicemonitor.yaml | 12 ++++++++++++ charts/kube-prometheus-stack/values.yaml | 17 ++++++++++++----- 3 files changed, 25 insertions(+), 6 deletions(-) diff --git a/charts/kube-prometheus-stack/Chart.yaml b/charts/kube-prometheus-stack/Chart.yaml index 7e0d30dfa1b8..f2bc85f97c6c 100644 --- a/charts/kube-prometheus-stack/Chart.yaml +++ b/charts/kube-prometheus-stack/Chart.yaml @@ -23,7 +23,7 @@ name: kube-prometheus-stack sources: - https://github.com/prometheus-community/helm-charts - https://github.com/prometheus-operator/kube-prometheus -version: 67.1.0 +version: 67.2.0 appVersion: v0.79.0 kubeVersion: ">=1.19.0-0" home: https://github.com/prometheus-operator/kube-prometheus diff --git a/charts/kube-prometheus-stack/templates/exporters/kubelet/servicemonitor.yaml b/charts/kube-prometheus-stack/templates/exporters/kubelet/servicemonitor.yaml index 5b34e93a5cb3..09391c38711b 100644 --- a/charts/kube-prometheus-stack/templates/exporters/kubelet/servicemonitor.yaml +++ b/charts/kube-prometheus-stack/templates/exporters/kubelet/servicemonitor.yaml @@ -61,6 +61,8 @@ spec: path: /metrics/cadvisor {{- if .Values.kubelet.serviceMonitor.interval }} interval: {{ .Values.kubelet.serviceMonitor.interval }} + {{- else }} + interval: {{ .Values.kubelet.serviceMonitor.cAdvisorInterval }} {{- end }} {{- if 
.Values.kubelet.serviceMonitor.proxyUrl }} proxyUrl: {{ .Values.kubelet.serviceMonitor.proxyUrl }} @@ -69,7 +71,11 @@ spec: scrapeTimeout: {{ .Values.kubelet.serviceMonitor.scrapeTimeout }} {{- end }} honorLabels: {{ .Values.kubelet.serviceMonitor.honorLabels }} + {{- if .Values.kubelet.serviceMonitor.trackTimestampsStaleness }} + honorTimestamps: true + {{- else }} honorTimestamps: {{ .Values.kubelet.serviceMonitor.honorTimestamps }} + {{- end }} trackTimestampsStaleness: {{ .Values.kubelet.serviceMonitor.trackTimestampsStaleness }} {{- include "kube-prometheus-stack.kubelet.authConfig" . | indent 4 }} {{- if .Values.kubelet.serviceMonitor.cAdvisorMetricRelabelings }} @@ -112,6 +118,8 @@ spec: path: {{ .Values.kubelet.serviceMonitor.resourcePath }} {{- if .Values.kubelet.serviceMonitor.interval }} interval: {{ .Values.kubelet.serviceMonitor.interval }} + {{- else }} + interval: {{ .Values.kubelet.serviceMonitor.resourceInterval }} {{- end }} {{- if .Values.kubelet.serviceMonitor.proxyUrl }} proxyUrl: {{ .Values.kubelet.serviceMonitor.proxyUrl }} @@ -120,7 +128,11 @@ spec: scrapeTimeout: {{ .Values.kubelet.serviceMonitor.scrapeTimeout }} {{- end }} honorLabels: {{ .Values.kubelet.serviceMonitor.honorLabels }} + {{- if .Values.kubelet.serviceMonitor.trackTimestampsStaleness }} + honorTimestamps: true + {{- else }} honorTimestamps: {{ .Values.kubelet.serviceMonitor.honorTimestamps }} + {{- end }} trackTimestampsStaleness: {{ .Values.kubelet.serviceMonitor.trackTimestampsStaleness }} {{- include "kube-prometheus-stack.kubelet.authConfig" . 
| indent 4 }} {{- if .Values.kubelet.serviceMonitor.resourceMetricRelabelings }} diff --git a/charts/kube-prometheus-stack/values.yaml b/charts/kube-prometheus-stack/values.yaml index 554a7bc11d48..6efdb92023fb 100644 --- a/charts/kube-prometheus-stack/values.yaml +++ b/charts/kube-prometheus-stack/values.yaml @@ -1324,7 +1324,7 @@ kubelet: ## If true, defines whether Prometheus tracks staleness of the metrics that have an explicit timestamp present in scraped data. Has no effect if `honorTimestamps` is false. ## We recommend enabling this if you want the best possible accuracy for container_ metrics scraped from cadvisor. - ## + ## For more details see: https://github.com/prometheus-community/helm-charts/pull/5063#issuecomment-2545374849 trackTimestampsStaleness: true ## SampleLimit defines per-scrape limit on number of scraped samples that will be accepted. @@ -1362,10 +1362,6 @@ kubelet: ## insecureSkipVerify: true - ## Enable scraping /metrics/cadvisor from kubelet's service - ## - cAdvisor: true - ## Enable scraping /metrics/probes from kubelet's service ## probes: true @@ -1376,7 +1372,18 @@ kubelet: resource: false # From kubernetes 1.18, /metrics/resource/v1alpha1 renamed to /metrics/resource resourcePath: "/metrics/resource/v1alpha1" + ## Configure the scrape interval for resource metrics. This is configured to the default Kubelet cAdvisor + ## minimum housekeeping interval in order to avoid missing samples. Note, this value is ignored + ## if kubelet.serviceMonitor.interval is not empty. + resourceInterval: 10s + ## Enable scraping /metrics/cadvisor from kubelet's service + ## + cAdvisor: true + ## Configure the scrape interval for cAdvisor. This is configured to the default Kubelet cAdvisor + ## minimum housekeeping interval in order to avoid missing samples. Note, this value is ignored + ## if kubelet.serviceMonitor.interval is not empty. + cAdvisorInterval: 10s ## MetricRelabelConfigs to apply to samples after scraping, but before ingestion. 
## ref: https://github.com/prometheus-operator/prometheus-operator/blob/main/Documentation/api.md#relabelconfig ##