Skip to content

Commit

Permalink
Merge pull request #13 from platform9/private/vjoshi/main/PMK-5929
Browse files Browse the repository at this point in the history
PMK-5929 Adding additional scrape config
  • Loading branch information
jayanth-tjvrr authored Oct 1, 2024
2 parents 8b2c207 + 36305c1 commit a5043b0
Show file tree
Hide file tree
Showing 3 changed files with 23 additions and 18 deletions.
Original file line number Diff line number Diff line change
Expand Up @@ -79,14 +79,18 @@ spec:
summary: PersistentVolume is filling up.
expr: |-
(
kubelet_volume_stats_available_bytes{job="kubelet", namespace=~".*"}
kubelet_volume_stats_available_bytes{job="kubelet", namespace=~"{{ $targetNamespace }}", metrics_path="/metrics"}
/
kubelet_volume_stats_capacity_bytes{job="kubelet", namespace=~".*"}
kubelet_volume_stats_capacity_bytes{job="kubelet", namespace=~"{{ $targetNamespace }}", metrics_path="/metrics"}
) < 0.15
and
kubelet_volume_stats_used_bytes{job="kubelet", namespace=~".*"} > 0
kubelet_volume_stats_used_bytes{job="kubelet", namespace=~"{{ $targetNamespace }}", metrics_path="/metrics"} > 0
and
predict_linear(kubelet_volume_stats_available_bytes{job="kubelet",namespace=~".*"}[6h], 4 * 24 * 3600)
predict_linear(kubelet_volume_stats_available_bytes{job="kubelet", namespace=~"{{ $targetNamespace }}", metrics_path="/metrics"}[6h], 4 * 24 * 3600) < 0
unless on ({{ range $.Values.defaultRules.additionalAggregationLabels }}{{ . }},{{ end }}cluster, namespace, persistentvolumeclaim)
kube_persistentvolumeclaim_access_mode{ access_mode="ReadOnlyMany"} == 1
unless on ({{ range $.Values.defaultRules.additionalAggregationLabels }}{{ . }},{{ end }}cluster, namespace, persistentvolumeclaim)
kube_persistentvolumeclaim_labels{label_excluded_from_alerts="true"} == 1
for: {{ dig "KubePersistentVolumeFillingUp" "for" "1h" .Values.customRules }}
{{- with .Values.defaultRules.keepFiringFor }}
keep_firing_for: "{{ . }}"
Expand Down Expand Up @@ -116,12 +120,12 @@ spec:
summary: PersistentVolumeInodes are filling up.
expr: |-
(
kubelet_volume_stats_inodes_free{job="kubelet", namespace=~"{{ $targetNamespace }}"}
kubelet_volume_stats_inodes_free{job="kubelet", namespace=~"{{ $targetNamespace }}", metrics_path="/metrics"}
/
kubelet_volume_stats_inodes{job="kubelet", namespace=~"{{ $targetNamespace }}"}
kubelet_volume_stats_inodes{job="kubelet", namespace=~"{{ $targetNamespace }}", metrics_path="/metrics"}
) < 0.03
and
kubelet_volume_stats_inodes_used{job="kubelet", namespace=~"{{ $targetNamespace }}"} > 0
kubelet_volume_stats_inodes_used{job="kubelet", namespace=~"{{ $targetNamespace }}", metrics_path="/metrics"} > 0
unless on ({{ range $.Values.defaultRules.additionalAggregationLabels }}{{ . }},{{ end }}cluster, namespace, persistentvolumeclaim)
kube_persistentvolumeclaim_access_mode{ access_mode="ReadOnlyMany"} == 1
unless on ({{ range $.Values.defaultRules.additionalAggregationLabels }}{{ . }},{{ end }}cluster, namespace, persistentvolumeclaim)
Expand Down Expand Up @@ -155,14 +159,14 @@ spec:
summary: PersistentVolumeInodes are filling up.
expr: |-
(
kubelet_volume_stats_inodes_free{job="kubelet", namespace=~"{{ $targetNamespace }}"}
kubelet_volume_stats_inodes_free{job="kubelet", namespace=~"{{ $targetNamespace }}", metrics_path="/metrics"}
/
kubelet_volume_stats_inodes{job="kubelet", namespace=~"{{ $targetNamespace }}"}
kubelet_volume_stats_inodes{job="kubelet", namespace=~"{{ $targetNamespace }}", metrics_path="/metrics"}
) < 0.15
and
kubelet_volume_stats_inodes_used{job="kubelet", namespace=~"{{ $targetNamespace }}"} > 0
kubelet_volume_stats_inodes_used{job="kubelet", namespace=~"{{ $targetNamespace }}", metrics_path="/metrics"} > 0
and
predict_linear(kubelet_volume_stats_inodes_free{job="kubelet", namespace=~"{{ $targetNamespace }}"}[6h], 4 * 24 * 3600) < 0
predict_linear(kubelet_volume_stats_inodes_free{job="kubelet", namespace=~"{{ $targetNamespace }}", metrics_path="/metrics"}[6h], 4 * 24 * 3600) < 0
unless on ({{ range $.Values.defaultRules.additionalAggregationLabels }}{{ . }},{{ end }}cluster, namespace, persistentvolumeclaim)
kube_persistentvolumeclaim_access_mode{ access_mode="ReadOnlyMany"} == 1
unless on ({{ range $.Values.defaultRules.additionalAggregationLabels }}{{ . }},{{ end }}cluster, namespace, persistentvolumeclaim)
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -184,7 +184,7 @@ spec:
description: Kubelet Pod startup 99th percentile latency is {{`{{`}} $value {{`}}`}} seconds on node {{`{{`}} $labels.node {{`}}`}}.
runbook_url: {{ .Values.defaultRules.runbookUrl }}/kubernetes/kubeletpodstartuplatencyhigh
summary: Kubelet Pod startup latency is too high.
expr: histogram_quantile(0.99, sum(rate(kubelet_pod_worker_duration_seconds_bucket{job="kubelet"}[5m])) by ({{ range $.Values.defaultRules.additionalAggregationLabels }}{{ . }},{{ end }}cluster, instance, le)) * on ({{ range $.Values.defaultRules.additionalAggregationLabels }}{{ . }},{{ end }}cluster, instance) group_left(node) kubelet_node_name{job="kubelet"} > 60
expr: histogram_quantile(0.99, sum(rate(kubelet_pod_worker_duration_seconds_bucket{job="kubelet", metrics_path="/metrics"}[5m])) by ({{ range $.Values.defaultRules.additionalAggregationLabels }}{{ . }},{{ end }}cluster, instance, le)) * on ({{ range $.Values.defaultRules.additionalAggregationLabels }}{{ . }},{{ end }}cluster, instance) group_left(node) kubelet_node_name{job="kubelet", metrics_path="/metrics"} > 60
for: {{ dig "KubeletPodStartUpLatencyHigh" "for" "15m" .Values.customRules }}
{{- with .Values.defaultRules.keepFiringFor }}
keep_firing_for: "{{ . }}"
Expand Down
13 changes: 7 additions & 6 deletions charts/kube-prometheus-stack/values.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -3944,13 +3944,14 @@ prometheus:
relabel_configs:
- action: labelmap
regex: __meta_kubernetes_node_label_(.+)
- source_labels: [__meta_kubernetes_node_name]
regex: (.+)
target_label: __metrics_path__
- source_labels:
- __metrics_path__
target_label: metrics_path
replacement: /metrics
- job_name: cadvisor
scrape_interval: 2m
honor_labels: false
metrics_path: /metrics/cadvisor
scheme: https
tls_config:
ca_file: /var/run/secrets/kubernetes.io/serviceaccount/ca.crt
Expand All @@ -3960,9 +3961,9 @@ prometheus:
relabel_configs:
- action: labelmap
regex: __meta_kubernetes_node_label_(.+)
- source_labels: [__meta_kubernetes_node_name]
regex: (.+)
target_label: __metrics_path__
- source_labels:
- __metrics_path__
target_label: metrics_path
replacement: /metrics/cadvisor
- job_name: apiserver
scrape_interval: 5m
Expand Down

0 comments on commit a5043b0

Please sign in to comment.