From 4e72db2d487f55674fb7568a34895bfd13ee1930 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Toma=CC=81s=CC=8C=20Burda?= Date: Tue, 7 Feb 2023 08:11:51 +0100 Subject: [PATCH] fix multi-cluster support for windows --- dashboards/windows.libsonnet | 57 +++++++++++++++++--------------- rules/windows.libsonnet | 64 ++++++++++++++++++------------------ 2 files changed, 62 insertions(+), 59 deletions(-) diff --git a/dashboards/windows.libsonnet b/dashboards/windows.libsonnet index c4097f6ca..973b954e3 100644 --- a/dashboards/windows.libsonnet +++ b/dashboards/windows.libsonnet @@ -164,21 +164,22 @@ local g = import 'github.com/grafana/jsonnet-libs/grafana-builder/grafana.libson ) .addTemplate( template.new( - 'namespace', + 'cluster', '$datasource', - 'label_values(windows_pod_container_available, namespace)', - label='Namespace', + 'label_values(up{%(windowsExporterSelector)s}, %(clusterLabel)s)' % $._config, + label='cluster', refresh='time', + hide=if $._config.showMultiCluster then '' else 'variable', sort=1, ) - ).addTemplate( + ) + .addTemplate( template.new( - 'cluster', + 'namespace', '$datasource', - 'label_values(up{%(windowsExporterSelector)s}, %(clusterLabel)s)' % $._config, - label='cluster', + 'label_values(windows_pod_container_available{%(clusterLabel)s="$cluster"}, namespace)' % $._config, + label='Namespace', refresh='time', - hide=if $._config.showMultiCluster then '' else 'variable', sort=1, ) ) @@ -267,31 +268,32 @@ local g = import 'github.com/grafana/jsonnet-libs/grafana-builder/grafana.libson ) .addTemplate( template.new( - 'namespace', + 'cluster', '$datasource', - 'label_values(windows_pod_container_available, namespace)', - label='Namespace', + 'label_values(up{%(windowsExporterSelector)s}, %(clusterLabel)s)' % $._config, + label='cluster', refresh='time', + hide=if $._config.showMultiCluster then '' else 'variable', sort=1, ) ) .addTemplate( template.new( - 'pod', + 'namespace', '$datasource', - 'label_values(windows_pod_container_available{namespace="$namespace"}, pod)', - label='Pod', + 'label_values(windows_pod_container_available{%(clusterLabel)s="$cluster"}, namespace)' % $._config, + label='Namespace', refresh='time', sort=1, ) - ).addTemplate( + ) + .addTemplate( template.new( - 'cluster', + 'pod', '$datasource', - 'label_values(up{%(windowsExporterSelector)s}, %(clusterLabel)s)' % $._config, - label='cluster', + 'label_values(windows_pod_container_available{%(clusterLabel)s="$cluster",namespace="$namespace"}, pod)' % $._config, + label='Pod', refresh='time', - hide=if $._config.showMultiCluster then '' else 'variable', sort=1, ) ) @@ -493,21 +495,22 @@ local g = import 'github.com/grafana/jsonnet-libs/grafana-builder/grafana.libson ) .addTemplate( template.new( - 'instance', + 'cluster', '$datasource', - 'label_values(windows_system_system_up_time, instance)', - label='Instance', + 'label_values(up{%(windowsExporterSelector)s}, %(clusterLabel)s)' % $._config, + label='cluster', refresh='time', + hide=if $._config.showMultiCluster then '' else 'variable', sort=1, ) - ).addTemplate( + ) + .addTemplate( template.new( - 'cluster', + 'instance', '$datasource', - 'label_values(up{%(windowsExporterSelector)s}, %(clusterLabel)s)' % $._config, - label='cluster', + 'label_values(windows_system_system_up_time{%(clusterLabel)s="$cluster"}, instance)' % $._config, + label='Instance', refresh='time', - hide=if $._config.showMultiCluster then '' else 'variable', sort=1, ) ) diff --git a/rules/windows.libsonnet b/rules/windows.libsonnet index 92ff017e0..014032ef7 100644 --- a/rules/windows.libsonnet +++ b/rules/windows.libsonnet @@ -8,7 +8,7 @@ // This rule gives the number of windows nodes record: 'node:windows_node:sum', expr: ||| - count ( + count by (%(clusterLabel)s) ( windows_system_system_up_time{%(windowsExporterSelector)s} ) ||| % $._config, @@ -17,7 +17,7 @@ // This rule gives the number of CPUs per node. record: 'node:windows_node_num_cpu:sum', expr: ||| - count by (instance) (sum by (instance, core) ( + count by (%(clusterLabel)s, instance) (sum by (%(clusterLabel)s, instance, core) ( windows_cpu_time_total{%(windowsExporterSelector)s} )) ||| % $._config, @@ -26,14 +26,14 @@ // CPU utilisation is % CPU is not idle. record: ':windows_node_cpu_utilisation:avg1m', expr: ||| - 1 - avg(rate(windows_cpu_time_total{%(windowsExporterSelector)s,mode="idle"}[1m])) + 1 - avg by (%(clusterLabel)s) (rate(windows_cpu_time_total{%(windowsExporterSelector)s,mode="idle"}[1m])) ||| % $._config, }, { // CPU utilisation is % CPU is not idle. record: 'node:windows_node_cpu_utilisation:avg1m', expr: ||| - 1 - avg by (instance) ( + 1 - avg by (%(clusterLabel)s, instance) ( rate(windows_cpu_time_total{%(windowsExporterSelector)s,mode="idle"}[1m]) ) ||| % $._config, @@ -42,9 +42,9 @@ record: ':windows_node_memory_utilisation:', expr: ||| 1 - - sum(windows_memory_available_bytes{%(windowsExporterSelector)s}) + sum by (%(clusterLabel)s) (windows_memory_available_bytes{%(windowsExporterSelector)s}) / - sum(windows_os_visible_memory_bytes{%(windowsExporterSelector)s}) + sum by (%(clusterLabel)s) (windows_os_visible_memory_bytes{%(windowsExporterSelector)s}) ||| % $._config, }, // Add separate rules for Free & Total, so we can aggregate across clusters @@ -52,7 +52,7 @@ { record: ':windows_node_memory_MemFreeCached_bytes:sum', expr: ||| - sum(windows_memory_available_bytes{%(windowsExporterSelector)s} + windows_memory_cache_bytes{%(windowsExporterSelector)s}) + sum by (%(clusterLabel)s) (windows_memory_available_bytes{%(windowsExporterSelector)s} + windows_memory_cache_bytes{%(windowsExporterSelector)s}) ||| % $._config, }, { @@ -64,7 +64,7 @@ { record: ':windows_node_memory_MemTotal_bytes:sum', expr: ||| - sum(windows_os_visible_memory_bytes{%(windowsExporterSelector)s}) + sum by (%(clusterLabel)s) (windows_os_visible_memory_bytes{%(windowsExporterSelector)s}) ||| % $._config, }, { @@ -72,7 +72,7 @@ // SINCE 2018-02-08 record: 'node:windows_node_memory_bytes_available:sum', expr: ||| - sum by (instance) ( + sum by (%(clusterLabel)s, instance) ( (windows_memory_available_bytes{%(windowsExporterSelector)s}) ) ||| % $._config, @@ -81,7 +81,7 @@ // Total memory per node record: 'node:windows_node_memory_bytes_total:sum', expr: ||| - sum by (instance) ( + sum by (%(clusterLabel)s, instance) ( windows_os_visible_memory_bytes{%(windowsExporterSelector)s} ) ||| % $._config, @@ -111,7 +111,7 @@ // Disk utilisation (ms spent, by rate() it's bound by 1 second) record: ':windows_node_disk_utilisation:avg_irate', expr: ||| - avg(irate(windows_logical_disk_read_seconds_total{%(windowsExporterSelector)s}[1m]) + + avg by (%(clusterLabel)s) (irate(windows_logical_disk_read_seconds_total{%(windowsExporterSelector)s}[1m]) + irate(windows_logical_disk_write_seconds_total{%(windowsExporterSelector)s}[1m]) ) ||| % $._config, @@ -120,7 +120,7 @@ // Disk utilisation (ms spent, by rate() it's bound by 1 second) record: 'node:windows_node_disk_utilisation:avg_irate', expr: ||| - avg by (instance) ( + avg by (%(clusterLabel)s, instance) ( (irate(windows_logical_disk_read_seconds_total{%(windowsExporterSelector)s}[1m]) + irate(windows_logical_disk_write_seconds_total{%(windowsExporterSelector)s}[1m])) ) @@ -129,7 +129,7 @@ { record: 'node:windows_node_filesystem_usage:', expr: ||| - max by (instance,volume)( + max by (%(clusterLabel)s,instance,volume)( (windows_logical_disk_size_bytes{%(windowsExporterSelector)s} - windows_logical_disk_free_bytes{%(windowsExporterSelector)s}) / windows_logical_disk_size_bytes{%(windowsExporterSelector)s} @@ -139,19 +139,19 @@ { record: 'node:windows_node_filesystem_avail:', expr: ||| - max by (instance, volume) (windows_logical_disk_free_bytes{%(windowsExporterSelector)s} / windows_logical_disk_size_bytes{%(windowsExporterSelector)s}) + max by (%(clusterLabel)s, instance, volume) (windows_logical_disk_free_bytes{%(windowsExporterSelector)s} / windows_logical_disk_size_bytes{%(windowsExporterSelector)s}) ||| % $._config, }, { record: ':windows_node_net_utilisation:sum_irate', expr: ||| - sum(irate(windows_net_bytes_total{%(windowsExporterSelector)s}[1m])) + sum by (%(clusterLabel)s) (irate(windows_net_bytes_total{%(windowsExporterSelector)s}[1m])) ||| % $._config, }, { record: 'node:windows_node_net_utilisation:sum_irate', expr: ||| - sum by (instance) ( + sum by (%(clusterLabel)s, instance) ( (irate(windows_net_bytes_total{%(windowsExporterSelector)s}[1m])) ) ||| % $._config, @@ -159,14 +159,14 @@ { record: ':windows_node_net_saturation:sum_irate', expr: ||| - sum(irate(windows_net_packets_received_discarded_total{%(windowsExporterSelector)s}[1m])) + - sum(irate(windows_net_packets_outbound_discarded_total{%(windowsExporterSelector)s}[1m])) + sum by (%(clusterLabel)s) (irate(windows_net_packets_received_discarded_total{%(windowsExporterSelector)s}[1m])) + + sum by (%(clusterLabel)s) (irate(windows_net_packets_outbound_discarded_total{%(windowsExporterSelector)s}[1m])) ||| % $._config, }, { record: 'node:windows_node_net_saturation:sum_irate', expr: ||| - sum by (instance) ( + sum by (%(clusterLabel)s, instance) ( (irate(windows_net_packets_received_discarded_total{%(windowsExporterSelector)s}[1m]) + irate(windows_net_packets_outbound_discarded_total{%(windowsExporterSelector)s}[1m])) ) @@ -180,71 +180,71 @@ { record: 'windows_pod_container_available', expr: ||| - windows_container_available{%(windowsExporterSelector)s} * on(container_id) group_left(container, pod, namespace) max(kube_pod_container_info{%(kubeStateMetricsSelector)s}) by(container, container_id, pod, namespace) + windows_container_available{%(windowsExporterSelector)s} * on(container_id) group_left(container, pod, namespace, %(clusterLabel)s) max(kube_pod_container_info{%(kubeStateMetricsSelector)s}) by(container, container_id, pod, namespace, %(clusterLabel)s) ||| % $._config, }, { record: 'windows_container_total_runtime', expr: ||| - windows_container_cpu_usage_seconds_total{%(windowsExporterSelector)s} * on(container_id) group_left(container, pod, namespace) max(kube_pod_container_info{%(kubeStateMetricsSelector)s}) by(container, container_id, pod, namespace) + windows_container_cpu_usage_seconds_total{%(windowsExporterSelector)s} * on(container_id) group_left(container, pod, namespace, %(clusterLabel)s) max(kube_pod_container_info{%(kubeStateMetricsSelector)s}) by(container, container_id, pod, namespace, %(clusterLabel)s) ||| % $._config, }, { record: 'windows_container_memory_usage', expr: ||| - windows_container_memory_usage_commit_bytes{%(windowsExporterSelector)s} * on(container_id) group_left(container, pod, namespace) max(kube_pod_container_info{%(kubeStateMetricsSelector)s}) by(container, container_id, pod, namespace) + windows_container_memory_usage_commit_bytes{%(windowsExporterSelector)s} * on(container_id) group_left(container, pod, namespace, %(clusterLabel)s) max(kube_pod_container_info{%(kubeStateMetricsSelector)s}) by(container, container_id, pod, namespace, %(clusterLabel)s) ||| % $._config, }, { record: 'windows_container_private_working_set_usage', expr: ||| - windows_container_memory_usage_private_working_set_bytes{%(windowsExporterSelector)s} * on(container_id) group_left(container, pod, namespace) max(kube_pod_container_info{%(kubeStateMetricsSelector)s}) by(container, container_id, pod, namespace) + windows_container_memory_usage_private_working_set_bytes{%(windowsExporterSelector)s} * on(container_id) group_left(container, pod, namespace, %(clusterLabel)s) max(kube_pod_container_info{%(kubeStateMetricsSelector)s}) by(container, container_id, pod, namespace, %(clusterLabel)s) ||| % $._config, }, { record: 'windows_container_network_received_bytes_total', expr: ||| - windows_container_network_receive_bytes_total{%(windowsExporterSelector)s} * on(container_id) group_left(container, pod, namespace) max(kube_pod_container_info{%(kubeStateMetricsSelector)s}) by(container, container_id, pod, namespace) + windows_container_network_receive_bytes_total{%(windowsExporterSelector)s} * on(container_id) group_left(container, pod, namespace, %(clusterLabel)s) max(kube_pod_container_info{%(kubeStateMetricsSelector)s}) by(container, container_id, pod, namespace, %(clusterLabel)s) ||| % $._config, }, { record: 'windows_container_network_transmitted_bytes_total', expr: ||| - windows_container_network_transmit_bytes_total{%(windowsExporterSelector)s} * on(container_id) group_left(container, pod, namespace) max(kube_pod_container_info{%(kubeStateMetricsSelector)s}) by(container, container_id, pod, namespace) + windows_container_network_transmit_bytes_total{%(windowsExporterSelector)s} * on(container_id) group_left(container, pod, namespace, %(clusterLabel)s) max(kube_pod_container_info{%(kubeStateMetricsSelector)s}) by(container, container_id, pod, namespace, %(clusterLabel)s) ||| % $._config, }, { record: 'kube_pod_windows_container_resource_memory_request', expr: ||| - max by (namespace, pod, container) ( + max by (%(clusterLabel)s, namespace, pod, container) ( kube_pod_container_resource_requests{resource="memory",%(kubeStateMetricsSelector)s} - ) * on(container,pod,namespace) (windows_pod_container_available) + ) * on(container,pod,namespace,%(clusterLabel)s) (windows_pod_container_available) ||| % $._config, }, { record: 'kube_pod_windows_container_resource_memory_limit', expr: ||| - kube_pod_container_resource_limits{resource="memory",%(kubeStateMetricsSelector)s} * on(container,pod,namespace) (windows_pod_container_available) + kube_pod_container_resource_limits{resource="memory",%(kubeStateMetricsSelector)s} * on(container,pod,namespace,%(clusterLabel)s) (windows_pod_container_available) ||| % $._config, }, { record: 'kube_pod_windows_container_resource_cpu_cores_request', expr: ||| - max by (namespace, pod, container) ( + max by (%(clusterLabel)s, namespace, pod, container) ( kube_pod_container_resource_requests{resource="cpu",%(kubeStateMetricsSelector)s} - ) * on(container,pod,namespace) (windows_pod_container_available) + ) * on(container,pod,namespace,%(clusterLabel)s) (windows_pod_container_available) ||| % $._config, }, { record: 'kube_pod_windows_container_resource_cpu_cores_limit', expr: ||| - kube_pod_container_resource_limits{resource="cpu",%(kubeStateMetricsSelector)s} * on(container,pod,namespace) (windows_pod_container_available) + kube_pod_container_resource_limits{resource="cpu",%(kubeStateMetricsSelector)s} * on(container,pod,namespace,%(clusterLabel)s) (windows_pod_container_available) ||| % $._config, }, { record: 'namespace_pod_container:windows_container_cpu_usage_seconds_total:sum_rate', expr: ||| - sum by (namespace, pod, container) ( + sum by (%(clusterLabel)s, namespace, pod, container) ( rate(windows_container_total_runtime{}[5m]) ) ||| % $._config,