Skip to content

Commit

Permalink
fix multi-cluster support for windows
Browse files Browse the repository at this point in the history
  • Loading branch information
TBurda committed Feb 7, 2023
1 parent 07ee070 commit 4e72db2
Show file tree
Hide file tree
Showing 2 changed files with 62 additions and 59 deletions.
57 changes: 30 additions & 27 deletions dashboards/windows.libsonnet
Original file line number Diff line number Diff line change
Expand Up @@ -164,21 +164,22 @@ local g = import 'github.com/grafana/jsonnet-libs/grafana-builder/grafana.libson
)
.addTemplate(
template.new(
'namespace',
'cluster',
'$datasource',
'label_values(windows_pod_container_available, namespace)',
label='Namespace',
'label_values(up{%(windowsExporterSelector)s}, %(clusterLabel)s)' % $._config,
label='cluster',
refresh='time',
hide=if $._config.showMultiCluster then '' else 'variable',
sort=1,
)
).addTemplate(
)
.addTemplate(
template.new(
'cluster',
'namespace',
'$datasource',
'label_values(up{%(windowsExporterSelector)s}, %(clusterLabel)s)' % $._config,
label='cluster',
'label_values(windows_pod_container_available{%(clusterLabel)s="$cluster"}, namespace)' % $._config,
label='Namespace',
refresh='time',
hide=if $._config.showMultiCluster then '' else 'variable',
sort=1,
)
)
Expand Down Expand Up @@ -267,31 +268,32 @@ local g = import 'github.com/grafana/jsonnet-libs/grafana-builder/grafana.libson
)
.addTemplate(
template.new(
'namespace',
'cluster',
'$datasource',
'label_values(windows_pod_container_available, namespace)',
label='Namespace',
'label_values(up{%(windowsExporterSelector)s}, %(clusterLabel)s)' % $._config,
label='cluster',
refresh='time',
hide=if $._config.showMultiCluster then '' else 'variable',
sort=1,
)
)
.addTemplate(
template.new(
'pod',
'namespace',
'$datasource',
'label_values(windows_pod_container_available{namespace="$namespace"}, pod)',
label='Pod',
'label_values(windows_pod_container_available{%(clusterLabel)s="$cluster"}, namespace)' % $._config,
label='Namespace',
refresh='time',
sort=1,
)
).addTemplate(
)
.addTemplate(
template.new(
'cluster',
'pod',
'$datasource',
'label_values(up{%(windowsExporterSelector)s}, %(clusterLabel)s)' % $._config,
label='cluster',
'label_values(windows_pod_container_available{%(clusterLabel)s="$cluster",namespace="$namespace"}, pod)' % $._config,
label='Pod',
refresh='time',
hide=if $._config.showMultiCluster then '' else 'variable',
sort=1,
)
)
Expand Down Expand Up @@ -493,21 +495,22 @@ local g = import 'github.com/grafana/jsonnet-libs/grafana-builder/grafana.libson
)
.addTemplate(
template.new(
'instance',
'cluster',
'$datasource',
'label_values(windows_system_system_up_time, instance)',
label='Instance',
'label_values(up{%(windowsExporterSelector)s}, %(clusterLabel)s)' % $._config,
label='cluster',
refresh='time',
hide=if $._config.showMultiCluster then '' else 'variable',
sort=1,
)
).addTemplate(
)
.addTemplate(
template.new(
'cluster',
'instance',
'$datasource',
'label_values(up{%(windowsExporterSelector)s}, %(clusterLabel)s)' % $._config,
label='cluster',
'label_values(windows_system_system_up_time{%(clusterLabel)s="$cluster"}, instance)' % $._config,
label='Instance',
refresh='time',
hide=if $._config.showMultiCluster then '' else 'variable',
sort=1,
)
)
Expand Down
64 changes: 32 additions & 32 deletions rules/windows.libsonnet
Original file line number Diff line number Diff line change
Expand Up @@ -8,7 +8,7 @@
// This rule gives the number of Windows nodes.
record: 'node:windows_node:sum',
expr: |||
count (
count by (%(clusterLabel)s) (
windows_system_system_up_time{%(windowsExporterSelector)s}
)
||| % $._config,
Expand All @@ -17,7 +17,7 @@
// This rule gives the number of CPUs per node.
record: 'node:windows_node_num_cpu:sum',
expr: |||
count by (instance) (sum by (instance, core) (
count by (%(clusterLabel)s, instance) (sum by (%(clusterLabel)s, instance, core) (
windows_cpu_time_total{%(windowsExporterSelector)s}
))
||| % $._config,
Expand All @@ -26,14 +26,14 @@
// CPU utilisation is the fraction of CPU time that is not idle.
record: ':windows_node_cpu_utilisation:avg1m',
expr: |||
1 - avg(rate(windows_cpu_time_total{%(windowsExporterSelector)s,mode="idle"}[1m]))
1 - avg by (%(clusterLabel)s) (rate(windows_cpu_time_total{%(windowsExporterSelector)s,mode="idle"}[1m]))
||| % $._config,
},
{
// CPU utilisation is the fraction of CPU time that is not idle.
record: 'node:windows_node_cpu_utilisation:avg1m',
expr: |||
1 - avg by (instance) (
1 - avg by (%(clusterLabel)s, instance) (
rate(windows_cpu_time_total{%(windowsExporterSelector)s,mode="idle"}[1m])
)
||| % $._config,
Expand All @@ -42,17 +42,17 @@
record: ':windows_node_memory_utilisation:',
expr: |||
1 -
sum(windows_memory_available_bytes{%(windowsExporterSelector)s})
sum by (%(clusterLabel)s) (windows_memory_available_bytes{%(windowsExporterSelector)s})
/
sum(windows_os_visible_memory_bytes{%(windowsExporterSelector)s})
sum by (%(clusterLabel)s) (windows_os_visible_memory_bytes{%(windowsExporterSelector)s})
||| % $._config,
},
// Add separate rules for Free & Total, so we can aggregate across clusters
// in dashboards.
{
record: ':windows_node_memory_MemFreeCached_bytes:sum',
expr: |||
sum(windows_memory_available_bytes{%(windowsExporterSelector)s} + windows_memory_cache_bytes{%(windowsExporterSelector)s})
sum by (%(clusterLabel)s) (windows_memory_available_bytes{%(windowsExporterSelector)s} + windows_memory_cache_bytes{%(windowsExporterSelector)s})
||| % $._config,
},
{
Expand All @@ -64,15 +64,15 @@
{
record: ':windows_node_memory_MemTotal_bytes:sum',
expr: |||
sum(windows_os_visible_memory_bytes{%(windowsExporterSelector)s})
sum by (%(clusterLabel)s) (windows_os_visible_memory_bytes{%(windowsExporterSelector)s})
||| % $._config,
},
{
// Available memory per node
// SINCE 2018-02-08
record: 'node:windows_node_memory_bytes_available:sum',
expr: |||
sum by (instance) (
sum by (%(clusterLabel)s, instance) (
(windows_memory_available_bytes{%(windowsExporterSelector)s})
)
||| % $._config,
Expand All @@ -81,7 +81,7 @@
// Total memory per node
record: 'node:windows_node_memory_bytes_total:sum',
expr: |||
sum by (instance) (
sum by (%(clusterLabel)s, instance) (
windows_os_visible_memory_bytes{%(windowsExporterSelector)s}
)
||| % $._config,
Expand Down Expand Up @@ -111,7 +111,7 @@
// Disk utilisation (time spent on reads and writes; via irate() it's bound by 1 second)
record: ':windows_node_disk_utilisation:avg_irate',
expr: |||
avg(irate(windows_logical_disk_read_seconds_total{%(windowsExporterSelector)s}[1m]) +
avg by (%(clusterLabel)s) (irate(windows_logical_disk_read_seconds_total{%(windowsExporterSelector)s}[1m]) +
irate(windows_logical_disk_write_seconds_total{%(windowsExporterSelector)s}[1m])
)
||| % $._config,
Expand All @@ -120,7 +120,7 @@
// Disk utilisation (time spent on reads and writes; via irate() it's bound by 1 second)
record: 'node:windows_node_disk_utilisation:avg_irate',
expr: |||
avg by (instance) (
avg by (%(clusterLabel)s, instance) (
(irate(windows_logical_disk_read_seconds_total{%(windowsExporterSelector)s}[1m]) +
irate(windows_logical_disk_write_seconds_total{%(windowsExporterSelector)s}[1m]))
)
Expand All @@ -129,7 +129,7 @@
{
record: 'node:windows_node_filesystem_usage:',
expr: |||
max by (instance,volume)(
max by (%(clusterLabel)s,instance,volume)(
(windows_logical_disk_size_bytes{%(windowsExporterSelector)s}
- windows_logical_disk_free_bytes{%(windowsExporterSelector)s})
/ windows_logical_disk_size_bytes{%(windowsExporterSelector)s}
Expand All @@ -139,34 +139,34 @@
{
record: 'node:windows_node_filesystem_avail:',
expr: |||
max by (instance, volume) (windows_logical_disk_free_bytes{%(windowsExporterSelector)s} / windows_logical_disk_size_bytes{%(windowsExporterSelector)s})
max by (%(clusterLabel)s, instance, volume) (windows_logical_disk_free_bytes{%(windowsExporterSelector)s} / windows_logical_disk_size_bytes{%(windowsExporterSelector)s})
||| % $._config,
},
{
record: ':windows_node_net_utilisation:sum_irate',
expr: |||
sum(irate(windows_net_bytes_total{%(windowsExporterSelector)s}[1m]))
sum by (%(clusterLabel)s) (irate(windows_net_bytes_total{%(windowsExporterSelector)s}[1m]))
||| % $._config,
},
{
record: 'node:windows_node_net_utilisation:sum_irate',
expr: |||
sum by (instance) (
sum by (%(clusterLabel)s, instance) (
(irate(windows_net_bytes_total{%(windowsExporterSelector)s}[1m]))
)
||| % $._config,
},
{
record: ':windows_node_net_saturation:sum_irate',
expr: |||
sum(irate(windows_net_packets_received_discarded_total{%(windowsExporterSelector)s}[1m])) +
sum(irate(windows_net_packets_outbound_discarded_total{%(windowsExporterSelector)s}[1m]))
sum by (%(clusterLabel)s) (irate(windows_net_packets_received_discarded_total{%(windowsExporterSelector)s}[1m])) +
sum by (%(clusterLabel)s) (irate(windows_net_packets_outbound_discarded_total{%(windowsExporterSelector)s}[1m]))
||| % $._config,
},
{
record: 'node:windows_node_net_saturation:sum_irate',
expr: |||
sum by (instance) (
sum by (%(clusterLabel)s, instance) (
(irate(windows_net_packets_received_discarded_total{%(windowsExporterSelector)s}[1m]) +
irate(windows_net_packets_outbound_discarded_total{%(windowsExporterSelector)s}[1m]))
)
Expand All @@ -180,71 +180,71 @@
{
record: 'windows_pod_container_available',
expr: |||
windows_container_available{%(windowsExporterSelector)s} * on(container_id) group_left(container, pod, namespace) max(kube_pod_container_info{%(kubeStateMetricsSelector)s}) by(container, container_id, pod, namespace)
windows_container_available{%(windowsExporterSelector)s} * on(container_id) group_left(container, pod, namespace, %(clusterLabel)s) max(kube_pod_container_info{%(kubeStateMetricsSelector)s}) by(container, container_id, pod, namespace, %(clusterLabel)s)
||| % $._config,
},
{
record: 'windows_container_total_runtime',
expr: |||
windows_container_cpu_usage_seconds_total{%(windowsExporterSelector)s} * on(container_id) group_left(container, pod, namespace) max(kube_pod_container_info{%(kubeStateMetricsSelector)s}) by(container, container_id, pod, namespace)
windows_container_cpu_usage_seconds_total{%(windowsExporterSelector)s} * on(container_id) group_left(container, pod, namespace, %(clusterLabel)s) max(kube_pod_container_info{%(kubeStateMetricsSelector)s}) by(container, container_id, pod, namespace, %(clusterLabel)s)
||| % $._config,
},
{
record: 'windows_container_memory_usage',
expr: |||
windows_container_memory_usage_commit_bytes{%(windowsExporterSelector)s} * on(container_id) group_left(container, pod, namespace) max(kube_pod_container_info{%(kubeStateMetricsSelector)s}) by(container, container_id, pod, namespace)
windows_container_memory_usage_commit_bytes{%(windowsExporterSelector)s} * on(container_id) group_left(container, pod, namespace, %(clusterLabel)s) max(kube_pod_container_info{%(kubeStateMetricsSelector)s}) by(container, container_id, pod, namespace, %(clusterLabel)s)
||| % $._config,
},
{
record: 'windows_container_private_working_set_usage',
expr: |||
windows_container_memory_usage_private_working_set_bytes{%(windowsExporterSelector)s} * on(container_id) group_left(container, pod, namespace) max(kube_pod_container_info{%(kubeStateMetricsSelector)s}) by(container, container_id, pod, namespace)
windows_container_memory_usage_private_working_set_bytes{%(windowsExporterSelector)s} * on(container_id) group_left(container, pod, namespace, %(clusterLabel)s) max(kube_pod_container_info{%(kubeStateMetricsSelector)s}) by(container, container_id, pod, namespace, %(clusterLabel)s)
||| % $._config,
},
{
record: 'windows_container_network_received_bytes_total',
expr: |||
windows_container_network_receive_bytes_total{%(windowsExporterSelector)s} * on(container_id) group_left(container, pod, namespace) max(kube_pod_container_info{%(kubeStateMetricsSelector)s}) by(container, container_id, pod, namespace)
windows_container_network_receive_bytes_total{%(windowsExporterSelector)s} * on(container_id) group_left(container, pod, namespace, %(clusterLabel)s) max(kube_pod_container_info{%(kubeStateMetricsSelector)s}) by(container, container_id, pod, namespace, %(clusterLabel)s)
||| % $._config,
},
{
record: 'windows_container_network_transmitted_bytes_total',
expr: |||
windows_container_network_transmit_bytes_total{%(windowsExporterSelector)s} * on(container_id) group_left(container, pod, namespace) max(kube_pod_container_info{%(kubeStateMetricsSelector)s}) by(container, container_id, pod, namespace)
windows_container_network_transmit_bytes_total{%(windowsExporterSelector)s} * on(container_id) group_left(container, pod, namespace, %(clusterLabel)s) max(kube_pod_container_info{%(kubeStateMetricsSelector)s}) by(container, container_id, pod, namespace, %(clusterLabel)s)
||| % $._config,
},
{
record: 'kube_pod_windows_container_resource_memory_request',
expr: |||
max by (namespace, pod, container) (
max by (%(clusterLabel)s, namespace, pod, container) (
kube_pod_container_resource_requests{resource="memory",%(kubeStateMetricsSelector)s}
) * on(container,pod,namespace) (windows_pod_container_available)
) * on(container,pod,namespace,%(clusterLabel)s) (windows_pod_container_available)
||| % $._config,
},
{
record: 'kube_pod_windows_container_resource_memory_limit',
expr: |||
kube_pod_container_resource_limits{resource="memory",%(kubeStateMetricsSelector)s} * on(container,pod,namespace) (windows_pod_container_available)
kube_pod_container_resource_limits{resource="memory",%(kubeStateMetricsSelector)s} * on(container,pod,namespace,%(clusterLabel)s) (windows_pod_container_available)
||| % $._config,
},
{
record: 'kube_pod_windows_container_resource_cpu_cores_request',
expr: |||
max by (namespace, pod, container) (
max by (%(clusterLabel)s, namespace, pod, container) (
kube_pod_container_resource_requests{resource="cpu",%(kubeStateMetricsSelector)s}
) * on(container,pod,namespace) (windows_pod_container_available)
) * on(container,pod,namespace,%(clusterLabel)s) (windows_pod_container_available)
||| % $._config,
},
{
record: 'kube_pod_windows_container_resource_cpu_cores_limit',
expr: |||
kube_pod_container_resource_limits{resource="cpu",%(kubeStateMetricsSelector)s} * on(container,pod,namespace) (windows_pod_container_available)
kube_pod_container_resource_limits{resource="cpu",%(kubeStateMetricsSelector)s} * on(container,pod,namespace,%(clusterLabel)s) (windows_pod_container_available)
||| % $._config,
},
{
record: 'namespace_pod_container:windows_container_cpu_usage_seconds_total:sum_rate',
expr: |||
sum by (namespace, pod, container) (
sum by (%(clusterLabel)s, namespace, pod, container) (
rate(windows_container_total_runtime{}[5m])
)
||| % $._config,
Expand Down

0 comments on commit 4e72db2

Please sign in to comment.