Skip to content

Commit

Permalink
feat(go.d/nvidia_smi): add "index" label to GPU charts (netdata#18833)
Browse files: browse the repository at this point in the history
  • Loading branch information
ilyam8 authored Oct 21, 2024
1 parent f0297a5 commit c22ad52
Show file tree
Hide file tree
Showing 3 changed files with 9 additions and 6 deletions.
5 changes: 3 additions & 2 deletions src/go/plugin/go.d/modules/nvidia_smi/charts.go
Original file line number Diff line number Diff line change
Expand Up @@ -4,6 +4,7 @@ package nvidia_smi

import (
"fmt"
"strconv"
"strings"

"github.com/netdata/netdata/go/plugins/plugin/go.d/agent/module"
Expand Down Expand Up @@ -261,7 +262,7 @@ var (
}
)

func (nv *NvidiaSmi) addGPUXMLCharts(gpu gpuInfo) {
func (nv *NvidiaSmi) addGpuCharts(gpu gpuInfo, index int) {
charts := gpuXMLCharts.Copy()

if !isValidValue(gpu.Utilization.GpuUtil) {
Expand Down Expand Up @@ -294,7 +295,7 @@ func (nv *NvidiaSmi) addGPUXMLCharts(gpu gpuInfo) {
for _, c := range *charts {
c.ID = fmt.Sprintf(c.ID, strings.ToLower(gpu.UUID))
c.Labels = []module.Label{
// csv output has no 'product_brand'
{Key: "index", Value: strconv.Itoa(index)},
{Key: "uuid", Value: gpu.UUID},
{Key: "product_name", Value: gpu.ProductName},
}
Expand Down
4 changes: 2 additions & 2 deletions src/go/plugin/go.d/modules/nvidia_smi/collect.go
Original file line number Diff line number Diff line change
Expand Up @@ -38,7 +38,7 @@ func (nv *NvidiaSmi) collectGPUInfo(mx map[string]int64) error {
seenGPU := make(map[string]bool)
seenMIG := make(map[string]bool)

for _, gpu := range info.GPUs {
for i, gpu := range info.GPUs {
if !isValidValue(gpu.UUID) {
continue
}
Expand All @@ -49,7 +49,7 @@ func (nv *NvidiaSmi) collectGPUInfo(mx map[string]int64) error {

if !nv.gpus[px] {
nv.gpus[px] = true
nv.addGPUXMLCharts(gpu)
nv.addGpuCharts(gpu, i)
}

addMetric(mx, px+"pcie_bandwidth_usage_rx", gpu.PCI.RxUtil, 1024) // KB => bytes
Expand Down
6 changes: 4 additions & 2 deletions src/go/plugin/go.d/modules/nvidia_smi/metadata.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -98,7 +98,9 @@ modules:
description: These metrics refer to the GPU.
labels:
- name: uuid
description: GPU id (e.g. 00000000:00:04.0)
description: GPU uuid (e.g. GPU-27b94a00-ed54-5c24-b1fd-1054085de32a)
- name: index
description: GPU index (nvidia-smi typically orders GPUs by PCI bus ID)
- name: product_name
description: GPU product name (e.g. NVIDIA A100-SXM4-40GB)
metrics:
Expand Down Expand Up @@ -211,7 +213,7 @@ modules:
description: These metrics refer to the Multi-Instance GPU (MIG).
labels:
- name: uuid
description: GPU id (e.g. 00000000:00:04.0)
description: GPU uuid (e.g. GPU-27b94a00-ed54-5c24-b1fd-1054085de32a)
- name: product_name
description: GPU product name (e.g. NVIDIA A100-SXM4-40GB)
- name: gpu_instance_id
Expand Down

0 comments on commit c22ad52

Please sign in to comment.