Skip to content

Commit

Permalink
add systemd_triggered_by label to container_info metric to detect…
Browse files Browse the repository at this point in the history
… periodic jobs triggered by systemd timers
  • Loading branch information
def committed Jun 20, 2024
1 parent 4787e13 commit d0e515d
Show file tree
Hide file tree
Showing 4 changed files with 73 additions and 12 deletions.
23 changes: 12 additions & 11 deletions containers/container.go
Original file line number Diff line number Diff line change
Expand Up @@ -35,15 +35,16 @@ type ContainerNetwork struct {
}

// ContainerMetadata groups the descriptive attributes kept for a container.
// NOTE(review): this is a rendered diff without +/- markers, so the field list
// appears twice: apparently the pre-change lines first, then the re-aligned
// lines with systemdTriggeredBy added. Confirm against the real file.
type ContainerMetadata struct {
name string
labels map[string]string
volumes map[string]string
logPath string
image string
logDecoder logparser.Decoder
hostListens map[string][]netaddr.IPPort
networks map[string]ContainerNetwork
env map[string]string
name string
labels map[string]string
volumes map[string]string
logPath string
// image is emitted as the "image" label of the container_info metric.
image string
logDecoder logparser.Decoder
hostListens map[string][]netaddr.IPPort
networks map[string]ContainerNetwork
env map[string]string
// systemdTriggeredBy is filled from SystemdTriggeredBy(cg.ContainerId) for
// systemd-service cgroups and emitted as the "systemd_triggered_by" label
// of the container_info metric.
systemdTriggeredBy string
}

type Delays struct {
Expand Down Expand Up @@ -225,8 +226,8 @@ func (c *Container) Collect(ch chan<- prometheus.Metric) {
c.lock.RLock()
defer c.lock.RUnlock()

if c.metadata.image != "" {
ch <- gauge(metrics.ContainerInfo, 1, c.metadata.image)
if c.metadata.image != "" || c.metadata.systemdTriggeredBy != "" {
ch <- gauge(metrics.ContainerInfo, 1, c.metadata.image, c.metadata.systemdTriggeredBy)
}

ch <- counter(metrics.Restarts, float64(c.restarts))
Expand Down
2 changes: 1 addition & 1 deletion containers/metrics.go
Original file line number Diff line number Diff line change
Expand Up @@ -47,7 +47,7 @@ var metrics = struct {
JvmSafepointSyncTime *prometheus.Desc
Ip2Fqdn *prometheus.Desc
}{
ContainerInfo: metric("container_info", "Meta information about the container", "image"),
ContainerInfo: metric("container_info", "Meta information about the container", "image", "systemd_triggered_by"),

Restarts: metric("container_restarts_total", "Number of times the container was restarted"),

Expand Down
4 changes: 4 additions & 0 deletions containers/registry.go
Original file line number Diff line number Diff line change
Expand Up @@ -410,6 +410,10 @@ func calcId(cg *cgroup.Cgroup, md *ContainerMetadata) ContainerID {

func getContainerMetadata(cg *cgroup.Cgroup) (*ContainerMetadata, error) {
switch cg.ContainerType {
case cgroup.ContainerTypeSystemdService:
md := &ContainerMetadata{}
md.systemdTriggeredBy = SystemdTriggeredBy(cg.ContainerId)
return md, nil
case cgroup.ContainerTypeDocker, cgroup.ContainerTypeContainerd, cgroup.ContainerTypeSandbox, cgroup.ContainerTypeCrio:
default:
return &ContainerMetadata{}, nil
Expand Down
56 changes: 56 additions & 0 deletions containers/systemd.go
Original file line number Diff line number Diff line change
@@ -0,0 +1,56 @@
package containers

import (
"context"
"os"
"strconv"
"strings"
"time"

"github.com/coroot/coroot-node-agent/proc"

"github.com/coreos/go-systemd/v22/dbus"
gdbus "github.com/godbus/dbus/v5"

"k8s.io/klog/v2"
)

var (
// conn is the package-wide systemd D-Bus connection assigned in init.
// SystemdTriggeredBy treats a nil conn as "no connection" and returns "".
conn *dbus.Conn
// dbusTimeout bounds each property lookup that SystemdTriggeredBy makes through conn.
dbusTimeout = time.Second
)

// init opens the package-wide connection to systemd over the host's private
// bus socket (/run/systemd/private, resolved through proc.HostPath) and
// authenticates as the current uid. A connection failure is not fatal: it is
// logged as a warning and the result of dbus.NewConnection is kept in conn.
func init() {
	var err error
	conn, err = dbus.NewConnection(func() (*gdbus.Conn, error) {
		c, err := gdbus.Dial("unix:path=" + proc.HostPath("/run/systemd/private"))
		if err != nil {
			return nil, err
		}
		methods := []gdbus.Auth{gdbus.AuthExternal(strconv.Itoa(os.Getuid()))}
		if err = c.Auth(methods); err != nil {
			// Close the socket that was just dialed. The package-level conn has
			// not been assigned yet while this dial callback runs, so calling
			// conn.Close() here would act on a nil *dbus.Conn and leak c.
			_ = c.Close()
			return nil, err
		}
		return c, nil
	})
	if err != nil {
		klog.Warningln("failed to connect to systemd bus:", err)
	}
}

// SystemdTriggeredBy looks up the "TriggeredBy" property of the systemd unit
// named by the last "/"-separated segment of id and returns its first entry.
// It returns "" when there is no bus connection, when the lookup yields no
// property, or when the property value is not a non-empty []string.
func SystemdTriggeredBy(id string) string {
	if conn == nil {
		return ""
	}

	// Everything after the final "/" is the unit name; an id without any "/"
	// is used as a whole.
	unit := id[strings.LastIndex(id, "/")+1:]

	ctx, cancel := context.WithTimeout(context.Background(), dbusTimeout)
	defer cancel()

	// Best effort: the lookup error is dropped and only a nil property is
	// treated as "nothing found".
	prop, _ := conn.GetUnitPropertyContext(ctx, unit, "TriggeredBy")
	if prop == nil {
		return ""
	}

	triggers, ok := prop.Value.Value().([]string)
	if !ok || len(triggers) == 0 {
		return ""
	}
	return triggers[0]
}

0 comments on commit d0e515d

Please sign in to comment.