diff --git a/systemd/systemd.go b/systemd/systemd.go index 6b5d813..b02c2b8 100644 --- a/systemd/systemd.go +++ b/systemd/systemd.go @@ -69,6 +69,7 @@ type Collector struct { unitActiveExitTimeDesc *prometheus.Desc unitInactiveEnterTimeDesc *prometheus.Desc unitInactiveExitTimeDesc *prometheus.Desc + unitMemoryCurrentDesc *prometheus.Desc nRestartsDesc *prometheus.Desc timerLastTriggerDesc *prometheus.Desc socketAcceptedConnectionsDesc *prometheus.Desc @@ -142,6 +143,11 @@ func NewCollector(logger log.Logger) (*Collector, error) { "Last time the unit transitioned out of the inactive state", []string{"name", "type"}, nil, ) + unitMemoryCurrentDesc := prometheus.NewDesc( + prometheus.BuildFQName(namespace, "", "unit_memory_current"), + "Current memory per Systemd unit", + []string{"name"}, nil, + ) nRestartsDesc := prometheus.NewDesc( prometheus.BuildFQName(namespace, "", "service_restart_total"), "Service unit count of Restart triggers", []string{"name"}, nil) @@ -205,6 +211,7 @@ func NewCollector(logger log.Logger) (*Collector, error) { unitActiveExitTimeDesc: unitActiveExitTimeDesc, unitInactiveEnterTimeDesc: unitInactiveEnterTimeDesc, unitInactiveExitTimeDesc: unitInactiveExitTimeDesc, + unitMemoryCurrentDesc: unitMemoryCurrentDesc, nRestartsDesc: nRestartsDesc, timerLastTriggerDesc: timerLastTriggerDesc, socketAcceptedConnectionsDesc: socketAcceptedConnectionsDesc, @@ -235,6 +242,7 @@ func (c *Collector) Describe(desc chan<- *prometheus.Desc) { desc <- c.unitStartTimeDesc desc <- c.unitTasksCurrentDesc desc <- c.unitTasksMaxDesc + desc <- c.unitMemoryCurrentDesc desc <- c.nRestartsDesc desc <- c.timerLastTriggerDesc desc <- c.socketAcceptedConnectionsDesc @@ -325,6 +333,11 @@ func (c *Collector) collectUnit(conn *dbus.Conn, ch chan<- prometheus.Metric, un level.Warn(logger).Log("msg", errUnitMetricsMsg, "err", err) } + err = c.collectServiceMemoryMetrics(conn, ch, unit) + if err != nil { + level.Warn(logger).Log("msg", errUnitMetricsMsg, "err", err) + } + err = c.collectUnitCPUUsageMetrics("Service", conn, ch, unit) if err != nil { level.Warn(logger).Log("msg", errUnitMetricsMsg, "err", err) @@ -686,6 +699,27 @@ func (c *Collector) collectServiceTasksMetrics(conn *dbus.Conn, ch chan<- promet return nil } +func (c *Collector) collectServiceMemoryMetrics(conn *dbus.Conn, ch chan<- prometheus.Metric, unit dbus.UnitStatus) error { + memoryCurrentCount, err := conn.GetUnitTypePropertyContext(c.ctx, unit.Name, "Service", "MemoryCurrent") + if err != nil { + return errors.Wrapf(err, errGetPropertyMsg, "MemoryCurrent") + } + + currentCount, ok := memoryCurrentCount.Value.Value().(uint64) + if !ok { + return errors.Errorf(errConvertUint64PropertyMsg, "MemoryCurrent", memoryCurrentCount.Value.Value()) + } + + // Don't set if memoryCurrent if dbus reports MaxUint64. + if currentCount != math.MaxUint64 { + ch <- prometheus.MustNewConstMetric( + c.unitMemoryCurrentDesc, prometheus.GaugeValue, + float64(currentCount), unit.Name) + } + + return nil +} + func (c *Collector) collectTimerTriggerTime(conn *dbus.Conn, ch chan<- prometheus.Metric, unit dbus.UnitStatus) error { lastTriggerValue, err := conn.GetUnitTypePropertyContext(c.ctx, unit.Name, "Timer", "LastTriggerUSec") if err != nil {