forked from influxdata/telegraf
-
Notifications
You must be signed in to change notification settings - Fork 5
Commit
This commit does not belong to any branch on this repository, and may belong to a fork outside of the repository.
Add new "systemd_units" input plugin (influxdata#4532)
- Loading branch information
1 parent
fd2e988
commit 6839e55
Showing
6 changed files
with
466 additions
and
0 deletions.
There are no files selected for viewing
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,140 @@ | ||
# Systemd Units Plugin | ||
|
||
The systemd_units plugin gathers systemd unit status on Linux. It relies on | ||
`systemctl list-units --all --type=service` to collect data on service status. | ||
|
||
The results are tagged with the unit name and provide enumerated fields for | ||
the load, active and sub states, indicating the unit health. | ||
|
||
This plugin is related to the [win_services module](../win_services/), which | ||
fulfills the same purpose on Windows. | ||
|
||
In addition to services, this plugin can gather other unit types as well, | ||
see `systemctl list-units --all --type help` for possible options. | ||
|
||
### Configuration | ||
``` | ||
[[inputs.systemd_units]] | ||
## Set timeout for systemctl execution | ||
# timeout = "1s" | ||
# | ||
## Filter for a specific unit type, default is "service", other possible | ||
## values are "socket", "target", "device", "mount", "automount", "swap", | ||
## "timer", "path", "slice" and "scope": | ||
# unittype = "service" | ||
``` | ||
|
||
### Metrics | ||
- systemd_units: | ||
- tags: | ||
- name (string, unit name) | ||
- load (string, load state) | ||
- active (string, active state) | ||
- sub (string, sub state) | ||
- fields: | ||
- load_code (int, see below) | ||
- active_code (int, see below) | ||
- sub_code (int, see below) | ||
|
||
#### Load | ||
|
||
enumeration of [unit_load_state_table](https://github.com/systemd/systemd/blob/c87700a1335f489be31cd3549927da68b5638819/src/basic/unit-def.c#L87) | ||
|
||
| Value | Meaning | Description | | ||
| ----- | ------- | ----------- | | ||
| 0 | loaded | unit is ~ | | ||
| 1 | stub | unit is ~ | | ||
| 2 | not-found | unit is ~ | | ||
| 3 | bad-setting | unit is ~ | | ||
| 4 | error | unit is ~ | | ||
| 5 | merged | unit is ~ | | ||
| 6 | masked | unit is ~ | | ||
|
||
#### Active | ||
|
||
enumeration of [unit_active_state_table](https://github.com/systemd/systemd/blob/c87700a1335f489be31cd3549927da68b5638819/src/basic/unit-def.c#L99) | ||
|
||
| Value | Meaning | Description | | ||
| ----- | ------- | ----------- | | ||
| 0 | active | unit is ~ | | ||
| 1 | reloading | unit is ~ | | ||
| 2 | inactive | unit is ~ | | ||
| 3 | failed | unit is ~ | | ||
| 4 | activating | unit is ~ | | ||
| 5 | deactivating | unit is ~ | | ||
|
||
#### Sub | ||
|
||
enumeration of sub states, see various [unittype_state_tables](https://github.com/systemd/systemd/blob/c87700a1335f489be31cd3549927da68b5638819/src/basic/unit-def.c#L163); | ||
duplicates were removed, tables are hex aligned to keep some space for future | ||
values | ||
|
||
| Value | Meaning | Description | | ||
| ----- | ------- | ----------- | | ||
| | | service_state_table start at 0x0000 | | ||
| 0x0000 | running | unit is ~ | | ||
| 0x0001 | dead | unit is ~ | | ||
| 0x0002 | start-pre | unit is ~ | | ||
| 0x0003 | start | unit is ~ | | ||
| 0x0004 | exited | unit is ~ | | ||
| 0x0005 | reload | unit is ~ | | ||
| 0x0006 | stop | unit is ~ | | ||
| 0x0007 | stop-watchdog | unit is ~ | | ||
| 0x0008 | stop-sigterm | unit is ~ | | ||
| 0x0009 | stop-sigkill | unit is ~ | | ||
| 0x000a | stop-post | unit is ~ | | ||
| 0x000b | final-sigterm | unit is ~ | | ||
| 0x000c | failed | unit is ~ | | ||
| 0x000d | auto-restart | unit is ~ | | ||
| | | automount_state_table start at 0x0010 | | ||
| 0x0010 | waiting | unit is ~ | | ||
| | | device_state_table start at 0x0020 | | ||
| 0x0020 | tentative | unit is ~ | | ||
| 0x0021 | plugged | unit is ~ | | ||
| | | mount_state_table start at 0x0030 | | ||
| 0x0030 | mounting | unit is ~ | | ||
| 0x0031 | mounting-done | unit is ~ | | ||
| 0x0032 | mounted | unit is ~ | | ||
| 0x0033 | remounting | unit is ~ | | ||
| 0x0034 | unmounting | unit is ~ | | ||
| 0x0035 | remounting-sigterm | unit is ~ | | ||
| 0x0036 | remounting-sigkill | unit is ~ | | ||
| 0x0037 | unmounting-sigterm | unit is ~ | | ||
| 0x0038 | unmounting-sigkill | unit is ~ | | ||
| | | path_state_table start at 0x0040 | | ||
| | | scope_state_table start at 0x0050 | | ||
| 0x0050 | abandoned | unit is ~ | | ||
| | | slice_state_table start at 0x0060 | | ||
| 0x0060 | active | unit is ~ | | ||
| | | socket_state_table start at 0x0070 | | ||
| 0x0070 | start-chown | unit is ~ | | ||
| 0x0071 | start-post | unit is ~ | | ||
| 0x0072 | listening | unit is ~ | | ||
| 0x0073 | stop-pre | unit is ~ | | ||
| 0x0074 | stop-pre-sigterm | unit is ~ | | ||
| 0x0075 | stop-pre-sigkill | unit is ~ | | ||
| 0x0076 | final-sigkill | unit is ~ | | ||
| | | swap_state_table start at 0x0080 | | ||
| 0x0080 | activating | unit is ~ | | ||
| 0x0081 | activating-done | unit is ~ | | ||
| 0x0082 | deactivating | unit is ~ | | ||
| 0x0083 | deactivating-sigterm | unit is ~ | | ||
| 0x0084 | deactivating-sigkill | unit is ~ | | ||
| | | target_state_table start at 0x0090 | | ||
| | | timer_state_table start at 0x00a0 | | ||
| 0x00a0 | elapsed | unit is ~ | | ||
| | | | | ||
|
||
### Example Output | ||
|
||
Linux Systemd Units: | ||
``` | ||
$ telegraf --test --config /tmp/telegraf.conf | ||
> systemd_units,host=host1.example.com,name=dbus.service,load=loaded,active=active,sub=running load_code=0i,active_code=0i,sub_code=0i 1533730725000000000 | ||
> systemd_units,host=host1.example.com,name=networking.service,load=loaded,active=failed,sub=failed load_code=0i,active_code=3i,sub_code=12i 1533730725000000000 | ||
> systemd_units,host=host1.example.com,name=ssh.service,load=loaded,active=active,sub=running load_code=0i,active_code=0i,sub_code=0i 1533730725000000000 | ||
... | ||
``` | ||
|
||
### Possible Improvements | ||
- add blacklist to filter names |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,221 @@ | ||
package systemd_units | ||
|
||
import ( | ||
"bufio" | ||
"bytes" | ||
"fmt" | ||
"os/exec" | ||
"strings" | ||
"time" | ||
|
||
"github.com/influxdata/telegraf" | ||
"github.com/influxdata/telegraf/internal" | ||
"github.com/influxdata/telegraf/plugins/inputs" | ||
) | ||
|
||
// SystemdUnits is a telegraf plugin to gather systemd unit status | ||
type SystemdUnits struct { | ||
Timeout internal.Duration | ||
UnitType string `toml:"unittype"` | ||
systemctl systemctl | ||
} | ||
|
||
type systemctl func(Timeout internal.Duration, UnitType string) (*bytes.Buffer, error) | ||
|
||
// measurement is the metric name under which every unit is reported.
const (
	measurement = "systemd_units"
)
|
||
// The maps below mirror the systemd state tables defined in
// https://github.com/systemd/systemd/blob/c87700a1335f489be31cd3549927da68b5638819/src/basic/unit-def.c
// Strings that occur in more than one sub state table are listed only once.

// load_map translates a unit's load state string into its numeric code. The
// code of a state is its position in the ordered list below.
var load_map = func() map[string]int {
	states := []string{
		"loaded",
		"stub",
		"not-found",
		"bad-setting",
		"error",
		"merged",
		"masked",
	}

	codes := make(map[string]int, len(states))
	for code, state := range states {
		codes[state] = code
	}
	return codes
}()
|
||
// active_map translates a unit's active state string into its numeric code.
// The code of a state is its position in the ordered list below.
var active_map = func() map[string]int {
	states := []string{
		"active",
		"reloading",
		"inactive",
		"failed",
		"activating",
		"deactivating",
	}

	codes := make(map[string]int, len(states))
	for code, state := range states {
		codes[state] = code
	}
	return codes
}()
|
||
// sub_map translates a unit's sub state string into its numeric code.
//
// Each systemd sub state table is given its own base offset (a multiple of
// 0x10), and a state's code is that offset plus its position within the
// table. Tables without entries of their own (path, target) are kept so the
// offset layout stays visible.
var sub_map = func() map[string]int {
	tables := []struct {
		offset int
		states []string
	}{
		// service_state_table
		{0x0000, []string{
			"running",
			"dead",
			"start-pre",
			"start",
			"exited",
			"reload",
			"stop",
			"stop-watchdog",
			"stop-sigterm",
			"stop-sigkill",
			"stop-post",
			"final-sigterm",
			"failed",
			"auto-restart",
		}},
		// automount_state_table
		{0x0010, []string{"waiting"}},
		// device_state_table
		{0x0020, []string{"tentative", "plugged"}},
		// mount_state_table
		{0x0030, []string{
			"mounting",
			"mounting-done",
			"mounted",
			"remounting",
			"unmounting",
			"remounting-sigterm",
			"remounting-sigkill",
			"unmounting-sigterm",
			"unmounting-sigkill",
		}},
		// path_state_table
		{0x0040, nil},
		// scope_state_table
		{0x0050, []string{"abandoned"}},
		// slice_state_table
		{0x0060, []string{"active"}},
		// socket_state_table
		{0x0070, []string{
			"start-chown",
			"start-post",
			"listening",
			"stop-pre",
			"stop-pre-sigterm",
			"stop-pre-sigkill",
			"final-sigkill",
		}},
		// swap_state_table
		{0x0080, []string{
			"activating",
			"activating-done",
			"deactivating",
			"deactivating-sigterm",
			"deactivating-sigkill",
		}},
		// target_state_table
		{0x0090, nil},
		// timer_state_table
		{0x00a0, []string{"elapsed"}},
	}

	codes := make(map[string]int)
	for _, table := range tables {
		for pos, state := range table.states {
			codes[state] = table.offset + pos
		}
	}
	return codes
}()
|
||
var ( | ||
defaultTimeout = internal.Duration{Duration: time.Second} | ||
defaultUnitType = "service" | ||
) | ||
|
||
// Description returns a short description of the plugin | ||
func (s *SystemdUnits) Description() string { | ||
return "Gather systemd units state" | ||
} | ||
|
||
// SampleConfig returns sample configuration options. | ||
func (s *SystemdUnits) SampleConfig() string { | ||
return ` | ||
## Set timeout for systemctl execution | ||
# timeout = "1s" | ||
# | ||
## Filter for a specific unit type, default is "service", other possible | ||
## values are "socket", "target", "device", "mount", "automount", "swap", | ||
## "timer", "path", "slice" and "scope ": | ||
# unittype = "service" | ||
` | ||
} | ||
|
||
// Gather parses systemctl outputs and adds counters to the Accumulator | ||
func (s *SystemdUnits) Gather(acc telegraf.Accumulator) error { | ||
out, err := s.systemctl(s.Timeout, s.UnitType) | ||
if err != nil { | ||
return err | ||
} | ||
|
||
scanner := bufio.NewScanner(out) | ||
for scanner.Scan() { | ||
line := scanner.Text() | ||
|
||
data := strings.Fields(line) | ||
if len(data) < 4 { | ||
acc.AddError(fmt.Errorf("Error parsing line (expected at least 4 fields): %s", line)) | ||
continue | ||
} | ||
name := data[0] | ||
load := data[1] | ||
active := data[2] | ||
sub := data[3] | ||
tags := map[string]string{ | ||
"name": name, | ||
"load": load, | ||
"active": active, | ||
"sub": sub, | ||
} | ||
|
||
var ( | ||
load_code int | ||
active_code int | ||
sub_code int | ||
ok bool | ||
) | ||
if load_code, ok = load_map[load]; !ok { | ||
acc.AddError(fmt.Errorf("Error parsing field 'load', value not in map: %s", load)) | ||
continue | ||
} | ||
if active_code, ok = active_map[active]; !ok { | ||
acc.AddError(fmt.Errorf("Error parsing field 'active', value not in map: %s", active)) | ||
continue | ||
} | ||
if sub_code, ok = sub_map[sub]; !ok { | ||
acc.AddError(fmt.Errorf("Error parsing field 'sub', value not in map: %s", sub)) | ||
continue | ||
} | ||
fields := map[string]interface{}{ | ||
"load_code": load_code, | ||
"active_code": active_code, | ||
"sub_code": sub_code, | ||
} | ||
|
||
acc.AddFields(measurement, fields, tags) | ||
} | ||
|
||
return nil | ||
} | ||
|
||
func setSystemctl(Timeout internal.Duration, UnitType string) (*bytes.Buffer, error) { | ||
// is systemctl available ? | ||
systemctlPath, err := exec.LookPath("systemctl") | ||
if err != nil { | ||
return nil, err | ||
} | ||
|
||
cmd := exec.Command(systemctlPath, "list-units", "--all", fmt.Sprintf("--type=%s", UnitType), "--no-legend") | ||
|
||
var out bytes.Buffer | ||
cmd.Stdout = &out | ||
err = internal.RunTimeout(cmd, Timeout.Duration) | ||
if err != nil { | ||
return &out, fmt.Errorf("error running systemctl list-units --all --type=%s --no-legend: %s", UnitType, err) | ||
} | ||
|
||
return &out, nil | ||
} | ||
|
||
func init() { | ||
inputs.Add("systemd_units", func() telegraf.Input { | ||
return &SystemdUnits{ | ||
systemctl: setSystemctl, | ||
Timeout: defaultTimeout, | ||
UnitType: defaultUnitType, | ||
} | ||
}) | ||
} |
Oops, something went wrong.