Skip to content

Commit

Permalink
rewriting event log metrics
Browse files Browse the repository at this point in the history
  • Loading branch information
mrlhansen committed Jul 15, 2024
1 parent e374d51 commit f05789c
Show file tree
Hide file tree
Showing 9 changed files with 116 additions and 52 deletions.
8 changes: 4 additions & 4 deletions README.md
Original file line number Diff line number Diff line change
Expand Up @@ -62,7 +62,7 @@ metrics:
system: true
sensors: true
power: true
sel: false
events: false
storage: false
memory: false
network: false
Expand Down Expand Up @@ -125,11 +125,11 @@ idrac_power_control_avg_consumed_watts{id="0",name="System Power Control"} 166
idrac_power_control_interval_in_minutes{id="0",name="System Power Control"} 1
```

### System Event Log
On iDRAC only, the system event log can also be exported. This is not exactly an ordinary metric, but it is often convenient to be informed about new entries in the event log. The value of this metric is the unix timestamp for when the entry was created (as reported by iDRAC).
### Event Log
Event log entries are not exactly ordinary metrics, but it is often convenient to be informed about new entries in the event log. The value of this metric is the Unix timestamp at which the entry was created.

```text
idrac_sel_entry{id="1",message="The process of installing an operating system or hypervisor is successfully completed",component="BaseOSBoot/InstallationStatus",severity="OK"} 1631175352
idrac_log_entry{id="1",message="The process of installing an operating system or hypervisor is successfully completed",severity="OK"} 1631175352
```

### Storage
Expand Down
41 changes: 33 additions & 8 deletions internal/collector/client.go
Original file line number Diff line number Diff line change
Expand Up @@ -38,6 +38,7 @@ type Client struct {
storagePath string
memoryPath string
networkPath string
eventPath string
}

func newHttpClient() *http.Client {
Expand Down Expand Up @@ -137,6 +138,16 @@ func (client *Client) findAllEndpoints() error {
}
}

// Path for event log
switch client.vendor {
case DELL:
client.eventPath = "/redfish/v1/Managers/iDRAC.Embedded.1/Logs/Sel"
case LENOVO:
client.eventPath = "/redfish/v1/Systems/1/LogServices/PlatformLog/Entries"
case HPE:
client.eventPath = "/redfish/v1/Systems/1/LogServices/IML/Entries"
}

return nil
}

Expand Down Expand Up @@ -287,23 +298,37 @@ func (client *Client) RefreshPower(mc *Collector, ch chan<- prometheus.Metric) e
return nil
}

func (client *Client) RefreshIdracSel(mc *Collector, ch chan<- prometheus.Metric) error {
if client.vendor != DELL {
func (client *Client) RefreshEventLog(mc *Collector, ch chan<- prometheus.Metric) error {
if client.eventPath == "" {
return nil
}

resp := IdracSelResponse{}
err := client.redfishGet(redfishRootPath+"/Managers/iDRAC.Embedded.1/Logs/Sel", &resp)
resp := EventLogResponse{}
err := client.redfishGet(client.eventPath, &resp)
if err != nil {
return err
}

level := config.Config.Event.SeverityLevel
maxage := config.Config.Event.MaxAgeSeconds

for _, e := range resp.Members {
st := string(e.SensorType)
if st == "" {
st = "Unknown"
t, err := time.Parse(time.RFC3339, e.Created)
if err != nil {
continue
}

d := time.Since(t)
if d.Seconds() > maxage {
continue
}

severity := health2value(e.Severity)
if severity < level {
continue
}
ch <- mc.NewSelEntry(e.Id, e.Message, st, e.Severity, e.Created)

ch <- mc.NewEventLogEntry(e.Id, e.Message, e.Severity, t)
}

return nil
Expand Down
14 changes: 7 additions & 7 deletions internal/collector/collector.go
Original file line number Diff line number Diff line change
Expand Up @@ -61,7 +61,7 @@ type Collector struct {
PowerControlInterval *prometheus.Desc

// System event log
SelEntry *prometheus.Desc
EventLogEntry *prometheus.Desc

// Disk drives
DriveInfo *prometheus.Desc
Expand Down Expand Up @@ -210,10 +210,10 @@ func NewCollector() *Collector {
"Interval for measurements of power control system",
[]string{"id", "name"}, nil,
),
SelEntry: prometheus.NewDesc(
prometheus.BuildFQName(prefix, "sel", "entry"),
EventLogEntry: prometheus.NewDesc(
prometheus.BuildFQName(prefix, "log", "entry"),
"Entry from the system event log",
[]string{"id", "message", "component", "severity"}, nil,
[]string{"id", "message", "severity"}, nil,
),
DriveInfo: prometheus.NewDesc(
prometheus.BuildFQName(prefix, "drive", "info"),
Expand Down Expand Up @@ -310,7 +310,7 @@ func (collector *Collector) Describe(ch chan<- *prometheus.Desc) {
ch <- collector.PowerControlMaxConsumedWatts
ch <- collector.PowerControlAvgConsumedWatts
ch <- collector.PowerControlInterval
ch <- collector.SelEntry
ch <- collector.EventLogEntry
ch <- collector.DriveInfo
ch <- collector.DriveHealth
ch <- collector.DriveCapacity
Expand Down Expand Up @@ -372,10 +372,10 @@ func (collector *Collector) Collect(ch chan<- prometheus.Metric) {
}()
}

if config.Config.Collect.SEL {
if config.Config.Collect.Events {
wg.Add(1)
go func() {
err := collector.client.RefreshIdracSel(collector, ch)
err := collector.client.RefreshEventLog(collector, ch)
if err != nil {
collector.errors.Add(1)
}
Expand Down
25 changes: 12 additions & 13 deletions internal/collector/metrics.go
Original file line number Diff line number Diff line change
Expand Up @@ -8,7 +8,7 @@ import (
"github.com/prometheus/client_golang/prometheus"
)

func health2value(health string) float64 {
func health2value(health string) int {
switch health {
case "OK":
return 0
Expand All @@ -20,7 +20,7 @@ func health2value(health string) float64 {
return 10
}

func linkstatus2value(status string) float64 {
func linkstatus2value(status string) int {
switch status {
case "Up", "LinkUp":
return 1
Expand All @@ -45,7 +45,7 @@ func (mc *Collector) NewSystemHealth(health string) prometheus.Metric {
return prometheus.MustNewConstMetric(
mc.SystemHealth,
prometheus.GaugeValue,
value,
float64(value),
health,
)
}
Expand Down Expand Up @@ -117,7 +117,7 @@ func (mc *Collector) NewSensorsFanHealth(id, name, health string) prometheus.Met
return prometheus.MustNewConstMetric(
mc.SensorsFanHealth,
prometheus.GaugeValue,
value,
float64(value),
id,
name,
health,
Expand All @@ -140,7 +140,7 @@ func (mc *Collector) NewPowerSupplyHealth(health, id string) prometheus.Metric {
return prometheus.MustNewConstMetric(
mc.PowerSupplyHealth,
prometheus.GaugeValue,
value,
float64(value),
id,
health,
)
Expand Down Expand Up @@ -251,14 +251,13 @@ func (mc *Collector) NewPowerControlInterval(interval int, id, name string) prom
)
}

func (mc *Collector) NewSelEntry(id string, message string, component string, severity string, created time.Time) prometheus.Metric {
func (mc *Collector) NewEventLogEntry(id string, message string, severity string, created time.Time) prometheus.Metric {
return prometheus.MustNewConstMetric(
mc.SelEntry,
mc.EventLogEntry,
prometheus.CounterValue,
float64(created.Unix()),
id,
message,
component,
severity,
)
}
Expand Down Expand Up @@ -292,7 +291,7 @@ func (mc *Collector) NewDriveHealth(id, health string) prometheus.Metric {
return prometheus.MustNewConstMetric(
mc.DriveHealth,
prometheus.GaugeValue,
value,
float64(value),
id,
health,
)
Expand Down Expand Up @@ -336,7 +335,7 @@ func (mc *Collector) NewMemoryModuleHealth(id, health string) prometheus.Metric
return prometheus.MustNewConstMetric(
mc.MemoryModuleHealth,
prometheus.GaugeValue,
value,
float64(value),
id,
health,
)
Expand Down Expand Up @@ -365,7 +364,7 @@ func (mc *Collector) NewNetworkInterfaceHealth(id, health string) prometheus.Met
return prometheus.MustNewConstMetric(
mc.NetworkInterfaceHealth,
prometheus.GaugeValue,
value,
float64(value),
id,
health,
)
Expand All @@ -376,7 +375,7 @@ func (mc *Collector) NewNetworkPortHealth(iface, id, health string) prometheus.M
return prometheus.MustNewConstMetric(
mc.NetworkPortHealth,
prometheus.GaugeValue,
value,
float64(value),
iface,
id,
health,
Expand All @@ -398,7 +397,7 @@ func (mc *Collector) NewNetworkPortLinkUp(iface, id, status string) prometheus.M
return prometheus.MustNewConstMetric(
mc.NetworkPortLinkUp,
prometheus.GaugeValue,
value,
float64(value),
iface,
id,
status,
Expand Down
27 changes: 13 additions & 14 deletions internal/collector/model.go
Original file line number Diff line number Diff line change
Expand Up @@ -2,7 +2,6 @@ package collector

import (
"strconv"
"time"
)

const (
Expand Down Expand Up @@ -472,21 +471,21 @@ func (psu *PowerSupplyUnit) GetOutputPower() float64 {
return psu.LastPowerOutputWatts
}

type IdracSelResponse struct {
type EventLogResponse struct {
Name string `json:"Name"`
Description string `json:"Description"`
Members []struct {
Id string `json:"Id"`
Name string `json:"Name"`
Created time.Time `json:"Created"`
Description string `json:"Description"`
EntryCode xstring `json:"EntryCode"`
EntryType string `json:"EntryType"`
Message string `json:"Message"`
MessageArgs []any `json:"MessageArgs"`
MessageId string `json:"MessageId"`
SensorNumber int `json:"SensorNumber"`
SensorType xstring `json:"SensorType"`
Severity string `json:"Severity"`
Id string `json:"Id"`
Name string `json:"Name"`
Created string `json:"Created"`
Description string `json:"Description"`
EntryCode xstring `json:"EntryCode"`
EntryType string `json:"EntryType"`
Message string `json:"Message"`
MessageArgs []any `json:"MessageArgs"`
MessageId string `json:"MessageId"`
SensorNumber int `json:"SensorNumber"`
SensorType xstring `json:"SensorType"`
Severity string `json:"Severity"`
} `json:"Members"`
}
26 changes: 26 additions & 0 deletions internal/config/config.go
Original file line number Diff line number Diff line change
Expand Up @@ -3,6 +3,8 @@ package config
import (
"math"
"os"
"strings"
"time"

"github.com/mrlhansen/idrac_exporter/internal/log"
"gopkg.in/yaml.v2"
Expand Down Expand Up @@ -54,6 +56,7 @@ func ReadConfig(filename string) {

readConfigEnv()

// main section
if Config.Address == "" {
Config.Address = "0.0.0.0"
}
Expand All @@ -74,6 +77,7 @@ func ReadConfig(filename string) {
Config.MetricsPrefix = "idrac"
}

// hosts section
if len(Config.Hosts) == 0 {
log.Fatal("Invalid configuration: empty section: hosts")
}
Expand All @@ -87,4 +91,26 @@ func ReadConfig(filename string) {
}
v.Hostname = k
}

// events section
switch strings.ToLower(Config.Event.Severity) {
case "ok":
Config.Event.SeverityLevel = 0
case "warning", "":
Config.Event.SeverityLevel = 1
case "critical":
Config.Event.SeverityLevel = 2
default:
log.Fatal("Invalid configuration: invalid value: %s", Config.Event.Severity)
}

if Config.Event.MaxAge == "" {
Config.Event.MaxAge = "168h"
}

t, err := time.ParseDuration(Config.Event.MaxAge)
if err != nil {
log.Fatal("Invalid configuration: unable to parse duration: %v", err)
}
Config.Event.MaxAgeSeconds = t.Seconds()
}
4 changes: 3 additions & 1 deletion internal/config/env.go
Original file line number Diff line number Diff line change
Expand Up @@ -49,14 +49,16 @@ func readConfigEnv() {
getEnvString("CONFIG_METRICS_PREFIX", &Config.MetricsPrefix)
getEnvString("CONFIG_DEFAULT_USERNAME", &username)
getEnvString("CONFIG_DEFAULT_PASSWORD", &password)
getEnvString("CONFIG_EVENT_SEVERITY", &Config.Event.Severity)
getEnvString("CONFIG_EVENT_MAXAGE", &Config.Event.MaxAge)

getEnvUint("CONFIG_PORT", &Config.Port)
getEnvUint("CONFIG_TIMEOUT", &Config.Timeout)
getEnvUint("CONFIG_RETRIES", &Config.Retries)

getEnvBool("CONFIG_METRICS_SYSTEM", &Config.Collect.System)
getEnvBool("CONFIG_METRICS_SENSORS", &Config.Collect.Sensors)
getEnvBool("CONFIG_METRICS_SEL", &Config.Collect.SEL)
getEnvBool("CONFIG_METRICS_EVENTS", &Config.Collect.Events)
getEnvBool("CONFIG_METRICS_POWER", &Config.Collect.Power)
getEnvBool("CONFIG_METRICS_STORAGE", &Config.Collect.Storage)
getEnvBool("CONFIG_METRICS_MEMORY", &Config.Collect.Memory)
Expand Down
10 changes: 9 additions & 1 deletion internal/config/model.go
Original file line number Diff line number Diff line change
Expand Up @@ -11,19 +11,27 @@ type HostConfig struct {
type CollectConfig struct {
System bool `yaml:"system"`
Sensors bool `yaml:"sensors"`
SEL bool `yaml:"sel"`
Events bool `yaml:"events"`
Power bool `yaml:"power"`
Storage bool `yaml:"storage"`
Memory bool `yaml:"memory"`
Network bool `yaml:"network"`
}

// EventConfig holds the settings of the "events" configuration section,
// which control which event log entries are exported as metrics.
//
// Severity and MaxAge are the raw user-supplied strings (from YAML, or from
// the CONFIG_EVENT_SEVERITY / CONFIG_EVENT_MAXAGE environment variables).
// SeverityLevel and MaxAgeSeconds are derived from them by ReadConfig and are
// the values the collector compares against when filtering entries.
type EventConfig struct {
// Severity is the minimum severity to export. ReadConfig accepts "ok",
// "warning" and "critical" (case-insensitive); an empty value is treated
// like "warning", and any other value is reported via log.Fatal.
Severity string `yaml:"severity"`
// MaxAge is the maximum age of an exported entry, written as a duration
// string understood by time.ParseDuration. ReadConfig defaults it to "168h"
// when empty.
MaxAge string `yaml:"maxage"`
// SeverityLevel is the numeric form of Severity set by ReadConfig:
// 0 for "ok", 1 for "warning" (or empty), 2 for "critical".
// NOTE(review): this field has no yaml tag, so the YAML decoder presumably
// maps it to a lowercased default key; ReadConfig assigns it afterwards
// either way - confirm whether an explicit `yaml:"-"` is wanted.
SeverityLevel int
// MaxAgeSeconds is MaxAge converted to seconds by ReadConfig.
MaxAgeSeconds float64
}

type RootConfig struct {
mutex sync.Mutex
Address string `yaml:"address"`
Port uint `yaml:"port"`
MetricsPrefix string `yaml:"metrics_prefix"`
Collect CollectConfig `yaml:"metrics"`
Event EventConfig `yaml:"events"`
Timeout uint `yaml:"timeout"`
Retries uint `yaml:"retries"`
Hosts map[string]*HostConfig `yaml:"hosts"`
Expand Down
Loading

0 comments on commit f05789c

Please sign in to comment.