Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

updates to storage controller and memory metrics #80

Merged
merged 3 commits into from
Jun 20, 2024
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
3 changes: 3 additions & 0 deletions CHANGELOG.md
Original file line number Diff line number Diff line change
Expand Up @@ -16,6 +16,9 @@ log is based on the [Keep a CHANGELOG](http://keepachangelog.com/) project.
- Add HPE Proliant XL420 Support [#33](https://github.com/Comcast/fishymetrics/issues/33)
- consolidate exporters into a single generic exporter [#52](https://github.com/Comcast/fishymetrics/issues/52)
- update Dockerfile to comply with opensource packaging requirements [#61](https://github.com/Comcast/fishymetrics/issues/61)
- Storage controller status metric for HP servers [#79](https://github.com/Comcast/fishymetrics/issues/79)
- Ignore CPU metrics if Processor is Absent [#79](https://github.com/Comcast/fishymetrics/issues/79)
- Added support for metrics collection from Dell servers [#77](https://github.com/Comcast/fishymetrics/issues/77)

## Fixed

Expand Down
68 changes: 64 additions & 4 deletions exporter/exporter.go
Original file line number Diff line number Diff line change
Expand Up @@ -99,11 +99,14 @@ type SystemEndpoints struct {
systems []string
power []string
thermal []string
volumes []string
virtualDrives []string
}

type DriveEndpoints struct {
logicalDriveURLs []string
physicalDriveURLs []string
arrayControllerURLs []string
logicalDriveURLs []string
physicalDriveURLs []string
}

type Excludes map[string]interface{}
Expand Down Expand Up @@ -238,8 +241,47 @@ func NewExporter(ctx context.Context, target, uri, profile, model string, exclud
return nil, err
}

// newer servers have volumes endpoint in storage controller, these volumes hold virtual drives member urls
if len(sysEndpoints.storageController) > 0 {
var controllerOutput oem.System
for _, controller := range sysEndpoints.storageController {
controllerOutput, err = getSystemsMetadata(exp.url+controller, target, retryClient)
if err != nil {
log.Error("error when getting storage controller metadata", zap.Error(err), zap.Any("trace_id", ctx.Value("traceID")))
return nil, err
}
if controllerOutput.Volumes.URL != "" {
url := appendSlash(controllerOutput.Volumes.URL)
if checkUnique(sysEndpoints.volumes, url) {
sysEndpoints.volumes = append(sysEndpoints.volumes, url)
}
}
}
}
if len(sysEndpoints.volumes) > 0 {
for _, volume := range sysEndpoints.volumes {
virtualDrives, err := getMemberUrls(exp.url+volume, target, retryClient)
if err != nil {
log.Error("error when getting virtual drive member urls", zap.Error(err), zap.Any("trace_id", ctx.Value("traceID")))
return nil, err
}
if len(virtualDrives) > 0 {
for _, virtualDrive := range virtualDrives {
if strings.Contains(virtualDrive, "Virtual") {
url := appendSlash(virtualDrive)
if checkUnique(sysEndpoints.virtualDrives, url) {
sysEndpoints.virtualDrives = append(sysEndpoints.virtualDrives, url)
}
}
}
}
}
}

log.Debug("systems endpoints response", zap.Strings("systems_endpoints", sysEndpoints.systems),
zap.Strings("storage_ctrl_endpoints", sysEndpoints.storageController),
zap.Strings("volumes_endpoints", sysEndpoints.volumes),
zap.Strings("virtual_drives_endpoints", sysEndpoints.virtualDrives),
zap.Strings("drives_endpoints", sysEndpoints.drives),
zap.Strings("power_endpoints", sysEndpoints.power),
zap.Strings("thermal_endpoints", sysEndpoints.thermal),
Expand Down Expand Up @@ -305,11 +347,24 @@ func NewExporter(ctx context.Context, target, uri, profile, model string, exclud
}
}

log.Debug("drive endpoints response", zap.Strings("logical_drive_endpoints", driveEndpointsResp.logicalDriveURLs),
if len(sysEndpoints.storageController) == 0 && ss == "" {
driveEndpointsResp, err = getAllDriveEndpoints(ctx, exp.url, exp.url+sysEndpoints.systems[0]+"Storage/", target, retryClient)
if err != nil {
log.Error("error when getting drive endpoints", zap.Error(err), zap.Any("trace_id", ctx.Value("traceID")))
return nil, err
}
}

log.Debug("drive endpoints response", zap.Strings("array_controller_endpoints", driveEndpointsResp.arrayControllerURLs),
zap.Strings("logical_drive_endpoints", driveEndpointsResp.logicalDriveURLs),
zap.Strings("physical_drive_endpoints", driveEndpointsResp.physicalDriveURLs),
zap.Any("trace_id", ctx.Value("traceID")))

// Loop through logicalDriveURLs, physicalDriveURLs, and nvmeDriveURLs and append each URL to the tasks pool
// Loop through arrayControllerURLs, logicalDriveURLs, physicalDriveURLs, and nvmeDriveURLs and append each URL to the tasks pool
for _, url := range driveEndpointsResp.arrayControllerURLs {
tasks = append(tasks, pool.NewTask(common.Fetch(exp.url+url, target, profile, retryClient), exp.url+url, handle(&exp, STORAGE_CONTROLLER)))
}

for _, url := range driveEndpointsResp.logicalDriveURLs {
tasks = append(tasks, pool.NewTask(common.Fetch(exp.url+url, target, profile, retryClient), exp.url+url, handle(&exp, LOGICALDRIVE)))
}
Expand All @@ -328,6 +383,11 @@ func NewExporter(ctx context.Context, target, uri, profile, model string, exclud
tasks = append(tasks, pool.NewTask(common.Fetch(exp.url+url, target, profile, retryClient), exp.url+url, handle(&exp, STORAGE_CONTROLLER)))
}

// virtual drives
for _, url := range sysEndpoints.virtualDrives {
tasks = append(tasks, pool.NewTask(common.Fetch(exp.url+url, target, profile, retryClient), exp.url+url, handle(&exp, LOGICALDRIVE)))
}

// power
for _, url := range sysEndpoints.power {
tasks = append(tasks, pool.NewTask(common.Fetch(exp.url+url, target, profile, retryClient), exp.url+url, handle(&exp, POWER)))
Expand Down
69 changes: 64 additions & 5 deletions exporter/handlers.go
Original file line number Diff line number Diff line change
Expand Up @@ -113,13 +113,24 @@ func (e *Exporter) exportPowerMetrics(body []byte) error {
for _, pv := range pm.Voltages {
if pv.Status.State == "Enabled" {
var volts float64
var upperThresCrit float64
switch pv.ReadingVolts.(type) {
case float64:
volts = pv.ReadingVolts.(float64)
case string:
volts, _ = strconv.ParseFloat(pv.ReadingVolts.(string), 32)
}
(*pow)["voltageOutput"].WithLabelValues(pv.Name, e.ChassisSerialNumber, e.Model).Set(volts)
switch pv.UpperThresholdCritical.(type) {
case float64:
upperThresCrit = pv.UpperThresholdCritical.(float64)
case string:
upperThresCrit, _ = strconv.ParseFloat(pv.UpperThresholdCritical.(string), 32)
}
if volts == 0 && upperThresCrit == 0 {
continue
} else {
(*pow)["voltageOutput"].WithLabelValues(pv.Name, e.ChassisSerialNumber, e.Model).Set(volts)
}
if pv.Status.Health == "OK" {
state = OK
} else {
Expand Down Expand Up @@ -199,7 +210,11 @@ func (e *Exporter) exportThermalMetrics(body []byte) error {
}

if fan.FanName != "" {
(*therm)["fanSpeed"].WithLabelValues(fan.FanName, e.ChassisSerialNumber, e.Model).Set(float64(fan.CurrentReading))
if float64(fan.CurrentReading) != 0 {
(*therm)["fanSpeed"].WithLabelValues(fan.FanName, e.ChassisSerialNumber, e.Model).Set(float64(fan.CurrentReading))
} else {
(*therm)["fanSpeed"].WithLabelValues(fan.FanName, e.ChassisSerialNumber, e.Model).Set(fanSpeed)
}
} else {
(*therm)["fanSpeed"].WithLabelValues(fan.Name, e.ChassisSerialNumber, e.Model).Set(fanSpeed)
}
Expand Down Expand Up @@ -291,10 +306,24 @@ func (e *Exporter) exportLogicalDriveMetrics(body []byte) error {
var state float64
var dllogical oem.LogicalDriveMetrics
var dllogicaldrive = (*e.DeviceMetrics)["logicalDriveMetrics"]
var ldName string
var raidType string
var volIdentifier string
err := json.Unmarshal(body, &dllogical)
if err != nil {
return fmt.Errorf("Error Unmarshalling LogicalDriveMetrics - " + err.Error())
}
if dllogical.Raid == "" {
ldName = dllogical.DisplayName
raidType = dllogical.RaidType
if len(dllogical.Identifiers) > 0 {
volIdentifier = dllogical.Identifiers[0].DurableName
}
} else {
ldName = dllogical.LogicalDriveName
raidType = dllogical.Raid
volIdentifier = dllogical.VolumeUniqueIdentifier
}
// Check logical drive is enabled then check status and convert string to numeric values
if dllogical.Status.State == "Enabled" {
if dllogical.Status.Health == "OK" {
Expand All @@ -306,7 +335,7 @@ func (e *Exporter) exportLogicalDriveMetrics(body []byte) error {
state = DISABLED
}

(*dllogicaldrive)["raidStatus"].WithLabelValues(dllogical.Name, e.ChassisSerialNumber, e.Model, dllogical.LogicalDriveName, dllogical.VolumeUniqueIdentifier, dllogical.Raid).Set(state)
(*dllogicaldrive)["raidStatus"].WithLabelValues(dllogical.Name, e.ChassisSerialNumber, e.Model, ldName, volIdentifier, raidType).Set(state)
return nil
}

Expand Down Expand Up @@ -374,13 +403,26 @@ func (e *Exporter) exportStorageControllerMetrics(body []byte) error {
if sc.Status.State == "Enabled" {
if sc.Status.Health == "OK" {
state = OK
} else if sc.Status.Health == "" && sc.Status.HealthRollup == "" {
continue
} else {
state = BAD
}
(*drv)["storageControllerStatus"].WithLabelValues(scm.Name, e.ChassisSerialNumber, e.Model, sc.FirmwareVersion, sc.Model).Set(state)
}
}

if len(scm.StorageController.StorageController) == 0 {
if scm.Status.State == "Enabled" {
if scm.Status.Health == "OK" {
state = OK
} else {
state = BAD
}
(*drv)["storageControllerStatus"].WithLabelValues(scm.Name, e.ChassisSerialNumber, e.Model, scm.ControllerFirmware.FirmwareVersion, scm.Model).Set(state)
}
}

return nil
}

Expand All @@ -390,18 +432,29 @@ func (e *Exporter) exportMemorySummaryMetrics(body []byte) error {
var state float64
var dlm oem.System
var dlMemory = (*e.DeviceMetrics)["memoryMetrics"]
var totalSystemMemoryGiB string
err := json.Unmarshal(body, &dlm)
if err != nil {
return fmt.Errorf("Error Unmarshalling MemorySummaryMetrics - " + err.Error())
}
// Check memory status and convert string to numeric values
if dlm.MemorySummary.Status.HealthRollup == "OK" {
// Ignore memory summary if status is not present
if dlm.MemorySummary.Status.HealthRollup == "" {
return nil
} else if dlm.MemorySummary.Status.HealthRollup == "OK" {
state = OK
} else {
state = BAD
}

(*dlMemory)["memoryStatus"].WithLabelValues(e.ChassisSerialNumber, e.Model, strconv.Itoa(dlm.MemorySummary.TotalSystemMemoryGiB)).Set(state)
switch dlm.MemorySummary.TotalSystemMemoryGiB.(type) {
case int:
totalSystemMemoryGiB = strconv.Itoa(dlm.MemorySummary.TotalSystemMemoryGiB.(int))
case float64:
totalSystemMemoryGiB = strconv.FormatFloat(dlm.MemorySummary.TotalSystemMemoryGiB.(float64), 'f', -1, 64)
}

(*dlMemory)["memoryStatus"].WithLabelValues(e.ChassisSerialNumber, e.Model, totalSystemMemoryGiB).Set(state)

return nil
}
Expand Down Expand Up @@ -566,6 +619,12 @@ func (e *Exporter) exportProcessorMetrics(body []byte) error {
case int:
totCores = strconv.Itoa(pm.TotalCores.(int))
}

// Ignore metrics if processor is absent
if pm.Status.State == "Absent" {
return nil
}

if pm.Status.Health == "OK" {
state = OK
} else {
Expand Down
37 changes: 37 additions & 0 deletions exporter/helpers.go
Original file line number Diff line number Diff line change
Expand Up @@ -334,6 +334,43 @@ func getAllDriveEndpoints(ctx context.Context, fqdn, initialUrl, host string, cl
return driveEndpoints, err
}

// This if condition is for servers with iLO6. Gather metrics only from controllers with drives
// /redfish/v1/Systems/XXXX/Storage/XXXXX/
if len(arrayCtrlResp.StorageDrives) > 0 {
for _, member := range arrayCtrlResp.StorageDrives {
driveEndpoints.physicalDriveURLs = append(driveEndpoints.physicalDriveURLs, appendSlash(member.URL))
}

// If Volumes are present, parse volumes endpoint until all urls are found
if arrayCtrlResp.Volumes.URL != "" {
volumeOutput, err := getDriveEndpoint(fqdn+arrayCtrlResp.Volumes.URL, host, client)
if err != nil {
log.Error("api call "+fqdn+arrayCtrlResp.Volumes.URL+" failed", zap.Error(err), zap.Any("trace_id", ctx.Value("traceID")))
return driveEndpoints, err
}

for _, member := range volumeOutput.Members {
driveEndpoints.logicalDriveURLs = append(driveEndpoints.logicalDriveURLs, appendSlash(member.URL))
}
}

if arrayCtrlResp.Controllers.URL != "" {
controllerOutput, err := getDriveEndpoint(fqdn+arrayCtrlResp.Controllers.URL, host, client)
if err != nil {
log.Error("api call "+fqdn+arrayCtrlResp.Controllers.URL+" failed", zap.Error(err), zap.Any("trace_id", ctx.Value("traceID")))
return driveEndpoints, err
}

for _, member := range controllerOutput.Members {
driveEndpoints.arrayControllerURLs = append(driveEndpoints.arrayControllerURLs, appendSlash(member.URL))
}
}
} else if arrayCtrlResp.LinksUpper.PhysicalDrives.URL != "" || arrayCtrlResp.LinksLower.PhysicalDrives.URL != "" {
// /redfish/v1/Systems/XXXX/SmartStorage/ArrayControllers/X/
driveEndpoints.arrayControllerURLs = append(driveEndpoints.arrayControllerURLs, appendSlash(member.URL))
}

// all other servers apart from iLO6
// If LogicalDrives is present, parse logical drive endpoint until all urls are found
if arrayCtrlResp.LinksUpper.LogicalDrives.URL != "" {
logicalDriveOutput, err := getDriveEndpoint(fqdn+arrayCtrlResp.LinksUpper.LogicalDrives.URL, host, client)
Expand Down
41 changes: 26 additions & 15 deletions oem/drive.go
Original file line number Diff line number Diff line change
Expand Up @@ -44,17 +44,24 @@ type NVMeDriveMetrics struct {
// Logical Drives
// /redfish/v1/Systems/X/SmartStorage/ArrayControllers/X/LogicalDrives/X/
type LogicalDriveMetrics struct {
Id string `json:"Id"`
CapacityMiB int `json:"CapacityMiB"`
Description string `json:"Description"`
InterfaceType string `json:"InterfaceType"`
LogicalDriveName string `json:"LogicalDriveName"`
LogicalDriveNumber int `json:"LogicalDriveNumber"`
Name string `json:"Name"`
Raid string `json:"Raid"`
Status Status `json:"Status"`
StripeSizebytes int `json:"StripeSizebytes"`
VolumeUniqueIdentifier string `json:"VolumeUniqueIdentifier"`
Id string `json:"Id"`
CapacityMiB int `json:"CapacityMiB"`
Description string `json:"Description"`
DisplayName string `json:"DisplayName"`
InterfaceType string `json:"InterfaceType"`
Identifiers []Identifiers `json:"Identifiers"`
LogicalDriveName string `json:"LogicalDriveName"`
LogicalDriveNumber int `json:"LogicalDriveNumber"`
Name string `json:"Name"`
Raid string `json:"Raid"`
RaidType string `json:"RAIDType"`
Status Status `json:"Status"`
StripeSizebytes int `json:"StripeSizebytes"`
VolumeUniqueIdentifier string `json:"VolumeUniqueIdentifier"`
}

// Identifiers is the element type of LogicalDriveMetrics.Identifiers (JSON key
// "Identifiers"); only the DurableName field of each entry is decoded.
// exportLogicalDriveMetrics uses the first entry's DurableName as the volume
// identifier label when the payload has no "Raid" value.
type Identifiers struct {
DurableName string `json:"DurableName"`
}

// Disk Drives
Expand Down Expand Up @@ -116,10 +123,14 @@ func (w *LocationWrapper) UnmarshalJSON(data []byte) error {
// /redfish/v1/Systems/X/SmartStorage/ArrayControllers/ for Logical and Physical Drives
// /redfish/v1/Chassis/X/Drives/ for NVMe Drive(s)
type GenericDrive struct {
Members []Members `json:"Members,omitempty"`
LinksUpper LinksUpper `json:"Links,omitempty"`
LinksLower LinksLower `json:"links,omitempty"`
MembersCount int `json:"Members@odata.count,omitempty"`
Members []Members `json:"Members,omitempty"`
LinksUpper LinksUpper `json:"Links,omitempty"`
LinksLower LinksLower `json:"links,omitempty"`
MembersCount int `json:"Members@odata.count,omitempty"`
DriveCount int `json:"Drives@odata.count,omitempty"`
StorageDrives []Link `json:"Drives,omitempty"`
Volumes Link `json:"Volumes,omitempty"`
Controllers Link `json:"Controllers,omitempty"`
}

type Members struct {
Expand Down
2 changes: 1 addition & 1 deletion oem/power.go
Original file line number Diff line number Diff line change
Expand Up @@ -35,7 +35,7 @@ type PowerMetrics struct {
// PowerControl is the top level json object for metadata on power supply consumption
type PowerControl struct {
MemberID string `json:"MemberId"`
PowerCapacityWatts int `json:"PowerCapacityWatts,omitempty"`
PowerCapacityWatts interface{} `json:"PowerCapacityWatts,omitempty"`
PowerConsumedWatts interface{} `json:"PowerConsumedWatts"`
PowerMetrics PowerMetric `json:"PowerMetrics"`
}
Expand Down
Loading
Loading