diff --git a/CHANGELOG.md b/CHANGELOG.md index e1e58fe..059f7b8 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -7,7 +7,8 @@ log is based on the [Keep a CHANGELOG](http://keepachangelog.com/) project. ## Unreleased ## Added -- Add ability to reference different vault paths for credential retrieval [#25](https://github.com/Comcast/fishymetrics/issues/25) + +- Add ability to reference different vault paths for credential retrieval [#25](https://github.com/Comcast/fishymetrics/issues/25) - Added HPE DL380 Gen10 support [#17](https://github.com/Comcast/fishymetrics/issues/17) - Enhanced drive metrics collection for DL380 model servers to include NVME, Storage Disk Drives, and Logical Drives [#17](https://github.com/Comcast/fishymetrics/issues/17) - Add ability to send logs directly to elasticsearch endpoints [#10](https://github.com/Comcast/fishymetrics/issues/10) @@ -15,9 +16,12 @@ log is based on the [Keep a CHANGELOG](http://keepachangelog.com/) project. - Add HPE Proliant XL420 Support [#33](https://github.com/Comcast/fishymetrics/issues/33) ## Fixed + - Cisco UCS C220 - add additional edge cases when collecting memory metrics [#2](https://github.com/Comcast/fishymetrics/issues/2) +- null pointer dereference errors when using incorrect credentials [#36](https://github.com/Comcast/fishymetrics/issues/36) ## Updated + - Enhanced drive metrics collection for HPE DL360 model servers to include NVME, Storage Disk Drives, and Logical Drives. [#31](https://github.com/Comcast/fishymetrics/issues/31) - Removed references to internal URLs/FQDNs to opensource the project - Cisco S3260M5 module to support FW Ver 4.2(xx) [#18](https://github.com/Comcast/fishymetrics/issues/18) @@ -26,33 +30,41 @@ log is based on the [Keep a CHANGELOG](http://keepachangelog.com/) project. 
## [0.7.1] ## Added + - added a mux prometheus middleware to collect and export metrics for every http request ## Fixed + - fix route issue from the /ignored html template ## [0.7.0] ## Fixed + - fixed Horizontal Pod Autoscaling k8s resource in helm chart ## Updated + - move buildinfo package to inside the fishymetrics repo - update all go dependencies in project to remove any potential security bugs ## [0.6.16] ## Added + - add Horizontal Pod Autoscaling capabilities - add ability to customize container resource limits/requests ## Fixed + - route prefix for metrics and info API paths ## Removed + - remove route prefix configuration ## Updated + - rename app container port name to exporter from metrics - improve README documentation - Add build info to the root home page @@ -61,36 +73,44 @@ log is based on the [Keep a CHANGELOG](http://keepachangelog.com/) project. ## [0.6.15] ## Changed + - Modified vector config in the helm chart to fix structured json log messages to elastic ## [0.6.14] ## Added + - added trace_id to all logging messages ## Changed + - fixed for loop logic for a targets scrape - updated vector config to include a json remap transform ## [0.6.13] ## Added + - added ability to forward logs to an elastic cluster using vector ## Changed + - changed logging from oyez to zap package ## [0.6.12] ## Added + - add BIOS version to device_info metric - add more labels to cisco device metrics to help with RMA automation ## Changed + - incease scrape timeout to 90 seconds for c220 devices - update helm chart to reflect updated env vars ## Fixed + - fix CI bug with Dockerfile - add DISABLED state for power and drive metric scrapes - add DISABLED state for memory and processor metric scrapes @@ -98,75 +118,90 @@ log is based on the [Keep a CHANGELOG](http://keepachangelog.com/) project. 
## [0.6.3] ## Added + - Added metrics for C220 storage/raid controllers and drives when applicable ## [0.6.2] ## Changed -- Change *url* label to be *name* and use the url path base for *name* label value + +- Change _url_ label to be _name_ and use the url path base for _name_ label value ## [0.6.1] ## Added + - Added storage controller status metric for all cisco modules - Added overall temperature status metric for all cisco modules ## Fixed + - Fix s3260m4 exporter module scrape endpoints - Fix retry logic for certain cisco redfish API calls ## [0.6.0] ## Added + - Add vault integration for chassis credentials - Add graceful shutdown of newly added go routines ## [0.5.1] ## Changed + - Temporarily removed drive scrapes from Cisco devices until we figure out the best plan forward ## [0.5.0] ## Added + - Create new prometheus exporters for Cisco UCS C220, S3260 M4, and S3260 M5 devices ## [0.4.1] ## Added + - Added support for DL20 devices ## Fixed + - Fix nil pointer reference for when module name in scrape request does not exist ## [0.4.0] ## Added + - Add support for scrapes to HP DL360s w/ iLO 5 ## [0.3.1] ## Fixed + - Metrics are not reseting the way it used to - Web UI not routing correctly when app is behind nginx-ingress ## [0.3.0] ## Changed + - Centralize fishymetrics exporter to handle more than 1 scrape endpoints ## [0.2.0] ## Added + - Add moonshot switch metrics collection for status, thermal, and power ## [0.1.1] ## Added + - Created Helm chart for deployment - Add limiter and route-prefix flags/env variables ## [0.1.0] ## Added + - Initial commit of fishymetrics exporter diff --git a/Makefile b/Makefile index 05853b0..1cc6a7a 100644 --- a/Makefile +++ b/Makefile @@ -20,7 +20,7 @@ docker: . 
test: - go test -v -p 1 -race ${FLAGS} ${TEST_PKGS} + go test -v -cover -p 1 -race ${FLAGS} ${TEST_PKGS} clean: rm -rf build/ diff --git a/cisco/c220/drive_test.go b/cisco/c220/drive_test.go index 474403c..4b33d07 100644 --- a/cisco/c220/drive_test.go +++ b/cisco/c220/drive_test.go @@ -1,5 +1,5 @@ /* - * Copyright 2023 Comcast Cable Communications Management, LLC + * Copyright 2024 Comcast Cable Communications Management, LLC * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. @@ -116,11 +116,7 @@ func Test_C220_Drive_Metrics(t *testing.T) { host: "fishymetrics.com", biosVersion: "C220M5.4.0.4i.0.zzzzzzzzz", chassisSerialNumber: "SN78901", - up: prometheus.NewGauge(prometheus.GaugeOpts{ - Name: "up", - Help: "Was the last scrape of chassis monitor successful.", - }), - deviceMetrics: metrx, + deviceMetrics: metrx, } prometheus.MustRegister(exporter) diff --git a/cisco/c220/exporter.go b/cisco/c220/exporter.go index 0daeda4..c8592ea 100644 --- a/cisco/c220/exporter.go +++ b/cisco/c220/exporter.go @@ -1,5 +1,5 @@ /* - * Copyright 2023 Comcast Cable Communications Management, LLC + * Copyright 2024 Comcast Cable Communications Management, LLC * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. @@ -21,6 +21,7 @@ import ( "crypto/tls" "encoding/json" "encoding/xml" + "errors" "fmt" "io" "net" @@ -80,15 +81,18 @@ type Exporter struct { credProfile string biosVersion string chassisSerialNumber string - - up prometheus.Gauge - deviceMetrics *map[string]*metrics + deviceMetrics *map[string]*metrics } // NewExporter returns an initialized Exporter for Cisco UCS C220 device. 
func NewExporter(ctx context.Context, target, uri, profile string) (*Exporter, error) { var fqdn *url.URL var tasks []*pool.Task + var exp = Exporter{ + ctx: ctx, + credProfile: profile, + deviceMetrics: NewDeviceMetrics(), + } log = zap.L() @@ -130,15 +134,36 @@ func NewExporter(ctx context.Context, target, uri, profile string) (*Exporter, e Host: target, } } + exp.host = fqdn.String() + + // check if host is on the ignored list, if so we immediately return + if _, ok := common.IgnoredDevices[exp.host]; ok { + var upMetric = (*exp.deviceMetrics)["up"] + (*upMetric)["up"].WithLabelValues().Set(float64(2)) + return &exp, nil + } // chassis system endpoint to use for memory, processor, bios version scrapes sysEndpoint, err := getChassisEndpoint(fqdn.String()+uri+"/Managers/CIMC", target, retryClient) if err != nil { log.Error("error when getting chassis endpoint from "+C220, zap.Error(err), zap.Any("trace_id", ctx.Value("traceID"))) + if errors.Is(err, common.ErrInvalidCredential) { + common.IgnoredDevices[exp.host] = common.IgnoredDevice{ + Name: exp.host, + Endpoint: "https://" + exp.host + "/redfish/v1/Chassis", + Module: C220, + CredentialProfile: exp.credProfile, + } + log.Info("added host "+exp.host+" to ignored list", zap.Any("trace_id", exp.ctx.Value("traceID"))) + var upMetric = (*exp.deviceMetrics)["up"] + (*upMetric)["up"].WithLabelValues().Set(float64(2)) + + return &exp, nil + } return nil, err } - chassisSN := path.Base(sysEndpoint) + exp.chassisSerialNumber = path.Base(sysEndpoint) // chassis BIOS version biosVer, err := getBIOSVersion(fqdn.String()+sysEndpoint, target, retryClient) @@ -146,6 +171,7 @@ func NewExporter(ctx context.Context, target, uri, profile string) (*Exporter, e log.Error("error when getting BIOS version from "+C220, zap.Error(err), zap.Any("trace_id", ctx.Value("traceID"))) return nil, err } + exp.biosVersion = biosVer // DIMM endpoints array dimms, err := getDIMMEndpoints(fqdn.String()+sysEndpoint+"/Memory", target, retryClient) 
@@ -190,24 +216,9 @@ func NewExporter(ctx context.Context, target, uri, profile string) (*Exporter, e } } - p := pool.NewPool(tasks, 1) - - // Create new map[string]*metrics for each new Exporter - metrx := NewDeviceMetrics() - - return &Exporter{ - ctx: ctx, - pool: p, - host: fqdn.Host, - credProfile: profile, - biosVersion: biosVer, - chassisSerialNumber: chassisSN, - up: prometheus.NewGauge(prometheus.GaugeOpts{ - Name: "up", - Help: "Was the last scrape of chassis monitor successful.", - }), - deviceMetrics: metrx, - }, nil + exp.pool = pool.NewPool(tasks, 1) + + return &exp, nil } // Describe describes all the metrics ever exported by the fishymetrics exporter. It @@ -218,7 +229,6 @@ func (e *Exporter) Describe(ch chan<- *prometheus.Desc) { n.Describe(ch) } } - ch <- e.up.Desc() } // Collect fetches the stats from configured fishymetrics location and delivers them @@ -233,10 +243,10 @@ func (e *Exporter) Collect(ch chan<- prometheus.Metric) { if _, ok := common.IgnoredDevices[e.host]; !ok { e.scrape() } else { - e.up.Set(float64(2)) + var upMetric = (*e.deviceMetrics)["up"] + (*upMetric)["up"].WithLabelValues().Set(float64(2)) } - ch <- e.up e.collectMetrics(ch) } @@ -270,7 +280,7 @@ func (e *Exporter) scrape() { if task.Err != nil { deviceState := uint8(0) // If credentials are incorrect we will add host to be ignored until manual intervention - if strings.Contains(task.Err.Error(), "401") { + if errors.Is(task.Err, common.ErrInvalidCredential) { common.IgnoredDevices[e.host] = common.IgnoredDevice{ Name: e.host, Endpoint: "https://" + e.host + "/redfish/v1/Chassis", @@ -279,7 +289,10 @@ func (e *Exporter) scrape() { } log.Info("added host "+e.host+" to ignored list", zap.Any("trace_id", e.ctx.Value("traceID"))) deviceState = 2 - e.up.Set(float64(deviceState)) + + var upMetric = (*e.deviceMetrics)["up"] + (*upMetric)["up"].WithLabelValues().Set(float64(deviceState)) + log.Error("error from "+C220, zap.Error(task.Err), zap.String("api", task.MetricType), 
zap.Any("trace_id", e.ctx.Value("traceID"))) return @@ -328,7 +341,8 @@ func (e *Exporter) scrape() { state &= result } - e.up.Set(float64(state)) + var upMetric = (*e.deviceMetrics)["up"] + (*upMetric)["up"].WithLabelValues().Set(float64(state)) } @@ -675,7 +689,11 @@ func getChassisEndpoint(url, host string, client *retryablehttp.Client) (string, } defer resp.Body.Close() if !(resp.StatusCode >= http.StatusOK && resp.StatusCode < http.StatusMultipleChoices) { - return "", fmt.Errorf("HTTP status %d", resp.StatusCode) + if resp.StatusCode == http.StatusUnauthorized { + return "", common.ErrInvalidCredential + } else { + return "", fmt.Errorf("HTTP status %d", resp.StatusCode) + } } body, err := io.ReadAll(resp.Body) diff --git a/cisco/c220/exporter_test.go b/cisco/c220/exporter_test.go new file mode 100644 index 0000000..eb6fd07 --- /dev/null +++ b/cisco/c220/exporter_test.go @@ -0,0 +1,125 @@ +/* + * Copyright 2024 Comcast Cable Communications Management, LLC + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package c220 + +import ( + "context" + "encoding/json" + "net/http" + "net/http/httptest" + "strings" + "testing" + + "github.com/prometheus/client_golang/prometheus" + "github.com/prometheus/client_golang/prometheus/testutil" + "github.com/stretchr/testify/assert" +) + +const ( + up2Response = ` + # HELP up was the last scrape of fishymetrics successful. 
+ # TYPE up gauge + up 2 + ` +) + +type TestErrorResponse struct { + Error TestError `json:"error"` +} + +type TestError struct { + Code string `json:"code"` + Message string `json:"message"` + ExtendedInfo []TestMessage `json:"@Message.ExtendedInfo"` +} + +type TestMessage struct { + MessageId string `json:"MessageId"` +} + +func MustMarshal(v interface{}) []byte { + b, err := json.Marshal(v) + if err != nil { + panic(err) + } + return b +} + +func Test_C220_Exporter(t *testing.T) { + server := httptest.NewServer(http.HandlerFunc(func(w http.ResponseWriter, r *http.Request) { + if r.URL.Path == "/redfish/v1/badcred/Managers/CIMC" { + w.WriteHeader(http.StatusUnauthorized) + w.Write(MustMarshal(TestErrorResponse{ + Error: TestError{ + Code: "iLO.0.10.ExtendedInfo", + Message: "See @Message.ExtendedInfo for more information.", + ExtendedInfo: []TestMessage{ + { + MessageId: "Base.1.0.NoValidSession", + }, + }, + }, + })) + return + } + w.WriteHeader(http.StatusInternalServerError) + w.Write([]byte("Unknown path - please create test case(s) for it")) + })) + defer server.Close() + + ctx := context.Background() + assert := assert.New(t) + + tests := []struct { + name string + uri string + metricName string + metricRef1 string + metricRef2 string + payload []byte + expected string + }{ + { + name: "Bad Credentials", + uri: "/redfish/v1/badcred", + metricName: "up", + metricRef1: "up", + metricRef2: "up", + expected: up2Response, + }, + } + + for _, test := range tests { + t.Run(test.name, func(t *testing.T) { + var exporter prometheus.Collector + var err error + exporter, err = NewExporter(ctx, server.URL, test.uri, "") + assert.Nil(err) + assert.NotNil(exporter) + + prometheus.MustRegister(exporter) + + metric := (*exporter.(*Exporter).deviceMetrics)[test.metricRef1] + m := (*metric)[test.metricRef2] + + assert.Empty(testutil.CollectAndCompare(m, strings.NewReader(test.expected), test.metricName)) + + prometheus.Unregister(exporter) + + }) + } +} diff --git 
a/cisco/c220/memory_test.go b/cisco/c220/memory_test.go index 570d9af..53d7663 100644 --- a/cisco/c220/memory_test.go +++ b/cisco/c220/memory_test.go @@ -1,5 +1,5 @@ // /* -// * Copyright 2023 Comcast Cable Communications Management, LLC +// * Copyright 2024 Comcast Cable Communications Management, LLC // * // * Licensed under the Apache License, Version 2.0 (the "License"); // * you may not use this file except in compliance with the License. @@ -97,11 +97,7 @@ func Test_C220_Memory_Metrics(t *testing.T) { host: "fishymetrics.com", biosVersion: "C220M5.4.0.4i.0.zzzzzzzzz", chassisSerialNumber: "SN78901", - up: prometheus.NewGauge(prometheus.GaugeOpts{ - Name: "up", - Help: "Was the last scrape of chassis monitor successful.", - }), - deviceMetrics: metrx, + deviceMetrics: metrx, } prometheus.MustRegister(exporter) diff --git a/cisco/c220/metrics.go b/cisco/c220/metrics.go index e61f25d..17c445a 100644 --- a/cisco/c220/metrics.go +++ b/cisco/c220/metrics.go @@ -1,5 +1,5 @@ /* - * Copyright 2023 Comcast Cable Communications Management, LLC + * Copyright 2024 Comcast Cable Communications Management, LLC * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. 
@@ -35,6 +35,10 @@ func newServerMetric(metricName string, docString string, constLabels prometheus func NewDeviceMetrics() *map[string]*metrics { var ( + UpMetric = &metrics{ + "up": newServerMetric("up", "was the last scrape of fishymetrics successful.", nil, []string{}), + } + ThermalMetrics = &metrics{ "fanSpeed": newServerMetric("c220_thermal_fan_speed", "Current fan speed in the unit of RPM", nil, []string{"name", "chassisSerialNumber"}), "fanStatus": newServerMetric("c220_thermal_fan_status", "Current fan status 1 = OK, 0 = BAD", nil, []string{"name", "chassisSerialNumber"}), @@ -69,6 +73,7 @@ func NewDeviceMetrics() *map[string]*metrics { } Metrics = &map[string]*metrics{ + "up": UpMetric, "thermalMetrics": ThermalMetrics, "powerMetrics": PowerMetrics, "memoryMetrics": MemoryMetrics, diff --git a/cisco/s3260m4/exporter.go b/cisco/s3260m4/exporter.go index 5c90a84..298858a 100644 --- a/cisco/s3260m4/exporter.go +++ b/cisco/s3260m4/exporter.go @@ -1,5 +1,5 @@ /* - * Copyright 2023 Comcast Cable Communications Management, LLC + * Copyright 2024 Comcast Cable Communications Management, LLC * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. @@ -20,6 +20,7 @@ import ( "context" "crypto/tls" "encoding/json" + "errors" "fmt" "io" "net" @@ -77,15 +78,18 @@ type Exporter struct { credProfile string biosVersion string chassisSerialNumber string - - up prometheus.Gauge - deviceMetrics *map[string]*metrics + deviceMetrics *map[string]*metrics } // NewExporter returns an initialized Exporter for Cisco UCS S3260M4 device. 
func NewExporter(ctx context.Context, target, uri, profile string) (*Exporter, error) { var fqdn *url.URL var tasks []*pool.Task + var exp = Exporter{ + ctx: ctx, + credProfile: profile, + deviceMetrics: NewDeviceMetrics(), + } var mgr string log = zap.L() @@ -128,11 +132,32 @@ func NewExporter(ctx context.Context, target, uri, profile string) (*Exporter, e Host: target, } } + exp.host = fqdn.String() + + // check if host is on the ignored list, if so we immediately return + if _, ok := common.IgnoredDevices[exp.host]; ok { + var upMetric = (*exp.deviceMetrics)["up"] + (*upMetric)["up"].WithLabelValues().Set(float64(2)) + return &exp, nil + } // chassis system endpoint to use for memory, processor, bios version scrapes mgrEndpoints, err := getManagerEndpoint(fqdn.String()+uri+"/Managers/BMC2", target, retryClient) if err != nil { log.Error("error when getting managers endpoint from "+S3260M4, zap.Error(err), zap.Any("trace_id", ctx.Value("traceID"))) + if errors.Is(err, common.ErrInvalidCredential) { + common.IgnoredDevices[exp.host] = common.IgnoredDevice{ + Name: exp.host, + Endpoint: "https://" + exp.host + "/redfish/v1/Chassis", + Module: S3260M4, + CredentialProfile: exp.credProfile, + } + log.Info("added host "+exp.host+" to ignored list", zap.Any("trace_id", exp.ctx.Value("traceID"))) + var upMetric = (*exp.deviceMetrics)["up"] + (*upMetric)["up"].WithLabelValues().Set(float64(2)) + + return &exp, nil + } return nil, err } @@ -146,6 +171,7 @@ func NewExporter(ctx context.Context, target, uri, profile string) (*Exporter, e log.Error("error when getting BIOS version from "+S3260M4, zap.Error(err), zap.Any("trace_id", ctx.Value("traceID"))) return nil, err } + exp.biosVersion = biosVer // chassis serial number chassisSN, err := getChassisSerialNumber(fqdn.String()+uri+"/Chassis/CMC", target, retryClient) @@ -153,6 +179,7 @@ func NewExporter(ctx context.Context, target, uri, profile string) (*Exporter, e log.Error("error when getting chassis serial number from 
"+S3260M4, zap.Error(err), zap.Any("trace_id", ctx.Value("traceID"))) return nil, err } + exp.chassisSerialNumber = chassisSN // DIMM endpoints array dimms, err := getDIMMEndpoints(fqdn.String()+mgr+"/Memory", target, retryClient) @@ -191,24 +218,9 @@ func NewExporter(ctx context.Context, target, uri, profile string) (*Exporter, e pool.NewTask(common.Fetch(fqdn.String()+dimm.URL, MEMORY, target, profile, retryClient))) } - p := pool.NewPool(tasks, 1) - - // Create new map[string]*metrics for each new Exporter - metrx := NewDeviceMetrics() - - return &Exporter{ - ctx: ctx, - pool: p, - host: fqdn.Host, - credProfile: profile, - biosVersion: biosVer, - chassisSerialNumber: chassisSN, - up: prometheus.NewGauge(prometheus.GaugeOpts{ - Name: "up", - Help: "Was the last scrape of chassis monitor successful.", - }), - deviceMetrics: metrx, - }, nil + exp.pool = pool.NewPool(tasks, 1) + + return &exp, nil } // Describe describes all the metrics ever exported by the fishymetrics exporter. It @@ -219,7 +231,6 @@ func (e *Exporter) Describe(ch chan<- *prometheus.Desc) { n.Describe(ch) } } - ch <- e.up.Desc() } // Collect fetches the stats from configured fishymetrics location and delivers them @@ -234,10 +245,10 @@ func (e *Exporter) Collect(ch chan<- prometheus.Metric) { if _, ok := common.IgnoredDevices[e.host]; !ok { e.scrape() } else { - e.up.Set(float64(2)) + var upMetric = (*e.deviceMetrics)["up"] + (*upMetric)["up"].WithLabelValues().Set(float64(2)) } - ch <- e.up e.collectMetrics(ch) } @@ -271,7 +282,7 @@ func (e *Exporter) scrape() { if task.Err != nil { deviceState := uint8(0) // If credentials are incorrect we will add host to be ignored until manual intervention - if strings.Contains(task.Err.Error(), "401") { + if errors.Is(task.Err, common.ErrInvalidCredential) { common.IgnoredDevices[e.host] = common.IgnoredDevice{ Name: e.host, Endpoint: "https://" + e.host + "/redfish/v1/Chassis", @@ -283,7 +294,8 @@ func (e *Exporter) scrape() { } else { deviceState = 0 } - 
e.up.Set(float64(deviceState)) + var upMetric = (*e.deviceMetrics)["up"] + (*upMetric)["up"].WithLabelValues().Set(float64(deviceState)) log.Error("error from "+S3260M4, zap.Error(task.Err), zap.String("api", task.MetricType), zap.Any("trace_id", e.ctx.Value("traceID"))) return } @@ -318,7 +330,8 @@ func (e *Exporter) scrape() { state &= result } - e.up.Set(float64(state)) + var upMetric = (*e.deviceMetrics)["up"] + (*upMetric)["up"].WithLabelValues().Set(float64(state)) } @@ -591,7 +604,11 @@ func getManagerEndpoint(url, host string, client *retryablehttp.Client) (Chassis } defer resp.Body.Close() if !(resp.StatusCode >= http.StatusOK && resp.StatusCode < http.StatusMultipleChoices) { - return chas, fmt.Errorf("HTTP status %d", resp.StatusCode) + if resp.StatusCode == http.StatusUnauthorized { + return chas, common.ErrInvalidCredential + } else { + return chas, fmt.Errorf("HTTP status %d", resp.StatusCode) + } } body, err := io.ReadAll(resp.Body) diff --git a/cisco/s3260m4/exporter_test.go b/cisco/s3260m4/exporter_test.go new file mode 100644 index 0000000..7759bf6 --- /dev/null +++ b/cisco/s3260m4/exporter_test.go @@ -0,0 +1,125 @@ +/* + * Copyright 2024 Comcast Cable Communications Management, LLC + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + +package s3260m4 + +import ( + "context" + "encoding/json" + "net/http" + "net/http/httptest" + "strings" + "testing" + + "github.com/prometheus/client_golang/prometheus" + "github.com/prometheus/client_golang/prometheus/testutil" + "github.com/stretchr/testify/assert" +) + +const ( + up2Response = ` + # HELP up was the last scrape of fishymetrics successful. + # TYPE up gauge + up 2 + ` +) + +type TestErrorResponse struct { + Error TestError `json:"error"` +} + +type TestError struct { + Code string `json:"code"` + Message string `json:"message"` + ExtendedInfo []TestMessage `json:"@Message.ExtendedInfo"` +} + +type TestMessage struct { + MessageId string `json:"MessageId"` +} + +func MustMarshal(v interface{}) []byte { + b, err := json.Marshal(v) + if err != nil { + panic(err) + } + return b +} + +func Test_S3260M4_Exporter(t *testing.T) { + server := httptest.NewServer(http.HandlerFunc(func(w http.ResponseWriter, r *http.Request) { + if r.URL.Path == "/redfish/v1/badcred/Managers/BMC2" { + w.WriteHeader(http.StatusUnauthorized) + w.Write(MustMarshal(TestErrorResponse{ + Error: TestError{ + Code: "iLO.0.10.ExtendedInfo", + Message: "See @Message.ExtendedInfo for more information.", + ExtendedInfo: []TestMessage{ + { + MessageId: "Base.1.0.NoValidSession", + }, + }, + }, + })) + return + } + w.WriteHeader(http.StatusInternalServerError) + w.Write([]byte("Unknown path - please create test case(s) for it")) + })) + defer server.Close() + + ctx := context.Background() + assert := assert.New(t) + + tests := []struct { + name string + uri string + metricName string + metricRef1 string + metricRef2 string + payload []byte + expected string + }{ + { + name: "Bad Credentials", + uri: "/redfish/v1/badcred", + metricName: "up", + metricRef1: "up", + metricRef2: "up", + expected: up2Response, + }, + } + + for _, test := range tests { + t.Run(test.name, func(t *testing.T) { + var exporter prometheus.Collector + var err error + exporter, err = NewExporter(ctx, 
server.URL, test.uri, "") + assert.Nil(err) + assert.NotNil(exporter) + + prometheus.MustRegister(exporter) + + metric := (*exporter.(*Exporter).deviceMetrics)[test.metricRef1] + m := (*metric)[test.metricRef2] + + assert.Empty(testutil.CollectAndCompare(m, strings.NewReader(test.expected), test.metricName)) + + prometheus.Unregister(exporter) + + }) + } +} diff --git a/cisco/s3260m4/metrics.go b/cisco/s3260m4/metrics.go index 525c89d..2fc6574 100644 --- a/cisco/s3260m4/metrics.go +++ b/cisco/s3260m4/metrics.go @@ -1,5 +1,5 @@ /* - * Copyright 2023 Comcast Cable Communications Management, LLC + * Copyright 2024 Comcast Cable Communications Management, LLC * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. @@ -35,6 +35,10 @@ func newServerMetric(metricName string, docString string, constLabels prometheus func NewDeviceMetrics() *map[string]*metrics { var ( + UpMetric = &metrics{ + "up": newServerMetric("up", "was the last scrape of fishymetrics successful.", nil, []string{}), + } + ThermalMetrics = &metrics{ "fanSpeed": newServerMetric("s3260m4_thermal_fan_speed", "Current fan speed in the unit of RPM", nil, []string{"name", "chassisSerialNumber"}), "fanStatus": newServerMetric("s3260m4_thermal_fan_status", "Current fan status 1 = OK, 0 = BAD", nil, []string{"name", "chassisSerialNumber"}), @@ -68,6 +72,7 @@ func NewDeviceMetrics() *map[string]*metrics { } Metrics = &map[string]*metrics{ + "up": UpMetric, "thermalMetrics": ThermalMetrics, "powerMetrics": PowerMetrics, "memoryMetrics": MemoryMetrics, diff --git a/cisco/s3260m5/exporter.go b/cisco/s3260m5/exporter.go index 83b08a8..0b875d3 100644 --- a/cisco/s3260m5/exporter.go +++ b/cisco/s3260m5/exporter.go @@ -1,5 +1,5 @@ /* - * Copyright 2023 Comcast Cable Communications Management, LLC + * Copyright 2024 Comcast Cable Communications Management, LLC * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not 
use this file except in compliance with the License. @@ -20,6 +20,7 @@ import ( "context" "crypto/tls" "encoding/json" + "errors" "fmt" "io" "net" @@ -78,15 +79,18 @@ type Exporter struct { credProfile string biosVersion string chassisSerialNumber string - - up prometheus.Gauge - deviceMetrics *map[string]*metrics + deviceMetrics *map[string]*metrics } // NewExporter returns an initialized Exporter for Cisco UCS S3260M5 device. func NewExporter(ctx context.Context, target, uri, profile string) (*Exporter, error) { var fqdn *url.URL var tasks []*pool.Task + var exp = Exporter{ + ctx: ctx, + credProfile: profile, + deviceMetrics: NewDeviceMetrics(), + } var mgr string log = zap.L() @@ -129,11 +133,32 @@ func NewExporter(ctx context.Context, target, uri, profile string) (*Exporter, e Host: target, } } + exp.host = fqdn.String() + + // check if host is on the ignored list, if so we immediately return + if _, ok := common.IgnoredDevices[exp.host]; ok { + var upMetric = (*exp.deviceMetrics)["up"] + (*upMetric)["up"].WithLabelValues().Set(float64(2)) + return &exp, nil + } // chassis system endpoint to use for memory, processor, bios version scrapes mgrEndpoints, err := getManagerEndpoint(fqdn.String()+uri+"/Managers/BMC1", target, retryClient) if err != nil { log.Error("error when getting managers endpoint from "+S3260M5, zap.Error(err), zap.Any("trace_id", ctx.Value("traceID"))) + if errors.Is(err, common.ErrInvalidCredential) { + common.IgnoredDevices[exp.host] = common.IgnoredDevice{ + Name: exp.host, + Endpoint: "https://" + exp.host + "/redfish/v1/Chassis", + Module: S3260M5, + CredentialProfile: exp.credProfile, + } + log.Info("added host "+exp.host+" to ignored list", zap.Any("trace_id", exp.ctx.Value("traceID"))) + var upMetric = (*exp.deviceMetrics)["up"] + (*upMetric)["up"].WithLabelValues().Set(float64(2)) + + return &exp, nil + } return nil, err } @@ -154,6 +179,7 @@ func NewExporter(ctx context.Context, target, uri, profile string) (*Exporter, e 
log.Error("error when getting BIOS version from "+S3260M5, zap.Error(err), zap.Any("trace_id", ctx.Value("traceID"))) return nil, err } + exp.biosVersion = biosVer // DIMM endpoints array dimms, err := getDIMMEndpoints(fqdn.String()+mgr+"/Memory", target, retryClient) @@ -175,6 +201,7 @@ func NewExporter(ctx context.Context, target, uri, profile string) (*Exporter, e log.Error("error when getting chassis serial number from "+S3260M5, zap.Error(err), zap.Any("trace_id", ctx.Value("traceID"))) return nil, err } + exp.chassisSerialNumber = chassisSN serial := path.Base(mgr) @@ -208,24 +235,9 @@ func NewExporter(ctx context.Context, target, uri, profile string) (*Exporter, e pool.NewTask(common.Fetch(fqdn.String()+dimm.URL, MEMORY, target, profile, retryClient))) } - p := pool.NewPool(tasks, 1) - - // Create new map[string]*metrics for each new Exporter - metrx := NewDeviceMetrics() - - return &Exporter{ - ctx: ctx, - pool: p, - host: fqdn.Host, - credProfile: profile, - biosVersion: biosVer, - chassisSerialNumber: chassisSN, - up: prometheus.NewGauge(prometheus.GaugeOpts{ - Name: "up", - Help: "Was the last scrape of chassis monitor successful.", - }), - deviceMetrics: metrx, - }, nil + exp.pool = pool.NewPool(tasks, 1) + + return &exp, nil } // Describe describes all the metrics ever exported by the fishymetrics exporter. 
It @@ -236,7 +248,6 @@ func (e *Exporter) Describe(ch chan<- *prometheus.Desc) { n.Describe(ch) } } - ch <- e.up.Desc() } // Collect fetches the stats from configured fishymetrics location and delivers them @@ -251,10 +262,10 @@ func (e *Exporter) Collect(ch chan<- prometheus.Metric) { if _, ok := common.IgnoredDevices[e.host]; !ok { e.scrape() } else { - e.up.Set(float64(2)) + var upMetric = (*e.deviceMetrics)["up"] + (*upMetric)["up"].WithLabelValues().Set(float64(2)) } - ch <- e.up e.collectMetrics(ch) } @@ -288,7 +299,7 @@ func (e *Exporter) scrape() { if task.Err != nil { deviceState := uint8(0) // If credentials are incorrect we will add host to be ignored until manual intervention - if strings.Contains(task.Err.Error(), "401") { + if errors.Is(task.Err, common.ErrInvalidCredential) { common.IgnoredDevices[e.host] = common.IgnoredDevice{ Name: e.host, Endpoint: "https://" + e.host + "/redfish/v1/Chassis", @@ -300,7 +311,8 @@ func (e *Exporter) scrape() { } else { deviceState = 0 } - e.up.Set(float64(deviceState)) + var upMetric = (*e.deviceMetrics)["up"] + (*upMetric)["up"].WithLabelValues().Set(float64(deviceState)) log.Error("error from "+S3260M5, zap.Error(task.Err), zap.String("api", task.MetricType), zap.Any("trace_id", e.ctx.Value("traceID"))) return } @@ -335,7 +347,8 @@ func (e *Exporter) scrape() { state &= result } - e.up.Set(float64(state)) + var upMetric = (*e.deviceMetrics)["up"] + (*upMetric)["up"].WithLabelValues().Set(float64(state)) } @@ -556,6 +569,8 @@ func getManagerEndpoint(url, host string, client *retryablehttp.Client) (Chassis } else if !(resp.StatusCode >= http.StatusOK && resp.StatusCode < http.StatusMultipleChoices) { return chas, fmt.Errorf("HTTP status %d", resp.StatusCode) } + } else if resp.StatusCode == http.StatusUnauthorized { + return chas, common.ErrInvalidCredential } else { return chas, fmt.Errorf("HTTP status %d", resp.StatusCode) } diff --git a/cisco/s3260m5/exporter_test.go b/cisco/s3260m5/exporter_test.go new file 
mode 100644 index 0000000..7543042 --- /dev/null +++ b/cisco/s3260m5/exporter_test.go @@ -0,0 +1,125 @@ +/* + * Copyright 2024 Comcast Cable Communications Management, LLC + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package s3260m5 + +import ( + "context" + "encoding/json" + "net/http" + "net/http/httptest" + "strings" + "testing" + + "github.com/prometheus/client_golang/prometheus" + "github.com/prometheus/client_golang/prometheus/testutil" + "github.com/stretchr/testify/assert" +) + +const ( + up2Response = ` + # HELP up was the last scrape of fishymetrics successful. 
+ # TYPE up gauge + up 2 + ` +) + +type TestErrorResponse struct { + Error TestError `json:"error"` +} + +type TestError struct { + Code string `json:"code"` + Message string `json:"message"` + ExtendedInfo []TestMessage `json:"@Message.ExtendedInfo"` +} + +type TestMessage struct { + MessageId string `json:"MessageId"` +} + +func MustMarshal(v interface{}) []byte { + b, err := json.Marshal(v) + if err != nil { + panic(err) + } + return b +} + +func Test_S3260M5_Exporter(t *testing.T) { + server := httptest.NewServer(http.HandlerFunc(func(w http.ResponseWriter, r *http.Request) { + if r.URL.Path == "/redfish/v1/badcred/Managers/BMC1" { + w.WriteHeader(http.StatusUnauthorized) + w.Write(MustMarshal(TestErrorResponse{ + Error: TestError{ + Code: "iLO.0.10.ExtendedInfo", + Message: "See @Message.ExtendedInfo for more information.", + ExtendedInfo: []TestMessage{ + { + MessageId: "Base.1.0.NoValidSession", + }, + }, + }, + })) + return + } + w.WriteHeader(http.StatusInternalServerError) + w.Write([]byte("Unknown path - please create test case(s) for it")) + })) + defer server.Close() + + ctx := context.Background() + assert := assert.New(t) + + tests := []struct { + name string + uri string + metricName string + metricRef1 string + metricRef2 string + payload []byte + expected string + }{ + { + name: "Bad Credentials", + uri: "/redfish/v1/badcred", + metricName: "up", + metricRef1: "up", + metricRef2: "up", + expected: up2Response, + }, + } + + for _, test := range tests { + t.Run(test.name, func(t *testing.T) { + var exporter prometheus.Collector + var err error + exporter, err = NewExporter(ctx, server.URL, test.uri, "") + assert.Nil(err) + assert.NotNil(exporter) + + prometheus.MustRegister(exporter) + + metric := (*exporter.(*Exporter).deviceMetrics)[test.metricRef1] + m := (*metric)[test.metricRef2] + + assert.Empty(testutil.CollectAndCompare(m, strings.NewReader(test.expected), test.metricName)) + + prometheus.Unregister(exporter) + + }) + } +} diff --git 
a/cisco/s3260m5/metrics.go b/cisco/s3260m5/metrics.go index 045b10e..c0b7d6a 100644 --- a/cisco/s3260m5/metrics.go +++ b/cisco/s3260m5/metrics.go @@ -1,5 +1,5 @@ /* - * Copyright 2023 Comcast Cable Communications Management, LLC + * Copyright 2024 Comcast Cable Communications Management, LLC * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. @@ -35,6 +35,10 @@ func newServerMetric(metricName string, docString string, constLabels prometheus func NewDeviceMetrics() *map[string]*metrics { var ( + UpMetric = &metrics{ + "up": newServerMetric("up", "was the last scrape of fishymetrics successful.", nil, []string{}), + } + ThermalMetrics = &metrics{ "fanSpeed": newServerMetric("s3260m5_thermal_fan_speed", "Current fan speed in the unit of RPM", nil, []string{"name", "chassisSerialNumber"}), "fanStatus": newServerMetric("s3260m5_thermal_fan_status", "Current fan status 1 = OK, 0 = BAD", nil, []string{"name", "chassisSerialNumber"}), @@ -68,6 +72,7 @@ func NewDeviceMetrics() *map[string]*metrics { } Metrics = &map[string]*metrics{ + "up": UpMetric, "thermalMetrics": ThermalMetrics, "powerMetrics": PowerMetrics, "memoryMetrics": MemoryMetrics, diff --git a/cmd/fishymetrics/main.go b/cmd/fishymetrics/main.go index 0c1fe0a..100df91 100644 --- a/cmd/fishymetrics/main.go +++ b/cmd/fishymetrics/main.go @@ -149,17 +149,17 @@ func handler(ctx context.Context, w http.ResponseWriter, r *http.Request) { switch moduleName { case "moonshot": - exporter = moonshot.NewExporter(r.Context(), target, uri, credProf) + exporter, err = moonshot.NewExporter(r.Context(), target, uri, credProf) case "dl380": - exporter = dl380.NewExporter(r.Context(), target, uri, credProf) + exporter, err = dl380.NewExporter(r.Context(), target, uri, credProf) case "dl360": - exporter = dl360.NewExporter(r.Context(), target, uri, credProf) + exporter, err = dl360.NewExporter(r.Context(), target, uri, credProf) case "dl560": - 
exporter = dl560.NewExporter(r.Context(), target, uri, credProf) + exporter, err = dl560.NewExporter(r.Context(), target, uri, credProf) case "dl20": - exporter = dl20.NewExporter(r.Context(), target, uri, credProf) + exporter, err = dl20.NewExporter(r.Context(), target, uri, credProf) case "xl420": - exporter = xl420.NewExporter(r.Context(), target, uri, credProf) + exporter, err = xl420.NewExporter(r.Context(), target, uri, credProf) case "c220": exporter, err = c220.NewExporter(r.Context(), target, uri, credProf) case "s3260m4": diff --git a/common/util.go b/common/util.go index 9aa3610..a6c7ef3 100644 --- a/common/util.go +++ b/common/util.go @@ -19,6 +19,7 @@ package common import ( "context" "encoding/xml" + "errors" "fmt" "io" "net/http" @@ -29,6 +30,10 @@ import ( "github.com/hashicorp/go-retryablehttp" ) +var ( + ErrInvalidCredential = errors.New("invalid credential") +) + type GenericMetricPayload struct { XMLName xml.Name `xml:"configResolveClass"` Cookie string `xml:"cookie,attr"` @@ -106,7 +111,7 @@ func Fetch(uri, metricType, host, profile string, client *retryablehttp.Client) } ChassisCreds.Set(host, credential) } else { - return nil, metricType, fmt.Errorf("HTTP status %d", resp.StatusCode) + return nil, metricType, ErrInvalidCredential } // build new request with updated credentials @@ -118,7 +123,7 @@ func Fetch(uri, metricType, host, profile string, client *retryablehttp.Client) return nil, metricType, fmt.Errorf("Retry DoRequest failed - " + err.Error()) } if resp.StatusCode == http.StatusUnauthorized { - return nil, metricType, fmt.Errorf("HTTP status %d", resp.StatusCode) + return nil, metricType, ErrInvalidCredential } } else { return nil, metricType, fmt.Errorf("HTTP status %d", resp.StatusCode) diff --git a/go.mod b/go.mod index bed0079..9ec7a73 100644 --- a/go.mod +++ b/go.mod @@ -1,6 +1,6 @@ module github.com/comcast/fishymetrics -go 1.19 +go 1.21 require ( github.com/google/uuid v1.3.0 diff --git a/go.sum b/go.sum index e402a0a..52d1db4 
100644 --- a/go.sum +++ b/go.sum @@ -29,6 +29,7 @@ github.com/golang/protobuf v1.5.2/go.mod h1:XVQd3VNwM+JqD3oG2Ue2ip4fOMUkwXdXDdiu github.com/google/go-cmp v0.5.5/go.mod h1:v8dTdLbMG2kIc/vJvl+f65V22dbkXbowE6jgT/gNBxE= github.com/google/go-cmp v0.5.7/go.mod h1:n+brtR0CgQNWTVd5ZUFpTBC8YFBDLK/h/bpaJ8/DtOE= github.com/google/go-cmp v0.5.9 h1:O2Tfq5qg4qc4AmwVlvv0oLiVAGB7enBSJ2x2DqQFi38= +github.com/google/go-cmp v0.5.9/go.mod h1:17dUlkBOakJ0+DkrSSNjCkIjxS6bF9zb3elmeNGIjoY= github.com/google/gofuzz v1.0.0/go.mod h1:dBl0BpW6vV/+mYPU4Po3pmUjxk6FQPldtuIdl/M65Eg= github.com/google/uuid v1.3.0 h1:t6JiXgmwXMjEs8VusXIJk2BXHsn+wx8BZdTaoZ5fu7I= github.com/google/uuid v1.3.0/go.mod h1:TIyPZe4MgqvfeYDBFedMoGGpEw/LqOeaOT+nhxU+yHo= @@ -134,6 +135,7 @@ github.com/stretchr/testify v1.8.1 h1:w7B6lhMri9wdJUVmEZPGGhZzrYTPvgJArz7wNPgYKs github.com/stretchr/testify v1.8.1/go.mod h1:w2LPCIKwWwSfY2zedu0+kehJoqGctiVI29o6fzry7u4= github.com/yuin/goldmark v1.4.13/go.mod h1:6yULJ656Px+3vBD8DxQVa3kxgyrAnzto9xy5taEt/CY= go.uber.org/goleak v1.2.0 h1:xqgm/S+aQvhWFTtR0XK3Jvg7z8kGV8P4X14IzwN3Eqk= +go.uber.org/goleak v1.2.0/go.mod h1:XJYK+MuIchqpmGmUSAzotztawfKvYLUIgg7guXrwVUo= go.uber.org/multierr v1.10.0 h1:S0h4aNzvfcFsC3dRF1jLoaov7oRaKqRGC/pUEJ2yvPQ= go.uber.org/multierr v1.10.0/go.mod h1:20+QtiLqy0Nd6FdQB9TLXag12DsQkrbs3htMFfDN80Y= go.uber.org/zap v1.26.0 h1:sI7k6L95XOKS281NhVKOFCUNIvv9e0w4BF8N3u+tCRo= @@ -195,6 +197,7 @@ gopkg.in/alecthomas/kingpin.v2 v2.2.6 h1:jMFz6MfLP0/4fUyZle81rXUoxOBFi19VUFKVDOQ gopkg.in/alecthomas/kingpin.v2 v2.2.6/go.mod h1:FMv+mEhP44yOT+4EoQTLFTRgOQ1FBLkstjWtayDeSgw= gopkg.in/check.v1 v0.0.0-20161208181325-20d25e280405/go.mod h1:Co6ibVJAznAaIkqp8huTwlJQCZ016jof/cbN4VW5Yz0= gopkg.in/check.v1 v1.0.0-20201130134442-10cb98267c6c h1:Hei/4ADfdWqJk1ZMxUNpqntNwaWcugrBjAiHlqqRiVk= +gopkg.in/check.v1 v1.0.0-20201130134442-10cb98267c6c/go.mod h1:JHkPIbrfpd72SG/EVd6muEfDQjcINNoR0C8j2r3qZ4Q= gopkg.in/natefinch/lumberjack.v2 v2.2.1 h1:bBRl1b0OH9s/DuPhuXpNl+VtCaJXFZ5/uEFST95x9zc= 
gopkg.in/natefinch/lumberjack.v2 v2.2.1/go.mod h1:YD8tP3GAjkrDg1eZH7EGmyESg/lsYskCTPBJVb9jqSc= gopkg.in/square/go-jose.v2 v2.5.1/go.mod h1:M9dMgbHiYLoDGQrXy7OpJDJWiKiU//h+vD76mk0e1AI= diff --git a/hpe/dl20/exporter.go b/hpe/dl20/exporter.go index 3ed368c..4413ea8 100644 --- a/hpe/dl20/exporter.go +++ b/hpe/dl20/exporter.go @@ -1,5 +1,5 @@ /* - * Copyright 2023 Comcast Cable Communications Management, LLC + * Copyright 2024 Comcast Cable Communications Management, LLC * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. @@ -20,6 +20,7 @@ import ( "context" "crypto/tls" "encoding/json" + "errors" "fmt" "net" "net/http" @@ -64,20 +65,23 @@ var ( // Exporter collects chassis manager stats from the given URI and exports them using // the prometheus metrics package. type Exporter struct { - ctx context.Context - mutex sync.RWMutex - pool *pool.Pool - host string - credProfile string - - up prometheus.Gauge + ctx context.Context + mutex sync.RWMutex + pool *pool.Pool + host string + credProfile string deviceMetrics *map[string]*metrics } // NewExporter returns an initialized Exporter for HPE DL20 device. 
-func NewExporter(ctx context.Context, target, uri, profile string) *Exporter { +func NewExporter(ctx context.Context, target, uri, profile string) (*Exporter, error) { var fqdn *url.URL var tasks []*pool.Task + var exp = Exporter{ + ctx: ctx, + credProfile: profile, + deviceMetrics: NewDeviceMetrics(), + } log = zap.L() @@ -119,6 +123,14 @@ func NewExporter(ctx context.Context, target, uri, profile string) *Exporter { Host: target, } } + exp.host = fqdn.String() + + // check if host is on the ignored list, if so we immediately return + if _, ok := common.IgnoredDevices[exp.host]; ok { + var upMetric = (*exp.deviceMetrics)["up"] + (*upMetric)["up"].WithLabelValues().Set(float64(2)) + return &exp, nil + } tasks = append(tasks, pool.NewTask(common.Fetch(fqdn.String()+uri+"/Chassis/1/Thermal", THERMAL, target, profile, retryClient)), @@ -126,22 +138,9 @@ func NewExporter(ctx context.Context, target, uri, profile string) *Exporter { pool.NewTask(common.Fetch(fqdn.String()+uri+"/Systems/1/SmartStorage/ArrayControllers/0/LogicalDrives/1", DRIVE, target, profile, retryClient)), pool.NewTask(common.Fetch(fqdn.String()+uri+"/Systems/1", MEMORY, target, profile, retryClient))) - p := pool.NewPool(tasks, 1) - - // Create new map[string]*metrics for each new Exporter - metrx := NewDeviceMetrics() - - return &Exporter{ - ctx: ctx, - pool: p, - host: fqdn.Host, - credProfile: profile, - up: prometheus.NewGauge(prometheus.GaugeOpts{ - Name: "up", - Help: "Was the last scrape of chassis monitor successful.", - }), - deviceMetrics: metrx, - } + exp.pool = pool.NewPool(tasks, 1) + + return &exp, nil } // Describe describes all the metrics ever exported by the fishymetrics exporter. 
It @@ -152,7 +151,6 @@ func (e *Exporter) Describe(ch chan<- *prometheus.Desc) { n.Describe(ch) } } - ch <- e.up.Desc() } // Collect fetches the stats from configured fishymetrics location and delivers them @@ -167,10 +165,10 @@ func (e *Exporter) Collect(ch chan<- prometheus.Metric) { if _, ok := common.IgnoredDevices[e.host]; !ok { e.scrape() } else { - e.up.Set(float64(2)) + var upMetric = (*e.deviceMetrics)["up"] + (*upMetric)["up"].WithLabelValues().Set(float64(2)) } - ch <- e.up e.collectMetrics(ch) } @@ -204,7 +202,7 @@ func (e *Exporter) scrape() { if task.Err != nil { deviceState := uint8(0) // If credentials are incorrect we will add host to be ignored until manual intervention - if strings.Contains(task.Err.Error(), "401") { + if errors.Is(task.Err, common.ErrInvalidCredential) { common.IgnoredDevices[e.host] = common.IgnoredDevice{ Name: e.host, Endpoint: "https://" + e.host + "/redfish/v1/Chassis", @@ -216,7 +214,8 @@ func (e *Exporter) scrape() { } else { deviceState = 0 } - e.up.Set(float64(deviceState)) + var upMetric = (*e.deviceMetrics)["up"] + (*upMetric)["up"].WithLabelValues().Set(float64(deviceState)) log.Error("error from "+DL20, zap.Error(task.Err), zap.String("api", task.MetricType), zap.Any("trace_id", e.ctx.Value("traceID"))) return } @@ -247,7 +246,8 @@ func (e *Exporter) scrape() { state &= result } - e.up.Set(float64(state)) + var upMetric = (*e.deviceMetrics)["up"] + (*upMetric)["up"].WithLabelValues().Set(float64(state)) } diff --git a/hpe/dl20/metrics.go b/hpe/dl20/metrics.go index 8db4bef..9608031 100644 --- a/hpe/dl20/metrics.go +++ b/hpe/dl20/metrics.go @@ -1,5 +1,5 @@ /* - * Copyright 2023 Comcast Cable Communications Management, LLC + * Copyright 2024 Comcast Cable Communications Management, LLC * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. 
@@ -35,6 +35,10 @@ func newServerMetric(metricName string, docString string, constLabels prometheus func NewDeviceMetrics() *map[string]*metrics { var ( + UpMetric = &metrics{ + "up": newServerMetric("up", "was the last scrape of fishymetrics successful.", nil, []string{}), + } + ThermalMetrics = &metrics{ "fanSpeed": newServerMetric("dl20_thermal_fan_speed", "Current fan speed in the unit of percentage, possible values are 0 - 100", nil, []string{"name"}), "fanStatus": newServerMetric("dl20_thermal_fan_status", "Current fan status 1 = OK, 0 = BAD", nil, []string{"name"}), @@ -58,6 +62,7 @@ func NewDeviceMetrics() *map[string]*metrics { } Metrics = &map[string]*metrics{ + "up": UpMetric, "thermalMetrics": ThermalMetrics, "powerMetrics": PowerMetrics, "driveMetrics": DriveMetrics, diff --git a/hpe/dl360/drive.go b/hpe/dl360/drive.go index abbc7cc..f8d1624 100644 --- a/hpe/dl360/drive.go +++ b/hpe/dl360/drive.go @@ -23,6 +23,7 @@ type NVMeDriveMetrics struct { Model string `json:"Model"` Name string `json:"Name"` MediaType string `json:"MediaType"` + Oem Oem `json:"Oem"` PhysicalLocation PhysicalLocation `json:"PhysicalLocation"` Protocol string `json:"Protocol"` Status DriveStatus `json:"Status"` @@ -31,7 +32,7 @@ type NVMeDriveMetrics struct { } // Logical Drives -// /redfish/v1/Systems/1/SmartStorage/ArrayControllers/ +// /redfish/v1/Systems/1/SmartStorage/ArrayControllers/X/LogicalDrives/X/ type LogicalDriveMetrics struct { Id string `json:"Id"` CapacityMiB int `json:"CapacityMiB"` @@ -47,7 +48,7 @@ type LogicalDriveMetrics struct { } // Disk Drives -// /redfish/v1/Systems/1/SmartStorage/ArrayControllers/ +// /redfish/v1/Systems/1/SmartStorage/ArrayControllers/X/DiskDrives/X/ type DiskDriveMetrics struct { Id string `json:"Id"` CapacityMiB int `json:"CapacityMiB"` @@ -62,40 +63,42 @@ type DiskDriveMetrics struct { // NVME, Logical, and Physical Disk Drive Status type DriveStatus struct { - Health string `json:"Health,omitempty"` - State string 
`json:"Enabled,omitempty"` + Health string `json:"Health"` + State string `json:"State,omitempty"` } // GenericDrive is used to iterate over differing drive endpoints // /redfish/v1/Systems/1/SmartStorage/ArrayControllers/ for Logical and Physical Drives // /redfish/v1/Chassis/1/Drives/ for NVMe Drive(s) type GenericDrive struct { - Members []struct { - URL string `json:"@odata.id"` - } `json:"Members,omitempty"` - Links *struct { - Drives []struct { - URL string `json:"@odata.id"` - } `json:"Drives,omitempty"` - LogicalDrives *struct { - URL string `json:"@odata.id"` - } `json:"LogicalDrives,omitempty"` - PhysicalDrives *struct { - URL string `json:"@odata.id"` - } `json:"PhysicalDrives,omitempty"` - } `json:"Links,omitempty"` - Link *struct { - Drives []struct { - URL string `json:"href"` - } `json:"Drives,omitempty"` - LogicalDrives *struct { - URL string `json:"href"` - } `json:"LogicalDrives,omitempty"` - PhysicalDrives *struct { - URL string `json:"href"` - } `json:"PhysicalDrives,omitempty"` - } `json:"links,omitempty"` - MembersCount int `json:"Members@odata.count,omitempty"` + Members []Members `json:"Members,omitempty"` + LinksUpper LinksUpper `json:"Links,omitempty"` + LinksLower LinksLower `json:"links,omitempty"` + MembersCount int `json:"Members@odata.count,omitempty"` +} + +type Members struct { + URL string `json:"@odata.id"` +} + +type LinksUpper struct { + Drives []URL `json:"Drives,omitempty"` + LogicalDrives URL `json:"LogicalDrives,omitempty"` + PhysicalDrives URL `json:"PhysicalDrives,omitempty"` +} + +type LinksLower struct { + Drives []HRef `json:"Drives,omitempty"` + LogicalDrives HRef `json:"LogicalDrives,omitempty"` + PhysicalDrives HRef `json:"PhysicalDrives,omitempty"` +} + +type HRef struct { + URL string `json:"href"` +} + +type URL struct { + URL string `json:"@odata.id"` } // PhysicalLocation @@ -116,6 +119,6 @@ type Oem struct { // Contents of Hpe type HpeCont struct { CurrentTemperatureCelsius int 
`json:"CurrentTemperatureCelsius"` - DriveStatus DriveStatus `json:"Status"` + DriveStatus DriveStatus `json:"DriveStatus"` NVMeID string `json:"NVMeId"` } diff --git a/hpe/dl360/drive_test.go b/hpe/dl360/drive_test.go new file mode 100644 index 0000000..b8a0c1e --- /dev/null +++ b/hpe/dl360/drive_test.go @@ -0,0 +1,562 @@ +/* + * Copyright 2024 Comcast Cable Communications Management, LLC + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package dl360 + +import ( + "context" + "net/http" + "net/http/httptest" + "strings" + "testing" + + "github.com/prometheus/client_golang/prometheus" + "github.com/prometheus/client_golang/prometheus/testutil" + "github.com/stretchr/testify/assert" +) + +const ( + GoodLogicalDriveUpperResponse = ` + # HELP dl360_logical_drive_status Current Logical Drive Raid 1 = OK, 0 = BAD, -1 = DISABLED + # TYPE dl360_logical_drive_status gauge + dl360_logical_drive_status{logicaldrivename="TESTDRIVE NAME 1",name="HpeSmartStorageLogicalDrive",raid="1",volumeuniqueidentifier="ABCDEF12345"} 1 + ` + GoodDiskDriveUpperResponse = ` + # HELP dl360_disk_drive_status Current Disk Drive status 1 = OK, 0 = BAD, -1 = DISABLED + # TYPE dl360_disk_drive_status gauge + dl360_disk_drive_status{id="0",location="1I:1:1",name="HpeSmartStorageDiskDrive",serialnumber="ABC123"} 1 + ` + GoodLogicalDriveLowerResponse = ` + # HELP dl360_logical_drive_status Current Logical Drive Raid 1 = OK, 0 = BAD, -1 = DISABLED + # TYPE dl360_logical_drive_status 
gauge + dl360_logical_drive_status{logicaldrivename="TESTDRIVE NAME 2",name="HpeSmartStorageLogicalDrive",raid="1",volumeuniqueidentifier="FEDCBA12345"} 1 + ` + GoodDiskDriveLowerResponse = ` + # HELP dl360_disk_drive_status Current Disk Drive status 1 = OK, 0 = BAD, -1 = DISABLED + # TYPE dl360_disk_drive_status gauge + dl360_disk_drive_status{id="1",location="1I:1:2",name="HpeSmartStorageDiskDrive",serialnumber="DEF456"} 1 + ` + GoodNvmeDriveResponse = ` + # HELP dl360_nvme_drive_status Current NVME status 1 = OK, 0 = BAD, -1 = DISABLED + # TYPE dl360_nvme_drive_status gauge + dl360_nvme_drive_status{id="0",protocol="NVMe",serviceLabel="Box 3:Bay 7"} 1 + ` +) + +var ( + GoodDiskDriveUpper = MustMarshal(struct { + Id string `json:"Id"` + CapacityMiB int `json:"CapacityMiB"` + Description string `json:"Description"` + InterfaceType string `json:"InterfaceType"` + Name string `json:"Name"` + Model string `json:"Model"` + Status struct { + Health string `json:"Health"` + State string `json:"State"` + } `json:"Status"` + Location string `json:"Location"` + SerialNumber string `json:"SerialNumber"` + }{ + Id: "0", + CapacityMiB: 572325, + Description: "HPE Smart Storage Disk Drive View", + InterfaceType: "SAS", + Name: "HpeSmartStorageDiskDrive", + Model: "TESTMODEL", + Status: struct { + Health string `json:"Health"` + State string `json:"State"` + }{ + Health: "OK", + State: "Enabled", + }, + Location: "1I:1:1", + SerialNumber: "ABC123", + }) + + GoodDiskDriveLower = MustMarshal(struct { + Id string `json:"Id"` + CapacityMiB int `json:"CapacityMiB"` + Description string `json:"Description"` + InterfaceType string `json:"InterfaceType"` + Name string `json:"Name"` + Model string `json:"Model"` + Status struct { + Health string `json:"Health"` + State string `json:"State"` + } `json:"Status"` + Location string `json:"Location"` + SerialNumber string `json:"SerialNumber"` + }{ + Id: "1", + CapacityMiB: 572325, + Description: "HPE Smart Storage Disk Drive View", + 
InterfaceType: "SAS", + Name: "HpeSmartStorageDiskDrive", + Model: "TESTMODEL", + Status: struct { + Health string `json:"Health"` + State string `json:"State"` + }{ + Health: "OK", + State: "Enabled", + }, + Location: "1I:1:2", + SerialNumber: "DEF456", + }) + + GoodLogicalDriveUpper = MustMarshal(struct { + Id string `json:"Id"` + CapacityMiB int `json:"CapacityMiB"` + Description string `json:"Description"` + InterfaceType string `json:"InterfaceType"` + LogicalDriveName string `json:"LogicalDriveName"` + LogicalDriveNumber int `json:"LogicalDriveNumber"` + Name string `json:"Name"` + Raid string `json:"Raid"` + Status struct { + Health string `json:"Health"` + State string `json:"State"` + } `json:"Status"` + StripeSizebytes int `json:"StripeSizebytes"` + VolumeUniqueIdentifier string `json:"VolumeUniqueIdentifier"` + }{ + Id: "1", + CapacityMiB: 572293, + Description: "HPE Smart Storage Disk Drive View", + InterfaceType: "SAS", + LogicalDriveName: "TESTDRIVE NAME 1", + LogicalDriveNumber: 1, + Name: "HpeSmartStorageLogicalDrive", + Raid: "1", + Status: struct { + Health string `json:"Health"` + State string `json:"State"` + }{ + Health: "OK", + State: "Enabled", + }, + StripeSizebytes: 262144, + VolumeUniqueIdentifier: "ABCDEF12345", + }) + + GoodLogicalDriveLower = MustMarshal(struct { + Id string `json:"Id"` + CapacityMiB int `json:"CapacityMiB"` + Description string `json:"Description"` + InterfaceType string `json:"InterfaceType"` + LogicalDriveName string `json:"LogicalDriveName"` + LogicalDriveNumber int `json:"LogicalDriveNumber"` + Name string `json:"Name"` + Raid string `json:"Raid"` + Status struct { + Health string `json:"Health"` + State string `json:"State"` + } `json:"Status"` + StripeSizebytes int `json:"StripeSizebytes"` + VolumeUniqueIdentifier string `json:"VolumeUniqueIdentifier"` + }{ + Id: "1", + CapacityMiB: 572293, + Description: "HPE Smart Storage Disk Drive View", + InterfaceType: "SAS", + LogicalDriveName: "TESTDRIVE NAME 2", + 
LogicalDriveNumber: 1, + Name: "HpeSmartStorageLogicalDrive", + Raid: "1", + Status: struct { + Health string `json:"Health"` + State string `json:"State"` + }{ + Health: "OK", + State: "Enabled", + }, + StripeSizebytes: 262144, + VolumeUniqueIdentifier: "FEDCBA12345", + }) + + GoodNvmeDrive = MustMarshal(struct { + Id string `json:"Id"` + Model string `json:"Model"` + Name string `json:"Name"` + MediaType string `json:"MediaType"` + Oem struct { + Hpe struct { + DriveStatus struct { + Health string `json:"Health"` + State string `json:"State"` + } `json:"DriveStatus"` + } `json:"Hpe"` + } `json:"Oem"` + PhysicalLocation struct { + PartLocation struct { + ServiceLabel string `json:"ServiceLabel"` + } `json:"PartLocation"` + } `json:"PhysicalLocation"` + Protocol string `json:"Protocol"` + FailurePredicted bool `json:"FailurePredicted"` + CapacityBytes int `json:"CapacityBytes"` + }{ + Id: "0", + Model: "TESTMODEL", + Name: "TESTNAME", + MediaType: "SSD", + Oem: struct { + Hpe struct { + DriveStatus struct { + Health string `json:"Health"` + State string `json:"State"` + } `json:"DriveStatus"` + } `json:"Hpe"` + }{ + Hpe: struct { + DriveStatus struct { + Health string `json:"Health"` + State string `json:"State"` + } `json:"DriveStatus"` + }{ + DriveStatus: struct { + Health string `json:"Health"` + State string `json:"State"` + }{ + Health: "OK", + State: "Enabled", + }, + }, + }, + PhysicalLocation: struct { + PartLocation struct { + ServiceLabel string `json:"ServiceLabel"` + } `json:"PartLocation"` + }{ + PartLocation: struct { + ServiceLabel string `json:"ServiceLabel"` + }{ + ServiceLabel: "Box 3:Bay 7", + }, + }, + Protocol: "NVMe", + FailurePredicted: false, + CapacityBytes: 1600321314816, + }) +) + +func Test_DL360_Drives(t *testing.T) { + server := httptest.NewServer(http.HandlerFunc(func(w http.ResponseWriter, r *http.Request) { + if r.URL.Path == "/redfish/v1/good/Systems/1/SmartStorage/ArrayControllers/" { + w.WriteHeader(http.StatusOK) + 
w.Write(MustMarshal(struct { + MembersCount int `json:"Members@odata.count"` + Members []struct { + URL string `json:"@odata.id"` + } `json:"Members"` + }{ + MembersCount: 2, + Members: []struct { + URL string `json:"@odata.id"` + }{ + { + URL: "/redfish/v1/good/Systems/1/SmartStorage/ArrayControllers/0/", + }, + { + URL: "/redfish/v1/good/Systems/1/SmartStorage/ArrayControllers/2/", + }, + }, + })) + return + } else if r.URL.Path == "/redfish/v1/good/Systems/1/SmartStorage/ArrayControllers/0/" { + w.WriteHeader(http.StatusOK) + w.Write(MustMarshal(struct { + LinksUpper struct { + LogicalDrives struct { + URL string `json:"@odata.id"` + } `json:"LogicalDrives"` + PhysicalDrives struct { + URL string `json:"@odata.id"` + } `json:"PhysicalDrives"` + } `json:"Links"` + }{ + LinksUpper: struct { + LogicalDrives struct { + URL string `json:"@odata.id"` + } `json:"LogicalDrives"` + PhysicalDrives struct { + URL string `json:"@odata.id"` + } `json:"PhysicalDrives"` + }{ + LogicalDrives: struct { + URL string `json:"@odata.id"` + }{ + URL: "/redfish/v1/good/Systems/1/SmartStorage/ArrayControllers/0/LogicalDrives/", + }, + PhysicalDrives: struct { + URL string `json:"@odata.id"` + }{ + URL: "/redfish/v1/good/Systems/1/SmartStorage/ArrayControllers/0/DiskDrives/", + }, + }, + })) + return + } else if r.URL.Path == "/redfish/v1/good/Systems/1/SmartStorage/ArrayControllers/2/" { + w.WriteHeader(http.StatusOK) + w.Write(MustMarshal(struct { + LinksLower struct { + LogicalDrives struct { + URL string `json:"href"` + } `json:"LogicalDrives"` + PhysicalDrives struct { + URL string `json:"href"` + } `json:"PhysicalDrives"` + } `json:"links"` + }{ + LinksLower: struct { + LogicalDrives struct { + URL string `json:"href"` + } `json:"LogicalDrives"` + PhysicalDrives struct { + URL string `json:"href"` + } `json:"PhysicalDrives"` + }{ + LogicalDrives: struct { + URL string `json:"href"` + }{ + URL: "/redfish/v1/good/Systems/1/SmartStorage/ArrayControllers/2/LogicalDrives/", + }, + 
PhysicalDrives: struct { + URL string `json:"href"` + }{ + URL: "/redfish/v1/good/Systems/1/SmartStorage/ArrayControllers/2/DiskDrives/", + }, + }, + })) + return + } else if r.URL.Path == "/redfish/v1/good/Systems/1/SmartStorage/ArrayControllers/0/LogicalDrives/" { + w.WriteHeader(http.StatusOK) + w.Write(MustMarshal(struct { + MembersCount int `json:"Members@odata.count"` + Members []struct { + URL string `json:"@odata.id"` + } `json:"Members"` + }{ + MembersCount: 1, + Members: []struct { + URL string `json:"@odata.id"` + }{ + { + URL: "/redfish/v1/Systems/1/SmartStorage/ArrayControllers/0/LogicalDrives/1/", + }, + }, + })) + return + } else if r.URL.Path == "/redfish/v1/good/Systems/1/SmartStorage/ArrayControllers/2/LogicalDrives/" { + w.WriteHeader(http.StatusOK) + w.Write(MustMarshal(struct { + MembersCount int `json:"Members@odata.count"` + Members []struct { + URL string `json:"@odata.id"` + } `json:"Members"` + }{ + MembersCount: 1, + Members: []struct { + URL string `json:"@odata.id"` + }{ + { + URL: "/redfish/v1/Systems/1/SmartStorage/ArrayControllers/2/LogicalDrives/1/", + }, + }, + })) + return + } else if r.URL.Path == "/redfish/v1/good/Systems/1/SmartStorage/ArrayControllers/0/DiskDrives/" { + w.WriteHeader(http.StatusOK) + w.Write(MustMarshal(struct { + MembersCount int `json:"Members@odata.count"` + Members []struct { + URL string `json:"@odata.id"` + } `json:"Members"` + }{ + MembersCount: 1, + Members: []struct { + URL string `json:"@odata.id"` + }{ + { + URL: "/redfish/v1/Systems/1/SmartStorage/ArrayControllers/0/DiskDrives/0/", + }, + }, + })) + return + } else if r.URL.Path == "/redfish/v1/good/Systems/1/SmartStorage/ArrayControllers/2/DiskDrives/" { + w.WriteHeader(http.StatusOK) + w.Write(MustMarshal(struct { + MembersCount int `json:"Members@odata.count"` + Members []struct { + URL string `json:"@odata.id"` + } `json:"Members"` + }{ + MembersCount: 1, + Members: []struct { + URL string `json:"@odata.id"` + }{ + { + URL: 
"/redfish/v1/Systems/1/SmartStorage/ArrayControllers/2/DiskDrives/0/", + }, + }, + })) + return + } else if r.URL.Path == "/redfish/v1/good/Chassis/1/" { + w.WriteHeader(http.StatusOK) + w.Write(MustMarshal(struct { + LinksUpper struct { + Drives []struct { + URL string `json:"@odata.id"` + } `json:"Drives"` + } `json:"Links"` + }{ + LinksUpper: struct { + Drives []struct { + URL string `json:"@odata.id"` + } `json:"Drives"` + }{ + Drives: []struct { + URL string `json:"@odata.id"` + }{ + { + URL: "/redfish/v1/Systems/1/Storage/DA000000/Drives/DA000000/", + }, + }, + }, + })) + return + } + w.WriteHeader(http.StatusInternalServerError) + w.Write([]byte("Unknown path - please create test case(s) for it")) + })) + defer server.Close() + + ctx := context.Background() + assert := assert.New(t) + + logicalDevMetrics := func(exp *Exporter, payload []byte) error { + err := exp.exportLogicalDriveMetrics(payload) + if err != nil { + return err + } + return nil + } + + physDevMetrics := func(exp *Exporter, payload []byte) error { + err := exp.exportPhysicalDriveMetrics(payload) + if err != nil { + return err + } + return nil + } + + nvmeDevMetrics := func(exp *Exporter, payload []byte) error { + err := exp.exportNVMeDriveMetrics(payload) + if err != nil { + return err + } + return nil + } + + tests := []struct { + name string + uri string + metricName string + metricRef1 string + metricRef2 string + exportFunc func(*Exporter, []byte) error + payload []byte + expected string + }{ + { + name: "Good Logical Drive Links Uppercase", + uri: "/redfish/v1/good", + metricName: "dl360_logical_drive_status", + metricRef1: "logicalDriveMetrics", + metricRef2: "raidStatus", + exportFunc: logicalDevMetrics, + payload: GoodLogicalDriveUpper, + expected: GoodLogicalDriveUpperResponse, + }, + { + name: "Good Logical Drive Links Lowercase", + uri: "/redfish/v1/good", + metricName: "dl360_logical_drive_status", + metricRef1: "logicalDriveMetrics", + metricRef2: "raidStatus", + exportFunc: 
logicalDevMetrics, + payload: GoodLogicalDriveLower, + expected: GoodLogicalDriveLowerResponse, + }, + { + name: "Good Disk Drive Links Uppercase", + uri: "/redfish/v1/good", + metricName: "dl360_disk_drive_status", + metricRef1: "diskDriveMetrics", + metricRef2: "driveStatus", + exportFunc: physDevMetrics, + payload: GoodDiskDriveUpper, + expected: GoodDiskDriveUpperResponse, + }, + { + name: "Good Disk Drive Links Lowercase", + uri: "/redfish/v1/good", + metricName: "dl360_disk_drive_status", + metricRef1: "diskDriveMetrics", + metricRef2: "driveStatus", + exportFunc: physDevMetrics, + payload: GoodDiskDriveLower, + expected: GoodDiskDriveLowerResponse, + }, + { + name: "Good Nvme Drive", + uri: "/redfish/v1/good", + metricName: "dl360_nvme_drive_status", + metricRef1: "nvmeMetrics", + metricRef2: "nvmeDriveStatus", + exportFunc: nvmeDevMetrics, + payload: GoodNvmeDrive, + expected: GoodNvmeDriveResponse, + }, + } + + for _, test := range tests { + t.Run(test.name, func(t *testing.T) { + var exporter prometheus.Collector + var err error + exporter, err = NewExporter(ctx, server.URL, test.uri, "") + assert.Nil(err) + assert.NotNil(exporter) + + prometheus.MustRegister(exporter) + + err = test.exportFunc(exporter.(*Exporter), test.payload) + if err != nil { + t.Error(err) + } + + metric := (*exporter.(*Exporter).deviceMetrics)[test.metricRef1] + m := (*metric)[test.metricRef2] + + assert.Empty(testutil.CollectAndCompare(m, strings.NewReader(test.expected), test.metricName)) + + prometheus.Unregister(exporter) + + }) + } +} diff --git a/hpe/dl360/exporter.go b/hpe/dl360/exporter.go index 8edddda..a9e983f 100644 --- a/hpe/dl360/exporter.go +++ b/hpe/dl360/exporter.go @@ -20,6 +20,7 @@ import ( "context" "crypto/tls" "encoding/json" + "errors" "fmt" "io" "net" @@ -69,20 +70,23 @@ var ( // Exporter collects chassis manager stats from the given URI and exports them using // the prometheus metrics package. 
type Exporter struct { - ctx context.Context - mutex sync.RWMutex - pool *pool.Pool - host string - credProfile string - - up prometheus.Gauge + ctx context.Context + mutex sync.RWMutex + pool *pool.Pool + host string + credProfile string deviceMetrics *map[string]*metrics } // NewExporter returns an initialized Exporter for HPE DL360 device. -func NewExporter(ctx context.Context, target, uri, profile string) *Exporter { +func NewExporter(ctx context.Context, target, uri, profile string) (*Exporter, error) { var fqdn *url.URL var tasks []*pool.Task + var exp = Exporter{ + ctx: ctx, + credProfile: profile, + deviceMetrics: NewDeviceMetrics(), + } log = zap.L() @@ -124,6 +128,14 @@ func NewExporter(ctx context.Context, target, uri, profile string) *Exporter { Host: target, } } + exp.host = fqdn.String() + + // check if host is on the ignored list, if so we immediately return + if _, ok := common.IgnoredDevices[exp.host]; ok { + var upMetric = (*exp.deviceMetrics)["up"] + (*upMetric)["up"].WithLabelValues().Set(float64(2)) + return &exp, nil + } // vars for drive parsing var ( @@ -137,89 +149,88 @@ func NewExporter(ctx context.Context, target, uri, profile string) *Exporter { // PARSING DRIVE ENDPOINTS // Get initial JSON return of /redfish/v1/Systems/1/SmartStorage/ArrayControllers/ set to output - output, err := getDriveEndpoint(fqdn.String()+uri+url, target, retryClient) - - // Loop through Members to get ArrayController URLs + driveResp, err := getDriveEndpoint(fqdn.String()+uri+url, target, retryClient) if err != nil { log.Error("api call "+fqdn.String()+uri+url+" failed - ", zap.Error(err), zap.Any("trace_id", ctx.Value("traceID"))) - return nil + if errors.Is(err, common.ErrInvalidCredential) { + common.IgnoredDevices[exp.host] = common.IgnoredDevice{ + Name: exp.host, + Endpoint: "https://" + exp.host + "/redfish/v1/Chassis", + Module: DL360, + CredentialProfile: exp.credProfile, + } + log.Info("added host "+exp.host+" to ignored list", zap.Any("trace_id", 
exp.ctx.Value("traceID"))) + var upMetric = (*exp.deviceMetrics)["up"] + (*upMetric)["up"].WithLabelValues().Set(float64(2)) + + return &exp, nil + } + return nil, err } - if output.MembersCount > 0 { - for _, member := range output.Members { - // for each ArrayController URL, get the JSON object - newOutput, err := getDriveEndpoint(fqdn.String()+member.URL, target, retryClient) + // Loop through Members to get ArrayController URLs + for _, member := range driveResp.Members { + // for each ArrayController URL, get the JSON object + // /redfish/v1/Systems/1/SmartStorage/ArrayControllers/X/ + arrayCtrlResp, err := getDriveEndpoint(fqdn.String()+member.URL, target, retryClient) + if err != nil { + log.Error("api call "+fqdn.String()+member.URL+" failed - ", zap.Error(err), zap.Any("trace_id", ctx.Value("traceID"))) + return nil, err + } + + // If LogicalDrives is present, parse logical drive endpoint until all urls are found + if arrayCtrlResp.LinksUpper.LogicalDrives.URL != "" { + logicalDriveOutput, err := getDriveEndpoint(fqdn.String()+arrayCtrlResp.LinksUpper.LogicalDrives.URL, target, retryClient) if err != nil { - log.Error("api call "+fqdn.String()+member.URL+" failed - ", zap.Error(err), zap.Any("trace_id", ctx.Value("traceID"))) - continue + log.Error("api call "+fqdn.String()+arrayCtrlResp.LinksUpper.LogicalDrives.URL+" failed - ", zap.Error(err), zap.Any("trace_id", ctx.Value("traceID"))) + return nil, err } - if newOutput.Links != nil { - // If LogicalDrives is present, parse logical drive endpoint until all urls are found - if newOutput.Links.LogicalDrives != nil && newOutput.Links.LogicalDrives.URL != "" { - logicalDriveOutput, err := getDriveEndpoint(fqdn.String()+newOutput.Links.LogicalDrives.URL, target, retryClient) - if err != nil { - log.Error("api call "+fqdn.String()+newOutput.Links.LogicalDrives.URL+" failed - ", zap.Error(err), zap.Any("trace_id", ctx.Value("traceID"))) - continue - } - - if logicalDriveOutput.MembersCount > 0 { - // loop 
through each Member in the "LogicalDrive" field - for _, member := range logicalDriveOutput.Members { - // append each URL in the Members array to the logicalDriveURLs array. - logicalDriveURLs = append(logicalDriveURLs, member.URL) - } - } - } + // loop through each Member in the "LogicalDrive" field + for _, member := range logicalDriveOutput.Members { + // append each URL in the Members array to the logicalDriveURLs array. + logicalDriveURLs = append(logicalDriveURLs, member.URL) + } + } - // If PhysicalDrives is present, parse physical drive endpoint until all urls are found - if newOutput.Links.PhysicalDrives != nil && newOutput.Links.PhysicalDrives.URL != "" { - physicalDriveOutput, err := getDriveEndpoint(fqdn.String()+newOutput.Links.PhysicalDrives.URL, target, retryClient) - if err != nil { - log.Error("api call "+fqdn.String()+newOutput.Links.PhysicalDrives.URL+" failed - ", zap.Error(err), zap.Any("trace_id", ctx.Value("traceID"))) - continue - } - - if physicalDriveOutput.MembersCount > 0 { - for _, member := range physicalDriveOutput.Members { - physicalDriveURLs = append(physicalDriveURLs, member.URL) - } - } - } + // If PhysicalDrives is present, parse physical drive endpoint until all urls are found + if arrayCtrlResp.LinksUpper.PhysicalDrives.URL != "" { + physicalDriveOutput, err := getDriveEndpoint(fqdn.String()+arrayCtrlResp.LinksUpper.PhysicalDrives.URL, target, retryClient) + if err != nil { + log.Error("api call "+fqdn.String()+arrayCtrlResp.LinksUpper.PhysicalDrives.URL+" failed - ", zap.Error(err), zap.Any("trace_id", ctx.Value("traceID"))) + return nil, err } - if newOutput.Link != nil { - // If LogicalDrives is present, parse logical drive endpoint until all urls are found - if newOutput.Link.LogicalDrives != nil && newOutput.Link.LogicalDrives.URL != "" { - logicalDriveOutput, err := getDriveEndpoint(fqdn.String()+newOutput.Link.LogicalDrives.URL, target, retryClient) - if err != nil { - log.Error("api call 
"+fqdn.String()+newOutput.Link.LogicalDrives.URL+" failed - ", zap.Error(err), zap.Any("trace_id", ctx.Value("traceID"))) - continue - } - - if logicalDriveOutput.MembersCount > 0 { - // loop through each Member in the "LogicalDrive" field - for _, member := range logicalDriveOutput.Members { - // append each URL in the Members array to the logicalDriveURLs array. - logicalDriveURLs = append(logicalDriveURLs, member.URL) - } - } - } + for _, member := range physicalDriveOutput.Members { + physicalDriveURLs = append(physicalDriveURLs, member.URL) + } + } - // If PhysicalDrives is present, parse physical drive endpoint until all urls are found - if newOutput.Link.PhysicalDrives != nil && newOutput.Link.PhysicalDrives.URL != "" { - physicalDriveOutput, err := getDriveEndpoint(fqdn.String()+newOutput.Link.PhysicalDrives.URL, target, retryClient) - if err != nil { - log.Error("api call "+fqdn.String()+newOutput.Links.PhysicalDrives.URL+" failed - ", zap.Error(err), zap.Any("trace_id", ctx.Value("traceID"))) - continue - } - - if physicalDriveOutput.MembersCount > 0 { - for _, member := range physicalDriveOutput.Members { - physicalDriveURLs = append(physicalDriveURLs, member.URL) - } - } - } + // If LogicalDrives is present, parse logical drive endpoint until all urls are found + if arrayCtrlResp.LinksLower.LogicalDrives.URL != "" { + logicalDriveOutput, err := getDriveEndpoint(fqdn.String()+arrayCtrlResp.LinksLower.LogicalDrives.URL, target, retryClient) + if err != nil { + log.Error("api call "+fqdn.String()+arrayCtrlResp.LinksLower.LogicalDrives.URL+" failed - ", zap.Error(err), zap.Any("trace_id", ctx.Value("traceID"))) + return nil, err + } + + // loop through each Member in the "LogicalDrive" field + for _, member := range logicalDriveOutput.Members { + // append each URL in the Members array to the logicalDriveURLs array. 
+ logicalDriveURLs = append(logicalDriveURLs, member.URL) + } + } + + // If PhysicalDrives is present, parse physical drive endpoint until all urls are found + if arrayCtrlResp.LinksLower.PhysicalDrives.URL != "" { + physicalDriveOutput, err := getDriveEndpoint(fqdn.String()+arrayCtrlResp.LinksLower.PhysicalDrives.URL, target, retryClient) + if err != nil { + log.Error("api call "+fqdn.String()+arrayCtrlResp.LinksLower.PhysicalDrives.URL+" failed - ", zap.Error(err), zap.Any("trace_id", ctx.Value("traceID"))) + return nil, err + } + + for _, member := range physicalDriveOutput.Members { + physicalDriveURLs = append(physicalDriveURLs, member.URL) } } } @@ -228,17 +239,13 @@ func NewExporter(ctx context.Context, target, uri, profile string) *Exporter { chassisOutput, err := getDriveEndpoint(fqdn.String()+uri+chassisUrl, target, retryClient) if err != nil { log.Error("api call "+fqdn.String()+uri+chassisUrl+" failed - ", zap.Error(err), zap.Any("trace_id", ctx.Value("traceID"))) - return nil + return nil, err } // parse through "Links" to find "Drives" array - if chassisOutput.Links != nil { - if len(chassisOutput.Links.Drives) > 0 { - // loop through drives array and append each odata.id url to nvmeDriveURLs list - for _, drive := range chassisOutput.Links.Drives { - nvmeDriveURLs = append(nvmeDriveURLs, drive.URL) - } - } + // loop through drives array and append each odata.id url to nvmeDriveURLs list + for _, drive := range chassisOutput.LinksUpper.Drives { + nvmeDriveURLs = append(nvmeDriveURLs, drive.URL) } // Loop through logicalDriveURLs, physicalDriveURLs, and nvmeDriveURLs and append each URL to the tasks pool @@ -260,22 +267,9 @@ func NewExporter(ctx context.Context, target, uri, profile string) *Exporter { pool.NewTask(common.Fetch(fqdn.String()+uri+"/Chassis/1/Power/", POWER, target, profile, retryClient)), pool.NewTask(common.Fetch(fqdn.String()+uri+"/Systems/1/", MEMORY, target, profile, retryClient))) - p := pool.NewPool(tasks, 1) + exp.pool = 
pool.NewPool(tasks, 1) - // Create new map[string]*metrics for each new Exporter - metrx := NewDeviceMetrics() - - return &Exporter{ - ctx: ctx, - pool: p, - host: fqdn.Host, - credProfile: profile, - up: prometheus.NewGauge(prometheus.GaugeOpts{ - Name: "up", - Help: "Was the last scrape of chassis monitor successful.", - }), - deviceMetrics: metrx, - } + return &exp, nil } // Describe describes all the metrics ever exported by the fishymetrics exporter. It @@ -286,7 +280,6 @@ func (e *Exporter) Describe(ch chan<- *prometheus.Desc) { n.Describe(ch) } } - ch <- e.up.Desc() } // Collect fetches the stats from configured fishymetrics location and delivers them @@ -301,10 +294,10 @@ func (e *Exporter) Collect(ch chan<- prometheus.Metric) { if _, ok := common.IgnoredDevices[e.host]; !ok { e.scrape() } else { - e.up.Set(float64(2)) + var upMetric = (*e.deviceMetrics)["up"] + (*upMetric)["up"].WithLabelValues().Set(float64(2)) } - ch <- e.up e.collectMetrics(ch) } @@ -338,7 +331,7 @@ func (e *Exporter) scrape() { if task.Err != nil { deviceState := uint8(0) // If credentials are incorrect we will add host to be ignored until manual intervention - if strings.Contains(task.Err.Error(), "401") { + if errors.Is(task.Err, common.ErrInvalidCredential) { common.IgnoredDevices[e.host] = common.IgnoredDevice{ Name: e.host, Endpoint: "https://" + e.host + "/redfish/v1/Chassis", @@ -350,7 +343,8 @@ func (e *Exporter) scrape() { } else { deviceState = 0 } - e.up.Set(float64(deviceState)) + var upMetric = (*e.deviceMetrics)["up"] + (*upMetric)["up"].WithLabelValues().Set(float64(deviceState)) log.Error("error from "+DL360, zap.Error(task.Err), zap.String("api", task.MetricType), zap.Any("trace_id", e.ctx.Value("traceID"))) return } @@ -384,7 +378,8 @@ func (e *Exporter) scrape() { state &= result } - e.up.Set(float64(state)) + var upMetric = (*e.deviceMetrics)["up"] + (*upMetric)["up"].WithLabelValues().Set(float64(state)) } @@ -399,16 +394,19 @@ func (e *Exporter) 
exportPhysicalDriveMetrics(body []byte) error { return fmt.Errorf("Error Unmarshalling DL360 DiskDriveMetrics - " + err.Error()) } // Check physical drive is enabled then check status and convert string to numeric values - - if dlphysical.Status.Health == "OK" { - state = OK + if dlphysical.Status.State == "Enabled" { + if dlphysical.Status.Health == "OK" { + state = OK + } else { + state = BAD + } } else { - state = BAD + state = DISABLED } // Physical drives need to have a unique identifier like location so as to not overwrite data // physical drives can have the same ID, but belong to a different ArrayController, therefore need more than just the ID as a unique identifier. - (*dlphysicaldrive)["driveStatus"].WithLabelValues(dlphysical.Name, dlphysical.Id, dlphysical.Location).Set(state) + (*dlphysicaldrive)["driveStatus"].WithLabelValues(dlphysical.Name, dlphysical.Id, dlphysical.Location, dlphysical.SerialNumber).Set(state) return nil } @@ -422,10 +420,14 @@ func (e *Exporter) exportLogicalDriveMetrics(body []byte) error { return fmt.Errorf("Error Unmarshalling DL360 LogicalDriveMetrics - " + err.Error()) } // Check physical drive is enabled then check status and convert string to numeric values - if dllogical.Status.Health == "OK" { - state = OK + if dllogical.Status.State == "Enabled" { + if dllogical.Status.Health == "OK" { + state = OK + } else { + state = BAD + } } else { - state = BAD + state = DISABLED } (*dllogicaldrive)["raidStatus"].WithLabelValues(dllogical.Name, dllogical.LogicalDriveName, dllogical.VolumeUniqueIdentifier, dllogical.Raid).Set(state) @@ -443,10 +445,14 @@ func (e *Exporter) exportNVMeDriveMetrics(body []byte) error { } // Check nvme drive is enabled then check status and convert string to numeric values - if dlnvme.Status.Health == "OK" { - state = OK + if dlnvme.Oem.Hpe.DriveStatus.State == "Enabled" { + if dlnvme.Oem.Hpe.DriveStatus.Health == "OK" { + state = OK + } else { + state = BAD + } } else { - state = BAD + state = DISABLED 
} (*dlnvmedrive)["nvmeDriveStatus"].WithLabelValues(dlnvme.Protocol, dlnvme.ID, dlnvme.PhysicalLocation.PartLocation.ServiceLabel).Set(state) @@ -595,6 +601,8 @@ func getDriveEndpoint(url, host string, client *retryablehttp.Client) (GenericDr } else if !(resp.StatusCode >= http.StatusOK && resp.StatusCode < http.StatusMultipleChoices) { return drive, fmt.Errorf("HTTP status %d", resp.StatusCode) } + } else if resp.StatusCode == http.StatusUnauthorized { + return drive, common.ErrInvalidCredential } else { return drive, fmt.Errorf("HTTP status %d", resp.StatusCode) } diff --git a/hpe/dl360/exporter_test.go b/hpe/dl360/exporter_test.go new file mode 100644 index 0000000..4e69e46 --- /dev/null +++ b/hpe/dl360/exporter_test.go @@ -0,0 +1,125 @@ +/* + * Copyright 2024 Comcast Cable Communications Management, LLC + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package dl360 + +import ( + "context" + "encoding/json" + "net/http" + "net/http/httptest" + "strings" + "testing" + + "github.com/prometheus/client_golang/prometheus" + "github.com/prometheus/client_golang/prometheus/testutil" + "github.com/stretchr/testify/assert" +) + +const ( + up2Response = ` + # HELP up was the last scrape of fishymetrics successful. 
+ # TYPE up gauge + up 2 + ` +) + +type TestErrorResponse struct { + Error TestError `json:"error"` +} + +type TestError struct { + Code string `json:"code"` + Message string `json:"message"` + ExtendedInfo []TestMessage `json:"@Message.ExtendedInfo"` +} + +type TestMessage struct { + MessageId string `json:"MessageId"` +} + +func MustMarshal(v interface{}) []byte { + b, err := json.Marshal(v) + if err != nil { + panic(err) + } + return b +} + +func Test_DL360_Exporter(t *testing.T) { + server := httptest.NewServer(http.HandlerFunc(func(w http.ResponseWriter, r *http.Request) { + if r.URL.Path == "/redfish/v1/badcred/Systems/1/SmartStorage/ArrayControllers/" { + w.WriteHeader(http.StatusUnauthorized) + w.Write(MustMarshal(TestErrorResponse{ + Error: TestError{ + Code: "iLO.0.10.ExtendedInfo", + Message: "See @Message.ExtendedInfo for more information.", + ExtendedInfo: []TestMessage{ + { + MessageId: "Base.1.0.NoValidSession", + }, + }, + }, + })) + return + } + w.WriteHeader(http.StatusInternalServerError) + w.Write([]byte("Unknown path - please create test case(s) for it")) + })) + defer server.Close() + + ctx := context.Background() + assert := assert.New(t) + + tests := []struct { + name string + uri string + metricName string + metricRef1 string + metricRef2 string + payload []byte + expected string + }{ + { + name: "Bad Credentials", + uri: "/redfish/v1/badcred", + metricName: "up", + metricRef1: "up", + metricRef2: "up", + expected: up2Response, + }, + } + + for _, test := range tests { + t.Run(test.name, func(t *testing.T) { + var exporter prometheus.Collector + var err error + exporter, err = NewExporter(ctx, server.URL, test.uri, "") + assert.Nil(err) + assert.NotNil(exporter) + + prometheus.MustRegister(exporter) + + metric := (*exporter.(*Exporter).deviceMetrics)[test.metricRef1] + m := (*metric)[test.metricRef2] + + assert.Empty(testutil.CollectAndCompare(m, strings.NewReader(test.expected), test.metricName)) + + prometheus.Unregister(exporter) + + }) 
+ } +} diff --git a/hpe/dl360/metrics.go b/hpe/dl360/metrics.go index 075bd22..86d3813 100644 --- a/hpe/dl360/metrics.go +++ b/hpe/dl360/metrics.go @@ -35,6 +35,10 @@ func newServerMetric(metricName string, docString string, constLabels prometheus func NewDeviceMetrics() *map[string]*metrics { var ( + UpMetric = &metrics{ + "up": newServerMetric("up", "was the last scrape of fishymetrics successful.", nil, []string{}), + } + ThermalMetrics = &metrics{ "fanSpeed": newServerMetric("dl360_thermal_fan_speed", "Current fan speed in the unit of percentage, possible values are 0 - 100", nil, []string{"name"}), "fanStatus": newServerMetric("dl360_thermal_fan_status", "Current fan status 1 = OK, 0 = BAD", nil, []string{"name"}), @@ -52,17 +56,17 @@ func NewDeviceMetrics() *map[string]*metrics { // Splitting out the three different types of drives to gather metrics on each (NVMe, Disk Drive, and Logical Drive) // NVMe Drive Metrics NVMeDriveMetrics = &metrics{ - "nvmeDriveStatus": newServerMetric("dl360_nvme_drive_status", "Current NVME status 1 = OK, 0 = BAD", nil, []string{"protocol", "id", "serviceLabel"}), + "nvmeDriveStatus": newServerMetric("dl360_nvme_drive_status", "Current NVME status 1 = OK, 0 = BAD, -1 = DISABLED", nil, []string{"protocol", "id", "serviceLabel"}), } // Phyiscal Storage Disk Drive Metrics DiskDriveMetrics = &metrics{ - "driveStatus": newServerMetric("dl360_disk_drive_status", "Current Disk Drive status 1 = OK, 0 = BAD", nil, []string{"name", "Id", "location"}), // DiskDriveStatus values + "driveStatus": newServerMetric("dl360_disk_drive_status", "Current Disk Drive status 1 = OK, 0 = BAD, -1 = DISABLED", nil, []string{"name", "id", "location", "serialnumber"}), // DiskDriveStatus values } // Logical Disk Drive Metrics LogicalDriveMetrics = &metrics{ - "raidStatus": newServerMetric("dl360_logical_drive_raid", "Current Logical Drive Raid", nil, []string{"name", "logicaldrivename", "volumeuniqueidentifier", "raid"}), // Logical Drive Raid value + 
"raidStatus": newServerMetric("dl360_logical_drive_status", "Current Logical Drive Raid 1 = OK, 0 = BAD, -1 = DISABLED", nil, []string{"name", "logicaldrivename", "volumeuniqueidentifier", "raid"}), // Logical Drive Raid value } MemoryMetrics = &metrics{ @@ -70,6 +74,7 @@ func NewDeviceMetrics() *map[string]*metrics { } Metrics = &map[string]*metrics{ + "up": UpMetric, "thermalMetrics": ThermalMetrics, "powerMetrics": PowerMetrics, "nvmeMetrics": NVMeDriveMetrics, diff --git a/hpe/dl380/drive.go b/hpe/dl380/drive.go index a0e34c6..0fdc270 100644 --- a/hpe/dl380/drive.go +++ b/hpe/dl380/drive.go @@ -24,6 +24,7 @@ type NVMeDriveMetrics struct { Model string `json:"Model"` Name string `json:"Name"` MediaType string `json:"MediaType"` + Oem Oem `json:"Oem"` PhysicalLocation PhysicalLocation `json:"PhysicalLocation"` Protocol string `json:"Protocol"` Status DriveStatus `json:"Status"` @@ -32,6 +33,7 @@ type NVMeDriveMetrics struct { } // Logical Drives +// // /redfish/v1/Systems/1/SmartStorage/ArrayControllers/X/LogicalDrives/X/ type LogicalDriveMetrics struct { Id string `json:"Id"` CapacityMiB int `json:"CapacityMiB"` @@ -47,6 +49,7 @@ type LogicalDriveMetrics struct { } // Disk Drives +// /redfish/v1/Systems/1/SmartStorage/ArrayControllers/X/DiskDrives/X/ type DiskDriveMetrics struct { Id string `json:"Id"` CapacityMiB int `json:"CapacityMiB"` @@ -61,27 +64,42 @@ type DiskDriveMetrics struct { // NVME, Logical, and Physical Disk Drive Status type DriveStatus struct { - Health string `json:"Health,omitempty"` - State string `json:"Enabled,omitempty"` + Health string `json:"Health"` + State string `json:"State,omitempty"` } // GenericDrive is used to iterate over differing drive endpoints +// /redfish/v1/Systems/1/SmartStorage/ArrayControllers/ for Logical and Physical Drives +// /redfish/v1/Chassis/1/Drives/ for NVMe Drive(s) type GenericDrive struct { - Members []struct { - URL string `json:"@odata.id"` - } `json:"Members,omitempty"` - Links struct { - Drives 
[]struct { - URL string `json:"@odata.id"` - } `json:"Drives,omitempty"` - LogicalDrives struct { - URL string `json:"@odata.id"` - } `json:"LogicalDrives,omitempty"` - PhysicalDrives struct { - URL string `json:"@odata.id"` - } `json:"PhysicalDrives,omitempty"` - } `json:"Links,omitempty"` - MembersCount int `json:"Members@odata.count,omitempty"` + Members []Members `json:"Members,omitempty"` + LinksUpper LinksUpper `json:"Links,omitempty"` + LinksLower LinksLower `json:"links,omitempty"` + MembersCount int `json:"Members@odata.count,omitempty"` +} + +type Members struct { + URL string `json:"@odata.id"` +} + +type LinksUpper struct { + Drives []URL `json:"Drives,omitempty"` + LogicalDrives URL `json:"LogicalDrives,omitempty"` + PhysicalDrives URL `json:"PhysicalDrives,omitempty"` +} + +type LinksLower struct { + Drives []HRef `json:"Drives,omitempty"` + LogicalDrives HRef `json:"LogicalDrives,omitempty"` + PhysicalDrives HRef `json:"PhysicalDrives,omitempty"` +} + +type HRef struct { + URL string `json:"href"` +} + +type URL struct { + URL string `json:"@odata.id"` } // PhysicalLocation @@ -102,6 +120,6 @@ type Oem struct { // Contents of Hpe type HpeCont struct { CurrentTemperatureCelsius int `json:"CurrentTemperatureCelsius"` - DriveStatus DriveStatus `json:"Status"` + DriveStatus DriveStatus `json:"DriveStatus"` NVMeID string `json:"NVMeId"` } diff --git a/hpe/dl380/drive_test.go b/hpe/dl380/drive_test.go new file mode 100644 index 0000000..a92a989 --- /dev/null +++ b/hpe/dl380/drive_test.go @@ -0,0 +1,562 @@ +/* + * Copyright 2024 Comcast Cable Communications Management, LLC + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. 
+ * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package dl380 + +import ( + "context" + "net/http" + "net/http/httptest" + "strings" + "testing" + + "github.com/prometheus/client_golang/prometheus" + "github.com/prometheus/client_golang/prometheus/testutil" + "github.com/stretchr/testify/assert" +) + +const ( + GoodLogicalDriveUpperResponse = ` + # HELP dl380_logical_drive_status Current Logical Drive Raid 1 = OK, 0 = BAD, -1 = DISABLED + # TYPE dl380_logical_drive_status gauge + dl380_logical_drive_status{logicaldrivename="TESTDRIVE NAME 1",name="HpeSmartStorageLogicalDrive",raid="1",volumeuniqueidentifier="ABCDEF12345"} 1 + ` + GoodDiskDriveUpperResponse = ` + # HELP dl380_disk_drive_status Current Disk Drive status 1 = OK, 0 = BAD, -1 = DISABLED + # TYPE dl380_disk_drive_status gauge + dl380_disk_drive_status{id="0",location="1I:1:1",name="HpeSmartStorageDiskDrive",serialnumber="ABC123"} 1 + ` + GoodLogicalDriveLowerResponse = ` + # HELP dl380_logical_drive_status Current Logical Drive Raid 1 = OK, 0 = BAD, -1 = DISABLED + # TYPE dl380_logical_drive_status gauge + dl380_logical_drive_status{logicaldrivename="TESTDRIVE NAME 2",name="HpeSmartStorageLogicalDrive",raid="1",volumeuniqueidentifier="FEDCBA12345"} 1 + ` + GoodDiskDriveLowerResponse = ` + # HELP dl380_disk_drive_status Current Disk Drive status 1 = OK, 0 = BAD, -1 = DISABLED + # TYPE dl380_disk_drive_status gauge + dl380_disk_drive_status{id="1",location="1I:1:2",name="HpeSmartStorageDiskDrive",serialnumber="DEF456"} 1 + ` + GoodNvmeDriveResponse = ` + # HELP dl380_nvme_drive_status Current NVME status 1 = OK, 0 = BAD, -1 = 
DISABLED + # TYPE dl380_nvme_drive_status gauge + dl380_nvme_drive_status{id="0",protocol="NVMe",serviceLabel="Box 3:Bay 7"} 1 + ` +) + +var ( + GoodDiskDriveUpper = MustMarshal(struct { + Id string `json:"Id"` + CapacityMiB int `json:"CapacityMiB"` + Description string `json:"Description"` + InterfaceType string `json:"InterfaceType"` + Name string `json:"Name"` + Model string `json:"Model"` + Status struct { + Health string `json:"Health"` + State string `json:"State"` + } `json:"Status"` + Location string `json:"Location"` + SerialNumber string `json:"SerialNumber"` + }{ + Id: "0", + CapacityMiB: 572325, + Description: "HPE Smart Storage Disk Drive View", + InterfaceType: "SAS", + Name: "HpeSmartStorageDiskDrive", + Model: "TESTMODEL", + Status: struct { + Health string `json:"Health"` + State string `json:"State"` + }{ + Health: "OK", + State: "Enabled", + }, + Location: "1I:1:1", + SerialNumber: "ABC123", + }) + + GoodDiskDriveLower = MustMarshal(struct { + Id string `json:"Id"` + CapacityMiB int `json:"CapacityMiB"` + Description string `json:"Description"` + InterfaceType string `json:"InterfaceType"` + Name string `json:"Name"` + Model string `json:"Model"` + Status struct { + Health string `json:"Health"` + State string `json:"State"` + } `json:"Status"` + Location string `json:"Location"` + SerialNumber string `json:"SerialNumber"` + }{ + Id: "1", + CapacityMiB: 572325, + Description: "HPE Smart Storage Disk Drive View", + InterfaceType: "SAS", + Name: "HpeSmartStorageDiskDrive", + Model: "TESTMODEL", + Status: struct { + Health string `json:"Health"` + State string `json:"State"` + }{ + Health: "OK", + State: "Enabled", + }, + Location: "1I:1:2", + SerialNumber: "DEF456", + }) + + GoodLogicalDriveUpper = MustMarshal(struct { + Id string `json:"Id"` + CapacityMiB int `json:"CapacityMiB"` + Description string `json:"Description"` + InterfaceType string `json:"InterfaceType"` + LogicalDriveName string `json:"LogicalDriveName"` + LogicalDriveNumber int 
`json:"LogicalDriveNumber"` + Name string `json:"Name"` + Raid string `json:"Raid"` + Status struct { + Health string `json:"Health"` + State string `json:"State"` + } `json:"Status"` + StripeSizebytes int `json:"StripeSizebytes"` + VolumeUniqueIdentifier string `json:"VolumeUniqueIdentifier"` + }{ + Id: "1", + CapacityMiB: 572293, + Description: "HPE Smart Storage Disk Drive View", + InterfaceType: "SAS", + LogicalDriveName: "TESTDRIVE NAME 1", + LogicalDriveNumber: 1, + Name: "HpeSmartStorageLogicalDrive", + Raid: "1", + Status: struct { + Health string `json:"Health"` + State string `json:"State"` + }{ + Health: "OK", + State: "Enabled", + }, + StripeSizebytes: 262144, + VolumeUniqueIdentifier: "ABCDEF12345", + }) + + GoodLogicalDriveLower = MustMarshal(struct { + Id string `json:"Id"` + CapacityMiB int `json:"CapacityMiB"` + Description string `json:"Description"` + InterfaceType string `json:"InterfaceType"` + LogicalDriveName string `json:"LogicalDriveName"` + LogicalDriveNumber int `json:"LogicalDriveNumber"` + Name string `json:"Name"` + Raid string `json:"Raid"` + Status struct { + Health string `json:"Health"` + State string `json:"State"` + } `json:"Status"` + StripeSizebytes int `json:"StripeSizebytes"` + VolumeUniqueIdentifier string `json:"VolumeUniqueIdentifier"` + }{ + Id: "1", + CapacityMiB: 572293, + Description: "HPE Smart Storage Disk Drive View", + InterfaceType: "SAS", + LogicalDriveName: "TESTDRIVE NAME 2", + LogicalDriveNumber: 1, + Name: "HpeSmartStorageLogicalDrive", + Raid: "1", + Status: struct { + Health string `json:"Health"` + State string `json:"State"` + }{ + Health: "OK", + State: "Enabled", + }, + StripeSizebytes: 262144, + VolumeUniqueIdentifier: "FEDCBA12345", + }) + + GoodNvmeDrive = MustMarshal(struct { + Id string `json:"Id"` + Model string `json:"Model"` + Name string `json:"Name"` + MediaType string `json:"MediaType"` + Oem struct { + Hpe struct { + DriveStatus struct { + Health string `json:"Health"` + State string 
`json:"State"` + } `json:"DriveStatus"` + } `json:"Hpe"` + } `json:"Oem"` + PhysicalLocation struct { + PartLocation struct { + ServiceLabel string `json:"ServiceLabel"` + } `json:"PartLocation"` + } `json:"PhysicalLocation"` + Protocol string `json:"Protocol"` + FailurePredicted bool `json:"FailurePredicted"` + CapacityBytes int `json:"CapacityBytes"` + }{ + Id: "0", + Model: "TESTMODEL", + Name: "TESTNAME", + MediaType: "SSD", + Oem: struct { + Hpe struct { + DriveStatus struct { + Health string `json:"Health"` + State string `json:"State"` + } `json:"DriveStatus"` + } `json:"Hpe"` + }{ + Hpe: struct { + DriveStatus struct { + Health string `json:"Health"` + State string `json:"State"` + } `json:"DriveStatus"` + }{ + DriveStatus: struct { + Health string `json:"Health"` + State string `json:"State"` + }{ + Health: "OK", + State: "Enabled", + }, + }, + }, + PhysicalLocation: struct { + PartLocation struct { + ServiceLabel string `json:"ServiceLabel"` + } `json:"PartLocation"` + }{ + PartLocation: struct { + ServiceLabel string `json:"ServiceLabel"` + }{ + ServiceLabel: "Box 3:Bay 7", + }, + }, + Protocol: "NVMe", + FailurePredicted: false, + CapacityBytes: 1600321314816, + }) +) + +func Test_DL380_Drives(t *testing.T) { + server := httptest.NewServer(http.HandlerFunc(func(w http.ResponseWriter, r *http.Request) { + if r.URL.Path == "/redfish/v1/good/Systems/1/SmartStorage/ArrayControllers/" { + w.WriteHeader(http.StatusOK) + w.Write(MustMarshal(struct { + MembersCount int `json:"Members@odata.count"` + Members []struct { + URL string `json:"@odata.id"` + } `json:"Members"` + }{ + MembersCount: 2, + Members: []struct { + URL string `json:"@odata.id"` + }{ + { + URL: "/redfish/v1/good/Systems/1/SmartStorage/ArrayControllers/0/", + }, + { + URL: "/redfish/v1/good/Systems/1/SmartStorage/ArrayControllers/2/", + }, + }, + })) + return + } else if r.URL.Path == "/redfish/v1/good/Systems/1/SmartStorage/ArrayControllers/0/" { + w.WriteHeader(http.StatusOK) + 
w.Write(MustMarshal(struct { + LinksUpper struct { + LogicalDrives struct { + URL string `json:"@odata.id"` + } `json:"LogicalDrives"` + PhysicalDrives struct { + URL string `json:"@odata.id"` + } `json:"PhysicalDrives"` + } `json:"Links"` + }{ + LinksUpper: struct { + LogicalDrives struct { + URL string `json:"@odata.id"` + } `json:"LogicalDrives"` + PhysicalDrives struct { + URL string `json:"@odata.id"` + } `json:"PhysicalDrives"` + }{ + LogicalDrives: struct { + URL string `json:"@odata.id"` + }{ + URL: "/redfish/v1/good/Systems/1/SmartStorage/ArrayControllers/0/LogicalDrives/", + }, + PhysicalDrives: struct { + URL string `json:"@odata.id"` + }{ + URL: "/redfish/v1/good/Systems/1/SmartStorage/ArrayControllers/0/DiskDrives/", + }, + }, + })) + return + } else if r.URL.Path == "/redfish/v1/good/Systems/1/SmartStorage/ArrayControllers/2/" { + w.WriteHeader(http.StatusOK) + w.Write(MustMarshal(struct { + LinksLower struct { + LogicalDrives struct { + URL string `json:"href"` + } `json:"LogicalDrives"` + PhysicalDrives struct { + URL string `json:"href"` + } `json:"PhysicalDrives"` + } `json:"links"` + }{ + LinksLower: struct { + LogicalDrives struct { + URL string `json:"href"` + } `json:"LogicalDrives"` + PhysicalDrives struct { + URL string `json:"href"` + } `json:"PhysicalDrives"` + }{ + LogicalDrives: struct { + URL string `json:"href"` + }{ + URL: "/redfish/v1/good/Systems/1/SmartStorage/ArrayControllers/2/LogicalDrives/", + }, + PhysicalDrives: struct { + URL string `json:"href"` + }{ + URL: "/redfish/v1/good/Systems/1/SmartStorage/ArrayControllers/2/DiskDrives/", + }, + }, + })) + return + } else if r.URL.Path == "/redfish/v1/good/Systems/1/SmartStorage/ArrayControllers/0/LogicalDrives/" { + w.WriteHeader(http.StatusOK) + w.Write(MustMarshal(struct { + MembersCount int `json:"Members@odata.count"` + Members []struct { + URL string `json:"@odata.id"` + } `json:"Members"` + }{ + MembersCount: 1, + Members: []struct { + URL string `json:"@odata.id"` + }{ + { + 
URL: "/redfish/v1/Systems/1/SmartStorage/ArrayControllers/0/LogicalDrives/1/", + }, + }, + })) + return + } else if r.URL.Path == "/redfish/v1/good/Systems/1/SmartStorage/ArrayControllers/2/LogicalDrives/" { + w.WriteHeader(http.StatusOK) + w.Write(MustMarshal(struct { + MembersCount int `json:"Members@odata.count"` + Members []struct { + URL string `json:"@odata.id"` + } `json:"Members"` + }{ + MembersCount: 1, + Members: []struct { + URL string `json:"@odata.id"` + }{ + { + URL: "/redfish/v1/Systems/1/SmartStorage/ArrayControllers/2/LogicalDrives/1/", + }, + }, + })) + return + } else if r.URL.Path == "/redfish/v1/good/Systems/1/SmartStorage/ArrayControllers/0/DiskDrives/" { + w.WriteHeader(http.StatusOK) + w.Write(MustMarshal(struct { + MembersCount int `json:"Members@odata.count"` + Members []struct { + URL string `json:"@odata.id"` + } `json:"Members"` + }{ + MembersCount: 1, + Members: []struct { + URL string `json:"@odata.id"` + }{ + { + URL: "/redfish/v1/Systems/1/SmartStorage/ArrayControllers/0/DiskDrives/0/", + }, + }, + })) + return + } else if r.URL.Path == "/redfish/v1/good/Systems/1/SmartStorage/ArrayControllers/2/DiskDrives/" { + w.WriteHeader(http.StatusOK) + w.Write(MustMarshal(struct { + MembersCount int `json:"Members@odata.count"` + Members []struct { + URL string `json:"@odata.id"` + } `json:"Members"` + }{ + MembersCount: 1, + Members: []struct { + URL string `json:"@odata.id"` + }{ + { + URL: "/redfish/v1/Systems/1/SmartStorage/ArrayControllers/2/DiskDrives/0/", + }, + }, + })) + return + } else if r.URL.Path == "/redfish/v1/good/Chassis/1/" { + w.WriteHeader(http.StatusOK) + w.Write(MustMarshal(struct { + LinksUpper struct { + Drives []struct { + URL string `json:"@odata.id"` + } `json:"Drives"` + } `json:"Links"` + }{ + LinksUpper: struct { + Drives []struct { + URL string `json:"@odata.id"` + } `json:"Drives"` + }{ + Drives: []struct { + URL string `json:"@odata.id"` + }{ + { + URL: 
"/redfish/v1/Systems/1/Storage/DA000000/Drives/DA000000/", + }, + }, + }, + })) + return + } + w.WriteHeader(http.StatusInternalServerError) + w.Write([]byte("Unknown path - please create test case(s) for it")) + })) + defer server.Close() + + ctx := context.Background() + assert := assert.New(t) + + logicalDevMetrics := func(exp *Exporter, payload []byte) error { + err := exp.exportLogicalDriveMetrics(payload) + if err != nil { + return err + } + return nil + } + + physDevMetrics := func(exp *Exporter, payload []byte) error { + err := exp.exportPhysicalDriveMetrics(payload) + if err != nil { + return err + } + return nil + } + + nvmeDevMetrics := func(exp *Exporter, payload []byte) error { + err := exp.exportNVMeDriveMetrics(payload) + if err != nil { + return err + } + return nil + } + + tests := []struct { + name string + uri string + metricName string + metricRef1 string + metricRef2 string + exportFunc func(*Exporter, []byte) error + payload []byte + expected string + }{ + { + name: "Good Logical Drive Links Uppercase", + uri: "/redfish/v1/good", + metricName: "dl380_logical_drive_status", + metricRef1: "logicalDriveMetrics", + metricRef2: "raidStatus", + exportFunc: logicalDevMetrics, + payload: GoodLogicalDriveUpper, + expected: GoodLogicalDriveUpperResponse, + }, + { + name: "Good Logical Drive Links Lowercase", + uri: "/redfish/v1/good", + metricName: "dl380_logical_drive_status", + metricRef1: "logicalDriveMetrics", + metricRef2: "raidStatus", + exportFunc: logicalDevMetrics, + payload: GoodLogicalDriveLower, + expected: GoodLogicalDriveLowerResponse, + }, + { + name: "Good Disk Drive Links Uppercase", + uri: "/redfish/v1/good", + metricName: "dl380_disk_drive_status", + metricRef1: "diskDriveMetrics", + metricRef2: "driveStatus", + exportFunc: physDevMetrics, + payload: GoodDiskDriveUpper, + expected: GoodDiskDriveUpperResponse, + }, + { + name: "Good Disk Drive Links Lowercase", + uri: "/redfish/v1/good", + metricName: "dl380_disk_drive_status", + 
metricRef1: "diskDriveMetrics", + metricRef2: "driveStatus", + exportFunc: physDevMetrics, + payload: GoodDiskDriveLower, + expected: GoodDiskDriveLowerResponse, + }, + { + name: "Good Nvme Drive", + uri: "/redfish/v1/good", + metricName: "dl380_nvme_drive_status", + metricRef1: "nvmeMetrics", + metricRef2: "nvmeDriveStatus", + exportFunc: nvmeDevMetrics, + payload: GoodNvmeDrive, + expected: GoodNvmeDriveResponse, + }, + } + + for _, test := range tests { + t.Run(test.name, func(t *testing.T) { + var exporter prometheus.Collector + var err error + exporter, err = NewExporter(ctx, server.URL, test.uri, "") + assert.Nil(err) + assert.NotNil(exporter) + + prometheus.MustRegister(exporter) + + err = test.exportFunc(exporter.(*Exporter), test.payload) + if err != nil { + t.Error(err) + } + + metric := (*exporter.(*Exporter).deviceMetrics)[test.metricRef1] + m := (*metric)[test.metricRef2] + + assert.Empty(testutil.CollectAndCompare(m, strings.NewReader(test.expected), test.metricName)) + + prometheus.Unregister(exporter) + + }) + } +} diff --git a/hpe/dl380/exporter.go b/hpe/dl380/exporter.go index 6554070..6af0013 100644 --- a/hpe/dl380/exporter.go +++ b/hpe/dl380/exporter.go @@ -20,6 +20,7 @@ import ( "context" "crypto/tls" "encoding/json" + "errors" "fmt" "io" "net" @@ -69,20 +70,23 @@ var ( // Exporter collects chassis manager stats from the given URI and exports them using // the prometheus metrics package. type Exporter struct { - ctx context.Context - mutex sync.RWMutex - pool *pool.Pool - host string - credProfile string - - up prometheus.Gauge + ctx context.Context + mutex sync.RWMutex + pool *pool.Pool + host string + credProfile string deviceMetrics *map[string]*metrics } // NewExporter returns an initialized Exporter for HPE DL380 device. 
-func NewExporter(ctx context.Context, target, uri, profile string) *Exporter { +func NewExporter(ctx context.Context, target, uri, profile string) (*Exporter, error) { var fqdn *url.URL var tasks []*pool.Task + var exp = Exporter{ + ctx: ctx, + credProfile: profile, + deviceMetrics: NewDeviceMetrics(), + } log = zap.L() @@ -124,6 +128,14 @@ func NewExporter(ctx context.Context, target, uri, profile string) *Exporter { Host: target, } } + exp.host = fqdn.String() + + // check if host is on the ignored list, if so we immediately return + if _, ok := common.IgnoredDevices[exp.host]; ok { + var upMetric = (*exp.deviceMetrics)["up"] + (*upMetric)["up"].WithLabelValues().Set(float64(2)) + return &exp, nil + } // vars for drive parsing var ( @@ -137,53 +149,86 @@ func NewExporter(ctx context.Context, target, uri, profile string) *Exporter { // PARSING DRIVE ENDPOINTS // Get initial JSON return of /redfish/v1/Systems/1/SmartStorage/ArrayControllers/ set to output - output, err := getDriveEndpoint(fqdn.String()+uri+url, target, retryClient) - - // Loop through Members to get ArrayController URLs + driveResp, err := getDriveEndpoint(fqdn.String()+uri+url, target, retryClient) if err != nil { log.Error("api call "+fqdn.String()+uri+url+" failed - ", zap.Error(err), zap.Any("trace_id", ctx.Value("traceID"))) - return nil + if errors.Is(err, common.ErrInvalidCredential) { + common.IgnoredDevices[exp.host] = common.IgnoredDevice{ + Name: exp.host, + Endpoint: "https://" + exp.host + "/redfish/v1/Chassis", + Module: DL380, + CredentialProfile: exp.credProfile, + } + log.Info("added host "+exp.host+" to ignored list", zap.Any("trace_id", exp.ctx.Value("traceID"))) + var upMetric = (*exp.deviceMetrics)["up"] + (*upMetric)["up"].WithLabelValues().Set(float64(2)) + + return &exp, nil + } + return nil, err } - if output.MembersCount > 0 { - for _, member := range output.Members { - // for each ArrayController URL, get the JSON object - newOutput, err := 
getDriveEndpoint(fqdn.String()+member.URL, target, retryClient) + for _, member := range driveResp.Members { + // for each ArrayController URL, get the JSON object + // /redfish/v1/Systems/1/SmartStorage/ArrayControllers/X/ + arrayCtrlResp, err := getDriveEndpoint(fqdn.String()+member.URL, target, retryClient) + if err != nil { + log.Error("api call "+fqdn.String()+member.URL+" failed - ", zap.Error(err), zap.Any("trace_id", ctx.Value("traceID"))) + return nil, err + } + + // If LogicalDrives is present, parse logical drive endpoint until all urls are found + if arrayCtrlResp.LinksUpper.LogicalDrives.URL != "" { + logicalDriveOutput, err := getDriveEndpoint(fqdn.String()+arrayCtrlResp.LinksUpper.LogicalDrives.URL, target, retryClient) if err != nil { - log.Error("api call "+fqdn.String()+member.URL+" failed - ", zap.Error(err), zap.Any("trace_id", ctx.Value("traceID"))) - continue + log.Error("api call "+fqdn.String()+arrayCtrlResp.LinksUpper.LogicalDrives.URL+" failed - ", zap.Error(err), zap.Any("trace_id", ctx.Value("traceID"))) + return nil, err } - // If LogicalDrives is present, parse logical drive endpoint until all urls are found - if newOutput.Links.LogicalDrives.URL != "" { - logicalDriveOutput, err := getDriveEndpoint(fqdn.String()+newOutput.Links.LogicalDrives.URL, target, retryClient) - if err != nil { - log.Error("api call "+fqdn.String()+newOutput.Links.LogicalDrives.URL+" failed - ", zap.Error(err), zap.Any("trace_id", ctx.Value("traceID"))) - continue - } + // loop through each Member in the "LogicalDrive" field + for _, member := range logicalDriveOutput.Members { + // append each URL in the Members array to the logicalDriveURLs array. + logicalDriveURLs = append(logicalDriveURLs, member.URL) + } + } - if logicalDriveOutput.MembersCount > 0 { - // loop through each Member in the "LogicalDrive" field - for _, member := range logicalDriveOutput.Members { - // append each URL in the Members array to the logicalDriveURLs array. 
- logicalDriveURLs = append(logicalDriveURLs, member.URL) - } - } + // If PhysicalDrives is present, parse physical drive endpoint until all urls are found + if arrayCtrlResp.LinksUpper.PhysicalDrives.URL != "" { + physicalDriveOutput, err := getDriveEndpoint(fqdn.String()+arrayCtrlResp.LinksUpper.PhysicalDrives.URL, target, retryClient) + + if err != nil { + log.Error("api call "+fqdn.String()+arrayCtrlResp.LinksUpper.PhysicalDrives.URL+" failed - ", zap.Error(err), zap.Any("trace_id", ctx.Value("traceID"))) + return nil, err } - // If PhysicalDrives is present, parse physical drive endpoint until all urls are found - if newOutput.Links.PhysicalDrives.URL != "" { - physicalDriveOutput, err := getDriveEndpoint(fqdn.String()+newOutput.Links.PhysicalDrives.URL, target, retryClient) + for _, member := range physicalDriveOutput.Members { + physicalDriveURLs = append(physicalDriveURLs, member.URL) + } + } - if err != nil { - log.Error("api call "+fqdn.String()+newOutput.Links.PhysicalDrives.URL+" failed - ", zap.Error(err), zap.Any("trace_id", ctx.Value("traceID"))) - continue - } - if physicalDriveOutput.MembersCount > 0 { - for _, member := range physicalDriveOutput.Members { - physicalDriveURLs = append(physicalDriveURLs, member.URL) - } - } + if arrayCtrlResp.LinksLower.LogicalDrives.URL != "" { + logicalDriveOutput, err := getDriveEndpoint(fqdn.String()+arrayCtrlResp.LinksLower.LogicalDrives.URL, target, retryClient) + if err != nil { + log.Error("api call "+fqdn.String()+arrayCtrlResp.LinksLower.LogicalDrives.URL+" failed - ", zap.Error(err), zap.Any("trace_id", ctx.Value("traceID"))) + return nil, err + } + + // loop through each Member in the "LogicalDrive" field + for _, member := range logicalDriveOutput.Members { + // append each URL in the Members array to the logicalDriveURLs array. 
+ logicalDriveURLs = append(logicalDriveURLs, member.URL) + } + } + + if arrayCtrlResp.LinksLower.PhysicalDrives.URL != "" { + physicalDriveOutput, err := getDriveEndpoint(fqdn.String()+arrayCtrlResp.LinksLower.PhysicalDrives.URL, target, retryClient) + if err != nil { + log.Error("api call "+fqdn.String()+arrayCtrlResp.LinksLower.PhysicalDrives.URL+" failed - ", zap.Error(err), zap.Any("trace_id", ctx.Value("traceID"))) + return nil, err + } + + for _, member := range physicalDriveOutput.Members { + physicalDriveURLs = append(physicalDriveURLs, member.URL) } } } @@ -192,15 +237,13 @@ func NewExporter(ctx context.Context, target, uri, profile string) *Exporter { chassisOutput, err := getDriveEndpoint(fqdn.String()+uri+chassisUrl, target, retryClient) if err != nil { log.Error("api call "+fqdn.String()+uri+chassisUrl+" failed - ", zap.Error(err), zap.Any("trace_id", ctx.Value("traceID"))) - return nil + return nil, err } // parse through "Links" to find "Drives" array - if len(chassisOutput.Links.Drives) > 0 { - // loop through drives array and append each odata.id url to nvmeDriveURLs list - for _, drive := range chassisOutput.Links.Drives { - nvmeDriveURLs = append(nvmeDriveURLs, drive.URL) - } + // loop through drives array and append each odata.id url to nvmeDriveURLs list + for _, drive := range chassisOutput.LinksUpper.Drives { + nvmeDriveURLs = append(nvmeDriveURLs, drive.URL) } // Loop through logicalDriveURLs, physicalDriveURLs, and nvmeDriveURLs and append each URL to the tasks pool @@ -222,23 +265,9 @@ func NewExporter(ctx context.Context, target, uri, profile string) *Exporter { pool.NewTask(common.Fetch(fqdn.String()+uri+"/Chassis/1/Power/", POWER, target, profile, retryClient)), pool.NewTask(common.Fetch(fqdn.String()+uri+"/Systems/1/", MEMORY, target, profile, retryClient))) - // Prepare the pool of tasks - p := pool.NewPool(tasks, 1) - - // Create new map[string]*metrics for each new Exporter - metrx := NewDeviceMetrics() + exp.pool = 
pool.NewPool(tasks, 1) - return &Exporter{ - ctx: ctx, - pool: p, - host: fqdn.Host, - credProfile: profile, - up: prometheus.NewGauge(prometheus.GaugeOpts{ - Name: "up", - Help: "Was the last scrape of chassis monitor successful.", - }), - deviceMetrics: metrx, - } + return &exp, nil } // Describe describes all the metrics ever exported by the fishymetrics exporter. It @@ -249,7 +278,6 @@ func (e *Exporter) Describe(ch chan<- *prometheus.Desc) { n.Describe(ch) } } - ch <- e.up.Desc() } // Collect fetches the stats from configured fishymetrics location and delivers them @@ -264,10 +292,10 @@ func (e *Exporter) Collect(ch chan<- prometheus.Metric) { if _, ok := common.IgnoredDevices[e.host]; !ok { e.scrape() } else { - e.up.Set(float64(2)) + var upMetric = (*e.deviceMetrics)["up"] + (*upMetric)["up"].WithLabelValues().Set(float64(2)) } - ch <- e.up e.collectMetrics(ch) } @@ -301,7 +329,7 @@ func (e *Exporter) scrape() { if task.Err != nil { deviceState := uint8(0) // If credentials are incorrect we will add host to be ignored until manual intervention - if strings.Contains(task.Err.Error(), "401") { + if errors.Is(task.Err, common.ErrInvalidCredential) { common.IgnoredDevices[e.host] = common.IgnoredDevice{ Name: e.host, Endpoint: "https://" + e.host + "/redfish/v1/Chassis", @@ -313,7 +341,8 @@ func (e *Exporter) scrape() { } else { deviceState = 0 } - e.up.Set(float64(deviceState)) + var upMetric = (*e.deviceMetrics)["up"] + (*upMetric)["up"].WithLabelValues().Set(float64(deviceState)) log.Error("error from "+DL380, zap.Error(task.Err), zap.String("api", task.MetricType), zap.Any("trace_id", e.ctx.Value("traceID"))) return } @@ -347,7 +376,8 @@ func (e *Exporter) scrape() { state &= result } - e.up.Set(float64(state)) + var upMetric = (*e.deviceMetrics)["up"] + (*upMetric)["up"].WithLabelValues().Set(float64(state)) } @@ -362,16 +392,19 @@ func (e *Exporter) exportPhysicalDriveMetrics(body []byte) error { return fmt.Errorf("Error Unmarshalling DL380 
DiskDriveMetrics - " + err.Error()) } // Check physical drive is enabled then check status and convert string to numeric values - - if dlphysical.Status.Health == "OK" { - state = OK + if dlphysical.Status.State == "Enabled" { + if dlphysical.Status.Health == "OK" { + state = OK + } else { + state = BAD + } } else { - state = BAD + state = DISABLED } // Physical drives need to have a unique identifier like location so as to not overwrite data // physical drives can have the same ID, but belong to a different ArrayController, therefore need more than just the ID as a unique identifier. - (*dlphysicaldrive)["driveStatus"].WithLabelValues(dlphysical.Name, dlphysical.Id, dlphysical.Location).Set(state) + (*dlphysicaldrive)["driveStatus"].WithLabelValues(dlphysical.Name, dlphysical.Id, dlphysical.Location, dlphysical.SerialNumber).Set(state) return nil } @@ -385,10 +418,14 @@ func (e *Exporter) exportLogicalDriveMetrics(body []byte) error { return fmt.Errorf("Error Unmarshalling DL380 LogicalDriveMetrics - " + err.Error()) } // Check physical drive is enabled then check status and convert string to numeric values - if dllogical.Status.Health == "OK" { - state = OK + if dllogical.Status.State == "Enabled" { + if dllogical.Status.Health == "OK" { + state = OK + } else { + state = BAD + } } else { - state = BAD + state = DISABLED } (*dllogicaldrive)["raidStatus"].WithLabelValues(dllogical.Name, dllogical.LogicalDriveName, dllogical.VolumeUniqueIdentifier, dllogical.Raid).Set(state) @@ -406,10 +443,14 @@ func (e *Exporter) exportNVMeDriveMetrics(body []byte) error { } // Check nvme drive is enabled then check status and convert string to numeric values - if dlnvme.Status.Health == "OK" { - state = OK + if dlnvme.Oem.Hpe.DriveStatus.State == "Enabled" { + if dlnvme.Oem.Hpe.DriveStatus.Health == "OK" { + state = OK + } else { + state = BAD + } } else { - state = BAD + state = DISABLED } (*dlnvmedrive)["nvmeDriveStatus"].WithLabelValues(dlnvme.Protocol, dlnvme.ID, 
dlnvme.PhysicalLocation.PartLocation.ServiceLabel).Set(state) @@ -537,6 +578,8 @@ func getDriveEndpoint(url, host string, client *retryablehttp.Client) (GenericDr } else if !(resp.StatusCode >= http.StatusOK && resp.StatusCode < http.StatusMultipleChoices) { return drive, fmt.Errorf("HTTP status %d", resp.StatusCode) } + } else if resp.StatusCode == http.StatusUnauthorized { + return drive, common.ErrInvalidCredential } else { return drive, fmt.Errorf("HTTP status %d", resp.StatusCode) } diff --git a/hpe/dl380/exporter_test.go b/hpe/dl380/exporter_test.go new file mode 100644 index 0000000..84a331b --- /dev/null +++ b/hpe/dl380/exporter_test.go @@ -0,0 +1,125 @@ +/* + * Copyright 2024 Comcast Cable Communications Management, LLC + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package dl380 + +import ( + "context" + "encoding/json" + "net/http" + "net/http/httptest" + "strings" + "testing" + + "github.com/prometheus/client_golang/prometheus" + "github.com/prometheus/client_golang/prometheus/testutil" + "github.com/stretchr/testify/assert" +) + +const ( + up2Response = ` + # HELP up was the last scrape of fishymetrics successful. 
+ # TYPE up gauge + up 2 + ` +) + +type TestErrorResponse struct { + Error TestError `json:"error"` +} + +type TestError struct { + Code string `json:"code"` + Message string `json:"message"` + ExtendedInfo []TestMessage `json:"@Message.ExtendedInfo"` +} + +type TestMessage struct { + MessageId string `json:"MessageId"` +} + +func MustMarshal(v interface{}) []byte { + b, err := json.Marshal(v) + if err != nil { + panic(err) + } + return b +} + +func Test_DL380_Exporter(t *testing.T) { + server := httptest.NewServer(http.HandlerFunc(func(w http.ResponseWriter, r *http.Request) { + if r.URL.Path == "/redfish/v1/badcred/Systems/1/SmartStorage/ArrayControllers/" { + w.WriteHeader(http.StatusUnauthorized) + w.Write(MustMarshal(TestErrorResponse{ + Error: TestError{ + Code: "iLO.0.10.ExtendedInfo", + Message: "See @Message.ExtendedInfo for more information.", + ExtendedInfo: []TestMessage{ + { + MessageId: "Base.1.0.NoValidSession", + }, + }, + }, + })) + return + } + w.WriteHeader(http.StatusInternalServerError) + w.Write([]byte("Unknown path - please create test case(s) for it")) + })) + defer server.Close() + + ctx := context.Background() + assert := assert.New(t) + + tests := []struct { + name string + uri string + metricName string + metricRef1 string + metricRef2 string + payload []byte + expected string + }{ + { + name: "Bad Credentials", + uri: "/redfish/v1/badcred", + metricName: "up", + metricRef1: "up", + metricRef2: "up", + expected: up2Response, + }, + } + + for _, test := range tests { + t.Run(test.name, func(t *testing.T) { + var exporter prometheus.Collector + var err error + exporter, err = NewExporter(ctx, server.URL, test.uri, "") + assert.Nil(err) + assert.NotNil(exporter) + + prometheus.MustRegister(exporter) + + metric := (*exporter.(*Exporter).deviceMetrics)[test.metricRef1] + m := (*metric)[test.metricRef2] + + assert.Empty(testutil.CollectAndCompare(m, strings.NewReader(test.expected), test.metricName)) + + prometheus.Unregister(exporter) + + }) 
+ } +} diff --git a/hpe/dl380/metrics.go b/hpe/dl380/metrics.go index dd9dec5..603f644 100644 --- a/hpe/dl380/metrics.go +++ b/hpe/dl380/metrics.go @@ -35,6 +35,10 @@ func newServerMetric(metricName string, docString string, constLabels prometheus func NewDeviceMetrics() *map[string]*metrics { var ( + UpMetric = &metrics{ + "up": newServerMetric("up", "was the last scrape of fishymetrics successful.", nil, []string{}), + } + ThermalMetrics = &metrics{ "fanSpeed": newServerMetric("dl380_thermal_fan_speed", "Current fan speed in the unit of percentage, possible values are 0 - 100", nil, []string{"name"}), "fanStatus": newServerMetric("dl380_thermal_fan_status", "Current fan status 1 = OK, 0 = BAD", nil, []string{"name"}), @@ -52,17 +56,17 @@ func NewDeviceMetrics() *map[string]*metrics { // Splitting out the three different types of drives to gather metrics on each (NVMe, Disk Drive, and Logical Drive) // NVMe Drive Metrics NVMeDriveMetrics = &metrics{ - "nvmeDriveStatus": newServerMetric("dl380_nvme_drive_status", "Current NVME status 1 = OK, 0 = BAD", nil, []string{"protocol", "id", "serviceLabel"}), + "nvmeDriveStatus": newServerMetric("dl380_nvme_drive_status", "Current NVME status 1 = OK, 0 = BAD, -1 = DISABLED", nil, []string{"protocol", "id", "serviceLabel"}), } // Phyiscal Storage Disk Drive Metrics DiskDriveMetrics = &metrics{ - "driveStatus": newServerMetric("dl380_disk_drive_status", "Current Disk Drive status 1 = OK, 0 = BAD", nil, []string{"name", "Id", "location"}), // DiskDriveStatus values + "driveStatus": newServerMetric("dl380_disk_drive_status", "Current Disk Drive status 1 = OK, 0 = BAD, -1 = DISABLED", nil, []string{"name", "id", "location", "serialnumber"}), // DiskDriveStatus values } // Logical Disk Drive Metrics LogicalDriveMetrics = &metrics{ - "raidStatus": newServerMetric("dl380_logical_drive_raid", "Current Logical Drive Raid", nil, []string{"name", "logicaldrivename", "volumeuniqueidentifier", "raid"}), // Logical Drive Raid value +
"raidStatus": newServerMetric("dl380_logical_drive_status", "Current Logical Drive Raid 1 = OK, 0 = BAD, -1 = DISABLED", nil, []string{"name", "logicaldrivename", "volumeuniqueidentifier", "raid"}), // Logical Drive Raid value } MemoryMetrics = &metrics{ @@ -70,6 +74,7 @@ func NewDeviceMetrics() *map[string]*metrics { } Metrics = &map[string]*metrics{ + "up": UpMetric, "thermalMetrics": ThermalMetrics, "powerMetrics": PowerMetrics, "nvmeMetrics": NVMeDriveMetrics, diff --git a/hpe/dl560/drive.go b/hpe/dl560/drive.go index 3c055cf..6302479 100644 --- a/hpe/dl560/drive.go +++ b/hpe/dl560/drive.go @@ -18,42 +18,104 @@ package dl560 // /redfish/v1/Systems/1/SmartStorage/ArrayControllers/ -type GenericDrive struct { - Members []struct { - URL string `json:"@odata.id"` - } `json:"Members"` - MembersCount int `json:"Members@odata.count,omitempty"` - Links struct { - LogicalDrives struct { - URL string `json:"href"` - } `json:"LogicalDrives,omitempty"` - PhysicalDrives struct { - URL string `json:"href"` - } `json:"PhysicalDrives,omitempty"` - } +// NVME's +// /redfish/v1/Chassis/1/ +// NVMeDriveMetrics is the top level json object for DL560 NVMe Metrics Metadata +type NVMeDriveMetrics struct { + ID string `json:"Id"` + Model string `json:"Model"` + Name string `json:"Name"` + MediaType string `json:"MediaType"` + Oem Oem `json:"Oem"` + PhysicalLocation PhysicalLocation `json:"PhysicalLocation"` + Protocol string `json:"Protocol"` + Status Status `json:"Status"` + FailurePredicted bool `json:"FailurePredicted"` + CapacityBytes int `json:"CapacityBytes"` } +// Logical Drives // /redfish/v1/Systems/1/SmartStorage/ArrayControllers/X/LogicalDrives/X/ type LogicalDriveMetrics struct { - ID string `json:"Id"` - CapacityMiB int `json:"CapacityMiB"` - Description string `json:"Description"` - InterfaceType string `json:"InterfaceType"` - LogicalDriveName string `json:"LogicalDriveName"` - LogicalDriveNumber int `json:"LogicalDriveNumber"` - Name string `json:"Name"` - Raid string
`json:"Raid"` - Status Status `json:"Status"` - StripeSizeBytes int `json:"StripeSizeBytes"` + Id string `json:"Id"` + CapacityMiB int `json:"CapacityMiB"` + Description string `json:"Description"` + InterfaceType string `json:"InterfaceType"` + LogicalDriveName string `json:"LogicalDriveName"` + LogicalDriveNumber int `json:"LogicalDriveNumber"` + Name string `json:"Name"` + Raid string `json:"Raid"` + Status Status `json:"Status"` + StripeSizebytes int `json:"StripeSizebytes"` + VolumeUniqueIdentifier string `json:"VolumeUniqueIdentifier"` } +// Disk Drives // /redfish/v1/Systems/1/SmartStorage/ArrayControllers/X/DiskDrives/X/ -type PhysicalDriveMetrics struct { - ID string `json:"Id"` - CapacityGB int `json:"CapacityGB"` - Location string `json:"Location"` - Model string `json:"Model"` - Name string `json:"Name"` - SerialNumber string `json:"SerialNumber"` - Status Status `json:"Status"` +type DiskDriveMetrics struct { + Id string `json:"Id"` + CapacityMiB int `json:"CapacityMiB"` + Description string `json:"Description"` + InterfaceType string `json:"InterfaceType"` + Name string `json:"Name"` + Model string `json:"Model"` + Status Status `json:"Status"` + Location string `json:"Location"` + SerialNumber string `json:"SerialNumber"` +} + +// GenericDrive is used to iterate over differing drive endpoints +// /redfish/v1/Systems/1/SmartStorage/ArrayControllers/ for Logical and Physical Drives +// /redfish/v1/Chassis/1/Drives/ for NVMe Drive(s) +type GenericDrive struct { + Members []Members `json:"Members,omitempty"` + LinksUpper LinksUpper `json:"Links,omitempty"` + LinksLower LinksLower `json:"links,omitempty"` + MembersCount int `json:"Members@odata.count,omitempty"` +} + +type Members struct { + URL string `json:"@odata.id"` +} + +type LinksUpper struct { + Drives []URL `json:"Drives,omitempty"` + LogicalDrives URL `json:"LogicalDrives,omitempty"` + PhysicalDrives URL `json:"PhysicalDrives,omitempty"` +} + +type LinksLower struct { + Drives []HRef 
`json:"Drives,omitempty"` + LogicalDrives HRef `json:"LogicalDrives,omitempty"` + PhysicalDrives HRef `json:"PhysicalDrives,omitempty"` +} + +type HRef struct { + URL string `json:"href"` +} + +type URL struct { + URL string `json:"@odata.id"` +} + +// PhysicalLocation +type PhysicalLocation struct { + PartLocation PartLocation `json:"PartLocation"` +} + +// PartLocation is a variable that determines the Box and the Bay location of the NVMe drive +type PartLocation struct { + ServiceLabel string `json:"ServiceLabel"` +} + +// Contents of Oem +type Oem struct { + Hpe HpeCont `json:"Hpe"` +} + +// Contents of Hpe +type HpeCont struct { + CurrentTemperatureCelsius int `json:"CurrentTemperatureCelsius"` + DriveStatus Status `json:"DriveStatus"` + NVMeID string `json:"NVMeId"` } diff --git a/hpe/dl560/drive_test.go b/hpe/dl560/drive_test.go new file mode 100644 index 0000000..75d8e08 --- /dev/null +++ b/hpe/dl560/drive_test.go @@ -0,0 +1,562 @@ +/* + * Copyright 2024 Comcast Cable Communications Management, LLC + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + +package dl560 + +import ( + "context" + "net/http" + "net/http/httptest" + "strings" + "testing" + + "github.com/prometheus/client_golang/prometheus" + "github.com/prometheus/client_golang/prometheus/testutil" + "github.com/stretchr/testify/assert" +) + +const ( + GoodLogicalDriveUpperResponse = ` + # HELP dl560_logical_drive_status Current Logical Drive Raid 1 = OK, 0 = BAD, -1 = DISABLED + # TYPE dl560_logical_drive_status gauge + dl560_logical_drive_status{logicaldrivename="TESTDRIVE NAME 1",name="HpeSmartStorageLogicalDrive",raid="1",volumeuniqueidentifier="ABCDEF12345"} 1 + ` + GoodDiskDriveUpperResponse = ` + # HELP dl560_disk_drive_status Current Disk Drive status 1 = OK, 0 = BAD, -1 = DISABLED + # TYPE dl560_disk_drive_status gauge + dl560_disk_drive_status{id="0",location="1I:1:1",name="HpeSmartStorageDiskDrive",serialnumber="ABC123"} 1 + ` + GoodLogicalDriveLowerResponse = ` + # HELP dl560_logical_drive_status Current Logical Drive Raid 1 = OK, 0 = BAD, -1 = DISABLED + # TYPE dl560_logical_drive_status gauge + dl560_logical_drive_status{logicaldrivename="TESTDRIVE NAME 2",name="HpeSmartStorageLogicalDrive",raid="1",volumeuniqueidentifier="FEDCBA12345"} 1 + ` + GoodDiskDriveLowerResponse = ` + # HELP dl560_disk_drive_status Current Disk Drive status 1 = OK, 0 = BAD, -1 = DISABLED + # TYPE dl560_disk_drive_status gauge + dl560_disk_drive_status{id="1",location="1I:1:2",name="HpeSmartStorageDiskDrive",serialnumber="DEF456"} 1 + ` + GoodNvmeDriveResponse = ` + # HELP dl560_nvme_drive_status Current NVME status 1 = OK, 0 = BAD, -1 = DISABLED + # TYPE dl560_nvme_drive_status gauge + dl560_nvme_drive_status{id="0",protocol="NVMe",serviceLabel="Box 3:Bay 7"} 1 + ` +) + +var ( + GoodDiskDriveUpper = MustMarshal(struct { + Id string `json:"Id"` + CapacityMiB int `json:"CapacityMiB"` + Description string `json:"Description"` + InterfaceType string `json:"InterfaceType"` + Name string `json:"Name"` + Model string `json:"Model"` + Status struct { + Health 
string `json:"Health"` + State string `json:"State"` + } `json:"Status"` + Location string `json:"Location"` + SerialNumber string `json:"SerialNumber"` + }{ + Id: "0", + CapacityMiB: 572325, + Description: "HPE Smart Storage Disk Drive View", + InterfaceType: "SAS", + Name: "HpeSmartStorageDiskDrive", + Model: "TESTMODEL", + Status: struct { + Health string `json:"Health"` + State string `json:"State"` + }{ + Health: "OK", + State: "Enabled", + }, + Location: "1I:1:1", + SerialNumber: "ABC123", + }) + + GoodDiskDriveLower = MustMarshal(struct { + Id string `json:"Id"` + CapacityMiB int `json:"CapacityMiB"` + Description string `json:"Description"` + InterfaceType string `json:"InterfaceType"` + Name string `json:"Name"` + Model string `json:"Model"` + Status struct { + Health string `json:"Health"` + State string `json:"State"` + } `json:"Status"` + Location string `json:"Location"` + SerialNumber string `json:"SerialNumber"` + }{ + Id: "1", + CapacityMiB: 572325, + Description: "HPE Smart Storage Disk Drive View", + InterfaceType: "SAS", + Name: "HpeSmartStorageDiskDrive", + Model: "TESTMODEL", + Status: struct { + Health string `json:"Health"` + State string `json:"State"` + }{ + Health: "OK", + State: "Enabled", + }, + Location: "1I:1:2", + SerialNumber: "DEF456", + }) + + GoodLogicalDriveUpper = MustMarshal(struct { + Id string `json:"Id"` + CapacityMiB int `json:"CapacityMiB"` + Description string `json:"Description"` + InterfaceType string `json:"InterfaceType"` + LogicalDriveName string `json:"LogicalDriveName"` + LogicalDriveNumber int `json:"LogicalDriveNumber"` + Name string `json:"Name"` + Raid string `json:"Raid"` + Status struct { + Health string `json:"Health"` + State string `json:"State"` + } `json:"Status"` + StripeSizebytes int `json:"StripeSizebytes"` + VolumeUniqueIdentifier string `json:"VolumeUniqueIdentifier"` + }{ + Id: "1", + CapacityMiB: 572293, + Description: "HPE Smart Storage Disk Drive View", + InterfaceType: "SAS", + 
LogicalDriveName: "TESTDRIVE NAME 1", + LogicalDriveNumber: 1, + Name: "HpeSmartStorageLogicalDrive", + Raid: "1", + Status: struct { + Health string `json:"Health"` + State string `json:"State"` + }{ + Health: "OK", + State: "Enabled", + }, + StripeSizebytes: 262144, + VolumeUniqueIdentifier: "ABCDEF12345", + }) + + GoodLogicalDriveLower = MustMarshal(struct { + Id string `json:"Id"` + CapacityMiB int `json:"CapacityMiB"` + Description string `json:"Description"` + InterfaceType string `json:"InterfaceType"` + LogicalDriveName string `json:"LogicalDriveName"` + LogicalDriveNumber int `json:"LogicalDriveNumber"` + Name string `json:"Name"` + Raid string `json:"Raid"` + Status struct { + Health string `json:"Health"` + State string `json:"State"` + } `json:"Status"` + StripeSizebytes int `json:"StripeSizebytes"` + VolumeUniqueIdentifier string `json:"VolumeUniqueIdentifier"` + }{ + Id: "1", + CapacityMiB: 572293, + Description: "HPE Smart Storage Disk Drive View", + InterfaceType: "SAS", + LogicalDriveName: "TESTDRIVE NAME 2", + LogicalDriveNumber: 1, + Name: "HpeSmartStorageLogicalDrive", + Raid: "1", + Status: struct { + Health string `json:"Health"` + State string `json:"State"` + }{ + Health: "OK", + State: "Enabled", + }, + StripeSizebytes: 262144, + VolumeUniqueIdentifier: "FEDCBA12345", + }) + + GoodNvmeDrive = MustMarshal(struct { + Id string `json:"Id"` + Model string `json:"Model"` + Name string `json:"Name"` + MediaType string `json:"MediaType"` + Oem struct { + Hpe struct { + DriveStatus struct { + Health string `json:"Health"` + State string `json:"State"` + } `json:"DriveStatus"` + } `json:"Hpe"` + } `json:"Oem"` + PhysicalLocation struct { + PartLocation struct { + ServiceLabel string `json:"ServiceLabel"` + } `json:"PartLocation"` + } `json:"PhysicalLocation"` + Protocol string `json:"Protocol"` + FailurePredicted bool `json:"FailurePredicted"` + CapacityBytes int `json:"CapacityBytes"` + }{ + Id: "0", + Model: "TESTMODEL", + Name: "TESTNAME", + 
MediaType: "SSD", + Oem: struct { + Hpe struct { + DriveStatus struct { + Health string `json:"Health"` + State string `json:"State"` + } `json:"DriveStatus"` + } `json:"Hpe"` + }{ + Hpe: struct { + DriveStatus struct { + Health string `json:"Health"` + State string `json:"State"` + } `json:"DriveStatus"` + }{ + DriveStatus: struct { + Health string `json:"Health"` + State string `json:"State"` + }{ + Health: "OK", + State: "Enabled", + }, + }, + }, + PhysicalLocation: struct { + PartLocation struct { + ServiceLabel string `json:"ServiceLabel"` + } `json:"PartLocation"` + }{ + PartLocation: struct { + ServiceLabel string `json:"ServiceLabel"` + }{ + ServiceLabel: "Box 3:Bay 7", + }, + }, + Protocol: "NVMe", + FailurePredicted: false, + CapacityBytes: 1600321314816, + }) +) + +func Test_DL560_Drives(t *testing.T) { + server := httptest.NewServer(http.HandlerFunc(func(w http.ResponseWriter, r *http.Request) { + if r.URL.Path == "/redfish/v1/good/Systems/1/SmartStorage/ArrayControllers/" { + w.WriteHeader(http.StatusOK) + w.Write(MustMarshal(struct { + MembersCount int `json:"Members@odata.count"` + Members []struct { + URL string `json:"@odata.id"` + } `json:"Members"` + }{ + MembersCount: 2, + Members: []struct { + URL string `json:"@odata.id"` + }{ + { + URL: "/redfish/v1/good/Systems/1/SmartStorage/ArrayControllers/0/", + }, + { + URL: "/redfish/v1/good/Systems/1/SmartStorage/ArrayControllers/2/", + }, + }, + })) + return + } else if r.URL.Path == "/redfish/v1/good/Systems/1/SmartStorage/ArrayControllers/0/" { + w.WriteHeader(http.StatusOK) + w.Write(MustMarshal(struct { + LinksUpper struct { + LogicalDrives struct { + URL string `json:"@odata.id"` + } `json:"LogicalDrives"` + PhysicalDrives struct { + URL string `json:"@odata.id"` + } `json:"PhysicalDrives"` + } `json:"Links"` + }{ + LinksUpper: struct { + LogicalDrives struct { + URL string `json:"@odata.id"` + } `json:"LogicalDrives"` + PhysicalDrives struct { + URL string `json:"@odata.id"` + } 
`json:"PhysicalDrives"` + }{ + LogicalDrives: struct { + URL string `json:"@odata.id"` + }{ + URL: "/redfish/v1/good/Systems/1/SmartStorage/ArrayControllers/0/LogicalDrives/", + }, + PhysicalDrives: struct { + URL string `json:"@odata.id"` + }{ + URL: "/redfish/v1/good/Systems/1/SmartStorage/ArrayControllers/0/DiskDrives/", + }, + }, + })) + return + } else if r.URL.Path == "/redfish/v1/good/Systems/1/SmartStorage/ArrayControllers/2/" { + w.WriteHeader(http.StatusOK) + w.Write(MustMarshal(struct { + LinksLower struct { + LogicalDrives struct { + URL string `json:"href"` + } `json:"LogicalDrives"` + PhysicalDrives struct { + URL string `json:"href"` + } `json:"PhysicalDrives"` + } `json:"links"` + }{ + LinksLower: struct { + LogicalDrives struct { + URL string `json:"href"` + } `json:"LogicalDrives"` + PhysicalDrives struct { + URL string `json:"href"` + } `json:"PhysicalDrives"` + }{ + LogicalDrives: struct { + URL string `json:"href"` + }{ + URL: "/redfish/v1/good/Systems/1/SmartStorage/ArrayControllers/2/LogicalDrives/", + }, + PhysicalDrives: struct { + URL string `json:"href"` + }{ + URL: "/redfish/v1/good/Systems/1/SmartStorage/ArrayControllers/2/DiskDrives/", + }, + }, + })) + return + } else if r.URL.Path == "/redfish/v1/good/Systems/1/SmartStorage/ArrayControllers/0/LogicalDrives/" { + w.WriteHeader(http.StatusOK) + w.Write(MustMarshal(struct { + MembersCount int `json:"Members@odata.count"` + Members []struct { + URL string `json:"@odata.id"` + } `json:"Members"` + }{ + MembersCount: 1, + Members: []struct { + URL string `json:"@odata.id"` + }{ + { + URL: "/redfish/v1/Systems/1/SmartStorage/ArrayControllers/0/LogicalDrives/1/", + }, + }, + })) + return + } else if r.URL.Path == "/redfish/v1/good/Systems/1/SmartStorage/ArrayControllers/2/LogicalDrives/" { + w.WriteHeader(http.StatusOK) + w.Write(MustMarshal(struct { + MembersCount int `json:"Members@odata.count"` + Members []struct { + URL string `json:"@odata.id"` + } `json:"Members"` + }{ + MembersCount: 
1, + Members: []struct { + URL string `json:"@odata.id"` + }{ + { + URL: "/redfish/v1/Systems/1/SmartStorage/ArrayControllers/2/LogicalDrives/1/", + }, + }, + })) + return + } else if r.URL.Path == "/redfish/v1/good/Systems/1/SmartStorage/ArrayControllers/0/DiskDrives/" { + w.WriteHeader(http.StatusOK) + w.Write(MustMarshal(struct { + MembersCount int `json:"Members@odata.count"` + Members []struct { + URL string `json:"@odata.id"` + } `json:"Members"` + }{ + MembersCount: 1, + Members: []struct { + URL string `json:"@odata.id"` + }{ + { + URL: "/redfish/v1/Systems/1/SmartStorage/ArrayControllers/0/DiskDrives/0/", + }, + }, + })) + return + } else if r.URL.Path == "/redfish/v1/good/Systems/1/SmartStorage/ArrayControllers/2/DiskDrives/" { + w.WriteHeader(http.StatusOK) + w.Write(MustMarshal(struct { + MembersCount int `json:"Members@odata.count"` + Members []struct { + URL string `json:"@odata.id"` + } `json:"Members"` + }{ + MembersCount: 1, + Members: []struct { + URL string `json:"@odata.id"` + }{ + { + URL: "/redfish/v1/Systems/1/SmartStorage/ArrayControllers/2/DiskDrives/0/", + }, + }, + })) + return + } else if r.URL.Path == "/redfish/v1/good/Chassis/1/" { + w.WriteHeader(http.StatusOK) + w.Write(MustMarshal(struct { + LinksUpper struct { + Drives []struct { + URL string `json:"@odata.id"` + } `json:"Drives"` + } `json:"Links"` + }{ + LinksUpper: struct { + Drives []struct { + URL string `json:"@odata.id"` + } `json:"Drives"` + }{ + Drives: []struct { + URL string `json:"@odata.id"` + }{ + { + URL: "/redfish/v1/Systems/1/Storage/DA000000/Drives/DA000000/", + }, + }, + }, + })) + return + } + w.WriteHeader(http.StatusInternalServerError) + w.Write([]byte("Unknown path - please create test case(s) for it")) + })) + defer server.Close() + + ctx := context.Background() + assert := assert.New(t) + + logicalDevMetrics := func(exp *Exporter, payload []byte) error { + err := exp.exportLogicalDriveMetrics(payload) + if err != nil { + return err + } + return nil + } + + 
physDevMetrics := func(exp *Exporter, payload []byte) error { + err := exp.exportPhysicalDriveMetrics(payload) + if err != nil { + return err + } + return nil + } + + nvmeDevMetrics := func(exp *Exporter, payload []byte) error { + err := exp.exportNVMeDriveMetrics(payload) + if err != nil { + return err + } + return nil + } + + tests := []struct { + name string + uri string + metricName string + metricRef1 string + metricRef2 string + exportFunc func(*Exporter, []byte) error + payload []byte + expected string + }{ + { + name: "Good Logical Drive Links Uppercase", + uri: "/redfish/v1/good", + metricName: "dl560_logical_drive_status", + metricRef1: "logicalDriveMetrics", + metricRef2: "raidStatus", + exportFunc: logicalDevMetrics, + payload: GoodLogicalDriveUpper, + expected: GoodLogicalDriveUpperResponse, + }, + { + name: "Good Logical Drive Links Lowercase", + uri: "/redfish/v1/good", + metricName: "dl560_logical_drive_status", + metricRef1: "logicalDriveMetrics", + metricRef2: "raidStatus", + exportFunc: logicalDevMetrics, + payload: GoodLogicalDriveLower, + expected: GoodLogicalDriveLowerResponse, + }, + { + name: "Good Disk Drive Links Uppercase", + uri: "/redfish/v1/good", + metricName: "dl560_disk_drive_status", + metricRef1: "diskDriveMetrics", + metricRef2: "driveStatus", + exportFunc: physDevMetrics, + payload: GoodDiskDriveUpper, + expected: GoodDiskDriveUpperResponse, + }, + { + name: "Good Disk Drive Links Lowercase", + uri: "/redfish/v1/good", + metricName: "dl560_disk_drive_status", + metricRef1: "diskDriveMetrics", + metricRef2: "driveStatus", + exportFunc: physDevMetrics, + payload: GoodDiskDriveLower, + expected: GoodDiskDriveLowerResponse, + }, + { + name: "Good Nvme Drive", + uri: "/redfish/v1/good", + metricName: "dl560_nvme_drive_status", + metricRef1: "nvmeMetrics", + metricRef2: "nvmeDriveStatus", + exportFunc: nvmeDevMetrics, + payload: GoodNvmeDrive, + expected: GoodNvmeDriveResponse, + }, + } + + for _, test := range tests { + 
t.Run(test.name, func(t *testing.T) { + var exporter prometheus.Collector + var err error + exporter, err = NewExporter(ctx, server.URL, test.uri, "") + assert.Nil(err) + assert.NotNil(exporter) + + prometheus.MustRegister(exporter) + + err = test.exportFunc(exporter.(*Exporter), test.payload) + if err != nil { + t.Error(err) + } + + metric := (*exporter.(*Exporter).deviceMetrics)[test.metricRef1] + m := (*metric)[test.metricRef2] + + assert.Empty(testutil.CollectAndCompare(m, strings.NewReader(test.expected), test.metricName)) + + prometheus.Unregister(exporter) + + }) + } +} diff --git a/hpe/dl560/exporter.go b/hpe/dl560/exporter.go index f34aec1..42fb761 100644 --- a/hpe/dl560/exporter.go +++ b/hpe/dl560/exporter.go @@ -20,6 +20,7 @@ import ( "context" "crypto/tls" "encoding/json" + "errors" "fmt" "io" "net" @@ -46,8 +47,10 @@ const ( THERMAL = "ThermalMetrics" // POWER represents the power metric endpoint POWER = "PowerMetrics" - // DRIVE represents the physical drive metric endpoints - DRIVE = "PhysicalDriveMetrics" + // NVME represents the NVMe drive metric endpoint + NVME = "NVMeDriveMetrics" + // DISKDRIVE represents the Disk Drive metric endpoints + DISKDRIVE = "DiskDriveMetrics" // LOGICALDRIVE represents the Logical drive metric endpoint LOGICALDRIVE = "LogicalDriveMetrics" // MEMORY represents the memory metric endpoints @@ -67,20 +70,23 @@ var ( // Exporter collects chassis manager stats from the given URI and exports them using // the prometheus metrics package. type Exporter struct { - ctx context.Context - mutex sync.RWMutex - pool *pool.Pool - host string - credProfile string - - up prometheus.Gauge + ctx context.Context + mutex sync.RWMutex + pool *pool.Pool + host string + credProfile string deviceMetrics *map[string]*metrics } // NewExporter returns an initialized Exporter for HPE DL560 device. 
-func NewExporter(ctx context.Context, target, uri, profile string) *Exporter { +func NewExporter(ctx context.Context, target, uri, profile string) (*Exporter, error) { var fqdn *url.URL var tasks []*pool.Task + var exp = Exporter{ + ctx: ctx, + credProfile: profile, + deviceMetrics: NewDeviceMetrics(), + } log = zap.L() @@ -122,76 +128,139 @@ func NewExporter(ctx context.Context, target, uri, profile string) *Exporter { Host: target, } } + exp.host = fqdn.String() - tasks = append(tasks, - pool.NewTask(common.Fetch(fqdn.String()+uri+"/Chassis/1/Thermal/", THERMAL, target, profile, retryClient)), - pool.NewTask(common.Fetch(fqdn.String()+uri+"/Chassis/1/Power/", POWER, target, profile, retryClient)), - pool.NewTask(common.Fetch(fqdn.String()+uri+"/Systems/1/", MEMORY, target, profile, retryClient)), + // check if host is on the ignored list, if so we immediately return + if _, ok := common.IgnoredDevices[exp.host]; ok { + var upMetric = (*exp.deviceMetrics)["up"] + (*upMetric)["up"].WithLabelValues().Set(float64(2)) + return &exp, nil + } + + // vars for drive parsing + var ( + initialURL = "/Systems/1/SmartStorage/ArrayControllers/" + url = initialURL + chassisUrl = "/Chassis/1/" + logicalDriveURLs []string + physicalDriveURLs []string + nvmeDriveURLs []string ) // start drive metrics here - arrayControllers, err := getDriveEndpoints(fqdn.String()+uri+"/Systems/1/SmartStorage/ArrayControllers/", target, retryClient) + driveResp, err := getDriveEndpoint(fqdn.String()+uri+url, target, retryClient) if err != nil { - log.Error("error when getting array controllers endpoint from "+DL560, zap.Error(err), zap.Any("trace_id", ctx.Value("traceID"))) - return nil + log.Error("api call "+fqdn.String()+uri+url+" failed - ", zap.Error(err), zap.Any("trace_id", ctx.Value("traceID"))) + if errors.Is(err, common.ErrInvalidCredential) { + common.IgnoredDevices[exp.host] = common.IgnoredDevice{ + Name: exp.host, + Endpoint: "https://" + exp.host + "/redfish/v1/Chassis", + Module: 
DL560, + CredentialProfile: exp.credProfile, + } + log.Info("added host "+exp.host+" to ignored list", zap.Any("trace_id", exp.ctx.Value("traceID"))) + var upMetric = (*exp.deviceMetrics)["up"] + (*upMetric)["up"].WithLabelValues().Set(float64(2)) + + return &exp, nil + } + return nil, err } - if arrayControllers.MembersCount > 0 { - for _, controller := range arrayControllers.Members { - getController, err := getDriveEndpoints(fqdn.String()+controller.URL, target, retryClient) + for _, member := range driveResp.Members { + arrayCtrlResp, err := getDriveEndpoint(fqdn.String()+member.URL, target, retryClient) + if err != nil { + log.Error("error when getting array controller from "+DL560, zap.Error(err), zap.Any("trace_id", ctx.Value("traceID"))) + return nil, err + } + + if arrayCtrlResp.LinksUpper.LogicalDrives.URL != "" { + logicalDrives, err := getDriveEndpoint(fqdn.String()+arrayCtrlResp.LinksUpper.LogicalDrives.URL, target, retryClient) if err != nil { - log.Error("error when getting array controller from "+DL560, zap.Error(err), zap.Any("trace_id", ctx.Value("traceID"))) - return nil + log.Error("api call "+fqdn.String()+arrayCtrlResp.LinksUpper.LogicalDrives.URL+" failed - ", zap.Error(err), zap.Any("trace_id", ctx.Value("traceID"))) + return nil, err } - if getController.Links.LogicalDrives.URL != "" { - logicalDrives, err := getDriveEndpoints(fqdn.String()+getController.Links.LogicalDrives.URL, target, retryClient) - if err != nil { - log.Error("error when getting logical drives endpoint from "+DL560, zap.Error(err), zap.Any("trace_id", ctx.Value("traceID"))) - return nil - } + for _, member := range logicalDrives.Members { + // append each URL in the Members array to the logicalDriveURLs array. 
+ logicalDriveURLs = append(logicalDriveURLs, member.URL) + } + } - if logicalDrives.MembersCount > 0 { - for _, logicalDrive := range logicalDrives.Members { - tasks = append(tasks, - pool.NewTask(common.Fetch(fqdn.String()+logicalDrive.URL, LOGICALDRIVE, target, profile, retryClient))) - } - } + if arrayCtrlResp.LinksUpper.PhysicalDrives.URL != "" { + physicalDrives, err := getDriveEndpoint(fqdn.String()+arrayCtrlResp.LinksUpper.PhysicalDrives.URL, target, retryClient) + if err != nil { + log.Error("api call "+fqdn.String()+arrayCtrlResp.LinksUpper.PhysicalDrives.URL+" failed - ", zap.Error(err), zap.Any("trace_id", ctx.Value("traceID"))) + return nil, err } - if getController.Links.PhysicalDrives.URL != "" { - physicalDrives, err := getDriveEndpoints(fqdn.String()+getController.Links.PhysicalDrives.URL, target, retryClient) - if err != nil { - log.Error("error when getting physical drives endpoint from "+DL560, zap.Error(err), zap.Any("trace_id", ctx.Value("traceID"))) - return nil - } + for _, member := range physicalDrives.Members { + physicalDriveURLs = append(physicalDriveURLs, member.URL) + } + } - if physicalDrives.MembersCount > 0 { - for _, physicalDrive := range physicalDrives.Members { - tasks = append(tasks, - pool.NewTask(common.Fetch(fqdn.String()+physicalDrive.URL, DRIVE, target, profile, retryClient))) - } - } + if arrayCtrlResp.LinksLower.LogicalDrives.URL != "" { + logicalDriveOutput, err := getDriveEndpoint(fqdn.String()+arrayCtrlResp.LinksLower.LogicalDrives.URL, target, retryClient) + if err != nil { + log.Error("api call "+fqdn.String()+arrayCtrlResp.LinksLower.LogicalDrives.URL+" failed - ", zap.Error(err), zap.Any("trace_id", ctx.Value("traceID"))) + return nil, err + } + + // loop through each Member in the "LogicalDrive" field + for _, member := range logicalDriveOutput.Members { + // append each URL in the Members array to the logicalDriveURLs array. 
+ logicalDriveURLs = append(logicalDriveURLs, member.URL) + } + } + + if arrayCtrlResp.LinksLower.PhysicalDrives.URL != "" { + physicalDriveOutput, err := getDriveEndpoint(fqdn.String()+arrayCtrlResp.LinksLower.PhysicalDrives.URL, target, retryClient) + if err != nil { + log.Error("api call "+fqdn.String()+arrayCtrlResp.LinksLower.PhysicalDrives.URL+" failed - ", zap.Error(err), zap.Any("trace_id", ctx.Value("traceID"))) + return nil, err + } + + for _, member := range physicalDriveOutput.Members { + physicalDriveURLs = append(physicalDriveURLs, member.URL) } } } - p := pool.NewPool(tasks, 1) + // parse to find NVME drives + chassisOutput, err := getDriveEndpoint(fqdn.String()+uri+chassisUrl, target, retryClient) + if err != nil { + log.Error("api call "+fqdn.String()+uri+chassisUrl+" failed - ", zap.Error(err), zap.Any("trace_id", ctx.Value("traceID"))) + return nil, err + } + + // parse through "Links" to find "Drives" array + // loop through drives array and append each odata.id url to nvmeDriveURLs list + for _, drive := range chassisOutput.LinksUpper.Drives { + nvmeDriveURLs = append(nvmeDriveURLs, drive.URL) + } + + // Loop through logicalDriveURLs, physicalDriveURLs, and nvmeDriveURLs and append each URL to the tasks pool + for _, url := range logicalDriveURLs { + tasks = append(tasks, pool.NewTask(common.Fetch(fqdn.String()+url, LOGICALDRIVE, target, profile, retryClient))) + } - // Create new map[string]*metrics for each new Exporter - metrx := NewDeviceMetrics() + for _, url := range physicalDriveURLs { + tasks = append(tasks, pool.NewTask(common.Fetch(fqdn.String()+url, DISKDRIVE, target, profile, retryClient))) + } - return &Exporter{ - ctx: ctx, - pool: p, - host: fqdn.Host, - credProfile: profile, - up: prometheus.NewGauge(prometheus.GaugeOpts{ - Name: "up", - Help: "Was the last scrape of chassis monitor successful.", - }), - deviceMetrics: metrx, + for _, url := range nvmeDriveURLs { + tasks = append(tasks, 
pool.NewTask(common.Fetch(fqdn.String()+url, NVME, target, profile, retryClient))) } + + tasks = append(tasks, + pool.NewTask(common.Fetch(fqdn.String()+uri+"/Chassis/1/Thermal/", THERMAL, target, profile, retryClient)), + pool.NewTask(common.Fetch(fqdn.String()+uri+"/Chassis/1/Power/", POWER, target, profile, retryClient)), + pool.NewTask(common.Fetch(fqdn.String()+uri+"/Systems/1/", MEMORY, target, profile, retryClient)), + ) + + exp.pool = pool.NewPool(tasks, 1) + + return &exp, nil } // Describe describes all the metrics ever exported by the fishymetrics exporter. It @@ -202,7 +271,6 @@ func (e *Exporter) Describe(ch chan<- *prometheus.Desc) { n.Describe(ch) } } - ch <- e.up.Desc() } // Collect fetches the stats from configured fishymetrics location and delivers them @@ -217,10 +285,10 @@ func (e *Exporter) Collect(ch chan<- prometheus.Metric) { if _, ok := common.IgnoredDevices[e.host]; !ok { e.scrape() } else { - e.up.Set(float64(2)) + var upMetric = (*e.deviceMetrics)["up"] + (*upMetric)["up"].WithLabelValues().Set(float64(2)) } - ch <- e.up e.collectMetrics(ch) } @@ -254,7 +322,7 @@ func (e *Exporter) scrape() { if task.Err != nil { deviceState := uint8(0) // If credentials are incorrect we will add host to be ignored until manual intervention - if strings.Contains(task.Err.Error(), "401") { + if errors.Is(task.Err, common.ErrInvalidCredential) { common.IgnoredDevices[e.host] = common.IgnoredDevice{ Name: e.host, Endpoint: "https://" + e.host + "/redfish/v1/Chassis", @@ -266,7 +334,8 @@ func (e *Exporter) scrape() { } else { deviceState = 0 } - e.up.Set(float64(deviceState)) + var upMetric = (*e.deviceMetrics)["up"] + (*upMetric)["up"].WithLabelValues().Set(float64(deviceState)) log.Error("error from "+DL560, zap.Error(task.Err), zap.String("api", task.MetricType), zap.Any("trace_id", e.ctx.Value("traceID"))) return } @@ -276,7 +345,9 @@ func (e *Exporter) scrape() { err = e.exportThermalMetrics(task.Body) case POWER: err = e.exportPowerMetrics(task.Body) - 
case DRIVE: + case NVME: + err = e.exportNVMeDriveMetrics(task.Body) + case DISKDRIVE: err = e.exportPhysicalDriveMetrics(task.Body) case LOGICALDRIVE: err = e.exportLogicalDriveMetrics(task.Body) @@ -298,7 +369,8 @@ func (e *Exporter) scrape() { state &= result } - e.up.Set(float64(state)) + var upMetric = (*e.deviceMetrics)["up"] + (*upMetric)["up"].WithLabelValues().Set(float64(state)) } @@ -396,7 +468,7 @@ func (e *Exporter) exportLogicalDriveMetrics(body []byte) error { state = DISABLED } - (*dlDrive)["logicalDriveStatus"].WithLabelValues(dld.Name, strconv.Itoa(dld.LogicalDriveNumber), dld.Raid).Set(state) + (*dlDrive)["raidStatus"].WithLabelValues(dld.Name, dld.LogicalDriveName, dld.VolumeUniqueIdentifier, dld.Raid).Set(state) return nil } @@ -405,8 +477,8 @@ func (e *Exporter) exportLogicalDriveMetrics(body []byte) error { func (e *Exporter) exportPhysicalDriveMetrics(body []byte) error { var state float64 - var dpd PhysicalDriveMetrics - var dpDrive = (*e.deviceMetrics)["driveMetrics"] + var dpd DiskDriveMetrics + var dpDrive = (*e.deviceMetrics)["diskDriveMetrics"] err := json.Unmarshal(body, &dpd) if err != nil { return fmt.Errorf("Error Unmarshalling DL560 PhysicalDriveMetrics - " + err.Error()) @@ -422,8 +494,33 @@ func (e *Exporter) exportPhysicalDriveMetrics(body []byte) error { state = DISABLED } - (*dpDrive)["physicalDriveStatus"].WithLabelValues(dpd.Name, dpd.ID, dpd.Location, dpd.SerialNumber).Set(state) + (*dpDrive)["driveStatus"].WithLabelValues(dpd.Name, dpd.Id, dpd.Location, dpd.SerialNumber).Set(state) + + return nil +} + +// exportNVMeDriveMetrics collects the DL560 NVMe drive metrics in json format and sets the prometheus gauges +func (e *Exporter) exportNVMeDriveMetrics(body []byte) error { + var state float64 + var dlnvme NVMeDriveMetrics + var dlnvmedrive = (*e.deviceMetrics)["nvmeMetrics"] + err := json.Unmarshal(body, &dlnvme) + if err != nil { + return fmt.Errorf("Error Unmarshalling DL560 NVMeDriveMetrics - " + err.Error()) + } + +
// Check nvme drive is enabled then check status and convert string to numeric values + if dlnvme.Oem.Hpe.DriveStatus.State == "Enabled" { + if dlnvme.Oem.Hpe.DriveStatus.Health == "OK" { + state = OK + } else { + state = BAD + } + } else { + state = DISABLED + } + (*dlnvmedrive)["nvmeDriveStatus"].WithLabelValues(dlnvme.Protocol, dlnvme.ID, dlnvme.PhysicalLocation.PartLocation.ServiceLabel).Set(state) return nil } @@ -449,8 +546,11 @@ func (e *Exporter) exportMemoryMetrics(body []byte) error { return nil } -func getDriveEndpoints(url, host string, client *retryablehttp.Client) (GenericDrive, error) { - var drives GenericDrive +// The getDriveEndpoint function is used in a recursive fashion to get the body response +// of any type of drive, NVMe, Physical DiskDrives, or Logical Drives, using the GenericDrive struct +// This is used to find the final drive endpoints to append to the task pool for final scraping. +func getDriveEndpoint(url, host string, client *retryablehttp.Client) (GenericDrive, error) { + var drive GenericDrive var resp *http.Response var err error retryCount := 0 @@ -458,7 +558,7 @@ func getDriveEndpoints(url, host string, client *retryablehttp.Client) (GenericD resp, err = common.DoRequest(client, req) if err != nil { - return drives, err + return drive, err } defer resp.Body.Close() if !(resp.StatusCode >= http.StatusOK && resp.StatusCode < http.StatusMultipleChoices) { @@ -469,24 +569,26 @@ func getDriveEndpoints(url, host string, client *retryablehttp.Client) (GenericD retryCount = retryCount + 1 } if err != nil { - return drives, err + return drive, err } else if !(resp.StatusCode >= http.StatusOK && resp.StatusCode < http.StatusMultipleChoices) { - return drives, fmt.Errorf("HTTP status %d", resp.StatusCode) + return drive, fmt.Errorf("HTTP status %d", resp.StatusCode) } + } else if resp.StatusCode == http.StatusUnauthorized { + return drive, common.ErrInvalidCredential } else { - return drives, fmt.Errorf("HTTP status %d", resp.StatusCode) 
+ return drive, fmt.Errorf("HTTP status %d", resp.StatusCode) } } body, err := io.ReadAll(resp.Body) if err != nil { - return drives, fmt.Errorf("Error reading Response Body - " + err.Error()) + return drive, fmt.Errorf("Error reading Response Body - " + err.Error()) } - err = json.Unmarshal(body, &drives) + err = json.Unmarshal(body, &drive) if err != nil { - return drives, fmt.Errorf("Error Unmarshalling DL560 Drive Collection struct - " + err.Error()) + return drive, fmt.Errorf("Error Unmarshalling DL560 Drive Collection struct - " + err.Error()) } - return drives, nil + return drive, nil } diff --git a/hpe/dl560/exporter_test.go b/hpe/dl560/exporter_test.go new file mode 100644 index 0000000..8dd2298 --- /dev/null +++ b/hpe/dl560/exporter_test.go @@ -0,0 +1,125 @@ +/* + * Copyright 2024 Comcast Cable Communications Management, LLC + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package dl560 + +import ( + "context" + "encoding/json" + "net/http" + "net/http/httptest" + "strings" + "testing" + + "github.com/prometheus/client_golang/prometheus" + "github.com/prometheus/client_golang/prometheus/testutil" + "github.com/stretchr/testify/assert" +) + +const ( + up2Response = ` + # HELP up was the last scrape of fishymetrics successful. 
+ # TYPE up gauge + up 2 + ` +) + +type TestErrorResponse struct { + Error TestError `json:"error"` +} + +type TestError struct { + Code string `json:"code"` + Message string `json:"message"` + ExtendedInfo []TestMessage `json:"@Message.ExtendedInfo"` +} + +type TestMessage struct { + MessageId string `json:"MessageId"` +} + +func MustMarshal(v interface{}) []byte { + b, err := json.Marshal(v) + if err != nil { + panic(err) + } + return b +} + +func Test_DL560_Exporter(t *testing.T) { + server := httptest.NewServer(http.HandlerFunc(func(w http.ResponseWriter, r *http.Request) { + if r.URL.Path == "/redfish/v1/badcred/Systems/1/SmartStorage/ArrayControllers/" { + w.WriteHeader(http.StatusUnauthorized) + w.Write(MustMarshal(TestErrorResponse{ + Error: TestError{ + Code: "iLO.0.10.ExtendedInfo", + Message: "See @Message.ExtendedInfo for more information.", + ExtendedInfo: []TestMessage{ + { + MessageId: "Base.1.0.NoValidSession", + }, + }, + }, + })) + return + } + w.WriteHeader(http.StatusInternalServerError) + w.Write([]byte("Unknown path - please create test case(s) for it")) + })) + defer server.Close() + + ctx := context.Background() + assert := assert.New(t) + + tests := []struct { + name string + uri string + metricName string + metricRef1 string + metricRef2 string + payload []byte + expected string + }{ + { + name: "Bad Credentials", + uri: "/redfish/v1/badcred", + metricName: "up", + metricRef1: "up", + metricRef2: "up", + expected: up2Response, + }, + } + + for _, test := range tests { + t.Run(test.name, func(t *testing.T) { + var exporter prometheus.Collector + var err error + exporter, err = NewExporter(ctx, server.URL, test.uri, "") + assert.Nil(err) + assert.NotNil(exporter) + + prometheus.MustRegister(exporter) + + metric := (*exporter.(*Exporter).deviceMetrics)[test.metricRef1] + m := (*metric)[test.metricRef2] + + assert.Empty(testutil.CollectAndCompare(m, strings.NewReader(test.expected), test.metricName)) + + prometheus.Unregister(exporter) + + }) 
+ } +} diff --git a/hpe/dl560/metrics.go b/hpe/dl560/metrics.go index 42e9b31..2931f92 100644 --- a/hpe/dl560/metrics.go +++ b/hpe/dl560/metrics.go @@ -35,6 +35,10 @@ func newServerMetric(metricName string, docString string, constLabels prometheus func NewDeviceMetrics() *map[string]*metrics { var ( + UpMetric = &metrics{ + "up": newServerMetric("up", "was the last scrape of fishymetrics successful.", nil, []string{}), + } + ThermalMetrics = &metrics{ "fanSpeed": newServerMetric("dl560_thermal_fan_speed", "Current fan speed in the unit of percentage, possible values are 0 - 100", nil, []string{"name"}), "fanStatus": newServerMetric("dl560_thermal_fan_status", "Current fan status 1 = OK, 0 = BAD", nil, []string{"name"}), @@ -49,12 +53,20 @@ func NewDeviceMetrics() *map[string]*metrics { "supplyTotalCapacity": newServerMetric("dl560_power_supply_total_capacity", "Total output capacity of all the power supplies", nil, []string{"memberId"}), } - LogicalDriveMetrics = &metrics{ - "logicalDriveStatus": newServerMetric("dl560_logical_drive_status", "Current logical drive status 1 = OK, 0 = BAD, -1 = DISABLED", nil, []string{"name", "logicalDriveNumber", "raid"}), + // Splitting out the three different types of drives to gather metrics on each (NVMe, Disk Drive, and Logical Drive) + // NVMe Drive Metrics + NVMeDriveMetrics = &metrics{ + "nvmeDriveStatus": newServerMetric("dl560_nvme_drive_status", "Current NVME status 1 = OK, 0 = BAD, -1 = DISABLED", nil, []string{"protocol", "id", "serviceLabel"}), } - PhysicalDriveMetrics = &metrics{ - "physicalDriveStatus": newServerMetric("dl560_physical_drive_status", "Current physical drive status 1 = OK, 0 = BAD, -1 = DISABLED", nil, []string{"name", "id", "location", "serialnumber"}), + // Physical Storage Disk Drive Metrics + DiskDriveMetrics = &metrics{ + "driveStatus": newServerMetric("dl560_disk_drive_status", "Current Disk Drive status 1 = OK, 0 = BAD, -1 = DISABLED", nil, []string{"name", "id", "location", "serialnumber"}), + 
} + + // Logical Disk Drive Metrics + LogicalDriveMetrics = &metrics{ + "raidStatus": newServerMetric("dl560_logical_drive_status", "Current Logical Drive Raid 1 = OK, 0 = BAD, -1 = DISABLED", nil, []string{"name", "logicaldrivename", "volumeuniqueidentifier", "raid"}), } MemoryMetrics = &metrics{ @@ -62,10 +74,12 @@ func NewDeviceMetrics() *map[string]*metrics { } Metrics = &map[string]*metrics{ + "up": UpMetric, "thermalMetrics": ThermalMetrics, "powerMetrics": PowerMetrics, + "nvmeMetrics": NVMeDriveMetrics, + "diskDriveMetrics": DiskDriveMetrics, "logicalDriveMetrics": LogicalDriveMetrics, - "driveMetrics": PhysicalDriveMetrics, "memoryMetrics": MemoryMetrics, } ) diff --git a/hpe/moonshot/exporter.go b/hpe/moonshot/exporter.go index 65db44a..b93dd89 100644 --- a/hpe/moonshot/exporter.go +++ b/hpe/moonshot/exporter.go @@ -1,5 +1,5 @@ /* - * Copyright 2023 Comcast Cable Communications Management, LLC + * Copyright 2024 Comcast Cable Communications Management, LLC * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. @@ -20,6 +20,7 @@ import ( "context" "crypto/tls" "encoding/json" + "errors" "fmt" "io" "net" @@ -65,20 +66,23 @@ var ( // Exporter collects chassis manager stats from the given URI and exports them using // the prometheus metrics package. type Exporter struct { - ctx context.Context - mutex sync.RWMutex - pool *pool.MoonshotPool - host string - credProfile string - - up prometheus.Gauge + ctx context.Context + mutex sync.RWMutex + pool *pool.MoonshotPool + host string + credProfile string deviceMetrics *map[string]*metrics } // NewExporter returns an initialized Exporter for HPE Moonshot device. 
-func NewExporter(ctx context.Context, target, uri, profile string) *Exporter { +func NewExporter(ctx context.Context, target, uri, profile string) (*Exporter, error) { var fqdn *url.URL var tasks []*pool.MoonshotTask + var exp = Exporter{ + ctx: ctx, + credProfile: profile, + deviceMetrics: NewDeviceMetrics(), + } log = zap.L() @@ -120,6 +124,14 @@ func NewExporter(ctx context.Context, target, uri, profile string) *Exporter { Host: target, } } + exp.host = fqdn.String() + + // check if host is on the ignored list, if so we immediately return + if _, ok := common.IgnoredDevices[exp.host]; ok { + var upMetric = (*exp.deviceMetrics)["up"] + (*upMetric)["up"].WithLabelValues().Set(float64(2)) + return &exp, nil + } tasks = append(tasks, pool.NewMoonshotTask(fetch(fqdn.String()+uri+"/ThermalMetrics", MOONSHOT, THERMAL, target, profile, retryClient)), @@ -131,22 +143,9 @@ func NewExporter(ctx context.Context, target, uri, profile string) *Exporter { pool.NewMoonshotTask(fetch(fqdn.String()+uri+"/switches/sa/PowerMetrics", SWITCHA, POWER, target, profile, retryClient)), pool.NewMoonshotTask(fetch(fqdn.String()+uri+"/switches/sb/PowerMetrics", SWITCHB, POWER, target, profile, retryClient))) - p := pool.NewMoonshotPool(tasks, 1) - - // Create new map[string]*metrics for each new Exporter - metrx := NewDeviceMetrics() - - return &Exporter{ - ctx: ctx, - pool: p, - host: fqdn.Host, - credProfile: profile, - up: prometheus.NewGauge(prometheus.GaugeOpts{ - Name: "up", - Help: "Was the last scrape of chassis monitor successful.", - }), - deviceMetrics: metrx, - } + exp.pool = pool.NewMoonshotPool(tasks, 1) + + return &exp, nil } // Describe describes all the metrics ever exported by the fishymetrics exporter. 
It @@ -157,7 +156,6 @@ func (e *Exporter) Describe(ch chan<- *prometheus.Desc) { n.Describe(ch) } } - ch <- e.up.Desc() } // Collect fetches the stats from configured fishymetrics location and delivers them @@ -172,10 +170,10 @@ func (e *Exporter) Collect(ch chan<- prometheus.Metric) { if _, ok := common.IgnoredDevices[e.host]; !ok { e.scrape() } else { - e.up.Set(float64(2)) + var upMetric = (*e.deviceMetrics)["up"] + (*upMetric)["up"].WithLabelValues().Set(float64(2)) } - ch <- e.up e.collectMetrics(ch) } @@ -267,7 +265,7 @@ func (e *Exporter) scrape() { if task.Err != nil { deviceState := uint8(0) // If credentials are incorrect we will add host to be ignored until manual intervention - if strings.Contains(task.Err.Error(), "401") { + if errors.Is(task.Err, common.ErrInvalidCredential) { common.IgnoredDevices[e.host] = common.IgnoredDevice{ Name: e.host, Endpoint: "https://" + e.host + "/rest/v1/chassis/1", @@ -279,7 +277,8 @@ func (e *Exporter) scrape() { } else { deviceState = 0 } - e.up.Set(float64(deviceState)) + var upMetric = (*e.deviceMetrics)["up"] + (*upMetric)["up"].WithLabelValues().Set(float64(deviceState)) log.Error("error from "+task.Device, zap.Error(task.Err), zap.String("api", task.MetricType), zap.Any("trace_id", e.ctx.Value("traceID"))) return } @@ -334,7 +333,8 @@ func (e *Exporter) scrape() { state &= result } - e.up.Set(float64(state)) + var upMetric = (*e.deviceMetrics)["up"] + (*upMetric)["up"].WithLabelValues().Set(float64(state)) } diff --git a/hpe/moonshot/metrics.go b/hpe/moonshot/metrics.go index a7c376e..9e1dc44 100644 --- a/hpe/moonshot/metrics.go +++ b/hpe/moonshot/metrics.go @@ -1,5 +1,5 @@ /* - * Copyright 2023 Comcast Cable Communications Management, LLC + * Copyright 2024 Comcast Cable Communications Management, LLC * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. 
@@ -35,6 +35,10 @@ func newServerMetric(metricName string, docString string, constLabels prometheus func NewDeviceMetrics() *map[string]*metrics { var ( + UpMetric = &metrics{ + "up": newServerMetric("up", "was the last scrape of fishymetrics successful.", nil, []string{}), + } + ThermalMetrics = &metrics{ "fanSpeed": newServerMetric("thermal_fan_speed", "Current fan speed in the unit of percentage, possible values are 0 - 100", nil, []string{"name"}), "fanStatus": newServerMetric("thermal_fan_status", "Current fan status 1 = OK, 0 = BAD", nil, []string{"name"}), @@ -63,6 +67,7 @@ func NewDeviceMetrics() *map[string]*metrics { } Metrics = &map[string]*metrics{ + "up": UpMetric, "thermalMetrics": ThermalMetrics, "powerMetrics": PowerMetrics, "swMetrics": SwitchMetrics, diff --git a/hpe/xl420/drive.go b/hpe/xl420/drive.go index abb358a..8b2f882 100644 --- a/hpe/xl420/drive.go +++ b/hpe/xl420/drive.go @@ -18,50 +18,103 @@ package xl420 // /redfish/v1/Systems/1/SmartStorage/ArrayControllers/ -type GenericDrive struct { - Members []struct { - URL string `json:"@odata.id"` - } `json:"Members"` - MembersCount int `json:"Members@odata.count,omitempty"` - Links *struct { - LogicalDrives *struct { - URL string `json:"href"` - } `json:"LogicalDrives,omitempty"` - PhysicalDrives *struct { - URL string `json:"href"` - } `json:"PhysicalDrives,omitempty"` - } `json:"Links,omitempty"` - Link *struct { - LogicalDrives *struct { - URL string `json:"href"` - } `json:"LogicalDrives,omitempty"` - PhysicalDrives *struct { - URL string `json:"href"` - } `json:"PhysicalDrives,omitempty"` - } `json:"links,omitempty"` +// NVME's +// /redfish/v1/chassis/1/ +type NVMeDriveMetrics struct { + ID string `json:"Id"` + Model string `json:"Model"` + Name string `json:"Name"` + MediaType string `json:"MediaType"` + Oem Oem `json:"Oem"` + PhysicalLocation PhysicalLocation `json:"PhysicalLocation"` + Protocol string `json:"Protocol"` + Status Status `json:"Status"` + FailurePredicted bool 
`json:"FailurePredicted"` + CapacityBytes int `json:"CapacityBytes"` } +// Logical Drives // /redfish/v1/Systems/1/SmartStorage/ArrayControllers/X/LogicalDrives/X/ type LogicalDriveMetrics struct { - ID string `json:"Id"` - CapacityMiB int `json:"CapacityMiB"` - Description string `json:"Description"` - InterfaceType string `json:"InterfaceType"` - LogicalDriveName string `json:"LogicalDriveName"` - LogicalDriveNumber int `json:"LogicalDriveNumber"` - Name string `json:"Name"` - Raid string `json:"Raid"` - Status Status `json:"Status"` - StripeSizeBytes int `json:"StripeSizeBytes"` + Id string `json:"Id"` + CapacityMiB int `json:"CapacityMiB"` + Description string `json:"Description"` + InterfaceType string `json:"InterfaceType"` + LogicalDriveName string `json:"LogicalDriveName"` + LogicalDriveNumber int `json:"LogicalDriveNumber"` + Name string `json:"Name"` + Raid string `json:"Raid"` + Status Status `json:"Status"` + StripeSizebytes int `json:"StripeSizebytes"` + VolumeUniqueIdentifier string `json:"VolumeUniqueIdentifier"` } +// Disk Drives // /redfish/v1/Systems/1/SmartStorage/ArrayControllers/X/DiskDrives/X/ -type PhysicalDriveMetrics struct { - ID string `json:"Id"` - CapacityGB int `json:"CapacityGB"` - Location string `json:"Location"` - Model string `json:"Model"` - Name string `json:"Name"` - SerialNumber string `json:"SerialNumber"` - Status Status `json:"Status"` +type DiskDriveMetrics struct { + Id string `json:"Id"` + CapacityMiB int `json:"CapacityMiB"` + Description string `json:"Description"` + InterfaceType string `json:"InterfaceType"` + Name string `json:"Name"` + Model string `json:"Model"` + Status Status `json:"Status"` + Location string `json:"Location"` + SerialNumber string `json:"SerialNumber"` +} + +// GenericDrive is used to iterate over differing drive endpoints +// /redfish/v1/Systems/1/SmartStorage/ArrayControllers/ for Logical and Physical Drives +// /redfish/v1/Chassis/1/Drives/ for NVMe Drive(s) +type GenericDrive struct { + 
Members []Members `json:"Members,omitempty"` + LinksUpper LinksUpper `json:"Links,omitempty"` + LinksLower LinksLower `json:"links,omitempty"` + MembersCount int `json:"Members@odata.count,omitempty"` +} + +type Members struct { + URL string `json:"@odata.id"` +} + +type LinksUpper struct { + Drives []URL `json:"Drives,omitempty"` + LogicalDrives URL `json:"LogicalDrives,omitempty"` + PhysicalDrives URL `json:"PhysicalDrives,omitempty"` +} + +type LinksLower struct { + Drives []HRef `json:"Drives,omitempty"` + LogicalDrives HRef `json:"LogicalDrives,omitempty"` + PhysicalDrives HRef `json:"PhysicalDrives,omitempty"` +} + +type HRef struct { + URL string `json:"href"` +} + +type URL struct { + URL string `json:"@odata.id"` +} + +// PhysicalLocation +type PhysicalLocation struct { + PartLocation PartLocation `json:"PartLocation"` +} + +// PartLocation is a variable that determines the Box and the Bay location of the NVMe drive +type PartLocation struct { + ServiceLabel string `json:"ServiceLabel"` +} + +// Contents of Oem +type Oem struct { + Hpe HpeCont `json:"Hpe"` +} + +// Contents of Hpe +type HpeCont struct { + CurrentTemperatureCelsius int `json:"CurrentTemperatureCelsius"` + DriveStatus Status `json:"DriveStatus"` + NVMeID string `json:"NVMeId"` } diff --git a/hpe/xl420/drive_test.go b/hpe/xl420/drive_test.go new file mode 100644 index 0000000..e328e5f --- /dev/null +++ b/hpe/xl420/drive_test.go @@ -0,0 +1,562 @@ +/* + * Copyright 2024 Comcast Cable Communications Management, LLC + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 
+ * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package xl420 + +import ( + "context" + "net/http" + "net/http/httptest" + "strings" + "testing" + + "github.com/prometheus/client_golang/prometheus" + "github.com/prometheus/client_golang/prometheus/testutil" + "github.com/stretchr/testify/assert" +) + +const ( + GoodLogicalDriveUpperResponse = ` + # HELP xl420_logical_drive_status Current Logical Drive Raid 1 = OK, 0 = BAD, -1 = DISABLED + # TYPE xl420_logical_drive_status gauge + xl420_logical_drive_status{logicaldrivename="TESTDRIVE NAME 1",name="HpeSmartStorageLogicalDrive",raid="1",volumeuniqueidentifier="ABCDEF12345"} 1 + ` + GoodDiskDriveUpperResponse = ` + # HELP xl420_disk_drive_status Current Disk Drive status 1 = OK, 0 = BAD, -1 = DISABLED + # TYPE xl420_disk_drive_status gauge + xl420_disk_drive_status{id="0",location="1I:1:1",name="HpeSmartStorageDiskDrive",serialnumber="ABC123"} 1 + ` + GoodLogicalDriveLowerResponse = ` + # HELP xl420_logical_drive_status Current Logical Drive Raid 1 = OK, 0 = BAD, -1 = DISABLED + # TYPE xl420_logical_drive_status gauge + xl420_logical_drive_status{logicaldrivename="TESTDRIVE NAME 2",name="HpeSmartStorageLogicalDrive",raid="1",volumeuniqueidentifier="FEDCBA12345"} 1 + ` + GoodDiskDriveLowerResponse = ` + # HELP xl420_disk_drive_status Current Disk Drive status 1 = OK, 0 = BAD, -1 = DISABLED + # TYPE xl420_disk_drive_status gauge + xl420_disk_drive_status{id="1",location="1I:1:2",name="HpeSmartStorageDiskDrive",serialnumber="DEF456"} 1 + ` + GoodNvmeDriveResponse = ` + # HELP xl420_nvme_drive_status Current NVME status 1 = OK, 0 = BAD, -1 = DISABLED + # TYPE xl420_nvme_drive_status gauge + xl420_nvme_drive_status{id="0",protocol="NVMe",serviceLabel="Box 3:Bay 7"} 1 + ` +) + +var ( + GoodDiskDriveUpper = MustMarshal(struct { + Id string `json:"Id"` + CapacityMiB int `json:"CapacityMiB"` + Description string `json:"Description"` + InterfaceType string 
`json:"InterfaceType"` + Name string `json:"Name"` + Model string `json:"Model"` + Status struct { + Health string `json:"Health"` + State string `json:"State"` + } `json:"Status"` + Location string `json:"Location"` + SerialNumber string `json:"SerialNumber"` + }{ + Id: "0", + CapacityMiB: 572325, + Description: "HPE Smart Storage Disk Drive View", + InterfaceType: "SAS", + Name: "HpeSmartStorageDiskDrive", + Model: "TESTMODEL", + Status: struct { + Health string `json:"Health"` + State string `json:"State"` + }{ + Health: "OK", + State: "Enabled", + }, + Location: "1I:1:1", + SerialNumber: "ABC123", + }) + + GoodDiskDriveLower = MustMarshal(struct { + Id string `json:"Id"` + CapacityMiB int `json:"CapacityMiB"` + Description string `json:"Description"` + InterfaceType string `json:"InterfaceType"` + Name string `json:"Name"` + Model string `json:"Model"` + Status struct { + Health string `json:"Health"` + State string `json:"State"` + } `json:"Status"` + Location string `json:"Location"` + SerialNumber string `json:"SerialNumber"` + }{ + Id: "1", + CapacityMiB: 572325, + Description: "HPE Smart Storage Disk Drive View", + InterfaceType: "SAS", + Name: "HpeSmartStorageDiskDrive", + Model: "TESTMODEL", + Status: struct { + Health string `json:"Health"` + State string `json:"State"` + }{ + Health: "OK", + State: "Enabled", + }, + Location: "1I:1:2", + SerialNumber: "DEF456", + }) + + GoodLogicalDriveUpper = MustMarshal(struct { + Id string `json:"Id"` + CapacityMiB int `json:"CapacityMiB"` + Description string `json:"Description"` + InterfaceType string `json:"InterfaceType"` + LogicalDriveName string `json:"LogicalDriveName"` + LogicalDriveNumber int `json:"LogicalDriveNumber"` + Name string `json:"Name"` + Raid string `json:"Raid"` + Status struct { + Health string `json:"Health"` + State string `json:"State"` + } `json:"Status"` + StripeSizebytes int `json:"StripeSizebytes"` + VolumeUniqueIdentifier string `json:"VolumeUniqueIdentifier"` + }{ + Id: "1", + 
CapacityMiB: 572293, + Description: "HPE Smart Storage Disk Drive View", + InterfaceType: "SAS", + LogicalDriveName: "TESTDRIVE NAME 1", + LogicalDriveNumber: 1, + Name: "HpeSmartStorageLogicalDrive", + Raid: "1", + Status: struct { + Health string `json:"Health"` + State string `json:"State"` + }{ + Health: "OK", + State: "Enabled", + }, + StripeSizebytes: 262144, + VolumeUniqueIdentifier: "ABCDEF12345", + }) + + GoodLogicalDriveLower = MustMarshal(struct { + Id string `json:"Id"` + CapacityMiB int `json:"CapacityMiB"` + Description string `json:"Description"` + InterfaceType string `json:"InterfaceType"` + LogicalDriveName string `json:"LogicalDriveName"` + LogicalDriveNumber int `json:"LogicalDriveNumber"` + Name string `json:"Name"` + Raid string `json:"Raid"` + Status struct { + Health string `json:"Health"` + State string `json:"State"` + } `json:"Status"` + StripeSizebytes int `json:"StripeSizebytes"` + VolumeUniqueIdentifier string `json:"VolumeUniqueIdentifier"` + }{ + Id: "1", + CapacityMiB: 572293, + Description: "HPE Smart Storage Disk Drive View", + InterfaceType: "SAS", + LogicalDriveName: "TESTDRIVE NAME 2", + LogicalDriveNumber: 1, + Name: "HpeSmartStorageLogicalDrive", + Raid: "1", + Status: struct { + Health string `json:"Health"` + State string `json:"State"` + }{ + Health: "OK", + State: "Enabled", + }, + StripeSizebytes: 262144, + VolumeUniqueIdentifier: "FEDCBA12345", + }) + + GoodNvmeDrive = MustMarshal(struct { + Id string `json:"Id"` + Model string `json:"Model"` + Name string `json:"Name"` + MediaType string `json:"MediaType"` + Oem struct { + Hpe struct { + DriveStatus struct { + Health string `json:"Health"` + State string `json:"State"` + } `json:"DriveStatus"` + } `json:"Hpe"` + } `json:"Oem"` + PhysicalLocation struct { + PartLocation struct { + ServiceLabel string `json:"ServiceLabel"` + } `json:"PartLocation"` + } `json:"PhysicalLocation"` + Protocol string `json:"Protocol"` + FailurePredicted bool `json:"FailurePredicted"` + 
CapacityBytes int `json:"CapacityBytes"` + }{ + Id: "0", + Model: "TESTMODEL", + Name: "TESTNAME", + MediaType: "SSD", + Oem: struct { + Hpe struct { + DriveStatus struct { + Health string `json:"Health"` + State string `json:"State"` + } `json:"DriveStatus"` + } `json:"Hpe"` + }{ + Hpe: struct { + DriveStatus struct { + Health string `json:"Health"` + State string `json:"State"` + } `json:"DriveStatus"` + }{ + DriveStatus: struct { + Health string `json:"Health"` + State string `json:"State"` + }{ + Health: "OK", + State: "Enabled", + }, + }, + }, + PhysicalLocation: struct { + PartLocation struct { + ServiceLabel string `json:"ServiceLabel"` + } `json:"PartLocation"` + }{ + PartLocation: struct { + ServiceLabel string `json:"ServiceLabel"` + }{ + ServiceLabel: "Box 3:Bay 7", + }, + }, + Protocol: "NVMe", + FailurePredicted: false, + CapacityBytes: 1600321314816, + }) +) + +func Test_XL420_Drives(t *testing.T) { + server := httptest.NewServer(http.HandlerFunc(func(w http.ResponseWriter, r *http.Request) { + if r.URL.Path == "/redfish/v1/good/Systems/1/SmartStorage/ArrayControllers/" { + w.WriteHeader(http.StatusOK) + w.Write(MustMarshal(struct { + MembersCount int `json:"Members@odata.count"` + Members []struct { + URL string `json:"@odata.id"` + } `json:"Members"` + }{ + MembersCount: 2, + Members: []struct { + URL string `json:"@odata.id"` + }{ + { + URL: "/redfish/v1/good/Systems/1/SmartStorage/ArrayControllers/0/", + }, + { + URL: "/redfish/v1/good/Systems/1/SmartStorage/ArrayControllers/2/", + }, + }, + })) + return + } else if r.URL.Path == "/redfish/v1/good/Systems/1/SmartStorage/ArrayControllers/0/" { + w.WriteHeader(http.StatusOK) + w.Write(MustMarshal(struct { + LinksUpper struct { + LogicalDrives struct { + URL string `json:"@odata.id"` + } `json:"LogicalDrives"` + PhysicalDrives struct { + URL string `json:"@odata.id"` + } `json:"PhysicalDrives"` + } `json:"Links"` + }{ + LinksUpper: struct { + LogicalDrives struct { + URL string `json:"@odata.id"` + } 
`json:"LogicalDrives"` + PhysicalDrives struct { + URL string `json:"@odata.id"` + } `json:"PhysicalDrives"` + }{ + LogicalDrives: struct { + URL string `json:"@odata.id"` + }{ + URL: "/redfish/v1/good/Systems/1/SmartStorage/ArrayControllers/0/LogicalDrives/", + }, + PhysicalDrives: struct { + URL string `json:"@odata.id"` + }{ + URL: "/redfish/v1/good/Systems/1/SmartStorage/ArrayControllers/0/DiskDrives/", + }, + }, + })) + return + } else if r.URL.Path == "/redfish/v1/good/Systems/1/SmartStorage/ArrayControllers/2/" { + w.WriteHeader(http.StatusOK) + w.Write(MustMarshal(struct { + LinksLower struct { + LogicalDrives struct { + URL string `json:"href"` + } `json:"LogicalDrives"` + PhysicalDrives struct { + URL string `json:"href"` + } `json:"PhysicalDrives"` + } `json:"links"` + }{ + LinksLower: struct { + LogicalDrives struct { + URL string `json:"href"` + } `json:"LogicalDrives"` + PhysicalDrives struct { + URL string `json:"href"` + } `json:"PhysicalDrives"` + }{ + LogicalDrives: struct { + URL string `json:"href"` + }{ + URL: "/redfish/v1/good/Systems/1/SmartStorage/ArrayControllers/2/LogicalDrives/", + }, + PhysicalDrives: struct { + URL string `json:"href"` + }{ + URL: "/redfish/v1/good/Systems/1/SmartStorage/ArrayControllers/2/DiskDrives/", + }, + }, + })) + return + } else if r.URL.Path == "/redfish/v1/good/Systems/1/SmartStorage/ArrayControllers/0/LogicalDrives/" { + w.WriteHeader(http.StatusOK) + w.Write(MustMarshal(struct { + MembersCount int `json:"Members@odata.count"` + Members []struct { + URL string `json:"@odata.id"` + } `json:"Members"` + }{ + MembersCount: 1, + Members: []struct { + URL string `json:"@odata.id"` + }{ + { + URL: "/redfish/v1/Systems/1/SmartStorage/ArrayControllers/0/LogicalDrives/1/", + }, + }, + })) + return + } else if r.URL.Path == "/redfish/v1/good/Systems/1/SmartStorage/ArrayControllers/2/LogicalDrives/" { + w.WriteHeader(http.StatusOK) + w.Write(MustMarshal(struct { + MembersCount int `json:"Members@odata.count"` + Members 
[]struct { + URL string `json:"@odata.id"` + } `json:"Members"` + }{ + MembersCount: 1, + Members: []struct { + URL string `json:"@odata.id"` + }{ + { + URL: "/redfish/v1/Systems/1/SmartStorage/ArrayControllers/2/LogicalDrives/1/", + }, + }, + })) + return + } else if r.URL.Path == "/redfish/v1/good/Systems/1/SmartStorage/ArrayControllers/0/DiskDrives/" { + w.WriteHeader(http.StatusOK) + w.Write(MustMarshal(struct { + MembersCount int `json:"Members@odata.count"` + Members []struct { + URL string `json:"@odata.id"` + } `json:"Members"` + }{ + MembersCount: 1, + Members: []struct { + URL string `json:"@odata.id"` + }{ + { + URL: "/redfish/v1/Systems/1/SmartStorage/ArrayControllers/0/DiskDrives/0/", + }, + }, + })) + return + } else if r.URL.Path == "/redfish/v1/good/Systems/1/SmartStorage/ArrayControllers/2/DiskDrives/" { + w.WriteHeader(http.StatusOK) + w.Write(MustMarshal(struct { + MembersCount int `json:"Members@odata.count"` + Members []struct { + URL string `json:"@odata.id"` + } `json:"Members"` + }{ + MembersCount: 1, + Members: []struct { + URL string `json:"@odata.id"` + }{ + { + URL: "/redfish/v1/Systems/1/SmartStorage/ArrayControllers/2/DiskDrives/0/", + }, + }, + })) + return + } else if r.URL.Path == "/redfish/v1/good/Chassis/1/" { + w.WriteHeader(http.StatusOK) + w.Write(MustMarshal(struct { + LinksUpper struct { + Drives []struct { + URL string `json:"@odata.id"` + } `json:"Drives"` + } `json:"Links"` + }{ + LinksUpper: struct { + Drives []struct { + URL string `json:"@odata.id"` + } `json:"Drives"` + }{ + Drives: []struct { + URL string `json:"@odata.id"` + }{ + { + URL: "/redfish/v1/Systems/1/Storage/DA000000/Drives/DA000000/", + }, + }, + }, + })) + return + } + w.WriteHeader(http.StatusInternalServerError) + w.Write([]byte("Unknown path - please create test case(s) for it")) + })) + defer server.Close() + + ctx := context.Background() + assert := assert.New(t) + + logicalDevMetrics := func(exp *Exporter, payload []byte) error { + err := 
exp.exportLogicalDriveMetrics(payload) + if err != nil { + return err + } + return nil + } + + physDevMetrics := func(exp *Exporter, payload []byte) error { + err := exp.exportPhysicalDriveMetrics(payload) + if err != nil { + return err + } + return nil + } + + nvmeDevMetrics := func(exp *Exporter, payload []byte) error { + err := exp.exportNVMeDriveMetrics(payload) + if err != nil { + return err + } + return nil + } + + tests := []struct { + name string + uri string + metricName string + metricRef1 string + metricRef2 string + exportFunc func(*Exporter, []byte) error + payload []byte + expected string + }{ + { + name: "Good Logical Drive Links Uppercase", + uri: "/redfish/v1/good", + metricName: "xl420_logical_drive_status", + metricRef1: "logicalDriveMetrics", + metricRef2: "raidStatus", + exportFunc: logicalDevMetrics, + payload: GoodLogicalDriveUpper, + expected: GoodLogicalDriveUpperResponse, + }, + { + name: "Good Logical Drive Links Lowercase", + uri: "/redfish/v1/good", + metricName: "xl420_logical_drive_status", + metricRef1: "logicalDriveMetrics", + metricRef2: "raidStatus", + exportFunc: logicalDevMetrics, + payload: GoodLogicalDriveLower, + expected: GoodLogicalDriveLowerResponse, + }, + { + name: "Good Disk Drive Links Uppercase", + uri: "/redfish/v1/good", + metricName: "xl420_disk_drive_status", + metricRef1: "diskDriveMetrics", + metricRef2: "driveStatus", + exportFunc: physDevMetrics, + payload: GoodDiskDriveUpper, + expected: GoodDiskDriveUpperResponse, + }, + { + name: "Good Disk Drive Links Lowercase", + uri: "/redfish/v1/good", + metricName: "xl420_disk_drive_status", + metricRef1: "diskDriveMetrics", + metricRef2: "driveStatus", + exportFunc: physDevMetrics, + payload: GoodDiskDriveLower, + expected: GoodDiskDriveLowerResponse, + }, + { + name: "Good Nvme Drive", + uri: "/redfish/v1/good", + metricName: "xl420_nvme_drive_status", + metricRef1: "nvmeMetrics", + metricRef2: "nvmeDriveStatus", + exportFunc: nvmeDevMetrics, + payload: 
GoodNvmeDrive, + expected: GoodNvmeDriveResponse, + }, + } + + for _, test := range tests { + t.Run(test.name, func(t *testing.T) { + var exporter prometheus.Collector + var err error + exporter, err = NewExporter(ctx, server.URL, test.uri, "") + assert.Nil(err) + assert.NotNil(exporter) + + prometheus.MustRegister(exporter) + + err = test.exportFunc(exporter.(*Exporter), test.payload) + if err != nil { + t.Error(err) + } + + metric := (*exporter.(*Exporter).deviceMetrics)[test.metricRef1] + m := (*metric)[test.metricRef2] + + assert.Empty(testutil.CollectAndCompare(m, strings.NewReader(test.expected), test.metricName)) + + prometheus.Unregister(exporter) + + }) + } +} diff --git a/hpe/xl420/exporter.go b/hpe/xl420/exporter.go index f2f3b5c..f650a2b 100644 --- a/hpe/xl420/exporter.go +++ b/hpe/xl420/exporter.go @@ -20,6 +20,7 @@ import ( "context" "crypto/tls" "encoding/json" + "errors" "fmt" "io" "net" @@ -46,8 +47,10 @@ const ( THERMAL = "ThermalMetrics" // POWER represents the power metric endpoint POWER = "PowerMetrics" - // DRIVE represents the physical drive metric endpoints - DRIVE = "PhysicalDriveMetrics" + // NVME represents the NVMe drive metric endpoint + NVME = "NVMeDriveMetrics" + // DISKDRIVE represents the Disk Drive metric endpoints + DISKDRIVE = "DiskDriveMetrics" // LOGICALDRIVE represents the Logical drive metric endpoint LOGICALDRIVE = "LogicalDriveMetrics" // MEMORY represents the memory metric endpoints @@ -67,20 +70,23 @@ var ( // Exporter collects chassis manager stats from the given URI and exports them using // the prometheus metrics package. type Exporter struct { - ctx context.Context - mutex sync.RWMutex - pool *pool.Pool - host string - credProfile string - - up prometheus.Gauge + ctx context.Context + mutex sync.RWMutex + pool *pool.Pool + host string + credProfile string deviceMetrics *map[string]*metrics } // NewExporter returns an initialized Exporter for HPE XL420 device. 
-func NewExporter(ctx context.Context, target, uri, profile string) *Exporter { +func NewExporter(ctx context.Context, target, uri, profile string) (*Exporter, error) { var fqdn *url.URL var tasks []*pool.Task + var exp = Exporter{ + ctx: ctx, + credProfile: profile, + deviceMetrics: NewDeviceMetrics(), + } log = zap.L() @@ -122,107 +128,147 @@ func NewExporter(ctx context.Context, target, uri, profile string) *Exporter { Host: target, } } + exp.host = fqdn.String() - tasks = append(tasks, - pool.NewTask(common.Fetch(fqdn.String()+uri+"/Chassis/1/Thermal/", THERMAL, target, profile, retryClient)), - pool.NewTask(common.Fetch(fqdn.String()+uri+"/Chassis/1/Power/", POWER, target, profile, retryClient)), - pool.NewTask(common.Fetch(fqdn.String()+uri+"/Systems/1/", MEMORY, target, profile, retryClient))) + // check if host is on the ignored list, if so we immediately return + if _, ok := common.IgnoredDevices[exp.host]; ok { + var upMetric = (*exp.deviceMetrics)["up"] + (*upMetric)["up"].WithLabelValues().Set(float64(2)) + return &exp, nil + } - arrayControllers, err := getDriveEndpoints(fqdn.String()+uri+"/Systems/1/SmartStorage/ArrayControllers/", target, retryClient) + // vars for drive parsing + var ( + initialURL = "/Systems/1/SmartStorage/ArrayControllers/" + url = initialURL + chassisUrl = "/Chassis/1/" + logicalDriveURLs []string + physicalDriveURLs []string + nvmeDriveURLs []string + ) + + // PARSING DRIVE ENDPOINTS + // Get initial JSON return of /redfish/v1/Systems/1/SmartStorage/ArrayControllers/ set to output + driveResp, err := getDriveEndpoint(fqdn.String()+uri+url, target, retryClient) if err != nil { - log.Error("error when getting array controllers endpoint from "+XL420, zap.Error(err), zap.Any("trace_id", ctx.Value("traceID"))) - return nil + log.Error("api call "+fqdn.String()+uri+url+" failed - ", zap.Error(err), zap.Any("trace_id", ctx.Value("traceID"))) + if errors.Is(err, common.ErrInvalidCredential) { + common.IgnoredDevices[exp.host] = 
common.IgnoredDevice{ + Name: exp.host, + Endpoint: "https://" + exp.host + "/redfish/v1/Chassis", + Module: XL420, + CredentialProfile: exp.credProfile, + } + log.Info("added host "+exp.host+" to ignored list", zap.Any("trace_id", exp.ctx.Value("traceID"))) + var upMetric = (*exp.deviceMetrics)["up"] + (*upMetric)["up"].WithLabelValues().Set(float64(2)) + + return &exp, nil + } + return nil, err } - if arrayControllers.MembersCount > 0 { - for _, controller := range arrayControllers.Members { - getController, err := getDriveEndpoints(fqdn.String()+controller.URL, target, retryClient) + // Loop through Members to get ArrayController URLs + for _, member := range driveResp.Members { + // for each ArrayController URL, get the JSON object + // /redfish/v1/Systems/1/SmartStorage/ArrayControllers/X/ + arrayCtrlResp, err := getDriveEndpoint(fqdn.String()+member.URL, target, retryClient) + if err != nil { + log.Error("api call "+fqdn.String()+member.URL+" failed - ", zap.Error(err), zap.Any("trace_id", ctx.Value("traceID"))) + return nil, err + } + + // If LogicalDrives is present, parse logical drive endpoint until all urls are found + if arrayCtrlResp.LinksUpper.LogicalDrives.URL != "" { + logicalDriveOutput, err := getDriveEndpoint(fqdn.String()+arrayCtrlResp.LinksUpper.LogicalDrives.URL, target, retryClient) if err != nil { - log.Error("error when getting array controller from "+XL420, zap.Error(err), zap.Any("trace_id", ctx.Value("traceID"))) - return nil + log.Error("api call "+fqdn.String()+arrayCtrlResp.LinksUpper.LogicalDrives.URL+" failed - ", zap.Error(err), zap.Any("trace_id", ctx.Value("traceID"))) + return nil, err } - if getController.Links != nil { - if getController.Links.LogicalDrives != nil && getController.Links.LogicalDrives.URL != "" { - logicalDrives, err := getDriveEndpoints(fqdn.String()+getController.Links.LogicalDrives.URL, target, retryClient) - if err != nil { - log.Error("error when getting logical drives endpoint from "+XL420, 
zap.Error(err), zap.Any("trace_id", ctx.Value("traceID"))) - return nil - } - - if logicalDrives.MembersCount > 0 { - for _, logicalDrive := range logicalDrives.Members { - tasks = append(tasks, - pool.NewTask(common.Fetch(fqdn.String()+logicalDrive.URL, LOGICALDRIVE, target, profile, retryClient))) - } - } - } + // loop through each Member in the "LogicalDrive" field + for _, member := range logicalDriveOutput.Members { + // append each URL in the Members array to the logicalDriveURLs array. + logicalDriveURLs = append(logicalDriveURLs, member.URL) + } + } - if getController.Links.PhysicalDrives != nil && getController.Links.PhysicalDrives.URL != "" { - physicalDrives, err := getDriveEndpoints(fqdn.String()+getController.Links.PhysicalDrives.URL, target, retryClient) - if err != nil { - log.Error("error when getting physical drives endpoint from "+XL420, zap.Error(err), zap.Any("trace_id", ctx.Value("traceID"))) - return nil - } - - if physicalDrives.MembersCount > 0 { - for _, physicalDrive := range physicalDrives.Members { - tasks = append(tasks, - pool.NewTask(common.Fetch(fqdn.String()+physicalDrive.URL, DRIVE, target, profile, retryClient))) - } - } - } + // If PhysicalDrives is present, parse physical drive endpoint until all urls are found + if arrayCtrlResp.LinksUpper.PhysicalDrives.URL != "" { + physicalDriveOutput, err := getDriveEndpoint(fqdn.String()+arrayCtrlResp.LinksUpper.PhysicalDrives.URL, target, retryClient) + if err != nil { + log.Error("api call "+fqdn.String()+arrayCtrlResp.LinksUpper.PhysicalDrives.URL+" failed - ", zap.Error(err), zap.Any("trace_id", ctx.Value("traceID"))) + return nil, err + } - } else if getController.Link != nil { - if getController.Link.LogicalDrives != nil && getController.Link.LogicalDrives.URL != "" { - logicalDrives, err := getDriveEndpoints(fqdn.String()+getController.Link.LogicalDrives.URL, target, retryClient) - if err != nil { - log.Error("error when getting logical drives endpoint from "+XL420, zap.Error(err), 
zap.Any("trace_id", ctx.Value("traceID"))) - return nil - } - - if logicalDrives.MembersCount > 0 { - for _, logicalDrive := range logicalDrives.Members { - tasks = append(tasks, - pool.NewTask(common.Fetch(fqdn.String()+logicalDrive.URL, LOGICALDRIVE, target, profile, retryClient))) - } - } - } + for _, member := range physicalDriveOutput.Members { + physicalDriveURLs = append(physicalDriveURLs, member.URL) + } + } - if getController.Link.PhysicalDrives != nil && getController.Link.PhysicalDrives.URL != "" { - physicalDrives, err := getDriveEndpoints(fqdn.String()+getController.Link.PhysicalDrives.URL, target, retryClient) - if err != nil { - log.Error("error when getting physical drives endpoint from "+XL420, zap.Error(err), zap.Any("trace_id", ctx.Value("traceID"))) - return nil - } - - if physicalDrives.MembersCount > 0 { - for _, physicalDrive := range physicalDrives.Members { - tasks = append(tasks, - pool.NewTask(common.Fetch(fqdn.String()+physicalDrive.URL, DRIVE, target, profile, retryClient))) - } - } - } + // If LogicalDrives is present, parse logical drive endpoint until all urls are found + if arrayCtrlResp.LinksLower.LogicalDrives.URL != "" { + logicalDriveOutput, err := getDriveEndpoint(fqdn.String()+arrayCtrlResp.LinksLower.LogicalDrives.URL, target, retryClient) + if err != nil { + log.Error("api call "+fqdn.String()+arrayCtrlResp.LinksLower.LogicalDrives.URL+" failed - ", zap.Error(err), zap.Any("trace_id", ctx.Value("traceID"))) + return nil, err + } + + // loop through each Member in the "LogicalDrive" field + for _, member := range logicalDriveOutput.Members { + // append each URL in the Members array to the logicalDriveURLs array. 
+ logicalDriveURLs = append(logicalDriveURLs, member.URL) + } + } + + // If PhysicalDrives is present, parse physical drive endpoint until all urls are found + if arrayCtrlResp.LinksLower.PhysicalDrives.URL != "" { + physicalDriveOutput, err := getDriveEndpoint(fqdn.String()+arrayCtrlResp.LinksLower.PhysicalDrives.URL, target, retryClient) + if err != nil { + log.Error("api call "+fqdn.String()+arrayCtrlResp.LinksLower.PhysicalDrives.URL+" failed - ", zap.Error(err), zap.Any("trace_id", ctx.Value("traceID"))) + return nil, err + } + + for _, member := range physicalDriveOutput.Members { + physicalDriveURLs = append(physicalDriveURLs, member.URL) } } } - p := pool.NewPool(tasks, 1) + // parse to find NVME drives + chassisOutput, err := getDriveEndpoint(fqdn.String()+uri+chassisUrl, target, retryClient) + if err != nil { + log.Error("api call "+fqdn.String()+uri+chassisUrl+" failed - ", zap.Error(err), zap.Any("trace_id", ctx.Value("traceID"))) + return nil, err + } + + // parse through "Links" to find "Drives" array + // loop through drives array and append each odata.id url to nvmeDriveURLs list + for _, drive := range chassisOutput.LinksUpper.Drives { + nvmeDriveURLs = append(nvmeDriveURLs, drive.URL) + } - // Create new map[string]*metrics for each new Exporter - metrx := NewDeviceMetrics() + // Loop through logicalDriveURLs, physicalDriveURLs, and nvmeDriveURLs and append each URL to the tasks pool + for _, url := range logicalDriveURLs { + tasks = append(tasks, pool.NewTask(common.Fetch(fqdn.String()+url, LOGICALDRIVE, target, profile, retryClient))) + } + + for _, url := range physicalDriveURLs { + tasks = append(tasks, pool.NewTask(common.Fetch(fqdn.String()+url, DISKDRIVE, target, profile, retryClient))) + } - return &Exporter{ - ctx: ctx, - pool: p, - host: fqdn.Host, - credProfile: profile, - up: prometheus.NewGauge(prometheus.GaugeOpts{ - Name: "up", - Help: "Was the last scrape of chassis monitor successful.", - }), - deviceMetrics: metrx, + for _, url 
:= range nvmeDriveURLs { + tasks = append(tasks, pool.NewTask(common.Fetch(fqdn.String()+url, NVME, target, profile, retryClient))) } + + tasks = append(tasks, + pool.NewTask(common.Fetch(fqdn.String()+uri+"/Chassis/1/Thermal/", THERMAL, target, profile, retryClient)), + pool.NewTask(common.Fetch(fqdn.String()+uri+"/Chassis/1/Power/", POWER, target, profile, retryClient)), + pool.NewTask(common.Fetch(fqdn.String()+uri+"/Systems/1/", MEMORY, target, profile, retryClient))) + + exp.pool = pool.NewPool(tasks, 1) + + return &exp, nil } // Describe describes all the metrics ever exported by the fishymetrics exporter. It @@ -233,7 +279,6 @@ func (e *Exporter) Describe(ch chan<- *prometheus.Desc) { n.Describe(ch) } } - ch <- e.up.Desc() } // Collect fetches the stats from configured fishymetrics location and delivers them @@ -248,10 +293,10 @@ func (e *Exporter) Collect(ch chan<- prometheus.Metric) { if _, ok := common.IgnoredDevices[e.host]; !ok { e.scrape() } else { - e.up.Set(float64(2)) + var upMetric = (*e.deviceMetrics)["up"] + (*upMetric)["up"].WithLabelValues().Set(float64(2)) } - ch <- e.up e.collectMetrics(ch) } @@ -285,7 +330,7 @@ func (e *Exporter) scrape() { if task.Err != nil { deviceState := uint8(0) // If credentials are incorrect we will add host to be ignored until manual intervention - if strings.Contains(task.Err.Error(), "401") { + if errors.Is(task.Err, common.ErrInvalidCredential) { common.IgnoredDevices[e.host] = common.IgnoredDevice{ Name: e.host, Endpoint: "https://" + e.host + "/redfish/v1/Chassis", @@ -297,7 +342,8 @@ func (e *Exporter) scrape() { } else { deviceState = 0 } - e.up.Set(float64(deviceState)) + var upMetric = (*e.deviceMetrics)["up"] + (*upMetric)["up"].WithLabelValues().Set(float64(deviceState)) log.Error("error from "+XL420, zap.Error(task.Err), zap.String("api", task.MetricType), zap.Any("trace_id", e.ctx.Value("traceID"))) return } @@ -307,10 +353,12 @@ func (e *Exporter) scrape() { err = e.exportThermalMetrics(task.Body) case 
POWER: err = e.exportPowerMetrics(task.Body) + case NVME: + err = e.exportNVMeDriveMetrics(task.Body) + case DISKDRIVE: + err = e.exportPhysicalDriveMetrics(task.Body) case LOGICALDRIVE: err = e.exportLogicalDriveMetrics(task.Body) - case DRIVE: - err = e.exportPhysicalDriveMetrics(task.Body) case MEMORY: err = e.exportMemoryMetrics(task.Body) } @@ -329,7 +377,8 @@ func (e *Exporter) scrape() { state &= result } - e.up.Set(float64(state)) + var upMetric = (*e.deviceMetrics)["up"] + (*upMetric)["up"].WithLabelValues().Set(float64(state)) } @@ -435,7 +484,7 @@ func (e *Exporter) exportLogicalDriveMetrics(body []byte) error { var dlDrive = (*e.deviceMetrics)["logicalDriveMetrics"] err := json.Unmarshal(body, &dld) if err != nil { - return fmt.Errorf("Error Unmarshalling DL560 LogicalDriveMetrics - " + err.Error()) + return fmt.Errorf("Error Unmarshalling XL420 LogicalDriveMetrics - " + err.Error()) } // Check logical drive is enabled then check status and convert string to numeric values if dld.Status.State == "Enabled" { @@ -448,7 +497,7 @@ func (e *Exporter) exportLogicalDriveMetrics(body []byte) error { state = DISABLED } - (*dlDrive)["logicalDriveStatus"].WithLabelValues(dld.Name, strconv.Itoa(dld.LogicalDriveNumber), dld.Raid).Set(state) + (*dlDrive)["raidStatus"].WithLabelValues(dld.Name, dld.LogicalDriveName, dld.VolumeUniqueIdentifier, dld.Raid).Set(state) return nil } @@ -457,8 +506,8 @@ func (e *Exporter) exportLogicalDriveMetrics(body []byte) error { func (e *Exporter) exportPhysicalDriveMetrics(body []byte) error { var state float64 - var dpd PhysicalDriveMetrics - var dpDrive = (*e.deviceMetrics)["driveMetrics"] + var dpd DiskDriveMetrics + var dpDrive = (*e.deviceMetrics)["diskDriveMetrics"] err := json.Unmarshal(body, &dpd) if err != nil { return fmt.Errorf("Error Unmarshalling XL420 DriveMetrics - " + err.Error()) @@ -474,8 +523,33 @@ func (e *Exporter) exportPhysicalDriveMetrics(body []byte) error { state = DISABLED } - 
(*dpDrive)["physicalDriveStatus"].WithLabelValues(dpd.Name, dpd.ID, dpd.Location, dpd.SerialNumber).Set(state) + (*dpDrive)["driveStatus"].WithLabelValues(dpd.Name, dpd.Id, dpd.Location, dpd.SerialNumber).Set(state) + + return nil +} + +// exportNVMeDriveMetrics collects the XL420 NVME drive metrics in json format and sets the prometheus gauges +func (e *Exporter) exportNVMeDriveMetrics(body []byte) error { + var state float64 + var dlnvme NVMeDriveMetrics + var dlnvmedrive = (*e.deviceMetrics)["nvmeMetrics"] + err := json.Unmarshal(body, &dlnvme) + if err != nil { + return fmt.Errorf("Error Unmarshalling XL420 NVMeDriveMetrics - " + err.Error()) + } + + // Check nvme drive is enabled then check status and convert string to numeric values + if dlnvme.Oem.Hpe.DriveStatus.State == "Enabled" { + if dlnvme.Oem.Hpe.DriveStatus.Health == "OK" { + state = OK + } else { + state = BAD + } + } else { + state = DISABLED + } + (*dlnvmedrive)["nvmeDriveStatus"].WithLabelValues(dlnvme.Protocol, dlnvme.ID, dlnvme.PhysicalLocation.PartLocation.ServiceLabel).Set(state) return nil } @@ -501,8 +575,8 @@ func (e *Exporter) exportMemoryMetrics(body []byte) error { return nil } -func getDriveEndpoints(url, host string, client *retryablehttp.Client) (GenericDrive, error) { - var drives GenericDrive +func getDriveEndpoint(url, host string, client *retryablehttp.Client) (GenericDrive, error) { + var drive GenericDrive var resp *http.Response var err error retryCount := 0 @@ -510,7 +584,7 @@ func getDriveEndpoints(url, host string, client *retryablehttp.Client) (GenericD resp, err = common.DoRequest(client, req) if err != nil { - return drives, err + return drive, err } defer resp.Body.Close() if !(resp.StatusCode >= http.StatusOK && resp.StatusCode < http.StatusMultipleChoices) { @@ -521,24 +595,26 @@ func getDriveEndpoints(url, host string, client *retryablehttp.Client) (GenericD retryCount = retryCount + 1 } if err != nil { - return drives, err + return drive, err } else if
!(resp.StatusCode >= http.StatusOK && resp.StatusCode < http.StatusMultipleChoices) { - return drives, fmt.Errorf("HTTP status %d", resp.StatusCode) + return drive, fmt.Errorf("HTTP status %d", resp.StatusCode) } + } else if resp.StatusCode == http.StatusUnauthorized { + return drive, common.ErrInvalidCredential } else { - return drives, fmt.Errorf("HTTP status %d", resp.StatusCode) + return drive, fmt.Errorf("HTTP status %d", resp.StatusCode) } } body, err := io.ReadAll(resp.Body) if err != nil { - return drives, fmt.Errorf("Error reading Response Body - " + err.Error()) + return drive, fmt.Errorf("Error reading Response Body - " + err.Error()) } - err = json.Unmarshal(body, &drives) + err = json.Unmarshal(body, &drive) if err != nil { - return drives, fmt.Errorf("Error Unmarshalling DL560 Drive Collection struct - " + err.Error()) + return drive, fmt.Errorf("Error Unmarshalling XL420 Drive Collection struct - " + err.Error()) } - return drives, nil + return drive, nil } diff --git a/hpe/xl420/exporter_test.go b/hpe/xl420/exporter_test.go new file mode 100644 index 0000000..00abd0e --- /dev/null +++ b/hpe/xl420/exporter_test.go @@ -0,0 +1,125 @@ +/* + * Copyright 2024 Comcast Cable Communications Management, LLC + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + +package xl420 + +import ( + "context" + "encoding/json" + "net/http" + "net/http/httptest" + "strings" + "testing" + + "github.com/prometheus/client_golang/prometheus" + "github.com/prometheus/client_golang/prometheus/testutil" + "github.com/stretchr/testify/assert" +) + +const ( + up2Response = ` + # HELP up was the last scrape of fishymetrics successful. + # TYPE up gauge + up 2 + ` +) + +type TestErrorResponse struct { + Error TestError `json:"error"` +} + +type TestError struct { + Code string `json:"code"` + Message string `json:"message"` + ExtendedInfo []TestMessage `json:"@Message.ExtendedInfo"` +} + +type TestMessage struct { + MessageId string `json:"MessageId"` +} + +func MustMarshal(v interface{}) []byte { + b, err := json.Marshal(v) + if err != nil { + panic(err) + } + return b +} + +func Test_XL420_Exporter(t *testing.T) { + server := httptest.NewServer(http.HandlerFunc(func(w http.ResponseWriter, r *http.Request) { + if r.URL.Path == "/redfish/v1/badcred/Systems/1/SmartStorage/ArrayControllers/" { + w.WriteHeader(http.StatusUnauthorized) + w.Write(MustMarshal(TestErrorResponse{ + Error: TestError{ + Code: "iLO.0.10.ExtendedInfo", + Message: "See @Message.ExtendedInfo for more information.", + ExtendedInfo: []TestMessage{ + { + MessageId: "Base.1.0.NoValidSession", + }, + }, + }, + })) + return + } + w.WriteHeader(http.StatusInternalServerError) + w.Write([]byte("Unknown path - please create test case(s) for it")) + })) + defer server.Close() + + ctx := context.Background() + assert := assert.New(t) + + tests := []struct { + name string + uri string + metricName string + metricRef1 string + metricRef2 string + payload []byte + expected string + }{ + { + name: "Bad Credentials", + uri: "/redfish/v1/badcred", + metricName: "up", + metricRef1: "up", + metricRef2: "up", + expected: up2Response, + }, + } + + for _, test := range tests { + t.Run(test.name, func(t *testing.T) { + var exporter prometheus.Collector + var err error + exporter, err = 
NewExporter(ctx, server.URL, test.uri, "") + assert.Nil(err) + assert.NotNil(exporter) + + prometheus.MustRegister(exporter) + + metric := (*exporter.(*Exporter).deviceMetrics)[test.metricRef1] + m := (*metric)[test.metricRef2] + + assert.Empty(testutil.CollectAndCompare(m, strings.NewReader(test.expected), test.metricName)) + + prometheus.Unregister(exporter) + + }) + } +} diff --git a/hpe/xl420/metrics.go b/hpe/xl420/metrics.go index 4f6dad8..dc4ed6a 100644 --- a/hpe/xl420/metrics.go +++ b/hpe/xl420/metrics.go @@ -35,6 +35,10 @@ func newServerMetric(metricName string, docString string, constLabels prometheus func NewDeviceMetrics() *map[string]*metrics { var ( + UpMetric = &metrics{ + "up": newServerMetric("up", "was the last scrape of fishymetrics successful.", nil, []string{}), + } + ThermalMetrics = &metrics{ "fanSpeed": newServerMetric("xl420_thermal_fan_speed", "Current fan speed in the unit of percentage, possible values are 0 - 100", nil, []string{"name"}), "fanStatus": newServerMetric("xl420_thermal_fan_status", "Current fan status 1 = OK, 0 = BAD", nil, []string{"name"}), @@ -49,12 +53,20 @@ func NewDeviceMetrics() *map[string]*metrics { "supplyTotalCapacity": newServerMetric("xl420_power_supply_total_capacity", "Total output capacity of all the power supplies", nil, []string{"memberId"}), } - LogicalDriveMetrics = &metrics{ - "logicalDriveStatus": newServerMetric("xl420_logical_drive_status", "Current logical drive status 1 = OK, 0 = BAD, -1 = DISABLED", nil, []string{"name", "logicalDriveNumber", "raid"}), + // Splitting out the three different types of drives to gather metrics on each (NVMe, Disk Drive, and Logical Drive) + // NVMe Drive Metrics + NVMeDriveMetrics = &metrics{ + "nvmeDriveStatus": newServerMetric("xl420_nvme_drive_status", "Current NVME status 1 = OK, 0 = BAD, -1 = DISABLED", nil, []string{"protocol", "id", "serviceLabel"}), } - PhysicalDriveMetrics = &metrics{ - "physicalDriveStatus": newServerMetric("xl420_physical_drive_status",
"Current physical drive status 1 = OK, 0 = BAD, -1 = DISABLED", nil, []string{"name", "id", "location", "serialnumber"}), + // Physical Storage Disk Drive Metrics + DiskDriveMetrics = &metrics{ + "driveStatus": newServerMetric("xl420_disk_drive_status", "Current Disk Drive status 1 = OK, 0 = BAD, -1 = DISABLED", nil, []string{"name", "id", "location", "serialnumber"}), // DiskDriveStatus values + } + + // Logical Disk Drive Metrics + LogicalDriveMetrics = &metrics{ + "raidStatus": newServerMetric("xl420_logical_drive_status", "Current Logical Drive Raid 1 = OK, 0 = BAD, -1 = DISABLED", nil, []string{"name", "logicaldrivename", "volumeuniqueidentifier", "raid"}), // Logical Drive Raid value } MemoryMetrics = &metrics{ @@ -62,10 +74,12 @@ func NewDeviceMetrics() *map[string]*metrics { } Metrics = &map[string]*metrics{ + "up": UpMetric, "thermalMetrics": ThermalMetrics, "powerMetrics": PowerMetrics, + "nvmeMetrics": NVMeDriveMetrics, + "diskDriveMetrics": DiskDriveMetrics, "logicalDriveMetrics": LogicalDriveMetrics, - "driveMetrics": PhysicalDriveMetrics, "memoryMetrics": MemoryMetrics, } )