From 4450fb3af8c447070b2bcdbc95713ab077ba2120 Mon Sep 17 00:00:00 2001 From: Will Bollock Date: Mon, 11 Dec 2023 12:10:18 -0500 Subject: [PATCH 1/7] ref: register metrics and lock outside collector This helps fix some concurrency problems by registering the metrics once outside the collector and introducing a mutex lock when values get updated --- internal/collector/icmp_collector.go | 151 +++++++++++---------------- internal/server/server.go | 45 +++++++- 2 files changed, 103 insertions(+), 93 deletions(-) diff --git a/internal/collector/icmp_collector.go b/internal/collector/icmp_collector.go index 3b7b7b7..cd24192 100644 --- a/internal/collector/icmp_collector.go +++ b/internal/collector/icmp_collector.go @@ -1,10 +1,10 @@ package collector import ( - "math" "net/http" "strconv" "strings" + "sync" "time" probing "github.com/prometheus-community/pro-bing" @@ -14,7 +14,6 @@ import ( ) const ( - namespace = "ping_" defaultTimeout = time.Second * 10 defaultInterval = time.Second defaultCount = 5 @@ -26,41 +25,6 @@ const ( minPacketSize = 24 ) -var ( - pingSuccessGauge = prometheus.NewGauge(prometheus.GaugeOpts{ - Name: namespace + "success", - Help: "Returns whether the ping succeeded", - }) - pingTimeoutGauge = prometheus.NewGauge(prometheus.GaugeOpts{ - Name: namespace + "timeout", - Help: "Returns whether the ping failed by timeout", - }) - probeDurationGauge = prometheus.NewGauge(prometheus.GaugeOpts{ - Name: namespace + "duration_seconds", - Help: "Returns how long the probe took to complete in seconds", - }) - minGauge = prometheus.NewGauge(prometheus.GaugeOpts{ - Name: namespace + "rtt_min_seconds", - Help: "Best round trip time", - }) - maxGauge = prometheus.NewGauge(prometheus.GaugeOpts{ - Name: namespace + "rtt_max_seconds", - Help: "Worst round trip time", - }) - avgGauge = prometheus.NewGauge(prometheus.GaugeOpts{ - Name: namespace + "rtt_avg_seconds", - Help: "Mean round trip time", - }) - stddevGauge = prometheus.NewGauge(prometheus.GaugeOpts{ - Name: namespace + "rtt_std_deviation", - Help: "Standard deviation", - }) - lossGauge = prometheus.NewGauge(prometheus.GaugeOpts{ - Name: namespace + "loss_ratio", - Help: "Packet loss from 0 to 100", - }) -) - type pingParams struct { target string timeout time.Duration @@ -146,73 +110,76 @@ func serveMetricsWithError(w http.ResponseWriter, r *http.Request, registry *pro } } -func PingHandler(w http.ResponseWriter, r *http.Request) { - p := parseParams(r) - start := time.Now() +func PingHandler(registry *prometheus.Registry, pingSuccessGauge prometheus.Gauge, pingTimeoutGauge prometheus.Gauge, probeDurationGauge prometheus.Gauge, minGauge prometheus.Gauge, maxGauge prometheus.Gauge, avgGauge prometheus.Gauge, stddevGauge prometheus.Gauge, lossGauge prometheus.Gauge, mutex *sync.Mutex) http.HandlerFunc { + return func(w http.ResponseWriter, r *http.Request) { - registry := prometheus.NewRegistry() - registry.MustRegister(pingSuccessGauge, pingTimeoutGauge, probeDurationGauge, minGauge, maxGauge, avgGauge, stddevGauge, lossGauge) + p := parseParams(r) + start := time.Now() - // assume failure - pingSuccessGauge.Set(0) - pingTimeoutGauge.Set(1) + // TODO use atomic lock and reduce lock duration, dont think this is needed + mutex.Lock() - log.Debugf("Request received with parameters: target=%v, count=%v, size=%v, interval=%v, timeout=%v, ttl=%v, packet=%v", - p.target, p.count, p.size, p.interval, p.timeout, p.ttl, p.packet) + // assume failure + pingSuccessGauge.Set(0) + pingTimeoutGauge.Set(1) - pinger := probing.New(p.target) + mutex.Unlock() - pinger.Count = p.count - pinger.Size = p.size - pinger.Interval = p.interval - pinger.Timeout = p.timeout - pinger.TTL = p.ttl + log.Debugf("Request received with parameters: target=%v, count=%v, size=%v, interval=%v, timeout=%v, ttl=%v, packet=%v", + p.target, p.count, p.size, p.interval, p.timeout, p.ttl, p.packet) - if p.packet == "icmp" { - pinger.SetPrivileged(true) - } else { - pinger.SetPrivileged(false) - } + pinger := probing.New(p.target) - if p.protocol == "v6" || p.protocol == "6" || p.protocol == "ip6" { - pinger.SetNetwork("ip6") - } else { - pinger.SetNetwork("ip4") - } + pinger.Count = p.count + pinger.Size = p.size + pinger.Interval = p.interval + pinger.Timeout = p.timeout + pinger.TTL = p.ttl - pinger.OnRecv = func(pkt *probing.Packet) { - log.Debugf("%d bytes from %s: icmp_seq=%d time=%v ttl=%v\n", - pkt.Nbytes, pkt.IPAddr, pkt.Seq, pkt.Rtt, pkt.TTL) - } + if p.packet == "icmp" { + pinger.SetPrivileged(true) + } else { + pinger.SetPrivileged(false) + } - pinger.OnDuplicateRecv = func(pkt *probing.Packet) { - log.Debugf("%d bytes from %s: icmp_seq=%d time=%v ttl=%v (DUP!)\n", - pkt.Nbytes, pkt.IPAddr, pkt.Seq, pkt.Rtt, pkt.TTL) - } + if p.protocol == "v6" || p.protocol == "6" || p.protocol == "ip6" { + pinger.SetNetwork("ip6") + } else { + pinger.SetNetwork("ip4") + } + + pinger.OnFinish = func(stats *probing.Statistics) { + log.Debugf("OnFinish: target=%v, PacketsSent=%d, PacketsRecv=%d, PacketLoss=%f%%, MinRtt=%v, AvgRtt=%v, MaxRtt=%v, StdDevRtt=%v, Duration=%v", + stats.IPAddr, pinger.PacketsSent, pinger.PacketsRecv, stats.PacketLoss, stats.MinRtt, stats.AvgRtt, stats.MaxRtt, stats.StdDevRtt, time.Since(start)) + + // lock while we attribute values to + mutex.Lock() + if pinger.PacketsRecv > 0 && pinger.Timeout > time.Since(start) { + log.Debugf("Ping successful: target=%v", stats.IPAddr) + pingSuccessGauge.Set(1) + pingTimeoutGauge.Set(0) + } else if pinger.Timeout < time.Since(start) { + log.Debugf("Ping timeout: target=%v", stats.IPAddr) + pingTimeoutGauge.Set(1) + pingSuccessGauge.Set(0) + } else if pinger.PacketsRecv == 0 { + log.Debugf("Ping failed, no packets received: target=%v", stats.IPAddr) + pingSuccessGauge.Set(0) + pingTimeoutGauge.Set(0) + } - pinger.OnFinish = func(stats *probing.Statistics) { - log.Debugf("OnFinish: PacketsSent=%d, PacketsRecv=%d, PacketLoss=%f%%, MinRtt=%v, AvgRtt=%v, MaxRtt=%v, StdDevRtt=%v, Duration=%v", - pinger.PacketsSent, pinger.PacketsRecv, stats.PacketLoss, stats.MinRtt, stats.AvgRtt, stats.MaxRtt, stats.StdDevRtt, time.Since(start)) - - const tolerance = 0.001 // tolerance for easier floating point comparisons - // success declared if we sent as many packets as intended, we got back all packets sent, and packet loss is not equal to 100% - if pinger.Count == pinger.PacketsRecv && math.Abs(stats.PacketLoss-100) > tolerance { - // no error will be raised if we reach a timeout - // https://github.com/prometheus-community/pro-bing/issues/70 - pingSuccessGauge.Set(1) - pingTimeoutGauge.Set(0) + minGauge.Set(stats.MinRtt.Seconds()) + avgGauge.Set(stats.AvgRtt.Seconds()) + maxGauge.Set(stats.MaxRtt.Seconds()) + stddevGauge.Set(float64(stats.StdDevRtt)) + lossGauge.Set(stats.PacketLoss) + probeDurationGauge.Set(time.Since(start).Seconds()) + mutex.Unlock() } - minGauge.Set(stats.MinRtt.Seconds()) - avgGauge.Set(stats.AvgRtt.Seconds()) - maxGauge.Set(stats.MaxRtt.Seconds()) - stddevGauge.Set(float64(stats.StdDevRtt)) - lossGauge.Set(stats.PacketLoss) - probeDurationGauge.Set(time.Since(start).Seconds()) - } - if err := pinger.Run(); err != nil { - log.Error("Failed to ping target host:", err) + if err := pinger.Run(); err != nil { + log.Error("Failed to ping target host:", err) + } serveMetricsWithError(w, r, registry) } - serveMetricsWithError(w, r, registry) } diff --git a/internal/server/server.go b/internal/server/server.go index f6e00a0..4f1a717 100644 --- a/internal/server/server.go +++ b/internal/server/server.go @@ -4,7 +4,9 @@ import ( "fmt" "net/http" "net/http/pprof" + "sync" + "github.com/prometheus/client_golang/prometheus" "github.com/prometheus/client_golang/prometheus/promhttp" log "github.com/sirupsen/logrus" "github.com/wbollock/ping_exporter/internal/collector" @@ -21,11 +23,52 @@ const ( defaultMetricsPath = "/metrics" ) +const namespace = "ping_" + +var ( + pingSuccessGauge = prometheus.NewGauge(prometheus.GaugeOpts{ + Name: namespace + "success", + Help: "Returns whether the ping succeeded", + }) + pingTimeoutGauge = prometheus.NewGauge(prometheus.GaugeOpts{ + Name: namespace + "timeout", + Help: "Returns whether the ping failed by timeout", + }) + probeDurationGauge = prometheus.NewGauge(prometheus.GaugeOpts{ + Name: namespace + "duration_seconds", + Help: "Returns how long the probe took to complete in seconds", + }) + minGauge = prometheus.NewGauge(prometheus.GaugeOpts{ + Name: namespace + "rtt_min_seconds", + Help: "Best round trip time", + }) + maxGauge = prometheus.NewGauge(prometheus.GaugeOpts{ + Name: namespace + "rtt_max_seconds", + Help: "Worst round trip time", + }) + avgGauge = prometheus.NewGauge(prometheus.GaugeOpts{ + Name: namespace + "rtt_avg_seconds", + Help: "Mean round trip time", + }) + stddevGauge = prometheus.NewGauge(prometheus.GaugeOpts{ + Name: namespace + "rtt_std_deviation", + Help: "Standard deviation", + }) + lossGauge = prometheus.NewGauge(prometheus.GaugeOpts{ + Name: namespace + "loss_ratio", + Help: "Packet loss from 0 to 100", + }) +) + func SetupServer() http.Handler { mux := http.NewServeMux() mux.Handle(defaultMetricsPath, promhttp.Handler()) - mux.HandleFunc("/probe", collector.PingHandler) + + var mutex sync.Mutex + registry := prometheus.NewRegistry() + registry.MustRegister(pingSuccessGauge, pingTimeoutGauge, probeDurationGauge, minGauge, maxGauge, avgGauge, stddevGauge, lossGauge) + mux.HandleFunc("/probe", collector.PingHandler(registry, pingSuccessGauge, pingTimeoutGauge, probeDurationGauge, minGauge, maxGauge, avgGauge, stddevGauge, lossGauge, &mutex)) // for non-standard web servers, need to register handlers mux.HandleFunc("/debug/pprof/", http.HandlerFunc(pprof.Index)) From ae96e911f1603947e3d9edc69ebfef03ea4abf22 Mon Sep 17 00:00:00 2001 From: Will Bollock Date: Mon, 11 Dec 2023 16:43:27 -0500 Subject: [PATCH 2/7] ref: make ping timeout and failed info msgs --- internal/collector/icmp_collector.go | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/internal/collector/icmp_collector.go b/internal/collector/icmp_collector.go index cd24192..1b1bd33 100644 --- a/internal/collector/icmp_collector.go +++ b/internal/collector/icmp_collector.go @@ -159,11 +159,11 @@ func PingHandler(registry *prometheus.Registry, pingSuccessGauge prometheus.Gaug pingSuccessGauge.Set(1) pingTimeoutGauge.Set(0) } else if pinger.Timeout < time.Since(start) { - log.Debugf("Ping timeout: target=%v", stats.IPAddr) + log.Infof("Ping timeout: target=%v, timeout=%v, duration=%v", stats.IPAddr, pinger.Timeout, time.Since(start)) pingTimeoutGauge.Set(1) pingSuccessGauge.Set(0) } else if pinger.PacketsRecv == 0 { - log.Debugf("Ping failed, no packets received: target=%v", stats.IPAddr) + log.Infof("Ping failed, no packets received: target=%v, packetsRecv=%v, packetsSent=%v", stats.IPAddr, pinger.PacketsRecv, pinger.PacketsSent) pingSuccessGauge.Set(0) pingTimeoutGauge.Set(0) } From d8c9ed4999e946a4fc1afab0b0f3293039ba37ad Mon Sep 17 00:00:00 2001 From: Will Bollock Date: Mon, 11 Dec 2023 16:44:09 -0500 Subject: [PATCH 3/7] chore: reduce unneeded lock --- internal/collector/icmp_collector.go | 10 ---------- 1 file changed, 10 deletions(-) diff --git a/internal/collector/icmp_collector.go b/internal/collector/icmp_collector.go index 1b1bd33..0cdd289 100644 --- a/internal/collector/icmp_collector.go +++ b/internal/collector/icmp_collector.go @@ -116,15 +116,6 @@ func PingHandler(registry *prometheus.Registry, pingSuccessGauge prometheus.Gaug p := parseParams(r) start := time.Now() - // TODO use atomic lock and reduce lock duration, dont think this is needed - mutex.Lock() - - // assume failure - pingSuccessGauge.Set(0) - pingTimeoutGauge.Set(1) - - mutex.Unlock() - log.Debugf("Request received with parameters: target=%v, count=%v, size=%v, interval=%v, timeout=%v, ttl=%v, packet=%v", p.target, p.count, p.size, p.interval, p.timeout, p.ttl, p.packet) @@ -152,7 +143,6 @@ func PingHandler(registry *prometheus.Registry, pingSuccessGauge prometheus.Gaug log.Debugf("OnFinish: target=%v, PacketsSent=%d, PacketsRecv=%d, PacketLoss=%f%%, MinRtt=%v, AvgRtt=%v, MaxRtt=%v, StdDevRtt=%v, Duration=%v", stats.IPAddr, pinger.PacketsSent, pinger.PacketsRecv, stats.PacketLoss, stats.MinRtt, stats.AvgRtt, stats.MaxRtt, stats.StdDevRtt, time.Since(start)) - // lock while we attribute values to mutex.Lock() if pinger.PacketsRecv > 0 && pinger.Timeout > time.Since(start) { log.Debugf("Ping successful: target=%v", stats.IPAddr) From 1dfd63e3687cc2e2597b0656fd35eeae59157902 Mon Sep 17 00:00:00 2001 From: Will Bollock Date: Wed, 13 Dec 2023 11:37:15 -0500 Subject: [PATCH 4/7] ref: define PingMetrics type this cuts down on the long parameter list in the collector function --- internal/collector/icmp_collector.go | 27 ++++++++++++++------------- internal/metrics/metrics.go | 16 ++++++++++++++++ internal/server/server.go | 17 +++++++++++++++-- 3 files changed, 45 insertions(+), 15 deletions(-) create mode 100644 internal/metrics/metrics.go diff --git a/internal/collector/icmp_collector.go b/internal/collector/icmp_collector.go index 0cdd289..4d20d4e 100644 --- a/internal/collector/icmp_collector.go +++ b/internal/collector/icmp_collector.go @@ -11,6 +11,7 @@ import ( "github.com/prometheus/client_golang/prometheus" "github.com/prometheus/client_golang/prometheus/promhttp" log "github.com/sirupsen/logrus" + "github.com/wbollock/ping_exporter/internal/metrics" ) const ( @@ -110,7 +111,7 @@ func serveMetricsWithError(w http.ResponseWriter, r *http.Request, registry *pro } } -func PingHandler(registry *prometheus.Registry, pingSuccessGauge prometheus.Gauge, pingTimeoutGauge prometheus.Gauge, probeDurationGauge prometheus.Gauge, minGauge prometheus.Gauge, maxGauge prometheus.Gauge, avgGauge prometheus.Gauge, stddevGauge prometheus.Gauge, lossGauge prometheus.Gauge, mutex *sync.Mutex) http.HandlerFunc { +func PingHandler(registry *prometheus.Registry, metrics metrics.PingMetrics, mutex *sync.Mutex) http.HandlerFunc { return func(w http.ResponseWriter, r *http.Request) { p := parseParams(r) @@ -146,24 +147,24 @@ func PingHandler(registry *prometheus.Registry, pingSuccessGauge prometheus.Gaug mutex.Lock() if pinger.PacketsRecv > 0 && pinger.Timeout > time.Since(start) { log.Debugf("Ping successful: target=%v", stats.IPAddr) - pingSuccessGauge.Set(1) - pingTimeoutGauge.Set(0) + metrics.PingSuccessGauge.Set(1) + metrics.PingTimeoutGauge.Set(0) } else if pinger.Timeout < time.Since(start) { log.Infof("Ping timeout: target=%v, timeout=%v, duration=%v", stats.IPAddr, pinger.Timeout, time.Since(start)) - pingTimeoutGauge.Set(1) - pingSuccessGauge.Set(0) + metrics.PingTimeoutGauge.Set(1) + metrics.PingSuccessGauge.Set(0) } else if pinger.PacketsRecv == 0 { log.Infof("Ping failed, no packets received: target=%v, packetsRecv=%v, packetsSent=%v", stats.IPAddr, pinger.PacketsRecv, pinger.PacketsSent) - pingSuccessGauge.Set(0) - pingTimeoutGauge.Set(0) + metrics.PingSuccessGauge.Set(0) + metrics.PingTimeoutGauge.Set(0) } - minGauge.Set(stats.MinRtt.Seconds()) - avgGauge.Set(stats.AvgRtt.Seconds()) - maxGauge.Set(stats.MaxRtt.Seconds()) - stddevGauge.Set(float64(stats.StdDevRtt)) - lossGauge.Set(stats.PacketLoss) - probeDurationGauge.Set(time.Since(start).Seconds()) + metrics.MinGauge.Set(stats.MinRtt.Seconds()) + metrics.AvgGauge.Set(stats.AvgRtt.Seconds()) + metrics.MaxGauge.Set(stats.MaxRtt.Seconds()) + metrics.StddevGauge.Set(float64(stats.StdDevRtt)) + metrics.LossGauge.Set(stats.PacketLoss) + metrics.ProbeDurationGauge.Set(time.Since(start).Seconds()) mutex.Unlock() } diff --git a/internal/metrics/metrics.go b/internal/metrics/metrics.go new file mode 100644 index 0000000..a6bb1c8 --- /dev/null +++ b/internal/metrics/metrics.go @@ -0,0 +1,16 @@ +package metrics + +import ( + "github.com/prometheus/client_golang/prometheus" +) + +type PingMetrics struct { + PingSuccessGauge prometheus.Gauge + PingTimeoutGauge prometheus.Gauge + ProbeDurationGauge prometheus.Gauge + MinGauge prometheus.Gauge + MaxGauge prometheus.Gauge + AvgGauge prometheus.Gauge + StddevGauge prometheus.Gauge + LossGauge prometheus.Gauge +} diff --git a/internal/server/server.go b/internal/server/server.go index 4f1a717..d9219e9 100644 --- a/internal/server/server.go +++ b/internal/server/server.go @@ -10,6 +10,7 @@ import ( "github.com/prometheus/client_golang/prometheus/promhttp" log "github.com/sirupsen/logrus" "github.com/wbollock/ping_exporter/internal/collector" + "github.com/wbollock/ping_exporter/internal/metrics" ) const ( @@ -67,8 +68,20 @@ func SetupServer() http.Handler { var mutex sync.Mutex registry := prometheus.NewRegistry() - registry.MustRegister(pingSuccessGauge, pingTimeoutGauge, probeDurationGauge, minGauge, maxGauge, avgGauge, stddevGauge, lossGauge) - mux.HandleFunc("/probe", collector.PingHandler(registry, pingSuccessGauge, pingTimeoutGauge, probeDurationGauge, minGauge, maxGauge, avgGauge, stddevGauge, lossGauge, &mutex)) + + metrics := metrics.PingMetrics{ + PingSuccessGauge: pingSuccessGauge, + PingTimeoutGauge: pingTimeoutGauge, + ProbeDurationGauge: probeDurationGauge, + MinGauge: minGauge, + MaxGauge: maxGauge, + AvgGauge: avgGauge, + StddevGauge: stddevGauge, + LossGauge: lossGauge, + } + + registry.MustRegister(metrics.PingSuccessGauge, metrics.PingTimeoutGauge, metrics.ProbeDurationGauge, metrics.MinGauge, metrics.MaxGauge, metrics.AvgGauge, metrics.StddevGauge, metrics.LossGauge) + mux.HandleFunc("/probe", collector.PingHandler(registry, metrics, &mutex)) // for non-standard web servers, need to register handlers mux.HandleFunc("/debug/pprof/", http.HandlerFunc(pprof.Index)) From 652caf40fb4184426fd2b1b2c0e54fdac880b81f Mon Sep 17 00:00:00 2001 From: Will Bollock Date: Wed, 13 Dec 2023 15:29:12 -0500 Subject: [PATCH 5/7] ref: de-globalize server variables --- internal/server/server.go | 93 ++++++++++++++++++++------------------- 1 file changed, 47 insertions(+), 46 deletions(-) diff --git a/internal/server/server.go b/internal/server/server.go index d9219e9..be6f240 100644 --- a/internal/server/server.go +++ b/internal/server/server.go @@ -13,55 +13,56 @@ import ( "github.com/wbollock/ping_exporter/internal/metrics" ) -const ( - defaultHTML = ` - Ping Exporter - -

Ping Exporter

-

Metrics

- - ` - defaultMetricsPath = "/metrics" -) +func SetupServer() http.Handler { -const namespace = "ping_" + const ( + defaultHTML = ` + Ping Exporter + +

Ping Exporter

+

Metrics

+ + ` + defaultMetricsPath = "/metrics" + ) -var ( - pingSuccessGauge = prometheus.NewGauge(prometheus.GaugeOpts{ - Name: namespace + "success", - Help: "Returns whether the ping succeeded", - }) - pingTimeoutGauge = prometheus.NewGauge(prometheus.GaugeOpts{ - Name: namespace + "timeout", - Help: "Returns whether the ping failed by timeout", - }) - probeDurationGauge = prometheus.NewGauge(prometheus.GaugeOpts{ - Name: namespace + "duration_seconds", - Help: "Returns how long the probe took to complete in seconds", - }) - minGauge = prometheus.NewGauge(prometheus.GaugeOpts{ - Name: namespace + "rtt_min_seconds", - Help: "Best round trip time", - }) - maxGauge = prometheus.NewGauge(prometheus.GaugeOpts{ - Name: namespace + "rtt_max_seconds", - Help: "Worst round trip time", - }) - avgGauge = prometheus.NewGauge(prometheus.GaugeOpts{ - Name: namespace + "rtt_avg_seconds", - Help: "Mean round trip time", - }) - stddevGauge = prometheus.NewGauge(prometheus.GaugeOpts{ - Name: namespace + "rtt_std_deviation", - Help: "Standard deviation", - }) - lossGauge = prometheus.NewGauge(prometheus.GaugeOpts{ - Name: namespace + "loss_ratio", - Help: "Packet loss from 0 to 100", - }) -) + const namespace = "ping_" + + var ( + pingSuccessGauge = prometheus.NewGauge(prometheus.GaugeOpts{ + Name: namespace + "success", + Help: "Returns whether the ping succeeded", + }) + pingTimeoutGauge = prometheus.NewGauge(prometheus.GaugeOpts{ + Name: namespace + "timeout", + Help: "Returns whether the ping failed by timeout", + }) + probeDurationGauge = prometheus.NewGauge(prometheus.GaugeOpts{ + Name: namespace + "duration_seconds", + Help: "Returns how long the probe took to complete in seconds", + }) + minGauge = prometheus.NewGauge(prometheus.GaugeOpts{ + Name: namespace + "rtt_min_seconds", + Help: "Best round trip time", + }) + maxGauge = prometheus.NewGauge(prometheus.GaugeOpts{ + Name: namespace + "rtt_max_seconds", + Help: "Worst round trip time", + }) + avgGauge = prometheus.NewGauge(prometheus.GaugeOpts{ + Name: namespace + "rtt_avg_seconds", + Help: "Mean round trip time", + }) + stddevGauge = prometheus.NewGauge(prometheus.GaugeOpts{ + Name: namespace + "rtt_std_deviation", + Help: "Standard deviation", + }) + lossGauge = prometheus.NewGauge(prometheus.GaugeOpts{ + Name: namespace + "loss_ratio", + Help: "Packet loss from 0 to 100", + }) + ) -func SetupServer() http.Handler { mux := http.NewServeMux() mux.Handle(defaultMetricsPath, promhttp.Handler()) From a3b996ef705b837f00a1fa261348cfd3bef5fc83 Mon Sep 17 00:00:00 2001 From: Will Bollock Date: Wed, 13 Dec 2023 17:06:05 -0500 Subject: [PATCH 6/7] fix: scope metric variables to collector function Turns out the entire contention/value leakage issue was just due to these globals in the icmp collector function --- internal/collector/icmp_collector.go | 82 ++++++++++++++++++++++------ internal/server/server.go | 57 +------------------ 2 files changed, 68 insertions(+), 71 deletions(-) diff --git a/internal/collector/icmp_collector.go b/internal/collector/icmp_collector.go index 4d20d4e..af0bacc 100644 --- a/internal/collector/icmp_collector.go +++ b/internal/collector/icmp_collector.go @@ -4,7 +4,6 @@ import ( "net/http" "strconv" "strings" - "sync" "time" probing "github.com/prometheus-community/pro-bing" @@ -14,18 +13,6 @@ import ( "github.com/wbollock/ping_exporter/internal/metrics" ) -const ( - defaultTimeout = time.Second * 10 - defaultInterval = time.Second - defaultCount = 5 - defaultSize = 56 - defaultTTL = 64 - defaultProtocol = "ip4" // or ip6 - defaultPacket = "icmp" // or udp - maxPacketSize = 65507 - minPacketSize = 24 -) - type pingParams struct { target string timeout time.Duration @@ -40,6 +27,18 @@ type pingParams struct { func parseParams(r *http.Request) pingParams { params := r.URL.Query() + const ( + defaultTimeout = time.Second * 10 + defaultInterval = time.Second + defaultCount = 5 + defaultSize = 56 + defaultTTL = 64 + defaultProtocol = "ip4" // or ip6 + defaultPacket = "icmp" // or udp + maxPacketSize = 65507 + minPacketSize = 24 + ) + p := pingParams{ target: params.Get("target"), timeout: defaultTimeout, @@ -111,9 +110,62 @@ func serveMetricsWithError(w http.ResponseWriter, r *http.Request, registry *pro } } -func PingHandler(registry *prometheus.Registry, metrics metrics.PingMetrics, mutex *sync.Mutex) http.HandlerFunc { +func PingHandler() http.HandlerFunc { return func(w http.ResponseWriter, r *http.Request) { + const ( + namespace = "ping_" + ) + + var ( + pingSuccessGauge = prometheus.NewGauge(prometheus.GaugeOpts{ + Name: namespace + "success", + Help: "Returns whether the ping succeeded", + }) + pingTimeoutGauge = prometheus.NewGauge(prometheus.GaugeOpts{ + Name: namespace + "timeout", + Help: "Returns whether the ping failed by timeout", + }) + probeDurationGauge = prometheus.NewGauge(prometheus.GaugeOpts{ + Name: namespace + "duration_seconds", + Help: "Returns how long the probe took to complete in seconds", + }) + minGauge = prometheus.NewGauge(prometheus.GaugeOpts{ + Name: namespace + "rtt_min_seconds", + Help: "Best round trip time", + }) + maxGauge = prometheus.NewGauge(prometheus.GaugeOpts{ + Name: namespace + "rtt_max_seconds", + Help: "Worst round trip time", + }) + avgGauge = prometheus.NewGauge(prometheus.GaugeOpts{ + Name: namespace + "rtt_avg_seconds", + Help: "Mean round trip time", + }) + stddevGauge = prometheus.NewGauge(prometheus.GaugeOpts{ + Name: namespace + "rtt_std_deviation", + Help: "Standard deviation", + }) + lossGauge = prometheus.NewGauge(prometheus.GaugeOpts{ + Name: namespace + "loss_ratio", + Help: "Packet loss from 0 to 100", + }) + ) + + metrics := metrics.PingMetrics{ + PingSuccessGauge: pingSuccessGauge, + PingTimeoutGauge: pingTimeoutGauge, + ProbeDurationGauge: probeDurationGauge, + MinGauge: minGauge, + MaxGauge: maxGauge, + AvgGauge: avgGauge, + StddevGauge: stddevGauge, + LossGauge: lossGauge, + } + registry := prometheus.NewRegistry() + + registry.MustRegister(metrics.PingSuccessGauge, metrics.PingTimeoutGauge, metrics.ProbeDurationGauge, metrics.MinGauge, metrics.MaxGauge, metrics.AvgGauge, metrics.StddevGauge, metrics.LossGauge) + p := parseParams(r) start := time.Now() @@ -144,7 +196,6 @@ func PingHandler(registry *prometheus.Registry, metrics metrics.PingMetrics, mut log.Debugf("OnFinish: target=%v, PacketsSent=%d, PacketsRecv=%d, PacketLoss=%f%%, MinRtt=%v, AvgRtt=%v, MaxRtt=%v, StdDevRtt=%v, Duration=%v", stats.IPAddr, pinger.PacketsSent, pinger.PacketsRecv, stats.PacketLoss, stats.MinRtt, stats.AvgRtt, stats.MaxRtt, stats.StdDevRtt, time.Since(start)) - mutex.Lock() if pinger.PacketsRecv > 0 && pinger.Timeout > time.Since(start) { log.Debugf("Ping successful: target=%v", stats.IPAddr) metrics.PingSuccessGauge.Set(1) @@ -165,7 +216,6 @@ func PingHandler(registry *prometheus.Registry, metrics metrics.PingMetrics, mut metrics.StddevGauge.Set(float64(stats.StdDevRtt)) metrics.LossGauge.Set(stats.PacketLoss) metrics.ProbeDurationGauge.Set(time.Since(start).Seconds()) - mutex.Unlock() } if err := pinger.Run(); err != nil { diff --git a/internal/server/server.go b/internal/server/server.go index be6f240..aae60de 100644 --- a/internal/server/server.go +++ b/internal/server/server.go @@ -4,13 +4,10 @@ import ( "fmt" "net/http" "net/http/pprof" - "sync" - "github.com/prometheus/client_golang/prometheus" "github.com/prometheus/client_golang/prometheus/promhttp" log "github.com/sirupsen/logrus" "github.com/wbollock/ping_exporter/internal/collector" - "github.com/wbollock/ping_exporter/internal/metrics" ) func SetupServer() http.Handler { @@ -26,63 +23,13 @@ func SetupServer() http.Handler { defaultMetricsPath = "/metrics" ) - const namespace = "ping_" - - var ( - pingSuccessGauge = prometheus.NewGauge(prometheus.GaugeOpts{ - Name: namespace + "success", - Help: "Returns whether the ping succeeded", - }) - pingTimeoutGauge = prometheus.NewGauge(prometheus.GaugeOpts{ - Name: namespace + "timeout", - Help: "Returns whether the ping failed by timeout", - }) - probeDurationGauge = prometheus.NewGauge(prometheus.GaugeOpts{ - Name: namespace + "duration_seconds", - Help: "Returns how long the probe took to complete in seconds", - }) - minGauge = prometheus.NewGauge(prometheus.GaugeOpts{ - Name: namespace + "rtt_min_seconds", - Help: "Best round trip time", - }) - maxGauge = prometheus.NewGauge(prometheus.GaugeOpts{ - Name: namespace + "rtt_max_seconds", - Help: "Worst round trip time", - }) - avgGauge = prometheus.NewGauge(prometheus.GaugeOpts{ - Name: namespace + "rtt_avg_seconds", - Help: "Mean round trip time", - }) - stddevGauge = prometheus.NewGauge(prometheus.GaugeOpts{ - Name: namespace + "rtt_std_deviation", - Help: "Standard deviation", - }) - lossGauge = prometheus.NewGauge(prometheus.GaugeOpts{ - Name: namespace + "loss_ratio", - Help: "Packet loss from 0 to 100", - }) - ) - mux := http.NewServeMux() mux.Handle(defaultMetricsPath, promhttp.Handler()) - var mutex sync.Mutex - registry := prometheus.NewRegistry() - - metrics := metrics.PingMetrics{ - PingSuccessGauge: pingSuccessGauge, - PingTimeoutGauge: pingTimeoutGauge, - ProbeDurationGauge: probeDurationGauge, - MinGauge: minGauge, - MaxGauge: maxGauge, - AvgGauge: avgGauge, - StddevGauge: stddevGauge, - LossGauge: lossGauge, - } + pingHandler := collector.PingHandler() - registry.MustRegister(metrics.PingSuccessGauge, metrics.PingTimeoutGauge, metrics.ProbeDurationGauge, metrics.MinGauge, metrics.MaxGauge, metrics.AvgGauge, metrics.StddevGauge, metrics.LossGauge) - mux.HandleFunc("/probe", collector.PingHandler(registry, metrics, &mutex)) + mux.HandleFunc("/probe", pingHandler) // for non-standard web servers, need to register handlers mux.HandleFunc("/debug/pprof/", http.HandlerFunc(pprof.Index)) From ea7e3843fd4cdf17bd6a30c70002caa1b01d3d76 Mon Sep 17 00:00:00 2001 From: Will Bollock Date: Thu, 14 Dec 2023 17:52:19 -0500 Subject: [PATCH 7/7] ref: use namespace opt Co-authored-by: Will Hegedus --- internal/collector/icmp_collector.go | 42 +++++++++++++++++----------- 1 file changed, 25 insertions(+), 17 deletions(-) diff --git a/internal/collector/icmp_collector.go b/internal/collector/icmp_collector.go index af0bacc..15d2360 100644 --- a/internal/collector/icmp_collector.go +++ b/internal/collector/icmp_collector.go @@ -114,41 +114,49 @@ func PingHandler() http.HandlerFunc { return func(w http.ResponseWriter, r *http.Request) { const ( - namespace = "ping_" + namespace = "ping" ) var ( pingSuccessGauge = prometheus.NewGauge(prometheus.GaugeOpts{ - Name: namespace + "success", - Help: "Returns whether the ping succeeded", + Namespace: namespace, + Name: "success", + Help: "Returns whether the ping succeeded", }) pingTimeoutGauge = prometheus.NewGauge(prometheus.GaugeOpts{ - Name: namespace + "timeout", - Help: "Returns whether the ping failed by timeout", + Namespace: namespace, + Name: "timeout", + Help: "Returns whether the ping failed by timeout", }) probeDurationGauge = prometheus.NewGauge(prometheus.GaugeOpts{ - Name: namespace + "duration_seconds", - Help: "Returns how long the probe took to complete in seconds", + Namespace: namespace, + Name: "duration_seconds", + Help: "Returns how long the probe took to complete in seconds", }) minGauge = prometheus.NewGauge(prometheus.GaugeOpts{ - Name: namespace + "rtt_min_seconds", - Help: "Best round trip time", + Namespace: namespace, + Name: "rtt_min_seconds", + Help: "Best round trip time", }) maxGauge = prometheus.NewGauge(prometheus.GaugeOpts{ - Name: namespace + "rtt_max_seconds", - Help: "Worst round trip time", + Namespace: namespace, + Name: "rtt_max_seconds", + Help: "Worst round trip time", }) avgGauge = prometheus.NewGauge(prometheus.GaugeOpts{ - Name: namespace + "rtt_avg_seconds", - Help: "Mean round trip time", + Namespace: namespace, + Name: "rtt_avg_seconds", + Help: "Mean round trip time", }) stddevGauge = prometheus.NewGauge(prometheus.GaugeOpts{ - Name: namespace + "rtt_std_deviation", - Help: "Standard deviation", + Namespace: namespace, + Name: "rtt_std_deviation", + Help: "Standard deviation", }) lossGauge = prometheus.NewGauge(prometheus.GaugeOpts{ - Name: namespace + "loss_ratio", - Help: "Packet loss from 0 to 100", + Namespace: namespace, + Name: "loss_ratio", + Help: "Packet loss from 0 to 100", }) )