Skip to content
This repository has been archived by the owner on Jan 10, 2023. It is now read-only.

Commit

Permalink
Merge pull request #41 from Xaelias/master
Browse files Browse the repository at this point in the history
XDR] Update metrics, add DC specific metrics
  • Loading branch information
alicebob authored May 12, 2020
2 parents a1998a4 + ff6eb8f commit 2db4fae
Show file tree
Hide file tree
Showing 5 changed files with 139 additions and 33 deletions.
1 change: 1 addition & 0 deletions .gitignore
Original file line number Diff line number Diff line change
@@ -1,2 +1,3 @@
asprom
dist/
.*.swp
8 changes: 5 additions & 3 deletions main.go
Original file line number Diff line number Diff line change
Expand Up @@ -27,13 +27,14 @@ import (

const (
namespace = "aerospike"
secondaryIndex = "sindex"
systemNode = "node"
systemNamespace = "ns"
systemLatency = "latency"
systemLatencyHist = "latency_hist" // total number of ops
systemOps = "ops"
systemSet = "set"
secondaryIndex = "sindex"
xdrDC = "xdr"
)

var (
Expand Down Expand Up @@ -126,11 +127,12 @@ func newAsCollector(nodeAddr, username, password string) *asCollector {
password: password,
totalScrapes: totalScrapes,
collectors: []collector{
newStatsCollector(),
newNSCollector(),
newLatencyCollector(),
newNSCollector(),
newSetCollector(),
newSindexCollector(),
newStatsCollector(),
newXdrDCCollector(),
},
}
}
Expand Down
17 changes: 14 additions & 3 deletions namespaces.go
Original file line number Diff line number Diff line change
Expand Up @@ -160,9 +160,20 @@ var (
counter("udf_sub_udf_complete", "udf sub udf complete"),
counter("udf_sub_udf_error", "udf sub udf error"),
counter("udf_sub_udf_timeout", "udf sub udf timeout"),
counter("xdr_write_error", "xdr write error"),
counter("xdr_write_success", "xdr write success"),
counter("xdr_write_timeout", "xdr write timeout"),
counter("xdr_client_delete_error", "xdr client delete error"),
counter("xdr_client_delete_not_found", "xdr client delete not found"),
counter("xdr_client_delete_success", "xdr client delete success"),
counter("xdr_client_delete_timeout", "xdr client delete timeout"),
counter("xdr_client_write_error", "xdr client write error"),
counter("xdr_client_write_success", "xdr client write success"),
counter("xdr_client_write_timeout", "xdr client write timeout"),
counter("xdr_from_proxy_delete_error", "xdr from proxy delete error"),
counter("xdr_from_proxy_delete_not_found", "xdr from proxy delete not found"),
counter("xdr_from_proxy_delete_success", "xdr from proxy delete success"),
counter("xdr_from_proxy_delete_timeout", "xdr from proxy delete timeout"),
counter("xdr_from_proxy_write_error", "xdr from proxy write error"),
counter("xdr_from_proxy_write_success", "xdr from proxy write success"),
counter("xdr_from_proxy_write_timeout", "xdr from proxy write timeout"),
gauge("available_bin_names", "available bin names"),
gauge("device_available_pct", "device available pct"),
gauge("device_compression_ratio", "device compression ratio"),
Expand Down
65 changes: 38 additions & 27 deletions stats.go
Original file line number Diff line number Diff line change
Expand Up @@ -75,41 +75,52 @@ var (
gauge("fabric_meta_recv_rate", "fabric meta recv rate"),
gauge("fabric_rw_send_rate", "fabric rw send rate"),
gauge("fabric_rw_recv_rate", "fabric rw recv rate"),
counter("xdr_ship_success", "xdr ship success"),
counter("xdr_ship_delete_success", "xdr ship delete success"),
counter("xdr_ship_source_error", "xdr ship source error"),
counter("xdr_ship_destination_error", "xdr ship destination error"),
gauge("xdr_ship_bytes", "xdr ship bytes"),
gauge("xdr_ship_latency_avg", "xdr ship latency avg"),
gauge("xdr_ship_compression_avg_pct", "xdr ship compression avg pct"),
gauge("xdr_ship_inflight_objects", "xdr ship inflight objects"),
gauge("xdr_ship_outstanding_objects", "xdr ship outstanding objects"),
counter("xdr_read_success", "xdr read success"),
counter("xdr_read_error", "xdr read error"),
gauge("xdr_read_notfound", "xdr read notfound"),
gauge("xdr_read_latency_avg", "xdr read latency avg"),
// XDR specific metrics
// requires Aerospike EE
gauge("dlog_free_pct", "dlog free pct"),
counter("dlog_logged", "dlog logged"),
counter("dlog_overwritten_error", "dlog overwritten error"),
counter("dlog_processed_link_down", "dlog processed link down"),
counter("dlog_processed_main", "dlog processed main"),
counter("dlog_processed_replica", "dlog processed replica"),
counter("dlog_relogged", "dlog relogged"),
gauge("dlog_used_objects", "dlog used objects"),
counter("local_recs_migration_retry", "Number of records missing in a batch call"),
counter("stat_pipe_reads_diginfo", "Number of digest information read from the named pipe."),
gauge("xdr_active_failed_node_sessions", "Number of active failed node sessions pending."),
gauge("xdr_active_link_down_sessions", "Number of active link down sessions pending."),
gauge("xdr_global_lastshiptime", "The minimum last ship time in millisecond (epoch) for XDR for across the cluster."),
counter("xdr_hotkey_fetch", "xdr hotkey fetch"),
counter("xdr_hotkey_skip", "xdr hotkey skip"),
counter("xdr_queue_overflow_error", "xdr queue overflow error"),
gauge("xdr_read_active_avg_pct", "xdr read active avg pct"),
counter("xdr_read_error", "xdr read error"),
gauge("xdr_read_idle_avg_pct", "xdr read idle avg pct"),
gauge("xdr_read_latency_avg", "xdr read latency avg"),
counter("xdr_read_notfound", "xdr read notfound"),
gauge("xdr_read_reqq_used", "xdr read reqq used"),
gauge("xdr_read_reqq_used_pct", "xdr read reqq used pct"),
gauge("xdr_read_respq_used", "xdr read respq used"),
counter("xdr_read_success", "xdr read success"),
gauge("xdr_read_txnq_used", "xdr read txnq used"),
gauge("xdr_read_txnq_used_pct", "xdr read txnq used pct"),
gauge("xdr_queue_overflow_error", "xdr queue overflow error"),
gauge("xdr_hotkey_fetch", "xdr hotkey fetch"),
gauge("xdr_hotkey_skip", "xdr hotkey skip"),
counter("xdr_unknown_namespace_error", "xdr unknown namespace error"),
counter("xdr_uninitialized_destination_error", "xdr uninitialized destination error"),
gauge("xdr_timelag", "xdr timelag"),
counter("xdr_relogged_incoming", "Number of records relogged into this node's digest log by another node."),
counter("xdr_relogged_outgoing", "Number of records relogged to another node's digest log. "),
counter("xdr_ship_bytes", "xdr ship bytes"),
gauge("xdr_ship_compression_avg_pct", "xdr ship compression avg pct"),
counter("xdr_ship_delete_success", "xdr ship delete success"),
counter("xdr_ship_destination_error", "xdr ship destination error"),
counter("xdr_ship_destination_permanent_error", "xdr ship destination permanent error"),
gauge("xdr_ship_fullrecord", "Number of records that did not take advantage of bin level shipping."),
gauge("xdr_ship_inflight_objects", "xdr ship inflight objects"),
gauge("xdr_ship_latency_avg", "xdr ship latency avg"),
gauge("xdr_ship_outstanding_objects", "xdr ship outstanding objects"),
counter("xdr_ship_source_error", "xdr ship source error"),
counter("xdr_ship_success", "xdr ship success"),
gauge("xdr_throughput", "xdr throughput"),
gauge("dlog_free_pct", "dlog free pct"),
counter("dlog_logged", "dlog logged"),
counter("dlog_overwritten_error", "dlog overwritten error"),
counter("dlog_processed_link_down", "dlog processed link down"),
counter("dlog_processed_main", "dlog processed main"),
counter("dlog_processed_replica", "dlog processed replica"),
counter("dlog_relogged", "dlog relogged"),
counter("dlog_used_objects", "dlog used objects"),
gauge("xdr_timelag", "xdr timelag"),
counter("xdr_uninitialized_destination_error", "xdr uninitialized destination error"),
counter("xdr_unknown_namespace_error", "xdr unknown namespace error"),
}
)

Expand Down
81 changes: 81 additions & 0 deletions xdr.go
Original file line number Diff line number Diff line change
@@ -0,0 +1,81 @@
package main

import (
"strings"

as "github.com/aerospike/aerospike-client-go"
"github.com/prometheus/client_golang/prometheus"
)

var (
// DCMetrics lists the keys we report from aero's dc statistics command.
// See `asinfo -l -v dcs` for a list of XDR DCs.
// See `asinfo -l -v dc/<dc>` for detailed metrics for a given DC.
DCMetrics = []metric{
gauge("dc_as_open_conn", "Number of open connection to the Aerospike DC."),
gauge("dc_as_size", "The cluster size of the destination Aerospike DC."),
gauge("dc_http_good_locations", "Number of URLs that are considered healthy."),
gauge("dc_http_locations", "Number of URLs configured for the HTTP destination."),
counter("dc_ship_attempt", "Number of records that have been attempted to be shipped."),
counter("dc_ship_bytes", "Number of bytes shipped for this DC."),
counter("dc_ship_delete_success", "Number of delete transactions that have been successfully shipped."),
counter("dc_ship_destination_error", "Number of errors from the remote cluster(s) while shipping records for this DC."),
gauge("dc_ship_idle_avg", "Average number of ms of sleep for each record being shipped."),
gauge("dc_ship_idle_avg_pct", "Representation in percent of total time spent for dc_ship_idle_avg."),
gauge("dc_ship_inflight_objects", "Number of records that are inflight."),
gauge("dc_ship_latency_avg", "Moving average of shipping latency for the specific DC."),
counter("dc_ship_source_error", "Number of client layer errors while shipping records for this DC."),
counter("dc_ship_success", "Number of records that have been successfully shipped."),
// dc_state https://www.aerospike.com/docs/reference/metrics/?show-removed=0#dc_state
gauge("dc_timelag", "Time lag for this specific DC."),
}
)

type XdrDCCollector cmetrics

func newXdrDCCollector() XdrDCCollector {
dc := map[string]cmetric {}
for _, m := range DCMetrics {
dc[m.aeroName] = cmetric{
typ: m.typ,
desc: prometheus.NewDesc(
promkey(xdrDC, m.aeroName),
m.desc,
[]string{"dc"},
nil,
),
}
}
return dc
}

func (dcc XdrDCCollector) describe(ch chan<- *prometheus.Desc) {
for _, s := range dcc {
ch <- s.desc
}
}

func (sic XdrDCCollector) collect(conn *as.Connection) ([]prometheus.Metric, error) {
info, err := as.RequestInfo(conn, "dcs")
if err != nil {
return nil, err
}

var metrics []prometheus.Metric
for _, dc := range strings.Split(info["dcs"], ";") {
dcInfo, err := as.RequestInfo(conn, "dc/"+dc)
if err != nil {
return nil, err
}

metrics = append(
metrics,
infoCollect(
cmetrics(sic),
dcInfo["dc/"+dc],
dc,
)...,
)
}
return metrics, nil
}

0 comments on commit 2db4fae

Please sign in to comment.