Skip to content

Commit

Permalink
GH-1683 Add prometheus plugin for easier manual testing of prometheus…
Browse files Browse the repository at this point in the history
…. Add mapped_private database-map-mode since none of the tests currently use it.
  • Loading branch information
heifner committed Oct 11, 2023
1 parent ac6c70f commit d913a2e
Show file tree
Hide file tree
Showing 3 changed files with 114 additions and 78 deletions.
101 changes: 67 additions & 34 deletions plugins/prometheus_plugin/metrics.hpp
Original file line number Diff line number Diff line change
Expand Up @@ -33,18 +33,34 @@ struct catalog_type {
// http plugin
prometheus::Family<Counter>& http_request_counts;

// net plugin p2p-connections
prometheus::Family<Gauge>& p2p_connections;

Gauge& num_peers;
Gauge& num_clients;

// net plugin failed p2p connection
Counter& failed_p2p_connections;

// net plugin dropped_trxs
Counter& dropped_trxs_total;

// Groups the net plugin's p2p connection metrics.
// Every member is a reference; they are bound in the catalog_type constructor's
// p2p_metrics{...} designated initializer, which lists them in this declaration order.
// num_peers / num_clients are single gauges. All other members are gauge families:
// update() adds one gauge per reported peer to each family it populates, labelled
// with that peer's "connid".
struct p2p_connection_metrics {
Gauge& num_peers;   // current number of connected outgoing peers
Gauge& num_clients; // current number of connected incoming clients

// Value is never set by update(); the gauge exists only to carry the
// "connid", "ipv6" and "address" labels, since an ipv6 address can't be transmitted as a double.
prometheus::Family<Gauge>& addr;
prometheus::Family<Gauge>& port;                     // peer.port
prometheus::Family<Gauge>& connection_number;        // peer.connection_id
prometheus::Family<Gauge>& accepting_blocks;         // peer.accepting_blocks
prometheus::Family<Gauge>& last_received_block;      // peer.last_received_block
prometheus::Family<Gauge>& first_available_block;    // peer.first_available_block
prometheus::Family<Gauge>& last_available_block;     // peer.last_available_block
prometheus::Family<Gauge>& unique_first_block_count; // peer.unique_first_block_count
prometheus::Family<Gauge>& latency;                  // peer.latency
prometheus::Family<Gauge>& bytes_received;           // peer.bytes_received (running total)
prometheus::Family<Gauge>& last_bytes_received;      // peer.last_bytes_received.count()
prometheus::Family<Gauge>& bytes_sent;               // peer.bytes_sent (running total)
prometheus::Family<Gauge>& last_bytes_sent;          // peer.last_bytes_sent.count()
prometheus::Family<Gauge>& connection_start_time;    // peer.connection_start_time.count()
// Empty gauge; we only want the label.
// NOTE(review): the visible update() never adds a gauge to this family (the peer address
// is exported as the "address" label on addr instead) — confirm whether it is still needed.
prometheus::Family<Gauge>& peer_addr;
};
p2p_connection_metrics p2p_metrics; // the catalog's instance of the group above

// producer plugin
prometheus::Family<Counter>& cpu_usage_us;
prometheus::Family<Counter>& net_usage_us;
Expand Down Expand Up @@ -97,12 +113,27 @@ struct catalog_type {
catalog_type()
: info(family<prometheus::Info>("nodeos", "static information about the server"))
, http_request_counts(family<Counter>("nodeos_http_requests_total", "number of HTTP requests"))
, p2p_connections(family<Gauge>("nodeos_p2p_connections", "current number of connected p2p connections"))
, num_peers(p2p_connections.Add({{"direction", "out"}}))
, num_clients(p2p_connections.Add({{"direction", "in"}}))
, failed_p2p_connections(
build<Counter>("nodeos_failed_p2p_connections", "total number of failed out-going p2p connections"))
, dropped_trxs_total(build<Counter>("nodeos_dropped_trxs_total", "total number of dropped transactions by net plugin"))
, failed_p2p_connections(build<Counter>("nodeos_p2p_failed_connections", "total number of failed out-going p2p connections"))
, dropped_trxs_total(build<Counter>("nodeos_p2p_dropped_trxs_total", "total number of dropped transactions by net plugin"))
, p2p_metrics{
   // Registers the per-connection p2p metrics. num_peers/num_clients are built as
   // standalone gauges; every other member is a gauge family that update() fills with
   // one gauge per connection. Designated initializers must stay in the declaration
   // order of p2p_connection_metrics. Metric names are matched by tools/net-util.py,
   // so only the help texts are free to change.
   .num_peers{build<Gauge>("nodeos_p2p_peers", "current number of connected outgoing peers")}
   , .num_clients{build<Gauge>("nodeos_p2p_clients", "current number of connected incoming clients")}
   , .addr{family<Gauge>("nodeos_p2p_addr", "ipv6 address")}
   , .port{family<Gauge>("nodeos_p2p_port", "port")}
   // Help text typo fixed: was "monatomic increasing connection number".
   , .connection_number{family<Gauge>("nodeos_p2p_connection_number", "monotonically increasing connection number")}
   , .accepting_blocks{family<Gauge>("nodeos_p2p_accepting_blocks", "accepting blocks on connection")}
   , .last_received_block{family<Gauge>("nodeos_p2p_last_received_block", "last received block on connection")}
   , .first_available_block{family<Gauge>("nodeos_p2p_first_available_block", "first block available from connection")}
   , .last_available_block{family<Gauge>("nodeos_p2p_last_available_block", "last block available from connection")}
   , .unique_first_block_count{family<Gauge>("nodeos_p2p_unique_first_block_count", "number of blocks first received from any connection on this connection")}
   , .latency{family<Gauge>("nodeos_p2p_latency", "last calculated latency with connection")}
   , .bytes_received{family<Gauge>("nodeos_p2p_bytes_received", "total bytes received on connection")}
   , .last_bytes_received{family<Gauge>("nodeos_p2p_last_bytes_received", "last time anything received from peer")}
   , .bytes_sent{family<Gauge>("nodeos_p2p_bytes_sent", "total bytes sent to peer")}
   , .last_bytes_sent{family<Gauge>("nodeos_p2p_last_bytes_sent", "last time anything sent to peer")}
   , .connection_start_time{family<Gauge>("nodeos_p2p_connection_start_time", "time of last connection to peer")}
   , .peer_addr{family<Gauge>("nodeos_p2p_peer_addr", "peer address")}
}
, cpu_usage_us(family<Counter>("nodeos_cpu_usage_us_total", "total cpu usage in microseconds for blocks"))
, net_usage_us(family<Counter>("nodeos_net_usage_us_total", "total net usage in microseconds for blocks"))
, last_irreversible(build<Gauge>("nodeos_last_irreversible", "last irreversible block number"))
Expand Down Expand Up @@ -164,31 +195,33 @@ struct catalog_type {
}

void update(const net_plugin::p2p_connections_metrics& metrics) {
num_peers.Set(metrics.num_peers);
num_clients.Set(metrics.num_clients);
p2p_metrics.num_peers.Set(metrics.num_peers);
p2p_metrics.num_clients.Set(metrics.num_clients);
for(size_t i = 0; i < metrics.stats.peers.size(); ++i) {
std::string label{"connid_" + to_string(metrics.stats.peers[i].connection_id)};
auto add_and_set_gauge = [&](const std::string& label_value,
const auto& value) {
auto& gauge = p2p_connections.Add({{label, label_value}});
auto& peer = metrics.stats.peers[i];
auto& conn_id = peer.unique_conn_node_id;

auto addr = boost::asio::ip::make_address_v6(peer.address).to_string();
p2p_metrics.addr.Add({{"connid", conn_id},{"ipv6", addr},{"address", peer.p2p_address}});

auto add_and_set_gauge = [&](auto& fam, const auto& value) {
auto& gauge = fam.Add({{"connid", conn_id}});
gauge.Set(value);
};
auto& peer = metrics.stats.peers[i];
auto addr = std::string("addr_") + boost::asio::ip::make_address_v6(peer.address).to_string();
add_and_set_gauge(addr, 0); // Empty gauge; ipv6 address can't be transmitted as a double
add_and_set_gauge("port", peer.port);
add_and_set_gauge("accepting_blocks", peer.accepting_blocks);
add_and_set_gauge("last_received_block", peer.last_received_block);
add_and_set_gauge("first_available_block", peer.first_available_block);
add_and_set_gauge("last_available_block", peer.last_available_block);
add_and_set_gauge("unique_first_block_count", peer.unique_first_block_count);
add_and_set_gauge("latency", peer.latency);
add_and_set_gauge("bytes_received", peer.bytes_received);
add_and_set_gauge("last_bytes_received", peer.last_bytes_received.count());
add_and_set_gauge("bytes_sent", peer.bytes_sent);
add_and_set_gauge("last_bytes_sent", peer.last_bytes_sent.count());
add_and_set_gauge("connection_start_time", peer.connection_start_time.count());
add_and_set_gauge(peer.log_p2p_address, 0); // Empty gauge; we only want the label

add_and_set_gauge(p2p_metrics.connection_number, peer.connection_id);
add_and_set_gauge(p2p_metrics.port, peer.port);
add_and_set_gauge(p2p_metrics.accepting_blocks, peer.accepting_blocks);
add_and_set_gauge(p2p_metrics.last_received_block, peer.last_received_block);
add_and_set_gauge(p2p_metrics.first_available_block, peer.first_available_block);
add_and_set_gauge(p2p_metrics.last_available_block, peer.last_available_block);
add_and_set_gauge(p2p_metrics.unique_first_block_count, peer.unique_first_block_count);
add_and_set_gauge(p2p_metrics.latency, peer.latency);
add_and_set_gauge(p2p_metrics.bytes_received, peer.bytes_received);
add_and_set_gauge(p2p_metrics.last_bytes_received, peer.last_bytes_received.count());
add_and_set_gauge(p2p_metrics.bytes_sent, peer.bytes_sent);
add_and_set_gauge(p2p_metrics.last_bytes_sent, peer.last_bytes_sent.count());
add_and_set_gauge(p2p_metrics.connection_start_time, peer.connection_start_time.count());
}
}

Expand Down
3 changes: 2 additions & 1 deletion tests/nodeos_run_test.py
Original file line number Diff line number Diff line change
Expand Up @@ -61,8 +61,9 @@

abs_path = os.path.abspath(os.getcwd() + '/unittests/contracts/eosio.token/eosio.token.abi')
traceNodeosArgs=" --http-max-response-time-ms 990000 --trace-rpc-abi eosio.token=" + abs_path
extraNodeosArgs=traceNodeosArgs + " --plugin eosio::prometheus_plugin --database-map-mode mapped_private "
specificNodeosInstances={0: "bin/nodeos"}
if cluster.launch(totalNodes=2, prodCount=prodCount, onlyBios=onlyBios, dontBootstrap=dontBootstrap, extraNodeosArgs=traceNodeosArgs, specificNodeosInstances=specificNodeosInstances) is False:
if cluster.launch(totalNodes=2, prodCount=prodCount, onlyBios=onlyBios, dontBootstrap=dontBootstrap, extraNodeosArgs=extraNodeosArgs, specificNodeosInstances=specificNodeosInstances) is False:
cmdError("launcher")
errorExit("Failed to stand up eos cluster.")
else:
Expand Down
88 changes: 45 additions & 43 deletions tools/net-util.py
Original file line number Diff line number Diff line change
Expand Up @@ -96,16 +96,16 @@ def __init__(self):
('nodeos_info', 'earliest_available_block_num'): 'Earliest Available Block:',
'nodeos_head_block_num': 'Head Block Num:',
'nodeos_last_irreversible': 'LIB:',
('nodeos_p2p_connections','in'): 'Inbound P2P Connections:',
('nodeos_p2p_connections','out'): 'Outbound P2P Connections:',
'nodeos_p2p_clients': 'Inbound P2P Connections:',
'nodeos_p2p_peers': 'Outbound P2P Connections:',
'nodeos_blocks_incoming_total': 'Total Incoming Blocks:',
'nodeos_trxs_incoming_total': 'Total Incoming Trxs:',
'nodeos_blocks_produced_total': 'Blocks Produced:',
'nodeos_trxs_produced_total': 'Trxs Produced:',
'nodeos_scheduled_trxs_total': 'Scheduled Trxs:',
'nodeos_unapplied_transactions_total': 'Unapplied Trxs:',
'nodeos_dropped_trxs_total': 'Dropped Trxs:',
'nodeos_failed_p2p_connections_total': 'Failed P2P Connections:',
'nodeos_p2p_dropped_trxs_total': 'Dropped Trxs:',
'nodeos_p2p_failed_connections_total': 'Failed P2P Connections:',
'nodeos_http_requests_total': 'HTTP Requests:',
}
self.ignoredPrometheusMetrics = [
Expand Down Expand Up @@ -301,53 +301,55 @@ def __init__(self, bytesReceived=0, bytesSent=0, connectionStarted=0):
for family in text_string_to_metric_families(response.text):
bandwidths = {}
for sample in family.samples:
listwalker = getattr(self, 'connectionIDLW')
if "connid" in sample.labels:
connID = sample.labels["connid"]
if connID not in listwalker:
startOffset = endOffset = len(listwalker)
listwalker.append(AttrMap(Text(connID), None, 'reversed'))
else:
startOffset = listwalker.index(connID)
endOffset = startOffset + 1
if sample.name in self.prometheusMetrics:
fieldName = self.fields.get(self.prometheusMetrics[sample.name])
field = getattr(self, fieldName)
field.set_text(str(int(sample.value)))
elif sample.name == 'nodeos_p2p_addr':
listwalker = getattr(self, 'ipAddressLW')
addr = ipaddress.ip_address(sample.labels["ipv6"])
host = f'{str(addr.ipv4_mapped) if addr.ipv4_mapped else str(addr)}'
listwalker[startOffset:endOffset] = [AttrMap(Text(host), None, 'reversed')]
listwalker = getattr(self, 'hostnameLW')
addr = sample.labels["address"]
listwalker[startOffset:endOffset] = [AttrMap(Text(addr), None, 'reversed')]
elif sample.name == 'nodeos_p2p_bytes_sent':
bytesSent = int(sample.value)
stats = bandwidths.get(connID, bandwidthStats())
stats.bytesSent = bytesSent
bandwidths[connID] = stats
elif sample.name == 'nodeos_p2p_bytes_received':
bytesReceived = int(sample.value)
stats = bandwidths.get(connID, bandwidthStats())
stats.bytesReceived = bytesReceived
bandwidths[connID] = stats
elif sample.name == 'nodeos_p2p_connection_start_time':
connectionStarted = int(sample.value)
stats = bandwidths.get(connID, bandwidthStats())
stats.connectionStarted = connectionStarted
bandwidths[connID] = stats
elif sample.name == 'nodeos_p2p_connection_number':
pass
elif sample.name.startswith('nodeos_p2p_'):
fieldName = sample.name[len('nodeos_p2p_'):]
attrname = fieldName[:1] + fieldName.replace('_', ' ').title().replace(' ', '')[1:] + 'LW'
if hasattr(self, attrname):
listwalker = getattr(self, attrname)
listwalker[startOffset:endOffset] = [AttrMap(Text(self.peerMetricConversions[fieldName](sample.value)), None, 'reversed')]
elif sample.name == 'nodeos_p2p_connections':
if 'direction' in sample.labels:
fieldName = self.fields.get(self.prometheusMetrics[(sample.name, sample.labels['direction'])])
field = getattr(self, fieldName)
field.set_text(str(int(sample.value)))
else:
connID = next(iter(sample.labels))
fieldName = sample.labels[connID]
listwalker = getattr(self, 'connectionIDLW')
if connID not in listwalker:
startOffset = endOffset = len(listwalker)
listwalker.append(AttrMap(Text(connID), None, 'reversed'))
else:
startOffset = listwalker.index(connID)
endOffset = startOffset + 1
if fieldName.startswith('addr_'):
listwalker = getattr(self, 'ipAddressLW')
addr = ipaddress.ip_address(fieldName[len('addr_'):])
host = f'{str(addr.ipv4_mapped) if addr.ipv4_mapped else str(addr)}'
listwalker[startOffset:endOffset] = [AttrMap(Text(host), None, 'reversed')]
elif fieldName == 'bytes_received':
bytesReceived = int(sample.value)
stats = bandwidths.get(connID, bandwidthStats())
stats.bytesReceived = bytesReceived
bandwidths[connID] = stats
elif fieldName == 'bytes_sent':
bytesSent = int(sample.value)
stats = bandwidths.get(connID, bandwidthStats())
stats.bytesSent = bytesSent
bandwidths[connID] = stats
elif fieldName == 'connection_start_time':
connectionStarted = int(sample.value)
stats = bandwidths.get(connID, bandwidthStats())
stats.connectionStarted = connectionStarted
bandwidths[connID] = stats
else:
attrname = fieldName[:1] + fieldName.replace('_', ' ').title().replace(' ', '')[1:] + 'LW'
if hasattr(self, attrname):
listwalker = getattr(self, attrname)
listwalker[startOffset:endOffset] = [AttrMap(Text(self.peerMetricConversions[fieldName](sample.value)), None, 'reversed')]
else:
listwalker = getattr(self, 'hostnameLW')
listwalker[startOffset:endOffset] = [AttrMap(Text(fieldName.replace('_', '.')), None, 'reversed')]
elif sample.name == 'nodeos_info':
for infoLabel, infoValue in sample.labels.items():
fieldName = self.fields.get(self.prometheusMetrics[(sample.name, infoLabel)])
Expand All @@ -360,7 +362,7 @@ def __init__(self, bytesReceived=0, bytesSent=0, connectionStarted=0):
if sample.name not in self.ignoredPrometheusMetrics:
logger.warning(f'Received unhandled Prometheus metric {sample.name}')
else:
if sample.name == 'nodeos_p2p_connections':
if sample.name == 'nodeos_p2p_bytes_sent' or sample.name == 'nodeos_p2p_bytes_received':
now = time.time_ns()
connIDListwalker = getattr(self, 'connectionIDLW')
for connID, stats in bandwidths.items():
Expand Down

0 comments on commit d913a2e

Please sign in to comment.