From d913a2ee5fd15db091b65aad725dbc81ee1feece Mon Sep 17 00:00:00 2001 From: Kevin Heifner Date: Wed, 11 Oct 2023 11:01:40 -0500 Subject: [PATCH] GH-1683 Add prometheus plugin for easier manual testing of prometheus. Add mapped_private database-map-mode since none of the tests currently use it. --- plugins/prometheus_plugin/metrics.hpp | 101 +++++++++++++++++--------- tests/nodeos_run_test.py | 3 +- tools/net-util.py | 88 +++++++++++----------- 3 files changed, 114 insertions(+), 78 deletions(-) diff --git a/plugins/prometheus_plugin/metrics.hpp b/plugins/prometheus_plugin/metrics.hpp index b9de6b6435..c2d412e1ce 100644 --- a/plugins/prometheus_plugin/metrics.hpp +++ b/plugins/prometheus_plugin/metrics.hpp @@ -33,18 +33,34 @@ struct catalog_type { // http plugin prometheus::Family& http_request_counts; - // net plugin p2p-connections - prometheus::Family& p2p_connections; - - Gauge& num_peers; - Gauge& num_clients; - // net plugin failed p2p connection Counter& failed_p2p_connections; // net plugin dropped_trxs Counter& dropped_trxs_total; + struct p2p_connection_metrics { + Gauge& num_peers; + Gauge& num_clients; + + prometheus::Family& addr; // Empty gauge; ipv6 address can't be transmitted as a double + prometheus::Family& port; + prometheus::Family& connection_number; + prometheus::Family& accepting_blocks; + prometheus::Family& last_received_block; + prometheus::Family& first_available_block; + prometheus::Family& last_available_block; + prometheus::Family& unique_first_block_count; + prometheus::Family& latency; + prometheus::Family& bytes_received; + prometheus::Family& last_bytes_received; + prometheus::Family& bytes_sent; + prometheus::Family& last_bytes_sent; + prometheus::Family& connection_start_time; + prometheus::Family& peer_addr; // Empty gauge; we only want the label + }; + p2p_connection_metrics p2p_metrics; + // producer plugin prometheus::Family& cpu_usage_us; prometheus::Family& net_usage_us; @@ -97,12 +113,27 @@ struct catalog_type { catalog_type() : info(family("nodeos", "static information about the server")) , http_request_counts(family("nodeos_http_requests_total", "number of HTTP requests")) - , p2p_connections(family("nodeos_p2p_connections", "current number of connected p2p connections")) - , num_peers(p2p_connections.Add({{"direction", "out"}})) - , num_clients(p2p_connections.Add({{"direction", "in"}})) - , failed_p2p_connections( - build("nodeos_failed_p2p_connections", "total number of failed out-going p2p connections")) - , dropped_trxs_total(build("nodeos_dropped_trxs_total", "total number of dropped transactions by net plugin")) + , failed_p2p_connections(build("nodeos_p2p_failed_connections", "total number of failed out-going p2p connections")) + , dropped_trxs_total(build("nodeos_p2p_dropped_trxs_total", "total number of dropped transactions by net plugin")) + , p2p_metrics{ + .num_peers{build("nodeos_p2p_peers", "current number of connected outgoing peers")} + , .num_clients{build("nodeos_p2p_clients", "current number of connected incoming clients")} + , .addr{family("nodeos_p2p_addr", "ipv6 address")} + , .port{family("nodeos_p2p_port", "port")} + , .connection_number{family("nodeos_p2p_connection_number", "monatomic increasing connection number")} + , .accepting_blocks{family("nodeos_p2p_accepting_blocks", "accepting blocks on connection")} + , .last_received_block{family("nodeos_p2p_last_received_block", "last received block on connection")} + , .first_available_block{family("nodeos_p2p_first_available_block", "first block available from connection")} + , .last_available_block{family("nodeos_p2p_last_available_block", "last block available from connection")} + , .unique_first_block_count{family("nodeos_p2p_unique_first_block_count", "number of blocks first received from any connection on this connection")} + , .latency{family("nodeos_p2p_latency", "last calculated latency with connection")} + , .bytes_received{family("nodeos_p2p_bytes_received", "total bytes received on connection")} + , .last_bytes_received{family("nodeos_p2p_last_bytes_received", "last time anything received from peer")} + , .bytes_sent{family("nodeos_p2p_bytes_sent", "total bytes sent to peer")} + , .last_bytes_sent{family("nodeos_p2p_last_bytes_sent", "last time anything sent to peer")} + , .connection_start_time{family("nodeos_p2p_connection_start_time", "time of last connection to peer")} + , .peer_addr{family("nodeos_p2p_peer_addr", "peer address")} + } , cpu_usage_us(family("nodeos_cpu_usage_us_total", "total cpu usage in microseconds for blocks")) , net_usage_us(family("nodeos_net_usage_us_total", "total net usage in microseconds for blocks")) , last_irreversible(build("nodeos_last_irreversible", "last irreversible block number")) @@ -164,31 +195,33 @@ struct catalog_type { } void update(const net_plugin::p2p_connections_metrics& metrics) { - num_peers.Set(metrics.num_peers); - num_clients.Set(metrics.num_clients); + p2p_metrics.num_peers.Set(metrics.num_peers); + p2p_metrics.num_clients.Set(metrics.num_clients); for(size_t i = 0; i < metrics.stats.peers.size(); ++i) { - std::string label{"connid_" + to_string(metrics.stats.peers[i].connection_id)}; - auto add_and_set_gauge = [&](const std::string& label_value, - const auto& value) { - auto& gauge = p2p_connections.Add({{label, label_value}}); + auto& peer = metrics.stats.peers[i]; + auto& conn_id = peer.unique_conn_node_id; + + auto addr = boost::asio::ip::make_address_v6(peer.address).to_string(); + p2p_metrics.addr.Add({{"connid", conn_id},{"ipv6", addr},{"address", peer.p2p_address}}); + + auto add_and_set_gauge = [&](auto& fam, const auto& value) { + auto& gauge = fam.Add({{"connid", conn_id}}); gauge.Set(value); }; - auto& peer = metrics.stats.peers[i]; - auto addr = std::string("addr_") + boost::asio::ip::make_address_v6(peer.address).to_string(); - add_and_set_gauge(addr, 0); // Empty gauge; ipv6 address can't be transmitted as a double - add_and_set_gauge("port", peer.port); - add_and_set_gauge("accepting_blocks", peer.accepting_blocks); - add_and_set_gauge("last_received_block", peer.last_received_block); - add_and_set_gauge("first_available_block", peer.first_available_block); - add_and_set_gauge("last_available_block", peer.last_available_block); - add_and_set_gauge("unique_first_block_count", peer.unique_first_block_count); - add_and_set_gauge("latency", peer.latency); - add_and_set_gauge("bytes_received", peer.bytes_received); - add_and_set_gauge("last_bytes_received", peer.last_bytes_received.count()); - add_and_set_gauge("bytes_sent", peer.bytes_sent); - add_and_set_gauge("last_bytes_sent", peer.last_bytes_sent.count()); - add_and_set_gauge("connection_start_time", peer.connection_start_time.count()); - add_and_set_gauge(peer.log_p2p_address, 0); // Empty gauge; we only want the label + + add_and_set_gauge(p2p_metrics.connection_number, peer.connection_id); + add_and_set_gauge(p2p_metrics.port, peer.port); + add_and_set_gauge(p2p_metrics.accepting_blocks, peer.accepting_blocks); + add_and_set_gauge(p2p_metrics.last_received_block, peer.last_received_block); + add_and_set_gauge(p2p_metrics.first_available_block, peer.first_available_block); + add_and_set_gauge(p2p_metrics.last_available_block, peer.last_available_block); + add_and_set_gauge(p2p_metrics.unique_first_block_count, peer.unique_first_block_count); + add_and_set_gauge(p2p_metrics.latency, peer.latency); + add_and_set_gauge(p2p_metrics.bytes_received, peer.bytes_received); + add_and_set_gauge(p2p_metrics.last_bytes_received, peer.last_bytes_received.count()); + add_and_set_gauge(p2p_metrics.bytes_sent, peer.bytes_sent); + add_and_set_gauge(p2p_metrics.last_bytes_sent, peer.last_bytes_sent.count()); + add_and_set_gauge(p2p_metrics.connection_start_time, peer.connection_start_time.count()); } } diff --git a/tests/nodeos_run_test.py b/tests/nodeos_run_test.py index 3b31996a9b..59b6fa68d9 100755 --- a/tests/nodeos_run_test.py +++ b/tests/nodeos_run_test.py @@ -61,8 +61,9 @@ abs_path = os.path.abspath(os.getcwd() + '/unittests/contracts/eosio.token/eosio.token.abi') traceNodeosArgs=" --http-max-response-time-ms 990000 --trace-rpc-abi eosio.token=" + abs_path + extraNodeosArgs=traceNodeosArgs + " --plugin eosio::prometheus_plugin --database-map-mode mapped_private " specificNodeosInstances={0: "bin/nodeos"} - if cluster.launch(totalNodes=2, prodCount=prodCount, onlyBios=onlyBios, dontBootstrap=dontBootstrap, extraNodeosArgs=traceNodeosArgs, specificNodeosInstances=specificNodeosInstances) is False: + if cluster.launch(totalNodes=2, prodCount=prodCount, onlyBios=onlyBios, dontBootstrap=dontBootstrap, extraNodeosArgs=extraNodeosArgs, specificNodeosInstances=specificNodeosInstances) is False: cmdError("launcher") errorExit("Failed to stand up eos cluster.") else: diff --git a/tools/net-util.py b/tools/net-util.py index 7dff39a70f..be63176de5 100755 --- a/tools/net-util.py +++ b/tools/net-util.py @@ -96,16 +96,16 @@ def __init__(self): ('nodeos_info', 'earliest_available_block_num'): 'Earliest Available Block:', 'nodeos_head_block_num': 'Head Block Num:', 'nodeos_last_irreversible': 'LIB:', - ('nodeos_p2p_connections','in'): 'Inbound P2P Connections:', - ('nodeos_p2p_connections','out'): 'Outbound P2P Connections:', + 'nodeos_p2p_clients': 'Inbound P2P Connections:', + 'nodeos_p2p_peers': 'Outbound P2P Connections:', 'nodeos_blocks_incoming_total': 'Total Incoming Blocks:', 'nodeos_trxs_incoming_total': 'Total Incoming Trxs:', 'nodeos_blocks_produced_total': 'Blocks Produced:', 'nodeos_trxs_produced_total': 'Trxs Produced:', 'nodeos_scheduled_trxs_total': 'Scheduled Trxs:', 'nodeos_unapplied_transactions_total': 'Unapplied Trxs:', - 'nodeos_dropped_trxs_total': 'Dropped Trxs:', - 'nodeos_failed_p2p_connections_total': 'Failed P2P Connections:', + 'nodeos_p2p_dropped_trxs_total': 'Dropped Trxs:', + 'nodeos_p2p_failed_connections_total': 'Failed P2P Connections:', 'nodeos_http_requests_total': 'HTTP Requests:', } self.ignoredPrometheusMetrics = [ @@ -301,53 +301,55 @@ def __init__(self, bytesReceived=0, bytesSent=0, connectionStarted=0): for family in text_string_to_metric_families(response.text): bandwidths = {} for sample in family.samples: + listwalker = getattr(self, 'connectionIDLW') + if "connid" in sample.labels: + connID = sample.labels["connid"] + if connID not in listwalker: + startOffset = endOffset = len(listwalker) + listwalker.append(AttrMap(Text(connID), None, 'reversed')) + else: + startOffset = listwalker.index(connID) + endOffset = startOffset + 1 if sample.name in self.prometheusMetrics: fieldName = self.fields.get(self.prometheusMetrics[sample.name]) field = getattr(self, fieldName) field.set_text(str(int(sample.value))) + elif sample.name == 'nodeos_p2p_addr': + listwalker = getattr(self, 'ipAddressLW') + addr = ipaddress.ip_address(sample.labels["ipv6"]) + host = f'{str(addr.ipv4_mapped) if addr.ipv4_mapped else str(addr)}' + listwalker[startOffset:endOffset] = [AttrMap(Text(host), None, 'reversed')] + listwalker = getattr(self, 'hostnameLW') + addr = sample.labels["address"] + listwalker[startOffset:endOffset] = [AttrMap(Text(addr), None, 'reversed')] + elif sample.name == 'nodeos_p2p_bytes_sent': + bytesSent = int(sample.value) + stats = bandwidths.get(connID, bandwidthStats()) + stats.bytesSent = bytesSent + bandwidths[connID] = stats + elif sample.name == 'nodeos_p2p_bytes_received': + bytesReceived = int(sample.value) + stats = bandwidths.get(connID, bandwidthStats()) + stats.bytesReceived = bytesReceived + bandwidths[connID] = stats + elif sample.name == 'nodeos_p2p_connection_start_time': + connectionStarted = int(sample.value) + stats = bandwidths.get(connID, bandwidthStats()) + stats.connectionStarted = connectionStarted + bandwidths[connID] = stats + elif sample.name == 'nodeos_p2p_connection_number': + pass + elif sample.name.startswith('nodeos_p2p_'): + fieldName = sample.name[len('nodeos_p2p_'):] + attrname = fieldName[:1] + fieldName.replace('_', ' ').title().replace(' ', '')[1:] + 'LW' + if hasattr(self, attrname): + listwalker = getattr(self, attrname) + listwalker[startOffset:endOffset] = [AttrMap(Text(self.peerMetricConversions[fieldName](sample.value)), None, 'reversed')] elif sample.name == 'nodeos_p2p_connections': if 'direction' in sample.labels: fieldName = self.fields.get(self.prometheusMetrics[(sample.name, sample.labels['direction'])]) field = getattr(self, fieldName) field.set_text(str(int(sample.value))) - else: - connID = next(iter(sample.labels)) - fieldName = sample.labels[connID] - listwalker = getattr(self, 'connectionIDLW') - if connID not in listwalker: - startOffset = endOffset = len(listwalker) - listwalker.append(AttrMap(Text(connID), None, 'reversed')) - else: - startOffset = listwalker.index(connID) - endOffset = startOffset + 1 - if fieldName.startswith('addr_'): - listwalker = getattr(self, 'ipAddressLW') - addr = ipaddress.ip_address(fieldName[len('addr_'):]) - host = f'{str(addr.ipv4_mapped) if addr.ipv4_mapped else str(addr)}' - listwalker[startOffset:endOffset] = [AttrMap(Text(host), None, 'reversed')] - elif fieldName == 'bytes_received': - bytesReceived = int(sample.value) - stats = bandwidths.get(connID, bandwidthStats()) - stats.bytesReceived = bytesReceived - bandwidths[connID] = stats - elif fieldName == 'bytes_sent': - bytesSent = int(sample.value) - stats = bandwidths.get(connID, bandwidthStats()) - stats.bytesSent = bytesSent - bandwidths[connID] = stats - elif fieldName == 'connection_start_time': - connectionStarted = int(sample.value) - stats = bandwidths.get(connID, bandwidthStats()) - stats.connectionStarted = connectionStarted - bandwidths[connID] = stats - else: - attrname = fieldName[:1] + fieldName.replace('_', ' ').title().replace(' ', '')[1:] + 'LW' - if hasattr(self, attrname): - listwalker = getattr(self, attrname) - listwalker[startOffset:endOffset] = [AttrMap(Text(self.peerMetricConversions[fieldName](sample.value)), None, 'reversed')] - else: - listwalker = getattr(self, 'hostnameLW') - listwalker[startOffset:endOffset] = [AttrMap(Text(fieldName.replace('_', '.')), None, 'reversed')] elif sample.name == 'nodeos_info': for infoLabel, infoValue in sample.labels.items(): fieldName = self.fields.get(self.prometheusMetrics[(sample.name, infoLabel)]) @@ -360,7 +362,7 @@ def __init__(self, bytesReceived=0, bytesSent=0, connectionStarted=0): if sample.name not in self.ignoredPrometheusMetrics: logger.warning(f'Received unhandled Prometheus metric {sample.name}') else: - if sample.name == 'nodeos_p2p_connections': + if sample.name == 'nodeos_p2p_bytes_sent' or sample.name == 'nodeos_p2p_bytes_received': now = time.time_ns() connIDListwalker = getattr(self, 'connectionIDLW') for connID, stats in bandwidths.items():