From e68743ab5b2e2a1472a9eb663822ac3c134a4bba Mon Sep 17 00:00:00 2001 From: Jonathan Giszczak Date: Tue, 22 Aug 2023 03:16:45 -0500 Subject: [PATCH 01/46] Support throttling block syncing to peers. WIP --- .../include/eosio/net_plugin/net_plugin.hpp | 1 + .../include/eosio/net_plugin/protocol.hpp | 3 +- plugins/net_plugin/net_plugin.cpp | 93 +++++++++++--- plugins/prometheus_plugin/metrics.hpp | 1 + tests/CMakeLists.txt | 3 + tests/p2p_sync_throttle_test.py | 119 ++++++++++++++++++ tools/net-util.py | 33 ++--- 7 files changed, 223 insertions(+), 30 deletions(-) create mode 100755 tests/p2p_sync_throttle_test.py diff --git a/plugins/net_plugin/include/eosio/net_plugin/net_plugin.hpp b/plugins/net_plugin/include/eosio/net_plugin/net_plugin.hpp index d0c482e5b1..1db805ac4f 100644 --- a/plugins/net_plugin/include/eosio/net_plugin/net_plugin.hpp +++ b/plugins/net_plugin/include/eosio/net_plugin/net_plugin.hpp @@ -54,6 +54,7 @@ namespace eosio { std::chrono::nanoseconds last_bytes_received{0}; size_t bytes_sent{0}; std::chrono::nanoseconds last_bytes_sent{0}; + size_t block_sync_bytes_sent{0}; std::chrono::nanoseconds connection_start_time{0}; std::string log_p2p_address; }; diff --git a/plugins/net_plugin/include/eosio/net_plugin/protocol.hpp b/plugins/net_plugin/include/eosio/net_plugin/protocol.hpp index 5ca2ba1456..7e292e7bf2 100644 --- a/plugins/net_plugin/include/eosio/net_plugin/protocol.hpp +++ b/plugins/net_plugin/include/eosio/net_plugin/protocol.hpp @@ -16,9 +16,10 @@ namespace eosio { // Longest domain name is 253 characters according to wikipedia. // Addresses include ":port" where max port is 65535, which adds 6 chars. + // Addresses may also include ":bitrate" with suffix and separators, which adds 30 chars. // We also add our own extentions of "[:trx|:blk] - xxxxxxx", which adds 14 chars, total= 273. // Allow for future extentions as well, hence 384. - constexpr size_t max_p2p_address_length = 253 + 6; + constexpr size_t max_p2p_address_length = 253 + 6 + 30; constexpr size_t max_handshake_str_length = 384; struct handshake_message { diff --git a/plugins/net_plugin/net_plugin.cpp b/plugins/net_plugin/net_plugin.cpp index f7f5d5d51c..0634d21557 100644 --- a/plugins/net_plugin/net_plugin.cpp +++ b/plugins/net_plugin/net_plugin.cpp @@ -29,6 +29,7 @@ #include #include #include +#include // should be defined for c++17, but clang++16 still has not implemented it #ifdef __cpp_lib_hardware_interference_size @@ -535,7 +536,7 @@ namespace eosio { bool in_sync() const; fc::logger& get_logger() { return logger; } - void create_session(tcp::socket&& socket, const string listen_address); + void create_session(tcp::socket&& socket, const string listen_address, const string limit); }; // peer_[x]log must be called from thread in connection strand @@ -771,7 +772,7 @@ namespace eosio { /// @brief ctor /// @param socket created by boost::asio in fc::listener /// @param address identifier of listen socket which accepted this new connection - explicit connection( tcp::socket&& socket, const string& listen_address ); + explicit connection( tcp::socket&& socket, const string& listen_address, const string& limit_str ); ~connection() = default; connection( const connection& ) = delete; @@ -800,6 +801,7 @@ namespace eosio { std::chrono::nanoseconds get_last_bytes_received() const { return last_bytes_received.load(); } size_t get_bytes_sent() const { return bytes_sent.load(); } std::chrono::nanoseconds get_last_bytes_sent() const { return last_bytes_sent.load(); } + size_t get_block_sync_bytes_sent() const { return block_sync_bytes_sent.load(); } boost::asio::ip::port_type get_remote_endpoint_port() const { return remote_endpoint_port.load(); } void set_heartbeat_timeout(std::chrono::milliseconds msec) { hb_timeout = msec; @@ -809,6 +811,7 @@ namespace eosio { private: static const string unknown; + static const std::map prefix_multipliers; std::atomic peer_ping_time_ns = std::numeric_limits::max(); @@ -827,6 +830,8 @@ namespace eosio { blocks_only }; + size_t block_sync_rate_limit{0}; // bytes/second, default unlimited + std::atomic connection_type{both}; std::atomic peer_start_block_num{0}; std::atomic peer_head_block_num{0}; @@ -835,6 +840,7 @@ namespace eosio { std::atomic bytes_received{0}; std::atomic last_bytes_received{0ns}; std::atomic bytes_sent{0}; + std::atomic block_sync_bytes_sent{0}; std::atomic last_bytes_sent{0ns}; std::atomic remote_endpoint_port{0}; @@ -980,7 +986,7 @@ namespace eosio { void stop_send(); void enqueue( const net_message &msg ); - void enqueue_block( const signed_block_ptr& sb, bool to_sync_queue = false); + size_t enqueue_block( const signed_block_ptr& sb, bool to_sync_queue = false); void enqueue_buffer( const std::shared_ptr>& send_buffer, go_away_reason close_after_send, bool to_sync_queue = false); @@ -1055,6 +1061,10 @@ namespace eosio { }; // class connection const string connection::unknown = ""; + const std::map connection::prefix_multipliers{ + {"",1},{"K",pow(10,3)},{"M",pow(10,6)},{"G",pow(10, 9)},{"T",pow(10, 12)}, + {"Ki",pow(2,10)},{"Mi",pow(2,20)},{"Gi",pow(2,30)},{"Ti",pow(2,40)} + }; // called from connection strand struct msg_handler : public fc::visitor { @@ -1185,7 +1195,7 @@ namespace eosio { fc_ilog( logger, "created connection ${c} to ${n}", ("c", connection_id)("n", endpoint) ); } - connection::connection(tcp::socket&& s, const string& listen_address) + connection::connection(tcp::socket&& s, const string& listen_address, const string& limit_str) : listen_address( listen_address ), peer_addr(), strand( my_impl->thread_pool.get_executor() ), @@ -1195,6 +1205,27 @@ namespace eosio { last_handshake_recv(), last_handshake_sent() { + std::istringstream in(limit_str); + fc_dlog( logger, "parsing connection endpoint with locale ${l}", ("l", std::locale("").name())); + in.imbue(std::locale("")); + double limit{0}; + in >> limit; + if( limit > 0.0f ) { + std::string units; + in >> units; + std::regex units_regex{"([KMGT]?[i]?)B/s"}; + std::smatch units_match; + std::regex_match(units, units_match, units_regex); + if( units_match.size() == 2 ) { + block_sync_rate_limit = static_cast(limit * prefix_multipliers.at(units_match[1].str())); + peer_dlog( this, "setting block_sync_rate_limit to ${limit}", ("limit", block_sync_rate_limit)); + } else { + fc_wlog( logger, "listen address ${la} has invalid block sync limit specification, connection will not be throttled", ("la", listen_address)); + block_sync_rate_limit = 0; + } + } else { + block_sync_rate_limit = static_cast(limit); + } update_endpoints(); fc_dlog( logger, "new connection object created for peer ${address}:${port} from listener ${addr}", ("address", log_remote_endpoint_ip)("port", log_remote_endpoint_port)("addr", listen_address) ); } @@ -1661,7 +1692,7 @@ namespace eosio { sb = cc.fetch_block_by_number( num ); // thread-safe } FC_LOG_AND_DROP(); if( sb ) { - enqueue_block( sb, true ); + block_sync_bytes_sent += enqueue_block( sb, true ); } else { peer_ilog( this, "enqueue sync, unable to fetch block ${num}, sending benign_other go away", ("num", num) ); peer_requested.reset(); // unable to provide requested blocks @@ -1781,14 +1812,29 @@ namespace eosio { } // called from connection strand - void connection::enqueue_block( const signed_block_ptr& b, bool to_sync_queue) { + size_t connection::enqueue_block( const signed_block_ptr& b, bool to_sync_queue) { peer_dlog( this, "enqueue block ${num}", ("num", b->block_num()) ); verify_strand_in_this_thread( strand, __func__, __LINE__ ); block_buffer_factory buff_factory; auto sb = buff_factory.get_send_buffer( b ); latest_blk_time = std::chrono::system_clock::now(); - enqueue_buffer( sb, no_reason, to_sync_queue); + if( block_sync_rate_limit > 0 ) { + peer_dlog( this, "block_sync_rate_limit is set to ${l}", ("l", block_sync_rate_limit)); + while( true) { + auto elapsed = std::chrono::time_point_cast(latest_blk_time) - connection_start_time; + auto current_rate = block_sync_bytes_sent / elapsed.time_since_epoch().count(); + if( current_rate < block_sync_rate_limit ) { + enqueue_buffer( sb, no_reason, to_sync_queue); + break; + } + peer_dlog( this, "throttling sending to peer ${remote}", ("remote", log_remote_endpoint_ip)); + usleep(100); + } + } else { + enqueue_buffer( sb, no_reason, to_sync_queue); + } + return sb->size(); } // called from connection strand @@ -2719,7 +2765,7 @@ namespace eosio { } - void net_plugin_impl::create_session(tcp::socket&& socket, const string listen_address) { + void net_plugin_impl::create_session(tcp::socket&& socket, const string listen_address, const string limit) { uint32_t visitors = 0; uint32_t from_addr = 0; boost::system::error_code rec; @@ -2745,7 +2791,7 @@ namespace eosio { visitors < connections.get_max_client_count())) { fc_ilog(logger, "Accepted new connection: " + paddr_str); - connection_ptr new_connection = std::make_shared(std::move(socket), listen_address); + connection_ptr new_connection = std::make_shared(std::move(socket), listen_address, limit); new_connection->strand.post([new_connection, this]() { if (new_connection->start_session()) { connections.add(new_connection); @@ -3927,16 +3973,24 @@ namespace eosio { void net_plugin::set_program_options( options_description& /*cli*/, options_description& cfg ) { cfg.add_options() - ( "p2p-listen-endpoint", bpo::value< vector >()->default_value( vector(1, string("0.0.0.0:9876")) ), "The actual host:port used to listen for incoming p2p connections. May be used multiple times.") + ( "p2p-listen-endpoint", bpo::value< vector >()->default_value( vector(1, string("0.0.0.0:9876:0")) ), "The actual host:port used to listen for incoming p2p connections. May be used multiple times. " + "Block syncing to all peers connected via the port will be throttled to the specified rate. " + "See the 'p2p-peer-address' argument for format details.") ( "p2p-server-address", bpo::value< vector >(), "An externally accessible host:port for identifying this node. Defaults to p2p-listen-endpoint. May be used as many times as p2p-listen-endpoint. If provided, the first address will be used in handshakes with other nodes. Otherwise the default is used.") ( "p2p-peer-address", bpo::value< vector >()->composing(), "The public endpoint of a peer node to connect to. Use multiple p2p-peer-address options as needed to compose a network.\n" - " Syntax: host:port[:|]\n" + " Syntax: host:port[:|][:]\n" " The optional 'trx' and 'blk' indicates to node that only transactions 'trx' or blocks 'blk' should be sent." + " The optional rate cap will limit block sync bandwidth to the specified rate. A number alone will be " + " interpreted as bytes per second. The number may be suffixed with units. Supported units are: " + " 'B/s', 'KB/s', 'MB/s, 'GB/s', and 'TB/s'. Transactions and blocks outside of sync mode are not throttled." " Examples:\n" " p2p.eos.io:9876\n" " p2p.trx.eos.io:9876:trx\n" - " p2p.blk.eos.io:9876:blk\n") + " p2p.blk.eos.io:9876:blk\n" + " p2p.eos.io:9876:1MB/s\n" + " p2p.blk.eos.io:9876:blk:250KB/s\n" + " p2p.eos.io:9876:0.5GB/s") ( "p2p-max-nodes-per-host", bpo::value()->default_value(def_max_nodes_per_host), "Maximum number of client nodes from any single IP address") ( "p2p-accept-transactions", bpo::value()->default_value(true), "Allow transactions received over p2p network to be evaluated and relayed if valid.") ( "p2p-auto-bp-peer", bpo::value< vector >()->composing(), @@ -4192,10 +4246,18 @@ namespace eosio { std::string extra_listening_log_info = ", max clients is " + std::to_string(my->connections.get_max_client_count()); - + + auto listen_addr = address; + auto limit = string("0"); + if( std::count(address.begin(), address.end(), ':') > 1 ) { + auto last_colon_location = address.rfind(':'); + listen_addr = std::string(address, 0, last_colon_location); + limit = std::string(address, last_colon_location+1); + } + fc::create_listener( - my->thread_pool.get_executor(), logger, accept_timeout, address, extra_listening_log_info, - [my = my, addr = p2p_addr](tcp::socket&& socket) { my->create_session(std::move(socket), addr); }); + my->thread_pool.get_executor(), logger, accept_timeout, listen_addr, extra_listening_log_info, + [my = my, addr = p2p_addr, limit = limit](tcp::socket&& socket) { fc_dlog( logger, "start listening on ${addr} with peer sync throttle ${limit}", ("addr", addr)("limit", limit)); my->create_session(std::move(socket), addr, limit); }); } catch (const std::exception& e) { fc_elog( logger, "net_plugin::plugin_startup failed to listen on ${addr}, ${what}", ("addr", address)("what", e.what()) ); @@ -4479,6 +4541,7 @@ namespace eosio { , .last_bytes_received = (*it)->get_last_bytes_received() , .bytes_sent = (*it)->get_bytes_sent() , .last_bytes_sent = (*it)->get_last_bytes_sent() + , .block_sync_bytes_sent = (*it)->get_block_sync_bytes_sent() , .connection_start_time = (*it)->connection_start_time , .log_p2p_address = (*it)->log_p2p_address }; diff --git a/plugins/prometheus_plugin/metrics.hpp b/plugins/prometheus_plugin/metrics.hpp index 873f65f3f4..ef82e5f2cb 100644 --- a/plugins/prometheus_plugin/metrics.hpp +++ b/plugins/prometheus_plugin/metrics.hpp @@ -190,6 +190,7 @@ struct catalog_type { add_and_set_gauge("last_bytes_received", peer.last_bytes_received.count()); add_and_set_gauge("bytes_sent", peer.bytes_sent); add_and_set_gauge("last_bytes_sent", peer.last_bytes_sent.count()); + add_and_set_gauge("block_sync_bytes_sent", peer.block_sync_bytes_sent); add_and_set_gauge("connection_start_time", peer.connection_start_time.count()); add_and_set_gauge(peer.log_p2p_address, 0); // Empty gauge; we only want the label } diff --git a/tests/CMakeLists.txt b/tests/CMakeLists.txt index 700abde685..e51901b817 100644 --- a/tests/CMakeLists.txt +++ b/tests/CMakeLists.txt @@ -51,6 +51,7 @@ configure_file(${CMAKE_CURRENT_SOURCE_DIR}/http_plugin_test.py ${CMAKE_CURRENT_B configure_file(${CMAKE_CURRENT_SOURCE_DIR}/p2p_high_latency_test.py ${CMAKE_CURRENT_BINARY_DIR}/p2p_high_latency_test.py COPYONLY) configure_file(${CMAKE_CURRENT_SOURCE_DIR}/p2p_multiple_listen_test.py ${CMAKE_CURRENT_BINARY_DIR}/p2p_multiple_listen_test.py COPYONLY) configure_file(${CMAKE_CURRENT_SOURCE_DIR}/p2p_no_listen_test.py ${CMAKE_CURRENT_BINARY_DIR}/p2p_no_listen_test.py COPYONLY) +configure_file(${CMAKE_CURRENT_SOURCE_DIR}/p2p_sync_throttle_test.py ${CMAKE_CURRENT_BINARY_DIR}/p2p_sync_throttle_test.py COPYONLY) configure_file(${CMAKE_CURRENT_SOURCE_DIR}/compute_transaction_test.py ${CMAKE_CURRENT_BINARY_DIR}/compute_transaction_test.py COPYONLY) configure_file(${CMAKE_CURRENT_SOURCE_DIR}/subjective_billing_test.py ${CMAKE_CURRENT_BINARY_DIR}/subjective_billing_test.py COPYONLY) configure_file(${CMAKE_CURRENT_SOURCE_DIR}/get_account_test.py ${CMAKE_CURRENT_BINARY_DIR}/get_account_test.py COPYONLY) @@ -186,6 +187,8 @@ add_test(NAME p2p_multiple_listen_test COMMAND tests/p2p_multiple_listen_test.py set_property(TEST p2p_multiple_listen_test PROPERTY LABELS nonparallelizable_tests) add_test(NAME p2p_no_listen_test COMMAND tests/p2p_no_listen_test.py -v ${UNSHARE} WORKING_DIRECTORY ${CMAKE_BINARY_DIR}) set_property(TEST p2p_no_listen_test PROPERTY LABELS nonparallelizable_tests) +add_test(NAME p2p_sync_throttle_test COMMAND tests/p2p_sync_throttle_test.py -v -d 2 ${UNSHARE} WORKING_DIRECTORY ${CMAKE_BINARY_DIR}) +set_property(TEST p2p_sync_throttle_test PROPERTY LABELS nonparallelizable_tests) # needs iproute-tc or iproute2 depending on platform #add_test(NAME p2p_high_latency_test COMMAND tests/p2p_high_latency_test.py -v WORKING_DIRECTORY ${CMAKE_BINARY_DIR}) diff --git a/tests/p2p_sync_throttle_test.py b/tests/p2p_sync_throttle_test.py new file mode 100755 index 0000000000..13273b6efe --- /dev/null +++ b/tests/p2p_sync_throttle_test.py @@ -0,0 +1,119 @@ +#!/usr/bin/env python3 + +import signal +import time + +from TestHarness import Cluster, TestHelper, Utils, WalletMgr, CORE_SYMBOL, createAccountKeys +from TestHarness.TestHelper import AppArgs + +############################################################### +# p2p_sync_throttle_test +# +# Test throttling of a peer during block syncing. +# +############################################################### + + +Print=Utils.Print +errorExit=Utils.errorExit + +appArgs = AppArgs() +appArgs.add(flag='--plugin',action='append',type=str,help='Run nodes with additional plugins') +appArgs.add(flag='--connection-cleanup-period',type=int,help='Interval in whole seconds to run the connection reaper and metric collection') + +args=TestHelper.parse_args({"-p","-d","--keep-logs","--prod-count" + ,"--dump-error-details","-v","--leave-running" + ,"--unshared"}, + applicationSpecificArgs=appArgs) +pnodes=args.p +delay=args.d +debug=args.v +prod_count = args.prod_count +total_nodes=4 +dumpErrorDetails=args.dump_error_details + +Utils.Debug=debug +testSuccessful=False + +cluster=Cluster(unshared=args.unshared, keepRunning=args.leave_running, keepLogs=args.keep_logs) +walletMgr=WalletMgr(True) + +try: + TestHelper.printSystemInfo("BEGIN") + + cluster.setWalletMgr(walletMgr) + + Print(f'producing nodes: {pnodes}, delay between nodes launch: {delay} second{"s" if delay != 1 else ""}') + + Print("Stand up cluster") + if args.plugin: + extraNodeosArgs = ''.join([i+j for i,j in zip([' --plugin '] * len(args.plugin), args.plugin)]) + else: + extraNodeosArgs = '' + if cluster.launch(pnodes=pnodes, unstartedNodes=2, totalNodes=total_nodes, prodCount=prod_count, topo='line', + delay=delay, extraNodeosArgs=extraNodeosArgs) is False: + errorExit("Failed to stand up eos cluster.") + + prodNode = cluster.getNode(0) + nonProdNode = cluster.getNode(1) + + accounts=createAccountKeys(2) + if accounts is None: + Utils.errorExit("FAILURE - create keys") + + accounts[0].name="tester111111" + accounts[1].name="tester222222" + + account1PrivKey = accounts[0].activePrivateKey + account2PrivKey = accounts[1].activePrivateKey + + testWalletName="test" + + Print("Creating wallet \"%s\"." % (testWalletName)) + testWallet=walletMgr.create(testWalletName, [cluster.eosioAccount,accounts[0],accounts[1]]) + + # create accounts via eosio as otherwise a bid is needed + for account in accounts: + Print("Create new account %s via %s" % (account.name, cluster.eosioAccount.name)) + trans=nonProdNode.createInitializeAccount(account, cluster.eosioAccount, stakedDeposit=0, waitForTransBlock=True, stakeNet=1000, stakeCPU=1000, buyRAM=1000, exitOnError=True) + transferAmount="100000000.0000 {0}".format(CORE_SYMBOL) + Print("Transfer funds %s from account %s to %s" % (transferAmount, cluster.eosioAccount.name, account.name)) + nonProdNode.transferFunds(cluster.eosioAccount, account, transferAmount, "test transfer", waitForTransBlock=True) + trans=nonProdNode.delegatebw(account, 20000000.0000, 20000000.0000, waitForTransBlock=True, exitOnError=True) + + beginLargeBlocksHeadBlock = nonProdNode.getHeadBlockNum() + + Print("Configure and launch txn generators") + targetTpsPerGenerator = 100 + testTrxGenDurationSec=60 + trxGeneratorCnt=1 + cluster.launchTrxGenerators(contractOwnerAcctName=cluster.eosioAccount.name, acctNamesList=[accounts[0].name,accounts[1].name], + acctPrivKeysList=[account1PrivKey,account2PrivKey], nodeId=prodNode.nodeId, tpsPerGenerator=targetTpsPerGenerator, + numGenerators=trxGeneratorCnt, durationSec=testTrxGenDurationSec, waitToComplete=True) + + endLargeBlocksHeadBlock = nonProdNode.getHeadBlockNum() + + throttleNode = cluster.unstartedNodes[0] + i = throttleNode.cmd.index('--p2p-listen-endpoint') + throttleNode.cmd[i+1] = throttleNode.cmd[i+1] + ':100B/s' + + cluster.biosNode.kill(signal.SIGTERM) + clusterStart = time.time() + cluster.launchUnstarted(2) + + syncNode = cluster.getNode(3) + time.sleep(15) + throttleNode.waitForBlock(endLargeBlocksHeadBlock) + endUnthrottledSync = time.time() + syncNode.waitForBlock(endLargeBlocksHeadBlock) + endSync = time.time() + Print(f'Unthrottled sync time: {endUnthrottledSync - clusterStart} seconds') + Print(f'Throttled sync time: {endSync - clusterStart} seconds') + assert endSync - clusterStart > endUnthrottledSync - clusterStart + 10, 'Throttled sync time must be at least 10 seconds greater than unthrottled' + + testSuccessful=True +finally: + TestHelper.shutdown(cluster, walletMgr, testSuccessful=testSuccessful, dumpErrorDetails=dumpErrorDetails) + +exitCode = 0 if testSuccessful else 1 +exit(exitCode) diff --git a/tools/net-util.py b/tools/net-util.py index 01d0862749..66f48d8ee4 100755 --- a/tools/net-util.py +++ b/tools/net-util.py @@ -35,7 +35,7 @@ def humanReadableBytesPerSecond(bytes: int, telco:bool = False): while bytes > power: bytes /= power n += 1 - return f'{"~0" if bytes < 0.01 else format(bytes, ".2f")} {labels[n]}B/s' + return f'{"-" if bytes == 0.0 else "~0" if bytes < 0.01 else format(bytes, ".2f")} {labels[n]}B/s' class TextSimpleFocusListWalker(urwid.SimpleFocusListWalker): @@ -165,6 +165,7 @@ def __init__(self): ('\nRcv\nRate', 'receiveBandwidthLW'), ('Last\nRcv\nTime', 'lastBytesReceivedLW'), ('Last\nRcvd\nBlock', 'lastReceivedBlockLW'), + ('Blk\nSync\nRate', 'blockSyncBandwidthLW'), ('Unique\nFirst\nBlks', 'uniqueFirstBlockCountLW'), ('First\nAvail\nBlk', 'firstAvailableBlockLW'), ('Last\nAvail\nBlk', 'lastAvailableBlockLW'), @@ -298,6 +299,7 @@ class bandwidthStats(): def __init__(self, bytesReceived=0, bytesSent=0, connectionStarted=0): self.bytesReceived = 0 self.bytesSent = 0 + self.blockSyncBytesSent = 0 self.connectionStarted = 0 for family in text_string_to_metric_families(response.text): bandwidths = {} @@ -327,19 +329,20 @@ def __init__(self, bytesReceived=0, bytesSent=0, connectionStarted=0): host = f'{str(addr.ipv4_mapped) if addr.ipv4_mapped else str(addr)}' listwalker[startOffset:endOffset] = [AttrMap(Text(host), None, 'reversed')] elif fieldName == 'bytes_received': - bytesReceived = int(sample.value) stats = bandwidths.get(connID, bandwidthStats()) - stats.bytesReceived = bytesReceived + stats.bytesReceived = int(sample.value) bandwidths[connID] = stats elif fieldName == 'bytes_sent': - bytesSent = int(sample.value) stats = bandwidths.get(connID, bandwidthStats()) - stats.bytesSent = bytesSent + stats.bytesSent = int(sample.value) + bandwidths[connID] = stats + elif fieldName == 'block_sync_bytes_sent': + stats = bandwidths.get(connID, bandwidthStats()) + stats.blockSyncBytesSent = int(sample.value) bandwidths[connID] = stats elif fieldName == 'connection_start_time': - connectionStarted = int(sample.value) stats = bandwidths.get(connID, bandwidthStats()) - stats.connectionStarted = connectionStarted + stats.connectionStarted = int(sample.value) bandwidths[connID] = stats else: attrname = fieldName[:1] + fieldName.replace('_', ' ').title().replace(' ', '')[1:] + 'LW' @@ -363,17 +366,19 @@ def __init__(self, bytesReceived=0, bytesSent=0, connectionStarted=0): else: if sample.name == 'nodeos_p2p_connections': now = time.time_ns() + def updateBandwidth(connectedSeconds, listwalker, byteCount, startOffset, endOffset): + bps = byteCount/connectedSeconds + listwalker[startOffset:endOffset] = [AttrMap(Text(humanReadableBytesPerSecond(bps)), None, 'reversed')] connIDListwalker = getattr(self, 'connectionIDLW') for connID, stats in bandwidths.items(): startOffset = connIDListwalker.index(connID) endOffset = startOffset + 1 - connected_seconds = (now - stats.connectionStarted)/1000000000 - listwalker = getattr(self, 'receiveBandwidthLW') - bps = stats.bytesReceived/connected_seconds - listwalker[startOffset:endOffset] = [AttrMap(Text(humanReadableBytesPerSecond(bps)), None, 'reversed')] - listwalker = getattr(self, 'sendBandwidthLW') - bps = stats.bytesSent/connected_seconds - listwalker[startOffset:endOffset] = [AttrMap(Text(humanReadableBytesPerSecond(bps)), None, 'reversed')] + connectedSeconds = (now - stats.connectionStarted)/1000000000 + for listwalkerName, attrName in [('receiveBandwidthLW', 'bytesReceived'), + ('sendBandwidthLW', 'bytesSent'), + ('blockSyncBandwidthLW', 'blockSyncBytesSent')]: + listwalker = getattr(self, listwalkerName) + updateBandwidth(connectedSeconds, listwalker, getattr(stats, attrName), startOffset, endOffset) mainLoop.set_alarm_in(float(self.args.refresh_interval), self.update) def exitOnQ(key): From 303c3d625631ef30409cddfd39f6a8a1ca656fb2 Mon Sep 17 00:00:00 2001 From: Jonathan Giszczak Date: Tue, 22 Aug 2023 19:52:36 -0500 Subject: [PATCH 02/46] Add exponential backoff to throttle. Fix wretched math. Add necessary custom topology for p2p_sync_throttle_test. --- plugins/net_plugin/net_plugin.cpp | 20 ++-- tests/CMakeLists.txt | 1 + tests/p2p_sync_throttle_test.py | 9 +- tests/p2p_sync_throttle_test_shape.json | 142 ++++++++++++++++++++++++ 4 files changed, 159 insertions(+), 13 deletions(-) create mode 100644 tests/p2p_sync_throttle_test_shape.json diff --git a/plugins/net_plugin/net_plugin.cpp b/plugins/net_plugin/net_plugin.cpp index 0634d21557..eeb22f6098 100644 --- a/plugins/net_plugin/net_plugin.cpp +++ b/plugins/net_plugin/net_plugin.cpp @@ -968,10 +968,11 @@ namespace eosio { void send_time(const time_message& msg); /** \brief Read system time and convert to a 64 bit integer. * - * There are five calls to this routine in the program. One + * There are six calls to this routine in the program. One * when a packet arrives from the network, one when a packet - * is placed on the send queue, one during start session, and - * one each when data is counted as received or sent. + * is placed on the send queue, one during start session, one + * when a sync block is queued and one each when data is + * counted as received or sent. * Calls the kernel time of day routine and converts to * a (at least) 64 bit integer. */ @@ -1218,7 +1219,7 @@ namespace eosio { std::regex_match(units, units_match, units_regex); if( units_match.size() == 2 ) { block_sync_rate_limit = static_cast(limit * prefix_multipliers.at(units_match[1].str())); - peer_dlog( this, "setting block_sync_rate_limit to ${limit}", ("limit", block_sync_rate_limit)); + fc_dlog( logger, "setting block_sync_rate_limit to ${limit}", ("limit", block_sync_rate_limit)); } else { fc_wlog( logger, "listen address ${la} has invalid block sync limit specification, connection will not be throttled", ("la", listen_address)); block_sync_rate_limit = 0; @@ -1820,16 +1821,17 @@ namespace eosio { auto sb = buff_factory.get_send_buffer( b ); latest_blk_time = std::chrono::system_clock::now(); if( block_sync_rate_limit > 0 ) { - peer_dlog( this, "block_sync_rate_limit is set to ${l}", ("l", block_sync_rate_limit)); + int sleep_time_us = 100; + const int max_sleep_time_us = 100000; while( true) { - auto elapsed = std::chrono::time_point_cast(latest_blk_time) - connection_start_time; - auto current_rate = block_sync_bytes_sent / elapsed.time_since_epoch().count(); + auto elapsed = std::chrono::duration_cast(get_time() - connection_start_time); + auto current_rate = double(block_sync_bytes_sent) / elapsed.count(); if( current_rate < block_sync_rate_limit ) { enqueue_buffer( sb, no_reason, to_sync_queue); break; } - peer_dlog( this, "throttling sending to peer ${remote}", ("remote", log_remote_endpoint_ip)); - usleep(100); + usleep(sleep_time_us); + sleep_time_us = sleep_time_us*2 > max_sleep_time_us ? max_sleep_time_us : sleep_time_us*2; } } else { enqueue_buffer( sb, no_reason, to_sync_queue); diff --git a/tests/CMakeLists.txt b/tests/CMakeLists.txt index e51901b817..ba86e17bf2 100644 --- a/tests/CMakeLists.txt +++ b/tests/CMakeLists.txt @@ -52,6 +52,7 @@ configure_file(${CMAKE_CURRENT_SOURCE_DIR}/p2p_high_latency_test.py ${CMAKE_CURR configure_file(${CMAKE_CURRENT_SOURCE_DIR}/p2p_multiple_listen_test.py ${CMAKE_CURRENT_BINARY_DIR}/p2p_multiple_listen_test.py COPYONLY) configure_file(${CMAKE_CURRENT_SOURCE_DIR}/p2p_no_listen_test.py ${CMAKE_CURRENT_BINARY_DIR}/p2p_no_listen_test.py COPYONLY) configure_file(${CMAKE_CURRENT_SOURCE_DIR}/p2p_sync_throttle_test.py ${CMAKE_CURRENT_BINARY_DIR}/p2p_sync_throttle_test.py COPYONLY) +configure_file(${CMAKE_CURRENT_SOURCE_DIR}/p2p_sync_throttle_test_shape.json ${CMAKE_CURRENT_BINARY_DIR}/p2p_sync_throttle_test_shape.json COPYONLY) configure_file(${CMAKE_CURRENT_SOURCE_DIR}/compute_transaction_test.py ${CMAKE_CURRENT_BINARY_DIR}/compute_transaction_test.py COPYONLY) configure_file(${CMAKE_CURRENT_SOURCE_DIR}/subjective_billing_test.py ${CMAKE_CURRENT_BINARY_DIR}/subjective_billing_test.py COPYONLY) configure_file(${CMAKE_CURRENT_SOURCE_DIR}/get_account_test.py ${CMAKE_CURRENT_BINARY_DIR}/get_account_test.py COPYONLY) diff --git a/tests/p2p_sync_throttle_test.py b/tests/p2p_sync_throttle_test.py index 13273b6efe..4ee9b19fd7 100755 --- a/tests/p2p_sync_throttle_test.py +++ b/tests/p2p_sync_throttle_test.py @@ -50,8 +50,9 @@ extraNodeosArgs = ''.join([i+j for i,j in zip([' --plugin '] * len(args.plugin), args.plugin)]) else: extraNodeosArgs = '' - if cluster.launch(pnodes=pnodes, unstartedNodes=2, totalNodes=total_nodes, prodCount=prod_count, topo='line', - delay=delay, extraNodeosArgs=extraNodeosArgs) is False: + if cluster.launch(pnodes=pnodes, unstartedNodes=2, totalNodes=total_nodes, prodCount=prod_count, + topo='./tests/p2p_sync_throttle_test_shape.json', delay=delay, + extraNodeosArgs=extraNodeosArgs) is False: errorExit("Failed to stand up eos cluster.") prodNode = cluster.getNode(0) @@ -95,7 +96,7 @@ throttleNode = cluster.unstartedNodes[0] i = throttleNode.cmd.index('--p2p-listen-endpoint') - throttleNode.cmd[i+1] = throttleNode.cmd[i+1] + ':100B/s' + throttleNode.cmd[i+1] = throttleNode.cmd[i+1] + ':1000B/s' cluster.biosNode.kill(signal.SIGTERM) clusterStart = time.time() @@ -109,7 +110,7 @@ endSync = time.time() Print(f'Unthrottled sync time: {endUnthrottledSync - clusterStart} seconds') Print(f'Throttled sync time: {endSync - clusterStart} seconds') - assert endSync - clusterStart > endUnthrottledSync - clusterStart + 10, 'Throttled sync time must be at least 10 seconds greater than unthrottled' + assert endSync - clusterStart > endUnthrottledSync - clusterStart + 50, 'Throttled sync time must be at least 50 seconds greater than unthrottled' testSuccessful=True finally: diff --git a/tests/p2p_sync_throttle_test_shape.json b/tests/p2p_sync_throttle_test_shape.json new file mode 100644 index 0000000000..4252ab483a --- /dev/null +++ b/tests/p2p_sync_throttle_test_shape.json @@ -0,0 +1,142 @@ +{ + "name": "testnet_", + "nodes": { + "bios": { + "index": -100, + "name": "bios", + "keys": [ + { + "pubkey": "EOS6MRyAjQq8ud7hVNYcfnVPJqcVpscN5So8BhtHuGYqET5GDW5CV", + "privkey": "5KQwrPbwdL6PhXujxW37FSSQZ1JiwsST4cqQzDeyXtP79zkvFD3" + } + ], + "peers": [ + "testnet_00" + ], + "producers": [ + "eosio" + ], + "dont_start": false, + "config_dir_name": "/home/giszczakj/Dev/leap/build/TestLogs/p2p_sync_throttle_test1192292/node_bios", + "data_dir_name": "/home/giszczakj/Dev/leap/build/TestLogs/p2p_sync_throttle_test1192292/node_bios", + "p2p_port": 9776, + "http_port": 8788, + "host_name": "localhost", + "public_name": "localhost", + "listen_addr": "0.0.0.0", + "_dot_label": "localhost:9776\nbios\nprod=eosio" + }, + "testnet_00": { + "index": 0, + "name": "testnet_00", + "keys": [ + { + "pubkey": "EOS7D6jfN6bbJD9cYheyhnBT4bmUWc3Qf4Yphf5GBeAAy58okcwHU", + "privkey": "5KkmnyunnpCQzgFoLMEtU3j7BRBa5aWmsBNru49ke7LdnZKFhmt" + } + ], + "peers": [], + "producers": [ + "defproducera", + "defproducerb", + "defproducerc", + "defproducerd", + "defproducere", + "defproducerf", + "defproducerg", + "defproducerh", + "defproduceri", + "defproducerj", + "defproducerk", + "defproducerl", + "defproducerm", + "defproducern", + "defproducero", + "defproducerp", + "defproducerq", + "defproducerr", + "defproducers", + "defproducert", + "defproduceru" + ], + "dont_start": false, + "config_dir_name": "/home/giszczakj/Dev/leap/build/TestLogs/p2p_sync_throttle_test1192292/node_00", + "data_dir_name": "/home/giszczakj/Dev/leap/build/TestLogs/p2p_sync_throttle_test1192292/node_00", + "p2p_port": 9876, + "http_port": 8888, + "host_name": "localhost", + "public_name": "localhost", + "listen_addr": "0.0.0.0", + "_dot_label": "localhost:9876\ntestnet_00\nprod=defproducera\ndefproducerb\ndefproducerc\ndefproducerd\ndefproducere\ndefproducerf\ndefproducerg\ndefproducerh\ndefproduceri\ndefproducerj\ndefproducerk\ndefproducerl\ndefproducerm\ndefproducern\ndefproducero\ndefproducerp\ndefproducerq\ndefproducerr\ndefproducers\ndefproducert\ndefproduceru" + }, + "testnet_01": { + "index": 1, + "name": "testnet_01", + "keys": [ + { + "pubkey": "EOS5tZqxLB8y9q2yHkgcXU4QFBEV6QKN3NQ54ygaFLWHJbjqYzFhw", + "privkey": "5KBs4qR7T8shJjCJUeFQXd77iKrok5TCtZiQhWJpCpc1VRxpNAs" + } + ], + "peers": [ + "testnet_00" + ], + "producers": [], + "dont_start": false, + "config_dir_name": "/home/giszczakj/Dev/leap/build/TestLogs/p2p_sync_throttle_test1192292/node_01", + "data_dir_name": "/home/giszczakj/Dev/leap/build/TestLogs/p2p_sync_throttle_test1192292/node_01", + "p2p_port": 9877, + "http_port": 8889, + "host_name": "localhost", + "public_name": "localhost", + "listen_addr": "0.0.0.0", + "_dot_label": "localhost:9877\ntestnet_01\nprod=" + }, + "testnet_02": { + "index": 2, + "name": "testnet_02", + "keys": [ + { + "pubkey": "EOS5FBPf5EN9bYEqmsKfPx9bxyUZ9grDiE24zqLFXtPa6UpVzMjE7", + "privkey": "5HtVDiAsD24seDm5sdswTcdZpx672XbBW9gBkyrzbsj2j9Y9JeC" + } + ], + "peers": [ + "testnet_01" + ], + "producers": [], + "dont_start": true, + "config_dir_name": "/home/giszczakj/Dev/leap/build/TestLogs/p2p_sync_throttle_test1192292/node_02", + "data_dir_name": "/home/giszczakj/Dev/leap/build/TestLogs/p2p_sync_throttle_test1192292/node_02", + "p2p_port": 9878, + "http_port": 8890, + "host_name": "localhost", + "public_name": "localhost", + "listen_addr": "0.0.0.0", + "_dot_label": "localhost:9878\ntestnet_02\nprod=" + }, + "testnet_03": { + "index": 3, + "name": "testnet_03", + "keys": [ + { + "pubkey": "EOS8XH2gKxsef9zxmMHm4vaSvxQUhg7W4GC3nK2KSRxyYrNG5gZFS", + "privkey": "5JcoRRhDcgm51dkBrRTmErceTqrYhrq22UnmUjTZToMpH91B9N1" + } + ], + "peers": [ + "testnet_02" + ], + "producers": [], + "dont_start": true, + "config_dir_name": "/home/giszczakj/Dev/leap/build/TestLogs/p2p_sync_throttle_test1192292/node_03", + "data_dir_name": "/home/giszczakj/Dev/leap/build/TestLogs/p2p_sync_throttle_test1192292/node_03", + "p2p_port": 9879, + "http_port": 8891, + "host_name": "localhost", + "public_name": "localhost", + "listen_addr": "0.0.0.0", + "_dot_label": "localhost:9879\ntestnet_03\nprod=" + } + } +} \ No newline at end of file From 70b530bb4a750fce2aa3c9037b2bdcb64c7ddb0b Mon Sep 17 00:00:00 2001 From: Jonathan Giszczak Date: Thu, 24 Aug 2023 14:07:58 -0500 Subject: [PATCH 03/46] Experiment: How many tests fail if waitForObj default times out --- tests/TestHarness/testUtils.py | 3 +++ 1 file changed, 3 insertions(+) diff --git a/tests/TestHarness/testUtils.py b/tests/TestHarness/testUtils.py index c386953c4c..df320ca2ca 100644 --- a/tests/TestHarness/testUtils.py +++ b/tests/TestHarness/testUtils.py @@ -256,6 +256,9 @@ def waitForObj(lam, timeout=None, sleepTime=1, reporter=None): if reporter is not None: reporter() time.sleep(sleepTime) + else: + if timeout == 60: + raise RuntimeError('waitForObj reached 60 second timeout') finally: if needsNewLine: Utils.Print() From b92d84cae994b69cda28332edcc2aa3878533405 Mon Sep 17 00:00:00 2001 From: Jonathan Giszczak Date: Thu, 24 Aug 2023 16:14:16 -0500 Subject: [PATCH 04/46] Address review comments in net_plugin. Clarify variable names in p2p throttled sync test and tweak numbers. Fix p2p throttled test to actually function (waitForBlock has a hidden default timeout). Bump up timeout in block_log_util_test. --- plugins/net_plugin/net_plugin.cpp | 74 ++++++++++++++++--------------- tests/block_log_util_test.py | 2 +- tests/p2p_sync_throttle_test.py | 22 ++++----- 3 files changed, 51 insertions(+), 47 deletions(-) diff --git a/plugins/net_plugin/net_plugin.cpp b/plugins/net_plugin/net_plugin.cpp index eeb22f6098..8496bed571 100644 --- a/plugins/net_plugin/net_plugin.cpp +++ b/plugins/net_plugin/net_plugin.cpp @@ -536,7 +536,7 @@ namespace eosio { bool in_sync() const; fc::logger& get_logger() { return logger; } - void create_session(tcp::socket&& socket, const string listen_address, const string limit); + void create_session(tcp::socket&& socket, const string listen_address, const string& limit); }; // peer_[x]log must be called from thread in connection strand @@ -788,6 +788,7 @@ namespace eosio { static std::string state_str(connection_state s); const string& peer_address() const { return peer_addr; } // thread safe, const + void set_connection_limit( const string& limit_str ); void set_connection_type( const string& peer_addr ); bool is_transactions_only_connection()const { return connection_type == transactions_only; } // thread safe, atomic bool is_blocks_only_connection()const { return connection_type == blocks_only; } @@ -1206,32 +1207,11 @@ namespace eosio { last_handshake_recv(), last_handshake_sent() { - std::istringstream in(limit_str); - fc_dlog( logger, "parsing connection endpoint with locale ${l}", ("l", std::locale("").name())); - in.imbue(std::locale("")); - double limit{0}; - in >> limit; - if( limit > 0.0f ) { - std::string units; - in >> units; - std::regex units_regex{"([KMGT]?[i]?)B/s"}; - std::smatch units_match; - std::regex_match(units, units_match, units_regex); - if( units_match.size() == 2 ) { - block_sync_rate_limit = static_cast(limit * prefix_multipliers.at(units_match[1].str())); - fc_dlog( logger, "setting block_sync_rate_limit to ${limit}", ("limit", block_sync_rate_limit)); - } else { - fc_wlog( logger, "listen address ${la} has invalid block sync limit specification, connection will not be throttled", ("la", listen_address)); - block_sync_rate_limit = 0; - } - } else { - block_sync_rate_limit = static_cast(limit); - } + set_connection_limit(limit_str); update_endpoints(); fc_dlog( logger, "new connection object created for peer ${address}:${port} from listener ${addr}", ("address", log_remote_endpoint_ip)("port", log_remote_endpoint_port)("addr", listen_address) ); } - // called from connection strand void connection::update_endpoints() { boost::system::error_code ec; boost::system::error_code ec2; @@ -1258,6 +1238,30 @@ namespace eosio { } } + void connection::set_connection_limit( const std::string& limit_str) { + std::istringstream in(limit_str); + fc_dlog( logger, "parsing connection endpoint with locale ${l}", ("l", std::locale("").name())); + in.imbue(std::locale("")); + double limit{0}; + in >> limit; + if( limit > 0.0f ) { + std::string units; + in >> units; + std::regex units_regex{"([KMGT]?[i]?)B/s"}; + std::smatch units_match; + std::regex_match(units, units_match, units_regex); + try { + block_sync_rate_limit = boost::numeric_cast(limit * prefix_multipliers.at(units_match[1].str())); + fc_dlog( logger, "setting block_sync_rate_limit to ${limit}", ("limit", block_sync_rate_limit)); + } catch (boost::numeric::bad_numeric_cast&) { + fc_wlog( logger, "listen address ${la} block sync limit specification overflowed, connection will not be throttled", ("la", listen_address)); + block_sync_rate_limit = 0; + } + } else { + block_sync_rate_limit = 0; + } + } + // called from connection strand void connection::set_connection_type( const std::string& peer_add ) { auto [host, port, type] = split_host_port_type(peer_add); @@ -2767,7 +2771,7 @@ namespace eosio { } - void net_plugin_impl::create_session(tcp::socket&& socket, const string listen_address, const string limit) { + void net_plugin_impl::create_session(tcp::socket&& socket, const string listen_address, const string& limit) { uint32_t visitors = 0; uint32_t from_addr = 0; boost::system::error_code rec; @@ -3975,24 +3979,24 @@ namespace eosio { void net_plugin::set_program_options( options_description& /*cli*/, options_description& cfg ) { cfg.add_options() - ( "p2p-listen-endpoint", bpo::value< vector >()->default_value( vector(1, string("0.0.0.0:9876:0")) ), "The actual host:port used to listen for incoming p2p connections. May be used multiple times. " - "Block syncing to all peers connected via the port will be throttled to the specified rate. " - "See the 'p2p-peer-address' argument for format details.") + ( "p2p-listen-endpoint", bpo::value< vector >()->default_value( vector(1, string("0.0.0.0:9876:0")) ), "The actual host:port[:] used to listen for incoming p2p connections. May be used multiple times. " + " The optional rate cap will limit block sync bandwidth to the specified rate. A number alone will be " + " interpreted as bytes per second. The number may be suffixed with units. Supported units are: " + " 'B/s', 'KB/s', 'MB/s, 'GB/s', 'TB/s', 'KiB/s', 'MiB/s', 'GiB/s', 'TiB/s'." + " Transactions and blocks outside of sync mode are not throttled." + " Examples:\n" + " 192.168.0.100:9876:1MiB/s\n" + " node.eos.io:9876:250KB/s\n" + " node.eos.io:9876:0.5GB/s") ( "p2p-server-address", bpo::value< vector >(), "An externally accessible host:port for identifying this node. Defaults to p2p-listen-endpoint. May be used as many times as p2p-listen-endpoint. If provided, the first address will be used in handshakes with other nodes. Otherwise the default is used.") ( "p2p-peer-address", bpo::value< vector >()->composing(), "The public endpoint of a peer node to connect to. Use multiple p2p-peer-address options as needed to compose a network.\n" - " Syntax: host:port[:|][:]\n" + " Syntax: host:port[:|]\n" " The optional 'trx' and 'blk' indicates to node that only transactions 'trx' or blocks 'blk' should be sent." - " The optional rate cap will limit block sync bandwidth to the specified rate. A number alone will be " - " interpreted as bytes per second. The number may be suffixed with units. Supported units are: " - " 'B/s', 'KB/s', 'MB/s, 'GB/s', and 'TB/s'. Transactions and blocks outside of sync mode are not throttled." " Examples:\n" " p2p.eos.io:9876\n" " p2p.trx.eos.io:9876:trx\n" - " p2p.blk.eos.io:9876:blk\n" - " p2p.eos.io:9876:1MB/s\n" - " p2p.blk.eos.io:9876:blk:250KB/s\n" - " p2p.eos.io:9876:0.5GB/s") + " p2p.blk.eos.io:9876:blk\n") ( "p2p-max-nodes-per-host", bpo::value()->default_value(def_max_nodes_per_host), "Maximum number of client nodes from any single IP address") ( "p2p-accept-transactions", bpo::value()->default_value(true), "Allow transactions received over p2p network to be evaluated and relayed if valid.") ( "p2p-auto-bp-peer", bpo::value< vector >()->composing(), diff --git a/tests/block_log_util_test.py b/tests/block_log_util_test.py index bd7bff144e..042e1467aa 100755 --- a/tests/block_log_util_test.py +++ b/tests/block_log_util_test.py @@ -70,7 +70,7 @@ def verifyBlockLog(expected_block_num, trimmedBlockLog): node0.kill(signal.SIGTERM) Print("Wait for node0's head block to become irreversible") - node1.waitForBlock(headBlockNum, blockType=BlockType.lib) + node1.waitForBlock(headBlockNum, blockType=BlockType.lib, timeout=90) infoAfter=node1.getInfo(exitOnError=True) headBlockNumAfter=infoAfter["head_block_num"] diff --git a/tests/p2p_sync_throttle_test.py b/tests/p2p_sync_throttle_test.py index 4ee9b19fd7..380f3558e2 100755 --- a/tests/p2p_sync_throttle_test.py +++ b/tests/p2p_sync_throttle_test.py @@ -94,23 +94,23 @@ endLargeBlocksHeadBlock = nonProdNode.getHeadBlockNum() - throttleNode = cluster.unstartedNodes[0] - i = throttleNode.cmd.index('--p2p-listen-endpoint') - throttleNode.cmd[i+1] = throttleNode.cmd[i+1] + ':1000B/s' + throttlingNode = cluster.unstartedNodes[0] + i = throttlingNode.cmd.index('--p2p-listen-endpoint') + throttlingNode.cmd[i+1] = throttlingNode.cmd[i+1] + ':40000B/s' cluster.biosNode.kill(signal.SIGTERM) clusterStart = time.time() cluster.launchUnstarted(2) - syncNode = cluster.getNode(3) + throttledNode = cluster.getNode(3) time.sleep(15) - throttleNode.waitForBlock(endLargeBlocksHeadBlock) - endUnthrottledSync = time.time() - syncNode.waitForBlock(endLargeBlocksHeadBlock) - endSync = time.time() - Print(f'Unthrottled sync time: {endUnthrottledSync - clusterStart} seconds') - Print(f'Throttled sync time: {endSync - clusterStart} seconds') - assert endSync - clusterStart > endUnthrottledSync - clusterStart + 50, 'Throttled sync time must be at least 50 seconds greater than unthrottled' + assert throttlingNode.waitForBlock(endLargeBlocksHeadBlock), f'wait for block {endLargeBlocksHeadBlock} on throttled node timed out' + endThrottlingSync = time.time() + assert throttledNode.waitForBlock(endLargeBlocksHeadBlock, timeout=90), f'Wait for block {endLargeBlocksHeadBlock} on sync node timed out' + endThrottledSync = time.time() + Print(f'Unthrottled sync time: {endThrottlingSync - clusterStart} seconds') + Print(f'Throttled sync time: {endThrottledSync - clusterStart} seconds') + assert endThrottledSync - clusterStart > endThrottlingSync - clusterStart + 30, 'Throttled sync time must be at least 30 seconds greater than unthrottled' testSuccessful=True finally: From dc54d46bde8367f335305c4f332ca5db503dab8a Mon Sep 17 00:00:00 2001 From: Jonathan Giszczak Date: Thu, 24 Aug 2023 16:56:33 -0500 Subject: [PATCH 05/46] Further tweak the sync throttle test for machines faster than mine. --- tests/p2p_sync_throttle_test.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/tests/p2p_sync_throttle_test.py b/tests/p2p_sync_throttle_test.py index 380f3558e2..6c9d059bf0 100755 --- a/tests/p2p_sync_throttle_test.py +++ b/tests/p2p_sync_throttle_test.py @@ -110,7 +110,7 @@ endThrottledSync = time.time() Print(f'Unthrottled sync time: {endThrottlingSync - clusterStart} seconds') Print(f'Throttled sync time: {endThrottledSync - clusterStart} seconds') - assert endThrottledSync - clusterStart > endThrottlingSync - clusterStart + 30, 'Throttled sync time must be at least 30 seconds greater than unthrottled' + assert endThrottledSync - clusterStart > endThrottlingSync - clusterStart + 15, 'Throttled sync time must be at least 15 seconds greater than unthrottled' testSuccessful=True finally: From 3a508641e8a0db0d32f10110391cb4756802f6ce Mon Sep 17 00:00:00 2001 From: Jonathan Giszczak Date: Fri, 25 Aug 2023 15:23:32 -0500 Subject: [PATCH 06/46] Move block sync throttling to the correct layer in the call stack. Remove exponential backoff in throttle and utilize existing retry mechanism. --- plugins/net_plugin/net_plugin.cpp | 34 ++++++++++++++----------------- 1 file changed, 15 insertions(+), 19 deletions(-) diff --git a/plugins/net_plugin/net_plugin.cpp b/plugins/net_plugin/net_plugin.cpp index 8496bed571..b8bd0e0217 100644 --- a/plugins/net_plugin/net_plugin.cpp +++ b/plugins/net_plugin/net_plugin.cpp @@ -1685,7 +1685,7 @@ namespace eosio { } else { peer_dlog( this, "enqueue sync block ${num}", ("num", peer_requested->last + 1) ); } - uint32_t num = ++peer_requested->last; + uint32_t num = peer_requested->last + 1; if(num == peer_requested->end_block) { peer_requested.reset(); peer_dlog( this, "completing enqueue_sync_block ${num}", ("num", num) ); @@ -1697,14 +1697,25 @@ namespace eosio { sb = cc.fetch_block_by_number( num ); // thread-safe } FC_LOG_AND_DROP(); if( sb ) { - block_sync_bytes_sent += enqueue_block( sb, true ); + if( block_sync_rate_limit > 0 ) { + auto elapsed = std::chrono::duration_cast(get_time() - connection_start_time); + auto current_rate = double(block_sync_bytes_sent) / elapsed.count(); + if( current_rate < block_sync_rate_limit ) { + block_sync_bytes_sent += enqueue_block( sb, true ); + ++peer_requested->last; + } else { + return false; + } + } else { + block_sync_bytes_sent += enqueue_block( sb, true ); + ++peer_requested->last; + } } else { peer_ilog( this, "enqueue sync, unable to fetch block ${num}, sending benign_other go away", ("num", num) ); peer_requested.reset(); // unable to provide requested blocks no_retry = benign_other; enqueue( go_away_message( benign_other ) ); } - return true; } @@ -1824,22 +1835,7 @@ namespace eosio { block_buffer_factory buff_factory; auto sb = buff_factory.get_send_buffer( b ); latest_blk_time = std::chrono::system_clock::now(); - if( block_sync_rate_limit > 0 ) { - int sleep_time_us = 100; - const int max_sleep_time_us = 100000; - while( true) { - auto elapsed = std::chrono::duration_cast(get_time() - connection_start_time); - auto current_rate = double(block_sync_bytes_sent) / elapsed.count(); - if( current_rate < block_sync_rate_limit ) { - enqueue_buffer( sb, no_reason, to_sync_queue); - break; - } - usleep(sleep_time_us); - sleep_time_us = sleep_time_us*2 > max_sleep_time_us ? max_sleep_time_us : sleep_time_us*2; - } - } else { - enqueue_buffer( sb, no_reason, to_sync_queue); - } + enqueue_buffer( sb, no_reason, to_sync_queue); return sb->size(); } From e1c1d429207e037ed95d1d42a0fc2756baa5344d Mon Sep 17 00:00:00 2001 From: Jonathan Giszczak Date: Fri, 25 Aug 2023 17:09:51 -0500 Subject: [PATCH 07/46] Move block sync rate limit parsing to plugin initialize. --- plugins/net_plugin/net_plugin.cpp | 69 ++++++++++++++++--------------- 1 file changed, 35 insertions(+), 34 deletions(-) diff --git a/plugins/net_plugin/net_plugin.cpp b/plugins/net_plugin/net_plugin.cpp index b8bd0e0217..61e9b1a94a 100644 --- a/plugins/net_plugin/net_plugin.cpp +++ b/plugins/net_plugin/net_plugin.cpp @@ -476,6 +476,7 @@ namespace eosio { std::function increment_dropped_trxs; private: + static const std::map prefix_multipliers; alignas(hardware_destructive_interference_size) mutable fc::mutex chain_info_mtx; // protects chain_info_t chain_info_t chain_info GUARDED_BY(chain_info_mtx); @@ -530,14 +531,15 @@ namespace eosio { constexpr static uint16_t to_protocol_version(uint16_t v); + size_t parse_connection_limit(const string& limit_str); void plugin_initialize(const variables_map& options); void plugin_startup(); void plugin_shutdown(); bool in_sync() const; fc::logger& get_logger() { return logger; } - void create_session(tcp::socket&& socket, const string listen_address, const string& limit); - }; + void create_session(tcp::socket&& socket, const string listen_address, size_t limit); + }; //net_plugin_impl // peer_[x]log must be called from thread in connection strand #define peer_dlog( PEER, FORMAT, ... ) \ @@ -772,7 +774,7 @@ namespace eosio { /// @brief ctor /// @param socket created by boost::asio in fc::listener /// @param address identifier of listen socket which accepted this new connection - explicit connection( tcp::socket&& socket, const string& listen_address, const string& limit_str ); + explicit connection( tcp::socket&& socket, const string& listen_address, size_t block_sync_rate_limit ); ~connection() = default; connection( const connection& ) = delete; @@ -788,7 +790,6 @@ namespace eosio { static std::string state_str(connection_state s); const string& peer_address() const { return peer_addr; } // thread safe, const - void set_connection_limit( const string& limit_str ); void set_connection_type( const string& peer_addr ); bool is_transactions_only_connection()const { return connection_type == transactions_only; } // thread safe, atomic bool is_blocks_only_connection()const { return connection_type == blocks_only; } @@ -812,7 +813,6 @@ namespace eosio { private: static const string unknown; - static const std::map prefix_multipliers; std::atomic peer_ping_time_ns = std::numeric_limits::max(); @@ -1063,7 +1063,7 @@ namespace eosio { }; // class connection const string connection::unknown = ""; - const std::map connection::prefix_multipliers{ + const std::map net_plugin_impl::prefix_multipliers{ {"",1},{"K",pow(10,3)},{"M",pow(10,6)},{"G",pow(10, 9)},{"T",pow(10, 12)}, {"Ki",pow(2,10)},{"Mi",pow(2,20)},{"Gi",pow(2,30)},{"Ti",pow(2,40)} }; @@ -1197,9 +1197,10 @@ namespace eosio { fc_ilog( logger, "created connection ${c} to ${n}", ("c", connection_id)("n", endpoint) ); } - connection::connection(tcp::socket&& s, const string& listen_address, const string& limit_str) + connection::connection(tcp::socket&& s, const string& listen_address, size_t block_sync_rate_limit) : listen_address( listen_address ), peer_addr(), + block_sync_rate_limit(block_sync_rate_limit), strand( my_impl->thread_pool.get_executor() ), socket( new tcp::socket( std::move(s) ) ), connection_id( ++my_impl->current_connection_id ), @@ -1207,7 +1208,6 @@ namespace eosio { last_handshake_recv(), last_handshake_sent() { - set_connection_limit(limit_str); update_endpoints(); fc_dlog( logger, "new connection object created for peer ${address}:${port} from listener ${addr}", ("address", log_remote_endpoint_ip)("port", log_remote_endpoint_port)("addr", listen_address) ); } @@ -1238,30 +1238,6 @@ namespace eosio { } } - void connection::set_connection_limit( const std::string& limit_str) { - std::istringstream in(limit_str); - fc_dlog( logger, "parsing connection endpoint with locale ${l}", ("l", std::locale("").name())); - in.imbue(std::locale("")); - double limit{0}; - in >> limit; - if( limit > 0.0f ) { - std::string units; - in >> units; - std::regex units_regex{"([KMGT]?[i]?)B/s"}; - std::smatch units_match; - std::regex_match(units, units_match, units_regex); - try { - block_sync_rate_limit = boost::numeric_cast(limit * prefix_multipliers.at(units_match[1].str())); - fc_dlog( logger, "setting block_sync_rate_limit to ${limit}", ("limit", block_sync_rate_limit)); - } catch (boost::numeric::bad_numeric_cast&) { - fc_wlog( logger, "listen address ${la} block sync limit specification overflowed, connection will not be throttled", ("la", listen_address)); - block_sync_rate_limit = 0; - } - } else { - block_sync_rate_limit = 0; - } - } - // called from connection strand void connection::set_connection_type( const std::string& peer_add ) { auto [host, port, type] = split_host_port_type(peer_add); @@ -2767,7 +2743,7 @@ namespace eosio { } - void net_plugin_impl::create_session(tcp::socket&& socket, const string listen_address, const string& limit) { + void net_plugin_impl::create_session(tcp::socket&& socket, const string listen_address, size_t limit) { uint32_t visitors = 0; uint32_t from_addr = 0; boost::system::error_code rec; @@ -4039,6 +4015,29 @@ namespace eosio { return fc::json::from_string(s).as(); } + size_t net_plugin_impl::parse_connection_limit( const std::string& limit_str) { + std::istringstream in(limit_str); + fc_dlog( logger, "parsing connection endpoint with locale ${l}", ("l", std::locale("").name())); + in.imbue(std::locale("")); + double limit{0}; + in >> limit; + size_t block_sync_rate_limit = 0; + if( limit > 0.0f ) { + std::string units; + in >> units; + std::regex units_regex{"([KMGT]?[i]?)B/s"}; + std::smatch units_match; + std::regex_match(units, units_match, units_regex); + try { + block_sync_rate_limit = boost::numeric_cast(limit * prefix_multipliers.at(units_match[1].str())); + fc_dlog( logger, "setting block_sync_rate_limit to ${limit}", ("limit", block_sync_rate_limit)); + } catch (boost::numeric::bad_numeric_cast&) { + EOS_ASSERT(false, plugin_config_exception, "block sync limit specification overflowed: ${limit}", ("limit", limit_str)); + } + } + return block_sync_rate_limit; + } + void net_plugin_impl::plugin_initialize( const variables_map& options ) { try { fc_ilog( logger, "Initialize net plugin" ); @@ -4257,9 +4256,11 @@ namespace eosio { limit = std::string(address, last_colon_location+1); } + auto block_sync_rate_limit = my->parse_connection_limit(limit); + fc::create_listener( my->thread_pool.get_executor(), logger, accept_timeout, listen_addr, extra_listening_log_info, - [my = my, addr = p2p_addr, limit = limit](tcp::socket&& socket) { fc_dlog( logger, "start listening on ${addr} with peer sync throttle ${limit}", ("addr", addr)("limit", limit)); my->create_session(std::move(socket), addr, limit); }); + [my = my, addr = p2p_addr, block_sync_rate_limit = block_sync_rate_limit](tcp::socket&& socket) { fc_dlog( logger, "start listening on ${addr} with peer sync throttle ${limit}", ("addr", addr)("limit", block_sync_rate_limit)); my->create_session(std::move(socket), addr, block_sync_rate_limit); }); } catch (const std::exception& e) { fc_elog( logger, "net_plugin::plugin_startup failed to listen on ${addr}, ${what}", ("addr", address)("what", e.what()) ); From 92e4e7cefd8098f1da8f2f68158d4248387f57f2 Mon Sep 17 00:00:00 2001 From: Jonathan Giszczak Date: Tue, 29 Aug 2023 19:24:26 -0500 Subject: [PATCH 08/46] Require IPv6 addresses to be in square bracket format. Fix parsing and overflow problems and address peer review comments. Extend throttle test to add another throttle prefix. --- plugins/net_plugin/net_plugin.cpp | 39 +++++++++++++++++++------------ tests/p2p_sync_throttle_test.py | 4 ++++ 2 files changed, 28 insertions(+), 15 deletions(-) diff --git a/plugins/net_plugin/net_plugin.cpp b/plugins/net_plugin/net_plugin.cpp index 61e9b1a94a..565538e6e5 100644 --- a/plugins/net_plugin/net_plugin.cpp +++ b/plugins/net_plugin/net_plugin.cpp @@ -476,7 +476,10 @@ namespace eosio { std::function increment_dropped_trxs; private: - static const std::map prefix_multipliers; + inline static const std::map prefix_multipliers{ + {"",1},{"K",pow(10,3)},{"M",pow(10,6)},{"G",pow(10, 9)},{"T",pow(10, 12)}, + {"Ki",pow(2,10)},{"Mi",pow(2,20)},{"Gi",pow(2,30)},{"Ti",pow(2,40)} + }; alignas(hardware_destructive_interference_size) mutable fc::mutex chain_info_mtx; // protects chain_info_t chain_info_t chain_info GUARDED_BY(chain_info_mtx); @@ -531,7 +534,7 @@ namespace eosio { constexpr static uint16_t to_protocol_version(uint16_t v); - size_t parse_connection_limit(const string& limit_str); + size_t parse_connection_rate_limit(const string& limit_str); void plugin_initialize(const variables_map& options); void plugin_startup(); void plugin_shutdown(); @@ -1063,10 +1066,6 @@ namespace eosio { }; // class connection const string connection::unknown = ""; - const std::map net_plugin_impl::prefix_multipliers{ - {"",1},{"K",pow(10,3)},{"M",pow(10,6)},{"G",pow(10, 9)},{"T",pow(10, 12)}, - {"Ki",pow(2,10)},{"Mi",pow(2,20)},{"Gi",pow(2,30)},{"Ti",pow(2,40)} - }; // called from connection strand struct msg_handler : public fc::visitor { @@ -4015,12 +4014,13 @@ namespace eosio { return fc::json::from_string(s).as(); } - size_t net_plugin_impl::parse_connection_limit( const std::string& limit_str) { + size_t net_plugin_impl::parse_connection_rate_limit( const std::string& limit_str) { std::istringstream in(limit_str); - fc_dlog( logger, "parsing connection endpoint with locale ${l}", ("l", std::locale("").name())); + fc_dlog( logger, "parsing connection endpoint limit ${limit} with locale ${l}", ("limit", limit_str)("l", std::locale("").name())); in.imbue(std::locale("")); double limit{0}; in >> limit; + EOS_ASSERT(limit >= 0, plugin_config_exception, "block sync rate limit must be positive: ${limit}", ("limit", limit_str)); size_t block_sync_rate_limit = 0; if( limit > 0.0f ) { std::string units; @@ -4029,10 +4029,11 @@ namespace eosio { std::smatch units_match; std::regex_match(units, units_match, units_regex); try { + EOS_ASSERT(units_match.size() == 2, plugin_config_exception, "invalid block sync rate limit specification: ${limit}", ("limit", units)); block_sync_rate_limit = boost::numeric_cast(limit * prefix_multipliers.at(units_match[1].str())); - fc_dlog( logger, "setting block_sync_rate_limit to ${limit}", ("limit", block_sync_rate_limit)); + fc_dlog( logger, "setting block_sync_rate_limit to ${limit} bytes per second", ("limit", block_sync_rate_limit)); } catch (boost::numeric::bad_numeric_cast&) { - EOS_ASSERT(false, plugin_config_exception, "block sync limit specification overflowed: ${limit}", ("limit", limit_str)); + EOS_ASSERT(false, plugin_config_exception, "block sync rate limit specification overflowed: ${limit}", ("limit", limit_str)); } } return block_sync_rate_limit; @@ -4250,13 +4251,21 @@ namespace eosio { auto listen_addr = address; auto limit = string("0"); - if( std::count(address.begin(), address.end(), ':') > 1 ) { - auto last_colon_location = address.rfind(':'); - listen_addr = std::string(address, 0, last_colon_location); - limit = std::string(address, last_colon_location+1); + auto last_colon_location = address.rfind(':'); + if( auto right_bracket_location = address.find(']'); right_bracket_location != address.npos ) { + if( std::count(address.begin()+right_bracket_location, address.end(), ':') > 1 ) { + listen_addr = std::string(address, 0, last_colon_location); + limit = std::string(address, last_colon_location+1); + } + } else { + if( auto colon_count = std::count(address.begin(), address.end(), ':'); colon_count > 1 ) { + EOS_ASSERT( colon_count <= 2, plugin_config_exception, "Invalid address specification ${addr}; IPv6 addresses must be enclosed in square brackets.", ("addr", address)); + listen_addr = std::string(address, 0, last_colon_location); + limit = std::string(address, last_colon_location+1); + } } - auto block_sync_rate_limit = my->parse_connection_limit(limit); + auto block_sync_rate_limit = my->parse_connection_rate_limit(limit); fc::create_listener( my->thread_pool.get_executor(), logger, accept_timeout, listen_addr, extra_listening_log_info, diff --git a/tests/p2p_sync_throttle_test.py b/tests/p2p_sync_throttle_test.py index 6c9d059bf0..a319d934f3 100755 --- a/tests/p2p_sync_throttle_test.py +++ b/tests/p2p_sync_throttle_test.py @@ -96,7 +96,11 @@ throttlingNode = cluster.unstartedNodes[0] i = throttlingNode.cmd.index('--p2p-listen-endpoint') + throttleListenAddr = throttlingNode.cmd[i+1] throttlingNode.cmd[i+1] = throttlingNode.cmd[i+1] + ':40000B/s' + throttleListenIP, throttleListenPort = throttleListenAddr.split(':') + throttlingNode.cmd.append('--p2p-listen-endpoint') + throttlingNode.cmd.append(f'{throttleListenIP}:{int(throttleListenPort)+100}:1TB/s') cluster.biosNode.kill(signal.SIGTERM) clusterStart = time.time() From 28bb38d2e52a389e68bb761f2a69442a233efff3 Mon Sep 17 00:00:00 2001 From: Jonathan Giszczak Date: Wed, 30 Aug 2023 16:05:40 -0500 Subject: [PATCH 09/46] Added throttle exception for configured p2p-peer-addresses. Added additional code comments. Addressed peer review comment. --- .../include/eosio/net_plugin/protocol.hpp | 3 +- plugins/net_plugin/net_plugin.cpp | 35 +++++++++++++++---- tests/p2p_sync_throttle_test.py | 10 ++++++ 3 files changed, 40 insertions(+), 8 deletions(-) diff --git a/plugins/net_plugin/include/eosio/net_plugin/protocol.hpp b/plugins/net_plugin/include/eosio/net_plugin/protocol.hpp index 7e292e7bf2..d37fdbc18d 100644 --- a/plugins/net_plugin/include/eosio/net_plugin/protocol.hpp +++ b/plugins/net_plugin/include/eosio/net_plugin/protocol.hpp @@ -16,7 +16,8 @@ namespace eosio { // Longest domain name is 253 characters according to wikipedia. // Addresses include ":port" where max port is 65535, which adds 6 chars. - // Addresses may also include ":bitrate" with suffix and separators, which adds 30 chars. + // Addresses may also include ":bitrate" with suffix and separators, which adds 30 chars, + // for the maximum comma-separated value that fits in a size_t expressed in decimal plus a suffix. // We also add our own extentions of "[:trx|:blk] - xxxxxxx", which adds 14 chars, total= 273. // Allow for future extentions as well, hence 384. constexpr size_t max_p2p_address_length = 253 + 6 + 30; diff --git a/plugins/net_plugin/net_plugin.cpp b/plugins/net_plugin/net_plugin.cpp index 565538e6e5..0dffbad4a9 100644 --- a/plugins/net_plugin/net_plugin.cpp +++ b/plugins/net_plugin/net_plugin.cpp @@ -387,6 +387,9 @@ namespace eosio { std::optional status(const string& host) const; vector connection_statuses() const; + template + void for_each_supplied_peer(Function&& f) const; + template void for_each_connection(Function&& f) const; @@ -1143,6 +1146,11 @@ namespace eosio { } + template + void connections_manager::for_each_supplied_peer( Function&& f ) const { + std::for_each(supplied_peers.begin(), supplied_peers.end(), std::forward(f)); + } + template void connections_manager::for_each_connection( Function&& f ) const { std::shared_lock g( connections_mtx ); @@ -2768,6 +2776,16 @@ namespace eosio { visitors < connections.get_max_client_count())) { fc_ilog(logger, "Accepted new connection: " + paddr_str); + connections.for_each_supplied_peer([&listen_address, &paddr_str, &limit](const string& peer_addr) { + auto [host, port, type] = split_host_port_type(peer_addr); + if (host == paddr_str) { + if (limit > 0) { + fc_dlog(logger, "Connection inbound to ${la} from ${a} is a configured p2p-peer-address and will not be throttled", ("la", listen_address)("a", paddr_str)); + } + limit = 0; + } + }); + connection_ptr new_connection = std::make_shared(std::move(socket), listen_address, limit); new_connection->strand.post([new_connection, this]() { if (new_connection->start_session()) { @@ -3951,14 +3969,17 @@ namespace eosio { { cfg.add_options() ( "p2p-listen-endpoint", bpo::value< vector >()->default_value( vector(1, string("0.0.0.0:9876:0")) ), "The actual host:port[:] used to listen for incoming p2p connections. May be used multiple times. " - " The optional rate cap will limit block sync bandwidth to the specified rate. A number alone will be " - " interpreted as bytes per second. The number may be suffixed with units. Supported units are: " + " The optional rate cap will limit per connection block sync bandwidth to the specified rate. Total " + " allowed bandwidth is the rate-cap multiplied by the connection count limit. A number alone will be " + " interpreted as bytes per second. The number is parsed locale-aware and may include thousands and " + " decimal separators. It may also be suffixed with units. Supported units are: " " 'B/s', 'KB/s', 'MB/s, 'GB/s', 'TB/s', 'KiB/s', 'MiB/s', 'GiB/s', 'TiB/s'." " Transactions and blocks outside of sync mode are not throttled." " Examples:\n" " 192.168.0.100:9876:1MiB/s\n" - " node.eos.io:9876:250KB/s\n" - " node.eos.io:9876:0.5GB/s") + " node.eos.io:9876:1,512KB/s\n" + " node.eos.io:9876:0.5GB/s\n" + " [2001:db8:85a3:8d3:1319:8a2e:370:7348]:9876:250KB/s") ( "p2p-server-address", bpo::value< vector >(), "An externally accessible host:port for identifying this node. Defaults to p2p-listen-endpoint. May be used as many times as p2p-listen-endpoint. If provided, the first address will be used in handshakes with other nodes. Otherwise the default is used.") ( "p2p-peer-address", bpo::value< vector >()->composing(), "The public endpoint of a peer node to connect to. Use multiple p2p-peer-address options as needed to compose a network.\n" @@ -4020,9 +4041,9 @@ namespace eosio { in.imbue(std::locale("")); double limit{0}; in >> limit; - EOS_ASSERT(limit >= 0, plugin_config_exception, "block sync rate limit must be positive: ${limit}", ("limit", limit_str)); + EOS_ASSERT(limit >= 0.0, plugin_config_exception, "block sync rate limit must be positive: ${limit}", ("limit", limit_str)); size_t block_sync_rate_limit = 0; - if( limit > 0.0f ) { + if( limit > 0.0 ) { std::string units; in >> units; std::regex units_regex{"([KMGT]?[i]?)B/s"}; @@ -4033,7 +4054,7 @@ namespace eosio { block_sync_rate_limit = boost::numeric_cast(limit * prefix_multipliers.at(units_match[1].str())); fc_dlog( logger, "setting block_sync_rate_limit to ${limit} bytes per second", ("limit", block_sync_rate_limit)); } catch (boost::numeric::bad_numeric_cast&) { - EOS_ASSERT(false, plugin_config_exception, "block sync rate limit specification overflowed: ${limit}", ("limit", limit_str)); + EOS_THROW(plugin_config_exception, "block sync rate limit specification overflowed: ${limit}", ("limit", limit_str)); } } return block_sync_rate_limit; diff --git a/tests/p2p_sync_throttle_test.py b/tests/p2p_sync_throttle_test.py index a319d934f3..0de560d44e 100755 --- a/tests/p2p_sync_throttle_test.py +++ b/tests/p2p_sync_throttle_test.py @@ -50,6 +50,8 @@ extraNodeosArgs = ''.join([i+j for i,j in zip([' --plugin '] * len(args.plugin), args.plugin)]) else: extraNodeosArgs = '' + # Custom topology is a line of singlely connected nodes from highest node number in sequence to lowest, + # the reverse of the usual TestHarness line topology. if cluster.launch(pnodes=pnodes, unstartedNodes=2, totalNodes=total_nodes, prodCount=prod_count, topo='./tests/p2p_sync_throttle_test_shape.json', delay=delay, extraNodeosArgs=extraNodeosArgs) is False: @@ -97,6 +99,9 @@ throttlingNode = cluster.unstartedNodes[0] i = throttlingNode.cmd.index('--p2p-listen-endpoint') throttleListenAddr = throttlingNode.cmd[i+1] + # Using 40000 bytes per second to allow syncing of 10,000 byte blocks resulting from + # the trx generators in a reasonable amount of time, while still being reliably + # distinguishable from unthrottled throughput. throttlingNode.cmd[i+1] = throttlingNode.cmd[i+1] + ':40000B/s' throttleListenIP, throttleListenPort = throttleListenAddr.split(':') throttlingNode.cmd.append('--p2p-listen-endpoint') @@ -108,12 +113,17 @@ throttledNode = cluster.getNode(3) time.sleep(15) + # Throttling node was offline during block generation and once online receives blocks as fast as possible while + # transmitting blocks to the next node in line at the above throttle setting. assert throttlingNode.waitForBlock(endLargeBlocksHeadBlock), f'wait for block {endLargeBlocksHeadBlock} on throttled node timed out' endThrottlingSync = time.time() + # Throttled node is connecting to a listen port with a block sync throttle applied so it will receive + # blocks more slowly during syncing than an unthrottled node. assert throttledNode.waitForBlock(endLargeBlocksHeadBlock, timeout=90), f'Wait for block {endLargeBlocksHeadBlock} on sync node timed out' endThrottledSync = time.time() Print(f'Unthrottled sync time: {endThrottlingSync - clusterStart} seconds') Print(f'Throttled sync time: {endThrottledSync - clusterStart} seconds') + # 15 seconds chosen as the minimum reasonable sync time differential given the throttle and the average block size. assert endThrottledSync - clusterStart > endThrottlingSync - clusterStart + 15, 'Throttled sync time must be at least 15 seconds greater than unthrottled' testSuccessful=True From e998cc6ecdfd510a5cf63d89a2f7e819b19ed6e8 Mon Sep 17 00:00:00 2001 From: Jonathan Giszczak Date: Mon, 11 Sep 2023 11:27:26 -0500 Subject: [PATCH 10/46] Debug commit, doesn't build. --- .../eosio/net_plugin/auto_bp_peering.hpp | 6 +- plugins/net_plugin/net_plugin.cpp | 267 +++++++++++------- .../tests/auto_bp_peering_unittest.cpp | 44 +-- 3 files changed, 185 insertions(+), 132 deletions(-) diff --git a/plugins/net_plugin/include/eosio/net_plugin/auto_bp_peering.hpp b/plugins/net_plugin/include/eosio/net_plugin/auto_bp_peering.hpp index b5122f80aa..8a4f736680 100644 --- a/plugins/net_plugin/include/eosio/net_plugin/auto_bp_peering.hpp +++ b/plugins/net_plugin/include/eosio/net_plugin/auto_bp_peering.hpp @@ -145,7 +145,7 @@ class bp_connection_manager { // Only called from connection strand std::size_t num_established_clients() const { uint32_t num_clients = 0; - self()->connections.for_each_connection([&num_clients](auto&& conn) { + self()->connections.for_each_connection([&num_clients](const std::shared_ptr& conn) { if (established_client_connection(conn)) { ++num_clients; } @@ -157,7 +157,7 @@ class bp_connection_manager { // Only called from connection strand // This should only be called after the first handshake message is received to check if an incoming connection // has exceeded the pre-configured max_client_count limit. - bool exceeding_connection_limit(Connection* new_connection) const { + bool exceeding_connection_limit(std::shared_ptr new_connection) const { return auto_bp_peering_enabled() && self()->connections.get_max_client_count() != 0 && established_client_connection(new_connection) && num_established_clients() > self()->connections.get_max_client_count(); } @@ -182,7 +182,7 @@ class bp_connection_manager { fc_dlog(self()->get_logger(), "pending_downstream_neighbors: ${pending_downstream_neighbors}", ("pending_downstream_neighbors", to_string(pending_downstream_neighbors))); - for (auto neighbor : pending_downstream_neighbors) { self()->connections.connect(config.bp_peer_addresses[neighbor], *self()->p2p_addresses.begin() ); } + for (auto neighbor : pending_downstream_neighbors) { self()->connections.resolve_and_connect(config.bp_peer_addresses[neighbor], *self()->p2p_addresses.begin() ); } pending_neighbors = std::move(pending_downstream_neighbors); finder.add_upstream_neighbors(pending_neighbors); diff --git a/plugins/net_plugin/net_plugin.cpp b/plugins/net_plugin/net_plugin.cpp index 0dffbad4a9..5021096ced 100644 --- a/plugins/net_plugin/net_plugin.cpp +++ b/plugins/net_plugin/net_plugin.cpp @@ -24,6 +24,7 @@ #include #include #include +#include #include #include @@ -69,6 +70,7 @@ namespace eosio { using boost::asio::ip::address_v4; using boost::asio::ip::host_name; using boost::multi_index_container; + using namespace boost::multi_index; using fc::time_point; using fc::time_point_sec; @@ -334,9 +336,37 @@ namespace eosio { constexpr uint32_t packed_transaction_which = fc::get_index(); // see protocol net_message class connections_manager { + public: + struct connection_details { + std::string host; + connection_ptr c; + tcp::endpoint active_ip; + std::vector ips; + operator const connection_ptr&() const { return c; } + }; + + using connection_details_index = multi_index_container< + connection_details, + indexed_by< + ordered_unique< + tag, + key<&connection_details::host> + >, + ordered_unique< + tag, + key<&connection_details::c> + >, + ordered_non_unique< + tag, + key<&connection_details::active_ip> + > + > + >; + private: alignas(hardware_destructive_interference_size) mutable std::shared_mutex connections_mtx; - chain::flat_set connections; + //chain::flat_set connections GUARDED_BY(connections_mtx); + connection_details_index peer_ips GUARDED_BY(connections_mtx); chain::flat_set supplied_peers; alignas(hardware_destructive_interference_size) @@ -352,8 +382,6 @@ namespace eosio { private: // must call with held mutex connection_ptr find_connection_i(const string& host) const; - void add_i(connection_ptr&& c); - void connect_i(const string& peer, const string& p2p_address); void connection_monitor(const std::weak_ptr& from_connection); @@ -380,7 +408,7 @@ namespace eosio { void stop_conn_timer(); void add(connection_ptr c); - string connect(const string& host, const string& p2p_address); + string resolve_and_connect(const string& host, const string& p2p_address); string disconnect(const string& host); void close_all(); @@ -401,7 +429,7 @@ namespace eosio { template bool any_of_block_connections(UnaryPredicate&& p) const; - }; + }; // connections_manager class net_plugin_impl : public std::enable_shared_from_this, public auto_bp_peering::bp_connection_manager { @@ -829,7 +857,6 @@ namespace eosio { std::atomic conn_state{connection_state::connecting}; - string listen_address; // address sent to peer in handshake const string peer_addr; enum connection_types : char { both, @@ -862,6 +889,7 @@ namespace eosio { fc::sha256 conn_node_id; string short_conn_node_id; + string listen_address; // address sent to peer in handshake string log_p2p_address; string log_remote_endpoint_ip; string log_remote_endpoint_port; @@ -940,13 +968,16 @@ namespace eosio { bool process_next_block_message(uint32_t message_length); bool process_next_trx_message(uint32_t message_length); - void update_endpoints(); + void update_endpoints(const tcp::endpoint& endpoint = tcp::endpoint()); public: bool populate_handshake( handshake_message& hello ) const; - bool resolve_and_connect(); - void connect( const std::shared_ptr& resolver, const tcp::resolver::results_type& endpoints ); +// bool connect(); + //typedef boost:multi_index::index void connections_manager::for_each_supplied_peer( Function&& f ) const { + std::shared_lock g( connections_mtx ); std::for_each(supplied_peers.begin(), supplied_peers.end(), std::forward(f)); } template void connections_manager::for_each_connection( Function&& f ) const { std::shared_lock g( connections_mtx ); - std::for_each(connections.begin(), connections.end(), std::forward(f)); + auto& index = peer_ips.get(); + std::for_each(index.begin(), index.end(), std::forward(f)); } template void connections_manager::for_each_block_connection( Function&& f ) const { std::shared_lock g( connections_mtx ); - for( auto& c : connections ) { + auto& index = peer_ips.get(); + for( const connection_ptr& c : index ) { if (c->is_blocks_connection()) { f(c); } @@ -1170,13 +1204,15 @@ namespace eosio { template bool connections_manager::any_of_connections(UnaryPredicate&& p) const { std::shared_lock g(connections_mtx); - return std::any_of(connections.cbegin(), connections.cend(), std::forward(p)); + auto& index = peer_ips.get(); + return std::any_of(index.cbegin(), index.cend(), std::forward(p)); } template bool connections_manager::any_of_block_connections(UnaryPredicate&& p) const { std::shared_lock g( connections_mtx ); - for( auto& c : connections ) { + auto& index = peer_ips.get(); + for( const connection_ptr& c : index ) { if( c->is_blocks_connection() ) { if (p(c)) return true; @@ -1189,10 +1225,10 @@ namespace eosio { //--------------------------------------------------------------------------- connection::connection( const string& endpoint, const string& listen_address ) - : listen_address( listen_address ), - peer_addr( endpoint ), + : peer_addr( endpoint ), strand( my_impl->thread_pool.get_executor() ), socket( new tcp::socket( my_impl->thread_pool.get_executor() ) ), + listen_address( listen_address ), log_p2p_address( endpoint ), connection_id( ++my_impl->current_connection_id ), response_expected_timer( my_impl->thread_pool.get_executor() ), @@ -1205,11 +1241,11 @@ namespace eosio { } connection::connection(tcp::socket&& s, const string& listen_address, size_t block_sync_rate_limit) - : listen_address( listen_address ), - peer_addr(), + : peer_addr(), block_sync_rate_limit(block_sync_rate_limit), strand( my_impl->thread_pool.get_executor() ), socket( new tcp::socket( std::move(s) ) ), + listen_address( listen_address ), connection_id( ++my_impl->current_connection_id ), response_expected_timer( my_impl->thread_pool.get_executor() ), last_handshake_recv(), @@ -1219,10 +1255,10 @@ namespace eosio { fc_dlog( logger, "new connection object created for peer ${address}:${port} from listener ${addr}", ("address", log_remote_endpoint_ip)("port", log_remote_endpoint_port)("addr", listen_address) ); } - void connection::update_endpoints() { + void connection::update_endpoints(const tcp::endpoint& endpoint) { boost::system::error_code ec; boost::system::error_code ec2; - auto rep = socket->remote_endpoint(ec); + auto rep = endpoint == tcp::endpoint() ? socket->remote_endpoint(ec) : endpoint; auto lep = socket->local_endpoint(ec2); remote_endpoint_port = ec ? 0 : rep.port(); log_remote_endpoint_ip = ec ? unknown : rep.address().to_string(); @@ -2099,7 +2135,7 @@ namespace eosio { // static, thread safe void sync_manager::send_handshakes() { - my_impl->connections.for_each_connection( []( auto& ci ) { + my_impl->connections.for_each_connection( []( const connection_ptr& ci ) { if( ci->current() ) { ci->send_handshake(); } @@ -2575,7 +2611,7 @@ namespace eosio { void dispatch_manager::bcast_transaction(const packed_transaction_ptr& trx) { trx_buffer_factory buff_factory; const fc::time_point_sec now{fc::time_point::now()}; - my_impl->connections.for_each_connection( [this, &trx, &now, &buff_factory]( auto& cp ) { + my_impl->connections.for_each_connection( [this, &trx, &now, &buff_factory]( const connection_ptr& cp ) { if( !cp->is_transactions_connection() || !cp->current() ) { return; } @@ -2674,7 +2710,8 @@ namespace eosio { //------------------------------------------------------------------------ // called from any thread - bool connection::resolve_and_connect() { +#if 0 + bool connection::connect() { switch ( no_retry ) { case no_reason: case wrong_version: @@ -2686,12 +2723,6 @@ namespace eosio { return false; } - string::size_type colon = peer_address().find(':'); - if (colon == std::string::npos || colon == 0) { - fc_elog( logger, "Invalid peer address. must be \"host:port[:|]\": ${p}", ("p", peer_address()) ); - return false; - } - connection_ptr c = shared_from_this(); if( consecutive_immediate_connection_close > def_max_consecutive_immediate_connection_close || no_retry == benign_other ) { @@ -2703,17 +2734,15 @@ namespace eosio { } strand.post([c]() { - auto [host, port, type] = split_host_port_type(c->peer_address()); c->set_connection_type( c->peer_address() ); - auto resolver = std::make_shared( my_impl->thread_pool.get_executor() ); connection_wptr weak_conn = c; resolver->async_resolve(host, port, boost::asio::bind_executor( c->strand, [resolver, weak_conn, host = host, port = port]( const boost::system::error_code& err, const tcp::resolver::results_type& endpoints ) { auto c = weak_conn.lock(); if( !c ) return; if( !err ) { - c->connect( resolver, endpoints ); + c->connect( endpoints ); } else { fc_elog( logger, "Unable to resolve ${host}:${port} ${error}", ("host", host)("port", port)( "error", err.message() ) ); @@ -2724,17 +2753,23 @@ namespace eosio { } ); return true; } - +#endif // called from connection strand - void connection::connect( const std::shared_ptr& resolver, const tcp::resolver::results_type& endpoints ) { + void connection::connect( const tcp::resolver::results_type& endpoints, + connections_manager::connection_details_index& connections, + connections_manager::connection_details_index::const_iterator conn_details ) { set_state(connection_state::connecting); pending_message_buffer.reset(); buffer_queue.clear_out_queue(); boost::asio::async_connect( *socket, endpoints, boost::asio::bind_executor( strand, - [resolver, c = shared_from_this(), socket=socket]( const boost::system::error_code& err, const tcp::endpoint& endpoint ) { + [c = shared_from_this(), socket=socket, connections, conn_details]( const boost::system::error_code& err, const tcp::endpoint& endpoint ) { if( !err && socket->is_open() && socket == c->socket ) { - c->update_endpoints(); + auto& index = connections.get(); + index.modify_key(connections.project(conn_details), [endpoint](tcp::endpoint& e) { + e = endpoint; + }); + c->update_endpoints(endpoint); if( c->start_session() ) { c->send_handshake(); c->send_time(); @@ -2760,7 +2795,7 @@ namespace eosio { fc_elog(logger, "Error getting remote endpoint: ${m}", ("m", rec.message())); } else { paddr_str = paddr_add.to_string(); - connections.for_each_connection([&visitors, &from_addr, &paddr_str](auto& conn) { + connections.for_each_connection([&visitors, &from_addr, &paddr_str](const connection_ptr& conn) { if (conn->socket_is_open()) { if (conn->peer_address().empty()) { ++visitors; @@ -3216,7 +3251,7 @@ namespace eosio { log_p2p_address = msg.p2p_address; my_impl->mark_bp_connection(this); - if (my_impl->exceeding_connection_limit(this)) { + if (my_impl->exceeding_connection_limit(shared_from_this())) { // When auto bp peering is enabled, create_session() check doesn't have enough information to determine // if a client is a BP peer. In create_session(), it only has the peer address which a node is connecting // from, but it would be different from the address it is listening. The only way to make sure is when the @@ -3233,7 +3268,7 @@ namespace eosio { set_connection_type( msg.p2p_address ); peer_dlog( this, "checking for duplicate" ); - auto is_duplicate = [&](const auto& check) { + auto is_duplicate = [&](const connection_ptr& check) { if(check.get() == this) return false; fc::unique_lock g_check_conn( check->conn_mtx ); @@ -3787,7 +3822,7 @@ namespace eosio { } auto current_time = std::chrono::system_clock::now(); - my->connections.for_each_connection( [current_time]( auto& c ) { + my->connections.for_each_connection( [current_time]( const connection_ptr& c ) { if( c->socket_is_open() ) { c->strand.post([c, current_time]() { c->check_heartbeat(current_time); @@ -4340,7 +4375,7 @@ namespace eosio { /// RPC API string net_plugin::connect( const string& host ) { - return my->connections.connect( host, *my->p2p_addresses.begin() ); + return my->connections.resolve_and_connect( host, *my->p2p_addresses.begin() ); } /// RPC API @@ -4386,7 +4421,7 @@ namespace eosio { size_t connections_manager::number_connections() const { std::lock_guard g(connections_mtx); - return connections.size(); + return peer_ips.size(); } void connections_manager::add_supplied_peers(const vector& peers ) { @@ -4415,9 +4450,8 @@ namespace eosio { } void connections_manager::connect_supplied_peers(const string& p2p_address) { - std::lock_guard g(connections_mtx); for (const auto& peer : supplied_peers) { - connect_i(peer, p2p_address); + resolve_and_connect(peer, p2p_address); } } @@ -4427,23 +4461,54 @@ namespace eosio { } // called by API - string connections_manager::connect( const string& host, const string& p2p_address ) { + string connections_manager::resolve_and_connect( const string& peer_address, const string& listen_address ) { + string::size_type colon = peer_address.find(':'); + if (colon == std::string::npos || colon == 0) { + fc_elog( logger, "Invalid peer address. must be \"host:port[:|]\": ${p}", ("p", peer_address) ); + return "invalid peer address"; + } + std::lock_guard g( connections_mtx ); - if( find_connection_i( host ) ) + if( find_connection_i( peer_address ) ) return "already connected"; - connect_i( host, p2p_address ); - supplied_peers.insert(host); + supplied_peers.insert(peer_address); + auto [host, port, type] = split_host_port_type(peer_address); + + auto resolver = std::make_shared( my_impl->thread_pool.get_executor() ); + + resolver->async_resolve(host, port, + [resolver, host = host, port = port, peer_address = peer_address, listen_address = listen_address, this]( const boost::system::error_code& err, const tcp::resolver::results_type& results ) { + connection_ptr c = std::make_shared( peer_address, listen_address ); + c->set_heartbeat_timeout( heartbeat_timeout ); + vector eps{results.begin(), results.end()}; + std::lock_guard g( connections_mtx ); + auto [it, inserted] = peer_ips.insert( connection_details{ + .host = peer_address, + .c = std::move(c), + .ips = std::move(eps) + }); + if( !err ) { + c->connect( results, peer_ips, it ); + } else { + fc_elog( logger, "Unable to resolve ${host}:${port} ${error}", + ("host", host)("port", port)( "error", err.message() ) ); + c->set_state(connection::connection_state::closed); + ++c->consecutive_immediate_connection_close; + } + } ); + return "added connection"; } // called by API string connections_manager::disconnect( const string& host ) { std::lock_guard g( connections_mtx ); - if( auto c = find_connection_i( host ) ) { - fc_ilog( logger, "disconnecting: ${cid}", ("cid", c->connection_id) ); - c->close(); - connections.erase(c); + auto& index = peer_ips.get(); + if( auto i = index.find( host ); i != index.end() ) { + fc_ilog( logger, "disconnecting: ${cid}", ("cid", i->c->connection_id) ); + i->c->close(); + peer_ips.erase(i); supplied_peers.erase(host); return "connection removed"; } @@ -4451,13 +4516,14 @@ namespace eosio { } void connections_manager::close_all() { - fc_ilog( logger, "close all ${s} connections", ("s", connections.size()) ); + auto& index = peer_ips.get(); + fc_ilog( logger, "close all ${s} connections", ("s", index.size()) ); std::lock_guard g( connections_mtx ); - for( auto& con : connections ) { - fc_dlog( logger, "close: ${cid}", ("cid", con->connection_id) ); - con->close( false, true ); + for( const connection_ptr& c : index ) { + fc_dlog( logger, "close: ${cid}", ("cid", c->connection_id) ); + c->close( false, true ); } - connections.clear(); + peer_ips.clear(); } std::optional connections_manager::status( const string& host )const { @@ -4472,8 +4538,9 @@ namespace eosio { vector connections_manager::connection_statuses()const { vector result; std::shared_lock g( connections_mtx ); - result.reserve( connections.size() ); - for( const auto& c : connections ) { + auto& index = peer_ips.get(); + result.reserve( index.size() ); + for( const connection_ptr& c : index ) { result.push_back( c->get_status() ); } return result; @@ -4481,29 +4548,13 @@ namespace eosio { // call with connections_mtx connection_ptr connections_manager::find_connection_i( const string& host )const { - for( const auto& c : connections ) { - if (c->peer_address() == host) - return c; - } + auto& index = peer_ips.get(); + auto iter = index.find(host); + if(iter != index.end()) + return iter->c; return {}; } - // call with connections_mtx - void connections_manager::connect_i( const string& host, const string& p2p_address ) { - connection_ptr c = std::make_shared( host, p2p_address ); - fc_dlog( logger, "calling active connector: ${h}", ("h", host) ); - if( c->resolve_and_connect() ) { - fc_dlog( logger, "adding new connection to the list: ${host} ${cid}", ("host", host)("cid", c->connection_id) ); - add_i( std::move(c) ); - } - } - - // call with connections_mtx - void connections_manager::add_i(connection_ptr&& c) { - c->set_heartbeat_timeout( heartbeat_timeout ); - connections.insert( std::move(c) ); - } - // called from any thread void connections_manager::start_conn_timer() { start_conn_timer(connector_period, {}); // this locks mutex @@ -4535,13 +4586,14 @@ namespace eosio { auto max_time = fc::time_point::now().safe_add(max_cleanup_time); auto from = from_connection.lock(); std::unique_lock g( connections_mtx ); - auto it = (from ? connections.find(from) : connections.begin()); - if (it == connections.end()) it = connections.begin(); + auto& index = peer_ips.get(); + auto it = (from ? index.find(from) : index.begin()); + if (it == index.end()) it = index.begin(); size_t num_rm = 0, num_clients = 0, num_peers = 0, num_bp_peers = 0; - net_plugin::p2p_per_connection_metrics per_connection(connections.size()); - while (it != connections.end()) { + net_plugin::p2p_per_connection_metrics per_connection(index.size()); + while (it != index.end()) { if (fc::time_point::now() >= max_time) { - connection_wptr wit = *it; + connection_wptr wit = (*it).c; g.unlock(); fc_dlog( logger, "Exiting connection monitor early, ran out of time: ${t}", ("t", max_time - fc::time_point::now()) ); fc_ilog( logger, "p2p client connections: ${num}/${max}, peer connections: ${pnum}/${pmax}", @@ -4549,42 +4601,43 @@ namespace eosio { start_conn_timer( std::chrono::milliseconds( 1 ), wit ); // avoid exhausting return; } - if ((*it)->is_bp_connection) { + const connection_ptr& c = it->c; + if (c->is_bp_connection) { ++num_bp_peers; - } else if ((*it)->incoming()) { + } else if (c->incoming()) { ++num_clients; } else { ++num_peers; } if (update_p2p_connection_metrics) { - fc::unique_lock g_conn((*it)->conn_mtx); - boost::asio::ip::address_v6::bytes_type addr = (*it)->remote_endpoint_ip_array; + fc::unique_lock g_conn(c->conn_mtx); + boost::asio::ip::address_v6::bytes_type addr = c->remote_endpoint_ip_array; g_conn.unlock(); net_plugin::p2p_per_connection_metrics::connection_metric metrics{ - .connection_id = (*it)->connection_id + .connection_id = c->connection_id , .address = addr - , .port = (*it)->get_remote_endpoint_port() - , .accepting_blocks = (*it)->is_blocks_connection() - , .last_received_block = (*it)->get_last_received_block_num() - , .first_available_block = (*it)->get_peer_start_block_num() - , .last_available_block = (*it)->get_peer_head_block_num() - , .unique_first_block_count = (*it)->get_unique_blocks_rcvd_count() - , .latency = (*it)->get_peer_ping_time_ns() - , .bytes_received = (*it)->get_bytes_received() - , .last_bytes_received = (*it)->get_last_bytes_received() - , .bytes_sent = (*it)->get_bytes_sent() - , .last_bytes_sent = (*it)->get_last_bytes_sent() - , .block_sync_bytes_sent = (*it)->get_block_sync_bytes_sent() - , .connection_start_time = (*it)->connection_start_time - , .log_p2p_address = (*it)->log_p2p_address + , .port = c->get_remote_endpoint_port() + , .accepting_blocks = c->is_blocks_connection() + , .last_received_block = c->get_last_received_block_num() + , .first_available_block = c->get_peer_start_block_num() + , .last_available_block = c->get_peer_head_block_num() + , .unique_first_block_count = c->get_unique_blocks_rcvd_count() + , .latency = c->get_peer_ping_time_ns() + , .bytes_received = c->get_bytes_received() + , .last_bytes_received = c->get_last_bytes_received() + , .bytes_sent = c->get_bytes_sent() + , .last_bytes_sent = c->get_last_bytes_sent() + , .block_sync_bytes_sent = c->get_block_sync_bytes_sent() + , .connection_start_time = c->connection_start_time + , .log_p2p_address = c->log_p2p_address }; per_connection.peers.push_back(metrics); } - if (!(*it)->socket_is_open() && (*it)->state() != connection::connection_state::connecting) { - if (!(*it)->incoming()) { - if (!(*it)->resolve_and_connect()) { - it = connections.erase(it); + if (!c->socket_is_open() && c->state() != connection::connection_state::connecting) { + if (!c->incoming()) { + if (!resolve_and_connect(c->peer_address(), c->listen_address)) { + it = index.erase(it); --num_peers; ++num_rm; continue; @@ -4592,7 +4645,7 @@ namespace eosio { } else { --num_clients; ++num_rm; - it = connections.erase(it); + it = index.erase(it); continue; } } diff --git a/plugins/net_plugin/tests/auto_bp_peering_unittest.cpp b/plugins/net_plugin/tests/auto_bp_peering_unittest.cpp index ddfeba7b1c..d9e0594793 100644 --- a/plugins/net_plugin/tests/auto_bp_peering_unittest.cpp +++ b/plugins/net_plugin/tests/auto_bp_peering_unittest.cpp @@ -16,9 +16,9 @@ using namespace std::literals::string_literals; struct mock_connections_manager { uint32_t max_client_count = 0; - std::vector connections; + std::vector> connections; - std::function connect; + std::function resolve_and_connect; std::function disconnect; uint32_t get_max_client_count() const { return max_client_count; } @@ -26,7 +26,7 @@ struct mock_connections_manager { template void for_each_connection(Function&& func) const { for (auto c : connections) { - if (!func(&c)) + if (!func(c)) return; } } @@ -166,7 +166,7 @@ BOOST_AUTO_TEST_CASE(test_on_pending_schedule) { std::vector connected_hosts; - plugin.connections.connect = [&connected_hosts](std::string host, std::string p2p_address) { connected_hosts.push_back(host); }; + plugin.connections.resolve_and_connect = [&connected_hosts](std::string host, std::string p2p_address) { connected_hosts.push_back(host); }; // make sure nothing happens when it is not in_sync plugin.is_in_sync = false; @@ -210,7 +210,7 @@ BOOST_AUTO_TEST_CASE(test_on_active_schedule1) { plugin.config.my_bp_accounts = { "prodd"_n, "produ"_n }; plugin.active_neighbors = { "proda"_n, "prodh"_n, "prodn"_n }; - plugin.connections.connect = [](std::string host, std::string p2p_address) {}; + plugin.connections.resolve_and_connect = [](std::string host, std::string p2p_address) {}; std::vector disconnected_hosts; plugin.connections.disconnect = [&disconnected_hosts](std::string host) { disconnected_hosts.push_back(host); }; @@ -246,7 +246,7 @@ BOOST_AUTO_TEST_CASE(test_on_active_schedule2) { plugin.config.my_bp_accounts = { "prodd"_n, "produ"_n }; plugin.active_neighbors = { "proda"_n, "prodh"_n, "prodn"_n }; - plugin.connections.connect = [](std::string host, std::string p2p_address) {}; + plugin.connections.resolve_and_connect = [](std::string host, std::string p2p_address) {}; std::vector disconnected_hosts; plugin.connections.disconnect = [&disconnected_hosts](std::string host) { disconnected_hosts.push_back(host); }; @@ -272,24 +272,24 @@ BOOST_AUTO_TEST_CASE(test_exceeding_connection_limit) { plugin.config.my_bp_accounts = { "prodd"_n, "produ"_n }; plugin.connections.max_client_count = 1; plugin.connections.connections = { - { .is_bp_connection = true, .is_open = true, .handshake_received = true }, // 0 - { .is_bp_connection = true, .is_open = true, .handshake_received = false }, // 1 - { .is_bp_connection = true, .is_open = false, .handshake_received = true }, // 2 - { .is_bp_connection = true, .is_open = false, .handshake_received = false }, // 3 - { .is_bp_connection = false, .is_open = true, .handshake_received = true }, // 4 - { .is_bp_connection = false, .is_open = true, .handshake_received = false }, // 5 - { .is_bp_connection = false, .is_open = true, .handshake_received = true }, // 6 - { .is_bp_connection = false, .is_open = false, .handshake_received = false } // 7 + std::make_shared( true, true, true ), // 0 + std::make_shared( true, true, false ), // 1 + std::make_shared( true, false, true ), // 2 + std::make_shared( true, false, false ), // 3 + std::make_shared( false, true, true ), // 4 + std::make_shared( false, true, false ), // 5 + std::make_shared( false, true, true ), // 6 + std::make_shared( false, false, false ) // 7 }; BOOST_CHECK_EQUAL(plugin.num_established_clients(), 2u); - BOOST_CHECK(!plugin.exceeding_connection_limit(&plugin.connections.connections[0])); - BOOST_CHECK(!plugin.exceeding_connection_limit(&plugin.connections.connections[1])); - BOOST_CHECK(!plugin.exceeding_connection_limit(&plugin.connections.connections[2])); - BOOST_CHECK(!plugin.exceeding_connection_limit(&plugin.connections.connections[3])); - BOOST_CHECK(plugin.exceeding_connection_limit(&plugin.connections.connections[4])); - BOOST_CHECK(!plugin.exceeding_connection_limit(&plugin.connections.connections[5])); - BOOST_CHECK(plugin.exceeding_connection_limit(&plugin.connections.connections[6])); - BOOST_CHECK(!plugin.exceeding_connection_limit(&plugin.connections.connections[7])); + BOOST_CHECK(!plugin.exceeding_connection_limit(plugin.connections.connections[0])); + BOOST_CHECK(!plugin.exceeding_connection_limit(plugin.connections.connections[1])); + BOOST_CHECK(!plugin.exceeding_connection_limit(plugin.connections.connections[2])); + BOOST_CHECK(!plugin.exceeding_connection_limit(plugin.connections.connections[3])); + BOOST_CHECK(plugin.exceeding_connection_limit(plugin.connections.connections[4])); + BOOST_CHECK(!plugin.exceeding_connection_limit(plugin.connections.connections[5])); + BOOST_CHECK(plugin.exceeding_connection_limit(plugin.connections.connections[6])); + BOOST_CHECK(!plugin.exceeding_connection_limit(plugin.connections.connections[7])); } From 92e402213e6c0d502f63a8088a5b6c6add5cdf0d Mon Sep 17 00:00:00 2001 From: Jonathan Giszczak Date: Mon, 11 Sep 2023 15:53:19 -0500 Subject: [PATCH 11/46] Fix build error. Lambda captures by value are const. Update connections_manager::add method. Clean up cruft and rename connection data structure back to 'connections'. --- plugins/net_plugin/net_plugin.cpp | 86 +++++++++++-------------------- 1 file changed, 30 insertions(+), 56 deletions(-) diff --git a/plugins/net_plugin/net_plugin.cpp b/plugins/net_plugin/net_plugin.cpp index 5021096ced..7f3159046b 100644 --- a/plugins/net_plugin/net_plugin.cpp +++ b/plugins/net_plugin/net_plugin.cpp @@ -365,8 +365,7 @@ namespace eosio { private: alignas(hardware_destructive_interference_size) mutable std::shared_mutex connections_mtx; - //chain::flat_set connections GUARDED_BY(connections_mtx); - connection_details_index peer_ips GUARDED_BY(connections_mtx); + connection_details_index connections GUARDED_BY(connections_mtx); chain::flat_set supplied_peers; alignas(hardware_destructive_interference_size) @@ -973,8 +972,6 @@ namespace eosio { bool populate_handshake( handshake_message& hello ) const; -// bool connect(); - //typedef boost:multi_index::index void connections_manager::for_each_connection( Function&& f ) const { std::shared_lock g( connections_mtx ); - auto& index = peer_ips.get(); + auto& index = connections.get(); std::for_each(index.begin(), index.end(), std::forward(f)); } template void connections_manager::for_each_block_connection( Function&& f ) const { std::shared_lock g( connections_mtx ); - auto& index = peer_ips.get(); + auto& index = connections.get(); for( const connection_ptr& c : index ) { if (c->is_blocks_connection()) { f(c); @@ -1204,14 +1201,14 @@ namespace eosio { template bool connections_manager::any_of_connections(UnaryPredicate&& p) const { std::shared_lock g(connections_mtx); - auto& index = peer_ips.get(); + auto& index = connections.get(); return std::any_of(index.cbegin(), index.cend(), std::forward(p)); } template bool connections_manager::any_of_block_connections(UnaryPredicate&& p) const { std::shared_lock g( connections_mtx ); - auto& index = peer_ips.get(); + auto& index = connections.get(); for( const connection_ptr& c : index ) { if( c->is_blocks_connection() ) { if (p(c)) @@ -2709,9 +2706,10 @@ namespace eosio { //------------------------------------------------------------------------ - // called from any thread -#if 0 - bool connection::connect() { + // called from connection strand + void connection::connect( const tcp::resolver::results_type& endpoints, + connections_manager::connection_details_index& connections, + connections_manager::connection_details_index::const_iterator conn_details ) { switch ( no_retry ) { case no_reason: case wrong_version: @@ -2720,50 +2718,21 @@ namespace eosio { break; default: fc_dlog( logger, "Skipping connect due to go_away reason ${r}",("r", reason_str( no_retry ))); - return false; + return; } - - connection_ptr c = shared_from_this(); - if( consecutive_immediate_connection_close > def_max_consecutive_immediate_connection_close || no_retry == benign_other ) { fc::microseconds connector_period = my_impl->connections.get_connector_period(); fc::lock_guard g( conn_mtx ); if( last_close == fc::time_point() || last_close > fc::time_point::now() - connector_period ) { - return true; // true so doesn't remove from valid connections + return; } } - - strand.post([c]() { - c->set_connection_type( c->peer_address() ); - - connection_wptr weak_conn = c; - resolver->async_resolve(host, port, boost::asio::bind_executor( c->strand, - [resolver, weak_conn, host = host, port = port]( const boost::system::error_code& err, const tcp::resolver::results_type& endpoints ) { - auto c = weak_conn.lock(); - if( !c ) return; - if( !err ) { - c->connect( endpoints ); - } else { - fc_elog( logger, "Unable to resolve ${host}:${port} ${error}", - ("host", host)("port", port)( "error", err.message() ) ); - c->set_state(connection_state::closed); - ++c->consecutive_immediate_connection_close; - } - } ) ); - } ); - return true; - } -#endif - // called from connection strand - void connection::connect( const tcp::resolver::results_type& endpoints, - connections_manager::connection_details_index& connections, - connections_manager::connection_details_index::const_iterator conn_details ) { set_state(connection_state::connecting); pending_message_buffer.reset(); buffer_queue.clear_out_queue(); boost::asio::async_connect( *socket, endpoints, boost::asio::bind_executor( strand, - [c = shared_from_this(), socket=socket, connections, conn_details]( const boost::system::error_code& err, const tcp::endpoint& endpoint ) { + [c = shared_from_this(), socket=socket, &connections, conn_details]( const boost::system::error_code& err, const tcp::endpoint& endpoint ) { if( !err && socket->is_open() && socket == c->socket ) { auto& index = connections.get(); index.modify_key(connections.project(conn_details), [endpoint](tcp::endpoint& e) { @@ -4421,7 +4390,7 @@ namespace eosio { size_t connections_manager::number_connections() const { std::lock_guard g(connections_mtx); - return peer_ips.size(); + return connections.size(); } void connections_manager::add_supplied_peers(const vector& peers ) { @@ -4457,7 +4426,12 @@ namespace eosio { void connections_manager::add( connection_ptr c ) { std::lock_guard g( connections_mtx ); - add_i( std::move(c) ); + boost::system::error_code ec; + auto endpoint = c->socket->remote_endpoint(ec); + connections.insert( connection_details{ + .host = c->peer_address(), + .c = std::move(c), + .active_ip = endpoint} ); } // called by API @@ -4483,18 +4457,18 @@ namespace eosio { c->set_heartbeat_timeout( heartbeat_timeout ); vector eps{results.begin(), results.end()}; std::lock_guard g( connections_mtx ); - auto [it, inserted] = peer_ips.insert( connection_details{ + auto [it, inserted] = connections.insert( connection_details{ .host = peer_address, .c = std::move(c), .ips = std::move(eps) }); if( !err ) { - c->connect( results, peer_ips, it ); + it->c->connect( results, connections, it ); } else { fc_elog( logger, "Unable to resolve ${host}:${port} ${error}", ("host", host)("port", port)( "error", err.message() ) ); - c->set_state(connection::connection_state::closed); - ++c->consecutive_immediate_connection_close; + it->c->set_state(connection::connection_state::closed); + ++(it->c->consecutive_immediate_connection_close); } } ); @@ -4504,11 +4478,11 @@ namespace eosio { // called by API string connections_manager::disconnect( const string& host ) { std::lock_guard g( connections_mtx ); - auto& index = peer_ips.get(); + auto& index = connections.get(); if( auto i = index.find( host ); i != index.end() ) { fc_ilog( logger, "disconnecting: ${cid}", ("cid", i->c->connection_id) ); i->c->close(); - peer_ips.erase(i); + connections.erase(i); supplied_peers.erase(host); return "connection removed"; } @@ -4516,14 +4490,14 @@ namespace eosio { } void connections_manager::close_all() { - auto& index = peer_ips.get(); + auto& index = connections.get(); fc_ilog( logger, "close all ${s} connections", ("s", index.size()) ); std::lock_guard g( connections_mtx ); for( const connection_ptr& c : index ) { fc_dlog( logger, "close: ${cid}", ("cid", c->connection_id) ); c->close( false, true ); } - peer_ips.clear(); + connections.clear(); } std::optional connections_manager::status( const string& host )const { @@ -4538,7 +4512,7 @@ namespace eosio { vector connections_manager::connection_statuses()const { vector result; std::shared_lock g( connections_mtx ); - auto& index = peer_ips.get(); + auto& index = connections.get(); result.reserve( index.size() ); for( const connection_ptr& c : index ) { result.push_back( c->get_status() ); @@ -4548,7 +4522,7 @@ namespace eosio { // call with connections_mtx connection_ptr connections_manager::find_connection_i( const string& host )const { - auto& index = peer_ips.get(); + auto& index = connections.get(); auto iter = index.find(host); if(iter != index.end()) return iter->c; @@ -4586,7 +4560,7 @@ namespace eosio { auto max_time = fc::time_point::now().safe_add(max_cleanup_time); auto from = from_connection.lock(); std::unique_lock g( connections_mtx ); - auto& index = peer_ips.get(); + auto& index = connections.get(); auto it = (from ? index.find(from) : index.begin()); if (it == index.end()) it = index.begin(); size_t num_rm = 0, num_clients = 0, num_peers = 0, num_bp_peers = 0; From b24f8e3b847e3e819e6657b0f326f56ed3d1022b Mon Sep 17 00:00:00 2001 From: Jonathan Giszczak Date: Mon, 11 Sep 2023 16:25:33 -0500 Subject: [PATCH 12/46] Fix bare numeric value for peer throttle. --- plugins/net_plugin/net_plugin.cpp | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/plugins/net_plugin/net_plugin.cpp b/plugins/net_plugin/net_plugin.cpp index 7f3159046b..87b120e0be 100644 --- a/plugins/net_plugin/net_plugin.cpp +++ b/plugins/net_plugin/net_plugin.cpp @@ -4054,7 +4054,9 @@ namespace eosio { std::smatch units_match; std::regex_match(units, units_match, units_regex); try { - EOS_ASSERT(units_match.size() == 2, plugin_config_exception, "invalid block sync rate limit specification: ${limit}", ("limit", units)); + if( units.length() > 0 ) { + EOS_ASSERT(units_match.size() == 2, plugin_config_exception, "invalid block sync rate limit specification: ${limit}", ("limit", units)); + } block_sync_rate_limit = boost::numeric_cast(limit * prefix_multipliers.at(units_match[1].str())); fc_dlog( logger, "setting block_sync_rate_limit to ${limit} bytes per second", ("limit", block_sync_rate_limit)); } catch (boost::numeric::bad_numeric_cast&) { From 3f67034a596615c49a6fea41d6e29a55978d87e2 Mon Sep 17 00:00:00 2001 From: Jonathan Giszczak Date: Mon, 11 Sep 2023 19:08:09 -0500 Subject: [PATCH 13/46] Update supplied_peers only once per configured peer and once per API call. --- plugins/net_plugin/net_plugin.cpp | 9 +++++++-- 1 file changed, 7 insertions(+), 2 deletions(-) diff --git a/plugins/net_plugin/net_plugin.cpp b/plugins/net_plugin/net_plugin.cpp index 87b120e0be..bd6687f9a3 100644 --- a/plugins/net_plugin/net_plugin.cpp +++ b/plugins/net_plugin/net_plugin.cpp @@ -407,6 +407,7 @@ namespace eosio { void stop_conn_timer(); void add(connection_ptr c); + string connect(const string& host, const string& p2p_address); string resolve_and_connect(const string& host, const string& p2p_address); string disconnect(const string& host); void close_all(); @@ -4346,7 +4347,7 @@ namespace eosio { /// RPC API string net_plugin::connect( const string& host ) { - return my->connections.resolve_and_connect( host, *my->p2p_addresses.begin() ); + return my->connections.connect( host, *my->p2p_addresses.begin() ); } /// RPC API @@ -4437,6 +4438,11 @@ namespace eosio { } // called by API + string connections_manager::connect( const string& host, const string& p2p_address ) { + supplied_peers.insert(host); + return resolve_and_connect( host, p2p_address ); + } + string connections_manager::resolve_and_connect( const string& peer_address, const string& listen_address ) { string::size_type colon = peer_address.find(':'); if (colon == std::string::npos || colon == 0) { @@ -4448,7 +4454,6 @@ namespace eosio { if( find_connection_i( peer_address ) ) return "already connected"; - supplied_peers.insert(peer_address); auto [host, port, type] = split_host_port_type(peer_address); auto resolver = std::make_shared( my_impl->thread_pool.get_executor() ); From e59451dbfb315a9475ec0e262e28b777e07fb469 Mon Sep 17 00:00:00 2001 From: Jonathan Giszczak Date: Fri, 15 Sep 2023 11:00:53 -0500 Subject: [PATCH 14/46] Remove encapsulation violation. Address peer review comments. --- .../libfc/include/fc/exception/exception.hpp | 6 ++- plugins/net_plugin/net_plugin.cpp | 48 ++++++++++++++----- 2 files changed, 41 insertions(+), 13 deletions(-) diff --git a/libraries/libfc/include/fc/exception/exception.hpp b/libraries/libfc/include/fc/exception/exception.hpp index c3baa2c9d5..80fe7694e3 100644 --- a/libraries/libfc/include/fc/exception/exception.hpp +++ b/libraries/libfc/include/fc/exception/exception.hpp @@ -9,6 +9,9 @@ #include #include #include +#include +#include +#include namespace fc { @@ -394,7 +397,8 @@ namespace fc */ #define FC_THROW_EXCEPTION( EXCEPTION, FORMAT, ... ) \ FC_MULTILINE_MACRO_BEGIN \ - throw EXCEPTION( FC_LOG_MESSAGE( error, FORMAT, __VA_ARGS__ ) ); \ + using traced = boost::error_info; \ + throw boost::enable_error_info( EXCEPTION( FC_LOG_MESSAGE( error, FORMAT, __VA_ARGS__ ) ) ) << traced(boost::stacktrace::stacktrace()); \ FC_MULTILINE_MACRO_END diff --git a/plugins/net_plugin/net_plugin.cpp b/plugins/net_plugin/net_plugin.cpp index bd6687f9a3..af86d9f31d 100644 --- a/plugins/net_plugin/net_plugin.cpp +++ b/plugins/net_plugin/net_plugin.cpp @@ -25,6 +25,8 @@ #include #include #include +#include +#include #include #include @@ -365,7 +367,7 @@ namespace eosio { private: alignas(hardware_destructive_interference_size) mutable std::shared_mutex connections_mtx; - connection_details_index connections GUARDED_BY(connections_mtx); + connection_details_index connections; chain::flat_set supplied_peers; alignas(hardware_destructive_interference_size) @@ -409,6 +411,7 @@ namespace eosio { void add(connection_ptr c); string connect(const string& host, const string& p2p_address); string resolve_and_connect(const string& host, const string& p2p_address); + void update_connection_endpoint(connection_details_index::const_iterator it, const tcp::endpoint& endpoint); string disconnect(const string& host); void close_all(); @@ -974,7 +977,6 @@ namespace eosio { bool populate_handshake( handshake_message& hello ) const; void connect( const tcp::resolver::results_type& endpoints, - connections_manager::connection_details_index& connections, connections_manager::connection_details_index::const_iterator conn_details ); void start_read_message(); @@ -2708,8 +2710,7 @@ namespace eosio { //------------------------------------------------------------------------ // called from connection strand - void connection::connect( const tcp::resolver::results_type& endpoints, - connections_manager::connection_details_index& connections, + void connection::connect( const tcp::resolver::results_type& endpoints, connections_manager::connection_details_index::const_iterator conn_details ) { switch ( no_retry ) { case no_reason: @@ -2733,12 +2734,9 @@ namespace eosio { buffer_queue.clear_out_queue(); boost::asio::async_connect( *socket, endpoints, boost::asio::bind_executor( strand, - [c = shared_from_this(), socket=socket, &connections, conn_details]( const boost::system::error_code& err, const tcp::endpoint& endpoint ) { + [c = shared_from_this(), socket=socket, conn_details]( const boost::system::error_code& err, const tcp::endpoint& endpoint ) { if( !err && socket->is_open() && socket == c->socket ) { - auto& index = connections.get(); - index.modify_key(connections.project(conn_details), [endpoint](tcp::endpoint& e) { - e = endpoint; - }); + my_impl->connections.update_connection_endpoint(conn_details, endpoint); c->update_endpoints(endpoint); if( c->start_session() ) { c->send_handshake(); @@ -4059,7 +4057,7 @@ namespace eosio { EOS_ASSERT(units_match.size() == 2, plugin_config_exception, "invalid block sync rate limit specification: ${limit}", ("limit", units)); } block_sync_rate_limit = boost::numeric_cast(limit * prefix_multipliers.at(units_match[1].str())); - fc_dlog( logger, "setting block_sync_rate_limit to ${limit} bytes per second", ("limit", block_sync_rate_limit)); + fc_ilog( logger, "setting block_sync_rate_limit to ${limit} megabytes per second", ("limit", double(block_sync_rate_limit)/1000000)); } catch (boost::numeric::bad_numeric_cast&) { EOS_THROW(plugin_config_exception, "block sync rate limit specification overflowed: ${limit}", ("limit", limit_str)); } @@ -4208,7 +4206,18 @@ namespace eosio { set_producer_accounts(producer_plug->producer_accounts()); thread_pool.start( thread_pool_size, []( const fc::exception& e ) { - fc_elog( logger, "Exception in net plugin thread pool, exiting: ${e}", ("e", e.to_detail_string()) ); + using traced = boost::error_info; + const boost::stacktrace::stacktrace* st = boost::get_error_info(e); + std::stringstream st_str; + if( st != nullptr ) { + st_str << *st; + fc_elog( logger, "stacktrace wasn't empty"); + fc_elog( logger, "Exception in net plugin thread pool, exiting:\n${e}${trace}", ("e", e.to_detail_string())("trace", st_str.str()) ); + } + else { + fc_elog( logger, "stacktrace was empty"); + fc_elog( logger, "Exception in net plugin thread pool, exiting:\n${e}", ("e", e.to_detail_string()) ); + } app().quit(); } ); @@ -4439,7 +4448,9 @@ namespace eosio { // called by API string connections_manager::connect( const string& host, const string& p2p_address ) { + std::unique_lock g( connections_mtx ); supplied_peers.insert(host); + g.unlock(); return resolve_and_connect( host, p2p_address ); } @@ -4470,7 +4481,7 @@ namespace eosio { .ips = std::move(eps) }); if( !err ) { - it->c->connect( results, connections, it ); + it->c->connect( results, it ); } else { fc_elog( logger, "Unable to resolve ${host}:${port} ${error}", ("host", host)("port", port)( "error", err.message() ) ); @@ -4482,6 +4493,16 @@ namespace eosio { return "added connection"; } + void connections_manager::update_connection_endpoint(connection_details_index::const_iterator it, + const tcp::endpoint& endpoint) { + fc_dlog(logger, "updating connection endpoint"); + std::unique_lock g( connections_mtx ); + auto& index = connections.get(); + index.modify_key(connections.project(it), [endpoint](tcp::endpoint& e) { + e = endpoint; + }); + } + // called by API string connections_manager::disconnect( const string& host ) { std::lock_guard g( connections_mtx ); @@ -4617,12 +4638,15 @@ namespace eosio { if (!c->socket_is_open() && c->state() != connection::connection_state::connecting) { if (!c->incoming()) { + g.unlock(); + fc_dlog(logger, "attempting to connect in connection_monitor"); if (!resolve_and_connect(c->peer_address(), c->listen_address)) { it = index.erase(it); --num_peers; ++num_rm; continue; } + g.lock(); } else { --num_clients; ++num_rm; From ec2d36dcbee917eda12d76601a0f13e69c909569 Mon Sep 17 00:00:00 2001 From: Jonathan Giszczak Date: Mon, 18 Sep 2023 16:15:49 -0500 Subject: [PATCH 15/46] Restored connection reconnect method. WIP --- plugins/net_plugin/net_plugin.cpp | 34 +++++++++++++++++++++++-------- 1 file changed, 25 insertions(+), 9 deletions(-) diff --git a/plugins/net_plugin/net_plugin.cpp b/plugins/net_plugin/net_plugin.cpp index af86d9f31d..75d3ada772 100644 --- a/plugins/net_plugin/net_plugin.cpp +++ b/plugins/net_plugin/net_plugin.cpp @@ -343,7 +343,7 @@ namespace eosio { std::string host; connection_ptr c; tcp::endpoint active_ip; - std::vector ips; + tcp::resolver::results_type ips; operator const connection_ptr&() const { return c; } }; @@ -412,6 +412,7 @@ namespace eosio { string connect(const string& host, const string& p2p_address); string resolve_and_connect(const string& host, const string& p2p_address); void update_connection_endpoint(connection_details_index::const_iterator it, const tcp::endpoint& endpoint); + const connection_details_index::index_const_iterator& get_connection_iterator(const connection_ptr& c); string disconnect(const string& host); void close_all(); @@ -976,6 +977,7 @@ namespace eosio { bool populate_handshake( handshake_message& hello ) const; + bool reconnect(); void connect( const tcp::resolver::results_type& endpoints, connections_manager::connection_details_index::const_iterator conn_details ); void start_read_message(); @@ -2709,9 +2711,7 @@ namespace eosio { //------------------------------------------------------------------------ - // called from connection strand - void connection::connect( const tcp::resolver::results_type& endpoints, - connections_manager::connection_details_index::const_iterator conn_details ) { + bool connection::reconnect() { switch ( no_retry ) { case no_reason: case wrong_version: @@ -2720,15 +2720,26 @@ namespace eosio { break; default: fc_dlog( logger, "Skipping connect due to go_away reason ${r}",("r", reason_str( no_retry ))); - return; + return false; } if( consecutive_immediate_connection_close > def_max_consecutive_immediate_connection_close || no_retry == benign_other ) { fc::microseconds connector_period = my_impl->connections.get_connector_period(); fc::lock_guard g( conn_mtx ); if( last_close == fc::time_point() || last_close > fc::time_point::now() - connector_period ) { - return; + return true; } } + connection_ptr c = shared_from_this(); + strand.post([c]() { + auto it = my_impl->connections.get_connection_iterator(c); + c->connect(it->ips, it); + }); + return true; + } + + // called from connection strand + void connection::connect( const tcp::resolver::results_type& endpoints, + connections_manager::connection_details_index::const_iterator conn_details ) { set_state(connection_state::connecting); pending_message_buffer.reset(); buffer_queue.clear_out_queue(); @@ -4473,12 +4484,11 @@ namespace eosio { [resolver, host = host, port = port, peer_address = peer_address, listen_address = listen_address, this]( const boost::system::error_code& err, const tcp::resolver::results_type& results ) { connection_ptr c = std::make_shared( peer_address, listen_address ); c->set_heartbeat_timeout( heartbeat_timeout ); - vector eps{results.begin(), results.end()}; std::lock_guard g( connections_mtx ); - auto [it, inserted] = connections.insert( connection_details{ + auto [it, inserted] = connections.emplace( connection_details{ .host = peer_address, .c = std::move(c), - .ips = std::move(eps) + .ips = results }); if( !err ) { it->c->connect( results, it ); @@ -4503,6 +4513,12 @@ namespace eosio { }); } + const connections_manager::connection_details_index::index_const_iterator& connections_manager::get_connection_iterator(const connection_ptr& c) { + std::lock_guard g( connections_mtx ); + const auto& index = connections.get(); + return index.find(c); + } + // called by API string connections_manager::disconnect( const string& host ) { std::lock_guard g( connections_mtx ); From 777c8d89532f17703d69e045ddfd7d1b6e302f80 Mon Sep 17 00:00:00 2001 From: Kevin Heifner Date: Mon, 18 Sep 2023 17:05:08 -0500 Subject: [PATCH 16/46] Remove stacktrace additions --- libraries/libfc/include/fc/exception/exception.hpp | 6 +----- plugins/net_plugin/net_plugin.cpp | 14 -------------- 2 files changed, 1 insertion(+), 19 deletions(-) diff --git a/libraries/libfc/include/fc/exception/exception.hpp b/libraries/libfc/include/fc/exception/exception.hpp index 80fe7694e3..c3baa2c9d5 100644 --- a/libraries/libfc/include/fc/exception/exception.hpp +++ b/libraries/libfc/include/fc/exception/exception.hpp @@ -9,9 +9,6 @@ #include #include #include -#include -#include -#include namespace fc { @@ -397,8 +394,7 @@ namespace fc */ #define FC_THROW_EXCEPTION( EXCEPTION, FORMAT, ... ) \ FC_MULTILINE_MACRO_BEGIN \ - using traced = boost::error_info; \ - throw boost::enable_error_info( EXCEPTION( FC_LOG_MESSAGE( error, FORMAT, __VA_ARGS__ ) ) ) << traced(boost::stacktrace::stacktrace()); \ + throw EXCEPTION( FC_LOG_MESSAGE( error, FORMAT, __VA_ARGS__ ) ); \ FC_MULTILINE_MACRO_END diff --git a/plugins/net_plugin/net_plugin.cpp b/plugins/net_plugin/net_plugin.cpp index af86d9f31d..68e40e1fea 100644 --- a/plugins/net_plugin/net_plugin.cpp +++ b/plugins/net_plugin/net_plugin.cpp @@ -25,8 +25,6 @@ #include #include #include -#include -#include #include #include @@ -4206,18 +4204,6 @@ namespace eosio { set_producer_accounts(producer_plug->producer_accounts()); thread_pool.start( thread_pool_size, []( const fc::exception& e ) { - using traced = boost::error_info; - const boost::stacktrace::stacktrace* st = boost::get_error_info(e); - std::stringstream st_str; - if( st != nullptr ) { - st_str << *st; - fc_elog( logger, "stacktrace wasn't empty"); - fc_elog( logger, "Exception in net plugin thread pool, exiting:\n${e}${trace}", ("e", e.to_detail_string())("trace", st_str.str()) ); - } - else { - fc_elog( logger, "stacktrace was empty"); - fc_elog( logger, "Exception in net plugin thread pool, exiting:\n${e}", ("e", e.to_detail_string()) ); - } app().quit(); } ); From 05be825d4662822d98960233be008f95f3c4deca Mon Sep 17 00:00:00 2001 From: Jonathan Giszczak Date: Tue, 19 Sep 2023 21:28:06 -0500 Subject: [PATCH 17/46] Update netApi connect test. --- tests/plugin_http_api_test.py | 5 +++++ 1 file changed, 5 insertions(+) diff --git a/tests/plugin_http_api_test.py b/tests/plugin_http_api_test.py index 0f49f458ee..c11a5cc21f 100755 --- a/tests/plugin_http_api_test.py +++ b/tests/plugin_http_api_test.py @@ -791,7 +791,12 @@ def test_NetApi(self) : ret_json = self.nodeos.processUrllibRequest(resource, command, self.empty_content_dict, endpoint=endpoint) self.assertEqual(ret_json["code"], 400) self.assertEqual(ret_json["error"]["code"], 3200006) + # connect with incomplete content parameter payload = "localhost" + ret_json = self.nodeos.processUrllibRequest(resource, command, payload, endpoint=endpoint) + self.assertEqual(ret_json["code"], 201) + self.assertEqual(ret_json["payload"], 'invalid peer address') + payload = "localhost:9877" ret_str = self.nodeos.processUrllibRequest(resource, command, payload, returnType=ReturnType.raw, endpoint=endpoint).decode('ascii') self.assertEqual("\"added connection\"", ret_str) From 24f4aad3e1b54175b21ecb66bbae8d7d6378c992 Mon Sep 17 00:00:00 2001 From: Kevin Heifner Date: Wed, 20 Sep 2023 11:39:01 -0500 Subject: [PATCH 18/46] GH-1295 Project index to default iterator --- plugins/net_plugin/net_plugin.cpp | 7 ++++--- 1 file changed, 4 insertions(+), 3 deletions(-) diff --git a/plugins/net_plugin/net_plugin.cpp b/plugins/net_plugin/net_plugin.cpp index 9878cf4fa6..cd42cdbfcb 100644 --- a/plugins/net_plugin/net_plugin.cpp +++ b/plugins/net_plugin/net_plugin.cpp @@ -416,7 +416,7 @@ namespace eosio { string connect(const string& host, const string& p2p_address); string resolve_and_connect(const string& host, const string& p2p_address); void update_connection_endpoint(connection_details_index::const_iterator it, const tcp::endpoint& endpoint); - const connection_details_index::index_const_iterator& get_connection_iterator(const connection_ptr& c); + connection_details_index::iterator get_connection_iterator(const connection_ptr& c); string disconnect(const string& host); void close_all(); @@ -4512,10 +4512,11 @@ namespace eosio { }); } - const connections_manager::connection_details_index::index_const_iterator& connections_manager::get_connection_iterator(const connection_ptr& c) { + connections_manager::connection_details_index::iterator connections_manager::get_connection_iterator(const connection_ptr& c) { std::lock_guard g( connections_mtx ); const auto& index = connections.get(); - return index.find(c); + auto i = index.find(c); + return connections.project(i); } // called by API From 97591fe467defed6d96d7efe6ffcf76d48198c92 Mon Sep 17 00:00:00 2001 From: Jonathan Giszczak Date: Wed, 20 Sep 2023 15:49:25 -0500 Subject: [PATCH 19/46] Use reconnect method as intended, and avoid threading issue. --- plugins/net_plugin/net_plugin.cpp | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/plugins/net_plugin/net_plugin.cpp b/plugins/net_plugin/net_plugin.cpp index cd42cdbfcb..e4d7b2dd53 100644 --- a/plugins/net_plugin/net_plugin.cpp +++ b/plugins/net_plugin/net_plugin.cpp @@ -4656,8 +4656,10 @@ namespace eosio { if (!c->incoming()) { g.unlock(); fc_dlog(logger, "attempting to connect in connection_monitor"); - if (!resolve_and_connect(c->peer_address(), c->listen_address)) { + if (!c->reconnect()) { + g.lock(); it = index.erase(it); + g.unlock(); --num_peers; ++num_rm; continue; From ffee0df209a0c348faea4310363e963f5ab8f366 Mon Sep 17 00:00:00 2001 From: Jonathan Giszczak Date: Wed, 20 Sep 2023 18:00:49 -0500 Subject: [PATCH 20/46] Use std::any_of when finding supplied peers to unlimit a connection. --- plugins/net_plugin/net_plugin.cpp | 10 ++++++---- 1 file changed, 6 insertions(+), 4 deletions(-) diff --git a/plugins/net_plugin/net_plugin.cpp b/plugins/net_plugin/net_plugin.cpp index e4d7b2dd53..577c00db57 100644 --- a/plugins/net_plugin/net_plugin.cpp +++ b/plugins/net_plugin/net_plugin.cpp @@ -424,7 +424,7 @@ namespace eosio { vector connection_statuses() const; template - void for_each_supplied_peer(Function&& f) const; + bool for_any_supplied_peer(Function&& f) const; template void for_each_connection(Function&& f) const; @@ -1184,9 +1184,9 @@ namespace eosio { template - void connections_manager::for_each_supplied_peer( Function&& f ) const { + bool connections_manager::for_any_supplied_peer( Function&& f ) const { std::shared_lock g( connections_mtx ); - std::for_each(supplied_peers.begin(), supplied_peers.end(), std::forward(f)); + return std::any_of(supplied_peers.begin(), supplied_peers.end(), std::forward(f)); } template @@ -2795,14 +2795,16 @@ namespace eosio { visitors < connections.get_max_client_count())) { fc_ilog(logger, "Accepted new connection: " + paddr_str); - connections.for_each_supplied_peer([&listen_address, &paddr_str, &limit](const string& peer_addr) { + connections.for_any_supplied_peer([&listen_address, &paddr_str, &limit](const string& peer_addr) { auto [host, port, type] = split_host_port_type(peer_addr); if (host == paddr_str) { if (limit > 0) { fc_dlog(logger, "Connection inbound to ${la} from ${a} is a configured p2p-peer-address and will not be throttled", ("la", listen_address)("a", paddr_str)); } limit = 0; + return true; } + return false; }); connection_ptr new_connection = std::make_shared(std::move(socket), listen_address, limit); From fb740ef57264587596c0844b0dd409809c3b3170 Mon Sep 17 00:00:00 2001 From: Jonathan Giszczak Date: Fri, 22 Sep 2023 20:10:38 -0500 Subject: [PATCH 21/46] Change language in p2p_multiple_listen_test for clarity. --- tests/p2p_multiple_listen_test.py | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/tests/p2p_multiple_listen_test.py b/tests/p2p_multiple_listen_test.py index 62f1534c63..7f537e9d35 100755 --- a/tests/p2p_multiple_listen_test.py +++ b/tests/p2p_multiple_listen_test.py @@ -75,7 +75,7 @@ assert conn['last_handshake']['p2p_address'].split()[0] == 'localhost:9878', f"Connected node is listening on '{conn['last_handshake']['p2p_address'].split()[0]}' instead of port 9878" elif conn['last_handshake']['agent'] == 'node-04': assert conn['last_handshake']['p2p_address'].split()[0] == 'localhost:9880', f"Connected node is listening on '{conn['last_handshake']['p2p_address'].split()[0]}' instead of port 9880" - assert open_socket_count == 2, 'Node 0 is expected to have only two open sockets' + assert open_socket_count == 2, 'Node 0 is expected to have exactly two open sockets' connections = cluster.nodes[2].processUrllibRequest('net', 'connections') open_socket_count = 0 @@ -84,7 +84,7 @@ open_socket_count += 1 assert conn['last_handshake']['agent'] == 'node-00', f"Connected node identifed as '{conn['last_handshake']['agent']}' instead of node-00" assert conn['last_handshake']['p2p_address'].split()[0] == 'ext-ip0:20000', f"Connected node is advertising '{conn['last_handshake']['p2p_address'].split()[0]}' instead of ext-ip0:20000" - assert open_socket_count == 1, 'Node 2 is expected to have only one open socket' + assert open_socket_count == 1, 'Node 2 is expected to have exactly one open socket' connections = cluster.nodes[4].processUrllibRequest('net', 'connections') open_socket_count = 0 @@ -93,7 +93,7 @@ open_socket_count += 1 assert conn['last_handshake']['agent'] == 'node-00', f"Connected node identifed as '{conn['last_handshake']['agent']}' instead of node-00" assert conn['last_handshake']['p2p_address'].split()[0] == 'ext-ip1:20001', f"Connected node is advertising '{conn['last_handshake']['p2p_address'].split()[0]} 'instead of ext-ip1:20001" - assert open_socket_count == 1, 'Node 4 is expected to have only one open socket' + assert open_socket_count == 1, 'Node 4 is expected to have exactly one open socket' testSuccessful=True finally: From 453bfcfe2e91b7833013fd3779ac626e1b4f5d43 Mon Sep 17 00:00:00 2001 From: Jonathan Giszczak Date: Fri, 22 Sep 2023 20:11:31 -0500 Subject: [PATCH 22/46] Tolerate duplicate (empty) peer addresses in connection manager. --- plugins/net_plugin/net_plugin.cpp | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/plugins/net_plugin/net_plugin.cpp b/plugins/net_plugin/net_plugin.cpp index 577c00db57..4351676af1 100644 --- a/plugins/net_plugin/net_plugin.cpp +++ b/plugins/net_plugin/net_plugin.cpp @@ -354,7 +354,7 @@ namespace eosio { using connection_details_index = multi_index_container< connection_details, indexed_by< - ordered_unique< + ordered_non_unique< tag, key<&connection_details::host> >, @@ -4558,10 +4558,10 @@ namespace eosio { vector connections_manager::connection_statuses()const { vector result; std::shared_lock g( connections_mtx ); - auto& index = connections.get(); + auto& index = connections.get(); result.reserve( index.size() ); for( const connection_ptr& c : index ) { - result.push_back( c->get_status() ); + result.emplace_back( c->get_status() ); } return result; } From 99f02c0641b0f776fa66f7fabc89f39c2a1d0e1d Mon Sep 17 00:00:00 2001 From: Jonathan Giszczak Date: Tue, 26 Sep 2023 19:27:34 -0500 Subject: [PATCH 23/46] Add rate limit parse unittest. --- plugins/net_plugin/net_plugin.cpp | 60 ++++++++++--------- plugins/net_plugin/tests/CMakeLists.txt | 10 +++- .../tests/rate_limit_parse_unittest.cpp | 54 +++++++++++++++++ 3 files changed, 96 insertions(+), 28 deletions(-) create mode 100644 plugins/net_plugin/tests/rate_limit_parse_unittest.cpp diff --git a/plugins/net_plugin/net_plugin.cpp b/plugins/net_plugin/net_plugin.cpp index 4351676af1..62ec05256a 100644 --- a/plugins/net_plugin/net_plugin.cpp +++ b/plugins/net_plugin/net_plugin.cpp @@ -573,7 +573,8 @@ namespace eosio { constexpr static uint16_t to_protocol_version(uint16_t v); - size_t parse_connection_rate_limit(const string& limit_str); + std::tuple parse_listen_address(const std::string& peer) const; + size_t parse_connection_rate_limit(const string& limit_str) const; void plugin_initialize(const variables_map& options); void plugin_startup(); void plugin_shutdown(); @@ -4056,13 +4057,34 @@ namespace eosio { return fc::json::from_string(s).as(); } - size_t net_plugin_impl::parse_connection_rate_limit( const std::string& limit_str) { + std::tuple net_plugin_impl::parse_listen_address( const std::string& address ) const { + auto listen_addr = address; + auto limit = string("0"); + auto last_colon_location = address.rfind(':'); + if( auto right_bracket_location = address.find(']'); right_bracket_location != address.npos ) { + if( std::count(address.begin()+right_bracket_location, address.end(), ':') > 1 ) { + listen_addr = std::string(address, 0, last_colon_location); + limit = std::string(address, last_colon_location+1); + } + } else { + if( auto colon_count = std::count(address.begin(), address.end(), ':'); colon_count > 1 ) { + EOS_ASSERT( colon_count <= 2, plugin_config_exception, "Invalid address specification ${addr}; IPv6 addresses must be enclosed in square brackets.", ("addr", address)); + listen_addr = std::string(address, 0, last_colon_location); + limit = std::string(address, last_colon_location+1); + } + } + auto block_sync_rate_limit = parse_connection_rate_limit(limit); + + return {listen_addr, block_sync_rate_limit}; + } + + size_t net_plugin_impl::parse_connection_rate_limit( const std::string& limit_str) const { std::istringstream in(limit_str); fc_dlog( logger, "parsing connection endpoint limit ${limit} with locale ${l}", ("limit", limit_str)("l", std::locale("").name())); in.imbue(std::locale("")); double limit{0}; in >> limit; - EOS_ASSERT(limit >= 0.0, plugin_config_exception, "block sync rate limit must be positive: ${limit}", ("limit", limit_str)); + EOS_ASSERT(limit >= 0.0, plugin_config_exception, "block sync rate limit must not be negative: ${limit}", ("limit", limit_str)); size_t block_sync_rate_limit = 0; if( limit > 0.0 ) { std::string units; @@ -4070,14 +4092,14 @@ namespace eosio { std::regex units_regex{"([KMGT]?[i]?)B/s"}; std::smatch units_match; std::regex_match(units, units_match, units_regex); - try { - if( units.length() > 0 ) { - EOS_ASSERT(units_match.size() == 2, plugin_config_exception, "invalid block sync rate limit specification: ${limit}", ("limit", units)); + if( units.length() > 0 ) { + EOS_ASSERT(units_match.size() == 2, plugin_config_exception, "invalid block sync rate limit specification: ${limit}", ("limit", units)); + try { + block_sync_rate_limit = boost::numeric_cast(limit * prefix_multipliers.at(units_match[1].str())); + fc_ilog( logger, "setting block_sync_rate_limit to ${limit} megabytes per second", ("limit", double(block_sync_rate_limit)/1000000)); + } catch (boost::numeric::bad_numeric_cast&) { + EOS_THROW(plugin_config_exception, "block sync rate limit specification overflowed: ${limit}", ("limit", limit_str)); } - block_sync_rate_limit = boost::numeric_cast(limit * prefix_multipliers.at(units_match[1].str())); - fc_ilog( logger, "setting block_sync_rate_limit to ${limit} megabytes per second", ("limit", double(block_sync_rate_limit)/1000000)); - } catch (boost::numeric::bad_numeric_cast&) { - EOS_THROW(plugin_config_exception, "block sync rate limit specification overflowed: ${limit}", ("limit", limit_str)); } } return block_sync_rate_limit; @@ -4298,23 +4320,7 @@ namespace eosio { std::string extra_listening_log_info = ", max clients is " + std::to_string(my->connections.get_max_client_count()); - auto listen_addr = address; - auto limit = string("0"); - auto last_colon_location = address.rfind(':'); - if( auto right_bracket_location = address.find(']'); right_bracket_location != address.npos ) { - if( std::count(address.begin()+right_bracket_location, address.end(), ':') > 1 ) { - listen_addr = std::string(address, 0, last_colon_location); - limit = std::string(address, last_colon_location+1); - } - } else { - if( auto colon_count = std::count(address.begin(), address.end(), ':'); colon_count > 1 ) { - EOS_ASSERT( colon_count <= 2, plugin_config_exception, "Invalid address specification ${addr}; IPv6 addresses must be enclosed in square brackets.", ("addr", address)); - listen_addr = std::string(address, 0, last_colon_location); - limit = std::string(address, last_colon_location+1); - } - } - - auto block_sync_rate_limit = my->parse_connection_rate_limit(limit); + auto [listen_addr, block_sync_rate_limit] = my->parse_listen_address(address); fc::create_listener( my->thread_pool.get_executor(), logger, accept_timeout, listen_addr, extra_listening_log_info, diff --git a/plugins/net_plugin/tests/CMakeLists.txt b/plugins/net_plugin/tests/CMakeLists.txt index bcabe6428f..210a748e07 100644 --- a/plugins/net_plugin/tests/CMakeLists.txt +++ b/plugins/net_plugin/tests/CMakeLists.txt @@ -5,4 +5,12 @@ target_link_libraries(auto_bp_peering_unittest eosio_chain) target_include_directories(auto_bp_peering_unittest PUBLIC "${CMAKE_CURRENT_SOURCE_DIR}/../include" ) -add_test(auto_bp_peering_unittest auto_bp_peering_unittest) \ No newline at end of file +add_test(auto_bp_peering_unittest auto_bp_peering_unittest) + +add_executable(rate_limit_parse_unittest rate_limit_parse_unittest.cpp) + +target_link_libraries(rate_limit_parse_unittest net_plugin) + +target_include_directories(rate_limit_parse_unittest PUBLIC "${CMAKE_CURRENT_SOURCE_DIR}/../include") + +add_test(rate_limit_parse_unittest rate_limit_parse_unittest) diff --git a/plugins/net_plugin/tests/rate_limit_parse_unittest.cpp b/plugins/net_plugin/tests/rate_limit_parse_unittest.cpp new file mode 100644 index 0000000000..01c84e8a67 --- /dev/null +++ b/plugins/net_plugin/tests/rate_limit_parse_unittest.cpp @@ -0,0 +1,54 @@ +#define BOOST_TEST_MODULE rate_limit_parsing +#include +#include "../net_plugin.cpp" + +BOOST_AUTO_TEST_CASE(test_parse_rate_limit) { + eosio::net_plugin_impl plugin_impl; + std::vector p2p_addresses = { + "0.0.0.0:9876" + , "0.0.0.0:9776:0" + , "0.0.0.0:9877:640KB/s" + , "192.168.0.1:9878:20MiB/s" + , "localhost:9879:0.5KB/s" + , "[2001:db8:85a3:8d3:1319:8a2e:370:7348]:9876:250KB/s" + , "[::1]:9876:250KB/s" + , "2001:db8:85a3:8d3:1319:8a2e:370:7348:9876:250KB/s" + , "[::1]:9876:-250KB/s" + , "0.0.0.0:9877:640Kb/s" + , "0.0.0.0:9877:999999999999999999999999999TiB/s" + }; + size_t which = 0; + auto [listen_addr, block_sync_rate_limit] = plugin_impl.parse_listen_address(p2p_addresses[which++]); + BOOST_CHECK_EQUAL(listen_addr, "0.0.0.0:9876"); + BOOST_CHECK_EQUAL(block_sync_rate_limit, 0); + std::tie(listen_addr, block_sync_rate_limit) = plugin_impl.parse_listen_address(p2p_addresses[which++]); + BOOST_CHECK_EQUAL(listen_addr, "0.0.0.0:9776"); + BOOST_CHECK_EQUAL(block_sync_rate_limit, 0); + std::tie(listen_addr, block_sync_rate_limit) = plugin_impl.parse_listen_address(p2p_addresses[which++]); + BOOST_CHECK_EQUAL(listen_addr, "0.0.0.0:9877"); + BOOST_CHECK_EQUAL(block_sync_rate_limit, 640000); + std::tie(listen_addr, block_sync_rate_limit) = plugin_impl.parse_listen_address(p2p_addresses[which++]); + BOOST_CHECK_EQUAL(listen_addr, "192.168.0.1:9878"); + BOOST_CHECK_EQUAL(block_sync_rate_limit, 20971520); + std::tie(listen_addr, block_sync_rate_limit) = plugin_impl.parse_listen_address(p2p_addresses[which++]); + BOOST_CHECK_EQUAL(listen_addr, "localhost:9879"); + BOOST_CHECK_EQUAL(block_sync_rate_limit, 500); + std::tie(listen_addr, block_sync_rate_limit) = plugin_impl.parse_listen_address(p2p_addresses[which++]); + BOOST_CHECK_EQUAL(listen_addr, "[2001:db8:85a3:8d3:1319:8a2e:370:7348]:9876"); + BOOST_CHECK_EQUAL(block_sync_rate_limit, 250000); + std::tie(listen_addr, block_sync_rate_limit) = plugin_impl.parse_listen_address(p2p_addresses[which++]); + BOOST_CHECK_EQUAL(listen_addr, "[::1]:9876"); + BOOST_CHECK_EQUAL(block_sync_rate_limit, 250000); + BOOST_CHECK_EXCEPTION(plugin_impl.parse_listen_address(p2p_addresses[which++]), eosio::chain::plugin_config_exception, + [](const eosio::chain::plugin_config_exception& e) + {return std::strstr(e.top_message().c_str(), "IPv6 addresses must be enclosed in square brackets");}); + BOOST_CHECK_EXCEPTION(plugin_impl.parse_listen_address(p2p_addresses[which++]), eosio::chain::plugin_config_exception, + [](const eosio::chain::plugin_config_exception& e) + {return std::strstr(e.top_message().c_str(), "block sync rate limit must not be negative");}); + BOOST_CHECK_EXCEPTION(plugin_impl.parse_listen_address(p2p_addresses[which++]), eosio::chain::plugin_config_exception, + [](const eosio::chain::plugin_config_exception& e) + {return std::strstr(e.top_message().c_str(), "invalid block sync rate limit specification");}); + BOOST_CHECK_EXCEPTION(plugin_impl.parse_listen_address(p2p_addresses[which++]), eosio::chain::plugin_config_exception, + [](const eosio::chain::plugin_config_exception& e) + {return std::strstr(e.top_message().c_str(), "block sync rate limit specification overflowed");}); +} From b16184aa25c5a2eafc9da0c7d8df7cba5490db30 Mon Sep 17 00:00:00 2001 From: Jonathan Giszczak Date: Wed, 27 Sep 2023 14:42:24 -0500 Subject: [PATCH 24/46] Tolerate node running with no listen endpoints. Add mock_connection constructor required by clang14. --- .../include/eosio/net_plugin/auto_bp_peering.hpp | 2 +- plugins/net_plugin/net_plugin.cpp | 11 +++++++++-- plugins/net_plugin/tests/auto_bp_peering_unittest.cpp | 6 ++++++ 3 files changed, 16 insertions(+), 3 deletions(-) diff --git a/plugins/net_plugin/include/eosio/net_plugin/auto_bp_peering.hpp b/plugins/net_plugin/include/eosio/net_plugin/auto_bp_peering.hpp index 8a4f736680..d88c47729e 100644 --- a/plugins/net_plugin/include/eosio/net_plugin/auto_bp_peering.hpp +++ b/plugins/net_plugin/include/eosio/net_plugin/auto_bp_peering.hpp @@ -182,7 +182,7 @@ class bp_connection_manager { fc_dlog(self()->get_logger(), "pending_downstream_neighbors: ${pending_downstream_neighbors}", ("pending_downstream_neighbors", to_string(pending_downstream_neighbors))); - for (auto neighbor : pending_downstream_neighbors) { self()->connections.resolve_and_connect(config.bp_peer_addresses[neighbor], *self()->p2p_addresses.begin() ); } + for (auto neighbor : pending_downstream_neighbors) { self()->connections.resolve_and_connect(config.bp_peer_addresses[neighbor], self()->get_first_p2p_address() ); } pending_neighbors = std::move(pending_downstream_neighbors); finder.add_upstream_neighbors(pending_neighbors); diff --git a/plugins/net_plugin/net_plugin.cpp b/plugins/net_plugin/net_plugin.cpp index 62ec05256a..f22636bd65 100644 --- a/plugins/net_plugin/net_plugin.cpp +++ b/plugins/net_plugin/net_plugin.cpp @@ -454,6 +454,7 @@ namespace eosio { */ vector p2p_addresses; vector p2p_server_addresses; + const string& get_first_p2p_address() const; vector allowed_peers; ///< peer keys allowed to connect std::map 0 ? *p2p_addresses.begin() : empty; + } + void net_plugin_impl::create_session(tcp::socket&& socket, const string listen_address, size_t limit) { uint32_t visitors = 0; uint32_t from_addr = 0; @@ -4337,7 +4344,7 @@ namespace eosio { my->ticker(); my->start_monitors(); my->update_chain_info(); - my->connections.connect_supplied_peers(*my->p2p_addresses.begin()); // attribute every outbound connection to the first listen port + my->connections.connect_supplied_peers(my->get_first_p2p_address()); // attribute every outbound connection to the first listen port when one exists }); } @@ -4374,7 +4381,7 @@ namespace eosio { /// RPC API string net_plugin::connect( const string& host ) { - return my->connections.connect( host, *my->p2p_addresses.begin() ); + return my->connections.connect( host, my->get_first_p2p_address() ); } /// RPC API diff --git a/plugins/net_plugin/tests/auto_bp_peering_unittest.cpp b/plugins/net_plugin/tests/auto_bp_peering_unittest.cpp index d9e0594793..57c7a8f6a1 100644 --- a/plugins/net_plugin/tests/auto_bp_peering_unittest.cpp +++ b/plugins/net_plugin/tests/auto_bp_peering_unittest.cpp @@ -6,6 +6,11 @@ struct mock_connection { bool is_bp_connection = false; bool is_open = false; bool handshake_received = false; + mock_connection(bool bp_connection, bool open, bool received) + : is_bp_connection(bp_connection) + , is_open(open) + , handshake_received(received) + {} bool socket_is_open() const { return is_open; } bool incoming_and_handshake_received() const { return handshake_received; } @@ -37,6 +42,7 @@ struct mock_net_plugin : eosio::auto_bp_peering::bp_connection_manager p2p_addresses{"0.0.0.0:9876"}; + const std::string& get_first_p2p_address() const { return *p2p_addresses.begin(); } bool in_sync() { return is_in_sync; } From df6d948acb2a8c62d512ba1c02311a12e98eaa50 Mon Sep 17 00:00:00 2001 From: Jonathan Giszczak Date: Wed, 27 Sep 2023 14:46:34 -0500 Subject: [PATCH 25/46] Restore lock of connections mutex when connecting configured peers. --- plugins/net_plugin/net_plugin.cpp | 1 + 1 file changed, 1 insertion(+) diff --git a/plugins/net_plugin/net_plugin.cpp b/plugins/net_plugin/net_plugin.cpp index f22636bd65..bdc43ade77 100644 --- a/plugins/net_plugin/net_plugin.cpp +++ b/plugins/net_plugin/net_plugin.cpp @@ -4456,6 +4456,7 @@ namespace eosio { } void connections_manager::connect_supplied_peers(const string& p2p_address) { + std::unique_lock g(connections_mtx); for (const auto& peer : supplied_peers) { resolve_and_connect(peer, p2p_address); } From 733849b51c3f4b354404d40ab32ffefbd029da78 Mon Sep 17 00:00:00 2001 From: Jonathan Giszczak Date: Wed, 27 Sep 2023 15:27:11 -0500 Subject: [PATCH 26/46] Don't pass around iterators that may be invalidated by an erase. --- plugins/net_plugin/net_plugin.cpp | 37 ++++++++++++++----------------- 1 file changed, 17 insertions(+), 20 deletions(-) diff --git a/plugins/net_plugin/net_plugin.cpp b/plugins/net_plugin/net_plugin.cpp index bdc43ade77..b4073814f0 100644 --- a/plugins/net_plugin/net_plugin.cpp +++ b/plugins/net_plugin/net_plugin.cpp @@ -361,10 +361,6 @@ namespace eosio { ordered_unique< tag, key<&connection_details::c> - >, - ordered_non_unique< - tag, - key<&connection_details::active_ip> > > >; @@ -415,7 +411,7 @@ namespace eosio { void add(connection_ptr c); string connect(const string& host, const string& p2p_address); string resolve_and_connect(const string& host, const string& p2p_address); - void update_connection_endpoint(connection_details_index::const_iterator it, const tcp::endpoint& endpoint); + void update_connection_endpoint(connection_ptr c, const tcp::endpoint& endpoint); connection_details_index::iterator get_connection_iterator(const connection_ptr& c); string disconnect(const string& host); void close_all(); @@ -810,7 +806,8 @@ namespace eosio { /// assignment not allowed block_status_monitor& operator=( const block_status_monitor& ) = delete; block_status_monitor& operator=( block_status_monitor&& ) = delete; - }; + }; // block_status_monitor + class connection : public std::enable_shared_from_this { public: @@ -986,8 +983,7 @@ namespace eosio { bool populate_handshake( handshake_message& hello ) const; bool reconnect(); - void connect( const tcp::resolver::results_type& endpoints, - connections_manager::connection_details_index::const_iterator conn_details ); + void connect( const tcp::resolver::results_type& endpoints ); void start_read_message(); /** \brief Process the next message from the pending message buffer @@ -2741,22 +2737,21 @@ namespace eosio { connection_ptr c = shared_from_this(); strand.post([c]() { auto it = my_impl->connections.get_connection_iterator(c); - c->connect(it->ips, it); + c->connect(it->ips); }); return true; } // called from connection strand - void connection::connect( const tcp::resolver::results_type& endpoints, - connections_manager::connection_details_index::const_iterator conn_details ) { + void connection::connect( const tcp::resolver::results_type& endpoints ) { set_state(connection_state::connecting); pending_message_buffer.reset(); buffer_queue.clear_out_queue(); boost::asio::async_connect( *socket, endpoints, boost::asio::bind_executor( strand, - [c = shared_from_this(), socket=socket, conn_details]( const boost::system::error_code& err, const tcp::endpoint& endpoint ) { + [c = shared_from_this(), socket=socket]( const boost::system::error_code& err, const tcp::endpoint& endpoint ) { if( !err && socket->is_open() && socket == c->socket ) { - my_impl->connections.update_connection_endpoint(conn_details, endpoint); + my_impl->connections.update_connection_endpoint(c, endpoint); c->update_endpoints(endpoint); if( c->start_session() ) { c->send_handshake(); @@ -4506,7 +4501,7 @@ namespace eosio { .ips = results }); if( !err ) { - it->c->connect( results, it ); + it->c->connect( results ); } else { fc_elog( logger, "Unable to resolve ${host}:${port} ${error}", ("host", host)("port", port)( "error", err.message() ) ); @@ -4518,14 +4513,16 @@ namespace eosio { return "added connection"; } - void connections_manager::update_connection_endpoint(connection_details_index::const_iterator it, + void connections_manager::update_connection_endpoint(connection_ptr c, const tcp::endpoint& endpoint) { - fc_dlog(logger, "updating connection endpoint"); std::unique_lock g( connections_mtx ); - auto& index = connections.get(); - index.modify_key(connections.project(it), [endpoint](tcp::endpoint& e) { - e = endpoint; - }); + auto& index = connections.get(); + const auto& it = index.find(c); + if( it != index.end() ) { + index.modify(it, [endpoint](connection_details& d) { + d.active_ip = endpoint; + }); + } } connections_manager::connection_details_index::iterator connections_manager::get_connection_iterator(const connection_ptr& c) { From ed692388965c86e9409dc0d3972efa81530dfca0 Mon Sep 17 00:00:00 2001 From: Jonathan Giszczak Date: Wed, 27 Sep 2023 15:33:19 -0500 Subject: [PATCH 27/46] Renamed method. --- plugins/net_plugin/net_plugin.cpp | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/plugins/net_plugin/net_plugin.cpp b/plugins/net_plugin/net_plugin.cpp index b4073814f0..7b5774c395 100644 --- a/plugins/net_plugin/net_plugin.cpp +++ b/plugins/net_plugin/net_plugin.cpp @@ -420,7 +420,7 @@ namespace eosio { vector connection_statuses() const; template - bool for_any_supplied_peer(Function&& f) const; + bool any_of_supplied_peers(Function&& f) const; template void for_each_connection(Function&& f) const; @@ -1184,7 +1184,7 @@ namespace eosio { template - bool connections_manager::for_any_supplied_peer( Function&& f ) const { + bool connections_manager::any_of_supplied_peers( Function&& f ) const { std::shared_lock g( connections_mtx ); return std::any_of(supplied_peers.begin(), supplied_peers.end(), std::forward(f)); } @@ -2798,7 +2798,7 @@ namespace eosio { visitors < connections.get_max_client_count())) { fc_ilog(logger, "Accepted new connection: " + paddr_str); - connections.for_any_supplied_peer([&listen_address, &paddr_str, &limit](const string& peer_addr) { + connections.any_of_supplied_peers([&listen_address, &paddr_str, &limit](const string& peer_addr) { auto [host, port, type] = split_host_port_type(peer_addr); if (host == paddr_str) { if (limit > 0) { From 2f80663a2343b495fad30d95d0e3cfb96ac80206 Mon Sep 17 00:00:00 2001 From: Jonathan Giszczak Date: Wed, 27 Sep 2023 15:49:19 -0500 Subject: [PATCH 28/46] Break encapsulation less. Delegate reconnecting back to connections_manager rather than have connection try to do it itself. --- plugins/net_plugin/net_plugin.cpp | 15 ++++++++------- 1 file changed, 8 insertions(+), 7 deletions(-) diff --git a/plugins/net_plugin/net_plugin.cpp b/plugins/net_plugin/net_plugin.cpp index 7b5774c395..b00462715e 100644 --- a/plugins/net_plugin/net_plugin.cpp +++ b/plugins/net_plugin/net_plugin.cpp @@ -412,7 +412,7 @@ namespace eosio { string connect(const string& host, const string& p2p_address); string resolve_and_connect(const string& host, const string& p2p_address); void update_connection_endpoint(connection_ptr c, const tcp::endpoint& endpoint); - connection_details_index::iterator get_connection_iterator(const connection_ptr& c); + void connect(const connection_ptr& c); string disconnect(const string& host); void close_all(); @@ -2731,13 +2731,12 @@ namespace eosio { fc::microseconds connector_period = my_impl->connections.get_connector_period(); fc::lock_guard g( conn_mtx ); if( last_close == fc::time_point() || last_close > fc::time_point::now() - connector_period ) { - return true; + return true; // true so doesn't remove from valid connections } } connection_ptr c = shared_from_this(); strand.post([c]() { - auto it = my_impl->connections.get_connection_iterator(c); - c->connect(it->ips); + my_impl->connections.connect(c); }); return true; } @@ -4525,11 +4524,13 @@ namespace eosio { } } - connections_manager::connection_details_index::iterator connections_manager::get_connection_iterator(const connection_ptr& c) { + void connections_manager::connect(const connection_ptr& c) { std::lock_guard g( connections_mtx ); const auto& index = connections.get(); - auto i = index.find(c); - return connections.project(i); + const auto& it = index.find(c); + if( it != index.end() ) { + it->c->connect( it->ips ); + } } // called by API From 7019b657a7cb5d887ee9e41a6859410c0aa26b08 Mon Sep 17 00:00:00 2001 From: Jonathan Giszczak Date: Wed, 27 Sep 2023 15:53:45 -0500 Subject: [PATCH 29/46] Thread safety. --- plugins/net_plugin/net_plugin.cpp | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/plugins/net_plugin/net_plugin.cpp b/plugins/net_plugin/net_plugin.cpp index b00462715e..fde60d3255 100644 --- a/plugins/net_plugin/net_plugin.cpp +++ b/plugins/net_plugin/net_plugin.cpp @@ -4548,9 +4548,9 @@ namespace eosio { } void connections_manager::close_all() { + std::lock_guard g( connections_mtx ); auto& index = connections.get(); fc_ilog( logger, "close all ${s} connections", ("s", index.size()) ); - std::lock_guard g( connections_mtx ); for( const connection_ptr& c : index ) { fc_dlog( logger, "close: ${cid}", ("cid", c->connection_id) ); c->close( false, true ); From 4d136e33a2b1f80fae149427167f486fee1be7cc Mon Sep 17 00:00:00 2001 From: Jonathan Giszczak Date: Wed, 27 Sep 2023 16:00:48 -0500 Subject: [PATCH 30/46] Revert "Restore lock of connections mutex when connecting configured peers." This reverts commit df6d948acb2a8c62d512ba1c02311a12e98eaa50. --- plugins/net_plugin/net_plugin.cpp | 1 - 1 file changed, 1 deletion(-) diff --git a/plugins/net_plugin/net_plugin.cpp b/plugins/net_plugin/net_plugin.cpp index fde60d3255..59fabe5913 100644 --- a/plugins/net_plugin/net_plugin.cpp +++ b/plugins/net_plugin/net_plugin.cpp @@ -4450,7 +4450,6 @@ namespace eosio { } void connections_manager::connect_supplied_peers(const string& p2p_address) { - std::unique_lock g(connections_mtx); for (const auto& peer : supplied_peers) { resolve_and_connect(peer, p2p_address); } From 3708418ae178e8f9602441db4ae3267b018aa9f2 Mon Sep 17 00:00:00 2001 From: Jonathan Giszczak Date: Wed, 27 Sep 2023 16:19:35 -0500 Subject: [PATCH 31/46] Restore lock of connections mutex when connecting configured peers. --- plugins/net_plugin/net_plugin.cpp | 5 ++++- 1 file changed, 4 insertions(+), 1 deletion(-) diff --git a/plugins/net_plugin/net_plugin.cpp b/plugins/net_plugin/net_plugin.cpp index 59fabe5913..57987e58a7 100644 --- a/plugins/net_plugin/net_plugin.cpp +++ b/plugins/net_plugin/net_plugin.cpp @@ -4450,7 +4450,10 @@ namespace eosio { } void connections_manager::connect_supplied_peers(const string& p2p_address) { - for (const auto& peer : supplied_peers) { + std::unique_lock g(connections_mtx); + chain::flat_set peers = supplied_peers; + g.unlock(); + for (const auto& peer : peers) { resolve_and_connect(peer, p2p_address); } } From 4baec727ab8bd2d8366abba907f84c70e247ec0c Mon Sep 17 00:00:00 2001 From: Jonathan Giszczak Date: Wed, 27 Sep 2023 16:29:30 -0500 Subject: [PATCH 32/46] Accept suggested refactoring. --- plugins/net_plugin/net_plugin.cpp | 10 +++------- 1 file changed, 3 insertions(+), 7 deletions(-) diff --git a/plugins/net_plugin/net_plugin.cpp b/plugins/net_plugin/net_plugin.cpp index 57987e58a7..1a5b41cbf9 100644 --- a/plugins/net_plugin/net_plugin.cpp +++ b/plugins/net_plugin/net_plugin.cpp @@ -1725,16 +1725,12 @@ namespace eosio { if( block_sync_rate_limit > 0 ) { auto elapsed = std::chrono::duration_cast(get_time() - connection_start_time); auto current_rate = double(block_sync_bytes_sent) / elapsed.count(); - if( current_rate < block_sync_rate_limit ) { - block_sync_bytes_sent += enqueue_block( sb, true ); - ++peer_requested->last; - } else { + if( current_rate >= block_sync_rate_limit ) { return false; } - } else { - block_sync_bytes_sent += enqueue_block( sb, true ); - ++peer_requested->last; } + block_sync_bytes_sent += enqueue_block( sb, true ); + ++peer_requested->last; } else { peer_ilog( this, "enqueue sync, unable to fetch block ${num}, sending benign_other go away", ("num", num) ); peer_requested.reset(); // unable to provide requested blocks From 8d2c1c2426e5d395bce80770fa6e86c52eff5e7a Mon Sep 17 00:00:00 2001 From: Jonathan Giszczak Date: Wed, 27 Sep 2023 16:30:32 -0500 Subject: [PATCH 33/46] Remove some unused machine-generated variables from custom shape file. --- tests/p2p_sync_throttle_test_shape.json | 10 ---------- 1 file changed, 10 deletions(-) diff --git a/tests/p2p_sync_throttle_test_shape.json b/tests/p2p_sync_throttle_test_shape.json index 4252ab483a..8cfb5ce9a5 100644 --- a/tests/p2p_sync_throttle_test_shape.json +++ b/tests/p2p_sync_throttle_test_shape.json @@ -17,8 +17,6 @@ "eosio" ], "dont_start": false, - "config_dir_name": "/home/giszczakj/Dev/leap/build/TestLogs/p2p_sync_throttle_test1192292/node_bios", - "data_dir_name": "/home/giszczakj/Dev/leap/build/TestLogs/p2p_sync_throttle_test1192292/node_bios", "p2p_port": 9776, "http_port": 8788, "host_name": "localhost", @@ -60,8 +58,6 @@ "defproduceru" ], "dont_start": false, - "config_dir_name": "/home/giszczakj/Dev/leap/build/TestLogs/p2p_sync_throttle_test1192292/node_00", - "data_dir_name": "/home/giszczakj/Dev/leap/build/TestLogs/p2p_sync_throttle_test1192292/node_00", "p2p_port": 9876, "http_port": 8888, "host_name": "localhost", @@ -83,8 +79,6 @@ ], "producers": [], "dont_start": false, - "config_dir_name": "/home/giszczakj/Dev/leap/build/TestLogs/p2p_sync_throttle_test1192292/node_01", - "data_dir_name": "/home/giszczakj/Dev/leap/build/TestLogs/p2p_sync_throttle_test1192292/node_01", "p2p_port": 9877, "http_port": 8889, "host_name": "localhost", @@ -106,8 +100,6 @@ ], "producers": [], "dont_start": true, - "config_dir_name": "/home/giszczakj/Dev/leap/build/TestLogs/p2p_sync_throttle_test1192292/node_02", - "data_dir_name": "/home/giszczakj/Dev/leap/build/TestLogs/p2p_sync_throttle_test1192292/node_02", "p2p_port": 9878, "http_port": 8890, "host_name": "localhost", @@ -129,8 +121,6 @@ ], "producers": [], "dont_start": true, - "config_dir_name": "/home/giszczakj/Dev/leap/build/TestLogs/p2p_sync_throttle_test1192292/node_03", - "data_dir_name": "/home/giszczakj/Dev/leap/build/TestLogs/p2p_sync_throttle_test1192292/node_03", "p2p_port": 9879, "http_port": 8891, "host_name": "localhost", From 7e37de11f46854f2b82e34174717300ac64b0310 Mon Sep 17 00:00:00 2001 From: Jonathan Giszczak Date: Thu, 28 Sep 2023 17:53:08 -0500 Subject: [PATCH 34/46] Convert connections mutex to resursive_mutex and update locks. Split prometheus statistics out of connection_monitor into connection_statistics_monitor. --- plugins/net_plugin/net_plugin.cpp | 195 ++++++++++++++++++------------ 1 file changed, 118 insertions(+), 77 deletions(-) diff --git a/plugins/net_plugin/net_plugin.cpp b/plugins/net_plugin/net_plugin.cpp index 1a5b41cbf9..113c0bc94d 100644 --- a/plugins/net_plugin/net_plugin.cpp +++ b/plugins/net_plugin/net_plugin.cpp @@ -364,15 +364,18 @@ namespace eosio { > > >; + enum class timer_type { check, stats }; private: alignas(hardware_destructive_interference_size) - mutable std::shared_mutex connections_mtx; + mutable std::recursive_mutex connections_mtx; connection_details_index connections; chain::flat_set supplied_peers; alignas(hardware_destructive_interference_size) fc::mutex connector_check_timer_mtx; unique_ptr connector_check_timer GUARDED_BY(connector_check_timer_mtx); + fc::mutex connection_stats_timer_mtx; + unique_ptr connection_stats_timer GUARDED_BY(connection_stats_timer_mtx); /// thread safe, only modified on startup std::chrono::milliseconds heartbeat_timeout{def_keepalive_interval*2}; @@ -385,6 +388,7 @@ namespace eosio { connection_ptr find_connection_i(const string& host) const; void connection_monitor(const std::weak_ptr& from_connection); + void connection_statistics_monitor(const std::weak_ptr& from_connection); public: size_t number_connections() const; @@ -404,9 +408,11 @@ namespace eosio { void connect_supplied_peers(const string& p2p_address); - void start_conn_timer(); - void start_conn_timer(boost::asio::steady_timer::duration du, std::weak_ptr from_connection); - void stop_conn_timer(); + void start_conn_timers(); + void start_conn_timer(boost::asio::steady_timer::duration du, + std::weak_ptr from_connection, + timer_type which); + void stop_conn_timers(); void add(connection_ptr c); string connect(const string& host, const string& p2p_address); @@ -1185,20 +1191,20 @@ namespace eosio { template bool connections_manager::any_of_supplied_peers( Function&& f ) const { - std::shared_lock g( connections_mtx ); + const std::lock_guard g( connections_mtx ); return std::any_of(supplied_peers.begin(), supplied_peers.end(), std::forward(f)); } template void connections_manager::for_each_connection( Function&& f ) const { - std::shared_lock g( connections_mtx ); + const std::lock_guard g( connections_mtx ); auto& index = connections.get(); std::for_each(index.begin(), index.end(), std::forward(f)); } template void connections_manager::for_each_block_connection( Function&& f ) const { - std::shared_lock g( connections_mtx ); + const std::lock_guard g( connections_mtx ); auto& index = connections.get(); for( const connection_ptr& c : index ) { if (c->is_blocks_connection()) { @@ -1209,14 +1215,14 @@ namespace eosio { template bool connections_manager::any_of_connections(UnaryPredicate&& p) const { - std::shared_lock g(connections_mtx); + const std::lock_guard g(connections_mtx); auto& index = connections.get(); return std::any_of(index.cbegin(), index.cend(), std::forward(p)); } template bool connections_manager::any_of_block_connections(UnaryPredicate&& p) const { - std::shared_lock g( connections_mtx ); + const std::lock_guard g( connections_mtx ); auto& index = connections.get(); for( const connection_ptr& c : index ) { if( c->is_blocks_connection() ) { @@ -1444,7 +1450,9 @@ namespace eosio { set_state(connection_state::closed); if( reconnect && !shutdown ) { - my_impl->connections.start_conn_timer( std::chrono::milliseconds( 100 ), connection_wptr() ); + my_impl->connections.start_conn_timer( std::chrono::milliseconds( 100 ), + connection_wptr(), + connections_manager::timer_type::check ); } } @@ -3117,7 +3125,7 @@ namespace eosio { void net_plugin_impl::plugin_shutdown() { in_shutdown = true; - connections.stop_conn_timer(); + connections.stop_conn_timers(); { fc::lock_guard g( expire_timer_mtx ); if( expire_timer ) @@ -3821,7 +3829,7 @@ namespace eosio { fc::lock_guard g( expire_timer_mtx ); expire_timer = std::make_unique( my_impl->thread_pool.get_executor() ); } - connections.start_conn_timer(); + connections.start_conn_timers(); start_expire_timer(); } @@ -4416,12 +4424,12 @@ namespace eosio { //---------------------------------------------------------------------------- size_t connections_manager::number_connections() const { - std::lock_guard g(connections_mtx); + const std::lock_guard g(connections_mtx); return connections.size(); } void connections_manager::add_supplied_peers(const vector& peers ) { - std::lock_guard g(connections_mtx); + const std::lock_guard g(connections_mtx); supplied_peers.insert( peers.begin(), peers.end() ); } @@ -4446,16 +4454,15 @@ namespace eosio { } void connections_manager::connect_supplied_peers(const string& p2p_address) { - std::unique_lock g(connections_mtx); + const std::lock_guard g(connections_mtx); chain::flat_set peers = supplied_peers; - g.unlock(); for (const auto& peer : peers) { resolve_and_connect(peer, p2p_address); } } void connections_manager::add( connection_ptr c ) { - std::lock_guard g( connections_mtx ); + const std::lock_guard g( connections_mtx ); boost::system::error_code ec; auto endpoint = c->socket->remote_endpoint(ec); connections.insert( connection_details{ @@ -4466,9 +4473,8 @@ namespace eosio { // called by API string connections_manager::connect( const string& host, const string& p2p_address ) { - std::unique_lock g( connections_mtx ); + const std::lock_guard g( connections_mtx ); supplied_peers.insert(host); - g.unlock(); return resolve_and_connect( host, p2p_address ); } @@ -4479,7 +4485,7 @@ namespace eosio { return "invalid peer address"; } - std::lock_guard g( connections_mtx ); + const std::lock_guard g( connections_mtx ); if( find_connection_i( peer_address ) ) return "already connected"; @@ -4491,7 +4497,7 @@ namespace eosio { [resolver, host = host, port = port, peer_address = peer_address, listen_address = listen_address, this]( const boost::system::error_code& err, const tcp::resolver::results_type& results ) { connection_ptr c = std::make_shared( peer_address, listen_address ); c->set_heartbeat_timeout( heartbeat_timeout ); - std::lock_guard g( connections_mtx ); + const std::lock_guard g( connections_mtx ); auto [it, inserted] = connections.emplace( connection_details{ .host = peer_address, .c = std::move(c), @@ -4512,7 +4518,7 @@ namespace eosio { void connections_manager::update_connection_endpoint(connection_ptr c, const tcp::endpoint& endpoint) { - std::unique_lock g( connections_mtx ); + const std::lock_guard g( connections_mtx ); auto& index = connections.get(); const auto& it = index.find(c); if( it != index.end() ) { @@ -4523,7 +4529,7 @@ namespace eosio { } void connections_manager::connect(const connection_ptr& c) { - std::lock_guard g( connections_mtx ); + const std::lock_guard g( connections_mtx ); const auto& index = connections.get(); const auto& it = index.find(c); if( it != index.end() ) { @@ -4533,7 +4539,7 @@ namespace eosio { // called by API string connections_manager::disconnect( const string& host ) { - std::lock_guard g( connections_mtx ); + const std::lock_guard g( connections_mtx ); auto& index = connections.get(); if( auto i = index.find( host ); i != index.end() ) { fc_ilog( logger, "disconnecting: ${cid}", ("cid", i->c->connection_id) ); @@ -4546,7 +4552,7 @@ namespace eosio { } void connections_manager::close_all() { - std::lock_guard g( connections_mtx ); + const std::lock_guard g( connections_mtx ); auto& index = connections.get(); fc_ilog( logger, "close all ${s} connections", ("s", index.size()) ); for( const connection_ptr& c : index ) { @@ -4557,7 +4563,7 @@ namespace eosio { } std::optional connections_manager::status( const string& host )const { - std::shared_lock g( connections_mtx ); + const std::lock_guard g( connections_mtx ); auto con = find_connection_i( host ); if( con ) { return con->get_status(); @@ -4567,7 +4573,7 @@ namespace eosio { vector connections_manager::connection_statuses()const { vector result; - std::shared_lock g( connections_mtx ); + const std::lock_guard g( connections_mtx ); auto& index = connections.get(); result.reserve( index.size() ); for( const connection_ptr& c : index ) { @@ -4586,28 +4592,42 @@ namespace eosio { } // called from any thread - void connections_manager::start_conn_timer() { - start_conn_timer(connector_period, {}); // this locks mutex + void connections_manager::start_conn_timers() { + start_conn_timer(connector_period, {}, timer_type::check); // this locks mutex + start_conn_timer(connector_period, {}, timer_type::stats); // this locks mutex } // called from any thread - void connections_manager::start_conn_timer(boost::asio::steady_timer::duration du, std::weak_ptr from_connection) { - fc::lock_guard g( connector_check_timer_mtx ); - if (!connector_check_timer) { - connector_check_timer = std::make_unique( my_impl->thread_pool.get_executor() ); - } - connector_check_timer->expires_from_now( du ); - connector_check_timer->async_wait( [this, from_connection{std::move(from_connection)}](boost::system::error_code ec) mutable { + void connections_manager::start_conn_timer(boost::asio::steady_timer::duration du, + std::weak_ptr from_connection, + timer_type which) { + auto& mtx = which == timer_type::check ? connector_check_timer_mtx : connection_stats_timer_mtx; + auto& timer = which == timer_type::check ? connector_check_timer : connection_stats_timer; + const auto& func = which == timer_type::check ? &connections_manager::connection_monitor : &connections_manager::connection_statistics_monitor; + fc::lock_guard g( mtx ); + if (!timer) { + timer = std::make_unique( my_impl->thread_pool.get_executor() ); + } + timer->expires_from_now( du ); + timer->async_wait( [this, from_connection{std::move(from_connection)}, f = func](boost::system::error_code ec) mutable { if( !ec ) { - connection_monitor(from_connection); + (this->*f)(from_connection); } }); } - void connections_manager::stop_conn_timer() { - fc::lock_guard g( connector_check_timer_mtx ); - if (connector_check_timer) { - connector_check_timer->cancel(); + void connections_manager::stop_conn_timers() { + { + fc::lock_guard g( connector_check_timer_mtx ); + if (connector_check_timer) { + connector_check_timer->cancel(); + } + } + { + fc::lock_guard g( connection_stats_timer_mtx ); + if (connection_stats_timer) { + connection_stats_timer->cancel(); + } } } @@ -4615,20 +4635,18 @@ namespace eosio { void connections_manager::connection_monitor(const std::weak_ptr& from_connection) { auto max_time = fc::time_point::now().safe_add(max_cleanup_time); auto from = from_connection.lock(); - std::unique_lock g( connections_mtx ); + const std::lock_guard g( connections_mtx ); auto& index = connections.get(); auto it = (from ? index.find(from) : index.begin()); if (it == index.end()) it = index.begin(); size_t num_rm = 0, num_clients = 0, num_peers = 0, num_bp_peers = 0; - net_plugin::p2p_per_connection_metrics per_connection(index.size()); while (it != index.end()) { if (fc::time_point::now() >= max_time) { connection_wptr wit = (*it).c; - g.unlock(); fc_dlog( logger, "Exiting connection monitor early, ran out of time: ${t}", ("t", max_time - fc::time_point::now()) ); fc_ilog( logger, "p2p client connections: ${num}/${max}, peer connections: ${pnum}/${pmax}", ("num", num_clients)("max", max_client_count)("pnum", num_peers)("pmax", supplied_peers.size()) ); - start_conn_timer( std::chrono::milliseconds( 1 ), wit ); // avoid exhausting + start_conn_timer( std::chrono::milliseconds( 1 ), wit, timer_type::check ); // avoid exhausting return; } const connection_ptr& c = it->c; @@ -4639,6 +4657,59 @@ namespace eosio { } else { ++num_peers; } + + if (!c->socket_is_open() && c->state() != connection::connection_state::connecting) { + if (!c->incoming()) { + fc_dlog(logger, "attempting to connect in connection_monitor"); + if (!c->reconnect()) { + it = index.erase(it); + --num_peers; + ++num_rm; + continue; + } + } else { + --num_clients; + ++num_rm; + it = index.erase(it); + continue; + } + } + ++it; + } + + if( num_clients > 0 || num_peers > 0 ) { + fc_ilog(logger, "p2p client connections: ${num}/${max}, peer connections: ${pnum}/${pmax}, block producer peers: ${num_bp_peers}", + ("num", num_clients)("max", max_client_count)("pnum", num_peers)("pmax", supplied_peers.size())("num_bp_peers", num_bp_peers)); + } + fc_dlog( logger, "connection monitor, removed ${n} connections", ("n", num_rm) ); + start_conn_timer( connector_period, {}, timer_type::check ); + } + + // called from any thread + void connections_manager::connection_statistics_monitor(const std::weak_ptr& from_connection) { + auto max_time = fc::time_point::now().safe_add(max_cleanup_time); + auto from = from_connection.lock(); + const std::lock_guard g(connections_mtx); + auto& index = connections.get(); + auto it = (from ? index.find(from) : index.begin()); + if( it == index.end()) it = index.begin(); + size_t num_clients = 0, num_peers = 0, num_bp_peers = 0; + net_plugin::p2p_per_connection_metrics per_connection(index.size()); + while(it != index.end()) { + if(fc::time_point::now() >= max_time) { + connection_wptr wit = (*it).c; + fc_dlog(logger, "connection statistics monitor ran out of time"); + start_conn_timer(std::chrono::milliseconds(1), wit, timer_type::stats); + return; + } + const connection_ptr& c = it->c; + if(c->is_bp_connection) { + ++num_bp_peers; + } else if(c->incoming()) { + ++num_clients; + } else { + ++num_peers; + } if (update_p2p_connection_metrics) { fc::unique_lock g_conn(c->conn_mtx); boost::asio::ip::address_v6::bytes_type addr = c->remote_endpoint_ip_array; @@ -4663,41 +4734,11 @@ namespace eosio { }; per_connection.peers.push_back(metrics); } - - if (!c->socket_is_open() && c->state() != connection::connection_state::connecting) { - if (!c->incoming()) { - g.unlock(); - fc_dlog(logger, "attempting to connect in connection_monitor"); - if (!c->reconnect()) { - g.lock(); - it = index.erase(it); - g.unlock(); - --num_peers; - ++num_rm; - continue; - } - g.lock(); - } else { - --num_clients; - ++num_rm; - it = index.erase(it); - continue; - } - } - ++it; } - g.unlock(); - if (update_p2p_connection_metrics) { + if(update_p2p_connection_metrics) { update_p2p_connection_metrics({num_peers, num_clients, std::move(per_connection)}); } - - if( num_clients > 0 || num_peers > 0 ) { - fc_ilog(logger, "p2p client connections: ${num}/${max}, peer connections: ${pnum}/${pmax}, block producer peers: ${num_bp_peers}", - ("num", num_clients)("max", max_client_count)("pnum", num_peers)("pmax", supplied_peers.size())("num_bp_peers", num_bp_peers)); - } - fc_dlog( logger, "connection monitor, removed ${n} connections", ("n", num_rm) ); - start_conn_timer( connector_period, {}); + start_conn_timer( connector_period, {}, timer_type::stats ); } - } // namespace eosio From 669ed0facc240fe243df09903337a6dab9ddcc9f Mon Sep 17 00:00:00 2001 From: Jonathan Giszczak Date: Fri, 29 Sep 2023 18:16:27 -0500 Subject: [PATCH 35/46] Revert mutex and lock type changes. --- plugins/net_plugin/net_plugin.cpp | 50 ++++++++++++++++++------------- 1 file changed, 29 insertions(+), 21 deletions(-) diff --git a/plugins/net_plugin/net_plugin.cpp b/plugins/net_plugin/net_plugin.cpp index 113c0bc94d..8730ab3b77 100644 --- a/plugins/net_plugin/net_plugin.cpp +++ b/plugins/net_plugin/net_plugin.cpp @@ -367,7 +367,7 @@ namespace eosio { enum class timer_type { check, stats }; private: alignas(hardware_destructive_interference_size) - mutable std::recursive_mutex connections_mtx; + mutable std::shared_mutex connections_mtx; connection_details_index connections; chain::flat_set supplied_peers; @@ -1191,20 +1191,20 @@ namespace eosio { template bool connections_manager::any_of_supplied_peers( Function&& f ) const { - const std::lock_guard g( connections_mtx ); + std::shared_lock g( connections_mtx ); return std::any_of(supplied_peers.begin(), supplied_peers.end(), std::forward(f)); } template void connections_manager::for_each_connection( Function&& f ) const { - const std::lock_guard g( connections_mtx ); + std::shared_lock g( connections_mtx ); auto& index = connections.get(); std::for_each(index.begin(), index.end(), std::forward(f)); } template void connections_manager::for_each_block_connection( Function&& f ) const { - const std::lock_guard g( connections_mtx ); + std::shared_lock g( connections_mtx ); auto& index = connections.get(); for( const connection_ptr& c : index ) { if (c->is_blocks_connection()) { @@ -1215,14 +1215,14 @@ namespace eosio { template bool connections_manager::any_of_connections(UnaryPredicate&& p) const { - const std::lock_guard g(connections_mtx); + std::shared_lock g(connections_mtx); auto& index = connections.get(); return std::any_of(index.cbegin(), index.cend(), std::forward(p)); } template bool connections_manager::any_of_block_connections(UnaryPredicate&& p) const { - const std::lock_guard g( connections_mtx ); + std::shared_lock g( connections_mtx ); auto& index = connections.get(); for( const connection_ptr& c : index ) { if( c->is_blocks_connection() ) { @@ -4424,12 +4424,12 @@ namespace eosio { //---------------------------------------------------------------------------- size_t connections_manager::number_connections() const { - const std::lock_guard g(connections_mtx); + std::lock_guard g(connections_mtx); return connections.size(); } void connections_manager::add_supplied_peers(const vector& peers ) { - const std::lock_guard g(connections_mtx); + std::lock_guard g(connections_mtx); supplied_peers.insert( peers.begin(), peers.end() ); } @@ -4454,15 +4454,16 @@ namespace eosio { } void connections_manager::connect_supplied_peers(const string& p2p_address) { - const std::lock_guard g(connections_mtx); + std::unique_lock g(connections_mtx); chain::flat_set peers = supplied_peers; + g.unlock(); for (const auto& peer : peers) { resolve_and_connect(peer, p2p_address); } } void connections_manager::add( connection_ptr c ) { - const std::lock_guard g( connections_mtx ); + std::lock_guard g( connections_mtx ); boost::system::error_code ec; auto endpoint = c->socket->remote_endpoint(ec); connections.insert( connection_details{ @@ -4473,8 +4474,9 @@ namespace eosio { // called by API string connections_manager::connect( const string& host, const string& p2p_address ) { - const std::lock_guard g( connections_mtx ); + std::unique_lock g( connections_mtx ); supplied_peers.insert(host); + g.unlock(); return resolve_and_connect( host, p2p_address ); } @@ -4485,7 +4487,7 @@ namespace eosio { return "invalid peer address"; } - const std::lock_guard g( connections_mtx ); + std::lock_guard g( connections_mtx ); if( find_connection_i( peer_address ) ) return "already connected"; @@ -4497,7 +4499,7 @@ namespace eosio { [resolver, host = host, port = port, peer_address = peer_address, listen_address = listen_address, this]( const boost::system::error_code& err, const tcp::resolver::results_type& results ) { connection_ptr c = std::make_shared( peer_address, listen_address ); c->set_heartbeat_timeout( heartbeat_timeout ); - const std::lock_guard g( connections_mtx ); + std::lock_guard g( connections_mtx ); auto [it, inserted] = connections.emplace( connection_details{ .host = peer_address, .c = std::move(c), @@ -4518,7 +4520,7 @@ namespace eosio { void connections_manager::update_connection_endpoint(connection_ptr c, const tcp::endpoint& endpoint) { - const std::lock_guard g( connections_mtx ); + std::unique_lock g( connections_mtx ); auto& index = connections.get(); const auto& it = index.find(c); if( it != index.end() ) { @@ -4529,7 +4531,7 @@ namespace eosio { } void connections_manager::connect(const connection_ptr& c) { - const std::lock_guard g( connections_mtx ); + std::lock_guard g( connections_mtx ); const auto& index = connections.get(); const auto& it = index.find(c); if( it != index.end() ) { @@ -4539,7 +4541,7 @@ namespace eosio { // called by API string connections_manager::disconnect( const string& host ) { - const std::lock_guard g( connections_mtx ); + std::lock_guard g( connections_mtx ); auto& index = connections.get(); if( auto i = index.find( host ); i != index.end() ) { fc_ilog( logger, "disconnecting: ${cid}", ("cid", i->c->connection_id) ); @@ -4552,7 +4554,7 @@ namespace eosio { } void connections_manager::close_all() { - const std::lock_guard g( connections_mtx ); + std::lock_guard g( connections_mtx ); auto& index = connections.get(); fc_ilog( logger, "close all ${s} connections", ("s", index.size()) ); for( const connection_ptr& c : index ) { @@ -4563,7 +4565,7 @@ namespace eosio { } std::optional connections_manager::status( const string& host )const { - const std::lock_guard g( connections_mtx ); + std::shared_lock g( connections_mtx ); auto con = find_connection_i( host ); if( con ) { return con->get_status(); @@ -4573,7 +4575,7 @@ namespace eosio { vector connections_manager::connection_statuses()const { vector result; - const std::lock_guard g( connections_mtx ); + std::shared_lock g( connections_mtx ); auto& index = connections.get(); result.reserve( index.size() ); for( const connection_ptr& c : index ) { @@ -4635,7 +4637,7 @@ namespace eosio { void connections_manager::connection_monitor(const std::weak_ptr& from_connection) { auto max_time = fc::time_point::now().safe_add(max_cleanup_time); auto from = from_connection.lock(); - const std::lock_guard g( connections_mtx ); + std::unique_lock g( connections_mtx ); auto& index = connections.get(); auto it = (from ? index.find(from) : index.begin()); if (it == index.end()) it = index.begin(); @@ -4643,6 +4645,7 @@ namespace eosio { while (it != index.end()) { if (fc::time_point::now() >= max_time) { connection_wptr wit = (*it).c; + g.unlock(); fc_dlog( logger, "Exiting connection monitor early, ran out of time: ${t}", ("t", max_time - fc::time_point::now()) ); fc_ilog( logger, "p2p client connections: ${num}/${max}, peer connections: ${pnum}/${pmax}", ("num", num_clients)("max", max_client_count)("pnum", num_peers)("pmax", supplied_peers.size()) ); @@ -4660,13 +4663,17 @@ namespace eosio { if (!c->socket_is_open() && c->state() != connection::connection_state::connecting) { if (!c->incoming()) { + g.unlock(); fc_dlog(logger, "attempting to connect in connection_monitor"); if (!c->reconnect()) { + g.lock(); it = index.erase(it); + g.unlock(); --num_peers; ++num_rm; continue; } + g.lock(); } else { --num_clients; ++num_rm; @@ -4676,6 +4683,7 @@ namespace eosio { } ++it; } + g.unlock(); if( num_clients > 0 || num_peers > 0 ) { fc_ilog(logger, "p2p client connections: ${num}/${max}, peer connections: ${pnum}/${pmax}, block producer peers: ${num_bp_peers}", @@ -4741,4 +4749,4 @@ namespace eosio { } start_conn_timer( connector_period, {}, timer_type::stats ); } -} // namespace eosio +} // namespace eosio \ No newline at end of file From a6f7761433f16a9885071894b5512044eee789ef Mon Sep 17 00:00:00 2001 From: Jonathan Giszczak Date: Sun, 1 Oct 2023 23:51:00 -0500 Subject: [PATCH 36/46] Revise connection_monitor for thread safety. --- plugins/net_plugin/net_plugin.cpp | 40 ++++++++++++++++++++----------- 1 file changed, 26 insertions(+), 14 deletions(-) diff --git a/plugins/net_plugin/net_plugin.cpp b/plugins/net_plugin/net_plugin.cpp index 8730ab3b77..68ed0333c5 100644 --- a/plugins/net_plugin/net_plugin.cpp +++ b/plugins/net_plugin/net_plugin.cpp @@ -4635,17 +4635,39 @@ namespace eosio { // called from any thread void connections_manager::connection_monitor(const std::weak_ptr& from_connection) { + size_t num_rm = 0, num_clients = 0, num_peers = 0, num_bp_peers = 0; + auto cleanup = [&num_peers, &num_rm, this](vector&& reconnecting, + vector&& removing) { + for( auto c : reconnecting ) { + if (!c->reconnect()) { + --num_peers; + ++num_rm; + removing.push_back(c); + } + } + std::unique_lock g( connections_mtx ); + auto& index = connections.get(); + for( auto c : removing ) { + auto rit = index.find(c); + if (rit != index.end()) { + index.erase(rit); + } + } + g.unlock(); + + }; auto max_time = fc::time_point::now().safe_add(max_cleanup_time); + std::vector reconnecting, removing; auto from = from_connection.lock(); std::unique_lock g( connections_mtx ); auto& index = connections.get(); auto it = (from ? index.find(from) : index.begin()); if (it == index.end()) it = index.begin(); - size_t num_rm = 0, num_clients = 0, num_peers = 0, num_bp_peers = 0; while (it != index.end()) { if (fc::time_point::now() >= max_time) { connection_wptr wit = (*it).c; g.unlock(); + cleanup(std::move(reconnecting), std::move(removing)); fc_dlog( logger, "Exiting connection monitor early, ran out of time: ${t}", ("t", max_time - fc::time_point::now()) ); fc_ilog( logger, "p2p client connections: ${num}/${max}, peer connections: ${pnum}/${pmax}", ("num", num_clients)("max", max_client_count)("pnum", num_peers)("pmax", supplied_peers.size()) ); @@ -4663,27 +4685,17 @@ namespace eosio { if (!c->socket_is_open() && c->state() != connection::connection_state::connecting) { if (!c->incoming()) { - g.unlock(); - fc_dlog(logger, "attempting to connect in connection_monitor"); - if (!c->reconnect()) { - g.lock(); - it = index.erase(it); - g.unlock(); - --num_peers; - ++num_rm; - continue; - } - g.lock(); + reconnecting.push_back(c); } else { --num_clients; ++num_rm; - it = index.erase(it); - continue; + removing.push_back(c); } } ++it; } g.unlock(); + cleanup(std::move(reconnecting), std::move(removing)); if( num_clients > 0 || num_peers > 0 ) { fc_ilog(logger, "p2p client connections: ${num}/${max}, peer connections: ${pnum}/${pmax}, block producer peers: ${num_bp_peers}", From 7acac0c604a31a29f929a35c08151d744f689e26 Mon Sep 17 00:00:00 2001 From: Kevin Heifner Date: Mon, 2 Oct 2023 12:52:40 -0500 Subject: [PATCH 37/46] Misc cleanups --- plugins/net_plugin/net_plugin.cpp | 131 ++++++++++++++---------------- 1 file changed, 61 insertions(+), 70 deletions(-) diff --git a/plugins/net_plugin/net_plugin.cpp b/plugins/net_plugin/net_plugin.cpp index 68ed0333c5..2e6e5edba6 100644 --- a/plugins/net_plugin/net_plugin.cpp +++ b/plugins/net_plugin/net_plugin.cpp @@ -343,24 +343,23 @@ namespace eosio { class connections_manager { public: - struct connection_details { + struct connection_detail { std::string host; connection_ptr c; tcp::endpoint active_ip; tcp::resolver::results_type ips; - operator const connection_ptr&() const { return c; } }; using connection_details_index = multi_index_container< - connection_details, + connection_detail, indexed_by< ordered_non_unique< tag, - key<&connection_details::host> + key<&connection_detail::host> >, ordered_unique< tag, - key<&connection_details::c> + key<&connection_detail::c> > > >; @@ -1199,16 +1198,18 @@ namespace eosio { void connections_manager::for_each_connection( Function&& f ) const { std::shared_lock g( connections_mtx ); auto& index = connections.get(); - std::for_each(index.begin(), index.end(), std::forward(f)); + for( const connection_detail& cd : index ) { + f(cd.c); + } } template void connections_manager::for_each_block_connection( Function&& f ) const { std::shared_lock g( connections_mtx ); auto& index = connections.get(); - for( const connection_ptr& c : index ) { - if (c->is_blocks_connection()) { - f(c); + for( const connection_detail& cd : index ) { + if (cd.c->is_blocks_connection()) { + f(cd.c); } } } @@ -1217,16 +1218,20 @@ namespace eosio { bool connections_manager::any_of_connections(UnaryPredicate&& p) const { std::shared_lock g(connections_mtx); auto& index = connections.get(); - return std::any_of(index.cbegin(), index.cend(), std::forward(p)); + for( const connection_detail& cd : index ) { + if (p(cd.c)) + return true; + } + return false; } template bool connections_manager::any_of_block_connections(UnaryPredicate&& p) const { std::shared_lock g( connections_mtx ); auto& index = connections.get(); - for( const connection_ptr& c : index ) { - if( c->is_blocks_connection() ) { - if (p(c)) + for( const connection_detail& cd : index ) { + if( cd.c->is_blocks_connection() ) { + if (p(cd.c)) return true; } } @@ -4466,7 +4471,7 @@ namespace eosio { std::lock_guard g( connections_mtx ); boost::system::error_code ec; auto endpoint = c->socket->remote_endpoint(ec); - connections.insert( connection_details{ + connections.insert( connection_detail{ .host = c->peer_address(), .c = std::move(c), .active_ip = endpoint} ); @@ -4500,7 +4505,7 @@ namespace eosio { connection_ptr c = std::make_shared( peer_address, listen_address ); c->set_heartbeat_timeout( heartbeat_timeout ); std::lock_guard g( connections_mtx ); - auto [it, inserted] = connections.emplace( connection_details{ + auto [it, inserted] = connections.emplace( connection_detail{ .host = peer_address, .c = std::move(c), .ips = results @@ -4524,8 +4529,8 @@ namespace eosio { auto& index = connections.get(); const auto& it = index.find(c); if( it != index.end() ) { - index.modify(it, [endpoint](connection_details& d) { - d.active_ip = endpoint; + index.modify(it, [endpoint](connection_detail& cd) { + cd.active_ip = endpoint; }); } } @@ -4557,9 +4562,9 @@ namespace eosio { std::lock_guard g( connections_mtx ); auto& index = connections.get(); fc_ilog( logger, "close all ${s} connections", ("s", index.size()) ); - for( const connection_ptr& c : index ) { - fc_dlog( logger, "close: ${cid}", ("cid", c->connection_id) ); - c->close( false, true ); + for( const connection_detail& cd : index ) { + fc_dlog( logger, "close: ${cid}", ("cid", cd.c->connection_id) ); + cd.c->close( false, true ); } connections.clear(); } @@ -4578,8 +4583,8 @@ namespace eosio { std::shared_lock g( connections_mtx ); auto& index = connections.get(); result.reserve( index.size() ); - for( const connection_ptr& c : index ) { - result.emplace_back( c->get_status() ); + for( const connection_detail& cd : index ) { + result.emplace_back( cd.c->get_status() ); } return result; } @@ -4597,6 +4602,9 @@ namespace eosio { void connections_manager::start_conn_timers() { start_conn_timer(connector_period, {}, timer_type::check); // this locks mutex start_conn_timer(connector_period, {}, timer_type::stats); // this locks mutex + if (update_p2p_connection_metrics) { + start_conn_timer(connector_period + connector_period / 2, {}, timer_type::stats); // this locks mutex + } } // called from any thread @@ -4638,23 +4646,18 @@ namespace eosio { size_t num_rm = 0, num_clients = 0, num_peers = 0, num_bp_peers = 0; auto cleanup = [&num_peers, &num_rm, this](vector&& reconnecting, vector&& removing) { - for( auto c : reconnecting ) { + for( auto& c : reconnecting ) { if (!c->reconnect()) { --num_peers; ++num_rm; removing.push_back(c); } } - std::unique_lock g( connections_mtx ); + std::scoped_lock g( connections_mtx ); auto& index = connections.get(); - for( auto c : removing ) { - auto rit = index.find(c); - if (rit != index.end()) { - index.erase(rit); - } + for( auto& c : removing ) { + index.erase(c); } - g.unlock(); - }; auto max_time = fc::time_point::now().safe_add(max_cleanup_time); std::vector reconnecting, removing; @@ -4707,21 +4710,13 @@ namespace eosio { // called from any thread void connections_manager::connection_statistics_monitor(const std::weak_ptr& from_connection) { - auto max_time = fc::time_point::now().safe_add(max_cleanup_time); + assert(update_p2p_connection_metrics); auto from = from_connection.lock(); - const std::lock_guard g(connections_mtx); + std::shared_lock g(connections_mtx); auto& index = connections.get(); - auto it = (from ? index.find(from) : index.begin()); - if( it == index.end()) it = index.begin(); size_t num_clients = 0, num_peers = 0, num_bp_peers = 0; net_plugin::p2p_per_connection_metrics per_connection(index.size()); - while(it != index.end()) { - if(fc::time_point::now() >= max_time) { - connection_wptr wit = (*it).c; - fc_dlog(logger, "connection statistics monitor ran out of time"); - start_conn_timer(std::chrono::milliseconds(1), wit, timer_type::stats); - return; - } + for (auto it = index.begin(); it != index.end(); ++it) { const connection_ptr& c = it->c; if(c->is_bp_connection) { ++num_bp_peers; @@ -4730,35 +4725,31 @@ namespace eosio { } else { ++num_peers; } - if (update_p2p_connection_metrics) { - fc::unique_lock g_conn(c->conn_mtx); - boost::asio::ip::address_v6::bytes_type addr = c->remote_endpoint_ip_array; - g_conn.unlock(); - net_plugin::p2p_per_connection_metrics::connection_metric metrics{ - .connection_id = c->connection_id - , .address = addr - , .port = c->get_remote_endpoint_port() - , .accepting_blocks = c->is_blocks_connection() - , .last_received_block = c->get_last_received_block_num() - , .first_available_block = c->get_peer_start_block_num() - , .last_available_block = c->get_peer_head_block_num() - , .unique_first_block_count = c->get_unique_blocks_rcvd_count() - , .latency = c->get_peer_ping_time_ns() - , .bytes_received = c->get_bytes_received() - , .last_bytes_received = c->get_last_bytes_received() - , .bytes_sent = c->get_bytes_sent() - , .last_bytes_sent = c->get_last_bytes_sent() - , .block_sync_bytes_sent = c->get_block_sync_bytes_sent() - , .connection_start_time = c->connection_start_time - , .log_p2p_address = c->log_p2p_address - }; - per_connection.peers.push_back(metrics); - } - } - - if(update_p2p_connection_metrics) { - update_p2p_connection_metrics({num_peers, num_clients, std::move(per_connection)}); + fc::unique_lock g_conn(c->conn_mtx); + boost::asio::ip::address_v6::bytes_type addr = c->remote_endpoint_ip_array; + g_conn.unlock(); + net_plugin::p2p_per_connection_metrics::connection_metric metrics{ + .connection_id = c->connection_id + , .address = addr + , .port = c->get_remote_endpoint_port() + , .accepting_blocks = c->is_blocks_connection() + , .last_received_block = c->get_last_received_block_num() + , .first_available_block = c->get_peer_start_block_num() + , .last_available_block = c->get_peer_head_block_num() + , .unique_first_block_count = c->get_unique_blocks_rcvd_count() + , .latency = c->get_peer_ping_time_ns() + , .bytes_received = c->get_bytes_received() + , .last_bytes_received = c->get_last_bytes_received() + , .bytes_sent = c->get_bytes_sent() + , .last_bytes_sent = c->get_last_bytes_sent() + , .block_sync_bytes_sent = c->get_block_sync_bytes_sent() + , .connection_start_time = c->connection_start_time + , .log_p2p_address = c->log_p2p_address + }; + per_connection.peers.push_back(metrics); } + g.unlock(); + update_p2p_connection_metrics({num_peers+num_bp_peers, num_clients, std::move(per_connection)}); start_conn_timer( connector_period, {}, timer_type::stats ); } } // namespace eosio \ No newline at end of file From ff7a8a1c1e0e04e092f5980a9bd4e9f3932c8ce0 Mon Sep 17 00:00:00 2001 From: Jonathan Giszczak Date: Wed, 4 Oct 2023 00:05:13 -0500 Subject: [PATCH 38/46] Add block sync bytes received metric and use it in sync throttle test. --- .../include/eosio/net_plugin/net_plugin.hpp | 1 + plugins/net_plugin/net_plugin.cpp | 12 ++- plugins/prometheus_plugin/metrics.hpp | 1 + tests/p2p_sync_throttle_test.py | 99 +++++++++++++++++-- 4 files changed, 100 insertions(+), 13 deletions(-) diff --git a/plugins/net_plugin/include/eosio/net_plugin/net_plugin.hpp b/plugins/net_plugin/include/eosio/net_plugin/net_plugin.hpp index 1db805ac4f..6a797bd18a 100644 --- a/plugins/net_plugin/include/eosio/net_plugin/net_plugin.hpp +++ b/plugins/net_plugin/include/eosio/net_plugin/net_plugin.hpp @@ -54,6 +54,7 @@ namespace eosio { std::chrono::nanoseconds last_bytes_received{0}; size_t bytes_sent{0}; std::chrono::nanoseconds last_bytes_sent{0}; + size_t block_sync_bytes_received{0}; size_t block_sync_bytes_sent{0}; std::chrono::nanoseconds connection_start_time{0}; std::string log_p2p_address; diff --git a/plugins/net_plugin/net_plugin.cpp b/plugins/net_plugin/net_plugin.cpp index 2e6e5edba6..a4137d453d 100644 --- a/plugins/net_plugin/net_plugin.cpp +++ b/plugins/net_plugin/net_plugin.cpp @@ -851,6 +851,7 @@ namespace eosio { std::chrono::nanoseconds get_last_bytes_received() const { return last_bytes_received.load(); } size_t get_bytes_sent() const { return bytes_sent.load(); } std::chrono::nanoseconds get_last_bytes_sent() const { return last_bytes_sent.load(); } + size_t get_block_sync_bytes_received() const { return block_sync_bytes_received.load(); } size_t get_block_sync_bytes_sent() const { return block_sync_bytes_sent.load(); } boost::asio::ip::port_type get_remote_endpoint_port() const { return remote_endpoint_port.load(); } void set_heartbeat_timeout(std::chrono::milliseconds msec) { @@ -888,6 +889,7 @@ namespace eosio { std::atomic bytes_received{0}; std::atomic last_bytes_received{0ns}; std::atomic bytes_sent{0}; + std::atomic block_sync_bytes_received{0}; std::atomic block_sync_bytes_sent{0}; std::atomic last_bytes_sent{0ns}; std::atomic remote_endpoint_port{0}; @@ -1739,6 +1741,7 @@ namespace eosio { auto elapsed = std::chrono::duration_cast(get_time() - connection_start_time); auto current_rate = double(block_sync_bytes_sent) / elapsed.count(); if( current_rate >= block_sync_rate_limit ) { + peer_dlog( this, "throttling block sync to peer ${host}:${port}", ("host", log_remote_endpoint_ip)("port", log_remote_endpoint_port)); return false; } } @@ -3019,7 +3022,6 @@ namespace eosio { fc::raw::unpack( peek_ds, which ); // throw away block_header bh; fc::raw::unpack( peek_ds, bh ); - const block_id_type blk_id = bh.calculate_id(); const uint32_t blk_num = last_received_block_num = block_header::num_from_id(blk_id); // don't add_peer_block because we have not validated this block header yet @@ -3053,6 +3055,7 @@ namespace eosio { return true; } } else { + block_sync_bytes_received += message_length; my_impl->sync_master->sync_recv_block(shared_from_this(), blk_id, blk_num, false); } @@ -4728,7 +4731,8 @@ namespace eosio { fc::unique_lock g_conn(c->conn_mtx); boost::asio::ip::address_v6::bytes_type addr = c->remote_endpoint_ip_array; g_conn.unlock(); - net_plugin::p2p_per_connection_metrics::connection_metric metrics{ + per_connection.peers.emplace_back( + net_plugin::p2p_per_connection_metrics::connection_metric{ .connection_id = c->connection_id , .address = addr , .port = c->get_remote_endpoint_port() @@ -4742,11 +4746,11 @@ namespace eosio { , .last_bytes_received = c->get_last_bytes_received() , .bytes_sent = c->get_bytes_sent() , .last_bytes_sent = c->get_last_bytes_sent() + , .block_sync_bytes_received = c->get_block_sync_bytes_received() , .block_sync_bytes_sent = c->get_block_sync_bytes_sent() , .connection_start_time = c->connection_start_time , .log_p2p_address = c->log_p2p_address - }; - per_connection.peers.push_back(metrics); + }); } g.unlock(); update_p2p_connection_metrics({num_peers+num_bp_peers, num_clients, std::move(per_connection)}); diff --git a/plugins/prometheus_plugin/metrics.hpp b/plugins/prometheus_plugin/metrics.hpp index f67620317e..5562896284 100644 --- a/plugins/prometheus_plugin/metrics.hpp +++ b/plugins/prometheus_plugin/metrics.hpp @@ -187,6 +187,7 @@ struct catalog_type { add_and_set_gauge("last_bytes_received", peer.last_bytes_received.count()); add_and_set_gauge("bytes_sent", peer.bytes_sent); add_and_set_gauge("last_bytes_sent", peer.last_bytes_sent.count()); + add_and_set_gauge("block_sync_bytes_received", peer.block_sync_bytes_received); add_and_set_gauge("block_sync_bytes_sent", peer.block_sync_bytes_sent); add_and_set_gauge("connection_start_time", peer.connection_start_time.count()); add_and_set_gauge(peer.log_p2p_address, 0); // Empty gauge; we only want the label diff --git a/tests/p2p_sync_throttle_test.py b/tests/p2p_sync_throttle_test.py index 0de560d44e..647ce2d3da 100755 --- a/tests/p2p_sync_throttle_test.py +++ b/tests/p2p_sync_throttle_test.py @@ -1,5 +1,8 @@ #!/usr/bin/env python3 +import math +import re +import requests import signal import time @@ -13,6 +16,7 @@ # ############################################################### +PROMETHEUS_URL = '/v1/prometheus/metrics' Print=Utils.Print errorExit=Utils.errorExit @@ -38,6 +42,19 @@ cluster=Cluster(unshared=args.unshared, keepRunning=args.leave_running, keepLogs=args.keep_logs) walletMgr=WalletMgr(True) +def readMetrics(host: str, port: str): + response = requests.get(f'http://{host}:{port}{PROMETHEUS_URL}', timeout=10) + if response.status_code != 200: + errorExit(f'Prometheus metrics URL returned {response.status_code}: {response.url}') + return response + +def extractPrometheusMetric(connID: str, metric: str, text: str): + searchStr = f'nodeos_p2p_connections{{connid_{connID}="{metric}"}} ' + begin = text.find(searchStr) + len(searchStr) + return int(text[begin:response.text.find('\n', begin)]) + +prometheusHostPortPattern = re.compile(r'^nodeos_p2p_connections.connid_([0-9])="localhost:([0-9]*)', re.MULTILINE) + try: TestHelper.printSystemInfo("BEGIN") @@ -46,10 +63,7 @@ Print(f'producing nodes: {pnodes}, delay between nodes launch: {delay} second{"s" if delay != 1 else ""}') Print("Stand up cluster") - if args.plugin: - extraNodeosArgs = ''.join([i+j for i,j in zip([' --plugin '] * len(args.plugin), args.plugin)]) - else: - extraNodeosArgs = '' + extraNodeosArgs = '--plugin eosio::prometheus_plugin --connection-cleanup-period 3' # Custom topology is a line of singlely connected nodes from highest node number in sequence to lowest, # the reverse of the usual TestHarness line topology. if cluster.launch(pnodes=pnodes, unstartedNodes=2, totalNodes=total_nodes, prodCount=prod_count, @@ -112,19 +126,86 @@ cluster.launchUnstarted(2) throttledNode = cluster.getNode(3) - time.sleep(15) + while True: + try: + response = readMetrics(throttlingNode.host, throttlingNode.port) + except (requests.ConnectionError, requests.ReadTimeout) as e: + # waiting for node to finish startup and respond + time.sleep(0.5) + else: + connPorts = prometheusHostPortPattern.findall(response.text) + if len(connPorts) < 3: + # wait for node to be connected + time.sleep(0.5) + continue + Print('Throttling Node Start State') + #Print(response.text) + throttlingNodePortMap = {port: id for id, port in connPorts} + startSyncThrottlingBytesSent = extractPrometheusMetric(throttlingNodePortMap['9879'], + 'block_sync_bytes_sent', + response.text) + Print(f'Start sync throttling bytes sent: {startSyncThrottlingBytesSent}') + break + while True: + try: + response = readMetrics(throttledNode.host, throttledNode.port) + except (requests.ConnectionError, requests.ReadTimeout) as e: + # waiting for node to finish startup and respond + time.sleep(0.5) + else: + if 'nodeos_p2p_connections{connid_2' not in response.text: + # wait for sending node to be connected + continue + Print('Throttled Node Start State') + #Print(response.text) + connPorts = prometheusHostPortPattern.findall(response.text) + throttledNodePortMap = {port: id for id, port in connPorts} + startSyncThrottledBytesReceived = extractPrometheusMetric(throttledNodePortMap['9878'], + 'block_sync_bytes_received', + response.text) + Print(f'Start sync throttled bytes received: {startSyncThrottledBytesReceived}') + break + # Throttling node was offline during block generation and once online receives blocks as fast as possible while # transmitting blocks to the next node in line at the above throttle setting. assert throttlingNode.waitForBlock(endLargeBlocksHeadBlock), f'wait for block {endLargeBlocksHeadBlock} on throttled node timed out' endThrottlingSync = time.time() + try: + response = readMetrics(throttlingNode.host, throttlingNode.port) + except (requests.ConnectionError, requests.ReadTimeout) as e: + errorExit(str(e)) + else: + Print('Throttling Node End State') + #Print(response.text) + endSyncThrottlingBytesSent = extractPrometheusMetric(throttlingNodePortMap['9879'], + 'block_sync_bytes_sent', + response.text) + Print(f'End sync throttling bytes sent: {endSyncThrottlingBytesSent}') # Throttled node is connecting to a listen port with a block sync throttle applied so it will receive # blocks more slowly during syncing than an unthrottled node. assert throttledNode.waitForBlock(endLargeBlocksHeadBlock, timeout=90), f'Wait for block {endLargeBlocksHeadBlock} on sync node timed out' endThrottledSync = time.time() - Print(f'Unthrottled sync time: {endThrottlingSync - clusterStart} seconds') - Print(f'Throttled sync time: {endThrottledSync - clusterStart} seconds') - # 15 seconds chosen as the minimum reasonable sync time differential given the throttle and the average block size. - assert endThrottledSync - clusterStart > endThrottlingSync - clusterStart + 15, 'Throttled sync time must be at least 15 seconds greater than unthrottled' + try: + response = readMetrics(throttledNode.host, throttledNode.port) + except (requests.ConnectionError, requests.ReadTimeout) as e: + errorExit(str(e)) + else: + Print('Throttled Node End State') + #Print(response.text) + endSyncThrottledBytesReceived = extractPrometheusMetric(throttledNodePortMap['9878'], + 'block_sync_bytes_received', + response.text) + Print(f'End sync throttled bytes received: {endSyncThrottledBytesReceived}') + throttlingElapsed = endThrottlingSync - clusterStart + throttledElapsed = endThrottledSync - clusterStart + Print(f'Unthrottled sync time: {throttlingElapsed} seconds') + Print(f'Throttled sync time: {throttledElapsed} seconds') + # Sanity check + assert throttledElapsed > throttlingElapsed + 15, 'Throttled sync time must be at least 15 seconds greater than unthrottled' + # Calculate block receive rate + calculatedRate = (endSyncThrottledBytesReceived - startSyncThrottledBytesReceived)/throttledElapsed + #assert math.isclose(calculatedRate, 40000, rel_tol=0.01), f'Throttled bytes receive rate must be near 40,000, was {calculatedRate}' + assert calculatedRate < 40000, f'Throttled bytes receive rate must be less than 40,000, was {calculatedRate}' testSuccessful=True finally: From 6b2fe6396984023e761fb88673474ffa24cd8201 Mon Sep 17 00:00:00 2001 From: Peter Oschwald Date: Wed, 4 Oct 2023 08:03:20 -0500 Subject: [PATCH 39/46] Add requests module for test. --- .cicd/platforms/ubuntu20.Dockerfile | 1 + .cicd/platforms/ubuntu22.Dockerfile | 1 + 2 files changed, 2 insertions(+) diff --git a/.cicd/platforms/ubuntu20.Dockerfile b/.cicd/platforms/ubuntu20.Dockerfile index fe7aaea80e..89783cb40e 100644 --- a/.cicd/platforms/ubuntu20.Dockerfile +++ b/.cicd/platforms/ubuntu20.Dockerfile @@ -12,6 +12,7 @@ RUN apt-get update && apt-get upgrade -y && \ llvm-11-dev \ ninja-build \ python3-numpy \ + python3-requests \ file \ zlib1g-dev \ zstd && \ diff --git a/.cicd/platforms/ubuntu22.Dockerfile b/.cicd/platforms/ubuntu22.Dockerfile index 275d52a4c7..8fcca67050 100644 --- a/.cicd/platforms/ubuntu22.Dockerfile +++ b/.cicd/platforms/ubuntu22.Dockerfile @@ -11,6 +11,7 @@ RUN apt-get update && apt-get upgrade -y && \ llvm-11-dev \ ninja-build \ python3-numpy \ + python3-requests \ file \ zlib1g-dev \ zstd From 1e5b4275d19fc760f9e7ff009f3cbc5a4d6c6c5c Mon Sep 17 00:00:00 2001 From: Jonathan Giszczak Date: Fri, 6 Oct 2023 14:59:21 -0500 Subject: [PATCH 40/46] Add throttling flag to Prometheus peer data and use it in sync test. Remove dependency on python requests package. Remove locale-aware parsing of sync throttle rate. Prevent transmitting peer from throttling while not in sync mode. Add timeouts to throttle sync test. --- .cicd/platforms/ubuntu20.Dockerfile | 1 - .cicd/platforms/ubuntu22.Dockerfile | 1 - .../include/eosio/net_plugin/net_plugin.hpp | 1 + plugins/net_plugin/net_plugin.cpp | 14 +- plugins/prometheus_plugin/metrics.hpp | 1 + tests/p2p_sync_throttle_test.py | 126 +++++++++--------- 6 files changed, 76 insertions(+), 68 deletions(-) diff --git a/.cicd/platforms/ubuntu20.Dockerfile b/.cicd/platforms/ubuntu20.Dockerfile index 89783cb40e..fe7aaea80e 100644 --- a/.cicd/platforms/ubuntu20.Dockerfile +++ b/.cicd/platforms/ubuntu20.Dockerfile @@ -12,7 +12,6 @@ RUN apt-get update && apt-get upgrade -y && \ llvm-11-dev \ ninja-build \ python3-numpy \ - python3-requests \ file \ zlib1g-dev \ zstd && \ diff --git a/.cicd/platforms/ubuntu22.Dockerfile b/.cicd/platforms/ubuntu22.Dockerfile index 8fcca67050..275d52a4c7 100644 --- a/.cicd/platforms/ubuntu22.Dockerfile +++ b/.cicd/platforms/ubuntu22.Dockerfile @@ -11,7 +11,6 @@ RUN apt-get update && apt-get upgrade -y && \ llvm-11-dev \ ninja-build \ python3-numpy \ - python3-requests \ file \ zlib1g-dev \ zstd diff --git a/plugins/net_plugin/include/eosio/net_plugin/net_plugin.hpp b/plugins/net_plugin/include/eosio/net_plugin/net_plugin.hpp index 6a797bd18a..1548006803 100644 --- a/plugins/net_plugin/include/eosio/net_plugin/net_plugin.hpp +++ b/plugins/net_plugin/include/eosio/net_plugin/net_plugin.hpp @@ -56,6 +56,7 @@ namespace eosio { std::chrono::nanoseconds last_bytes_sent{0}; size_t block_sync_bytes_received{0}; size_t block_sync_bytes_sent{0}; + bool block_sync_throttling{false}; std::chrono::nanoseconds connection_start_time{0}; std::string log_p2p_address; }; diff --git a/plugins/net_plugin/net_plugin.cpp b/plugins/net_plugin/net_plugin.cpp index 8bec0ad043..565729e203 100644 --- a/plugins/net_plugin/net_plugin.cpp +++ b/plugins/net_plugin/net_plugin.cpp @@ -853,6 +853,7 @@ namespace eosio { std::chrono::nanoseconds get_last_bytes_sent() const { return last_bytes_sent.load(); } size_t get_block_sync_bytes_received() const { return block_sync_bytes_received.load(); } size_t get_block_sync_bytes_sent() const { return block_sync_bytes_sent.load(); } + bool get_block_sync_throttling() const { return block_sync_throttling.load(); } boost::asio::ip::port_type get_remote_endpoint_port() const { return remote_endpoint_port.load(); } void set_heartbeat_timeout(std::chrono::milliseconds msec) { hb_timeout = msec; @@ -891,6 +892,7 @@ namespace eosio { std::atomic bytes_sent{0}; std::atomic block_sync_bytes_received{0}; std::atomic block_sync_bytes_sent{0}; + std::atomic block_sync_throttling{false}; std::atomic last_bytes_sent{0ns}; std::atomic remote_endpoint_port{0}; @@ -1737,14 +1739,16 @@ namespace eosio { sb = cc.fetch_block_by_number( num ); // thread-safe } FC_LOG_AND_DROP(); if( sb ) { - if( block_sync_rate_limit > 0 ) { + if( block_sync_rate_limit > 0 && peer_syncing_from_us ) { auto elapsed = std::chrono::duration_cast(get_time() - connection_start_time); auto current_rate = double(block_sync_bytes_sent) / elapsed.count(); if( current_rate >= block_sync_rate_limit ) { + block_sync_throttling = true; peer_dlog( this, "throttling block sync to peer ${host}:${port}", ("host", log_remote_endpoint_ip)("port", log_remote_endpoint_port)); return false; } } + block_sync_throttling = false; block_sync_bytes_sent += enqueue_block( sb, true ); ++peer_requested->last; } else { @@ -4007,13 +4011,12 @@ namespace eosio { ( "p2p-listen-endpoint", bpo::value< vector >()->default_value( vector(1, string("0.0.0.0:9876:0")) ), "The actual host:port[:] used to listen for incoming p2p connections. May be used multiple times. " " The optional rate cap will limit per connection block sync bandwidth to the specified rate. Total " " allowed bandwidth is the rate-cap multiplied by the connection count limit. A number alone will be " - " interpreted as bytes per second. The number is parsed locale-aware and may include thousands and " - " decimal separators. It may also be suffixed with units. Supported units are: " + " interpreted as bytes per second. The number may be suffixed with units. Supported units are: " " 'B/s', 'KB/s', 'MB/s, 'GB/s', 'TB/s', 'KiB/s', 'MiB/s', 'GiB/s', 'TiB/s'." " Transactions and blocks outside of sync mode are not throttled." " Examples:\n" " 192.168.0.100:9876:1MiB/s\n" - " node.eos.io:9876:1,512KB/s\n" + " node.eos.io:9876:1512KB/s\n" " node.eos.io:9876:0.5GB/s\n" " [2001:db8:85a3:8d3:1319:8a2e:370:7348]:9876:250KB/s") ( "p2p-server-address", bpo::value< vector >(), "An externally accessible host:port for identifying this node. Defaults to p2p-listen-endpoint. May be used as many times as p2p-listen-endpoint. If provided, the first address will be used in handshakes with other nodes. Otherwise the default is used.") @@ -4094,8 +4097,6 @@ namespace eosio { size_t net_plugin_impl::parse_connection_rate_limit( const std::string& limit_str) const { std::istringstream in(limit_str); - fc_dlog( logger, "parsing connection endpoint limit ${limit} with locale ${l}", ("limit", limit_str)("l", std::locale("").name())); - in.imbue(std::locale("")); double limit{0}; in >> limit; EOS_ASSERT(limit >= 0.0, plugin_config_exception, "block sync rate limit must not be negative: ${limit}", ("limit", limit_str)); @@ -4749,6 +4750,7 @@ namespace eosio { , .last_bytes_sent = c->get_last_bytes_sent() , .block_sync_bytes_received = c->get_block_sync_bytes_received() , .block_sync_bytes_sent = c->get_block_sync_bytes_sent() + , .block_sync_throttling = c->get_block_sync_throttling() , .connection_start_time = c->connection_start_time , .log_p2p_address = c->log_p2p_address }); diff --git a/plugins/prometheus_plugin/metrics.hpp b/plugins/prometheus_plugin/metrics.hpp index 5562896284..9c0fb3ac88 100644 --- a/plugins/prometheus_plugin/metrics.hpp +++ b/plugins/prometheus_plugin/metrics.hpp @@ -189,6 +189,7 @@ struct catalog_type { add_and_set_gauge("last_bytes_sent", peer.last_bytes_sent.count()); add_and_set_gauge("block_sync_bytes_received", peer.block_sync_bytes_received); add_and_set_gauge("block_sync_bytes_sent", peer.block_sync_bytes_sent); + add_and_set_gauge("block_sync_throttling", peer.block_sync_throttling); add_and_set_gauge("connection_start_time", peer.connection_start_time.count()); add_and_set_gauge(peer.log_p2p_address, 0); // Empty gauge; we only want the label } diff --git a/tests/p2p_sync_throttle_test.py b/tests/p2p_sync_throttle_test.py index 647ce2d3da..b8cec9bda9 100755 --- a/tests/p2p_sync_throttle_test.py +++ b/tests/p2p_sync_throttle_test.py @@ -2,11 +2,12 @@ import math import re -import requests import signal +import sys import time +import urllib -from TestHarness import Cluster, TestHelper, Utils, WalletMgr, CORE_SYMBOL, createAccountKeys +from TestHarness import Cluster, TestHelper, Utils, WalletMgr, CORE_SYMBOL, createAccountKeys, ReturnType from TestHarness.TestHelper import AppArgs ############################################################### @@ -16,8 +17,6 @@ # ############################################################### -PROMETHEUS_URL = '/v1/prometheus/metrics' - Print=Utils.Print errorExit=Utils.errorExit @@ -42,16 +41,10 @@ cluster=Cluster(unshared=args.unshared, keepRunning=args.leave_running, keepLogs=args.keep_logs) walletMgr=WalletMgr(True) -def readMetrics(host: str, port: str): - response = requests.get(f'http://{host}:{port}{PROMETHEUS_URL}', timeout=10) - if response.status_code != 200: - errorExit(f'Prometheus metrics URL returned {response.status_code}: {response.url}') - return response - def extractPrometheusMetric(connID: str, metric: str, text: str): searchStr = f'nodeos_p2p_connections{{connid_{connID}="{metric}"}} ' begin = text.find(searchStr) + len(searchStr) - return int(text[begin:response.text.find('\n', begin)]) + return int(text[begin:text.find('\n', begin)]) prometheusHostPortPattern = re.compile(r'^nodeos_p2p_connections.connid_([0-9])="localhost:([0-9]*)', re.MULTILINE) @@ -101,7 +94,7 @@ def extractPrometheusMetric(connID: str, metric: str, text: str): beginLargeBlocksHeadBlock = nonProdNode.getHeadBlockNum() Print("Configure and launch txn generators") - targetTpsPerGenerator = 100 + targetTpsPerGenerator = 200 testTrxGenDurationSec=60 trxGeneratorCnt=1 cluster.launchTrxGenerators(contractOwnerAcctName=cluster.eosioAccount.name, acctNamesList=[accounts[0].name,accounts[1].name], @@ -113,10 +106,10 @@ def extractPrometheusMetric(connID: str, metric: str, text: str): throttlingNode = cluster.unstartedNodes[0] i = throttlingNode.cmd.index('--p2p-listen-endpoint') throttleListenAddr = throttlingNode.cmd[i+1] - # Using 40000 bytes per second to allow syncing of 10,000 byte blocks resulting from - # the trx generators in a reasonable amount of time, while still being reliably - # distinguishable from unthrottled throughput. - throttlingNode.cmd[i+1] = throttlingNode.cmd[i+1] + ':40000B/s' + # Using 5000 bytes per second to allow syncing of ~100 transaction blocks resulting from + # the trx generators in a reasonable amount of time, while still being able to capture + # throttling state within the Prometheus update window (3 seconds in this test). + throttlingNode.cmd[i+1] = throttlingNode.cmd[i+1] + ':5000B/s' throttleListenIP, throttleListenPort = throttleListenAddr.split(':') throttlingNode.cmd.append('--p2p-listen-endpoint') throttlingNode.cmd.append(f'{throttleListenIP}:{int(throttleListenPort)+100}:1TB/s') @@ -126,86 +119,99 @@ def extractPrometheusMetric(connID: str, metric: str, text: str): cluster.launchUnstarted(2) throttledNode = cluster.getNode(3) - while True: + while time.time() < clusterStart + 30: try: - response = readMetrics(throttlingNode.host, throttlingNode.port) - except (requests.ConnectionError, requests.ReadTimeout) as e: - # waiting for node to finish startup and respond + response = throttlingNode.processUrllibRequest('prometheus', 'metrics', returnType=ReturnType.raw, printReturnLimit=16).decode() + except urllib.error.URLError: + # catch ConnectionRefusedEror waiting for node to finish startup and respond time.sleep(0.5) + continue else: - connPorts = prometheusHostPortPattern.findall(response.text) + if len(response) < 100: + # tolerate HTTPError as well (method returns only the exception code) + continue + connPorts = prometheusHostPortPattern.findall(response) if len(connPorts) < 3: # wait for node to be connected time.sleep(0.5) continue Print('Throttling Node Start State') - #Print(response.text) throttlingNodePortMap = {port: id for id, port in connPorts} startSyncThrottlingBytesSent = extractPrometheusMetric(throttlingNodePortMap['9879'], - 'block_sync_bytes_sent', - response.text) + 'block_sync_bytes_sent', + response) + startSyncThrottlingState = extractPrometheusMetric(throttlingNodePortMap['9879'], + 'block_sync_throttling', + response) Print(f'Start sync throttling bytes sent: {startSyncThrottlingBytesSent}') + Print(f'Start sync throttling node throttling: {"True" if startSyncThrottlingState else "False"}') break - while True: + else: + errorExit('Timed out') + + while time.time() < clusterStart + 30: try: - response = readMetrics(throttledNode.host, throttledNode.port) - except (requests.ConnectionError, requests.ReadTimeout) as e: - # waiting for node to finish startup and respond + response = throttledNode.processUrllibRequest('prometheus', 'metrics', returnType=ReturnType.raw, printReturnLimit=16).decode() + except urllib.error.URLError: + # catch ConnectionRefusedError waiting for node to finish startup and respond time.sleep(0.5) + continue else: - if 'nodeos_p2p_connections{connid_2' not in response.text: + if len(response) < 100: + # tolerate HTTPError as well (method returns only the exception code) + time.sleep(0.5) + continue + connPorts = prometheusHostPortPattern.findall(response) + if len(connPorts) < 2: # wait for sending node to be connected continue Print('Throttled Node Start State') - #Print(response.text) - connPorts = prometheusHostPortPattern.findall(response.text) throttledNodePortMap = {port: id for id, port in connPorts} startSyncThrottledBytesReceived = extractPrometheusMetric(throttledNodePortMap['9878'], - 'block_sync_bytes_received', - response.text) + 'block_sync_bytes_received', + response) Print(f'Start sync throttled bytes received: {startSyncThrottledBytesReceived}') break + else: + errorExit('Timed out') # Throttling node was offline during block generation and once online receives blocks as fast as possible while # transmitting blocks to the next node in line at the above throttle setting. assert throttlingNode.waitForBlock(endLargeBlocksHeadBlock), f'wait for block {endLargeBlocksHeadBlock} on throttled node timed out' endThrottlingSync = time.time() - try: - response = readMetrics(throttlingNode.host, throttlingNode.port) - except (requests.ConnectionError, requests.ReadTimeout) as e: - errorExit(str(e)) - else: - Print('Throttling Node End State') - #Print(response.text) - endSyncThrottlingBytesSent = extractPrometheusMetric(throttlingNodePortMap['9879'], - 'block_sync_bytes_sent', - response.text) - Print(f'End sync throttling bytes sent: {endSyncThrottlingBytesSent}') + response = throttlingNode.processUrllibRequest('prometheus', 'metrics', exitOnError=True, returnType=ReturnType.raw, printReturnLimit=16).decode() + Print('Throttling Node End State') + endSyncThrottlingBytesSent = extractPrometheusMetric(throttlingNodePortMap['9879'], + 'block_sync_bytes_sent', + response) + Print(f'End sync throttling bytes sent: {endSyncThrottlingBytesSent}') # Throttled node is connecting to a listen port with a block sync throttle applied so it will receive # blocks more slowly during syncing than an unthrottled node. + wasThrottled = False + while time.time() < endThrottlingSync + 30: + response = throttlingNode.processUrllibRequest('prometheus', 'metrics', exitOnError=True, + returnType=ReturnType.raw, printReturnLimit=16).decode() + throttledState = extractPrometheusMetric(throttlingNodePortMap['9879'], + 'block_sync_throttling', + response) + if throttledState: + wasThrottled = True + break assert throttledNode.waitForBlock(endLargeBlocksHeadBlock, timeout=90), f'Wait for block {endLargeBlocksHeadBlock} on sync node timed out' endThrottledSync = time.time() - try: - response = readMetrics(throttledNode.host, throttledNode.port) - except (requests.ConnectionError, requests.ReadTimeout) as e: - errorExit(str(e)) - else: - Print('Throttled Node End State') - #Print(response.text) - endSyncThrottledBytesReceived = extractPrometheusMetric(throttledNodePortMap['9878'], - 'block_sync_bytes_received', - response.text) - Print(f'End sync throttled bytes received: {endSyncThrottledBytesReceived}') + response = throttledNode.processUrllibRequest('prometheus', 'metrics', exitOnError=True, returnType=ReturnType.raw, printReturnLimit=16).decode() + Print('Throttled Node End State') + endSyncThrottledBytesReceived = extractPrometheusMetric(throttledNodePortMap['9878'], + 'block_sync_bytes_received', + response) + Print(f'End sync throttled bytes received: {endSyncThrottledBytesReceived}') throttlingElapsed = endThrottlingSync - clusterStart throttledElapsed = endThrottledSync - clusterStart Print(f'Unthrottled sync time: {throttlingElapsed} seconds') Print(f'Throttled sync time: {throttledElapsed} seconds') # Sanity check - assert throttledElapsed > throttlingElapsed + 15, 'Throttled sync time must be at least 15 seconds greater than unthrottled' - # Calculate block receive rate - calculatedRate = (endSyncThrottledBytesReceived - startSyncThrottledBytesReceived)/throttledElapsed - #assert math.isclose(calculatedRate, 40000, rel_tol=0.01), f'Throttled bytes receive rate must be near 40,000, was {calculatedRate}' - assert calculatedRate < 40000, f'Throttled bytes receive rate must be less than 40,000, was {calculatedRate}' + assert throttledElapsed > throttlingElapsed + 10, 'Throttled sync time must be at least 10 seconds greater than unthrottled' + assert wasThrottled, 'Throttling node never reported throttling its transmission rate' testSuccessful=True finally: From db34bbf35fce488a53cf9740bb04c7dce0a4d2bc Mon Sep 17 00:00:00 2001 From: Jonathan Giszczak Date: Fri, 6 Oct 2023 16:38:05 -0500 Subject: [PATCH 41/46] Revise for better repeatability. --- tests/p2p_sync_throttle_test.py | 27 +++++++++++---------------- 1 file changed, 11 insertions(+), 16 deletions(-) diff --git a/tests/p2p_sync_throttle_test.py b/tests/p2p_sync_throttle_test.py index b8cec9bda9..da101eb9b8 100755 --- a/tests/p2p_sync_throttle_test.py +++ b/tests/p2p_sync_throttle_test.py @@ -94,7 +94,7 @@ def extractPrometheusMetric(connID: str, metric: str, text: str): beginLargeBlocksHeadBlock = nonProdNode.getHeadBlockNum() Print("Configure and launch txn generators") - targetTpsPerGenerator = 200 + targetTpsPerGenerator = 500 testTrxGenDurationSec=60 trxGeneratorCnt=1 cluster.launchTrxGenerators(contractOwnerAcctName=cluster.eosioAccount.name, acctNamesList=[accounts[0].name,accounts[1].name], @@ -106,10 +106,10 @@ def extractPrometheusMetric(connID: str, metric: str, text: str): throttlingNode = cluster.unstartedNodes[0] i = throttlingNode.cmd.index('--p2p-listen-endpoint') throttleListenAddr = throttlingNode.cmd[i+1] - # Using 5000 bytes per second to allow syncing of ~100 transaction blocks resulting from + # Using 4000 bytes per second to allow syncing of ~250 transaction blocks resulting from # the trx generators in a reasonable amount of time, while still being able to capture # throttling state within the Prometheus update window (3 seconds in this test). - throttlingNode.cmd[i+1] = throttlingNode.cmd[i+1] + ':5000B/s' + throttlingNode.cmd[i+1] = throttlingNode.cmd[i+1] + ':4000B/s' throttleListenIP, throttleListenPort = throttleListenAddr.split(':') throttlingNode.cmd.append('--p2p-listen-endpoint') throttlingNode.cmd.append(f'{throttleListenIP}:{int(throttleListenPort)+100}:1TB/s') @@ -119,7 +119,7 @@ def extractPrometheusMetric(connID: str, metric: str, text: str): cluster.launchUnstarted(2) throttledNode = cluster.getNode(3) - while time.time() < clusterStart + 30: + while True: try: response = throttlingNode.processUrllibRequest('prometheus', 'metrics', returnType=ReturnType.raw, printReturnLimit=16).decode() except urllib.error.URLError: @@ -145,11 +145,10 @@ def extractPrometheusMetric(connID: str, metric: str, text: str): response) Print(f'Start sync throttling bytes sent: {startSyncThrottlingBytesSent}') Print(f'Start sync throttling node throttling: {"True" if startSyncThrottlingState else "False"}') + if time.time() > clusterStart + 30: errorExit('Timed out') break - else: - errorExit('Timed out') - while time.time() < clusterStart + 30: + while True: try: response = throttledNode.processUrllibRequest('prometheus', 'metrics', returnType=ReturnType.raw, printReturnLimit=16).decode() except urllib.error.URLError: @@ -168,12 +167,10 @@ def extractPrometheusMetric(connID: str, metric: str, text: str): Print('Throttled Node Start State') throttledNodePortMap = {port: id for id, port in connPorts} startSyncThrottledBytesReceived = extractPrometheusMetric(throttledNodePortMap['9878'], - 'block_sync_bytes_received', - response) + 'block_sync_bytes_received', + response) Print(f'Start sync throttled bytes received: {startSyncThrottledBytesReceived}') break - else: - errorExit('Timed out') # Throttling node was offline during block generation and once online receives blocks as fast as possible while # transmitting blocks to the next node in line at the above throttle setting. @@ -182,8 +179,8 @@ def extractPrometheusMetric(connID: str, metric: str, text: str): response = throttlingNode.processUrllibRequest('prometheus', 'metrics', exitOnError=True, returnType=ReturnType.raw, printReturnLimit=16).decode() Print('Throttling Node End State') endSyncThrottlingBytesSent = extractPrometheusMetric(throttlingNodePortMap['9879'], - 'block_sync_bytes_sent', - response) + 'block_sync_bytes_sent', + response) Print(f'End sync throttling bytes sent: {endSyncThrottlingBytesSent}') # Throttled node is connecting to a listen port with a block sync throttle applied so it will receive # blocks more slowly during syncing than an unthrottled node. @@ -197,7 +194,7 @@ def extractPrometheusMetric(connID: str, metric: str, text: str): if throttledState: wasThrottled = True break - assert throttledNode.waitForBlock(endLargeBlocksHeadBlock, timeout=90), f'Wait for block {endLargeBlocksHeadBlock} on sync node timed out' + assert throttledNode.waitForBlock(endLargeBlocksHeadBlock, timeout=30), f'Wait for block {endLargeBlocksHeadBlock} on sync node timed out' endThrottledSync = time.time() response = throttledNode.processUrllibRequest('prometheus', 'metrics', exitOnError=True, returnType=ReturnType.raw, printReturnLimit=16).decode() Print('Throttled Node End State') @@ -209,8 +206,6 @@ def extractPrometheusMetric(connID: str, metric: str, text: str): throttledElapsed = endThrottledSync - clusterStart Print(f'Unthrottled sync time: {throttlingElapsed} seconds') Print(f'Throttled sync time: {throttledElapsed} seconds') - # Sanity check - assert throttledElapsed > throttlingElapsed + 10, 'Throttled sync time must be at least 10 seconds greater than unthrottled' assert wasThrottled, 'Throttling node never reported throttling its transmission rate' testSuccessful=True From 6db4ad8aa535d0d4aef970ed4b74ef485be537d5 Mon Sep 17 00:00:00 2001 From: Jonathan Giszczak Date: Fri, 6 Oct 2023 18:33:30 -0500 Subject: [PATCH 42/46] Customize plugin_config_exception handling in net_plugin. --- plugins/net_plugin/net_plugin.cpp | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/plugins/net_plugin/net_plugin.cpp b/plugins/net_plugin/net_plugin.cpp index 565729e203..c33d5d2e80 100644 --- a/plugins/net_plugin/net_plugin.cpp +++ b/plugins/net_plugin/net_plugin.cpp @@ -4340,6 +4340,10 @@ namespace eosio { fc::create_listener( my->thread_pool.get_executor(), logger, accept_timeout, listen_addr, extra_listening_log_info, [my = my, addr = p2p_addr, block_sync_rate_limit = block_sync_rate_limit](tcp::socket&& socket) { fc_dlog( logger, "start listening on ${addr} with peer sync throttle ${limit}", ("addr", addr)("limit", block_sync_rate_limit)); my->create_session(std::move(socket), addr, block_sync_rate_limit); }); + } catch (const plugin_config_exception& e) { + fc_elog( logger, "${msg}", ("msg", e.top_message())); + app().quit(); + return; } catch (const std::exception& e) { fc_elog( logger, "net_plugin::plugin_startup failed to listen on ${addr}, ${what}", ("addr", address)("what", e.what()) ); From caa703d32ac2b90f7b48cdf3b4285c28a6bf93a8 Mon Sep 17 00:00:00 2001 From: Jonathan Giszczak Date: Tue, 10 Oct 2023 14:04:17 -0500 Subject: [PATCH 43/46] Add comment. --- plugins/net_plugin/net_plugin.cpp | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/plugins/net_plugin/net_plugin.cpp b/plugins/net_plugin/net_plugin.cpp index c33d5d2e80..34147c0204 100644 --- a/plugins/net_plugin/net_plugin.cpp +++ b/plugins/net_plugin/net_plugin.cpp @@ -1739,7 +1739,7 @@ namespace eosio { sb = cc.fetch_block_by_number( num ); // thread-safe } FC_LOG_AND_DROP(); if( sb ) { - if( block_sync_rate_limit > 0 && peer_syncing_from_us ) { + if( block_sync_rate_limit > 0 && peer_syncing_from_us ) { // only throttle peers in sync mode even if a limit is set auto elapsed = std::chrono::duration_cast(get_time() - connection_start_time); auto current_rate = double(block_sync_bytes_sent) / elapsed.count(); if( current_rate >= block_sync_rate_limit ) { From 8a5dfeb2b15267d04df073813c523649ceee220a Mon Sep 17 00:00:00 2001 From: Jonathan Giszczak Date: Tue, 10 Oct 2023 14:35:23 -0500 Subject: [PATCH 44/46] Revert "Add comment." This reverts commit caa703d32ac2b90f7b48cdf3b4285c28a6bf93a8. --- plugins/net_plugin/net_plugin.cpp | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/plugins/net_plugin/net_plugin.cpp b/plugins/net_plugin/net_plugin.cpp index 34147c0204..c33d5d2e80 100644 --- a/plugins/net_plugin/net_plugin.cpp +++ b/plugins/net_plugin/net_plugin.cpp @@ -1739,7 +1739,7 @@ namespace eosio { sb = cc.fetch_block_by_number( num ); // thread-safe } FC_LOG_AND_DROP(); if( sb ) { - if( block_sync_rate_limit > 0 && peer_syncing_from_us ) { // only throttle peers in sync mode even if a limit is set + if( block_sync_rate_limit > 0 && peer_syncing_from_us ) { auto elapsed = std::chrono::duration_cast(get_time() - connection_start_time); auto current_rate = double(block_sync_bytes_sent) / elapsed.count(); if( current_rate >= block_sync_rate_limit ) { From e3d4870ac48394817e2195d3df958359e6b9afc8 Mon Sep 17 00:00:00 2001 From: Jonathan Giszczak Date: Tue, 10 Oct 2023 14:40:09 -0500 Subject: [PATCH 45/46] Address peer review comments. --- plugins/net_plugin/net_plugin.cpp | 1 + tests/p2p_sync_throttle_test.py | 14 ++++++++++++-- 2 files changed, 13 insertions(+), 2 deletions(-) diff --git a/plugins/net_plugin/net_plugin.cpp b/plugins/net_plugin/net_plugin.cpp index c33d5d2e80..449edfd68a 100644 --- a/plugins/net_plugin/net_plugin.cpp +++ b/plugins/net_plugin/net_plugin.cpp @@ -1739,6 +1739,7 @@ namespace eosio { sb = cc.fetch_block_by_number( num ); // thread-safe } FC_LOG_AND_DROP(); if( sb ) { + // Skip transmitting block this loop if threshold exceeded if( block_sync_rate_limit > 0 && peer_syncing_from_us ) { auto elapsed = std::chrono::duration_cast(get_time() - connection_start_time); auto current_rate = double(block_sync_bytes_sent) / elapsed.count(); diff --git a/tests/p2p_sync_throttle_test.py b/tests/p2p_sync_throttle_test.py index da101eb9b8..fd5ec8aa4c 100755 --- a/tests/p2p_sync_throttle_test.py +++ b/tests/p2p_sync_throttle_test.py @@ -118,17 +118,20 @@ def extractPrometheusMetric(connID: str, metric: str, text: str): clusterStart = time.time() cluster.launchUnstarted(2) + errorLimit = 40 # Approximately 20 retries required throttledNode = cluster.getNode(3) - while True: + while errorLimit > 0: try: response = throttlingNode.processUrllibRequest('prometheus', 'metrics', returnType=ReturnType.raw, printReturnLimit=16).decode() except urllib.error.URLError: # catch ConnectionRefusedEror waiting for node to finish startup and respond + errorLimit -= 1 time.sleep(0.5) continue else: if len(response) < 100: # tolerate HTTPError as well (method returns only the exception code) + errorLimit -= 1 continue connPorts = prometheusHostPortPattern.findall(response) if len(connPorts) < 3: @@ -147,17 +150,22 @@ def extractPrometheusMetric(connID: str, metric: str, text: str): Print(f'Start sync throttling node throttling: {"True" if startSyncThrottlingState else "False"}') if time.time() > clusterStart + 30: errorExit('Timed out') break + else: + errorExit('Exceeded error retry limit waiting for throttling node') - while True: + errorLimit = 40 # Few if any retries required but for consistency... + while errorLimit > 0: try: response = throttledNode.processUrllibRequest('prometheus', 'metrics', returnType=ReturnType.raw, printReturnLimit=16).decode() except urllib.error.URLError: # catch ConnectionRefusedError waiting for node to finish startup and respond + errorLimit -= 1 time.sleep(0.5) continue else: if len(response) < 100: # tolerate HTTPError as well (method returns only the exception code) + errorLimit -= 1 time.sleep(0.5) continue connPorts = prometheusHostPortPattern.findall(response) @@ -171,6 +179,8 @@ def extractPrometheusMetric(connID: str, metric: str, text: str): response) Print(f'Start sync throttled bytes received: {startSyncThrottledBytesReceived}') break + else: + errorExit('Exceeded error retry limit waiting for throttled node') # Throttling node was offline during block generation and once online receives blocks as fast as possible while # transmitting blocks to the next node in line at the above throttle setting. From b067bca4e6779150330c291c9518fb555c0b674c Mon Sep 17 00:00:00 2001 From: Jonathan Giszczak Date: Tue, 10 Oct 2023 14:58:29 -0500 Subject: [PATCH 46/46] Address a couple more review comments. --- tests/p2p_sync_throttle_test.py | 2 ++ 1 file changed, 2 insertions(+) diff --git a/tests/p2p_sync_throttle_test.py b/tests/p2p_sync_throttle_test.py index fd5ec8aa4c..4b15b8f49c 100755 --- a/tests/p2p_sync_throttle_test.py +++ b/tests/p2p_sync_throttle_test.py @@ -136,6 +136,7 @@ def extractPrometheusMetric(connID: str, metric: str, text: str): connPorts = prometheusHostPortPattern.findall(response) if len(connPorts) < 3: # wait for node to be connected + errorLimit -= 1 time.sleep(0.5) continue Print('Throttling Node Start State') @@ -171,6 +172,7 @@ def extractPrometheusMetric(connID: str, metric: str, text: str): connPorts = prometheusHostPortPattern.findall(response) if len(connPorts) < 2: # wait for sending node to be connected + errorLimit -= 1 continue Print('Throttled Node Start State') throttledNodePortMap = {port: id for id, port in connPorts}