From cd1e57aa4bc1dc95f5f83674af58ce5deacdf5cc Mon Sep 17 00:00:00 2001 From: Lin Huang Date: Mon, 11 Sep 2023 10:43:49 -0400 Subject: [PATCH] prevent unnecessary LIB catchups --- plugins/net_plugin/net_plugin.cpp | 29 ++++++++++++++++++++++------- 1 file changed, 22 insertions(+), 7 deletions(-) diff --git a/plugins/net_plugin/net_plugin.cpp b/plugins/net_plugin/net_plugin.cpp index a8bb78dc00..3dc87aaeb5 100644 --- a/plugins/net_plugin/net_plugin.cpp +++ b/plugins/net_plugin/net_plugin.cpp @@ -241,6 +241,12 @@ namespace eosio { std::atomic sync_state{in_sync}; std::atomic sync_ordinal{0}; + // Instant finality makes it likely that peers think their LIB and head are + // not in sync when in reality they are only a small number of blocks apart. + // To avoid unnecessary catchups, a margin of min_blocks_difference + // between LIB and head must be reached before catchup starts. + const uint32_t min_blocks_difference{0}; + private: constexpr static auto stage_str( stages s ); bool set_state( stages newstate ); @@ -251,7 +257,7 @@ namespace eosio { bool verify_catchup( const connection_ptr& c, uint32_t num, const block_id_type& id ); // locks mutex public: - explicit sync_manager( uint32_t span, uint32_t sync_peer_limit ); + explicit sync_manager( uint32_t span, uint32_t sync_peer_limit, uint32_t min_blocks_difference ); static void send_handshakes(); bool syncing_from_peer() const { return sync_state == lib_catchup; } bool is_in_sync() const { return sync_state == in_sync; } @@ -1904,7 +1910,7 @@ namespace eosio { } //----------------------------------------------------------- - sync_manager::sync_manager( uint32_t span, uint32_t sync_peer_limit ) + sync_manager::sync_manager( uint32_t span, uint32_t sync_peer_limit, uint32_t min_blocks_difference ) :sync_known_lib_num( 0 ) ,sync_last_requested_num( 0 ) ,sync_next_expected_num( 1 ) @@ -1912,6 +1918,7 @@ namespace eosio { ,sync_req_span( span ) ,sync_peer_limit( sync_peer_limit ) ,sync_state(in_sync) + 
,min_blocks_difference(min_blocks_difference) { } @@ -2167,7 +2174,7 @@ namespace eosio { c->peer_syncing_from_us = false; return; } - if (chain_info.head_num < msg.last_irreversible_block_num) { + if (chain_info.head_num + min_blocks_difference < msg.last_irreversible_block_num) { peer_ilog( c, "handshake lib ${lib}, head ${head}, head id ${id}.. sync 1, head ${h}, lib ${l}", ("lib", msg.last_irreversible_block_num)("head", msg.head_num)("id", msg.head_id.str().substr(8,16)) ("h", chain_info.head_num)("l", chain_info.lib_num) ); @@ -2177,7 +2184,7 @@ namespace eosio { } return; } - if (chain_info.lib_num > msg.head_num + nblk_combined_latency) { + if (chain_info.lib_num > msg.head_num + nblk_combined_latency + min_blocks_difference) { peer_ilog( c, "handshake lib ${lib}, head ${head}, head id ${id}.. sync 2, head ${h}, lib ${l}", ("lib", msg.last_irreversible_block_num)("head", msg.head_num)("id", msg.head_id.str().substr(8,16)) ("h", chain_info.head_num)("l", chain_info.lib_num) ); @@ -4027,9 +4034,6 @@ namespace eosio { peer_log_format = options.at( "peer-log-format" ).as(); - sync_master = std::make_unique( - options.at( "sync-fetch-span" ).as(), - options.at( "sync-peer-limit" ).as() ); txn_exp_period = def_txn_expire_wait; p2p_dedup_cache_expire_time_us = fc::seconds( options.at( "p2p-dedup-cache-expire-time-sec" ).as() ); @@ -4042,6 +4046,17 @@ namespace eosio { EOS_ASSERT( keepalive_interval.count() > 0, chain::plugin_config_exception, "p2p-keepalive_interval-ms must be greater than 0" ); + + // To avoid unnecessary LIB catchups, a gap of more than + // min_blocks_difference blocks between LIB and head is required to sync. + // Set it to the number of blocks produced during half of the keepalive + // interval. 
+ const uint32_t min_blocks_difference = (keepalive_interval.count() / config::block_interval_ms) / 2; + sync_master = std::make_unique( + options.at( "sync-fetch-span" ).as(), + options.at( "sync-peer-limit" ).as(), + min_blocks_difference); + connections.init( std::chrono::milliseconds( options.at("p2p-keepalive-interval-ms").as() * 2 ), fc::milliseconds( options.at("max-cleanup-time-msec").as() ), std::chrono::seconds( options.at("connection-cleanup-period").as() ),