From ad64fc77f0634bd1e2a5979a847dd8af2a2384e8 Mon Sep 17 00:00:00 2001 From: Kevin Heifner Date: Tue, 2 Apr 2024 13:05:15 -0500 Subject: [PATCH 01/31] GH-2102 Use vector of atomic bools to avoid mutex lock for duplicate check --- libraries/chain/hotstuff/hotstuff.cpp | 57 +++++++++---------- .../include/eosio/chain/hotstuff/hotstuff.hpp | 19 +++++-- 2 files changed, 40 insertions(+), 36 deletions(-) diff --git a/libraries/chain/hotstuff/hotstuff.cpp b/libraries/chain/hotstuff/hotstuff.cpp index a0839affb8..1aaffabfad 100644 --- a/libraries/chain/hotstuff/hotstuff.cpp +++ b/libraries/chain/hotstuff/hotstuff.cpp @@ -21,33 +21,32 @@ inline std::vector bitset_to_vector(const hs_bitset& bs) { } bool pending_quorum_certificate::has_voted(size_t index) const { - std::lock_guard g(*_mtx); - return _strong_votes._bitset.at(index) || _weak_votes._bitset.at(index); + return _strong_votes.has_voted(index) || _weak_votes.has_voted(index); } bool pending_quorum_certificate::has_voted_no_lock(bool strong, size_t index) const { if (strong) { - return _strong_votes._bitset[index]; + return _strong_votes.has_voted(index); } - return _weak_votes._bitset[index]; + return _weak_votes.has_voted(index); +} + +bool pending_quorum_certificate::votes_t::has_voted(size_t index) const { + assert(index <= _processed.size()); + return _processed[index].load(std::memory_order_relaxed); } + vote_status pending_quorum_certificate::votes_t::add_vote(size_t index, const bls_signature& sig) { if (_bitset[index]) { // check here as could have come in while unlocked return vote_status::duplicate; // shouldn't be already present } + _processed[index].store(true, std::memory_order_relaxed); _bitset.set(index); _sig.aggregate(sig); // works even if _sig is default initialized (fp2::zero()) return vote_status::success; } -void pending_quorum_certificate::votes_t::reset(size_t num_finalizers) { - if (num_finalizers != _bitset.size()) - _bitset.resize(num_finalizers); - _bitset.reset(); - _sig = bls_aggregate_signature(); -} - pending_quorum_certificate::pending_quorum_certificate() : _mtx(std::make_unique()) { } @@ -55,9 +54,9 @@ pending_quorum_certificate::pending_quorum_certificate() pending_quorum_certificate::pending_quorum_certificate(size_t num_finalizers, uint64_t quorum, uint64_t max_weak_sum_before_weak_final) : _mtx(std::make_unique()) , _quorum(quorum) - , _max_weak_sum_before_weak_final(max_weak_sum_before_weak_final) { - _weak_votes.resize(num_finalizers); - _strong_votes.resize(num_finalizers); + , _max_weak_sum_before_weak_final(max_weak_sum_before_weak_final) + , _weak_votes(num_finalizers) + , _strong_votes(num_finalizers) { } bool pending_quorum_certificate::is_quorum_met() const { @@ -133,25 +132,23 @@ vote_status pending_quorum_certificate::add_vote(block_num_type block_num, bool const bls_public_key& pubkey, const bls_signature& sig, uint64_t weight) { vote_status s = vote_status::success; - std::unique_lock g(*_mtx); - state_t pre_state = _state; - state_t post_state = pre_state; if (has_voted_no_lock(strong, index)) { - s = vote_status::duplicate; - } else { - g.unlock(); - if (!fc::crypto::blslib::verify(pubkey, proposal_digest, sig)) { - wlog( "signature from finalizer ${i} cannot be verified", ("i", index) ); - s = vote_status::invalid_signature; - } else { - g.lock(); - s = strong ? add_strong_vote(index, sig, weight) - : add_weak_vote(index, sig, weight); - post_state = _state; - g.unlock(); - } + dlog("block_num: ${bn}, vote strong: ${sv}, duplicate", ("bn", block_num)("sv", strong)); + return vote_status::duplicate; + } + + if (!fc::crypto::blslib::verify(pubkey, proposal_digest, sig)) { + wlog( "signature from finalizer ${i} cannot be verified", ("i", index) ); + return vote_status::invalid_signature; } + std::unique_lock g(*_mtx); + state_t pre_state = _state; + s = strong ? add_strong_vote(index, sig, weight) + : add_weak_vote(index, sig, weight); + state_t post_state = _state; + g.unlock(); + dlog("block_num: ${bn}, vote strong: ${sv}, status: ${s}, pre-state: ${pre}, post-state: ${state}, quorum_met: ${q}", ("bn", block_num)("sv", strong)("s", s)("pre", pre_state)("state", post_state)("q", is_quorum_met(post_state))); return s; diff --git a/libraries/chain/include/eosio/chain/hotstuff/hotstuff.hpp b/libraries/chain/include/eosio/chain/hotstuff/hotstuff.hpp index a98cbd023a..f67f7d65be 100644 --- a/libraries/chain/include/eosio/chain/hotstuff/hotstuff.hpp +++ b/libraries/chain/include/eosio/chain/hotstuff/hotstuff.hpp @@ -79,15 +79,22 @@ namespace eosio::chain { }; struct votes_t { + private: + friend struct fc::reflector; + friend class pending_quorum_certificate; hs_bitset _bitset; bls_aggregate_signature _sig; + std::vector> _processed; // avoid locking mutex for _bitset duplicate check - void resize(size_t num_finalizers) { _bitset.resize(num_finalizers); } - size_t count() const { return _bitset.count(); } + public: + explicit votes_t(size_t num_finalizers) + : _bitset(num_finalizers) + , _processed(num_finalizers) {} - vote_status add_vote(size_t index, const bls_signature& sig); + // thread safe + bool has_voted(size_t index) const; - void reset(size_t num_finalizers); + vote_status add_vote(size_t index, const bls_signature& sig); }; pending_quorum_certificate(); @@ -124,8 +131,8 @@ namespace eosio::chain { state_t _state { state_t::unrestricted }; uint64_t _strong_sum {0}; // accumulated sum of strong votes so far uint64_t _weak_sum {0}; // accumulated sum of weak votes so far - votes_t _weak_votes; - votes_t _strong_votes; + votes_t _weak_votes {0}; + votes_t _strong_votes {0}; // called by add_vote, already protected by mutex vote_status add_strong_vote(size_t index, From 3887b2c1a24467ddd1be9ee24d7e52bc8168e061 Mon Sep 17 00:00:00 2001 From: Kevin Heifner Date: Tue, 2 Apr 2024 16:23:34 -0500 Subject: [PATCH 02/31] GH-2102 Add block to fork db as soon as header is validated so it is available for vote processing --- libraries/chain/controller.cpp | 27 +++++++++++++-------- plugins/net_plugin/net_plugin.cpp | 2 +- plugins/producer_plugin/producer_plugin.cpp | 4 --- 3 files changed, 18 insertions(+), 15 deletions(-) diff --git a/libraries/chain/controller.cpp b/libraries/chain/controller.cpp index 599d040dae..fe41683d00 100644 --- a/libraries/chain/controller.cpp +++ b/libraries/chain/controller.cpp @@ -3650,8 +3650,8 @@ struct controller_impl { } // thread safe, expected to be called from thread other than the main thread - template - block_handle create_block_state_i( const block_id_type& id, const signed_block_ptr& b, const BS& prev ) { + template + block_handle create_block_state_i( ForkDB& forkdb, const block_id_type& id, const signed_block_ptr& b, const BS& prev ) { constexpr bool savanna_mode = std::is_same_v, block_state>; if constexpr (savanna_mode) { // Verify claim made by instant_finality_extension in block header extension and @@ -3679,13 +3679,16 @@ struct controller_impl { EOS_ASSERT( id == bsp->id(), block_validate_exception, "provided id ${id} does not match block id ${bid}", ("id", id)("bid", bsp->id()) ); + + forkdb.add(bsp, mark_valid_t::no, ignore_duplicate_t::yes); + return block_handle{bsp}; } std::future create_block_handle_future( const block_id_type& id, const signed_block_ptr& b ) { EOS_ASSERT( b, block_validate_exception, "null block" ); - auto f = [&](const auto& forkdb) -> std::future { + auto f = [&](auto& forkdb) -> std::future { return post_async_task( thread_pool.get_executor(), [b, id, &forkdb, control=this]() { // no reason for a block_state if fork_db already knows about block auto existing = forkdb.get_block( id ); @@ -3695,7 +3698,7 @@ struct controller_impl { EOS_ASSERT( prev, unlinkable_block_exception, "unlinkable block ${id} previous ${p}", ("id", id)("p", b->previous) ); - return control->create_block_state_i( id, b, *prev ); + return control->create_block_state_i( forkdb, id, b, *prev ); } ); }; @@ -3719,7 +3722,7 @@ struct controller_impl { std::optional create_block_handle( const block_id_type& id, const signed_block_ptr& b ) { EOS_ASSERT( b, block_validate_exception, "null block" ); - auto f = [&](const auto& forkdb) -> std::optional { + auto f = [&](auto& forkdb) -> std::optional { // no reason for a block_state if fork_db already knows about block auto existing = forkdb.get_block( id ); EOS_ASSERT( !existing, fork_database_exception, "we already know about this block: ${id}", ("id", id) ); @@ -3728,13 +3731,17 @@ struct controller_impl { auto prev = forkdb.get_block( b->previous, include_root_t::yes ); if( !prev ) return {}; - return create_block_state_i( id, b, *prev ); + return create_block_state_i( forkdb, id, b, *prev ); + }; + + auto unlinkable = [&](const auto&) -> std::optional { + return {}; }; if (!b->is_proper_svnn_block()) { - return fork_db.apply_l>(f); + return fork_db.apply>(f, unlinkable); } - return fork_db.apply_s>(f); + return fork_db.apply>(unlinkable, f); } // expected to be called from application thread as it modifies bsp->valid_qc and if_irreversible_block_id @@ -3807,7 +3814,7 @@ struct controller_impl { auto do_accept_block = [&](auto& forkdb) { if constexpr (std::is_same_v>) - forkdb.add( bsp, mark_valid_t::no, ignore_duplicate_t::no ); + forkdb.add( bsp, mark_valid_t::no, ignore_duplicate_t::yes ); emit( accepted_block_header, std::tie(bsp->block, bsp->id()) ); }; @@ -3846,7 +3853,7 @@ struct controller_impl { auto do_push = [&](auto& forkdb) { if constexpr (std::is_same_v>) { - forkdb.add( bsp, mark_valid_t::no, ignore_duplicate_t::no ); + forkdb.add( bsp, mark_valid_t::no, ignore_duplicate_t::yes ); } if (is_trusted_producer(b->producer)) { diff --git a/plugins/net_plugin/net_plugin.cpp b/plugins/net_plugin/net_plugin.cpp index 1bbf736ef3..709d820f38 100644 --- a/plugins/net_plugin/net_plugin.cpp +++ b/plugins/net_plugin/net_plugin.cpp @@ -3847,7 +3847,7 @@ namespace eosio { uint32_t lib = cc.last_irreversible_block_num(); try { - if( blk_num <= lib || cc.block_exists(blk_id) ) { + if( blk_num <= lib ) { c->strand.post( [sync_master = my_impl->sync_master.get(), &dispatcher = my_impl->dispatcher, c, blk_id, blk_num]() { dispatcher.add_peer_block( blk_id, c->connection_id ); diff --git a/plugins/producer_plugin/producer_plugin.cpp b/plugins/producer_plugin/producer_plugin.cpp index f43668489d..d0cb33fbbc 100644 --- a/plugins/producer_plugin/producer_plugin.cpp +++ b/plugins/producer_plugin/producer_plugin.cpp @@ -688,10 +688,6 @@ class producer_plugin_impl : public std::enable_shared_from_thischain(); - // de-dupe here... no point in aborting block if we already know the block; avoid exception in create_block_handle_future - if (chain.block_exists(id)) { - return true; // return true because the block is accepted - } EOS_ASSERT(block->timestamp < (now + fc::seconds(7)), block_from_the_future, "received a block from the future, ignoring it: ${id}", ("id", id)); From bd43d61f2fc9646139c31a16f4fba6bbe136827f Mon Sep 17 00:00:00 2001 From: Kevin Heifner Date: Wed, 3 Apr 2024 08:52:12 -0500 Subject: [PATCH 03/31] GH-2102 May already be in forkdb since we are adding as soon as header validation complete --- libraries/chain/controller.cpp | 8 -------- 1 file changed, 8 deletions(-) diff --git a/libraries/chain/controller.cpp b/libraries/chain/controller.cpp index fe41683d00..d25440d520 100644 --- a/libraries/chain/controller.cpp +++ b/libraries/chain/controller.cpp @@ -3690,10 +3690,6 @@ struct controller_impl { auto f = [&](auto& forkdb) -> std::future { return post_async_task( thread_pool.get_executor(), [b, id, &forkdb, control=this]() { - // no reason for a block_state if fork_db already knows about block - auto existing = forkdb.get_block( id ); - EOS_ASSERT( !existing, fork_database_exception, "we already know about this block: ${id}", ("id", id) ); - auto prev = forkdb.get_block( b->previous, include_root_t::yes ); EOS_ASSERT( prev, unlinkable_block_exception, "unlinkable block ${id} previous ${p}", ("id", id)("p", b->previous) ); @@ -3723,10 +3719,6 @@ struct controller_impl { EOS_ASSERT( b, block_validate_exception, "null block" ); auto f = [&](auto& forkdb) -> std::optional { - // no reason for a block_state if fork_db already knows about block - auto existing = forkdb.get_block( id ); - EOS_ASSERT( !existing, fork_database_exception, "we already know about this block: ${id}", ("id", id) ); - // previous not found could mean that previous block not applied yet auto prev = forkdb.get_block( b->previous, include_root_t::yes ); if( !prev ) return {}; From c84bab166cc6daaebaec89a2ca6e9cb0dd6f384a Mon Sep 17 00:00:00 2001 From: Kevin Heifner Date: Wed, 3 Apr 2024 11:37:50 -0500 Subject: [PATCH 04/31] GH-2102 Add logging on app().quit() --- plugins/net_plugin/net_plugin.cpp | 1 + programs/nodeos/main.cpp | 4 ++++ 2 files changed, 5 insertions(+) diff --git a/plugins/net_plugin/net_plugin.cpp b/plugins/net_plugin/net_plugin.cpp index 709d820f38..db0d97c3dd 100644 --- a/plugins/net_plugin/net_plugin.cpp +++ b/plugins/net_plugin/net_plugin.cpp @@ -4376,6 +4376,7 @@ namespace eosio { set_producer_accounts(producer_plug->producer_accounts()); thread_pool.start( thread_pool_size, []( const fc::exception& e ) { + elog("Exception in net thread, exiting: ${e}", ("e", e.to_detail_string())); app().quit(); } ); diff --git a/programs/nodeos/main.cpp b/programs/nodeos/main.cpp index a13f0abcce..44f3572e06 100644 --- a/programs/nodeos/main.cpp +++ b/programs/nodeos/main.cpp @@ -161,6 +161,10 @@ int main(int argc, char** argv) uint32_t short_hash = 0; fc::from_hex(eosio::version::version_hash(), (char*)&short_hash, sizeof(short_hash)); + app->set_stop_executor_cb([&app]() { + ilog("appbase quit called"); + app->get_io_service().stop(); + }); app->set_version(htonl(short_hash)); app->set_version_string(eosio::version::version_client()); app->set_full_version_string(eosio::version::version_full()); From cf5033607e528d5a8afe1bf86e2f69b3b4608819 Mon Sep 17 00:00:00 2001 From: Kevin Heifner Date: Wed, 3 Apr 2024 12:39:35 -0500 Subject: [PATCH 05/31] GH-2102 Integrate qc and vote when switching forks if first time blocks are validated --- libraries/chain/controller.cpp | 26 ++++++++++++++++---------- 1 file changed, 16 insertions(+), 10 deletions(-) diff --git a/libraries/chain/controller.cpp b/libraries/chain/controller.cpp index d25440d520..6e8174cc09 100644 --- a/libraries/chain/controller.cpp +++ b/libraries/chain/controller.cpp @@ -3794,15 +3794,20 @@ struct controller_impl { } } + template + void integrate_qc(const BSP& bsp) { + if constexpr (std::is_same_v) { + integrate_received_qc_to_block(bsp); + consider_voting(bsp); + } + } + template void accept_block(const BSP& bsp) { assert(bsp && bsp->block); // Save the received QC as soon as possible, no matter whether the block itself is valid or not - if constexpr (std::is_same_v) { - integrate_received_qc_to_block(bsp); - consider_voting(bsp); - } + integrate_qc(bsp); auto do_accept_block = [&](auto& forkdb) { if constexpr (std::is_same_v>) @@ -3823,10 +3828,7 @@ struct controller_impl { assert(bsp && bsp->block); // Save the received QC as soon as possible, no matter whether the block itself is valid or not - if constexpr (std::is_same_v) { - integrate_received_qc_to_block(bsp); - consider_voting(bsp); - } + integrate_qc(bsp); controller::block_status s = controller::block_status::complete; EOS_ASSERT(!pending, block_validate_exception, "it is not valid to push a block when there is a pending block"); @@ -3989,9 +3991,13 @@ struct controller_impl { for( auto ritr = branches.first.rbegin(); ritr != branches.first.rend(); ++ritr ) { auto except = std::exception_ptr{}; try { + bool valid = (*ritr)->is_valid(); + if (!valid) // has not been validated (applied) before, only in forkdb, integrate and possibly vote now + integrate_qc(*ritr); + br = controller::block_report{}; - apply_block( br, *ritr, (*ritr)->is_valid() ? controller::block_status::validated - : controller::block_status::complete, trx_lookup ); + apply_block( br, *ritr, valid ? controller::block_status::validated + : controller::block_status::complete, trx_lookup ); } catch ( const std::bad_alloc& ) { throw; } catch ( const boost::interprocess::bad_alloc& ) { From 053fe72749103a0fd89b9c9f999c09a17e3cab81 Mon Sep 17 00:00:00 2001 From: Kevin Heifner Date: Wed, 3 Apr 2024 14:41:33 -0500 Subject: [PATCH 06/31] GH-2102 Log irreversible even when head not updated --- libraries/chain/controller.cpp | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/libraries/chain/controller.cpp b/libraries/chain/controller.cpp index 6e8174cc09..0c1b096737 100644 --- a/libraries/chain/controller.cpp +++ b/libraries/chain/controller.cpp @@ -4049,8 +4049,8 @@ struct controller_impl { head_changed = false; } - if( head_changed ) - log_irreversible(); + // irreversible can change even if block not applied to head, integrated qc can move LIB + log_irreversible(); }; fork_db.apply(do_maybe_switch_forks); From 40661090a5db4abf936ed446c853d3bbdcee6105 Mon Sep 17 00:00:00 2001 From: Kevin Heifner Date: Thu, 4 Apr 2024 07:39:31 -0500 Subject: [PATCH 07/31] GH-2102 Check for validated block to avoid extra processing --- libraries/chain/controller.cpp | 14 +++++++++++++ libraries/chain/fork_database.cpp | 15 +++++++++++++- .../chain/include/eosio/chain/controller.hpp | 3 ++- .../include/eosio/chain/fork_database.hpp | 1 + plugins/net_plugin/net_plugin.cpp | 2 +- plugins/producer_plugin/producer_plugin.cpp | 20 +++++++++---------- 6 files changed, 42 insertions(+), 13 deletions(-) diff --git a/libraries/chain/controller.cpp b/libraries/chain/controller.cpp index 0c1b096737..a4be1c536c 100644 --- a/libraries/chain/controller.cpp +++ b/libraries/chain/controller.cpp @@ -1070,6 +1070,12 @@ struct controller_impl { }); } + bool fork_db_validated_block_exists( const block_id_type& id ) const { + return fork_db.apply([&](const auto& forkdb) { + return forkdb.validated_block_exists(id); + }); + } + signed_block_ptr fork_db_fetch_block_by_id( const block_id_type& id ) const { return fork_db.apply([&](const auto& forkdb) { auto bsp = forkdb.get_block(id); @@ -4930,6 +4936,14 @@ bool controller::block_exists(const block_id_type& id) const { return false; } +bool controller::validated_block_exists(const block_id_type& id) const { + bool exists = my->fork_db_validated_block_exists(id); + if( exists ) return true; + std::optional sbh = my->blog.read_block_header_by_num( block_header::num_from_id(id) ); + if( sbh && sbh->calculate_id() == id ) return true; + return false; +} + std::optional controller::fetch_block_header_by_id( const block_id_type& id )const { auto sb_ptr = my->fork_db_fetch_block_by_id(id); if( sb_ptr ) return *static_cast(sb_ptr.get()); diff --git a/libraries/chain/fork_database.cpp b/libraries/chain/fork_database.cpp index 5cda182450..c054836dc5 100644 --- a/libraries/chain/fork_database.cpp +++ b/libraries/chain/fork_database.cpp @@ -128,6 +128,7 @@ namespace eosio::chain { bsp_t get_block_impl( const block_id_type& id, include_root_t include_root = include_root_t::no ) const; bool block_exists_impl( const block_id_type& id ) const; + bool validated_block_exists_impl( const block_id_type& id ) const; void reset_root_impl( const bsp_t& root_bs ); void rollback_head_to_root_impl(); void advance_root_impl( const block_id_type& id ); @@ -664,7 +665,19 @@ namespace eosio::chain { return index.find( id ) != index.end(); } - // ------------------ fork_database ------------------------- + template + bool fork_database_t::validated_block_exists(const block_id_type& id) const { + std::lock_guard g( my->mtx ); + return my->validated_block_exists_impl(id); + } + + template + bool fork_database_impl::validated_block_exists_impl(const block_id_type& id) const { + auto itr = index.find( id ); + return itr != index.end() && bs_accessor_t::is_valid(*(*itr)); + } + +// ------------------ fork_database ------------------------- fork_database::fork_database(const std::filesystem::path& data_dir) : data_dir(data_dir) diff --git a/libraries/chain/include/eosio/chain/controller.hpp b/libraries/chain/include/eosio/chain/controller.hpp index 0eeed40015..fc9044aece 100644 --- a/libraries/chain/include/eosio/chain/controller.hpp +++ b/libraries/chain/include/eosio/chain/controller.hpp @@ -276,7 +276,8 @@ namespace eosio::chain { // thread-safe signed_block_ptr fetch_block_by_id( const block_id_type& id )const; // thread-safe - bool block_exists( const block_id_type& id)const; + bool block_exists(const block_id_type& id) const; + bool validated_block_exists(const block_id_type& id) const; // thread-safe std::optional fetch_block_header_by_number( uint32_t block_num )const; // thread-safe diff --git a/libraries/chain/include/eosio/chain/fork_database.hpp b/libraries/chain/include/eosio/chain/fork_database.hpp index 4f26eb547a..39d37cef00 100644 --- a/libraries/chain/include/eosio/chain/fork_database.hpp +++ b/libraries/chain/include/eosio/chain/fork_database.hpp @@ -51,6 +51,7 @@ namespace eosio::chain { bsp_t get_block( const block_id_type& id, include_root_t include_root = include_root_t::no ) const; bool block_exists( const block_id_type& id ) const; + bool validated_block_exists( const block_id_type& id ) const; /** * Purges any existing blocks from the fork database and resets the root block_header_state to the provided value. diff --git a/plugins/net_plugin/net_plugin.cpp b/plugins/net_plugin/net_plugin.cpp index db0d97c3dd..852f2a0d1d 100644 --- a/plugins/net_plugin/net_plugin.cpp +++ b/plugins/net_plugin/net_plugin.cpp @@ -3847,7 +3847,7 @@ namespace eosio { uint32_t lib = cc.last_irreversible_block_num(); try { - if( blk_num <= lib ) { + if( blk_num <= lib || cc.validated_block_exists(blk_id) ) { c->strand.post( [sync_master = my_impl->sync_master.get(), &dispatcher = my_impl->dispatcher, c, blk_id, blk_num]() { dispatcher.add_peer_block( blk_id, c->connection_id ); diff --git a/plugins/producer_plugin/producer_plugin.cpp b/plugins/producer_plugin/producer_plugin.cpp index d0cb33fbbc..976a08b77a 100644 --- a/plugins/producer_plugin/producer_plugin.cpp +++ b/plugins/producer_plugin/producer_plugin.cpp @@ -677,18 +677,14 @@ class producer_plugin_impl : public std::enable_shared_from_thistimestamp < fc::minutes(5) || (blk_num % 1000 == 0)) // only log every 1000 during sync fc_dlog(_log, "received incoming block ${n} ${id}", ("n", blk_num)("id", id)); - // start a new speculative block, speculative start_block may have been interrupted - auto ensure = fc::make_scoped_exit([this]() { - // avoid schedule_production_loop if in_producing_mode(); speculative block was not interrupted and we don't want to abort block - if (!in_producing_mode()) { - schedule_production_loop(); - } else { - _time_tracker.add_other_time(); - } - }); - auto& chain = chain_plug->chain(); + // de-dupe here... no point in aborting block if we already know the block; avoid exception in create_block_handle_future + if (chain.validated_block_exists(id)) { + _time_tracker.add_other_time(); + return true; // return true because the block was already accepted + } + EOS_ASSERT(block->timestamp < (now + fc::seconds(7)), block_from_the_future, "received a block from the future, ignoring it: ${id}", ("id", id)); // start processing of block @@ -701,9 +697,13 @@ class producer_plugin_impl : public std::enable_shared_from_this Date: Thu, 4 Apr 2024 09:28:13 -0500 Subject: [PATCH 08/31] GH-2102 Init processed on reflection --- libraries/chain/hotstuff/hotstuff.cpp | 9 +++++++++ .../chain/include/eosio/chain/hotstuff/hotstuff.hpp | 6 +++++- 2 files changed, 14 insertions(+), 1 deletion(-) diff --git a/libraries/chain/hotstuff/hotstuff.cpp b/libraries/chain/hotstuff/hotstuff.cpp index 1aaffabfad..262efde066 100644 --- a/libraries/chain/hotstuff/hotstuff.cpp +++ b/libraries/chain/hotstuff/hotstuff.cpp @@ -31,6 +31,15 @@ bool pending_quorum_certificate::has_voted_no_lock(bool strong, size_t index) co return _weak_votes.has_voted(index); } +void pending_quorum_certificate::votes_t::reflector_init() { + _processed = std::vector>(_bitset.size()); + for (size_t i = 0; i < _bitset.size(); ++i) { + if (_bitset[i]) { + _processed[i].store(true, std::memory_order_relaxed); + } + } +} + bool pending_quorum_certificate::votes_t::has_voted(size_t index) const { assert(index <= _processed.size()); return _processed[index].load(std::memory_order_relaxed); diff --git a/libraries/chain/include/eosio/chain/hotstuff/hotstuff.hpp b/libraries/chain/include/eosio/chain/hotstuff/hotstuff.hpp index f67f7d65be..7fae2ceb78 100644 --- a/libraries/chain/include/eosio/chain/hotstuff/hotstuff.hpp +++ b/libraries/chain/include/eosio/chain/hotstuff/hotstuff.hpp @@ -78,14 +78,18 @@ namespace eosio::chain { strong // Enough `strong` votes to have a valid `strong` QC }; - struct votes_t { + struct votes_t : fc::reflect_init { private: friend struct fc::reflector; + friend struct fc::reflector_init_visitor; + friend struct fc::has_reflector_init; friend class pending_quorum_certificate; + hs_bitset _bitset; bls_aggregate_signature _sig; std::vector> _processed; // avoid locking mutex for _bitset duplicate check + void reflector_init(); public: explicit votes_t(size_t num_finalizers) : _bitset(num_finalizers) From 0c82e7abe5260a3a92310f396a1fc4c8001f826f Mon Sep 17 00:00:00 2001 From: Kevin Heifner Date: Thu, 4 Apr 2024 10:58:21 -0500 Subject: [PATCH 09/31] GH-2102 Handle corner case of trx locally applied but not in a block yet --- tests/trx_finality_status_forked_test.py | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/tests/trx_finality_status_forked_test.py b/tests/trx_finality_status_forked_test.py index a76f75d9f7..073fa3c3b9 100755 --- a/tests/trx_finality_status_forked_test.py +++ b/tests/trx_finality_status_forked_test.py @@ -186,6 +186,7 @@ def getBlockID(status): if state == irreversibleState: Print(f"Transaction became irreversible before it could be found forked out: {json.dumps(retStatus, indent=1)}") + testSuccessful = True sys.exit(0) assert state == forkedOutState, \ @@ -209,7 +210,7 @@ def getBlockID(status): info = prodD.getInfo() retStatus = prodD.getTransactionStatus(transId) state = getState(retStatus) - blockNum = getBlockNum(retStatus) + blockNum = getBlockNum(retStatus) + 2 # Add 2 to give time to move from locally applied to in-block if (state == inBlockState or state == irreversibleState) or ( info['head_block_producer'] == 'defproducerd' and info['last_irreversible_block_num'] > blockNum ): break From b9deb5836d3c5a67db1d77318479d82b36695718 Mon Sep 17 00:00:00 2001 From: Kevin Heifner Date: Thu, 4 Apr 2024 10:59:13 -0500 Subject: [PATCH 10/31] GH-2102 Only report fork switch on actual fork switch. maybe_switch_forks now also can apply staged blocks in the fork db. --- libraries/chain/controller.cpp | 25 +++++++++++++------------ 1 file changed, 13 insertions(+), 12 deletions(-) diff --git a/libraries/chain/controller.cpp b/libraries/chain/controller.cpp index a4be1c536c..94beaefda3 100644 --- a/libraries/chain/controller.cpp +++ b/libraries/chain/controller.cpp @@ -3955,7 +3955,6 @@ struct controller_impl { const forked_callback_t& forked_cb, const trx_meta_cache_lookup& trx_lookup ) { auto do_maybe_switch_forks = [&](auto& forkdb) { - bool head_changed = true; if( new_head->header.previous == chain_head.id() ) { try { apply_block( br, new_head, s, trx_lookup ); @@ -3965,18 +3964,18 @@ struct controller_impl { } } else if( new_head->id() != chain_head.id() ) { auto head_fork_comp_str = apply(chain_head, [](auto& head) -> std::string { return log_fork_comparison(*head); }); - ilog("switching forks from ${current_head_id} (block number ${current_head_num}) ${c} to ${new_head_id} (block number ${new_head_num}) ${n}", - ("current_head_id", chain_head.id())("current_head_num", chain_head.block_num())("new_head_id", new_head->id())("new_head_num", new_head->block_num()) - ("c", head_fork_comp_str)("n", log_fork_comparison(*new_head))); - - // not possible to log transaction specific infor when switching forks - if (auto dm_logger = get_deep_mind_logger(false)) { - dm_logger->on_switch_forks(chain_head.id(), new_head->id()); - } - auto branches = forkdb.fetch_branch_from( new_head->id(), chain_head.id() ); if( branches.second.size() > 0 ) { + ilog("switching forks from ${current_head_id} (block number ${current_head_num}) ${c} to ${new_head_id} (block number ${new_head_num}) ${n}", + ("current_head_id", chain_head.id())("current_head_num", chain_head.block_num())("new_head_id", new_head->id())("new_head_num", new_head->block_num()) + ("c", head_fork_comp_str)("n", log_fork_comparison(*new_head))); + + // not possible to log transaction specific info when switching forks + if (auto dm_logger = get_deep_mind_logger(false)) { + dm_logger->on_switch_forks(chain_head.id(), new_head->id()); + } + for( auto itr = branches.second.begin(); itr != branches.second.end(); ++itr ) { pop_block(); } @@ -3992,6 +3991,10 @@ struct controller_impl { } } } + } else { + ilog("applying fork db blocks from ${cbn}:${cbid} ${c} to ${nbn}:${nbid} ${n}", + ("cbid", chain_head.id())("cbn", chain_head.block_num())("nbid", new_head->id())("nbn", new_head->block_num()) + ("c", head_fork_comp_str)("n", log_fork_comparison(*new_head))); } for( auto ritr = branches.first.rbegin(); ritr != branches.first.rend(); ++ritr ) { @@ -4051,8 +4054,6 @@ struct controller_impl { ilog("successfully switched fork to new head ${new_head_id}, removed {${rm_ids}}, applied {${new_ids}}", ("new_head_id", new_head->id())("rm_ids", get_ids(branches.second))("new_ids", get_ids(branches.first))); } - } else { - head_changed = false; } // irreversible can change even if block not applied to head, integrated qc can move LIB From 8a7b973f8129788576431b75d732511adf951ab4 Mon Sep 17 00:00:00 2001 From: Kevin Heifner Date: Thu, 4 Apr 2024 11:23:20 -0500 Subject: [PATCH 11/31] GH-2102 Check for terminate_at_block during apply of fork db blocks --- libraries/chain/controller.cpp | 6 ++++++ 1 file changed, 6 insertions(+) diff --git a/libraries/chain/controller.cpp b/libraries/chain/controller.cpp index 94beaefda3..be0b82fd08 100644 --- a/libraries/chain/controller.cpp +++ b/libraries/chain/controller.cpp @@ -4007,6 +4007,12 @@ struct controller_impl { br = controller::block_report{}; apply_block( br, *ritr, valid ? controller::block_status::validated : controller::block_status::complete, trx_lookup ); + + if( conf.terminate_at_block > 0 && conf.terminate_at_block <= chain_head.block_num()) { + ilog("Reached configured maximum block ${num}; terminating", ("num", conf.terminate_at_block) ); + shutdown(); + return; + } } catch ( const std::bad_alloc& ) { throw; } catch ( const boost::interprocess::bad_alloc& ) { From 274e44a7827cc9ee3a6573dc4954ade5ba232728 Mon Sep 17 00:00:00 2001 From: Kevin Heifner Date: Thu, 4 Apr 2024 12:22:07 -0500 Subject: [PATCH 12/31] GH-2102 If forked out again then could still be in local state --- tests/trx_finality_status_forked_test.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/tests/trx_finality_status_forked_test.py b/tests/trx_finality_status_forked_test.py index 073fa3c3b9..89e5bf735a 100755 --- a/tests/trx_finality_status_forked_test.py +++ b/tests/trx_finality_status_forked_test.py @@ -205,7 +205,7 @@ def getBlockID(status): state = getState(retStatus) # it is possible for another fork switch to cause the trx to be forked out again - if state == forkedOutState: + if state == forkedOutState or state == localState: while True: info = prodD.getInfo() retStatus = prodD.getTransactionStatus(transId) From 2e497947dd7c9c17990393ed633fc6ba49c7265e Mon Sep 17 00:00:00 2001 From: Kevin Heifner Date: Thu, 4 Apr 2024 13:56:53 -0500 Subject: [PATCH 13/31] GH-2102 Allow ctrl-c shutdown during sync of a large number of blocks. Improve switch logging. --- libraries/chain/controller.cpp | 49 ++++++++++++++++++++++------------ 1 file changed, 32 insertions(+), 17 deletions(-) diff --git a/libraries/chain/controller.cpp b/libraries/chain/controller.cpp index be0b82fd08..d7b8ed464b 100644 --- a/libraries/chain/controller.cpp +++ b/libraries/chain/controller.cpp @@ -894,6 +894,7 @@ struct controller_impl { #endif controller& self; std::function shutdown; + std::function check_shutdown; chainbase::database db; block_log blog; std::optional pending; @@ -1493,7 +1494,7 @@ struct controller_impl { enum class startup_t { genesis, snapshot, existing_state }; - std::exception_ptr replay_block_log(const std::function& check_shutdown) { + std::exception_ptr replay_block_log() { auto blog_head = blog.head(); if (!blog_head) { ilog( "no block log found" ); @@ -1585,7 +1586,7 @@ struct controller_impl { return except_ptr; } - void replay(const std::function& check_shutdown, startup_t startup) { + void replay(startup_t startup) { replaying = true; auto blog_head = blog.head(); @@ -1593,7 +1594,7 @@ struct controller_impl { std::exception_ptr except_ptr; if (blog_head) { - except_ptr = replay_block_log(check_shutdown); + except_ptr = replay_block_log(); } else { ilog( "no block log found" ); } @@ -1699,7 +1700,10 @@ struct controller_impl { void startup(std::function shutdown, std::function check_shutdown, const snapshot_reader_ptr& snapshot) { EOS_ASSERT( snapshot, snapshot_exception, "No snapshot reader provided" ); - this->shutdown = shutdown; + this->shutdown = std::move(shutdown); + assert(this->shutdown); + this->check_shutdown = std::move(check_shutdown); + assert(this->check_shutdown); try { auto snapshot_load_start_time = fc::time_point::now(); snapshot->validate(); @@ -1718,7 +1722,7 @@ struct controller_impl { } ilog( "Snapshot loaded, lib: ${lib}", ("lib", chain_head.block_num()) ); - init(std::move(check_shutdown), startup_t::snapshot); + init(startup_t::snapshot); apply_l(chain_head, [&](auto& head) { if (block_states.second && head->header.contains_header_extension(instant_finality_extension::extension_id())) { // snapshot generated in transition to savanna @@ -1744,6 +1748,9 @@ struct controller_impl { ); this->shutdown = std::move(shutdown); + assert(this->shutdown); + this->check_shutdown = std::move(check_shutdown); + assert(this->check_shutdown); initialize_blockchain_state(genesis); // sets chain_head to genesis state @@ -1753,7 +1760,7 @@ struct controller_impl { blog.reset( genesis, chain_head.block() ); } - init(std::move(check_shutdown), startup_t::genesis); + init(startup_t::genesis); } void startup(std::function shutdown, std::function check_shutdown) { @@ -1766,6 +1773,9 @@ struct controller_impl { "No existing fork database despite existing chain state. Replay required." ); this->shutdown = std::move(shutdown); + assert(this->shutdown); + this->check_shutdown = std::move(check_shutdown); + assert(this->check_shutdown); uint32_t lib_num = fork_db_root_block_num(); auto first_block_num = blog.first_block_num(); if( auto blog_head = blog.head() ) { @@ -1792,7 +1802,7 @@ struct controller_impl { fork_db.apply(do_startup); - init(std::move(check_shutdown), startup_t::existing_state); + init(startup_t::existing_state); } @@ -1809,7 +1819,7 @@ struct controller_impl { return header_itr; } - void init(std::function check_shutdown, startup_t startup) { + void init(startup_t startup) { auto header_itr = validate_db_version( db ); { @@ -1852,7 +1862,7 @@ struct controller_impl { ilog( "chain database started with hash: ${hash}", ("hash", calculate_integrity_hash()) ); okay_to_print_integrity_hash_on_stop = true; - replay( check_shutdown, startup ); // replay any irreversible and reversible blocks ahead of current head + replay( startup ); // replay any irreversible and reversible blocks ahead of current head if( check_shutdown() ) return; @@ -3963,10 +3973,11 @@ struct controller_impl { throw; } } else if( new_head->id() != chain_head.id() ) { - auto head_fork_comp_str = apply(chain_head, [](auto& head) -> std::string { return log_fork_comparison(*head); }); auto branches = forkdb.fetch_branch_from( new_head->id(), chain_head.id() ); - if( branches.second.size() > 0 ) { + bool switch_fork = !branches.second.empty(); + if( switch_fork ) { + auto head_fork_comp_str = apply(chain_head, [](auto& head) -> std::string { return log_fork_comparison(*head); }); ilog("switching forks from ${current_head_id} (block number ${current_head_num}) ${c} to ${new_head_id} (block number ${new_head_num}) ${n}", ("current_head_id", chain_head.id())("current_head_num", chain_head.block_num())("new_head_id", new_head->id())("new_head_num", new_head->block_num()) ("c", head_fork_comp_str)("n", log_fork_comparison(*new_head))); @@ -3991,10 +4002,10 @@ struct controller_impl { } } } - } else { - ilog("applying fork db blocks from ${cbn}:${cbid} ${c} to ${nbn}:${nbid} ${n}", - ("cbid", chain_head.id())("cbn", chain_head.block_num())("nbid", new_head->id())("nbn", new_head->block_num()) - ("c", head_fork_comp_str)("n", log_fork_comparison(*new_head))); + } else if (!branches.first.empty()) { + ilog("applying ${n} fork db blocks from ${cbn}:${cbid} to ${nbn}:${nbid}", + ("n", branches.first.size())("cbid", (*branches.first.rbegin())->id())("cbn", (*branches.first.rbegin())->block_num()) + ("nbid", new_head->id())("nbn", new_head->block_num())); } for( auto ritr = branches.first.rbegin(); ritr != branches.first.rend(); ++ritr ) { @@ -4011,7 +4022,11 @@ struct controller_impl { if( conf.terminate_at_block > 0 && conf.terminate_at_block <= chain_head.block_num()) { ilog("Reached configured maximum block ${num}; terminating", ("num", conf.terminate_at_block) ); shutdown(); - return; + break; + } + if (!switch_fork && check_shutdown()) { + shutdown(); + break; } } catch ( const std::bad_alloc& ) { throw; @@ -4048,7 +4063,7 @@ struct controller_impl { } // end if exception } /// end for each block in branch - if (fc::logger::get(DEFAULT_LOGGER).is_enabled(fc::log_level::info)) { + if (switch_fork && fc::logger::get(DEFAULT_LOGGER).is_enabled(fc::log_level::info)) { auto get_ids = [&](auto& container)->std::string { std::string ids; for(auto ritr = container.rbegin(), e = container.rend(); ritr != e; ++ritr) { From 4c3535f728829119c337ba2a5dfcd544129272b1 Mon Sep 17 00:00:00 2001 From: Kevin Heifner Date: Thu, 4 Apr 2024 13:57:30 -0500 Subject: [PATCH 14/31] GH-2102 Fix sync issue with receiving a current block while syncing --- plugins/net_plugin/net_plugin.cpp | 8 ++++++-- 1 file changed, 6 insertions(+), 2 deletions(-) diff --git a/plugins/net_plugin/net_plugin.cpp b/plugins/net_plugin/net_plugin.cpp index 852f2a0d1d..8da4b26a70 100644 --- a/plugins/net_plugin/net_plugin.cpp +++ b/plugins/net_plugin/net_plugin.cpp @@ -2540,7 +2540,11 @@ namespace eosio { if (sync_last_requested_num == 0) { // block was rejected sync_next_expected_num = my_impl->get_chain_lib_num() + 1; } else { - sync_next_expected_num = blk_num + 1; + if (blk_num == sync_next_expected_num) { + ++sync_next_expected_num; + } else if (blk_num < sync_next_expected_num) { + sync_next_expected_num = blk_num + 1; + } } } @@ -3140,7 +3144,6 @@ namespace eosio { } } else { block_sync_bytes_received += message_length; - my_impl->sync_master->sync_recv_block(shared_from_this(), blk_id, blk_num, false); uint32_t lib_num = my_impl->get_chain_lib_num(); if( blk_num <= lib_num ) { cancel_wait(); @@ -3148,6 +3151,7 @@ namespace eosio { pending_message_buffer.advance_read_ptr( message_length ); return true; } + my_impl->sync_master->sync_recv_block(shared_from_this(), blk_id, blk_num, false); } auto ds = pending_message_buffer.create_datastream(); From 112bb69a099865f2ab9aff807de5fc7421b6c734 Mon Sep 17 00:00:00 2001 From: Kevin Heifner Date: Thu, 4 Apr 2024 14:23:19 -0500 Subject: [PATCH 15/31] GH-2102 On startup pending_head forkdb blocks are processed. If asked to terminate at a block don't put any blocks above that in the fork database. --- libraries/chain/controller.cpp | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/libraries/chain/controller.cpp b/libraries/chain/controller.cpp index d7b8ed464b..5886403ae7 100644 --- a/libraries/chain/controller.cpp +++ b/libraries/chain/controller.cpp @@ -3696,7 +3696,9 @@ struct controller_impl { EOS_ASSERT( id == bsp->id(), block_validate_exception, "provided id ${id} does not match block id ${bid}", ("id", id)("bid", bsp->id()) ); - forkdb.add(bsp, mark_valid_t::no, ignore_duplicate_t::yes); + if (conf.terminate_at_block > 0 && bsp->block_num() <= conf.terminate_at_block) { + forkdb.add(bsp, mark_valid_t::no, ignore_duplicate_t::yes); + } return block_handle{bsp}; } From 914f218d3c19f650a51bf84765f502436514679e Mon Sep 17 00:00:00 2001 From: Kevin Heifner Date: Thu, 4 Apr 2024 20:20:10 -0500 Subject: [PATCH 16/31] GH-2102 Fix comparison in waitForBlock --- tests/TestHarness/Node.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/tests/TestHarness/Node.py b/tests/TestHarness/Node.py index d23f42d2f9..ddcd6ec887 100644 --- a/tests/TestHarness/Node.py +++ b/tests/TestHarness/Node.py @@ -208,7 +208,7 @@ def waitForNextBlock(self, timeout=None, blockType=BlockType.head): return ret def waitForBlock(self, blockNum, timeout=None, blockType=BlockType.head, reportInterval=None): - lam = lambda: self.getBlockNum(blockType=blockType) > blockNum + lam = lambda: self.getBlockNum(blockType=blockType) >= blockNum blockDesc = "head" if blockType == BlockType.head else "LIB" count = 0 From 5e9af0cc67f8966d3c07465a31ff7c9c847db05d Mon Sep 17 00:00:00 2001 From: Kevin Heifner Date: Thu, 4 Apr 2024 20:34:48 -0500 Subject: [PATCH 17/31] GH-2102 Better error reporting and a bit more tolerance for trxs in block --- tests/nodeos_startup_catchup.py | 6 ++++-- 1 file changed, 4 insertions(+), 2 deletions(-) diff --git a/tests/nodeos_startup_catchup.py b/tests/nodeos_startup_catchup.py index c437b1e858..1ae2142f6d 100755 --- a/tests/nodeos_startup_catchup.py +++ b/tests/nodeos_startup_catchup.py @@ -133,9 +133,11 @@ def waitForNodeStarted(node): steadyStateAvg=steadyStateWindowTrxs / steadyStateWindowBlks Print("Validate transactions are generating") - minReqPctLeeway=0.9 + minReqPctLeeway=0.85 minRequiredTransactions=minReqPctLeeway*transactionsPerBlock - assert steadyStateAvg>=minRequiredTransactions, "Expected to at least receive %s transactions per block, but only getting %s" % (minRequiredTransactions, steadyStateAvg) + assert steadyStateAvg>=minRequiredTransactions, \ + (f"Expected to at least receive {minRequiredTransactions} transactions per block, " + f"but only getting {steadyStateAvg} for blocks {startBlockNum} - {endBlockNum}") Print("Cycle through catchup scenarios") twoRounds=21*2*12 From 244983078bc5940612f2fcd2126e8870be1dce83 Mon Sep 17 00:00:00 2001 From: Kevin Heifner Date: Thu, 4 Apr 2024 21:02:25 -0500 Subject: [PATCH 18/31] GH-2102 Add additional logging for applied blocks --- libraries/chain/controller.cpp | 15 ++++++++++++--- 1 file changed, 12 insertions(+), 3 deletions(-) diff --git a/libraries/chain/controller.cpp b/libraries/chain/controller.cpp index 5886403ae7..d6f078c84f 100644 --- a/libraries/chain/controller.cpp +++ b/libraries/chain/controller.cpp @@ -4013,13 +4013,22 @@ struct controller_impl { for( auto ritr = branches.first.rbegin(); ritr != branches.first.rend(); ++ritr ) { auto except = std::exception_ptr{}; try { - bool valid = (*ritr)->is_valid(); + const auto& bsp = *ritr; + bool valid = bsp->is_valid(); if (!valid) // has not been validated (applied) before, only in forkdb, integrate and possibly vote now - integrate_qc(*ritr); + integrate_qc(bsp); br = controller::block_report{}; - apply_block( br, *ritr, valid ? controller::block_status::validated + apply_block( br, bsp, valid ? controller::block_status::validated : controller::block_status::complete, trx_lookup ); + if (!valid) { // was just applied for first time so log it + ilog("Applied block ${id}... #${n} @ ${t} signed by ${p} " + "[trxs: ${count}, lib: ${lib}, net: ${net}, cpu: ${cpu}, elapsed: ${elapsed}, time: ${time}, latency: ${latency} ms]", + ("p", bsp->producer())("id", bsp->id().str().substr(8, 16))("n", bsp->block_num())("t", bsp->timestamp()) + ("count", bsp->block->transactions.size())("lib", fork_db_root_block_num()) + ("net", br.total_net_usage)("cpu", br.total_cpu_usage_us) + ("elapsed", br.total_elapsed_time)("time", br.total_time)("latency", (fc::time_point::now() - bsp->timestamp()).count() / 1000)); + } if( conf.terminate_at_block > 0 && conf.terminate_at_block <= chain_head.block_num()) { ilog("Reached configured maximum block ${num}; terminating", ("num", conf.terminate_at_block) ); From e464aeea03446019d12665c9cdf0ad37aa70dfce Mon Sep 17 00:00:00 2001 From: Kevin Heifner Date: Thu, 4 Apr 2024 21:05:25 -0500 Subject: [PATCH 19/31] Revert "GH-2102 Fix comparison in waitForBlock" This reverts commit 914f218d3c19f650a51bf84765f502436514679e. --- tests/TestHarness/Node.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/tests/TestHarness/Node.py b/tests/TestHarness/Node.py index ddcd6ec887..d23f42d2f9 100644 --- a/tests/TestHarness/Node.py +++ b/tests/TestHarness/Node.py @@ -208,7 +208,7 @@ def waitForNextBlock(self, timeout=None, blockType=BlockType.head): return ret def waitForBlock(self, blockNum, timeout=None, blockType=BlockType.head, reportInterval=None): - lam = lambda: self.getBlockNum(blockType=blockType) >= blockNum + lam = lambda: self.getBlockNum(blockType=blockType) > blockNum blockDesc = "head" if blockType == BlockType.head else "LIB" count = 0 From a74450c8cee4ef0773ad977063c574cda59d2ec2 Mon Sep 17 00:00:00 2001 From: Kevin Heifner Date: Fri, 5 Apr 2024 07:47:32 -0500 Subject: [PATCH 20/31] GH-2102 Improve logging during sync --- libraries/chain/controller.cpp | 23 ++++++++++++----------- 1 file changed, 12 insertions(+), 11 deletions(-) diff --git a/libraries/chain/controller.cpp b/libraries/chain/controller.cpp index d6f078c84f..ebc13b51ba 100644 --- a/libraries/chain/controller.cpp +++ b/libraries/chain/controller.cpp @@ -1875,10 +1875,10 @@ struct controller_impl { auto pending_head = forkdb.pending_head(); auto head = forkdb.head(); if ( head && pending_head && pending_head->id() != head->id() && head->id() == forkdb.root()->id() ) { - wlog( "read_mode has changed from irreversible: applying best branch from fork database" ); + ilog( "read_mode has changed from irreversible: applying best branch from fork database" ); for( ; pending_head->id() != forkdb.head()->id(); pending_head = forkdb.pending_head() ) { - wlog( "applying branch from fork database ending with block: ${id}", ("id", pending_head->id()) ); + ilog( "applying branch from fork database ending with block: ${id}", ("id", pending_head->id()) ); controller::block_report br; maybe_switch_forks( br, pending_head, controller::block_status::complete, {}, trx_meta_cache_lookup{} ); } @@ -3696,7 +3696,7 @@ struct controller_impl { EOS_ASSERT( id == bsp->id(), block_validate_exception, "provided id ${id} does not match block id ${bid}", ("id", id)("bid", bsp->id()) ); - if (conf.terminate_at_block > 0 && bsp->block_num() <= conf.terminate_at_block) { + if (conf.terminate_at_block == 0 || bsp->block_num() <= conf.terminate_at_block) { forkdb.add(bsp, mark_valid_t::no, ignore_duplicate_t::yes); } @@ -4020,16 +4020,17 @@ struct controller_impl { br = controller::block_report{}; apply_block( br, bsp, valid ? controller::block_status::validated - : controller::block_status::complete, trx_lookup ); + : controller::block_status::complete, trx_lookup ); if (!valid) { // was just applied for first time so log it - ilog("Applied block ${id}... #${n} @ ${t} signed by ${p} " - "[trxs: ${count}, lib: ${lib}, net: ${net}, cpu: ${cpu}, elapsed: ${elapsed}, time: ${time}, latency: ${latency} ms]", - ("p", bsp->producer())("id", bsp->id().str().substr(8, 16))("n", bsp->block_num())("t", bsp->timestamp()) - ("count", bsp->block->transactions.size())("lib", fork_db_root_block_num()) - ("net", br.total_net_usage)("cpu", br.total_cpu_usage_us) - ("elapsed", br.total_elapsed_time)("time", br.total_time)("latency", (fc::time_point::now() - bsp->timestamp()).count() / 1000)); + if (fc::time_point::now() - bsp->timestamp() < fc::minutes(5) || (bsp->block_num() % 1000 == 0)) { + ilog("Applied block ${id}... #${n} @ ${t} signed by ${p} " + "[trxs: ${count}, lib: ${lib}, net: ${net}, cpu: ${cpu}, elapsed: ${elapsed}, time: ${time}, latency: ${latency} ms]", + ("p", bsp->producer())("id", bsp->id().str().substr(8, 16))("n", bsp->block_num())("t", bsp->timestamp()) + ("count", bsp->block->transactions.size())("lib", fork_db_root_block_num()) + ("net", br.total_net_usage)("cpu", br.total_cpu_usage_us) + ("elapsed", br.total_elapsed_time)("time", br.total_time)("latency", (fc::time_point::now() - bsp->timestamp()).count() / 1000)); + } } - if( conf.terminate_at_block > 0 && conf.terminate_at_block <= chain_head.block_num()) { ilog("Reached configured maximum block ${num}; terminating", ("num", conf.terminate_at_block) ); shutdown(); From cafd0b629bbcdbc454a372d643fd9b719fbb10ed Mon Sep 17 00:00:00 2001 From: Kevin Heifner Date: Fri, 5 Apr 2024 08:41:03 -0500 Subject: [PATCH 21/31] GH-2102 Add a large_atomic wrapper around mutex --- .../include/eosio/chain/thread_utils.hpp | 38 +++++++++++++++++++ plugins/net_plugin/net_plugin.cpp | 10 ----- 2 files changed, 38 insertions(+), 10 deletions(-) diff --git a/libraries/chain/include/eosio/chain/thread_utils.hpp b/libraries/chain/include/eosio/chain/thread_utils.hpp index d3e9e8a261..f687096aaa 100644 --- a/libraries/chain/include/eosio/chain/thread_utils.hpp +++ b/libraries/chain/include/eosio/chain/thread_utils.hpp @@ -12,6 +12,44 @@ namespace eosio { namespace chain { + // should be defined for c++17, but clang++16 still has not implemented it +#ifdef __cpp_lib_hardware_interference_size + using std::hardware_constructive_interference_size; + using std::hardware_destructive_interference_size; +#else + // 64 bytes on x86-64 │ L1_CACHE_BYTES │ L1_CACHE_SHIFT │ __cacheline_aligned │ ... + [[maybe_unused]] constexpr std::size_t hardware_constructive_interference_size = 64; + [[maybe_unused]] constexpr std::size_t hardware_destructive_interference_size = 64; +#endif + + // Use instead of std::atomic when std::atomic does not support type + template + class large_atomic { + alignas(hardware_destructive_interference_size) + mutable std::mutex mtx; + T value{}; + public: + T load() const { + std::lock_guard g(mtx); + return value; + } + void store(const T& v) { + std::lock_guard g(mtx); + value = v; + } + + class accessor { + std::lock_guard g; + T& v; + public: + accessor(std::mutex& m, T& v) + : g(m), v(v) {} + T& value() { return v; } + }; + + auto make_accessor() { return accessor{mtx, value}; } + }; + /** * Wrapper class for thread pool of boost asio io_context run. * Also names threads so that tools like htop can see thread name. diff --git a/plugins/net_plugin/net_plugin.cpp b/plugins/net_plugin/net_plugin.cpp index 8da4b26a70..a3ab7201fe 100644 --- a/plugins/net_plugin/net_plugin.cpp +++ b/plugins/net_plugin/net_plugin.cpp @@ -33,16 +33,6 @@ #include #include -// should be defined for c++17, but clang++16 still has not implemented it -#ifdef __cpp_lib_hardware_interference_size - using std::hardware_constructive_interference_size; - using std::hardware_destructive_interference_size; -#else - // 64 bytes on x86-64 │ L1_CACHE_BYTES │ L1_CACHE_SHIFT │ __cacheline_aligned │ ... - [[maybe_unused]] constexpr std::size_t hardware_constructive_interference_size = 64; - [[maybe_unused]] constexpr std::size_t hardware_destructive_interference_size = 64; -#endif - using namespace eosio::chain::plugin_interface; using namespace std::chrono_literals; From 302b957395491a1a548c72f916233f63e14126ea Mon Sep 17 00:00:00 2001 From: Kevin Heifner Date: Fri, 5 Apr 2024 08:59:48 -0500 Subject: [PATCH 22/31] GH-2102 Use large_atomic for if_irreversible_block_id --- libraries/chain/controller.cpp | 21 +++++++++++---------- 1 file changed, 11 insertions(+), 10 deletions(-) diff --git a/libraries/chain/controller.cpp b/libraries/chain/controller.cpp index ebc13b51ba..dd15448965 100644 --- a/libraries/chain/controller.cpp +++ b/libraries/chain/controller.cpp @@ -900,7 +900,7 @@ struct controller_impl { std::optional pending; block_handle chain_head; fork_database fork_db; - block_id_type if_irreversible_block_id; + large_atomic if_irreversible_block_id; resource_limits_manager resource_limits; subjective_billing subjective_bill; authorization_manager authorization; @@ -1389,7 +1389,8 @@ struct controller_impl { ("lib_num", lib_num)("bn", fork_db_root_block_num()) ); } - uint32_t if_lib_num = block_header::num_from_id(if_irreversible_block_id); + block_id_type irreversible_block_id = if_irreversible_block_id.load(); + uint32_t if_lib_num = block_header::num_from_id(irreversible_block_id); const uint32_t new_lib_num = if_lib_num > 0 ? if_lib_num : fork_db_head_irreversible_blocknum(); if( new_lib_num <= lib_num ) @@ -1397,7 +1398,7 @@ struct controller_impl { bool savanna_transistion_required = false; auto mark_branch_irreversible = [&, this](auto& forkdb) { - auto branch = (if_lib_num > 0) ? forkdb.fetch_branch( if_irreversible_block_id, new_lib_num) + auto branch = (if_lib_num > 0) ? forkdb.fetch_branch( irreversible_block_id, new_lib_num) : forkdb.fetch_branch( fork_db_head_or_pending(forkdb)->id(), new_lib_num ); try { auto should_process = [&](auto& bsp) { @@ -3754,7 +3755,7 @@ struct controller_impl { return fork_db.apply>(unlinkable, f); } - // expected to be called from application thread as it modifies bsp->valid_qc and if_irreversible_block_id + // expected to be called from application thread as it modifies bsp->valid_qc void integrate_received_qc_to_block(const block_state_ptr& bsp_in) { // extract QC from block extension const auto& block_exts = bsp_in->block->validate_and_extract_extensions(); @@ -3805,8 +3806,7 @@ struct controller_impl { if (bsp->core.final_on_strong_qc_block_num > 0) { const auto& final_on_strong_qc_block_ref = bsp->core.get_block_reference(bsp->core.final_on_strong_qc_block_num); - auto final = fetch_bsp(final_on_strong_qc_block_ref.block_id); - if (final && final->is_valid()) { + if (fork_db_validated_block_exists(final_on_strong_qc_block_ref.block_id)) { create_and_send_vote_msg(bsp); } } @@ -4354,10 +4354,11 @@ struct controller_impl { void set_if_irreversible_block_id(const block_id_type& id) { const block_num_type id_num = block_header::num_from_id(id); - const block_num_type current_num = block_header::num_from_id(if_irreversible_block_id); + auto accessor = if_irreversible_block_id.make_accessor(); + const block_num_type current_num = block_header::num_from_id(accessor.value()); if( id_num > current_num ) { - dlog("set irreversible block ${bn}: ${id}, old ${obn}: ${oid}", ("bn", id_num)("id", id)("obn", current_num)("oid", if_irreversible_block_id)); - if_irreversible_block_id = id; + dlog("set irreversible block ${bn}: ${id}, old ${obn}: ${oid}", ("bn", id_num)("id", id)("obn", current_num)("oid", accessor.value())); + accessor.value() = id; } } @@ -4932,7 +4933,7 @@ void controller::set_if_irreversible_block_id(const block_id_type& id) { } uint32_t controller::if_irreversible_block_num() const { - return block_header::num_from_id(my->if_irreversible_block_id); + return block_header::num_from_id(my->if_irreversible_block_id.load()); } uint32_t controller::last_irreversible_block_num() const { From be77ca67f5096a98731b27b8e2b4539824cee39f Mon Sep 17 00:00:00 2001 From: Kevin Heifner Date: Fri, 5 Apr 2024 09:04:06 -0500 Subject: [PATCH 23/31] GH-2102 No need to recalculate finality digest --- libraries/chain/controller.cpp | 4 +--- 1 file changed, 1 insertion(+), 3 deletions(-) diff --git a/libraries/chain/controller.cpp b/libraries/chain/controller.cpp index dd15448965..dbfb6a8003 100644 --- a/libraries/chain/controller.cpp +++ b/libraries/chain/controller.cpp @@ -3530,8 +3530,6 @@ struct controller_impl { if (!bsp->block->is_proper_svnn_block()) return; - auto finalizer_digest = bsp->compute_finality_digest(); - // Each finalizer configured on the node which is present in the active finalizer policy // may create and sign a vote // TODO: as a future optimization, we could run maybe_vote on a thread (it would need a @@ -3539,7 +3537,7 @@ struct controller_impl { // off the main thread. net_plugin is fine for this to be emitted from any thread. // Just need to update the comment in net_plugin my_finalizers.maybe_vote( - *bsp->active_finalizer_policy, bsp, finalizer_digest, [&](const vote_message& vote) { + *bsp->active_finalizer_policy, bsp, bsp->strong_digest, [&](const vote_message& vote) { // net plugin subscribed to this signal. it will broadcast the vote message // on receiving the signal emit(voted_block, vote); From 23945fffaff2def839cf34c24306e546c0224dd4 Mon Sep 17 00:00:00 2001 From: Kevin Heifner Date: Fri, 5 Apr 2024 10:27:51 -0500 Subject: [PATCH 24/31] GH-2102 Move valid_qc into pending_qc and make thread safe --- libraries/chain/block_state.cpp | 28 ------------- libraries/chain/controller.cpp | 10 ++--- libraries/chain/hotstuff/finalizer.cpp | 2 +- libraries/chain/hotstuff/hotstuff.cpp | 41 +++++++++++++++++-- .../chain/include/eosio/chain/block_state.hpp | 7 ++-- .../include/eosio/chain/hotstuff/hotstuff.hpp | 8 +++- 6 files changed, 54 insertions(+), 42 deletions(-) diff --git a/libraries/chain/block_state.cpp b/libraries/chain/block_state.cpp index bae2ea5d43..997e3a6fe1 100644 --- a/libraries/chain/block_state.cpp +++ b/libraries/chain/block_state.cpp @@ -85,7 +85,6 @@ block_state_ptr block_state::create_if_genesis_block(const block_state_legacy& b // TODO: https://github.com/AntelopeIO/leap/issues/2057 // TODO: Do not aggregate votes on blocks created from block_state_legacy. This can be removed when #2057 complete. result.pending_qc = pending_quorum_certificate{result.active_finalizer_policy->finalizers.size(), result.active_finalizer_policy->threshold, result.active_finalizer_policy->max_weak_sum_before_weak_final()}; - result.valid_qc = {}; // best qc received from the network inside block extension, empty until first savanna proper IF block // Calculate Merkle tree root in Savanna way so that it is stored in Leaf Node when building block_state. const auto& digests = *bsp.action_receipt_digests_savanna; @@ -252,33 +251,6 @@ void block_state::verify_qc(const valid_quorum_certificate& qc) const { invalid_qc_claim, "signature validation failed" ); } -std::optional block_state::get_best_qc() const { - // if pending_qc does not have a valid QC, consider valid_qc only - if( !pending_qc.is_quorum_met() ) { - if( valid_qc ) { - return quorum_certificate{ block_num(), *valid_qc }; - } else { - return std::nullopt; - } - } - - // extract valid QC from pending_qc - valid_quorum_certificate valid_qc_from_pending = pending_qc.to_valid_quorum_certificate(); - - // if valid_qc does not have value, consider valid_qc_from_pending only - if( !valid_qc ) { - return quorum_certificate{ block_num(), valid_qc_from_pending }; - } - - // Both valid_qc and valid_qc_from_pending have value. Compare them and select a better one. - // Strong beats weak. Tie break by valid_qc. - const auto& best_qc = - valid_qc->is_strong() == valid_qc_from_pending.is_strong() ? - *valid_qc : // tie broke by valid_qc - valid_qc->is_strong() ? *valid_qc : valid_qc_from_pending; // strong beats weak - return quorum_certificate{ block_num(), best_qc }; -} - valid_t block_state::new_valid(const block_header_state& next_bhs, const digest_type& action_mroot, const digest_type& strong_digest) const { assert(valid); assert(next_bhs.core.last_final_block_num() >= core.last_final_block_num()); diff --git a/libraries/chain/controller.cpp b/libraries/chain/controller.cpp index dbfb6a8003..fa51f169b4 100644 --- a/libraries/chain/controller.cpp +++ b/libraries/chain/controller.cpp @@ -3753,7 +3753,7 @@ struct controller_impl { return fork_db.apply>(unlinkable, f); } - // expected to be called from application thread as it modifies bsp->valid_qc + // thread safe void integrate_received_qc_to_block(const block_state_ptr& bsp_in) { // extract QC from block extension const auto& block_exts = bsp_in->block->validate_and_extract_extensions(); @@ -3772,17 +3772,17 @@ struct controller_impl { return; } - // Don't save the QC from block extension if the claimed block has a better valid_qc. - if (claimed->valid_qc && (claimed->valid_qc->is_strong() || received_qc.is_weak())) { + // Don't save the QC from block extension if the claimed block has a better or same valid_qc. + if (received_qc.is_weak() || claimed->valid_qc_is_strong()) { dlog("qc not better, claimed->valid: ${qbn} ${qid}, strong=${s}, received: ${rqc}, for block ${bn} ${id}", - ("qbn", claimed->block_num())("qid", claimed->id())("s", claimed->valid_qc->is_strong()) + ("qbn", claimed->block_num())("qid", claimed->id())("s", !received_qc.is_weak()) // use is_weak() to avoid mutex on valid_qc_is_strong() ("rqc", qc_ext.qc.to_qc_claim())("bn", bsp_in->block_num())("id", bsp_in->id())); return; } // Save the QC. This is safe as the function is called by push_block & accept_block from application thread. dlog("setting valid qc: ${rqc} into claimed block ${bn} ${id}", ("rqc", qc_ext.qc.to_qc_claim())("bn", claimed->block_num())("id", claimed->id())); - claimed->valid_qc = received_qc; + claimed->set_valid_qc(received_qc); // advance LIB if QC is strong if( received_qc.is_strong() ) { diff --git a/libraries/chain/hotstuff/finalizer.cpp b/libraries/chain/hotstuff/finalizer.cpp index 8fd52936ff..af0d3028c2 100644 --- a/libraries/chain/hotstuff/finalizer.cpp +++ b/libraries/chain/hotstuff/finalizer.cpp @@ -99,7 +99,7 @@ std::optional finalizer::maybe_vote(const bls_public_key& pub_key, } else { sig = priv_key.sign({(uint8_t*)digest.data(), (uint8_t*)digest.data() + digest.data_size()}); } - return vote_message{ bsp->id(), decision == vote_decision::strong_vote, pub_key, sig }; + return std::optional{vote_message{ bsp->id(), decision == vote_decision::strong_vote, pub_key, sig }}; } return {}; } diff --git a/libraries/chain/hotstuff/hotstuff.cpp b/libraries/chain/hotstuff/hotstuff.cpp index 262efde066..9cd69d2e00 100644 --- a/libraries/chain/hotstuff/hotstuff.cpp +++ b/libraries/chain/hotstuff/hotstuff.cpp @@ -163,10 +163,7 @@ vote_status pending_quorum_certificate::add_vote(block_num_type block_num, bool return s; } -// thread safe valid_quorum_certificate pending_quorum_certificate::to_valid_quorum_certificate() const { - std::lock_guard g(*_mtx); - valid_quorum_certificate valid_qc; if( _state == state_t::strong ) { @@ -183,6 +180,44 @@ valid_quorum_certificate pending_quorum_certificate::to_valid_quorum_certificate return valid_qc; } +std::optional pending_quorum_certificate::get_best_qc(block_num_type block_num) const { + std::lock_guard g(*_mtx); + // if pending_qc does not have a valid QC, consider valid_qc only + if( !is_quorum_met_no_lock() ) { + if( _valid_qc ) { + return std::optional{quorum_certificate{ block_num, *_valid_qc }}; + } else { + return std::nullopt; + } + } + + // extract valid QC from pending_qc + valid_quorum_certificate valid_qc_from_pending = to_valid_quorum_certificate(); + + // if valid_qc does not have value, consider valid_qc_from_pending only + if( !_valid_qc ) { + return std::optional{quorum_certificate{ block_num, valid_qc_from_pending }}; + } + + // Both valid_qc and valid_qc_from_pending have value. Compare them and select a better one. + // Strong beats weak. Tie break by valid_qc. + const auto& best_qc = + _valid_qc->is_strong() == valid_qc_from_pending.is_strong() ? + *_valid_qc : // tie broke by valid_qc + _valid_qc->is_strong() ? *_valid_qc : valid_qc_from_pending; // strong beats weak + return std::optional{quorum_certificate{ block_num, best_qc }}; +} + +void pending_quorum_certificate::set_valid_qc(const valid_quorum_certificate& qc) { + std::lock_guard g(*_mtx); + _valid_qc = qc; +} + +bool pending_quorum_certificate::valid_qc_is_strong() const { + std::lock_guard g(*_mtx); + return _valid_qc && _valid_qc->is_strong(); +} + bool pending_quorum_certificate::is_quorum_met_no_lock() const { return is_quorum_met(_state); } diff --git a/libraries/chain/include/eosio/chain/block_state.hpp b/libraries/chain/include/eosio/chain/block_state.hpp index 9c1fc5a7d9..9f26b8b73a 100644 --- a/libraries/chain/include/eosio/chain/block_state.hpp +++ b/libraries/chain/include/eosio/chain/block_state.hpp @@ -69,7 +69,6 @@ struct block_state : public block_header_state { // block_header_state provi digest_type strong_digest; // finalizer_digest (strong, cached so we can quickly validate votes) weak_digest_t weak_digest; // finalizer_digest (weak, cached so we can quickly validate votes) pending_quorum_certificate pending_qc; // where we accumulate votes we receive - std::optional valid_qc; // best qc received from the network inside block extension std::optional valid; // ------ updated for votes, used for fork_db ordering ------------------------------ @@ -103,7 +102,9 @@ struct block_state : public block_header_state { // block_header_state provi const extensions_type& header_extensions() const { return block_header_state::header.header_extensions; } uint32_t irreversible_blocknum() const { return core.last_final_block_num(); } // backwards compatibility uint32_t last_final_block_num() const { return core.last_final_block_num(); } - std::optional get_best_qc() const; + std::optional get_best_qc() const { return pending_qc.get_best_qc(block_num()); } // thread safe + bool valid_qc_is_strong() const { return pending_qc.valid_qc_is_strong(); } // thread safe + void set_valid_qc(const valid_quorum_certificate& qc) { pending_qc.set_valid_qc(qc); } protocol_feature_activation_set_ptr get_activated_protocol_features() const { return block_header_state::activated_protocol_features; } uint32_t last_qc_block_num() const { return core.latest_qc_claim().block_num; } @@ -164,4 +165,4 @@ using block_state_pair = std::pair, blo FC_REFLECT( eosio::chain::valid_t::finality_leaf_node_t, (major_version)(minor_version)(block_num)(finality_digest)(action_mroot) ) FC_REFLECT( eosio::chain::valid_t, (validation_tree)(validation_mroots)) FC_REFLECT( eosio::chain::finality_data_t, (major_version)(minor_version)(active_finalizer_policy_generation)(action_mroot)(base_digest)) -FC_REFLECT_DERIVED( eosio::chain::block_state, (eosio::chain::block_header_state), (block)(strong_digest)(weak_digest)(pending_qc)(valid_qc)(valid)(validated) ) +FC_REFLECT_DERIVED( eosio::chain::block_state, (eosio::chain::block_header_state), (block)(strong_digest)(weak_digest)(pending_qc)(valid)(validated) ) diff --git a/libraries/chain/include/eosio/chain/hotstuff/hotstuff.hpp b/libraries/chain/include/eosio/chain/hotstuff/hotstuff.hpp index 7fae2ceb78..b54f8d7416 100644 --- a/libraries/chain/include/eosio/chain/hotstuff/hotstuff.hpp +++ b/libraries/chain/include/eosio/chain/hotstuff/hotstuff.hpp @@ -124,12 +124,15 @@ namespace eosio::chain { bool has_voted(size_t index) const; state_t state() const { std::lock_guard g(*_mtx); return _state; }; - valid_quorum_certificate to_valid_quorum_certificate() const; + std::optional get_best_qc(block_num_type block_num) const; + void set_valid_qc(const valid_quorum_certificate& qc); + bool valid_qc_is_strong() const; private: friend struct fc::reflector; friend class qc_chain; std::unique_ptr _mtx; + std::optional _valid_qc; // best qc received from the network inside block extension uint64_t _quorum {0}; uint64_t _max_weak_sum_before_weak_final {0}; // max weak sum before becoming weak_final state_t _state { state_t::unrestricted }; @@ -150,6 +153,7 @@ namespace eosio::chain { bool is_quorum_met_no_lock() const; bool has_voted_no_lock(bool strong, size_t index) const; + valid_quorum_certificate to_valid_quorum_certificate() const; }; } //eosio::chain @@ -157,7 +161,7 @@ namespace eosio::chain { FC_REFLECT(eosio::chain::vote_message, (block_id)(strong)(finalizer_key)(sig)); FC_REFLECT_ENUM(eosio::chain::vote_status, (success)(duplicate)(unknown_public_key)(invalid_signature)(unknown_block)) FC_REFLECT(eosio::chain::valid_quorum_certificate, (_strong_votes)(_weak_votes)(_sig)); -FC_REFLECT(eosio::chain::pending_quorum_certificate, (_quorum)(_max_weak_sum_before_weak_final)(_state)(_strong_sum)(_weak_sum)(_weak_votes)(_strong_votes)); +FC_REFLECT(eosio::chain::pending_quorum_certificate, (_valid_qc)(_quorum)(_max_weak_sum_before_weak_final)(_state)(_strong_sum)(_weak_sum)(_weak_votes)(_strong_votes)); FC_REFLECT_ENUM(eosio::chain::pending_quorum_certificate::state_t, (unrestricted)(restricted)(weak_achieved)(weak_final)(strong)); FC_REFLECT(eosio::chain::pending_quorum_certificate::votes_t, (_bitset)(_sig)); FC_REFLECT(eosio::chain::quorum_certificate, (block_num)(qc)); From ba5006eec71b1fa356d0cb126104fcc798f8fb5f Mon Sep 17 00:00:00 2001 From: Kevin Heifner Date: Fri, 5 Apr 2024 16:15:34 -0500 Subject: [PATCH 25/31] GH-2102 Improve test conditions --- tests/lib_advance_test.py | 2 +- tests/nodeos_snapshot_diff_test.py | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/tests/lib_advance_test.py b/tests/lib_advance_test.py index cb2ddd104c..4d7d5fd1dd 100755 --- a/tests/lib_advance_test.py +++ b/tests/lib_advance_test.py @@ -140,7 +140,7 @@ assert prodD.getIrreversibleBlockNum() > max(libProdABeforeKill, libProdDBeforeKill) # instant finality does not drop late blocks, but can still get unlinkable when syncing and getting a produced block - allowedUnlinkableBlocks = afterBlockNum-beforeBlockNum if not activateIF else 5 + allowedUnlinkableBlocks = afterBlockNum-beforeBlockNum logFile = Utils.getNodeDataDir(prodNode3.nodeId) + "/stderr.txt" f = open(logFile) contents = f.read() diff --git a/tests/nodeos_snapshot_diff_test.py b/tests/nodeos_snapshot_diff_test.py index c7d37c79d6..2185df1509 100755 --- a/tests/nodeos_snapshot_diff_test.py +++ b/tests/nodeos_snapshot_diff_test.py @@ -182,7 +182,7 @@ def waitForBlock(node, blockNum, blockType=BlockType.head, timeout=None, reportI assert ret is not None, "Snapshot scheduling failed" Print("Wait for programmable node lib to advance") - waitForBlock(nodeProg, ret_head_block_num+1, blockType=BlockType.lib) + waitForBlock(nodeProg, ret_head_block_num, blockType=BlockType.lib) Print("Kill programmable node") nodeProg.kill(signal.SIGTERM) From 9de11ffb6d0b85764ca5a03ceaa81bd7b958ee86 Mon Sep 17 00:00:00 2001 From: Kevin Heifner Date: Sat, 6 Apr 2024 17:21:24 -0500 Subject: [PATCH 26/31] GH-2102 Move produced/received block logging into controller so it logs all processed blocks --- libraries/chain/controller.cpp | 66 ++++++++++++----- .../chain/include/eosio/chain/controller.hpp | 2 +- libraries/testing/tester.cpp | 2 +- plugins/producer_plugin/producer_plugin.cpp | 74 +++++++------------ 4 files changed, 78 insertions(+), 66 deletions(-) diff --git a/libraries/chain/controller.cpp b/libraries/chain/controller.cpp index fa51f169b4..554c9d813c 100644 --- a/libraries/chain/controller.cpp +++ b/libraries/chain/controller.cpp @@ -3339,16 +3339,45 @@ struct controller_impl { return {}; } + + template + void log_applied(controller::block_report& br, const BSP& bsp) const { + fc::time_point now = fc::time_point::now(); + if (now - bsp->timestamp() < fc::minutes(5) || (bsp->block_num() % 1000 == 0)) { + ilog("Received block ${id}... #${n} @ ${t} signed by ${p} " // "Received" instead of "Applied" so it matches existing log output + "[trxs: ${count}, lib: ${lib}, net: ${net}, cpu: ${cpu}, elapsed: ${elapsed}, time: ${time}, latency: ${latency} ms]", + ("p", bsp->producer())("id", bsp->id().str().substr(8, 16))("n", bsp->block_num())("t", bsp->timestamp()) + ("count", bsp->block->transactions.size())("lib", fork_db_root_block_num()) + ("net", br.total_net_usage)("cpu", br.total_cpu_usage_us) + ("elapsed", br.total_elapsed_time)("time", br.total_time)("latency", (fc::time_point::now() - bsp->timestamp()).count() / 1000)); + const auto& hb_id = chain_head.id(); + const auto& hb = chain_head.block(); + if (read_mode != db_read_mode::IRREVERSIBLE && hb && hb_id != bsp->id() && hb != nullptr) { // not applied to head + ilog("Block not applied to head ${id}... #${n} @ ${t} signed by ${p} " + "[trxs: ${count}, lib: ${lib}, net: ${net}, cpu: ${cpu}, elapsed: ${elapsed}, time: ${time}, latency: ${latency} ms]", + ("p", hb->producer)("id", hb_id.str().substr(8, 16))("n", hb->block_num())("t", hb->timestamp) + ("count", hb->transactions.size())("lib", fork_db_root_block_num()) + ("net", br.total_net_usage)("cpu", br.total_cpu_usage_us)("elapsed", br.total_elapsed_time)("time", br.total_time) + ("latency", (now - hb->timestamp).count() / 1000)); + } + } + } + template void apply_block( controller::block_report& br, const BSP& bsp, controller::block_status s, const trx_meta_cache_lookup& trx_lookup ) { try { try { auto start = fc::time_point::now(); + + const bool already_valid = bsp->is_valid(); + if (!already_valid) // has not been validated (applied) before, only in forkdb, integrate and possibly vote now + integrate_qc(bsp); + const signed_block_ptr& b = bsp->block; const auto& new_protocol_feature_activations = bsp->get_new_protocol_feature_activations(); + const auto& producer_block_id = bsp->id(); - auto producer_block_id = bsp->id(); start_block( b->timestamp, b->confirmed, new_protocol_feature_activations, s, producer_block_id, fc::time_point::maximum() ); // validated in create_block_handle() @@ -3476,6 +3505,9 @@ struct controller_impl { commit_block(s); br.total_time = fc::time_point::now() - start; + if (!already_valid) + log_applied(br, bsp); + } catch ( const std::bad_alloc& ) { throw; } catch ( const boost::interprocess::bad_alloc& ) { @@ -4012,23 +4044,11 @@ struct controller_impl { auto except = std::exception_ptr{}; try { const auto& bsp = *ritr; - bool valid = bsp->is_valid(); - if (!valid) // has not been validated (applied) before, only in forkdb, integrate and possibly vote now - integrate_qc(bsp); br = controller::block_report{}; - apply_block( br, bsp, valid ? controller::block_status::validated - : controller::block_status::complete, trx_lookup ); - if (!valid) { // was just applied for first time so log it - if (fc::time_point::now() - bsp->timestamp() < fc::minutes(5) || (bsp->block_num() % 1000 == 0)) { - ilog("Applied block ${id}... #${n} @ ${t} signed by ${p} " - "[trxs: ${count}, lib: ${lib}, net: ${net}, cpu: ${cpu}, elapsed: ${elapsed}, time: ${time}, latency: ${latency} ms]", - ("p", bsp->producer())("id", bsp->id().str().substr(8, 16))("n", bsp->block_num())("t", bsp->timestamp()) - ("count", bsp->block->transactions.size())("lib", fork_db_root_block_num()) - ("net", br.total_net_usage)("cpu", br.total_cpu_usage_us) - ("elapsed", br.total_elapsed_time)("time", br.total_time)("latency", (fc::time_point::now() - bsp->timestamp()).count() / 1000)); - } - } + apply_block( br, bsp, bsp->is_valid() ? controller::block_status::validated + : controller::block_status::complete, trx_lookup ); + if( conf.terminate_at_block > 0 && conf.terminate_at_block <= chain_head.block_num()) { ilog("Reached configured maximum block ${num}; terminating", ("num", conf.terminate_at_block) ); shutdown(); @@ -4750,9 +4770,21 @@ void controller::assemble_and_complete_block( block_report& br, const signer_cal br = my->pending->_block_report; } -void controller::commit_block() { +void controller::commit_block(block_report& br) { + fc::time_point start = fc::time_point::now(); + validate_db_available_size(); my->commit_block(block_status::incomplete); + + const auto& id = head_block_id(); + const auto& new_b = head_block(); + br.total_time += fc::time_point::now() - start; + + ilog("Produced block ${id}... #${n} @ ${t} signed by ${p} " + "[trxs: ${count}, lib: ${lib}, confirmed: ${confs}, net: ${net}, cpu: ${cpu}, elapsed: ${et}, time: ${tt}]", + ("p", new_b->producer)("id", id.str().substr(8, 16))("n", new_b->block_num())("t", new_b->timestamp) + ("count", new_b->transactions.size())("lib", last_irreversible_block_num())("net", br.total_net_usage) + ("cpu", br.total_cpu_usage_us)("et", br.total_elapsed_time)("tt", br.total_time)("confs", new_b->confirmed)); } void controller::maybe_switch_forks(const forked_callback_t& cb, const trx_meta_cache_lookup& trx_lookup) { diff --git a/libraries/chain/include/eosio/chain/controller.hpp b/libraries/chain/include/eosio/chain/controller.hpp index fc9044aece..08fb296026 100644 --- a/libraries/chain/include/eosio/chain/controller.hpp +++ b/libraries/chain/include/eosio/chain/controller.hpp @@ -177,7 +177,7 @@ namespace eosio::chain { void assemble_and_complete_block( block_report& br, const signer_callback_type& signer_callback ); void sign_block( const signer_callback_type& signer_callback ); - void commit_block(); + void commit_block(block_report& br); void maybe_switch_forks(const forked_callback_t& cb, const trx_meta_cache_lookup& trx_lookup); // thread-safe diff --git a/libraries/testing/tester.cpp b/libraries/testing/tester.cpp index 7e1efb0177..6bcbde9ae2 100644 --- a/libraries/testing/tester.cpp +++ b/libraries/testing/tester.cpp @@ -482,7 +482,7 @@ namespace eosio { namespace testing { return result; } ); - control->commit_block(); + control->commit_block(br); last_produced_block[producer_name] = control->head_block_id(); _wait_for_vote_if_needed(*control); diff --git a/plugins/producer_plugin/producer_plugin.cpp b/plugins/producer_plugin/producer_plugin.cpp index 976a08b77a..91f23c1798 100644 --- a/plugins/producer_plugin/producer_plugin.cpp +++ b/plugins/producer_plugin/producer_plugin.cpp @@ -283,19 +283,8 @@ struct block_time_tracker { } void report(uint32_t block_num, account_name producer, producer_plugin::speculative_block_metrics& metrics) { - using namespace std::string_literals; - assert(!paused); auto now = fc::time_point::now(); - if( _log.is_enabled( fc::log_level::debug ) ) { - auto diff = now - clear_time_point - block_idle_time - trx_success_time - trx_fail_time - transient_trx_time - other_time; - fc_dlog( _log, "Block #${n} ${p} trx idle: ${i}us out of ${t}us, success: ${sn}, ${s}us, fail: ${fn}, ${f}us, " - "transient: ${ttn}, ${tt}us, other: ${o}us${rest}", - ("n", block_num)("p", producer) - ("i", block_idle_time)("t", now - clear_time_point)("sn", trx_success_num)("s", trx_success_time) - ("fn", trx_fail_num)("f", trx_fail_time) - ("ttn", transient_trx_num)("tt", transient_trx_time) - ("o", other_time)("rest", diff.count() > 5 ? ", diff: "s + std::to_string(diff.count()) + "us"s : ""s ) ); - } + report(block_num, producer, now); metrics.block_producer = producer; metrics.block_num = block_num; metrics.block_total_time_us = (now - clear_time_point).count(); @@ -309,6 +298,21 @@ struct block_time_tracker { metrics.block_other_time_us = other_time.count(); } + void report(uint32_t block_num, account_name producer, const fc::time_point& now = fc::time_point::now()) { + using namespace std::string_literals; + assert(!paused); + if( _log.is_enabled( fc::log_level::debug ) ) { + auto diff = now - clear_time_point - block_idle_time - trx_success_time - trx_fail_time - transient_trx_time - other_time; + fc_dlog( _log, "Block #${n} ${p} trx idle: ${i}us out of ${t}us, success: ${sn}, ${s}us, fail: ${fn}, ${f}us, " + "transient: ${ttn}, ${tt}us, other: ${o}us${rest}", + ("n", block_num)("p", producer) + ("i", block_idle_time)("t", now - clear_time_point)("sn", trx_success_num)("s", trx_success_time) + ("fn", trx_fail_num)("f", trx_fail_time) + ("ttn", transient_trx_num)("tt", transient_trx_time) + ("o", other_time)("rest", diff.count() > 5 ? ", diff: "s + std::to_string(diff.count()) + "us"s : ""s ) ); + } + } + void clear() { assert(!paused); block_idle_time = trx_fail_time = trx_success_time = transient_trx_time = other_time = fc::microseconds{}; @@ -744,25 +748,7 @@ class producer_plugin_impl : public std::enable_shared_from_thistimestamp < fc::minutes(5) || (blk_num % 1000 == 0)) { - ilog("Received block ${id}... #${n} @ ${t} signed by ${p} " - "[trxs: ${count}, lib: ${lib}, net: ${net}, cpu: ${cpu}, elapsed: ${elapsed}, time: ${time}, latency: ${latency} ms]", - ("p", block->producer)("id", id.str().substr(8, 16))("n", blk_num)("t", block->timestamp) - ("count", block->transactions.size())("lib", chain.last_irreversible_block_num()) - ("net", br.total_net_usage)("cpu", br.total_cpu_usage_us) - ("elapsed", br.total_elapsed_time)("time", br.total_time)("latency", (now - block->timestamp).count() / 1000)); - const auto& hb_id = chain.head_block_id(); - const auto& hb = chain.head_block(); - if (chain.get_read_mode() != db_read_mode::IRREVERSIBLE && hb && hb_id != id && hb != nullptr) { // not applied to head - ilog("Block not applied to head ${id}... #${n} @ ${t} signed by ${p} " - "[trxs: ${count}, lib: ${lib}, net: ${net}, cpu: ${cpu}, elapsed: ${elapsed}, time: ${time}, latency: ${latency} ms]", - ("p", hb->producer)("id", hb_id.str().substr(8, 16))("n", hb->block_num())("t", hb->timestamp) - ("count", hb->transactions.size())("lib", chain.last_irreversible_block_num()) - ("net", br.total_net_usage)("cpu", br.total_cpu_usage_us)("elapsed", br.total_elapsed_time)("time", br.total_time) - ("latency", (now - hb->timestamp).count() / 1000)); - } - } - if (_update_incoming_block_metrics) { + if (_update_incoming_block_metrics) { // only includes those blocks pushed, not those that are accepted and processed internally _update_incoming_block_metrics({.trxs_incoming_total = block->transactions.size(), .cpu_usage_us = br.total_cpu_usage_us, .total_elapsed_time_us = br.total_elapsed_time.count(), @@ -2672,25 +2658,12 @@ void producer_plugin_impl::produce_block() { return sigs; }); - chain.commit_block(); - - const auto& id = chain.head_block_id(); - const auto& new_b = chain.head_block(); - producer_plugin::produced_block_metrics metrics; - br.total_time += fc::time_point::now() - start; + chain.commit_block(br); - ilog("Produced block ${id}... #${n} @ ${t} signed by ${p} " - "[trxs: ${count}, lib: ${lib}, confirmed: ${confs}, net: ${net}, cpu: ${cpu}, elapsed: ${et}, time: ${tt}]", - ("p", new_b->producer)("id", id.str().substr(8, 16))("n", new_b->block_num())("t", new_b->timestamp) - ("count", new_b->transactions.size())("lib", chain.last_irreversible_block_num())("net", br.total_net_usage) - ("cpu", br.total_cpu_usage_us)("et", br.total_elapsed_time)("tt", br.total_time)("confs", new_b->confirmed)); - - _time_tracker.add_other_time(); - _time_tracker.report(new_b->block_num(), new_b->producer, metrics); - _time_tracker.clear(); - + const auto& new_b = chain.head_block(); if (_update_produced_block_metrics) { + producer_plugin::produced_block_metrics metrics; metrics.unapplied_transactions_total = _unapplied_transactions.size(); metrics.subjective_bill_account_size_total = chain.get_subjective_billing().get_account_cache_size(); metrics.scheduled_trxs_total = chain.db().get_index().size(); @@ -2702,7 +2675,14 @@ void producer_plugin_impl::produce_block() { metrics.last_irreversible = chain.last_irreversible_block_num(); metrics.head_block_num = chain.head_block_num(); _update_produced_block_metrics(metrics); + + _time_tracker.add_other_time(); + _time_tracker.report(new_b->block_num(), new_b->producer, metrics); + } else { + _time_tracker.add_other_time(); + _time_tracker.report(new_b->block_num(), new_b->producer); } + _time_tracker.clear(); } void producer_plugin::received_block(uint32_t block_num) { From 8e98c186087a6cab10c86fe970ab9c0cfaf43704 Mon Sep 17 00:00:00 2001 From: Kevin Heifner Date: Sat, 6 Apr 2024 18:57:32 -0500 Subject: [PATCH 27/31] GH-2102 Fix log_applied to not be called during replay --- libraries/chain/controller.cpp | 2 ++ 1 file changed, 2 insertions(+) diff --git a/libraries/chain/controller.cpp b/libraries/chain/controller.cpp index 554c9d813c..02764ae16f 100644 --- a/libraries/chain/controller.cpp +++ b/libraries/chain/controller.cpp @@ -3342,6 +3342,8 @@ struct controller_impl { template void log_applied(controller::block_report& br, const BSP& bsp) const { + if (replaying) // fork_db_root_block_num not available during replay + return; fc::time_point now = fc::time_point::now(); if (now - bsp->timestamp() < fc::minutes(5) || (bsp->block_num() % 1000 == 0)) { ilog("Received block ${id}... #${n} @ ${t} signed by ${p} " // "Received" instead of "Applied" so it matches existing log output From a9058a1c34e35adf217f881e3135c05913e13b3b Mon Sep 17 00:00:00 2001 From: Kevin Heifner Date: Mon, 8 Apr 2024 09:04:17 -0500 Subject: [PATCH 28/31] GH-2102 Add a copyable atomic type --- .../include/eosio/chain/thread_utils.hpp | 30 +++++++++++++++++++ 1 file changed, 30 insertions(+) diff --git a/libraries/chain/include/eosio/chain/thread_utils.hpp b/libraries/chain/include/eosio/chain/thread_utils.hpp index f687096aaa..b4e4d5a673 100644 --- a/libraries/chain/include/eosio/chain/thread_utils.hpp +++ b/libraries/chain/include/eosio/chain/thread_utils.hpp @@ -50,6 +50,36 @@ namespace eosio { namespace chain { auto make_accessor() { return accessor{mtx, value}; } }; + template + class copyable_atomic { + std::atomic value; + public: + copyable_atomic() = default; + copyable_atomic(T v) noexcept + : value(v) {} + copyable_atomic(const copyable_atomic& rhs) + : value(rhs.value.load(std::memory_order_relaxed)) {} + copyable_atomic(copyable_atomic&& rhs) noexcept + : value(rhs.value.load(std::memory_order_relaxed)) {} + + T load(std::memory_order mo = std::memory_order_seq_cst) const noexcept { return value.load(mo); } + void store(T v, std::memory_order mo = std::memory_order_seq_cst) noexcept { value.store(v, mo); } + + template + friend DS& operator<<(DS& ds, const copyable_atomic& ca) { + fc::raw::pack(ds, ca.load(std::memory_order_relaxed)); + return ds; + } + + template + friend DS& operator>>(DS& ds, copyable_atomic& ca) { + T v; + fc::raw::unpack(ds, v); + ca.store(v, std::memory_order_relaxed); + return ds; + } + }; + /** * Wrapper class for thread pool of boost asio io_context run. * Also names threads so that tools like htop can see thread name. From 6d64c27317ecb30284271be84c7d7daf90dbe1f0 Mon Sep 17 00:00:00 2001 From: Kevin Heifner Date: Mon, 8 Apr 2024 09:09:32 -0500 Subject: [PATCH 29/31] GH-2102 Integrate qc and vote if possible off the main thread. --- libraries/chain/block_state.cpp | 2 +- libraries/chain/controller.cpp | 48 ++++++++----------- libraries/chain/fork_database.cpp | 2 +- .../chain/include/eosio/chain/block_state.hpp | 5 +- .../chain/include/eosio/chain/controller.hpp | 2 + .../eosio/chain/hotstuff/finalizer.hpp | 42 ++++++++++++---- plugins/net_plugin/net_plugin.cpp | 2 +- unittests/finalizer_tests.cpp | 16 +++---- 8 files changed, 70 insertions(+), 49 deletions(-) diff --git a/libraries/chain/block_state.cpp b/libraries/chain/block_state.cpp index 997e3a6fe1..931c81ff14 100644 --- a/libraries/chain/block_state.cpp +++ b/libraries/chain/block_state.cpp @@ -104,7 +104,7 @@ block_state_ptr block_state::create_if_genesis_block(const block_state_legacy& b .validation_mroots = { validation_tree.get_root() } }; - result.validated = bsp.is_valid(); + result.validated.store(bsp.is_valid()); result.pub_keys_recovered = bsp._pub_keys_recovered; result.cached_trxs = bsp._cached_trxs; result.action_mroot = action_mroot_svnn; diff --git a/libraries/chain/controller.cpp b/libraries/chain/controller.cpp index 02764ae16f..9c88880dae 100644 --- a/libraries/chain/controller.cpp +++ b/libraries/chain/controller.cpp @@ -1179,7 +1179,7 @@ struct controller_impl { chain_id( chain_id ), read_mode( cfg.read_mode ), thread_pool(), - my_finalizers{ .t_startup = fc::time_point::now(), .persist_file_path = cfg.finalizers_dir / "safety.dat" }, + my_finalizers(fc::time_point::now(), cfg.finalizers_dir / "safety.dat"), wasmif( conf.wasm_runtime, conf.eosvmoc_tierup, db, conf.state_dir, conf.eosvmoc_config, !conf.profile_accounts.empty() ) { thread_pool.start( cfg.thread_pool_size, [this]( const fc::exception& e ) { @@ -3373,8 +3373,8 @@ struct controller_impl { auto start = fc::time_point::now(); const bool already_valid = bsp->is_valid(); - if (!already_valid) // has not been validated (applied) before, only in forkdb, integrate and possibly vote now - integrate_qc(bsp); + if (!already_valid) // has not been validated (applied) before, only in forkdb, see if we can vote now + consider_voting(bsp); const signed_block_ptr& b = bsp->block; const auto& new_protocol_feature_activations = bsp->get_new_protocol_feature_activations(); @@ -3544,14 +3544,14 @@ struct controller_impl { } bool node_has_voted_if_finalizer(const block_id_type& id) const { - if (my_finalizers.finalizers.empty()) + if (my_finalizers.empty()) return true; std::optional voted = fork_db.apply_s>([&](auto& forkdb) -> std::optional { auto bsp = forkdb.get_block(id); if (bsp) { - return std::ranges::all_of(my_finalizers.finalizers, [&bsp](auto& f) { - return bsp->has_voted(f.first); + return my_finalizers.all_of_public_keys([&bsp](const auto& k) { + return bsp->has_voted(k); }); } return false; @@ -3560,20 +3560,15 @@ struct controller_impl { return !voted || *voted; } + // thread safe void create_and_send_vote_msg(const block_state_ptr& bsp) { if (!bsp->block->is_proper_svnn_block()) return; - // Each finalizer configured on the node which is present in the active finalizer policy - // may create and sign a vote - // TODO: as a future optimization, we could run maybe_vote on a thread (it would need a - // lock around the file access). We should document that the voted_block is emitted - // off the main thread. net_plugin is fine for this to be emitted from any thread. - // Just need to update the comment in net_plugin + // Each finalizer configured on the node which is present in the active finalizer policy may create and sign a vote. my_finalizers.maybe_vote( *bsp->active_finalizer_policy, bsp, bsp->strong_digest, [&](const vote_message& vote) { - // net plugin subscribed to this signal. it will broadcast the vote message - // on receiving the signal + // net plugin subscribed to this signal. it will broadcast the vote message on receiving the signal emit(voted_block, vote); // also aggregate our own vote into the pending_qc for this block. @@ -3729,6 +3724,11 @@ struct controller_impl { EOS_ASSERT( id == bsp->id(), block_validate_exception, "provided id ${id} does not match block id ${bid}", ("id", id)("bid", bsp->id()) ); + if constexpr (savanna_mode) { + integrate_received_qc_to_block(bsp); // Save the received QC as soon as possible, no matter whether the block itself is valid or not + consider_voting(bsp); + } + if (conf.terminate_at_block == 0 || bsp->block_num() <= conf.terminate_at_block) { forkdb.add(bsp, mark_valid_t::no, ignore_duplicate_t::yes); } @@ -3814,7 +3814,7 @@ struct controller_impl { return; } - // Save the QC. This is safe as the function is called by push_block & accept_block from application thread. + // Save the QC. dlog("setting valid qc: ${rqc} into claimed block ${bn} ${id}", ("rqc", qc_ext.qc.to_qc_claim())("bn", claimed->block_num())("id", claimed->id())); claimed->set_valid_qc(received_qc); @@ -3830,6 +3830,9 @@ struct controller_impl { } } + void consider_voting(const block_state_legacy_ptr&) {} + + // thread safe void consider_voting(const block_state_ptr& bsp) { // 1. Get the `core.final_on_strong_qc_block_num` for the block you are considering to vote on and use that to find the actual block ID // of the ancestor block that has that block number. @@ -3844,20 +3847,12 @@ struct controller_impl { } } - template - void integrate_qc(const BSP& bsp) { - if constexpr (std::is_same_v) { - integrate_received_qc_to_block(bsp); - consider_voting(bsp); - } - } - template void accept_block(const BSP& bsp) { assert(bsp && bsp->block); - // Save the received QC as soon as possible, no matter whether the block itself is valid or not - integrate_qc(bsp); + // consider voting again as final_on_strong_qc_block may have been validated since the bsp was created in create_block_state_i + consider_voting(bsp); auto do_accept_block = [&](auto& forkdb) { if constexpr (std::is_same_v>) @@ -3877,9 +3872,6 @@ struct controller_impl { { assert(bsp && bsp->block); - // Save the received QC as soon as possible, no matter whether the block itself is valid or not - integrate_qc(bsp); - controller::block_status s = controller::block_status::complete; EOS_ASSERT(!pending, block_validate_exception, "it is not valid to push a block when there is a pending block"); diff --git a/libraries/chain/fork_database.cpp b/libraries/chain/fork_database.cpp index c054836dc5..6ec9d9034d 100644 --- a/libraries/chain/fork_database.cpp +++ b/libraries/chain/fork_database.cpp @@ -24,7 +24,7 @@ namespace eosio::chain { struct block_state_accessor { static bool is_valid(const block_state& bs) { return bs.is_valid(); } - static void set_valid(block_state& bs, bool v) { bs.validated = v; } + static void set_valid(block_state& bs, bool v) { bs.validated.store(v); } }; struct block_state_legacy_accessor { diff --git a/libraries/chain/include/eosio/chain/block_state.hpp b/libraries/chain/include/eosio/chain/block_state.hpp index 9f26b8b73a..a212f1655f 100644 --- a/libraries/chain/include/eosio/chain/block_state.hpp +++ b/libraries/chain/include/eosio/chain/block_state.hpp @@ -5,6 +5,7 @@ #include #include #include +#include namespace eosio::chain { @@ -73,7 +74,7 @@ struct block_state : public block_header_state { // block_header_state provi // ------ updated for votes, used for fork_db ordering ------------------------------ private: - bool validated = false; // We have executed the block's trxs and verified that action merkle root (block id) matches. + copyable_atomic validated{false}; // We have executed the block's trxs and verified that action merkle root (block id) matches. // ------ data members caching information available elsewhere ---------------------- bool pub_keys_recovered = false; @@ -82,7 +83,7 @@ struct block_state : public block_header_state { // block_header_state provi std::optional base_digest; // For finality_data sent to SHiP, computed on demand in get_finality_data() // ------ private methods ----------------------------------------------------------- - bool is_valid() const { return validated; } + bool is_valid() const { return validated.load(); } bool is_pub_keys_recovered() const { return pub_keys_recovered; } deque extract_trxs_metas(); void set_trxs_metas(deque&& trxs_metas, bool keys_recovered); diff --git a/libraries/chain/include/eosio/chain/controller.hpp b/libraries/chain/include/eosio/chain/controller.hpp index 08fb296026..aaaa1a9af9 100644 --- a/libraries/chain/include/eosio/chain/controller.hpp +++ b/libraries/chain/include/eosio/chain/controller.hpp @@ -373,6 +373,8 @@ namespace eosio::chain { signal& accepted_block(); signal& irreversible_block(); signal)>& applied_transaction(); + + // Unlike other signals, voted_block can be signaled from other threads than the main thread. signal& voted_block(); const apply_handler* find_apply_handler( account_name contract, scope_name scope, action_name act )const; diff --git a/libraries/chain/include/eosio/chain/hotstuff/finalizer.hpp b/libraries/chain/include/eosio/chain/hotstuff/finalizer.hpp index e44ca30fb2..786afbc53a 100644 --- a/libraries/chain/include/eosio/chain/hotstuff/finalizer.hpp +++ b/libraries/chain/include/eosio/chain/hotstuff/finalizer.hpp @@ -1,8 +1,10 @@ #pragma once -#include "eosio/chain/block_state.hpp" +#include #include #include #include +#include +#include // ------------------------------------------------------------------------------------------- // this file defines the classes: @@ -45,6 +47,7 @@ namespace eosio::chain { }; // ---------------------------------------------------------------------------------------- + // Access is protected by my_finalizers_t mutex struct finalizer { enum class vote_decision { no_vote, strong_vote, weak_vote }; struct vote_result { @@ -58,7 +61,6 @@ namespace eosio::chain { finalizer_safety_information fsi; vote_result decide_vote(const block_state_ptr& bsp); - std::optional maybe_vote(const bls_public_key& pub_key, const block_state_ptr& bsp, const digest_type& digest); }; @@ -68,22 +70,38 @@ namespace eosio::chain { using fsi_t = finalizer_safety_information; using fsi_map = std::map; + private: const block_timestamp_type t_startup; // nodeos startup time, used for default safety_information const std::filesystem::path persist_file_path; // where we save the safety data + mutable std::mutex mtx; mutable fc::datastream persist_file; // we want to keep the file open for speed - std::map finalizers; // the active finalizers for this node + std::map finalizers; // the active finalizers for this node, loaded at startup, not mutated afterwards fsi_map inactive_safety_info; // loaded at startup, not mutated afterwards fsi_t default_fsi = fsi_t::unset_fsi(); // default provided at leap startup mutable bool inactive_safety_info_written{false}; - template + public: + my_finalizers_t(block_timestamp_type startup_time, const std::filesystem::path& persist_file_path) + : t_startup(startup_time) + , persist_file_path(persist_file_path) + {} + + template // thread safe void maybe_vote(const finalizer_policy& fin_pol, const block_state_ptr& bsp, const digest_type& digest, F&& process_vote) { + + if (finalizers.empty()) + return; + std::vector votes; votes.reserve(finalizers.size()); + // Possible improvement in the future, look at locking only individual finalizers and releasing the lock for writing the file. + // Would require making sure that only the latest is ever written to the file. + std::unique_lock g(mtx); + // first accumulate all the votes for (const auto& f : fin_pol.finalizers) { if (auto it = finalizers.find(f.public_key); it != finalizers.end()) { @@ -95,20 +113,28 @@ namespace eosio::chain { // then save the safety info and, if successful, gossip the votes if (!votes.empty()) { save_finalizer_safety_info(); + g.unlock(); for (const auto& vote : votes) std::forward(process_vote)(vote); } } - size_t size() const { return finalizers.size(); } - void set_keys(const std::map& finalizer_keys); + size_t size() const { return finalizers.size(); } // doesn't change, thread safe + bool empty() const { return finalizers.empty(); } // doesn't change, thread safe + + template + bool all_of_public_keys(F&& f) const { // only access keys which do not change, thread safe + return std::ranges::all_of(std::views::keys(finalizers), std::forward(f)); + } + + void set_keys(const std::map& finalizer_keys); // only call on startup void set_default_safety_information(const fsi_t& fsi); - // following two member functions could be private, but are used in testing + // following two member functions could be private, but are used in testing, not thread safe void save_finalizer_safety_info() const; fsi_map load_finalizer_safety_info(); - // for testing purposes only + // for testing purposes only, not thread safe const fsi_t& get_fsi(const bls_public_key& k) { return finalizers[k].fsi; } void set_fsi(const bls_public_key& k, const fsi_t& fsi) { finalizers[k].fsi = fsi; } }; diff --git a/plugins/net_plugin/net_plugin.cpp b/plugins/net_plugin/net_plugin.cpp index a3ab7201fe..9d4c0f5b16 100644 --- a/plugins/net_plugin/net_plugin.cpp +++ b/plugins/net_plugin/net_plugin.cpp @@ -3995,7 +3995,7 @@ namespace eosio { on_active_schedule(chain_plug->chain().active_producers()); } - // called from application thread + // called from other threads including net threads void net_plugin_impl::on_voted_block(const vote_message& msg) { fc_dlog(logger, "on voted signal: block #${bn} ${id}.., ${t}, key ${k}..", ("bn", block_header::num_from_id(msg.block_id))("id", msg.block_id.str().substr(8,16)) diff --git a/unittests/finalizer_tests.cpp b/unittests/finalizer_tests.cpp index 45aa349231..7d5fd9fcd0 100644 --- a/unittests/finalizer_tests.cpp +++ b/unittests/finalizer_tests.cpp @@ -90,7 +90,7 @@ BOOST_AUTO_TEST_CASE( basic_finalizer_safety_file_io ) try { bls_pub_priv_key_map_t local_finalizers = { { k.pubkey_str, k.privkey_str } }; { - my_finalizers_t fset{.t_startup = block_timestamp_type{}, .persist_file_path = safety_file_path}; + my_finalizers_t fset{block_timestamp_type{}, safety_file_path}; fset.set_keys(local_finalizers); fset.set_fsi(k.pubkey, fsi); @@ -101,7 +101,7 @@ BOOST_AUTO_TEST_CASE( basic_finalizer_safety_file_io ) try { } { - my_finalizers_t fset{.t_startup = block_timestamp_type{}, .persist_file_path = safety_file_path}; + my_finalizers_t fset{block_timestamp_type{}, safety_file_path}; fset.set_keys(local_finalizers); // that's when the finalizer safety file is read // make sure the safety info for our finalizer that we saved above is restored correctly @@ -123,7 +123,7 @@ BOOST_AUTO_TEST_CASE( corrupt_finalizer_safety_file ) try { bls_pub_priv_key_map_t local_finalizers = { { k.pubkey_str, k.privkey_str } }; { - my_finalizers_t fset{.t_startup = block_timestamp_type{}, .persist_file_path = safety_file_path}; + my_finalizers_t fset{block_timestamp_type{}, safety_file_path}; fset.set_keys(local_finalizers); fset.set_fsi(k.pubkey, fsi); @@ -140,7 +140,7 @@ BOOST_AUTO_TEST_CASE( corrupt_finalizer_safety_file ) try { } { - my_finalizers_t fset{.t_startup = block_timestamp_type{}, .persist_file_path = safety_file_path}; + my_finalizers_t fset{block_timestamp_type{}, safety_file_path}; BOOST_REQUIRE_THROW(fset.set_keys(local_finalizers), // that's when the finalizer safety file is read finalizer_safety_exception); @@ -159,7 +159,7 @@ BOOST_AUTO_TEST_CASE( finalizer_safety_file_io ) try { std::vector keys = create_keys(10); { - my_finalizers_t fset{.t_startup = block_timestamp_type{}, .persist_file_path = safety_file_path}; + my_finalizers_t fset{block_timestamp_type{}, safety_file_path}; bls_pub_priv_key_map_t local_finalizers = create_local_finalizers<1, 3, 5, 6>(keys); fset.set_keys(local_finalizers); @@ -171,7 +171,7 @@ BOOST_AUTO_TEST_CASE( finalizer_safety_file_io ) try { } { - my_finalizers_t fset{.t_startup = block_timestamp_type{}, .persist_file_path = safety_file_path}; + my_finalizers_t fset{block_timestamp_type{}, safety_file_path}; bls_pub_priv_key_map_t local_finalizers = create_local_finalizers<3>(keys); fset.set_keys(local_finalizers); @@ -186,7 +186,7 @@ BOOST_AUTO_TEST_CASE( finalizer_safety_file_io ) try { } { - my_finalizers_t fset{.t_startup = block_timestamp_type{}, .persist_file_path = safety_file_path}; + my_finalizers_t fset{block_timestamp_type{}, safety_file_path}; bls_pub_priv_key_map_t local_finalizers = create_local_finalizers<3>(keys); fset.set_keys(local_finalizers); @@ -197,7 +197,7 @@ BOOST_AUTO_TEST_CASE( finalizer_safety_file_io ) try { // even though we didn't activate finalizers 1, 5, or 6 in the prior test, and we wrote the safety file, // make sure we have not lost the fsi that was set originally for these finalizers. { - my_finalizers_t fset{.t_startup = block_timestamp_type{}, .persist_file_path = safety_file_path}; + my_finalizers_t fset{block_timestamp_type{}, safety_file_path}; bls_pub_priv_key_map_t local_finalizers = create_local_finalizers<1, 5, 6>(keys); fset.set_keys(local_finalizers); From 33210c0f31792947b2ba5897bbb4e509543fa83a Mon Sep 17 00:00:00 2001 From: Kevin Heifner Date: Tue, 9 Apr 2024 15:32:12 -0500 Subject: [PATCH 30/31] GH-2102 Small cleanup from PR review --- libraries/chain/controller.cpp | 21 +++++++++++-------- libraries/chain/hotstuff/hotstuff.cpp | 5 +++-- .../eosio/chain/hotstuff/finalizer.hpp | 2 +- 3 files changed, 16 insertions(+), 12 deletions(-) diff --git a/libraries/chain/controller.cpp b/libraries/chain/controller.cpp index 3e43f1263a..d89289d042 100644 --- a/libraries/chain/controller.cpp +++ b/libraries/chain/controller.cpp @@ -3347,7 +3347,7 @@ struct controller_impl { ("p", bsp->producer())("id", bsp->id().str().substr(8, 16))("n", bsp->block_num())("t", bsp->timestamp()) ("count", bsp->block->transactions.size())("lib", fork_db_root_block_num()) ("net", br.total_net_usage)("cpu", br.total_cpu_usage_us) - ("elapsed", br.total_elapsed_time)("time", br.total_time)("latency", (fc::time_point::now() - bsp->timestamp()).count() / 1000)); + ("elapsed", br.total_elapsed_time)("time", br.total_time)("latency", (now - bsp->timestamp()).count() / 1000)); const auto& hb_id = chain_head.id(); const auto& hb = chain_head.block(); if (read_mode != db_read_mode::IRREVERSIBLE && hb && hb_id != bsp->id() && hb != nullptr) { // not applied to head @@ -3369,7 +3369,10 @@ struct controller_impl { auto start = fc::time_point::now(); const bool already_valid = bsp->is_valid(); - if (!already_valid) // has not been validated (applied) before, only in forkdb, see if we can vote now + // When bsp was created in create_block_state_i, bsp was considered for voting. At that time, bsp->final_on_strong_qc_block_ref may + // not have been validated and we could not vote. At this point bsp->final_on_strong_qc_block_ref has been validated and we can vote. + // Only need to consider voting if not already validated, if already validated then we have already voted. + if (!already_valid) consider_voting(bsp); const signed_block_ptr& b = bsp->block; @@ -3974,7 +3977,9 @@ struct controller_impl { if (chain_head.id() != pending_head->id() && pending_head->id() != forkdb.head()->id()) { dlog("switching forks on controller->maybe_switch_forks call"); controller::block_report br; - maybe_switch_forks(br, pending_head, pending_head->is_valid() ? controller::block_status::validated : controller::block_status::complete, + maybe_switch_forks(br, pending_head, + pending_head->is_valid() ? controller::block_status::validated + : controller::block_status::complete, cb, trx_lookup); } } @@ -4001,8 +4006,8 @@ struct controller_impl { bool switch_fork = !branches.second.empty(); if( switch_fork ) { auto head_fork_comp_str = apply(chain_head, [](auto& head) -> std::string { return log_fork_comparison(*head); }); - ilog("switching forks from ${current_head_id} (block number ${current_head_num}) ${c} to ${new_head_id} (block number ${new_head_num}) ${n}", - ("current_head_id", chain_head.id())("current_head_num", chain_head.block_num())("new_head_id", new_head->id())("new_head_num", new_head->block_num()) + ilog("switching forks from ${chid} (block number ${chn}) ${c} to ${nhid} (block number ${nhn}) ${n}", + ("chid", chain_head.id())("chn}", chain_head.block_num())("nhid", new_head->id())("nhn", new_head->block_num()) ("c", head_fork_comp_str)("n", log_fork_comparison(*new_head))); // not possible to log transaction specific info when switching forks @@ -5060,16 +5065,14 @@ bool controller::block_exists(const block_id_type& id) const { bool exists = my->fork_db_block_exists(id); if( exists ) return true; std::optional sbh = my->blog.read_block_header_by_num( block_header::num_from_id(id) ); - if( sbh && sbh->calculate_id() == id ) return true; - return false; + return sbh && sbh->calculate_id() == id; } bool controller::validated_block_exists(const block_id_type& id) const { bool exists = my->fork_db_validated_block_exists(id); if( exists ) return true; std::optional sbh = my->blog.read_block_header_by_num( block_header::num_from_id(id) ); - if( sbh && sbh->calculate_id() == id ) return true; - return false; + return sbh && sbh->calculate_id() == id; } std::optional controller::fetch_block_header_by_id( const block_id_type& id )const { diff --git a/libraries/chain/hotstuff/hotstuff.cpp b/libraries/chain/hotstuff/hotstuff.cpp index 9cd69d2e00..ca39f7d93f 100644 --- a/libraries/chain/hotstuff/hotstuff.cpp +++ b/libraries/chain/hotstuff/hotstuff.cpp @@ -41,7 +41,7 @@ void pending_quorum_certificate::votes_t::reflector_init() { } bool pending_quorum_certificate::votes_t::has_voted(size_t index) const { - assert(index <= _processed.size()); + assert(index < _processed.size()); return _processed[index].load(std::memory_order_relaxed); } @@ -163,6 +163,7 @@ vote_status pending_quorum_certificate::add_vote(block_num_type block_num, bool return s; } +// called by get_best_qc which acquires a mutex valid_quorum_certificate pending_quorum_certificate::to_valid_quorum_certificate() const { valid_quorum_certificate valid_qc; @@ -203,7 +204,7 @@ std::optional pending_quorum_certificate::get_best_qc(block_ // Strong beats weak. Tie break by valid_qc. const auto& best_qc = _valid_qc->is_strong() == valid_qc_from_pending.is_strong() ? - *_valid_qc : // tie broke by valid_qc + *_valid_qc : // tie broken by valid_qc _valid_qc->is_strong() ? *_valid_qc : valid_qc_from_pending; // strong beats weak return std::optional{quorum_certificate{ block_num, best_qc }}; } diff --git a/libraries/chain/include/eosio/chain/hotstuff/finalizer.hpp b/libraries/chain/include/eosio/chain/hotstuff/finalizer.hpp index 786afbc53a..762524a46b 100644 --- a/libraries/chain/include/eosio/chain/hotstuff/finalizer.hpp +++ b/libraries/chain/include/eosio/chain/hotstuff/finalizer.hpp @@ -99,7 +99,7 @@ namespace eosio::chain { votes.reserve(finalizers.size()); // Possible improvement in the future, look at locking only individual finalizers and releasing the lock for writing the file. - // Would require making sure that only the latest is ever written to the file. + // Would require making sure that only the latest is ever written to the file and that the file access was protected separately. std::unique_lock g(mtx); // first accumulate all the votes From 5cc1b87f3f1256d155c131e6698079c8642d7b20 Mon Sep 17 00:00:00 2001 From: Kevin Heifner Date: Tue, 9 Apr 2024 15:53:31 -0500 Subject: [PATCH 31/31] GH-2102 Move Produced block log into controller_impl commit_block --- libraries/chain/controller.cpp | 36 +++++++++++++++++++--------------- 1 file changed, 20 insertions(+), 16 deletions(-) diff --git a/libraries/chain/controller.cpp b/libraries/chain/controller.cpp index d89289d042..1b0a078d4a 100644 --- a/libraries/chain/controller.cpp +++ b/libraries/chain/controller.cpp @@ -3127,7 +3127,9 @@ struct controller_impl { /** * @post regardless of the success of commit block there is no active pending block */ - void commit_block( controller::block_status s ) { + void commit_block( controller::block_report& br, controller::block_status s ) { + fc::time_point start = fc::time_point::now(); + auto reset_pending_on_exit = fc::make_scoped_exit([this]{ pending.reset(); }); @@ -3191,6 +3193,19 @@ struct controller_impl { apply_s(chain_head, [&](const auto& head) { create_and_send_vote_msg(head); }); } + + if (s == controller::block_status::incomplete) { + const auto& id = chain_head.id(); + const auto& new_b = chain_head.block(); + br.total_time += fc::time_point::now() - start; + + ilog("Produced block ${id}... #${n} @ ${t} signed by ${p} " + "[trxs: ${count}, lib: ${lib}, confirmed: ${confs}, net: ${net}, cpu: ${cpu}, elapsed: ${et}, time: ${tt}]", + ("p", new_b->producer)("id", id.str().substr(8, 16))("n", new_b->block_num())("t", new_b->timestamp) + ("count", new_b->transactions.size())("lib", fork_db_root_block_num())("net", br.total_net_usage) + ("cpu", br.total_cpu_usage_us)("et", br.total_elapsed_time)("tt", br.total_time)("confs", new_b->confirmed)); + } + } catch (...) { // dont bother resetting pending, instead abort the block reset_pending_on_exit.cancel(); @@ -3504,8 +3519,8 @@ struct controller_impl { pending->_block_stage = completed_block{ block_handle{bsp} }; br = pending->_block_report; // copy before commit block destroys pending - commit_block(s); - br.total_time = fc::time_point::now() - start; + br.total_time += fc::time_point::now() - start; + commit_block(br, s); if (!already_valid) log_applied(br, bsp); @@ -3807,6 +3822,7 @@ struct controller_impl { } // Don't save the QC from block extension if the claimed block has a better or same valid_qc. + // claimed->valid_qc_is_strong() acquires a mutex. if (received_qc.is_weak() || claimed->valid_qc_is_strong()) { dlog("qc not better, claimed->valid: ${qbn} ${qid}, strong=${s}, received: ${rqc}, for block ${bn} ${id}", ("qbn", claimed->block_num())("qid", claimed->id())("s", !received_qc.is_weak()) // use is_weak() to avoid mutex on valid_qc_is_strong() @@ -4839,20 +4855,8 @@ void controller::assemble_and_complete_block( block_report& br, const signer_cal } void controller::commit_block(block_report& br) { - fc::time_point start = fc::time_point::now(); - validate_db_available_size(); - my->commit_block(block_status::incomplete); - - const auto& id = head_block_id(); - const auto& new_b = head_block(); - br.total_time += fc::time_point::now() - start; - - ilog("Produced block ${id}... #${n} @ ${t} signed by ${p} " - "[trxs: ${count}, lib: ${lib}, confirmed: ${confs}, net: ${net}, cpu: ${cpu}, elapsed: ${et}, time: ${tt}]", - ("p", new_b->producer)("id", id.str().substr(8, 16))("n", new_b->block_num())("t", new_b->timestamp) - ("count", new_b->transactions.size())("lib", last_irreversible_block_num())("net", br.total_net_usage) - ("cpu", br.total_cpu_usage_us)("et", br.total_elapsed_time)("tt", br.total_time)("confs", new_b->confirmed)); + my->commit_block(br, block_status::incomplete); } void controller::maybe_switch_forks(const forked_callback_t& cb, const trx_meta_cache_lookup& trx_lookup) {