From 52de3c34068f3edc4ad3a487cc1722c1758ac8bc Mon Sep 17 00:00:00 2001 From: Kevin Heifner Date: Wed, 28 Aug 2024 13:58:21 -0500 Subject: [PATCH 1/9] Add extraNodeosArgs missing in PR#661 --- tests/liveness_test.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/tests/liveness_test.py b/tests/liveness_test.py index 9dd45c8bb5..ba526d039e 100755 --- a/tests/liveness_test.py +++ b/tests/liveness_test.py @@ -40,7 +40,7 @@ # test relies on production continuing extraNodeosArgs=" --production-pause-vote-timeout-ms 0 " # "mesh" shape connects nodeA and nodeB to each other - if cluster.launch(topo="mesh", pnodes=totalProducerNodes, + if cluster.launch(topo="mesh", pnodes=totalProducerNodes, extraNodeosArgs=extraNodeosArgs, totalNodes=totalNodes, totalProducers=totalProducerNodes, loadSystemContract=False, activateIF=activateIF, biosFinalizer=False) is False: Utils.cmdError("launcher") From b7fdf9c062236aaabb49cf02b611f4b7fc4043c7 Mon Sep 17 00:00:00 2001 From: Kevin Heifner Date: Wed, 28 Aug 2024 13:59:14 -0500 Subject: [PATCH 2/9] Change to debug level to match other logging in this function --- libraries/chain/finality/finalizer.cpp | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/libraries/chain/finality/finalizer.cpp b/libraries/chain/finality/finalizer.cpp index 5a2ec84004..9a8e1f009f 100644 --- a/libraries/chain/finality/finalizer.cpp +++ b/libraries/chain/finality/finalizer.cpp @@ -34,7 +34,7 @@ finalizer::vote_result finalizer::decide_vote(const block_state_ptr& bsp) { res.liveness_check = bsp->core.latest_qc_block_timestamp() > fsi.lock.timestamp; if (!res.liveness_check) { - fc_ilog(vote_logger, "liveness check failed, block ${bn} ${id}: ${c} <= ${l}, fsi.lock ${lbn} ${lid}, latest_qc_claim: ${qc}", + fc_dlog(vote_logger, "liveness check failed, block ${bn} ${id}: ${c} <= ${l}, fsi.lock ${lbn} ${lid}, latest_qc_claim: ${qc}", ("bn", bsp->block_num())("id", bsp->id())("c", bsp->core.latest_qc_block_timestamp())("l", fsi.lock.timestamp) ("lbn", fsi.lock.block_num())("lid", fsi.lock.block_id) ("qc", bsp->core.latest_qc_claim())); From b4fc70c748b9bd3ee2a4b1f80c4e32669cbb0cd1 Mon Sep 17 00:00:00 2001 From: Kevin Heifner Date: Wed, 28 Aug 2024 14:00:01 -0500 Subject: [PATCH 3/9] Modify log message to say "no qc" instead of "no votes", also include qc_claim in log --- plugins/chain_plugin/tracked_votes.cpp | 6 ++++-- 1 file changed, 4 insertions(+), 2 deletions(-) diff --git a/plugins/chain_plugin/tracked_votes.cpp b/plugins/chain_plugin/tracked_votes.cpp index 93196b4be2..71f28d859f 100644 --- a/plugins/chain_plugin/tracked_votes.cpp +++ b/plugins/chain_plugin/tracked_votes.cpp @@ -39,9 +39,11 @@ namespace eosio::chain_apis { return; if (!block->contains_extension(chain::quorum_certificate_extension::extension_id())) { - fc_ilog(chain::vote_logger, "Block ${id}... #${n} @ ${t} produced by ${p}, latency: ${l}ms has no votes", + std::optional fin_ext = block->extract_header_extension(chain::finality_extension::extension_id()); + chain::qc_claim_t claim = fin_ext ? std::get(*fin_ext).qc_claim : chain::qc_claim_t{}; + fc_ilog(chain::vote_logger, "Block ${id}... #${n} @ ${t} produced by ${p}, latency: ${l}ms has no qc, claim: ${c}", ("id", id.str().substr(8, 16))("n", block->block_num())("t", block->timestamp)("p", block->producer) - ("l", (now - block->timestamp).count() / 1000)); + ("l", (now - block->timestamp).count() / 1000)("c", claim)); return; } From 7067b42ded1add176cdf575fc158c9d5359ba25d Mon Sep 17 00:00:00 2001 From: Kevin Heifner Date: Wed, 28 Aug 2024 14:00:55 -0500 Subject: [PATCH 4/9] Improve Not producing because log messages to include block timestamp. Also indicate why producer is implicitly paused. --- plugins/producer_plugin/producer_plugin.cpp | 51 +++++++++++++-------- 1 file changed, 31 insertions(+), 20 deletions(-) diff --git a/plugins/producer_plugin/producer_plugin.cpp b/plugins/producer_plugin/producer_plugin.cpp index a69f72532c..4942faf017 100644 --- a/plugins/producer_plugin/producer_plugin.cpp +++ b/plugins/producer_plugin/producer_plugin.cpp @@ -755,23 +755,32 @@ class producer_plugin_impl : public std::enable_shared_from_this _production_pause_vote_timeout) - || (_other_active_finalizers && - _accepted_block_time - last_other_vote_received > _production_pause_vote_timeout); + if (_is_producer_active_finalizer && + _accepted_block_time - last_producer_vote_received > _production_pause_vote_timeout) { + return implicit_pause::prod_paused; + } + if (_other_active_finalizers && + _accepted_block_time - last_other_vote_received > _production_pause_vote_timeout) { + return implicit_pause::other_paused; + } + return implicit_pause::not_paused; } void abort_block() { @@ -1598,7 +1607,7 @@ void producer_plugin::resume() { } bool producer_plugin::paused() const { - return my->_pause_production || my->is_implicitly_paused(); + return my->_pause_production || (my->implicitly_paused() != producer_plugin_impl::implicit_pause::not_paused); } void producer_plugin_impl::update_runtime_options(const producer_plugin::runtime_options& options) { @@ -1963,28 +1972,30 @@ producer_plugin_impl::start_block_result producer_plugin_impl::start_block() { } else if (_producers.find(scheduled_producer.producer_name) == _producers.end()) { _pending_block_mode = pending_block_mode::speculating; } else if (num_relevant_signatures == 0) { - fc_elog(_log, "Not producing block because I don't have any private keys relevant to authority: ${authority}", - ("authority", scheduled_producer.authority)); + fc_elog(_log, "Not producing block because I don't have any private keys relevant to authority: ${authority}, block ${t}", + ("authority", scheduled_producer.authority)("t", block_time)); _pending_block_mode = pending_block_mode::speculating; not_producing_when_time = true; } else if (_pause_production) { - fc_wlog(_log, "Not producing block because production is explicitly paused"); + fc_wlog(_log, "Not producing block because production is explicitly paused, block ${t}", ("t", block_time)); _pending_block_mode = pending_block_mode::speculating; not_producing_when_time = true; } else if (_max_irreversible_block_age_us.count() >= 0 && irreversible_block_age >= _max_irreversible_block_age_us) { - fc_elog(_log, "Not producing block because the irreversible block is too old [age:${age}s, max:${max}s]", - ("age", irreversible_block_age.count() / 1'000'000)("max", _max_irreversible_block_age_us.count() / 1'000'000)); + fc_elog(_log, "Not producing block because the irreversible block is too old [age:${age}s, max:${max}s], block ${t}", + ("age", irreversible_block_age.count() / 1'000'000)("max", _max_irreversible_block_age_us.count() / 1'000'000) + ("t", block_time)); _pending_block_mode = pending_block_mode::speculating; not_producing_when_time = true; - } else if (is_implicitly_paused()) { - fc_elog(_log, "Not producing block because no recent votes, last producer vote ${pv}, other votes ${ov}, last block time ${bt}", - ("pv", _last_producer_vote_received.load(std::memory_order_relaxed)) + } else if (implicit_pause p = implicitly_paused(); p != implicit_pause::not_paused) { + std::string reason = p == implicit_pause::prod_paused ? "producer votes" : "votes received"; + fc_elog(_log, "Not producing block because no recent ${r}, block ${t}, last producer vote ${pv}, other votes ${ov}, last accepted block time ${bt}", + ("r", std::move(reason))("t", block_time)("pv", _last_producer_vote_received.load(std::memory_order_relaxed)) ("ov", _last_other_vote_received.load(std::memory_order_relaxed))("bt", _accepted_block_time)); _pending_block_mode = pending_block_mode::speculating; not_producing_when_time = true; } else if (_max_reversible_blocks > 0 && head_block_num - head.irreversible_blocknum() > _max_reversible_blocks) { - fc_elog(_log, "Not producing block because max-reversible-blocks ${m} reached, head ${h}, lib ${l}.", - ("m", _max_reversible_blocks)("h", head_block_num)("l", head.irreversible_blocknum())); + fc_elog(_log, "Not producing block because max-reversible-blocks ${m} reached, head ${h}, lib ${l}, block ${t}", + ("m", _max_reversible_blocks)("h", head_block_num)("l", head.irreversible_blocknum())("t", block_time)); _pending_block_mode = pending_block_mode::speculating; not_producing_when_time = true; } From e28b78dc249b0fcae1b5fdf9f9b1b1cbcb75b5eb Mon Sep 17 00:00:00 2001 From: Kevin Heifner Date: Wed, 28 Aug 2024 14:12:49 -0500 Subject: [PATCH 5/9] Fix comment --- plugins/producer_plugin/producer_plugin.cpp | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/plugins/producer_plugin/producer_plugin.cpp b/plugins/producer_plugin/producer_plugin.cpp index 4942faf017..6e76b1f481 100644 --- a/plugins/producer_plugin/producer_plugin.cpp +++ b/plugins/producer_plugin/producer_plugin.cpp @@ -757,8 +757,8 @@ class producer_plugin_impl : public std::enable_shared_from_this Date: Wed, 28 Aug 2024 14:26:20 -0500 Subject: [PATCH 6/9] Only extract finality_extension if we are going to log qc_claim --- plugins/chain_plugin/tracked_votes.cpp | 12 +++++++----- plugins/producer_plugin/producer_plugin.cpp | 2 +- 2 files changed, 8 insertions(+), 6 deletions(-) diff --git a/plugins/chain_plugin/tracked_votes.cpp b/plugins/chain_plugin/tracked_votes.cpp index 71f28d859f..16c6cdb4fd 100644 --- a/plugins/chain_plugin/tracked_votes.cpp +++ b/plugins/chain_plugin/tracked_votes.cpp @@ -39,11 +39,13 @@ namespace eosio::chain_apis { return; if (!block->contains_extension(chain::quorum_certificate_extension::extension_id())) { - std::optional fin_ext = block->extract_header_extension(chain::finality_extension::extension_id()); - chain::qc_claim_t claim = fin_ext ? std::get(*fin_ext).qc_claim : chain::qc_claim_t{}; - fc_ilog(chain::vote_logger, "Block ${id}... #${n} @ ${t} produced by ${p}, latency: ${l}ms has no qc, claim: ${c}", - ("id", id.str().substr(8, 16))("n", block->block_num())("t", block->timestamp)("p", block->producer) - ("l", (now - block->timestamp).count() / 1000)("c", claim)); + if (chain::vote_logger.is_enabled(fc::log_level::info)) { + std::optional fin_ext = block->extract_header_extension(chain::finality_extension::extension_id()); + chain::qc_claim_t claim = fin_ext ? std::get(*fin_ext).qc_claim : chain::qc_claim_t{}; + fc_ilog(chain::vote_logger, "Block ${id}... #${n} @ ${t} produced by ${p}, latency: ${l}ms has no qc, claim: ${c}", + ("id", id.str().substr(8, 16))("n", block->block_num())("t", block->timestamp)("p", block->producer) + ("l", (now - block->timestamp).count() / 1000)("c", claim)); + } return; } diff --git a/plugins/producer_plugin/producer_plugin.cpp b/plugins/producer_plugin/producer_plugin.cpp index 6e76b1f481..26c2799e2f 100644 --- a/plugins/producer_plugin/producer_plugin.cpp +++ b/plugins/producer_plugin/producer_plugin.cpp @@ -758,7 +758,7 @@ class producer_plugin_impl : public std::enable_shared_from_this Date: Wed, 28 Aug 2024 19:00:54 -0500 Subject: [PATCH 7/9] Use info level logging when can't vote in decide_vote. Don't log safety check when liveness succeeds as it is not checked. Use debug level for all but the summary log in decide_vote. Add additional info to info level decide_vote summary log. --- libraries/chain/finality/finalizer.cpp | 23 ++++++++++++++++++----- 1 file changed, 18 insertions(+), 5 deletions(-) diff --git a/libraries/chain/finality/finalizer.cpp b/libraries/chain/finality/finalizer.cpp index 9a8e1f009f..e275df3466 100644 --- a/libraries/chain/finality/finalizer.cpp +++ b/libraries/chain/finality/finalizer.cpp @@ -42,7 +42,7 @@ finalizer::vote_result finalizer::decide_vote(const block_state_ptr& bsp) { // Safety check : check if this proposal extends the proposal we're locked on res.safety_check = bsp->core.extends(fsi.lock.block_id); if (!res.safety_check) { - fc_wlog(vote_logger, "safety check failed, block ${bn} ${id} did not extend fsi.lock ${lbn} ${lid}", + fc_dlog(vote_logger, "safety check failed, block ${bn} ${id} did not extend fsi.lock ${lbn} ${lid}", ("bn", bsp->block_num())("id", bsp->id())("lbn", fsi.lock.block_num())("lid", fsi.lock.block_id)); } } @@ -50,7 +50,8 @@ finalizer::vote_result finalizer::decide_vote(const block_state_ptr& bsp) { // Safety and Liveness both fail if `fsi.lock` is empty. It should not happen. // `fsi.lock` is initially set to `lib` when switching to IF or starting from a snapshot. // ------------------------------------------------------------------------------------- - fc_wlog(vote_logger, "liveness check & safety check failed, block ${bn} ${id}, fsi.lock is empty", ("bn", bsp->block_num())("id", bsp->id())); + fc_dlog(vote_logger, "liveness check & safety check failed, block ${bn} ${id}, fsi.lock is empty", + ("bn", bsp->block_num())("id", bsp->id())); res.liveness_check = false; res.safety_check = false; } @@ -94,9 +95,21 @@ finalizer::vote_result finalizer::decide_vote(const block_state_ptr& bsp) { fsi.last_vote = bsp->make_block_ref(); } - fc_dlog(vote_logger, "block=${bn} ${id}, liveness_check=${l}, safety_check=${s}, monotony_check=${m}, can vote=${can_vote}, voting=${v}, locked=${lbn} ${lid}", - ("bn", bsp->block_num())("id", bsp->id())("l",res.liveness_check)("s",res.safety_check)("m",res.monotony_check) - ("can_vote",can_vote)("v", res.decision)("lbn", fsi.lock.block_num())("lid", fsi.lock.block_id)); + if (res.liveness_check) { + fc_dlog(vote_logger, "block=${bn} ${id}, liveness=${l}, can vote=${cn}, voting=${v}, locked=${lbn} ${lid}", + ("bn", bsp->block_num())("id", bsp->id())("l",res.liveness_check) + ("cn",can_vote)("v", res.decision)("lbn", fsi.lock.block_num())("lid", fsi.lock.block_id)); + } else if (can_vote) { + fc_dlog(vote_logger, "block=${bn} ${id}, liveness=${l}, safety=${s}, can vote=${cn}, voting=${v}, locked=${lbn} ${lid}", + ("bn", bsp->block_num())("id", bsp->id())("l",res.liveness_check)("s",res.safety_check) + ("cn",can_vote)("v", res.decision)("lbn", fsi.lock.block_num())("lid", fsi.lock.block_id)); + } else { + fc_ilog(vote_logger, "block=${bn} ${id}, liveness=${l}, safety=${s}, can vote=${cn}, voting=${v}, " + "${ct} <= ${lt}, locked=${lbn} ${lid}, latest_qc_claim: ${qc}", + ("bn", bsp->block_num())("id", bsp->id())("l",res.liveness_check)("s",res.safety_check) + ("cn",can_vote)("v", res.decision)("ct", bsp->core.latest_qc_block_timestamp())("lt", fsi.lock.timestamp) + ("lbn", fsi.lock.block_num())("lid", fsi.lock.block_id)("qc", bsp->core.latest_qc_claim())); + } return res; } From 2955a1cc0bf42ab5cd95147af26ea1ffd940c164 Mon Sep 17 00:00:00 2001 From: Kevin Heifner Date: Wed, 28 Aug 2024 19:28:44 -0500 Subject: [PATCH 8/9] Use FC_RETHROW_EXCEPTIONS so context can be captured. --- libraries/chain/finality/finalizer.cpp | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/libraries/chain/finality/finalizer.cpp b/libraries/chain/finality/finalizer.cpp index e275df3466..c280f4c6c8 100644 --- a/libraries/chain/finality/finalizer.cpp +++ b/libraries/chain/finality/finalizer.cpp @@ -339,7 +339,7 @@ my_finalizers_t::fsi_map my_finalizers_t::load_finalizer_safety_info() { // close file after write cfile_ds.close(); - } FC_LOG_AND_RETHROW() + } FC_RETHROW_EXCEPTIONS(log_level::error, "corrupted finalizer safety persistence file ${p}", ("p", persist_file_path)) // don't remove file we can't load return res; } From ae8152721321739e3de72a68d34a50a9c7a3fc0e Mon Sep 17 00:00:00 2001 From: Kevin Heifner Date: Wed, 28 Aug 2024 20:00:44 -0500 Subject: [PATCH 9/9] Use warn log for fsi.lock empty log --- libraries/chain/finality/finalizer.cpp | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/libraries/chain/finality/finalizer.cpp b/libraries/chain/finality/finalizer.cpp index c280f4c6c8..75790288ac 100644 --- a/libraries/chain/finality/finalizer.cpp +++ b/libraries/chain/finality/finalizer.cpp @@ -50,7 +50,7 @@ finalizer::vote_result finalizer::decide_vote(const block_state_ptr& bsp) { // Safety and Liveness both fail if `fsi.lock` is empty. It should not happen. // `fsi.lock` is initially set to `lib` when switching to IF or starting from a snapshot. // ------------------------------------------------------------------------------------- - fc_dlog(vote_logger, "liveness check & safety check failed, block ${bn} ${id}, fsi.lock is empty", + fc_wlog(vote_logger, "liveness check & safety check failed, block ${bn} ${id}, fsi.lock is empty", ("bn", bsp->block_num())("id", bsp->id())); res.liveness_check = false; res.safety_check = false;