From 641feefc01e46dfed3d295353dafe383ed77d7ad Mon Sep 17 00:00:00 2001 From: greg7mdp Date: Fri, 13 Oct 2023 10:46:54 -0400 Subject: [PATCH 01/31] avoid unnecessarily creating dirty pages when rebalancing chainbase rbtrees. --- libraries/chainbase | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/libraries/chainbase b/libraries/chainbase index 3bfb0d00b6..13c9c35e39 160000 --- a/libraries/chainbase +++ b/libraries/chainbase @@ -1 +1 @@ -Subproject commit 3bfb0d00b61087bff804c622ffa5929fec2fa4ed +Subproject commit 13c9c35e393f1739c053ff7a03edb4d9df30990d From ad44032faa25850419285fc7d335955e806d56bc Mon Sep 17 00:00:00 2001 From: Lin Huang Date: Fri, 13 Oct 2023 16:01:29 -0400 Subject: [PATCH 02/31] hardening resource manager plugin shutdown --- .../file_space_handler.hpp | 40 ++++++++++++++++++- .../resource_monitor_plugin.cpp | 1 + 2 files changed, 40 insertions(+), 1 deletion(-) diff --git a/plugins/resource_monitor_plugin/include/eosio/resource_monitor_plugin/file_space_handler.hpp b/plugins/resource_monitor_plugin/include/eosio/resource_monitor_plugin/file_space_handler.hpp index 1ddcd82bfa..46735a6ebc 100644 --- a/plugins/resource_monitor_plugin/include/eosio/resource_monitor_plugin/file_space_handler.hpp +++ b/plugins/resource_monitor_plugin/include/eosio/resource_monitor_plugin/file_space_handler.hpp @@ -6,6 +6,8 @@ #include #include +#include + namespace bfs = boost::filesystem; namespace eosio::resource_monitor { @@ -131,26 +133,60 @@ namespace eosio::resource_monitor { } void space_monitor_loop() { + space_monitor_task_running = true; + + if (stopping) { + space_monitor_task_running = false; + return; + } if ( is_threshold_exceeded() && shutdown_on_exceeded ) { elog("Gracefully shutting down, exceeded file system configured threshold."); appbase::app().quit(); // This will gracefully stop Nodeos + space_monitor_task_running = false; return; } update_warning_interval_counter(); timer.expires_from_now( boost::posix_time::seconds( 
sleep_time_in_secs )); - + if (stopping) { + space_monitor_task_running = false; + return; + } timer.async_wait([this](auto& ec) { if ( ec ) { wlog("Exit due to error: ${ec}, message: ${message}", ("ec", ec.value()) ("message", ec.message())); + space_monitor_task_running = false; return; } else { // Loop over space_monitor_loop(); } }); + space_monitor_task_running = false; + } + + // called on main thread from plugin shutdown() + void stop() { + // signalling space_monitor_loop that plugin is being stopped + stopping = true; + + // cancel outstanding scheduled space_monitor_loop task + timer.cancel(); + + // make sure space_monitor_loop has exited. + // in vast majority time, it is not running; just waiting to be executed + using namespace std::chrono_literals; // for ms + auto num_sleeps = 0u; + constexpr auto max_num_sleeps = 10u; + while (space_monitor_task_running && num_sleeps < max_num_sleeps) { + std::this_thread::sleep_for( 1ms ); + ++num_sleeps; + } + if (space_monitor_task_running) { + wlog("space_monitor_loop not stopped after ${n} ms", ("n", num_sleeps)); + } } private: @@ -164,6 +200,8 @@ namespace eosio::resource_monitor { uint64_t shutdown_absolute {0}; uint64_t warning_absolute {0}; bool shutdown_on_exceeded {true}; + std::atomic stopping {false}; // set on main thread, checked on resmon thread + std::atomic space_monitor_task_running {false}; // set on resmon thread, checked on main thread. 
after space_monitor_loop is not running, it is safe to stop struct filesystem_info { dev_t st_dev; // device id of file system containing "file_path" diff --git a/plugins/resource_monitor_plugin/resource_monitor_plugin.cpp b/plugins/resource_monitor_plugin/resource_monitor_plugin.cpp index 17f6b2bd3f..c01eede115 100644 --- a/plugins/resource_monitor_plugin/resource_monitor_plugin.cpp +++ b/plugins/resource_monitor_plugin/resource_monitor_plugin.cpp @@ -142,6 +142,7 @@ class resource_monitor_plugin_impl { void plugin_shutdown() { ilog("shutdown..."); + space_handler.stop(); ctx.stop(); // Wait for the thread to end From fa5ff2f966b3d416d410f92c3d8828395b10499f Mon Sep 17 00:00:00 2001 From: Kevin Heifner Date: Fri, 13 Oct 2023 15:23:22 -0500 Subject: [PATCH 03/31] GH-1677 Avoid stack overflow by posting to ship io_context on recursive send() --- .../include/eosio/state_history_plugin/session.hpp | 12 ++++++++++-- .../state_history_plugin/state_history_plugin.cpp | 3 ++- plugins/state_history_plugin/tests/session_test.cpp | 2 +- 3 files changed, 13 insertions(+), 4 deletions(-) diff --git a/plugins/state_history_plugin/include/eosio/state_history_plugin/session.hpp b/plugins/state_history_plugin/include/eosio/state_history_plugin/session.hpp index 6b2e80f1d9..f9d656c1d7 100644 --- a/plugins/state_history_plugin/include/eosio/state_history_plugin/session.hpp +++ b/plugins/state_history_plugin/include/eosio/state_history_plugin/session.hpp @@ -57,11 +57,15 @@ class session_manager { private: using entry_ptr = std::unique_ptr; + boost::asio::io_context& ship_io_context; std::set> session_set; bool sending = false; std::deque, entry_ptr>> send_queue; public: + explicit session_manager(boost::asio::io_context& ship_io_context) + : ship_io_context(ship_io_context) {} + void insert(std::shared_ptr s) { session_set.insert(std::move(s)); } @@ -103,8 +107,12 @@ class session_manager { void pop_entry(bool call_send = true) { send_queue.erase(send_queue.begin()); sending = 
false; - if (call_send || !send_queue.empty()) - send(); + if (call_send || !send_queue.empty()) { + // avoid blowing the stack + boost::asio::post(ship_io_context, [this]() { + send(); + }); + } } void send_updates() { diff --git a/plugins/state_history_plugin/state_history_plugin.cpp b/plugins/state_history_plugin/state_history_plugin.cpp index d32842fd93..fc5d649bdb 100644 --- a/plugins/state_history_plugin/state_history_plugin.cpp +++ b/plugins/state_history_plugin/state_history_plugin.cpp @@ -75,7 +75,6 @@ struct state_history_plugin_impl : std::enable_shared_from_this thread_pool; + session_manager session_mgr{thread_pool.get_executor()}; + bool plugin_started = false; static fc::logger& logger() { return _log; } diff --git a/plugins/state_history_plugin/tests/session_test.cpp b/plugins/state_history_plugin/tests/session_test.cpp index b1f13752b0..ee6e22d5aa 100644 --- a/plugins/state_history_plugin/tests/session_test.cpp +++ b/plugins/state_history_plugin/tests/session_test.cpp @@ -100,7 +100,7 @@ struct mock_state_history_plugin { fc::temp_directory log_dir; std::optional log; std::atomic stopping = false; - eosio::session_manager session_mgr; + eosio::session_manager session_mgr{ship_ioc}; constexpr static uint32_t default_frame_size = 1024; From 1705e35b798d4bedfcb1023f64fe7ff1b0fd5dbe Mon Sep 17 00:00:00 2001 From: Kevin Heifner Date: Fri, 13 Oct 2023 15:24:44 -0500 Subject: [PATCH 04/31] GH-1677 Update index_begin_block on log split otherwise next lookup in index is at the wrong offset --- .../state_history/include/eosio/state_history/log.hpp | 11 ++++++----- 1 file changed, 6 insertions(+), 5 deletions(-) diff --git a/libraries/state_history/include/eosio/state_history/log.hpp b/libraries/state_history/include/eosio/state_history/log.hpp index a3a0da67f7..c1da12cd10 100644 --- a/libraries/state_history/include/eosio/state_history/log.hpp +++ b/libraries/state_history/include/eosio/state_history/log.hpp @@ -74,9 +74,10 @@ struct state_history_log_header 
{ chain::block_id_type block_id = {}; uint64_t payload_size = 0; }; -static const int state_history_log_header_serial_size = sizeof(state_history_log_header::magic) + - sizeof(state_history_log_header::block_id) + - sizeof(state_history_log_header::payload_size); +static constexpr int state_history_log_header_serial_size = sizeof(state_history_log_header::magic) + + sizeof(state_history_log_header::block_id) + + sizeof(state_history_log_header::payload_size); +static_assert(sizeof(state_history_log_header) == state_history_log_header_serial_size); namespace state_history { struct prune_config { @@ -324,7 +325,7 @@ class state_history_log { catalog.open(log_dir, conf.retained_dir, conf.archive_dir, name); catalog.max_retained_files = conf.max_retained_files; if (_end_block == 0) { - _begin_block = _end_block = catalog.last_block_num() +1; + _index_begin_block = _begin_block = _end_block = catalog.last_block_num() +1; } } }, _config); @@ -925,7 +926,7 @@ class state_history_log { catalog.add(_begin_block, _end_block - 1, log.get_file_path().parent_path(), name); - _begin_block = _end_block; + _index_begin_block = _begin_block = _end_block; using std::swap; swap(new_log_file, log); From 7052a94e11e9cc55ffe79166e642ed4c1425e427 Mon Sep 17 00:00:00 2001 From: Lin Huang Date: Sun, 15 Oct 2023 15:14:45 -0400 Subject: [PATCH 05/31] use named_thread_pool to streamline thread management in resource monitor plugin --- .../file_space_handler.hpp | 110 +++++++++--------- .../resource_monitor_plugin.cpp | 39 +------ .../test/test_add_file_system.cpp | 4 +- .../test/test_threshold.cpp | 6 +- 4 files changed, 63 insertions(+), 96 deletions(-) diff --git a/plugins/resource_monitor_plugin/include/eosio/resource_monitor_plugin/file_space_handler.hpp b/plugins/resource_monitor_plugin/include/eosio/resource_monitor_plugin/file_space_handler.hpp index 46735a6ebc..5ecee7bd3c 100644 --- a/plugins/resource_monitor_plugin/include/eosio/resource_monitor_plugin/file_space_handler.hpp +++ 
b/plugins/resource_monitor_plugin/include/eosio/resource_monitor_plugin/file_space_handler.hpp @@ -5,8 +5,7 @@ #include #include - -#include +#include namespace bfs = boost::filesystem; @@ -14,12 +13,43 @@ namespace eosio::resource_monitor { template class file_space_handler { public: - file_space_handler(SpaceProvider&& space_provider, boost::asio::io_context& ctx) - :space_provider(std::move(space_provider)), - timer{ctx} + file_space_handler(SpaceProvider&& space_provider) + :space_provider(std::move(space_provider)) { } + void start(const std::vector& directories) { + for ( auto& dir: directories ) { + add_file_system( dir ); + + // A directory like "data" contains subdirectories like + // "block". Those subdirectories can mount on different + // file systems. Make sure they are taken care of. + for (bfs::directory_iterator itr(dir); itr != bfs::directory_iterator(); ++itr) { + if (bfs::is_directory(itr->path())) { + add_file_system( itr->path() ); + } + } + } + + thread_pool.start(thread_pool_size, + []( const fc::exception& e ) { + elog("Exception in read-only thread pool, exiting: ${e}", ("e", e.to_detail_string()) ); + appbase::app().quit(); }, + [&]() { space_monitor_loop(); } + ); + } + + // called on main thread from plugin shutdown() + void stop() { + { + std::lock_guard g( timer_mtx ); + boost::system::error_code ec; + timer.cancel(ec); + } + thread_pool.stop(); + } + void set_sleep_time(uint32_t sleep_time) { sleep_time_in_secs = sleep_time; } @@ -133,66 +163,40 @@ namespace eosio::resource_monitor { } void space_monitor_loop() { - space_monitor_task_running = true; - - if (stopping) { - space_monitor_task_running = false; - return; - } if ( is_threshold_exceeded() && shutdown_on_exceeded ) { elog("Gracefully shutting down, exceeded file system configured threshold."); appbase::app().quit(); // This will gracefully stop Nodeos - space_monitor_task_running = false; return; } update_warning_interval_counter(); - timer.expires_from_now( 
boost::posix_time::seconds( sleep_time_in_secs )); - if (stopping) { - space_monitor_task_running = false; - return; - } - timer.async_wait([this](auto& ec) { - if ( ec ) { - wlog("Exit due to error: ${ec}, message: ${message}", - ("ec", ec.value()) - ("message", ec.message())); - space_monitor_task_running = false; - return; - } else { - // Loop over - space_monitor_loop(); - } - }); - space_monitor_task_running = false; - } - - // called on main thread from plugin shutdown() - void stop() { - // signalling space_monitor_loop that plugin is being stopped - stopping = true; - - // cancel outstanding scheduled space_monitor_loop task - timer.cancel(); - - // make sure space_monitor_loop has exited. - // in vast majority time, it is not running; just waiting to be executed - using namespace std::chrono_literals; // for ms - auto num_sleeps = 0u; - constexpr auto max_num_sleeps = 10u; - while (space_monitor_task_running && num_sleeps < max_num_sleeps) { - std::this_thread::sleep_for( 1ms ); - ++num_sleeps; - } - if (space_monitor_task_running) { - wlog("space_monitor_loop not stopped after ${n} ms", ("n", num_sleeps)); + { + std::lock_guard g( timer_mtx ); + timer.expires_from_now( boost::posix_time::seconds( sleep_time_in_secs )); + timer.async_wait([this](auto& ec) { + if ( ec ) { + if ( ec != boost::asio::error::operation_aborted ) { // not cancelled + wlog("Exit due to error: ${ec}, message: ${message}", + ("ec", ec.value()) + ("message", ec.message())); + } + return; + } else { + // Loop over + space_monitor_loop(); + } + }); } } private: SpaceProvider space_provider; - boost::asio::deadline_timer timer; + static constexpr size_t thread_pool_size = 1; + eosio::chain::named_thread_pool thread_pool; + + std::mutex timer_mtx; + boost::asio::deadline_timer timer {thread_pool.get_executor()}; uint32_t sleep_time_in_secs {2}; uint32_t shutdown_threshold {90}; @@ -200,8 +204,6 @@ namespace eosio::resource_monitor { uint64_t shutdown_absolute {0}; uint64_t 
warning_absolute {0}; bool shutdown_on_exceeded {true}; - std::atomic stopping {false}; // set on main thread, checked on resmon thread - std::atomic space_monitor_task_running {false}; // set on resmon thread, checked on main thread. after space_monitor_loop is not running, it is safe to stop struct filesystem_info { dev_t st_dev; // device id of file system containing "file_path" diff --git a/plugins/resource_monitor_plugin/resource_monitor_plugin.cpp b/plugins/resource_monitor_plugin/resource_monitor_plugin.cpp index c01eede115..bb38152b09 100644 --- a/plugins/resource_monitor_plugin/resource_monitor_plugin.cpp +++ b/plugins/resource_monitor_plugin/resource_monitor_plugin.cpp @@ -40,7 +40,7 @@ namespace eosio { class resource_monitor_plugin_impl { public: resource_monitor_plugin_impl() - :space_handler(system_file_space_provider(), ctx) + :space_handler(system_file_space_provider()) { } @@ -112,43 +112,14 @@ class resource_monitor_plugin_impl { // Start main thread void plugin_startup() { - ilog("Creating and starting monitor thread"); - - // By now all plugins are initialized. - // Find out filesystems containing the directories requested - // so far. - for ( auto& dir: directories_registered ) { - space_handler.add_file_system( dir ); - - // A directory like "data" contains subdirectories like - // "block". Those subdirectories can mount on different - // file systems. Make sure they are taken care of. - for (bfs::directory_iterator itr(dir); itr != bfs::directory_iterator(); ++itr) { - if (fc::is_directory(itr->path())) { - space_handler.add_file_system( itr->path() ); - } - } - } - - monitor_thread = std::thread( [this] { - fc::set_os_thread_name( "resmon" ); // console_appender uses 9 chars for thread name reporting. - space_handler.space_monitor_loop(); - - ctx.run(); - } ); + space_handler.start(directories_registered); } // System is shutting down. 
void plugin_shutdown() { - ilog("shutdown..."); - + ilog("entered shutdown..."); space_handler.stop(); - ctx.stop(); - - // Wait for the thread to end - monitor_thread.join(); - - ilog("exit shutdown"); + ilog("exiting shutdown"); } void monitor_directory(const bfs::path& path) { @@ -173,8 +144,6 @@ class resource_monitor_plugin_impl { static constexpr uint32_t warning_interval_min = 1; static constexpr uint32_t warning_interval_max = 450; // e.g. if the monitor interval is 2 sec, the warning interval is at most 15 minutes - boost::asio::io_context ctx; - using file_space_handler_t = file_space_handler; file_space_handler_t space_handler; }; diff --git a/plugins/resource_monitor_plugin/test/test_add_file_system.cpp b/plugins/resource_monitor_plugin/test/test_add_file_system.cpp index 08ac6100c1..74a078adf9 100644 --- a/plugins/resource_monitor_plugin/test/test_add_file_system.cpp +++ b/plugins/resource_monitor_plugin/test/test_add_file_system.cpp @@ -23,11 +23,9 @@ struct add_file_system_fixture { add_file_system_fixture& fixture; }; - boost::asio::io_context ctx; - using file_space_handler_t = file_space_handler; add_file_system_fixture() - : space_handler(mock_space_provider(*this), ctx) + : space_handler(mock_space_provider(*this)) { } diff --git a/plugins/resource_monitor_plugin/test/test_threshold.cpp b/plugins/resource_monitor_plugin/test/test_threshold.cpp index c817d5cb13..4fd2e77b4c 100644 --- a/plugins/resource_monitor_plugin/test/test_threshold.cpp +++ b/plugins/resource_monitor_plugin/test/test_threshold.cpp @@ -23,11 +23,9 @@ struct threshold_fixture { threshold_fixture& fixture; }; - boost::asio::io_context ctx; - using file_space_handler_t = file_space_handler; threshold_fixture() - : space_handler(std::make_unique(mock_space_provider(*this), ctx)) + : space_handler(std::make_unique(mock_space_provider(*this))) { } @@ -49,7 +47,7 @@ struct threshold_fixture { bool test_threshold_common(std::map& available, std::map& dev, uint32_t 
warning_threshold=75) { bool first = test_threshold_common_(available, dev, warning_threshold); - space_handler = std::make_unique(mock_space_provider(*this), ctx); + space_handler = std::make_unique(mock_space_provider(*this)); test_absolute = true; bool second = test_threshold_common_(available, dev, warning_threshold); From 599b01d4ec03df20131a801049ae0e925ca6388a Mon Sep 17 00:00:00 2001 From: Lin Huang Date: Sun, 15 Oct 2023 15:23:16 -0400 Subject: [PATCH 06/31] Remove unnecessary unit tests Those tests were intended to verify the duration of space_monitor_loop. In essence they tested Boost's expires_from_now, which was not necessary. The tests themselves were hacky and took an unnecessary 50 seconds. --- .../test/CMakeLists.txt | 2 +- .../test/test_monitor_loop.cpp | 151 ------------------ 2 files changed, 1 insertion(+), 152 deletions(-) delete mode 100644 plugins/resource_monitor_plugin/test/test_monitor_loop.cpp diff --git a/plugins/resource_monitor_plugin/test/CMakeLists.txt b/plugins/resource_monitor_plugin/test/CMakeLists.txt index a564c515dc..e9f0bf03ba 100644 --- a/plugins/resource_monitor_plugin/test/CMakeLists.txt +++ b/plugins/resource_monitor_plugin/test/CMakeLists.txt @@ -1,4 +1,4 @@ -add_executable( test_resmon_plugin test_resmon_plugin.cpp test_add_file_system.cpp test_monitor_loop.cpp test_threshold.cpp ) +add_executable( test_resmon_plugin test_resmon_plugin.cpp test_add_file_system.cpp test_threshold.cpp ) target_link_libraries( test_resmon_plugin resource_monitor_plugin ) target_include_directories( test_resmon_plugin PUBLIC "${CMAKE_CURRENT_SOURCE_DIR}/include" ) add_test(NAME test_resmon_plugin COMMAND plugins/resource_monitor_plugin/test/test_resmon_plugin WORKING_DIRECTORY ${CMAKE_BINARY_DIR}) diff --git a/plugins/resource_monitor_plugin/test/test_monitor_loop.cpp b/plugins/resource_monitor_plugin/test/test_monitor_loop.cpp deleted file mode 100644 index f572cd53b7..0000000000 --- a/plugins/resource_monitor_plugin/test/test_monitor_loop.cpp
+++ /dev/null @@ -1,151 +0,0 @@ -#include - -#include - -using namespace eosio; -using namespace eosio::resource_monitor; -using namespace boost::system; - -struct space_handler_fixture { - struct mock_space_provider { - explicit mock_space_provider(space_handler_fixture& fixture) - :fixture(fixture) - {} - - int get_stat(const char *path, struct stat *buf) const { - return fixture.mock_get_stat(path, buf); - } - - bfs::space_info get_space(const bfs::path& p, boost::system::error_code& ec) const { - return fixture.mock_get_space(p, ec); - } - - space_handler_fixture& fixture; - }; - - boost::asio::io_context ctx; - - using file_space_handler_t = file_space_handler; - space_handler_fixture() - : space_handler(mock_space_provider( *this ), ctx) - { - } - - void add_file_system(const bfs::path& path_name) { - space_handler.add_file_system( path_name ); - } - - void set_threshold(uint32_t threshold, uint32_t warning_threshold) { - space_handler.set_threshold( threshold, warning_threshold ); - } - - void set_sleep_time(uint32_t sleep_time) { - space_handler.set_sleep_time( sleep_time ); - } - - void set_shutdown_on_exceeded(bool shutdown_on_exceeded) { - space_handler.set_shutdown_on_exceeded(shutdown_on_exceeded); - } - - bool is_threshold_exceeded() { - return space_handler.is_threshold_exceeded(); - } - - void space_monitor_loop() { - return space_handler.space_monitor_loop(); - } - - bool test_loop_common(int num_loops, int interval) - { - mock_get_space = [ i = 0, num_loops ]( const bfs::path& p, boost::system::error_code& ec) mutable -> bfs::space_info { - ec = boost::system::errc::make_error_code(errc::success); - - bfs::space_info rc{}; - rc.capacity = 1000000; - - if ( i < num_loops + 1 ) { // "+ 1" for the get_space in add_file_system - rc.available = 300000; - } else { - rc.available = 100000; - } - - i++; - - return rc; - }; - - mock_get_stat = []( const char *path, struct stat *buf ) -> int { - buf->st_dev = 0; - return 0; - }; - - set_threshold(80, 75); - 
set_shutdown_on_exceeded(true); - set_sleep_time(interval); - add_file_system("/test"); - - auto start = std::chrono::system_clock::now(); - - auto monitor_thread = std::thread( [this] { - space_monitor_loop(); - ctx.run(); - }); - - monitor_thread.join(); - - auto end = std::chrono::system_clock::now(); - std::chrono::duration test_duration = end - start; - - // For tests to be repeatable on any platforms under any loads, - // particularly for longer runs, - // we just make sure the test duration is longer than a margin - // of theroretical duration. - bool finished_in_time = (test_duration >= std::chrono::duration((num_loops - 1) * interval)); - - return finished_in_time; - } - - // fixture data and methods - std::function mock_get_space; - std::function mock_get_stat; - - file_space_handler_t space_handler; -}; - -BOOST_AUTO_TEST_SUITE(monitor_loop_tests) - BOOST_FIXTURE_TEST_CASE(zero_loop, space_handler_fixture) - { - BOOST_TEST( test_loop_common(0, 1) ); - } - - BOOST_FIXTURE_TEST_CASE(one_loop_1_secs_interval, space_handler_fixture) - { - BOOST_TEST( test_loop_common(1, 1) ); - } - - BOOST_FIXTURE_TEST_CASE(two_loops_1_sec_interval, space_handler_fixture) - { - BOOST_TEST( test_loop_common(2, 1) ); - } - - BOOST_FIXTURE_TEST_CASE(ten_loops_1_sec_interval, space_handler_fixture) - { - BOOST_TEST( test_loop_common(10, 1) ); - } - - BOOST_FIXTURE_TEST_CASE(one_loop_5_secs_interval, space_handler_fixture) - { - BOOST_TEST( test_loop_common(1, 5) ); - } - - BOOST_FIXTURE_TEST_CASE(two_loops_5_sec_interval, space_handler_fixture) - { - BOOST_TEST( test_loop_common(2, 5) ); - } - - BOOST_FIXTURE_TEST_CASE(five_loops_5_sec_interval, space_handler_fixture) - { - BOOST_TEST( test_loop_common(5, 5) ); - } - -BOOST_AUTO_TEST_SUITE_END() From dc87f58939deb89cad6bc3d63e5c2aa8c0403900 Mon Sep 17 00:00:00 2001 From: Lin Huang Date: Sun, 15 Oct 2023 17:22:39 -0400 Subject: [PATCH 07/31] update resource monitor plugin integration tests * removes unnecessary check of info 
logging "Creating and starting monitor thread" * reduces time to wait for nodeos startup from 120 seconds to 10 seconds --- tests/resource_monitor_plugin_test.py | 13 +++++++------ 1 file changed, 7 insertions(+), 6 deletions(-) diff --git a/tests/resource_monitor_plugin_test.py b/tests/resource_monitor_plugin_test.py index e50db5fac3..f6fa9c502d 100755 --- a/tests/resource_monitor_plugin_test.py +++ b/tests/resource_monitor_plugin_test.py @@ -23,6 +23,7 @@ stderrFile=dataDir + "/stderr.txt" testNum=0 +max_start_time_secs=10 # time nodeos takes to start # We need debug level to get more information about nodeos process logging="""{ @@ -105,7 +106,7 @@ def testCommon(title, extraNodeosArgs, expectedMsgs): prepareDirectories() - timeout=120 # Leave sufficient time such nodeos can start up fully in any platforms + timeout=max_start_time_secs # Leave sufficient time such nodeos can start up fully in any platforms runNodeos(extraNodeosArgs, timeout) for msg in expectedMsgs: @@ -156,7 +157,7 @@ def testInterval(title, extraNodeosArgs, interval, expectedMsgs, warningThreshol prepareDirectories() fillFS(dataDir, warningThreshold) - timeout = 120 + interval * 2 # Leave sufficient time so nodeos can start up fully in any platforms, and at least two warnings can be output + timeout = max_start_time_secs + interval * 2 # Leave sufficient time so nodeos can start up fully in any platforms, and at least two warnings can be output if timeout > testIntervalMaxTimeout: errorExit ("Max timeout for testInterval is %d sec" % (testIntervalMaxTimeout)) runNodeos(extraNodeosArgs, timeout) @@ -169,15 +170,15 @@ def testInterval(title, extraNodeosArgs, interval, expectedMsgs, warningThreshol errorExit ("Log containing \"%s\" should be output every %d seconds" % (msg, interval)) def testAll(): - testCommon("Resmon enabled: all arguments", "--plugin eosio::resource_monitor_plugin --resource-monitor-space-threshold=85 --resource-monitor-interval-seconds=5 
--resource-monitor-not-shutdown-on-threshold-exceeded", ["threshold set to 85", "interval set to 5", "Shutdown flag when threshold exceeded set to false", "Creating and starting monitor thread"]) + testCommon("Resmon enabled: all arguments", "--plugin eosio::resource_monitor_plugin --resource-monitor-space-threshold=85 --resource-monitor-interval-seconds=5 --resource-monitor-not-shutdown-on-threshold-exceeded", ["threshold set to 85", "interval set to 5", "Shutdown flag when threshold exceeded set to false"]) # default arguments and default directories to be monitored - testCommon("Resmon not enabled: no arguments", "", ["interval set to 2", "threshold set to 90", "Shutdown flag when threshold exceeded set to true", "Creating and starting monitor thread", "snapshots's file system to be monitored", "blocks's file system to be monitored", "state's file system to be monitored"]) + testCommon("Resmon not enabled: no arguments", "", ["interval set to 2", "threshold set to 90", "Shutdown flag when threshold exceeded set to true", "snapshots's file system to be monitored", "blocks's file system to be monitored", "state's file system to be monitored"]) # default arguments with registered directories - testCommon("Resmon not enabled: Producer, Chain, State History and Trace Api", "--plugin eosio::state_history_plugin --state-history-dir=/tmp/state-history --disable-replay-opts --plugin eosio::trace_api_plugin --trace-dir=/tmp/trace --trace-no-abis", ["interval set to 2", "threshold set to 90", "Shutdown flag when threshold exceeded set to true", "snapshots's file system to be monitored", "blocks's file system to be monitored", "state's file system to be monitored", "state-history's file system to be monitored", "trace's file system to be monitored", "Creating and starting monitor thread"]) + testCommon("Resmon not enabled: Producer, Chain, State History and Trace Api", "--plugin eosio::state_history_plugin --state-history-dir=/tmp/state-history --disable-replay-opts 
--plugin eosio::trace_api_plugin --trace-dir=/tmp/trace --trace-no-abis", ["interval set to 2", "threshold set to 90", "Shutdown flag when threshold exceeded set to true", "snapshots's file system to be monitored", "blocks's file system to be monitored", "state's file system to be monitored", "state-history's file system to be monitored", "trace's file system to be monitored"]) - testCommon("Resmon enabled: Producer, Chain, State History and Trace Api", "--plugin eosio::resource_monitor_plugin --plugin eosio::state_history_plugin --state-history-dir=/tmp/state-history --disable-replay-opts --plugin eosio::trace_api_plugin --trace-dir=/tmp/trace --trace-no-abis --resource-monitor-space-threshold=80 --resource-monitor-interval-seconds=3", ["snapshots's file system to be monitored", "blocks's file system to be monitored", "state's file system to be monitored", "state-history's file system to be monitored", "trace's file system to be monitored", "Creating and starting monitor thread", "threshold set to 80", "interval set to 3", "Shutdown flag when threshold exceeded set to true"]) + testCommon("Resmon enabled: Producer, Chain, State History and Trace Api", "--plugin eosio::resource_monitor_plugin --plugin eosio::state_history_plugin --state-history-dir=/tmp/state-history --disable-replay-opts --plugin eosio::trace_api_plugin --trace-dir=/tmp/trace --trace-no-abis --resource-monitor-space-threshold=80 --resource-monitor-interval-seconds=3", ["snapshots's file system to be monitored", "blocks's file system to be monitored", "state's file system to be monitored", "state-history's file system to be monitored", "trace's file system to be monitored", "threshold set to 80", "interval set to 3", "Shutdown flag when threshold exceeded set to true"]) # Only test minimum warning threshold (i.e. 
6) to trigger warning as much as possible testInterval("Resmon enabled: set warning interval", From 5723fa927987f74f984f6852b7907835742dcda7 Mon Sep 17 00:00:00 2001 From: Kevin Heifner Date: Mon, 16 Oct 2023 10:16:44 -0500 Subject: [PATCH 08/31] GH-1677 Close files as rename/swap was not working --- .../state_history/include/eosio/state_history/log.hpp | 11 ++++++++--- 1 file changed, 8 insertions(+), 3 deletions(-) diff --git a/libraries/state_history/include/eosio/state_history/log.hpp b/libraries/state_history/include/eosio/state_history/log.hpp index c1da12cd10..d4fe5cb5ed 100644 --- a/libraries/state_history/include/eosio/state_history/log.hpp +++ b/libraries/state_history/include/eosio/state_history/log.hpp @@ -900,8 +900,8 @@ class state_history_log { fc::path log_file_path = log.get_file_path(); fc::path index_file_path = index.get_file_path(); - fc::datastream new_log_file; - fc::datastream new_index_file; + fc::cfile new_log_file; + fc::cfile new_index_file; fc::path tmp_log_file_path = log_file_path; tmp_log_file_path.replace_extension("log.tmp"); @@ -914,13 +914,14 @@ class state_history_log { try { new_log_file.open(fc::cfile::truncate_rw_mode); new_index_file.open(fc::cfile::truncate_rw_mode); - } catch (...) 
{ wlog("Unable to open new state history log or index file for writing during log spliting, " "continue writing to existing block log file\n"); return; } + new_log_file.close(); + new_index_file.close(); index.close(); log.close(); @@ -937,6 +938,10 @@ class state_history_log { log.set_file_path(log_file_path); index.set_file_path(index_file_path); + + log.open(fc::cfile::update_rw_mode); + log.seek_end(0); + index.open(fc::cfile::create_or_update_rw_mode); } }; // state_history_log From b795fee0687ba1d91b433c71a1ead79d70a4aebf Mon Sep 17 00:00:00 2001 From: Kevin Heifner Date: Mon, 16 Oct 2023 10:33:04 -0500 Subject: [PATCH 09/31] GH-1677 Add assert and error message --- .../state_history/include/eosio/state_history/log.hpp | 9 +++++++++ 1 file changed, 9 insertions(+) diff --git a/libraries/state_history/include/eosio/state_history/log.hpp b/libraries/state_history/include/eosio/state_history/log.hpp index d4fe5cb5ed..aca21a148c 100644 --- a/libraries/state_history/include/eosio/state_history/log.hpp +++ b/libraries/state_history/include/eosio/state_history/log.hpp @@ -578,10 +578,19 @@ class state_history_log { if (block_num >= _begin_block && block_num < _end_block) { state_history_log_header header; get_entry(block_num, header); + if (chain::block_header::num_from_id(header.block_id) != block_num) { // entry does not match requested block num + elog("id does not match requested ${a} != ${b}", ("a", chain::block_header::num_from_id(header.block_id))("b", block_num)); + assert(false); + } return header.block_id; } return {}; } + if (chain::block_header::num_from_id(*result) != block_num) { // catalog failed to fetch correct block id + elog("id does not match requested ${a} != ${b}", ("a", chain::block_header::num_from_id(*result))("b", block_num)); + assert(false); + } + return result; } From d4c417c11bdc92951c47edf5c6ade697153ea374 Mon Sep 17 00:00:00 2001 From: Lin Huang Date: Mon, 16 Oct 2023 13:39:36 -0400 Subject: [PATCH 10/31] simplify use of thread pool * 
no need to call cancel timer explicitly. * no need to use mutex for timer as when timer is used on the main thread, resource monitor thread has already stopped. --- .../file_space_handler.hpp | 25 +++++++++---------- 1 file changed, 12 insertions(+), 13 deletions(-) diff --git a/plugins/resource_monitor_plugin/include/eosio/resource_monitor_plugin/file_space_handler.hpp b/plugins/resource_monitor_plugin/include/eosio/resource_monitor_plugin/file_space_handler.hpp index 5ecee7bd3c..5fc4d96e84 100644 --- a/plugins/resource_monitor_plugin/include/eosio/resource_monitor_plugin/file_space_handler.hpp +++ b/plugins/resource_monitor_plugin/include/eosio/resource_monitor_plugin/file_space_handler.hpp @@ -34,7 +34,7 @@ namespace eosio::resource_monitor { thread_pool.start(thread_pool_size, []( const fc::exception& e ) { - elog("Exception in read-only thread pool, exiting: ${e}", ("e", e.to_detail_string()) ); + elog("Exception in resource monitor plugin thread pool, exiting: ${e}", ("e", e.to_detail_string()) ); appbase::app().quit(); }, [&]() { space_monitor_loop(); } ); @@ -42,11 +42,9 @@ namespace eosio::resource_monitor { // called on main thread from plugin shutdown() void stop() { - { - std::lock_guard g( timer_mtx ); - boost::system::error_code ec; - timer.cancel(ec); - } + // After thread pool stops, timer is not accessible within it. + // In addition, timer's destructor will call cancel. + // Therefore, no need to call cancel explicitly. 
thread_pool.stop(); } @@ -162,6 +160,7 @@ namespace eosio::resource_monitor { ("path_name", path_name.string())("shutdown_available", to_gib(shutdown_available)) ("capacity", to_gib(info.capacity))("threshold_desc", threshold_desc()) ); } + // on resmon thread void space_monitor_loop() { if ( is_threshold_exceeded() && shutdown_on_exceeded ) { elog("Gracefully shutting down, exceeded file system configured threshold."); @@ -171,15 +170,16 @@ namespace eosio::resource_monitor { update_warning_interval_counter(); { - std::lock_guard g( timer_mtx ); timer.expires_from_now( boost::posix_time::seconds( sleep_time_in_secs )); timer.async_wait([this](auto& ec) { if ( ec ) { - if ( ec != boost::asio::error::operation_aborted ) { // not cancelled - wlog("Exit due to error: ${ec}, message: ${message}", - ("ec", ec.value()) - ("message", ec.message())); - } + // No need to check if ec is operation_aborted (cancelled), + // as cancel callback will never be make it here after thread_pool + // is stopped, even though cancel is called in the timer's + // destructor. 
+ wlog("Exit due to error: ${ec}, message: ${message}", + ("ec", ec.value()) + ("message", ec.message())); return; } else { // Loop over @@ -195,7 +195,6 @@ namespace eosio::resource_monitor { static constexpr size_t thread_pool_size = 1; eosio::chain::named_thread_pool thread_pool; - std::mutex timer_mtx; boost::asio::deadline_timer timer {thread_pool.get_executor()}; uint32_t sleep_time_in_secs {2}; From 6cb1237e718b7638e904d2d9f3337fa65e511da3 Mon Sep 17 00:00:00 2001 From: Kevin Heifner Date: Mon, 16 Oct 2023 12:45:41 -0500 Subject: [PATCH 11/31] GH-1406 code_cache not likely to be useful for replay, remove all state files for replay --- plugins/chain_plugin/chain_plugin.cpp | 10 +--------- 1 file changed, 1 insertion(+), 9 deletions(-) diff --git a/plugins/chain_plugin/chain_plugin.cpp b/plugins/chain_plugin/chain_plugin.cpp index 99612f7282..16855ed95f 100644 --- a/plugins/chain_plugin/chain_plugin.cpp +++ b/plugins/chain_plugin/chain_plugin.cpp @@ -432,14 +432,6 @@ void clear_directory_contents( const fc::path& p ) { } } -void clear_chainbase_files( const fc::path& p ) { - if( !fc::is_directory( p ) ) - return; - - fc::remove( p / "shared_memory.bin" ); - fc::remove( p / "shared_memory.meta" ); -} - namespace { // This can be removed when versions of eosio that support reversible chainbase state file no longer supported. void upgrade_from_reversible_to_fork_db(chain_plugin_impl* my) { @@ -732,7 +724,7 @@ void chain_plugin::plugin_initialize(const variables_map& options) { ilog( "Replay requested: deleting state database" ); if( options.at( "truncate-at-block" ).as() > 0 ) wlog( "The --truncate-at-block option does not work for a regular replay of the blockchain." ); - clear_chainbase_files( my->chain_config->state_dir ); + clear_directory_contents( my->chain_config->state_dir ); } else if( options.at( "truncate-at-block" ).as() > 0 ) { wlog( "The --truncate-at-block option can only be used with --hard-replay-blockchain." 
); } From d6882ddf1cf484dfa4dbca6f6d49b7eef9217e42 Mon Sep 17 00:00:00 2001 From: Kevin Heifner Date: Mon, 16 Oct 2023 12:46:13 -0500 Subject: [PATCH 12/31] GH-1406 Re-create code cache on startup if it is corrupt --- .../runtimes/eos-vm-oc/code_cache.cpp | 43 +++++++++++++------ 1 file changed, 30 insertions(+), 13 deletions(-) diff --git a/libraries/chain/webassembly/runtimes/eos-vm-oc/code_cache.cpp b/libraries/chain/webassembly/runtimes/eos-vm-oc/code_cache.cpp index aba47f4ef2..f76fd79a58 100644 --- a/libraries/chain/webassembly/runtimes/eos-vm-oc/code_cache.cpp +++ b/libraries/chain/webassembly/runtimes/eos-vm-oc/code_cache.cpp @@ -224,14 +224,15 @@ const code_descriptor* const code_cache_sync::get_descriptor_for_code_sync(const code_cache_base::code_cache_base(const boost::filesystem::path data_dir, const eosvmoc::config& eosvmoc_config, const chainbase::database& db) : _db(db), - _cache_file_path(data_dir/"code_cache.bin") -{ + _cache_file_path(data_dir/"code_cache.bin") { static_assert(sizeof(allocator_t) <= header_offset, "header offset intersects with allocator"); bfs::create_directories(data_dir); - if(!bfs::exists(_cache_file_path)) { - EOS_ASSERT(eosvmoc_config.cache_size >= allocator_t::get_min_size(total_header_size), database_exception, "configured code cache size is too small"); + bool created_file = false; + auto create_code_cache_file = [&] { + EOS_ASSERT(eosvmoc_config.cache_size >= allocator_t::get_min_size(total_header_size), database_exception, + "configured code cache size is too small"); std::ofstream ofs(_cache_file_path.generic_string(), std::ofstream::trunc); EOS_ASSERT(ofs.good(), database_exception, "unable to create EOS VM Optimized Compiler code cache"); bfs::resize_file(_cache_file_path, eosvmoc_config.cache_size); @@ -239,19 +240,35 @@ code_cache_base::code_cache_base(const boost::filesystem::path data_dir, const e bip::mapped_region creation_region(creation_mapping, bip::read_write); new (creation_region.get_address()) 
allocator_t(eosvmoc_config.cache_size, total_header_size); new ((char*)creation_region.get_address() + header_offset) code_cache_header; + created_file = true; + }; + + if (!bfs::exists(_cache_file_path)) { + create_code_cache_file(); } code_cache_header cache_header; - { - char header_buff[total_header_size]; - std::ifstream hs(_cache_file_path.generic_string(), std::ifstream::binary); - hs.read(header_buff, sizeof(header_buff)); - EOS_ASSERT(!hs.fail(), bad_database_version_exception, "failed to read code cache header"); - memcpy((char*)&cache_header, header_buff + header_offset, sizeof(cache_header)); - } + while (true) { + try { + { + char header_buff[total_header_size]; + std::ifstream hs(_cache_file_path.generic_string(), std::ifstream::binary); + hs.read(header_buff, sizeof(header_buff)); + EOS_ASSERT(!hs.fail(), bad_database_version_exception, "failed to read code cache header"); + memcpy((char*)&cache_header, header_buff + header_offset, sizeof(cache_header)); + } - EOS_ASSERT(cache_header.id == header_id, bad_database_version_exception, "existing EOS VM OC code cache not compatible with this version"); - EOS_ASSERT(!cache_header.dirty, database_exception, "code cache is dirty"); + EOS_ASSERT(cache_header.id == header_id, bad_database_version_exception, "existing EOS VM OC code cache not compatible with this version"); + EOS_ASSERT(!cache_header.dirty, database_exception, "code cache is dirty"); + break; + } catch (const fc::exception&) { + if (created_file) + throw; + + ilog("EOS VM optimized Compiler code cache corrupt, recreating"); + create_code_cache_file(); + } + } set_on_disk_region_dirty(true); From 76f1c7a0332bb24c029d0d42ffb5c227c38cf3dd Mon Sep 17 00:00:00 2001 From: Kevin Heifner Date: Tue, 17 Oct 2023 07:43:02 -0500 Subject: [PATCH 13/31] GH-1406 Remove while loop and use lambda instead --- .../runtimes/eos-vm-oc/code_cache.cpp | 40 +++++++++---------- 1 file changed, 20 insertions(+), 20 deletions(-) diff --git 
a/libraries/chain/webassembly/runtimes/eos-vm-oc/code_cache.cpp b/libraries/chain/webassembly/runtimes/eos-vm-oc/code_cache.cpp index f76fd79a58..31aa3f3c8b 100644 --- a/libraries/chain/webassembly/runtimes/eos-vm-oc/code_cache.cpp +++ b/libraries/chain/webassembly/runtimes/eos-vm-oc/code_cache.cpp @@ -243,31 +243,31 @@ code_cache_base::code_cache_base(const boost::filesystem::path data_dir, const e created_file = true; }; + code_cache_header cache_header; + auto check_code_cache = [&] { + char header_buff[total_header_size]; + std::ifstream hs(_cache_file_path.generic_string(), std::ifstream::binary); + hs.read(header_buff, sizeof(header_buff)); + EOS_ASSERT(!hs.fail(), bad_database_version_exception, "failed to read code cache header"); + memcpy((char*)&cache_header, header_buff + header_offset, sizeof(cache_header)); + + EOS_ASSERT(cache_header.id == header_id, bad_database_version_exception, "existing EOS VM OC code cache not compatible with this version"); + EOS_ASSERT(!cache_header.dirty, database_exception, "code cache is dirty"); + }; + if (!bfs::exists(_cache_file_path)) { create_code_cache_file(); } - code_cache_header cache_header; - while (true) { - try { - { - char header_buff[total_header_size]; - std::ifstream hs(_cache_file_path.generic_string(), std::ifstream::binary); - hs.read(header_buff, sizeof(header_buff)); - EOS_ASSERT(!hs.fail(), bad_database_version_exception, "failed to read code cache header"); - memcpy((char*)&cache_header, header_buff + header_offset, sizeof(cache_header)); - } + try { + check_code_cache(); + } catch (const fc::exception&) { + if (created_file) + throw; - EOS_ASSERT(cache_header.id == header_id, bad_database_version_exception, "existing EOS VM OC code cache not compatible with this version"); - EOS_ASSERT(!cache_header.dirty, database_exception, "code cache is dirty"); - break; - } catch (const fc::exception&) { - if (created_file) - throw; - - ilog("EOS VM optimized Compiler code cache corrupt, recreating"); - 
create_code_cache_file(); - } + ilog("EOS VM optimized Compiler code cache corrupt, recreating"); + create_code_cache_file(); + check_code_cache(); } set_on_disk_region_dirty(true); From f5b5e64dd6c6f2610c3bb643349fd4bb1f78a994 Mon Sep 17 00:00:00 2001 From: Kevin Heifner Date: Tue, 17 Oct 2023 10:42:03 -0500 Subject: [PATCH 14/31] GH-1677 Add additional tests --- .../tests/session_test.cpp | 94 +++++++++++++++++-- tests/ship_streamer_test.py | 6 +- 2 files changed, 91 insertions(+), 9 deletions(-) diff --git a/plugins/state_history_plugin/tests/session_test.cpp b/plugins/state_history_plugin/tests/session_test.cpp index ee6e22d5aa..83c718ab8d 100644 --- a/plugins/state_history_plugin/tests/session_test.cpp +++ b/plugins/state_history_plugin/tests/session_test.cpp @@ -98,20 +98,22 @@ struct mock_state_history_plugin { eosio::state_history::block_position block_head; fc::temp_directory log_dir; - std::optional log; + std::optional trace_log; + std::optional state_log; std::atomic stopping = false; eosio::session_manager session_mgr{ship_ioc}; constexpr static uint32_t default_frame_size = 1024; - std::optional& get_trace_log() { return log; } - std::optional& get_chain_state_log() { return log; } + std::optional& get_trace_log() { return trace_log; } + std::optional& get_chain_state_log() { return state_log; } fc::sha256 get_chain_id() const { return {}; } boost::asio::io_context& get_ship_executor() { return ship_ioc; } void setup_state_history_log(eosio::state_history_log_config conf = {}) { - log.emplace("ship", log_dir.path(), conf); + trace_log.emplace("ship_trace", log_dir.path(), conf); + state_log.emplace("ship_state", log_dir.path(), conf); } fc::logger logger() { return fc::logger::get(DEFAULT_LOGGER); } @@ -125,7 +127,20 @@ struct mock_state_history_plugin { return fc::time_point{}; } - std::optional get_block_id(uint32_t block_num) { return block_id_for(block_num); } + std::optional get_block_id(uint32_t block_num) { + std::optional id; + if( trace_log ) 
{ + id = trace_log->get_block_id( block_num ); + if( id ) + return id; + } + if( state_log ) { + id = state_log->get_block_id( block_num ); + if( id ) + return id; + } + return block_id_for(block_num); + } eosio::state_history::block_position get_block_head() { return block_head; } eosio::state_history::block_position get_last_irreversible() { return block_head; } @@ -346,13 +361,24 @@ struct state_history_test_fixture { header.payload_size += sizeof(uint64_t); } - server.log->write_entry(header, block_id_for(index - 1), [&](auto& f) { + std::unique_lock gt(server.trace_log->_mx); + server.trace_log->write_entry(header, block_id_for(index - 1), [&](auto& f) { f.write((const char*)&type, sizeof(type)); if (type == 1) { f.write((const char*)&decompressed_byte_count, sizeof(decompressed_byte_count)); } f.write(compressed.data(), compressed.size()); }); + gt.unlock(); + std::unique_lock gs(server.state_log->_mx); + server.state_log->write_entry(header, block_id_for(index - 1), [&](auto& f) { + f.write((const char*)&type, sizeof(type)); + if (type == 1) { + f.write((const char*)&decompressed_byte_count, sizeof(decompressed_byte_count)); + } + f.write(compressed.data(), compressed.size()); + }); + gs.unlock(); if (written_data.size() < index) written_data.resize(index); @@ -490,6 +516,62 @@ BOOST_FIXTURE_TEST_CASE(test_session_no_prune, state_history_test_fixture) { FC_LOG_AND_RETHROW() } +BOOST_FIXTURE_TEST_CASE(test_split_log, state_history_test_fixture) { + try { + // setup block head for the server + constexpr uint32_t head = 1023; + eosio::state_history::partition_config conf; + conf.stride = 25; + server.setup_state_history_log(conf); + uint32_t head_block_num = head; + server.block_head = {head_block_num, block_id_for(head_block_num)}; + + // generate the log data used for traces and deltas + uint32_t n = mock_state_history_plugin::default_frame_size; + add_to_log(1, n * sizeof(uint32_t), generate_data(n)); // original data format + add_to_log(2, 0, 
generate_data(n)); // format to accommodate the compressed size greater than 4GB + add_to_log(3, 1, generate_data(n)); // format to encode decompressed size to avoid decompress entire data upfront. + for (size_t i = 4; i <= head; ++i) { + add_to_log(i, 1, generate_data(n)); + } + + send_request(eosio::state_history::get_blocks_request_v0{.start_block_num = 1, + .end_block_num = UINT32_MAX, + .max_messages_in_flight = UINT32_MAX, + .have_positions = {}, + .irreversible_only = false, + .fetch_block = true, + .fetch_traces = true, + .fetch_deltas = true}); + + eosio::state_history::state_result result; + // we should get 1023 consecutive block result + eosio::chain::block_id_type prev_id; + for (int i = 0; i < head; ++i) { + receive_result(result); + BOOST_REQUIRE(std::holds_alternative(result)); + auto r = std::get(result); + BOOST_REQUIRE_EQUAL(r.head.block_num, server.block_head.block_num); + if (i > 0) { + BOOST_TEST(prev_id.str() == r.prev_block->block_id.str()); + } + prev_id = r.this_block->block_id; + BOOST_REQUIRE(r.traces.has_value()); + BOOST_REQUIRE(r.deltas.has_value()); + auto traces = r.traces.value(); + auto deltas = r.deltas.value(); + auto& data = written_data[i]; + auto data_size = data.size() * sizeof(int32_t); + BOOST_REQUIRE_EQUAL(traces.size(), data_size); + BOOST_REQUIRE_EQUAL(deltas.size(), data_size); + + BOOST_REQUIRE(std::equal(traces.begin(), traces.end(), (const char*)data.data())); + BOOST_REQUIRE(std::equal(deltas.begin(), deltas.end(), (const char*)data.data())); + } + } + FC_LOG_AND_RETHROW() +} + BOOST_FIXTURE_TEST_CASE(test_session_with_prune, state_history_test_fixture) { try { // setup block head for the server diff --git a/tests/ship_streamer_test.py b/tests/ship_streamer_test.py index 8134e4dc50..893551101c 100755 --- a/tests/ship_streamer_test.py +++ b/tests/ship_streamer_test.py @@ -78,7 +78,7 @@ def getLatestSnapshot(nodeId): shipNodeNum = 1 specificExtraNodeosArgs={} - specificExtraNodeosArgs[shipNodeNum]="--plugin 
eosio::state_history_plugin --disable-replay-opts --trace-history --chain-state-history --plugin eosio::net_api_plugin --plugin eosio::producer_api_plugin " + specificExtraNodeosArgs[shipNodeNum]="--plugin eosio::state_history_plugin --disable-replay-opts --trace-history --chain-state-history --state-history-stride 200 --plugin eosio::net_api_plugin --plugin eosio::producer_api_plugin " # producer nodes will be mapped to 0 through totalProducerNodes-1, so the number totalProducerNodes will be the non-producing node specificExtraNodeosArgs[totalProducerNodes]="--plugin eosio::test_control_api_plugin " @@ -214,7 +214,7 @@ def getLatestSnapshot(nodeId): prodNode0.waitForProducer(forkAtProducer) prodNode1.waitForProducer(prodNode1Prod) if nonProdNode.verifyAlive(): - Utils.errorExit("Bridge did not shutdown"); + Utils.errorExit("Bridge did not shutdown") Print("Fork started") prodNode0.waitForProducer("defproducerb") # wait for fork to progress a bit @@ -222,7 +222,7 @@ def getLatestSnapshot(nodeId): Print("Restore fork") Print("Relaunching the non-producing bridge node to connect the producing nodes again") if nonProdNode.verifyAlive(): - Utils.errorExit("Bridge is already running"); + Utils.errorExit("Bridge is already running") if not nonProdNode.relaunch(): Utils.errorExit(f"Failure - (non-production) node {nonProdNode.nodeNum} should have restarted") From 59031ab71d36c1010285aa01345b826112ff9d09 Mon Sep 17 00:00:00 2001 From: Kevin Heifner Date: Tue, 17 Oct 2023 13:55:26 -0500 Subject: [PATCH 15/31] GH-1776 Fix for throttling the last block of a peer_requested dropping the last block and not sending it later --- plugins/net_plugin/net_plugin.cpp | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/plugins/net_plugin/net_plugin.cpp b/plugins/net_plugin/net_plugin.cpp index 5a401f7cb9..9b3ed88405 100644 --- a/plugins/net_plugin/net_plugin.cpp +++ b/plugins/net_plugin/net_plugin.cpp @@ -1732,10 +1732,6 @@ namespace eosio { peer_dlog( this, 
"enqueue sync block ${num}", ("num", peer_requested->last + 1) ); } uint32_t num = peer_requested->last + 1; - if(num == peer_requested->end_block) { - peer_requested.reset(); - peer_dlog( this, "completing enqueue_sync_block ${num}", ("num", num) ); - } controller& cc = my_impl->chain_plug->chain(); signed_block_ptr sb; @@ -1756,6 +1752,10 @@ namespace eosio { block_sync_throttling = false; block_sync_bytes_sent += enqueue_block( sb, true ); ++peer_requested->last; + if(num == peer_requested->end_block) { + peer_requested.reset(); + peer_dlog( this, "completing enqueue_sync_block ${num}", ("num", num) ); + } } else { peer_ilog( this, "enqueue sync, unable to fetch block ${num}, sending benign_other go away", ("num", num) ); peer_requested.reset(); // unable to provide requested blocks From 8954bdb978ac45909f3d898dcce2f9553df4f0ff Mon Sep 17 00:00:00 2001 From: Matt Witherspoon <32485495+spoonincode@users.noreply.github.com> Date: Tue, 17 Oct 2023 21:50:57 -0400 Subject: [PATCH 16/31] only use --eos-vm-oc-enable option when OC is available & fix location --- tests/test_read_only_trx.cpp | 6 +++++- 1 file changed, 5 insertions(+), 1 deletion(-) diff --git a/tests/test_read_only_trx.cpp b/tests/test_read_only_trx.cpp index 636c7228c7..27a6c6be1e 100644 --- a/tests/test_read_only_trx.cpp +++ b/tests/test_read_only_trx.cpp @@ -204,6 +204,9 @@ BOOST_AUTO_TEST_CASE(with_3_read_only_threads) { BOOST_AUTO_TEST_CASE(with_3_read_only_threads_no_tierup) { std::vector specific_args = { "-p", "eosio", "-e", "--read-only-threads=3", +#ifdef EOSIO_EOS_VM_OC_RUNTIME_ENABLED + "--eos-vm-oc-enable=none", +#endif "--max-transaction-time=10", "--abi-serializer-max-time-ms=999", "--read-only-write-window-time-us=100000", @@ -215,7 +218,6 @@ BOOST_AUTO_TEST_CASE(with_3_read_only_threads_no_tierup) { BOOST_AUTO_TEST_CASE(with_8_read_only_threads) { std::vector specific_args = { "-p", "eosio", "-e", "--read-only-threads=8", - "--eos-vm-oc-enable=none", "--max-transaction-time=10", 
"--abi-serializer-max-time-ms=999", "--read-only-write-window-time-us=10000", @@ -227,7 +229,9 @@ BOOST_AUTO_TEST_CASE(with_8_read_only_threads) { BOOST_AUTO_TEST_CASE(with_8_read_only_threads_no_tierup) { std::vector specific_args = { "-p", "eosio", "-e", "--read-only-threads=8", +#ifdef EOSIO_EOS_VM_OC_RUNTIME_ENABLED "--eos-vm-oc-enable=none", +#endif "--max-transaction-time=10", "--abi-serializer-max-time-ms=999", "--read-only-write-window-time-us=10000", From a396da9c2ccee402cb3f096307e16cd657e50e03 Mon Sep 17 00:00:00 2001 From: Kevin Heifner Date: Wed, 18 Oct 2023 10:33:16 -0500 Subject: [PATCH 17/31] GH-1677 Simplify split_log function as the protection against out of file descriptors seems like overkill and is difficult to follow/verify is correct. --- .../include/eosio/state_history/log.hpp | 40 +------------------ 1 file changed, 2 insertions(+), 38 deletions(-) diff --git a/libraries/state_history/include/eosio/state_history/log.hpp b/libraries/state_history/include/eosio/state_history/log.hpp index aca21a148c..41533cd740 100644 --- a/libraries/state_history/include/eosio/state_history/log.hpp +++ b/libraries/state_history/include/eosio/state_history/log.hpp @@ -905,32 +905,6 @@ class state_history_log { } void split_log() { - - fc::path log_file_path = log.get_file_path(); - fc::path index_file_path = index.get_file_path(); - - fc::cfile new_log_file; - fc::cfile new_index_file; - - fc::path tmp_log_file_path = log_file_path; - tmp_log_file_path.replace_extension("log.tmp"); - fc::path tmp_index_file_path = index_file_path; - tmp_index_file_path.replace_extension("index.tmp"); - - new_log_file.set_file_path(tmp_log_file_path); - new_index_file.set_file_path(tmp_index_file_path); - - try { - new_log_file.open(fc::cfile::truncate_rw_mode); - new_index_file.open(fc::cfile::truncate_rw_mode); - } catch (...) 
{ - wlog("Unable to open new state history log or index file for writing during log spliting, " - "continue writing to existing block log file\n"); - return; - } - - new_log_file.close(); - new_index_file.close(); index.close(); log.close(); @@ -938,19 +912,9 @@ class state_history_log { _index_begin_block = _begin_block = _end_block; - using std::swap; - swap(new_log_file, log); - swap(new_index_file, index); - - fc::rename(tmp_log_file_path, log_file_path); - fc::rename(tmp_index_file_path, index_file_path); - - log.set_file_path(log_file_path); - index.set_file_path(index_file_path); - - log.open(fc::cfile::update_rw_mode); + log.open(fc::cfile::truncate_rw_mode); log.seek_end(0); - index.open(fc::cfile::create_or_update_rw_mode); + index.open(fc::cfile::truncate_rw_mode); } }; // state_history_log From 1cb6a9dd07a40d42188d2f29beb54861574c4b64 Mon Sep 17 00:00:00 2001 From: Kevin Heifner Date: Wed, 18 Oct 2023 10:45:18 -0500 Subject: [PATCH 18/31] GH-1677 Use EOS_ASSERT instead of elog/assert --- .../include/eosio/state_history/log.hpp | 13 ++++--------- 1 file changed, 4 insertions(+), 9 deletions(-) diff --git a/libraries/state_history/include/eosio/state_history/log.hpp b/libraries/state_history/include/eosio/state_history/log.hpp index 41533cd740..b1ed4f4926 100644 --- a/libraries/state_history/include/eosio/state_history/log.hpp +++ b/libraries/state_history/include/eosio/state_history/log.hpp @@ -578,19 +578,14 @@ class state_history_log { if (block_num >= _begin_block && block_num < _end_block) { state_history_log_header header; get_entry(block_num, header); - if (chain::block_header::num_from_id(header.block_id) != block_num) { // entry does not match requested block num - elog("id does not match requested ${a} != ${b}", ("a", chain::block_header::num_from_id(header.block_id))("b", block_num)); - assert(false); - } + EOS_ASSERT(chain::block_header::num_from_id(header.block_id) == block_num, chain::plugin_exception, + "header id does not match requested 
${a} != ${b}", ("a", chain::block_header::num_from_id(header.block_id))("b", block_num)); return header.block_id; } return {}; } - if (chain::block_header::num_from_id(*result) != block_num) { // catalog failed to fetch correct block id - elog("id does not match requested ${a} != ${b}", ("a", chain::block_header::num_from_id(*result))("b", block_num)); - assert(false); - } - + EOS_ASSERT(chain::block_header::num_from_id(*result) == block_num, chain::plugin_exception, + "catalog id does not match requested ${a} != ${b}", ("a", chain::block_header::num_from_id(*result))("b", block_num)); return result; } From 0c5ff7e739c7e3fde00aaf0697b2e326c527a9ff Mon Sep 17 00:00:00 2001 From: Kevin Heifner Date: Thu, 19 Oct 2023 08:29:04 -0500 Subject: [PATCH 19/31] GH-1784 Rename cpu-effort-percent to produce-block-offset-ms and change meaning to be over complete round --- .../03_plugins/producer_plugin/index.md | 17 ++----- .../chain/include/eosio/chain/config.hpp | 2 +- .../producer_plugin/block_timing_util.hpp | 14 +++--- .../eosio/producer_plugin/producer_plugin.hpp | 4 +- plugins/producer_plugin/producer_plugin.cpp | 47 ++++++++++--------- .../test/test_block_timing_util.cpp | 22 ++++----- tests/PerformanceHarness/README.md | 26 +++++----- .../performance_test_basic.py | 4 +- tests/p2p_high_latency_test.py | 3 +- 9 files changed, 67 insertions(+), 72 deletions(-) diff --git a/docs/01_nodeos/03_plugins/producer_plugin/index.md b/docs/01_nodeos/03_plugins/producer_plugin/index.md index 533aaf5427..11157f5936 100644 --- a/docs/01_nodeos/03_plugins/producer_plugin/index.md +++ b/docs/01_nodeos/03_plugins/producer_plugin/index.md @@ -72,20 +72,9 @@ Config Options for eosio::producer_plugin: can extend during low usage (only enforced subjectively; use 1000 to not enforce any limit) - --produce-time-offset-us arg (=0) Offset of non last block producing time - in microseconds. Valid range 0 .. - -block_time_interval. 
- --last-block-time-offset-us arg (=-200000) - Offset of last block producing time in - microseconds. Valid range 0 .. - -block_time_interval. - --cpu-effort-percent arg (=80) Percentage of cpu block production time - used to produce block. Whole number - percentages, e.g. 80 for 80% - --last-block-cpu-effort-percent arg (=80) - Percentage of cpu block production time - used to produce last block. Whole - number percentages, e.g. 80 for 80% + --produce-block-offset-ms arg (=450) The number of milliseconds early the + last block of a production round should + be produced. --max-block-cpu-usage-threshold-us arg (=5000) Threshold of CPU block production to consider block full; when within diff --git a/libraries/chain/include/eosio/chain/config.hpp b/libraries/chain/include/eosio/chain/config.hpp index 6d81272944..48a1d4de94 100644 --- a/libraries/chain/include/eosio/chain/config.hpp +++ b/libraries/chain/include/eosio/chain/config.hpp @@ -76,7 +76,7 @@ const static uint32_t default_max_inline_action_size = 512 * 102 const static uint16_t default_max_inline_action_depth = 4; const static uint16_t default_max_auth_depth = 6; const static uint32_t default_sig_cpu_bill_pct = 50 * percent_1; // billable percentage of signature recovery -const static uint32_t default_block_cpu_effort_pct = 90 * percent_1; // percentage of block time used for producing block +const static uint32_t default_produce_block_offset_ms = 450; const static uint16_t default_controller_thread_pool_size = 2; const static uint32_t default_max_variable_signature_length = 16384u; const static uint32_t default_max_action_return_value_size = 256; diff --git a/plugins/producer_plugin/include/eosio/producer_plugin/block_timing_util.hpp b/plugins/producer_plugin/include/eosio/producer_plugin/block_timing_util.hpp index b4e3741874..f4d68061c7 100644 --- a/plugins/producer_plugin/include/eosio/producer_plugin/block_timing_util.hpp +++ b/plugins/producer_plugin/include/eosio/producer_plugin/block_timing_util.hpp 
@@ -42,19 +42,19 @@ namespace block_timing_util { // In the past, a producer would always start a block `config::block_interval_us` ahead of its block time. However, // it causes the last block in a block production round being released too late for the next producer to have // received it and start producing on schedule. To mitigate the problem, we leave no time gap in block producing. For - // example, given block_interval=500 ms and cpu effort=400 ms, assuming the our round start at time point 0; in the + // example, given block_interval=500 ms and cpu effort=400 ms, assuming our round starts at time point 0; in the // past, the block start time points would be at time point -500, 0, 500, 1000, 1500, 2000 .... With this new // approach, the block time points would become -500, -100, 300, 700, 1100 ... - inline fc::time_point production_round_block_start_time(uint32_t cpu_effort_us, chain::block_timestamp_type block_time) { + inline fc::time_point production_round_block_start_time(fc::microseconds cpu_effort_us, chain::block_timestamp_type block_time) { uint32_t block_slot = block_time.slot; uint32_t production_round_start_block_slot = (block_slot / chain::config::producer_repetitions) * chain::config::producer_repetitions; uint32_t production_round_index = block_slot % chain::config::producer_repetitions; return chain::block_timestamp_type(production_round_start_block_slot - 1).to_time_point() + - fc::microseconds(cpu_effort_us * production_round_index); + fc::microseconds(cpu_effort_us.count() * production_round_index); } - inline fc::time_point calculate_producing_block_deadline(uint32_t cpu_effort_us, chain::block_timestamp_type block_time) { + inline fc::time_point calculate_producing_block_deadline(fc::microseconds cpu_effort_us, chain::block_timestamp_type block_time) { auto estimated_deadline = production_round_block_start_time(cpu_effort_us, block_time) + fc::microseconds(cpu_effort_us); auto now = fc::time_point::now(); if (estimated_deadline > now) { 
@@ -62,8 +62,8 @@ namespace block_timing_util { } else { // This could only happen when the producer stop producing and then comes back alive in the middle of its own // production round. In this case, we just use the hard deadline. - const auto hard_deadline = block_time.to_time_point() - fc::microseconds(chain::config::block_interval_us - cpu_effort_us); - return std::min(hard_deadline, now + fc::microseconds(cpu_effort_us)); + const auto hard_deadline = block_time.to_time_point() - fc::microseconds(chain::config::block_interval_us - cpu_effort_us.count()); + return std::min(hard_deadline, now + cpu_effort_us); } } @@ -118,7 +118,7 @@ namespace block_timing_util { // Return the *next* block start time according to its block time slot. // Returns empty optional if no producers are in the active_schedule. // block_num is only used for watermark minimum offset. - inline std::optional calculate_producer_wake_up_time(uint32_t cpu_effort_us, uint32_t block_num, + inline std::optional calculate_producer_wake_up_time(fc::microseconds cpu_effort_us, uint32_t block_num, const chain::block_timestamp_type& ref_block_time, const std::set& producers, const std::vector& active_schedule, diff --git a/plugins/producer_plugin/include/eosio/producer_plugin/producer_plugin.hpp b/plugins/producer_plugin/include/eosio/producer_plugin/producer_plugin.hpp index f2c7e914f1..0f38ef35f6 100644 --- a/plugins/producer_plugin/include/eosio/producer_plugin/producer_plugin.hpp +++ b/plugins/producer_plugin/include/eosio/producer_plugin/producer_plugin.hpp @@ -17,7 +17,7 @@ class producer_plugin : public appbase::plugin { struct runtime_options { std::optional max_transaction_time; std::optional max_irreversible_block_age; - std::optional cpu_effort_us; + std::optional produce_block_offset_ms; std::optional subjective_cpu_leeway_us; std::optional greylist_limit; }; @@ -196,7 +196,7 @@ class producer_plugin : public appbase::plugin { } //eosio -FC_REFLECT(eosio::producer_plugin::runtime_options, 
(max_transaction_time)(max_irreversible_block_age)(cpu_effort_us)(subjective_cpu_leeway_us)(greylist_limit)); +FC_REFLECT(eosio::producer_plugin::runtime_options, (max_transaction_time)(max_irreversible_block_age)(produce_block_offset_ms)(subjective_cpu_leeway_us)(greylist_limit)); FC_REFLECT(eosio::producer_plugin::greylist_params, (accounts)); FC_REFLECT(eosio::producer_plugin::whitelist_blacklist, (actor_whitelist)(actor_blacklist)(contract_whitelist)(contract_blacklist)(action_blacklist)(key_blacklist) ) FC_REFLECT(eosio::producer_plugin::integrity_hash_information, (head_block_id)(integrity_hash)) diff --git a/plugins/producer_plugin/producer_plugin.cpp b/plugins/producer_plugin/producer_plugin.cpp index d574254c73..7fc20de7cc 100644 --- a/plugins/producer_plugin/producer_plugin.cpp +++ b/plugins/producer_plugin/producer_plugin.cpp @@ -371,7 +371,6 @@ class producer_plugin_impl : public std::enable_shared_from_thischain().get_subjective_cpu_leeway() ? chain_plug->chain().get_subjective_cpu_leeway()->count() : std::optional(), chain_plug->chain().get_greylist_limit()}; @@ -501,14 +500,14 @@ class producer_plugin_impl : public std::enable_shared_from_this _max_transaction_time_ms; // modified by app thread, read by net_plugin thread pool std::atomic _received_block{0}; // modified by net_plugin thread pool fc::microseconds _max_irreversible_block_age_us; - int32_t _cpu_effort_us = 0; + // produce-block-offset is in terms of the complete round, internally use calculated value for each block of round + fc::microseconds _produce_block_cpu_effort_us; fc::time_point _pending_block_deadline; uint32_t _max_block_cpu_usage_threshold_us = 0; uint32_t _max_block_net_usage_threshold_bytes = 0; bool _disable_subjective_p2p_billing = true; bool _disable_subjective_api_billing = true; fc::time_point _irreversible_block_time; - fc::time_point _idle_trx_time{fc::time_point::now()}; std::vector _protocol_features_to_activate; bool _protocol_features_signaled = false; // to mark 
whether it has been signaled in start_block @@ -613,6 +612,17 @@ class producer_plugin_impl : public std::enable_shared_from_this next); + void set_produce_block_offset(uint32_t produce_block_offset_ms) { + EOS_ASSERT(produce_block_offset_ms < (config::producer_repetitions * config::block_interval_ms), plugin_config_exception, + "produce-block-offset-ms ${p} must be [0 - ${max})", ("p", produce_block_offset_ms)("max", config::producer_repetitions * config::block_interval_ms)); + _produce_block_cpu_effort_us = fc::milliseconds( config::block_interval_ms - produce_block_offset_ms / config::producer_repetitions ); + } + + fc::microseconds get_produce_block_offset() const { + return fc::milliseconds( (config::block_interval_ms * config::producer_repetitions) - + ((_produce_block_cpu_effort_us.count()/1000) * config::producer_repetitions) ); + } + void on_block(const block_state_ptr& bsp) { auto& chain = chain_plug->chain(); auto before = _unapplied_transactions.size(); @@ -1033,8 +1043,8 @@ void producer_plugin::set_program_options( "account that can not access to extended CPU/NET virtual resources") ("greylist-limit", boost::program_options::value()->default_value(1000), "Limit (between 1 and 1000) on the multiple that CPU/NET virtual resources can extend during low usage (only enforced subjectively; use 1000 to not enforce any limit)") - ("cpu-effort-percent", bpo::value()->default_value(config::default_block_cpu_effort_pct / config::percent_1), - "Percentage of cpu block production time used to produce block. Whole number percentages, e.g. 
80 for 80%") + ("produce-block-offset-ms", bpo::value()->default_value(config::default_produce_block_offset_ms), + "The number of milliseconds early the last block of a production round should be produced.") ("max-block-cpu-usage-threshold-us", bpo::value()->default_value( 5000 ), "Threshold of CPU block production to consider block full; when within threshold of max-block-cpu-usage block can be produced immediately") ("max-block-net-usage-threshold-bytes", bpo::value()->default_value( 1024 ), @@ -1126,12 +1136,7 @@ void producer_plugin_impl::plugin_initialize(const boost::program_options::varia _account_fails.set_max_failures_per_account(options.at("subjective-account-max-failures").as(), subjective_account_max_failures_window_size); - uint32_t cpu_effort_pct = options.at("cpu-effort-percent").as(); - EOS_ASSERT(cpu_effort_pct >= 0 && cpu_effort_pct <= 100, plugin_config_exception, - "cpu-effort-percent ${pct} must be 0 - 100", ("pct", cpu_effort_pct)); - cpu_effort_pct *= config::percent_1; - - _cpu_effort_us = EOS_PERCENT(config::block_interval_us, cpu_effort_pct); + set_produce_block_offset(options.at("produce-block-offset-ms").as()); _max_block_cpu_usage_threshold_us = options.at("max-block-cpu-usage-threshold-us").as(); EOS_ASSERT(_max_block_cpu_usage_threshold_us < config::block_interval_us, @@ -1441,8 +1446,8 @@ void producer_plugin_impl::update_runtime_options(const producer_plugin::runtime check_speculating = true; } - if (options.cpu_effort_us) { - _cpu_effort_us = *options.cpu_effort_us; + if (options.produce_block_offset_ms) { + set_produce_block_offset(*options.produce_block_offset_ms); } if (check_speculating && in_speculating_mode()) { @@ -1838,10 +1843,10 @@ producer_plugin_impl::start_block_result producer_plugin_impl::start_block() { } } - _pending_block_deadline = block_timing_util::calculate_producing_block_deadline(_cpu_effort_us, block_time); + _pending_block_deadline = 
block_timing_util::calculate_producing_block_deadline(_produce_block_cpu_effort_us, block_time); } else if (!_producers.empty()) { // cpu effort percent doesn't matter for the first block of the round, use max (block_interval_us) for cpu effort - auto wake_time = block_timing_util::calculate_producer_wake_up_time(config::block_interval_us, chain.head_block_num(), chain.head_block_time(), + auto wake_time = block_timing_util::calculate_producer_wake_up_time(fc::microseconds(config::block_interval_us), chain.head_block_num(), chain.head_block_time(), _producers, chain.head_block_state()->active_schedule.producers, _producer_watermarks); if (wake_time) @@ -2004,8 +2009,8 @@ bool producer_plugin_impl::remove_expired_trxs(const fc::time_point& deadline) { }); if (exhausted && in_producing_mode()) { - fc_wlog(_log, "Unable to process all expired transactions of the ${n} transactions in the unapplied queue before deadline, " - "Expired ${expired}", ("n", orig_count)("expired", num_expired)); + fc_wlog(_log, "Unable to process all expired transactions of the ${n} transactions in the unapplied queue before deadline ${d}, " + "Expired ${expired}", ("n", orig_count)("d", deadline)("expired", num_expired)); } else { fc_dlog(_log, "Processed ${ex} expired transactions of the ${n} transactions in the unapplied queue.", ("n", orig_count)("ex", num_expired)); } @@ -2473,7 +2478,7 @@ void producer_plugin_impl::schedule_production_loop() { if (!_producers.empty() && !production_disabled_by_policy()) { chain::controller& chain = chain_plug->chain(); fc_dlog(_log, "Waiting till another block is received and scheduling Speculative/Production Change"); - auto wake_time = block_timing_util::calculate_producer_wake_up_time(_cpu_effort_us, chain.head_block_num(), calculate_pending_block_time(), + auto wake_time = block_timing_util::calculate_producer_wake_up_time(_produce_block_cpu_effort_us, chain.head_block_num(), calculate_pending_block_time(), _producers, 
chain.head_block_state()->active_schedule.producers, _producer_watermarks); schedule_delayed_production_loop(weak_from_this(), wake_time); @@ -2492,7 +2497,7 @@ void producer_plugin_impl::schedule_production_loop() { chain::controller& chain = chain_plug->chain(); fc_dlog(_log, "Speculative Block Created; Scheduling Speculative/Production Change"); EOS_ASSERT(chain.is_building_block(), missing_pending_block_state, "speculating without pending_block_state"); - auto wake_time = block_timing_util::calculate_producer_wake_up_time(_cpu_effort_us, chain.pending_block_num(), chain.pending_block_timestamp(), + auto wake_time = block_timing_util::calculate_producer_wake_up_time(_produce_block_cpu_effort_us, chain.pending_block_num(), chain.pending_block_timestamp(), _producers, chain.head_block_state()->active_schedule.producers, _producer_watermarks); schedule_delayed_production_loop(weak_from_this(), wake_time); @@ -2509,7 +2514,7 @@ void producer_plugin_impl::schedule_maybe_produce_block(bool exhausted) { assert(in_producing_mode()); // we succeeded but block may be exhausted static const boost::posix_time::ptime epoch(boost::gregorian::date(1970, 1, 1)); - auto deadline = block_timing_util::calculate_producing_block_deadline(_cpu_effort_us, chain.pending_block_time()); + auto deadline = block_timing_util::calculate_producing_block_deadline(_produce_block_cpu_effort_us, chain.pending_block_time()); if (!exhausted && deadline > fc::time_point::now()) { // ship this block off no later than its deadline diff --git a/plugins/producer_plugin/test/test_block_timing_util.cpp b/plugins/producer_plugin/test/test_block_timing_util.cpp index efb045b477..a3a43452e8 100644 --- a/plugins/producer_plugin/test/test_block_timing_util.cpp +++ b/plugins/producer_plugin/test/test_block_timing_util.cpp @@ -25,7 +25,7 @@ BOOST_AUTO_TEST_CASE(test_production_round_block_start_time) { for (int i = 0; i < eosio::chain::config::producer_repetitions; ++i, expected_start_time = expected_start_time 
+ cpu_effort) { auto block_time = eosio::chain::block_timestamp_type(production_round_1st_block_slot + i); - BOOST_CHECK_EQUAL(eosio::block_timing_util::production_round_block_start_time(cpu_effort_us, block_time), expected_start_time); + BOOST_CHECK_EQUAL(eosio::block_timing_util::production_round_block_start_time(cpu_effort, block_time), expected_start_time); } } @@ -43,7 +43,7 @@ BOOST_AUTO_TEST_CASE(test_calculate_block_deadline) { for (int i = 0; i < eosio::chain::config::producer_repetitions; ++i) { auto block_time = eosio::chain::block_timestamp_type(production_round_1st_block_slot + i); auto expected_deadline = block_time.to_time_point() - fc::milliseconds((i + 1) * 100); - BOOST_CHECK_EQUAL(calculate_producing_block_deadline(cpu_effort_us, block_time), + BOOST_CHECK_EQUAL(calculate_producing_block_deadline(cpu_effort, block_time), expected_deadline); fc::mock_time_traits::set_now(expected_deadline); } @@ -56,18 +56,18 @@ BOOST_AUTO_TEST_CASE(test_calculate_block_deadline) { auto second_block_time = eosio::chain::block_timestamp_type(production_round_1st_block_slot + 1); fc::mock_time_traits::set_now(second_block_time.to_time_point() - fc::milliseconds(200)); auto second_block_hard_deadline = second_block_time.to_time_point() - fc::milliseconds(100); - BOOST_CHECK_EQUAL(calculate_producing_block_deadline(cpu_effort_us, second_block_time), + BOOST_CHECK_EQUAL(calculate_producing_block_deadline(cpu_effort, second_block_time), second_block_hard_deadline); // use previous deadline as now fc::mock_time_traits::set_now(second_block_hard_deadline); auto third_block_time = eosio::chain::block_timestamp_type(production_round_1st_block_slot + 2); - BOOST_CHECK_EQUAL(calculate_producing_block_deadline(cpu_effort_us, third_block_time), + BOOST_CHECK_EQUAL(calculate_producing_block_deadline(cpu_effort, third_block_time), third_block_time.to_time_point() - fc::milliseconds(300)); // use previous deadline as now 
fc::mock_time_traits::set_now(third_block_time.to_time_point() - fc::milliseconds(300)); auto forth_block_time = eosio::chain::block_timestamp_type(production_round_1st_block_slot + 3); - BOOST_CHECK_EQUAL(calculate_producing_block_deadline(cpu_effort_us, forth_block_time), + BOOST_CHECK_EQUAL(calculate_producing_block_deadline(cpu_effort, forth_block_time), forth_block_time.to_time_point() - fc::milliseconds(400)); /////////////////////////////////////////////////////////////////////////////////////////////////// @@ -75,21 +75,21 @@ BOOST_AUTO_TEST_CASE(test_calculate_block_deadline) { auto seventh_block_time = eosio::chain::block_timestamp_type(production_round_1st_block_slot + 6); fc::mock_time_traits::set_now(seventh_block_time.to_time_point() - fc::milliseconds(500)); - BOOST_CHECK_EQUAL(calculate_producing_block_deadline(cpu_effort_us, seventh_block_time), + BOOST_CHECK_EQUAL(calculate_producing_block_deadline(cpu_effort, seventh_block_time), seventh_block_time.to_time_point() - fc::milliseconds(100)); // use previous deadline as now fc::mock_time_traits::set_now(seventh_block_time.to_time_point() - fc::milliseconds(100)); auto eighth_block_time = eosio::chain::block_timestamp_type(production_round_1st_block_slot + 7); - BOOST_CHECK_EQUAL(calculate_producing_block_deadline(cpu_effort_us, eighth_block_time), + BOOST_CHECK_EQUAL(calculate_producing_block_deadline(cpu_effort, eighth_block_time), eighth_block_time.to_time_point() - fc::milliseconds(200)); // use previous deadline as now fc::mock_time_traits::set_now(eighth_block_time.to_time_point() - fc::milliseconds(200)); auto ninth_block_time = eosio::chain::block_timestamp_type(production_round_1st_block_slot + 8); - BOOST_CHECK_EQUAL(calculate_producing_block_deadline(cpu_effort_us, ninth_block_time), + BOOST_CHECK_EQUAL(calculate_producing_block_deadline(cpu_effort, ninth_block_time), ninth_block_time.to_time_point() - fc::milliseconds(300)); } } @@ -102,7 +102,7 @@ 
BOOST_AUTO_TEST_CASE(test_calculate_producer_wake_up_time) { producer_watermarks empty_watermarks; // use full cpu effort for most of these tests since calculate_producing_block_deadline is tested above - constexpr uint32_t full_cpu_effort = eosio::chain::config::block_interval_us; + constexpr fc::microseconds full_cpu_effort = fc::microseconds{eosio::chain::config::block_interval_us}; { // no producers BOOST_CHECK_EQUAL(calculate_producer_wake_up_time(full_cpu_effort, 2, chain::block_timestamp_type{}, {}, {}, empty_watermarks), std::optional{}); @@ -206,7 +206,7 @@ BOOST_AUTO_TEST_CASE(test_calculate_producer_wake_up_time) { BOOST_CHECK_EQUAL(calculate_producer_wake_up_time(full_cpu_effort, 2, block_timestamp, producers, active_schedule, empty_watermarks), expected_block_time); // cpu_effort at 50%, initc - constexpr uint32_t half_cpu_effort = eosio::chain::config::block_interval_us / 2u; + constexpr fc::microseconds half_cpu_effort = fc::microseconds{eosio::chain::config::block_interval_us / 2u}; producers = std::set{ "initc"_n }; block_timestamp = block_timestamp_type(prod_round_1st_block_slot); expected_block_time = block_timestamp_type(prod_round_1st_block_slot + 2*config::producer_repetitions).to_time_point(); @@ -221,7 +221,7 @@ BOOST_AUTO_TEST_CASE(test_calculate_producer_wake_up_time) { block_timestamp = block_timestamp_type(prod_round_1st_block_slot + 2*config::producer_repetitions + 2); // second in round is 50% sooner expected_block_time = block_timestamp.to_time_point(); - expected_block_time -= fc::microseconds(2*half_cpu_effort); + expected_block_time -= fc::microseconds(2*half_cpu_effort.count()); BOOST_CHECK_EQUAL(calculate_producer_wake_up_time(half_cpu_effort, 2, block_timestamp, producers, active_schedule, empty_watermarks), expected_block_time); } { // test watermark diff --git a/tests/PerformanceHarness/README.md b/tests/PerformanceHarness/README.md index 15c2fadc00..42ff944ef0 100644 --- a/tests/PerformanceHarness/README.md +++ 
b/tests/PerformanceHarness/README.md @@ -504,7 +504,7 @@ usage: PerformanceHarnessScenarioRunner.py findMax testBpOpMode overrideBasicTes [--cluster-log-lvl {all,debug,info,warn,error,off}] [--net-threads NET_THREADS] [--disable-subjective-billing DISABLE_SUBJECTIVE_BILLING] - [--cpu-effort-percent CPU_EFFORT_PERCENT] + [--produce-block-offset-ms PRODUCE_BLOCK_OFFSET_MS] [--producer-threads PRODUCER_THREADS] [--read-only-write-window-time-us READ_ONLY_WRITE_WINDOW_TIME_US] [--read-only-read-window-time-us READ_ONLY_READ_WINDOW_TIME_US] @@ -579,8 +579,9 @@ Performance Test Basic Base: Number of worker threads in net_plugin thread pool --disable-subjective-billing DISABLE_SUBJECTIVE_BILLING Disable subjective CPU billing for API/P2P transactions - --cpu-effort-percent CPU_EFFORT_PERCENT - Percentage of cpu block production time used to produce block. Whole number percentages, e.g. 80 for 80% + --produce-block-offset-ms PRODUCE_BLOCK_OFFSET_MS + The number of milliseconds early the last block of a production round should + be produced. 
--producer-threads PRODUCER_THREADS Number of worker threads in producer thread pool --read-only-write-window-time-us READ_ONLY_WRITE_WINDOW_TIME_US @@ -663,7 +664,7 @@ The following classes and scripts are typically used by the Performance Harness [--cluster-log-lvl {all,debug,info,warn,error,off}] [--net-threads NET_THREADS] [--disable-subjective-billing DISABLE_SUBJECTIVE_BILLING] - [--cpu-effort-percent CPU_EFFORT_PERCENT] + [--produce-block-offset-ms PRODUCE_BLOCK_OFFSET_MS] [--producer-threads PRODUCER_THREADS] [--http-max-in-flight-requests HTTP_MAX_IN_FLIGHT_REQUESTS] [--http-max-response-time-ms HTTP_MAX_RESPONSE_TIME_MS] @@ -742,8 +743,9 @@ Performance Test Basic Base: Number of worker threads in net_plugin thread pool (default: 4) --disable-subjective-billing DISABLE_SUBJECTIVE_BILLING Disable subjective CPU billing for API/P2P transactions (default: True) - --cpu-effort-percent CPU_EFFORT_PERCENT - Percentage of cpu block production time used to produce block. Whole number percentages, e.g. 80 for 80% (default: 100) + --produce-block-offset-ms PRODUCE_BLOCK_OFFSET_MS + The number of milliseconds early the last block of a production round should + be produced. 
--producer-threads PRODUCER_THREADS Number of worker threads in producer thread pool (default: 2) --http-max-in-flight-requests HTTP_MAX_IN_FLIGHT_REQUESTS @@ -1586,9 +1588,9 @@ Finally, the full detail test report for each of the determined max TPS throughp "greylistLimit": null, "_greylistLimitNodeosDefault": 1000, "_greylistLimitNodeosArg": "--greylist-limit", - "cpuEffortPercent": 100, - "_cpuEffortPercentNodeosDefault": 90, - "_cpuEffortPercentNodeosArg": "--cpu-effort-percent", + "produceBlockOffsetMs": 0, + "_produceBlockOffsetMsDefault": 450, + "_produceBlockOffsetMsArg": "--produce-block-offset-ms", "maxBlockCpuUsageThresholdUs": null, "_maxBlockCpuUsageThresholdUsNodeosDefault": 5000, "_maxBlockCpuUsageThresholdUsNodeosArg": "--max-block-cpu-usage-threshold-us", @@ -2246,9 +2248,9 @@ The Performance Test Basic generates, by default, a report that details results "greylistLimit": null, "_greylistLimitNodeosDefault": 1000, "_greylistLimitNodeosArg": "--greylist-limit", - "cpuEffortPercent": 100, - "_cpuEffortPercentNodeosDefault": 90, - "_cpuEffortPercentNodeosArg": "--cpu-effort-percent", + "produceBlockOffsetMs": 0, + "_produceBlockOffsetMsDefault": 450, + "_produceBlockOffsetMsArg": "--produce-block-offset-ms", "maxBlockCpuUsageThresholdUs": null, "_maxBlockCpuUsageThresholdUsNodeosDefault": 5000, "_maxBlockCpuUsageThresholdUsNodeosArg": "--max-block-cpu-usage-threshold-us", diff --git a/tests/PerformanceHarness/performance_test_basic.py b/tests/PerformanceHarness/performance_test_basic.py index 5fc66778b9..f3448e56cd 100755 --- a/tests/PerformanceHarness/performance_test_basic.py +++ b/tests/PerformanceHarness/performance_test_basic.py @@ -663,7 +663,7 @@ def setupClusterConfig(args) -> ClusterConfig: producerPluginArgs = ProducerPluginArgs(disableSubjectiveApiBilling=args.disable_subjective_billing, disableSubjectiveP2pBilling=args.disable_subjective_billing, - cpuEffortPercent=args.cpu_effort_percent, + 
produceBlockOffsetMs=args.produce_block_offset_ms, producerThreads=args.producer_threads, maxTransactionTime=-1, readOnlyWriteWindowTimeUs=args.read_only_write_window_time_us, readOnlyReadWindowTimeUs=args.read_only_read_window_time_us) @@ -720,7 +720,7 @@ def _createBaseArgumentParser(defEndpointApiDef: str, defProdNodeCnt: int, defVa choices=["all", "debug", "info", "warn", "error", "off"], default="info") ptbBaseParserGroup.add_argument("--net-threads", type=int, help=argparse.SUPPRESS if suppressHelp else "Number of worker threads in net_plugin thread pool", default=4) ptbBaseParserGroup.add_argument("--disable-subjective-billing", type=bool, help=argparse.SUPPRESS if suppressHelp else "Disable subjective CPU billing for API/P2P transactions", default=True) - ptbBaseParserGroup.add_argument("--cpu-effort-percent", type=int, help=argparse.SUPPRESS if suppressHelp else "Percentage of cpu block production time used to produce block. Whole number percentages, e.g. 80 for 80%%", default=100) + ptbBaseParserGroup.add_argument("--produce-block-offset-ms", type=int, help=argparse.SUPPRESS if suppressHelp else "The number of milliseconds early the last block of a production round should be produced.", default=0) ptbBaseParserGroup.add_argument("--producer-threads", type=int, help=argparse.SUPPRESS if suppressHelp else "Number of worker threads in producer thread pool", default=2) ptbBaseParserGroup.add_argument("--read-only-write-window-time-us", type=int, help=argparse.SUPPRESS if suppressHelp else "Time in microseconds the write window lasts.", default=200000) ptbBaseParserGroup.add_argument("--read-only-read-window-time-us", type=int, help=argparse.SUPPRESS if suppressHelp else "Time in microseconds the read window lasts.", default=60000) diff --git a/tests/p2p_high_latency_test.py b/tests/p2p_high_latency_test.py index 861fade6a0..2b8028209c 100644 --- a/tests/p2p_high_latency_test.py +++ b/tests/p2p_high_latency_test.py @@ -68,8 +68,7 @@ def exec(cmd): try: 
TestHelper.printSystemInfo("BEGIN") - traceNodeosArgs=" --plugin eosio::producer_plugin --produce-time-offset-us 0 --last-block-time-offset-us 0 --cpu-effort-percent 100 \ - --last-block-cpu-effort-percent 100 --producer-threads 1 --plugin eosio::net_plugin --net-threads 1" + traceNodeosArgs=" --plugin eosio::producer_plugin --produce-block-offset-ms 0 --producer-threads 1 --plugin eosio::net_plugin --net-threads 1" if cluster.launch(pnodes=1, totalNodes=totalNodes, totalProducers=1, specificExtraNodeosArgs=specificExtraNodeosArgs, extraNodeosArgs=traceNodeosArgs) is False: Utils.cmdError("launcher") Utils.errorExit("Failed to stand up eos cluster.") From aee1c1d59f5533c9a604e1534711736d78803b05 Mon Sep 17 00:00:00 2001 From: Kevin Heifner Date: Thu, 19 Oct 2023 13:59:59 -0500 Subject: [PATCH 20/31] GH-1677 Make sure we append to index file --- libraries/state_history/include/eosio/state_history/log.hpp | 1 + 1 file changed, 1 insertion(+) diff --git a/libraries/state_history/include/eosio/state_history/log.hpp b/libraries/state_history/include/eosio/state_history/log.hpp index b1ed4f4926..d3765080d5 100644 --- a/libraries/state_history/include/eosio/state_history/log.hpp +++ b/libraries/state_history/include/eosio/state_history/log.hpp @@ -541,6 +541,7 @@ class state_history_log { "wrote payload with incorrect size to ${name}.log", ("name", name)); fc::raw::pack(log, pos); + index.seek_end(0); fc::raw::pack(index, pos); if (_begin_block == _end_block) _index_begin_block = _begin_block = block_num; From 5ca96dbb9ea4ebf41288ac5868517d2e581a45f8 Mon Sep 17 00:00:00 2001 From: Kevin Heifner Date: Fri, 20 Oct 2023 07:42:09 -0500 Subject: [PATCH 21/31] GH-1784 Update block producing doc --- .../10_block-producing-explained.md | 112 +++++++----------- 1 file changed, 43 insertions(+), 69 deletions(-) diff --git a/docs/01_nodeos/03_plugins/producer_plugin/10_block-producing-explained.md b/docs/01_nodeos/03_plugins/producer_plugin/10_block-producing-explained.md index 
1c3d56ef14..147785d519 100644 --- a/docs/01_nodeos/03_plugins/producer_plugin/10_block-producing-explained.md +++ b/docs/01_nodeos/03_plugins/producer_plugin/10_block-producing-explained.md @@ -4,87 +4,61 @@ content_title: Block Production Explained For simplicity of the explanation let's consider the following notations: -m = max_block_cpu_usage +* `r` = `producer_repetitions = 12` (hard-coded value) +* `m` = `max_block_cpu_usage` (on-chain consensus value) +* `n` = `max_block_net_usage` (on-chain consensus value) +* `t` = `block-time` +* `e` = `produce-block-offset-ms` (nodeos configuration) +* `w` = `block-time-interval = 500ms` (hard-coded value) +* `a` = `produce-block-early-amount = w - (w - (e / r)) = e / r ms` (how much to release each block of round early by) +* `l` = `produce-block-time = t - a` +* `p` = `produce block time window = w - a` (amount of wall clock time to produce a block) +* `c` = `billed_cpu_in_block = minimum(m, w - a)` +* `n` = `network tcp/ip latency` +* `h` = `block header validation time ms` + +Peer validation for similar hardware/version/config will be <= `m` + +**Let's consider for exemplification the following two BPs and their network topology as depicted in the below diagram** -t = block-time - -e = last-block-cpu-effort-percent - -w = block_time_interval = 500ms - -a = produce-block-early-amount = (w - w*e/100) ms - -p = produce-block-time; p = t - a - -c = billed_cpu_in_block = minimum(m, w - a) - -n = network tcp/ip latency - -peer validation for similar hardware/eosio-version/config will be <= m - -**Let's consider for exemplification the following four BPs and their network topology as depicted in below diagram** - - -```dot-svg -#p2p_local_chain_prunning.dot - local chain prunning -# -#notes: * to see image copy/paste to https://dreampuf.github.io/GraphvizOnline -# * image will be rendered by gatsby-remark-graphviz plugin in eosio docs. - -digraph { - newrank=true #allows ranks inside subgraphs (important!) 
- compound=true #allows edges connecting nodes with subgraphs - graph [rankdir=LR] - node [style=filled, fillcolor=lightgray, shape=square, fixedsize=true, width=.55, fontsize=10] - edge [dir=both, arrowsize=.6, weight=100] - splines=false - - subgraph cluster_chain { - label="Block Producers Peers"; labelloc="b" - graph [color=invis] - b0 [label="...", color=invis, style=""] - b1 [label="BP-A"]; b2 [label="BP-A\nPeer"]; b3 [label="BP-B\nPeer"]; b4 [label="BP-B"] - b5 [label="...", color=invis, style=""] - b0 -> b1 -> b2 -> b3 -> b4 -> b5 - } //cluster_chain - -} //digraph +``` + +------+ +------+ +------+ +------+ + -->| BP-A |---->| BP-A |------>| BP-B |---->| BP-B | + +------+ | Peer | | Peer | +------+ + +------+ +------+ ``` -`BP-A` will send block at `p` and, - -`BP-B` needs block at time `t` or otherwise will drop it. +`BP-A` will send block at `l` and, `BP-B` needs block at time `t` or otherwise will drop it. If `BP-A`is producing 12 blocks as follows `b(lock) at t(ime) 1`, `bt 1.5`, `bt 2`, `bt 2.5`, `bt 3`, `bt 3.5`, `bt 4`, `bt 4.5`, `bt 5`, `bt 5.5`, `bt 6`, `bt 6.5` then `BP-B` needs `bt 6.5` by time `6.5` so it has `.5` to produce `bt 7`. Please notice that the time of `bt 7` minus `.5` equals the time of `bt 6.5` therefore time `t` is the last block time of `BP-A` and when `BP-B` needs to start its first block. -## Example 1 -`BP-A` has 50% e, m = 200ms, c = 200ms, n = 0ms, a = 250ms: -`BP-A` sends at (t-250ms) <-> `BP-A-Peer` processes for 200ms and sends at (t - 50ms) <-> `BP-B-Peer` processes for 200ms and sends at (t + 150ms) <-> arrive at `BP-B` 150ms too late. - -## Example 2 -`BP-A` has 40% e and m = 200ms, c = 200ms, n = 0ms, a = 300ms: -(t-300ms) <-> (+200ms) <-> (+200ms) <-> arrive at `BP-B` 100ms too late. +A block is produced and sent when either it reaches `m` or `n` or `p`. -## Example 3 -`BP-A` has 30% e and m = 200ms, c = 150ms, n = 0ms, a = 350ms: -(t-350ms) <-> (+150ms) <-> (+150ms) <-> arrive at `BP-B` with 50ms to spare. 
+Starting in Leap 4.0, blocks are propagated after block header validation. This means instead of `BP-A Peer` & `BP-B Peer` taking `m` time to validate and forward a block it only takes a small number of milliseconds to verify the block header and then forward the block. -## Example 4 -`BP-A` has 25% e and m = 200ms, c = 125ms, n = 0ms, a = 375ms: -(t-375ms) <-> (+125ms) <-> (+125ms) <-> arrive at `BP-B` with 125ms to spare. +Starting in Leap 5.0, blocks in a round are started immediately after the completion of the previous block. Before 5.0, blocks were always started on `w` intervals and a node would "sleep" between blocks if needed. In 5.0, the "sleeps" are all moved to the end of the block production round. -## Example 5 -`BP-A` has 10% e and m = 200ms, c = 50ms, n = 0ms, a = 450ms: -(t-450ms) <-> (+50ms) <-> (+50ms) <-> arrive at `BP-B` with 350ms to spare. +## Example 1: block arrives 110ms early (zero network latency between BP and immediate peer) +* `BP-A` has e = 120, n = 0ms, h = 5ms, a = 10ms +* `BP-A` sends b1 at `t1-10ms` => `BP-A-Peer` processes `h=5ms`, sends at `t-5ms` => `BP-B-Peer` processes `h=5ms`, sends at `t-0ms` => arrives at `BP-B` at `t`. +* `BP-A` starts b2 at `t1-10ms`, sends b2 at `t2-20ms` => `BP-A-Peer` processes `h=5ms`, sends at `t2-15ms` => `BP-B-Peer` processes `h=5ms`, sends at `t2-10ms` => arrives at `BP-B` at `t2-10ms`. +* `BP-A` starts b3 at `t2-20ms`, ... +* `BP-A` starts b12 at `t11-110ms`, sends b12 at `t12-120ms` => `BP-A-Peer` processes `h=5ms`, sends at `t12-115ms` => `BP-B-Peer` processes `h=5ms`, sends at `t12-110ms` => arrives at `BP-B` at `t12-110ms` -## Example 6 -`BP-A` has 10% e and m = 200ms, c = 50ms, n = 15ms, a = 450ms: -(t-450ms) <- +15ms -> (+50ms) <- +15ms -> (+50ms) <- +15ms -> `BP-B` <-> arrive with 305ms to spare. 
+## Example 2: block arrives 80ms early (zero network latency between BP and immediate peer) +* `BP-A` has e = 240, n = 150ms, h = 5ms, a = 20ms +* `BP-A` sends b1 at `t1-20ms` => `BP-A-Peer` processes `h=5ms`, sends at `t-15ms` =(150ms)> `BP-B-Peer` processes `h=5ms`, sends at `t+140ms` => arrives at `BP-B` at `t+140ms`. +* `BP-A` starts b2 at `t1-20ms`, sends b2 at `t2-40ms` => `BP-A-Peer` processes `h=5ms`, sends at `t2-35ms` =(150ms)> `BP-B-Peer` processes `h=5ms`, sends at `t2+120ms` => arrives at `BP-B` at `t2+120ms`. +* `BP-A` starts b3 at `t2-40ms`, ... +* `BP-A` starts b12 at `t11-220ms`, sends b12 at `t12-240ms` => `BP-A-Peer` processes `h=5ms`, sends at `t12-235ms` =(150ms)> `BP-B-Peer` processes `h=5ms`, sends at `t12-80ms` => arrives at `BP-B` at `t12-80ms` -## Example 7 -Example world-wide network:`BP-A`has 10% e and m = 200ms, c = 50ms, n = 15ms/250ms, a = 450ms: -(t-450ms) <- +15ms -> (+50ms) <- +250ms -> (+50ms) <- +15ms -> `BP-B` <-> arrive with 70ms to spare. +## Example 3: block arrives 16ms late and is dropped (zero network latency between BP and immediate peer) +* `BP-A` has e = 204, n = 200ms, h = 10ms, a = 17ms +* `BP-A` sends b1 at `t1-17ms` => `BP-A-Peer` processes `h=10ms`, sends at `t-7ms` =(200ms)> `BP-B-Peer` processes `h=10ms`, sends at `t+203ms` => arrives at `BP-B` at `t+203ms`. +* `BP-A` starts b2 at `t1-17ms`, sends b2 at `t2-34ms` => `BP-A-Peer` processes `h=10ms`, sends at `t2-24ms` =(200ms)> `BP-B-Peer` processes `h=10ms`, sends at `t2+186ms` => arrives at `BP-B` at `t2+186ms`. +* `BP-A` starts b3 at `t2-34ms`, ... +* `BP-A` starts b12 at `t11-187ms`, sends b12 at `t12-204ms` => `BP-A-Peer` processes `h=10ms`, sends at `t12-194ms` =(200ms)> `BP-B-Peer` processes `h=10ms`, sends at `t12+16ms` => arrives at `BP-B` at `t12+16ms` Running wasm-runtime=eos-vm-jit eos-vm-oc-enable on relay node will reduce the validation time.
From 7bd985028978d6e46c942aafbb1967c3acf87fb7 Mon Sep 17 00:00:00 2001 From: Kevin Heifner Date: Fri, 20 Oct 2023 10:15:36 -0500 Subject: [PATCH 22/31] GH-1784 Add example with full blocks --- .../10_block-producing-explained.md | 28 ++++++++++++++++--- 1 file changed, 24 insertions(+), 4 deletions(-) diff --git a/docs/01_nodeos/03_plugins/producer_plugin/10_block-producing-explained.md b/docs/01_nodeos/03_plugins/producer_plugin/10_block-producing-explained.md index 147785d519..b2bac29133 100644 --- a/docs/01_nodeos/03_plugins/producer_plugin/10_block-producing-explained.md +++ b/docs/01_nodeos/03_plugins/producer_plugin/10_block-producing-explained.md @@ -19,7 +19,7 @@ For simplicity of the explanation let's consider the following notations: Peer validation for similar hardware/version/config will be <= `m` -**Let's consider for exemplification the following two BPs and their network topology as depicted in the below diagram** +**Let's consider the example of the following two BPs and their network topology as depicted in the below diagram** ``` +------+ +------+ +------+ +------+ @@ -40,25 +40,45 @@ Starting in Leap 4.0, blocks are propagated after block header validation. This Starting in Leap 5.0, blocks in a round are started immediately after the completion of the previous block. Before 5.0, blocks were always started on `w` intervals and a node would "sleep" between blocks if needed. In 5.0, the "sleeps" are all moved to the end of the block production round. -## Example 1: block arrives 110ms early (zero network latency between BP and immediate peer) +## Example 1: block arrives 110ms early +* Assuming zero network latency between BP and immediate BP peer. +* Assuming blocks do not reach `m` and therefore take `w - a` time to produce. +* Assume block completion including signing takes zero time. 
* `BP-A` has e = 120, n = 0ms, h = 5ms, a = 10ms * `BP-A` sends b1 at `t1-10ms` => `BP-A-Peer` processes `h=5ms`, sends at `t-5ms` => `BP-B-Peer` processes `h=5ms`, sends at `t-0ms` => arrives at `BP-B` at `t`. * `BP-A` starts b2 at `t1-10ms`, sends b2 at `t2-20ms` => `BP-A-Peer` processes `h=5ms`, sends at `t2-15ms` => `BP-B-Peer` processes `h=5ms`, sends at `t2-10ms` => arrives at `BP-B` at `t2-10ms`. * `BP-A` starts b3 at `t2-20ms`, ... * `BP-A` starts b12 at `t11-110ms`, sends b12 at `t12-120ms` => `BP-A-Peer` processes `h=5ms`, sends at `t12-115ms` => `BP-B-Peer` processes `h=5ms`, sends at `t12-110ms` => arrives at `BP-B` at `t12-110ms` -## Example 2: block arrives 80ms early (zero network latency between BP and immediate peer) +## Example 2: block arrives 80ms early +* Assuming zero network latency between BP and immediate BP peer. +* Assuming blocks do not reach `m` and therefore take `w - a` time to produce. +* Assume block completion including signing takes zero time. * `BP-A` has e = 240, n = 150ms, h = 5ms, a = 20ms * `BP-A` sends b1 at `t1-20ms` => `BP-A-Peer` processes `h=5ms`, sends at `t-15ms` =(150ms)> `BP-B-Peer` processes `h=5ms`, sends at `t+140ms` => arrives at `BP-B` at `t+140ms`. * `BP-A` starts b2 at `t1-20ms`, sends b2 at `t2-40ms` => `BP-A-Peer` processes `h=5ms`, sends at `t2-35ms` =(150ms)> `BP-B-Peer` processes `h=5ms`, sends at `t2+120ms` => arrives at `BP-B` at `t2+120ms`. * `BP-A` starts b3 at `t2-40ms`, ... * `BP-A` starts b12 at `t11-220ms`, sends b12 at `t12-240ms` => `BP-A-Peer` processes `h=5ms`, sends at `t12-235ms` =(150ms)> `BP-B-Peer` processes `h=5ms`, sends at `t12-80ms` => arrives at `BP-B` at `t12-80ms` -## Example 3: block arrives 16ms late and is dropped (zero network latency between BP and immediate peer) +## Example 3: block arrives 16ms late and is dropped +* Assuming zero network latency between BP and immediate BP peer. +* Assuming blocks do not reach `m` and therefore take `w - a` time to produce. 
+* Assume block completion including signing takes zero time. * `BP-A` has e = 204, n = 200ms, h = 10ms, a = 17ms * `BP-A` sends b1 at `t1-17ms` => `BP-A-Peer` processes `h=10ms`, sends at `t-7ms` =(200ms)> `BP-B-Peer` processes `h=10ms`, sends at `t+203ms` => arrives at `BP-B` at `t+203ms`. * `BP-A` starts b2 at `t1-17ms`, sends b2 at `t2-34ms` => `BP-A-Peer` processes `h=10ms`, sends at `t2-24ms` =(200ms)> `BP-B-Peer` processes `h=10ms`, sends at `t2+186ms` => arrives at `BP-B` at `t2+186ms`. * `BP-A` starts b3 at `t2-34ms`, ... * `BP-A` starts b12 at `t11-187ms`, sends b12 at `t12-204ms` => `BP-A-Peer` processes `h=10ms`, sends at `t12-194ms` =(200ms)> `BP-B-Peer` processes `h=10ms`, sends at `t12+16ms` => arrives at `BP-B` at `t12-16ms` +## Example 4: full blocks are produced early +* Assuming zero network latency between BP and immediate BP peer. +* Assume all blocks are full as there are enough queued up unapplied transactions ready to fill all blocks. +* Assume a block can be produced with 200ms worth of transactions in 225ms worth of time. There is overhead for producing the block. +* `BP-A` has e = 120, n = 200ms, h = 10ms, a = 10ms +* `BP-A` sends b1 at `t1-275s` => `BP-A-Peer` processes `h=10ms`, sends at `t-265ms` =(200ms)> `BP-B-Peer` processes `h=10ms`, sends at `t-55ms` => arrives at `BP-B` at `t-55ms`. +* `BP-A` starts b2 at `t1-275ms`, sends b2 at `t2-550ms (t1-50ms)` => `BP-A-Peer` processes `h=10ms`, sends at `t2-540ms` => `BP-B-Peer` processes `h=10ms`, sends at `t2-530ms` => arrives at `BP-B` at `t2-530ms`. +* `BP-A` starts b3 at `t2-550ms`, ... +* `BP-A` starts b12 at `t11-3025ms`, sends b12 at `t12-3300ms` => `BP-A-Peer` processes `h=10ms`, sends at `t12-3290ms` => `BP-B-Peer` processes `h=10ms`, sends at `t12-3280ms` => arrives at `BP-B` at `t12-3280ms` + + Running wasm-runtime=eos-vm-jit eos-vm-oc-enable on relay node will reduce the validation time. 
From 694b88463db7ac4119eabfcdf4797e11f7ad154a Mon Sep 17 00:00:00 2001 From: Kevin Heifner Date: Fri, 20 Oct 2023 10:20:13 -0500 Subject: [PATCH 23/31] GH-1784 Add in network latency --- .../producer_plugin/10_block-producing-explained.md | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/docs/01_nodeos/03_plugins/producer_plugin/10_block-producing-explained.md b/docs/01_nodeos/03_plugins/producer_plugin/10_block-producing-explained.md index b2bac29133..80dbf09abc 100644 --- a/docs/01_nodeos/03_plugins/producer_plugin/10_block-producing-explained.md +++ b/docs/01_nodeos/03_plugins/producer_plugin/10_block-producing-explained.md @@ -76,9 +76,9 @@ Starting in Leap 5.0, blocks in a round are started immediately after the comple * Assume a block can be produced with 200ms worth of transactions in 225ms worth of time. There is overhead for producing the block. * `BP-A` has e = 120, n = 200ms, h = 10ms, a = 10ms * `BP-A` sends b1 at `t1-275s` => `BP-A-Peer` processes `h=10ms`, sends at `t-265ms` =(200ms)> `BP-B-Peer` processes `h=10ms`, sends at `t-55ms` => arrives at `BP-B` at `t-55ms`. -* `BP-A` starts b2 at `t1-275ms`, sends b2 at `t2-550ms (t1-50ms)` => `BP-A-Peer` processes `h=10ms`, sends at `t2-540ms` => `BP-B-Peer` processes `h=10ms`, sends at `t2-530ms` => arrives at `BP-B` at `t2-530ms`. +* `BP-A` starts b2 at `t1-275ms`, sends b2 at `t2-550ms (t1-50ms)` => `BP-A-Peer` processes `h=10ms`, sends at `t2-540ms` =(200ms)> `BP-B-Peer` processes `h=10ms`, sends at `t2-330ms` => arrives at `BP-B` at `t2-330ms`. * `BP-A` starts b3 at `t2-550ms`, ... 
-* `BP-A` starts b12 at `t11-3025ms`, sends b12 at `t12-3300ms` => `BP-A-Peer` processes `h=10ms`, sends at `t12-3290ms` => `BP-B-Peer` processes `h=10ms`, sends at `t12-3280ms` => arrives at `BP-B` at `t12-3280ms` +* `BP-A` starts b12 at `t11-3025ms`, sends b12 at `t12-3300ms` => `BP-A-Peer` processes `h=10ms`, sends at `t12-3290ms` =(200ms)> `BP-B-Peer` processes `h=10ms`, sends at `t12-3080ms` => arrives at `BP-B` at `t12-3080ms` Running wasm-runtime=eos-vm-jit eos-vm-oc-enable on relay node will reduce the validation time. From fbc83d1a41617ce82a5f5231944cbb18f46fe583 Mon Sep 17 00:00:00 2001 From: Kevin Heifner Date: Fri, 20 Oct 2023 11:06:51 -0500 Subject: [PATCH 24/31] GH-1784 n was used for two different variables --- .../producer_plugin/10_block-producing-explained.md | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/docs/01_nodeos/03_plugins/producer_plugin/10_block-producing-explained.md b/docs/01_nodeos/03_plugins/producer_plugin/10_block-producing-explained.md index 80dbf09abc..bb6cb9663e 100644 --- a/docs/01_nodeos/03_plugins/producer_plugin/10_block-producing-explained.md +++ b/docs/01_nodeos/03_plugins/producer_plugin/10_block-producing-explained.md @@ -6,7 +6,7 @@ For simplicity of the explanation let's consider the following notations: * `r` = `producer_repetitions = 12` (hard-coded value) * `m` = `max_block_cpu_usage` (on-chain consensus value) -* `n` = `max_block_net_usage` (on-chain consensus value) +* `u` = `max_block_net_usage` (on-chain consensus value) * `t` = `block-time` * `e` = `produce-block-offset-ms` (nodeos configuration) * `w` = `block-time-interval = 500ms` (hard-coded value) @@ -34,7 +34,7 @@ If `BP-A`is producing 12 blocks as follows `b(lock) at t(ime) 1`, `bt 1.5`, `bt Please notice that the time of `bt 7` minus `.5` equals the time of `bt 6.5` therefore time `t` is the last block time of `BP-A` and when `BP-B` needs to start its first block. 
-A block is produced and sent when either it reaches `m` or `n` or `p`. +A block is produced and sent when either it reaches `m` or `u` or `p`. Starting in Leap 4.0, blocks are propagated after block header validation. This means instead of `BP-A Peer` & `BP-B Peer` taking `m` time to validate and forward a block it only takes a small number of milliseconds to verify the block header and then forward the block. @@ -74,7 +74,7 @@ Starting in Leap 5.0, blocks in a round are started immediately after the comple * Assuming zero network latency between BP and immediate BP peer. * Assume all blocks are full as there are enough queued up unapplied transactions ready to fill all blocks. * Assume a block can be produced with 200ms worth of transactions in 225ms worth of time. There is overhead for producing the block. -* `BP-A` has e = 120, n = 200ms, h = 10ms, a = 10ms +* `BP-A` has e = 120, m = 200ms, n = 200ms, h = 10ms, a = 10ms * `BP-A` sends b1 at `t1-275s` => `BP-A-Peer` processes `h=10ms`, sends at `t-265ms` =(200ms)> `BP-B-Peer` processes `h=10ms`, sends at `t-55ms` => arrives at `BP-B` at `t-55ms`. * `BP-A` starts b2 at `t1-275ms`, sends b2 at `t2-550ms (t1-50ms)` => `BP-A-Peer` processes `h=10ms`, sends at `t2-540ms` =(200ms)> `BP-B-Peer` processes `h=10ms`, sends at `t2-330ms` => arrives at `BP-B` at `t2-330ms`. * `BP-A` starts b3 at `t2-550ms`, ... 
From 48422bf0d5233cbd23148837139b19c98026ee12 Mon Sep 17 00:00:00 2001 From: Kevin Heifner Date: Fri, 20 Oct 2023 12:30:45 -0500 Subject: [PATCH 25/31] GH-1784 Additional clarification and a fix --- .../10_block-producing-explained.md | 19 +++++++++++-------- 1 file changed, 11 insertions(+), 8 deletions(-) diff --git a/docs/01_nodeos/03_plugins/producer_plugin/10_block-producing-explained.md b/docs/01_nodeos/03_plugins/producer_plugin/10_block-producing-explained.md index bb6cb9663e..454a2fb613 100644 --- a/docs/01_nodeos/03_plugins/producer_plugin/10_block-producing-explained.md +++ b/docs/01_nodeos/03_plugins/producer_plugin/10_block-producing-explained.md @@ -41,7 +41,7 @@ Starting in Leap 4.0, blocks are propagated after block header validation. This Starting in Leap 5.0, blocks in a round are started immediately after the completion of the previous block. Before 5.0, blocks were always started on `w` intervals and a node would "sleep" between blocks if needed. In 5.0, the "sleeps" are all moved to the end of the block production round. ## Example 1: block arrives 110ms early -* Assuming zero network latency between BP and immediate BP peer. +* Assuming zero network latency between all nodes. * Assuming blocks do not reach `m` and therefore take `w - a` time to produce. * Assume block completion including signing takes zero time. * `BP-A` has e = 120, n = 0ms, h = 5ms, a = 10ms @@ -51,30 +51,33 @@ Starting in Leap 5.0, blocks in a round are started immediately after the comple * `BP-A` starts b12 at `t11-110ms`, sends b12 at `t12-120ms` => `BP-A-Peer` processes `h=5ms`, sends at `t12-115ms` => `BP-B-Peer` processes `h=5ms`, sends at `t12-110ms` => arrives at `BP-B` at `t12-110ms` ## Example 2: block arrives 80ms early -* Assuming zero network latency between BP and immediate BP peer. +* Assuming zero network latency between `BP-A` & `BP-A Peer` and between `BP-B Peer` & `BP-B`. +* Assuming 150ms network latency between `BP-A Peer` & `BP-B Peer`. 
* Assuming blocks do not reach `m` and therefore take `w - a` time to produce. * Assume block completion including signing takes zero time. -* `BP-A` has e = 240, n = 150ms, h = 5ms, a = 20ms +* `BP-A` has e = 240, n = 0ms/150ms, h = 5ms, a = 20ms * `BP-A` sends b1 at `t1-20ms` => `BP-A-Peer` processes `h=5ms`, sends at `t-15ms` =(150ms)> `BP-B-Peer` processes `h=5ms`, sends at `t+140ms` => arrives at `BP-B` at `t+140ms`. * `BP-A` starts b2 at `t1-20ms`, sends b2 at `t2-40ms` => `BP-A-Peer` processes `h=5ms`, sends at `t2-35ms` =(150ms)> `BP-B-Peer` processes `h=5ms`, sends at `t2+120ms` => arrives at `BP-B` at `t2+120ms`. * `BP-A` starts b3 at `t2-40ms`, ... * `BP-A` starts b12 at `t11-220ms`, sends b12 at `t12-240ms` => `BP-A-Peer` processes `h=5ms`, sends at `t12-235ms` =(150ms)> `BP-B-Peer` processes `h=5ms`, sends at `t12-80ms` => arrives at `BP-B` at `t12-80ms` ## Example 3: block arrives 16ms late and is dropped -* Assuming zero network latency between BP and immediate BP peer. +* Assuming zero network latency between `BP-A` & `BP-A Peer` and between `BP-B Peer` & `BP-B`. +* Assuming 200ms network latency between `BP-A Peer` & `BP-B Peer`. * Assuming blocks do not reach `m` and therefore take `w - a` time to produce. * Assume block completion including signing takes zero time. -* `BP-A` has e = 204, n = 200ms, h = 10ms, a = 17ms +* `BP-A` has e = 204, n = 0ms/200ms, h = 10ms, a = 17ms * `BP-A` sends b1 at `t1-17ms` => `BP-A-Peer` processes `h=10ms`, sends at `t-7ms` =(200ms)> `BP-B-Peer` processes `h=10ms`, sends at `t+203ms` => arrives at `BP-B` at `t+203ms`. * `BP-A` starts b2 at `t1-17ms`, sends b2 at `t2-34ms` => `BP-A-Peer` processes `h=10ms`, sends at `t2-24ms` =(200ms)> `BP-B-Peer` processes `h=10ms`, sends at `t2+186ms` => arrives at `BP-B` at `t2+186ms`. * `BP-A` starts b3 at `t2-34ms`, ... 
-* `BP-A` starts b12 at `t11-187ms`, sends b12 at `t12-204ms` => `BP-A-Peer` processes `h=10ms`, sends at `t12-194ms` =(200ms)> `BP-B-Peer` processes `h=10ms`, sends at `t12+16ms` => arrives at `BP-B` at `t12-16ms` +* `BP-A` starts b12 at `t11-187ms`, sends b12 at `t12-204ms` => `BP-A-Peer` processes `h=10ms`, sends at `t12-194ms` =(200ms)> `BP-B-Peer` processes `h=10ms`, sends at `t12+16ms` => arrives at `BP-B` at `t12+16ms` ## Example 4: full blocks are produced early -* Assuming zero network latency between BP and immediate BP peer. +* Assuming zero network latency between `BP-A` & `BP-A Peer` and between `BP-B Peer` & `BP-B`. +* Assuming 200ms network latency between `BP-A Peer` & `BP-B Peer`. * Assume all blocks are full as there are enough queued up unapplied transactions ready to fill all blocks. * Assume a block can be produced with 200ms worth of transactions in 225ms worth of time. There is overhead for producing the block. -* `BP-A` has e = 120, m = 200ms, n = 200ms, h = 10ms, a = 10ms +* `BP-A` has e = 120, m = 200ms, n = 0ms/200ms, h = 10ms, a = 10ms * `BP-A` sends b1 at `t1-275s` => `BP-A-Peer` processes `h=10ms`, sends at `t-265ms` =(200ms)> `BP-B-Peer` processes `h=10ms`, sends at `t-55ms` => arrives at `BP-B` at `t-55ms`. * `BP-A` starts b2 at `t1-275ms`, sends b2 at `t2-550ms (t1-50ms)` => `BP-A-Peer` processes `h=10ms`, sends at `t2-540ms` =(200ms)> `BP-B-Peer` processes `h=10ms`, sends at `t2-330ms` => arrives at `BP-B` at `t2-330ms`. * `BP-A` starts b3 at `t2-550ms`, ... 
From 17266b189e8b927221e30ce1dd963b461477f270 Mon Sep 17 00:00:00 2001 From: Kevin Heifner Date: Fri, 20 Oct 2023 13:31:28 -0500 Subject: [PATCH 26/31] GH-1784 Rename cpu_effort_us to cpu_effort --- .../eosio/producer_plugin/block_timing_util.hpp | 16 ++++++++-------- 1 file changed, 8 insertions(+), 8 deletions(-) diff --git a/plugins/producer_plugin/include/eosio/producer_plugin/block_timing_util.hpp b/plugins/producer_plugin/include/eosio/producer_plugin/block_timing_util.hpp index f4d68061c7..27ba83ff6c 100644 --- a/plugins/producer_plugin/include/eosio/producer_plugin/block_timing_util.hpp +++ b/plugins/producer_plugin/include/eosio/producer_plugin/block_timing_util.hpp @@ -45,25 +45,25 @@ namespace block_timing_util { // example, given block_interval=500 ms and cpu effort=400 ms, assuming our round starts at time point 0; in the // past, the block start time points would be at time point -500, 0, 500, 1000, 1500, 2000 .... With this new // approach, the block time points would become -500, -100, 300, 700, 1100 ... 
- inline fc::time_point production_round_block_start_time(fc::microseconds cpu_effort_us, chain::block_timestamp_type block_time) { + inline fc::time_point production_round_block_start_time(fc::microseconds cpu_effort, chain::block_timestamp_type block_time) { uint32_t block_slot = block_time.slot; uint32_t production_round_start_block_slot = (block_slot / chain::config::producer_repetitions) * chain::config::producer_repetitions; uint32_t production_round_index = block_slot % chain::config::producer_repetitions; return chain::block_timestamp_type(production_round_start_block_slot - 1).to_time_point() + - fc::microseconds(cpu_effort_us.count() * production_round_index); + fc::microseconds(cpu_effort.count() * production_round_index); } - inline fc::time_point calculate_producing_block_deadline(fc::microseconds cpu_effort_us, chain::block_timestamp_type block_time) { - auto estimated_deadline = production_round_block_start_time(cpu_effort_us, block_time) + fc::microseconds(cpu_effort_us); + inline fc::time_point calculate_producing_block_deadline(fc::microseconds cpu_effort, chain::block_timestamp_type block_time) { + auto estimated_deadline = production_round_block_start_time(cpu_effort, block_time) + cpu_effort; auto now = fc::time_point::now(); if (estimated_deadline > now) { return estimated_deadline; } else { // This could only happen when the producer stop producing and then comes back alive in the middle of its own // production round. In this case, we just use the hard deadline. 
- const auto hard_deadline = block_time.to_time_point() - fc::microseconds(chain::config::block_interval_us - cpu_effort_us.count()); - return std::min(hard_deadline, now + cpu_effort_us); + const auto hard_deadline = block_time.to_time_point() - fc::microseconds(chain::config::block_interval_us - cpu_effort.count()); + return std::min(hard_deadline, now + cpu_effort); } } @@ -118,7 +118,7 @@ namespace block_timing_util { // Return the *next* block start time according to its block time slot. // Returns empty optional if no producers are in the active_schedule. // block_num is only used for watermark minimum offset. - inline std::optional calculate_producer_wake_up_time(fc::microseconds cpu_effort_us, uint32_t block_num, + inline std::optional calculate_producer_wake_up_time(fc::microseconds cpu_effort, uint32_t block_num, const chain::block_timestamp_type& ref_block_time, const std::set& producers, const std::vector& active_schedule, @@ -141,7 +141,7 @@ namespace block_timing_util { return {}; } - return production_round_block_start_time(cpu_effort_us, chain::block_timestamp_type(wake_up_slot)); + return production_round_block_start_time(cpu_effort, chain::block_timestamp_type(wake_up_slot)); } } // namespace block_timing_util From 54e42bc1980d959b2348fc65d127ad9031514af5 Mon Sep 17 00:00:00 2001 From: Kevin Heifner Date: Fri, 20 Oct 2023 13:38:19 -0500 Subject: [PATCH 27/31] GH-1784 Update produce-block-offset-ms description --- docs/01_nodeos/03_plugins/producer_plugin/index.md | 6 +++--- plugins/producer_plugin/producer_plugin.cpp | 2 +- tests/PerformanceHarness/performance_test_basic.py | 2 +- 3 files changed, 5 insertions(+), 5 deletions(-) diff --git a/docs/01_nodeos/03_plugins/producer_plugin/index.md b/docs/01_nodeos/03_plugins/producer_plugin/index.md index 11157f5936..3d36b24f04 100644 --- a/docs/01_nodeos/03_plugins/producer_plugin/index.md +++ b/docs/01_nodeos/03_plugins/producer_plugin/index.md @@ -72,9 +72,9 @@ Config Options for 
eosio::producer_plugin: can extend during low usage (only enforced subjectively; use 1000 to not enforce any limit) - --produce-block-offset-ms arg (=450) The number of milliseconds early the - last block of a production round should - be produced. + --produce-block-offset-ms arg (=450) The minimum time to reserve at the end + of a production round for blocks to + propagate to the next block producer. --max-block-cpu-usage-threshold-us arg (=5000) Threshold of CPU block production to consider block full; when within diff --git a/plugins/producer_plugin/producer_plugin.cpp b/plugins/producer_plugin/producer_plugin.cpp index 7fc20de7cc..c425447e58 100644 --- a/plugins/producer_plugin/producer_plugin.cpp +++ b/plugins/producer_plugin/producer_plugin.cpp @@ -1044,7 +1044,7 @@ void producer_plugin::set_program_options( ("greylist-limit", boost::program_options::value()->default_value(1000), "Limit (between 1 and 1000) on the multiple that CPU/NET virtual resources can extend during low usage (only enforced subjectively; use 1000 to not enforce any limit)") ("produce-block-offset-ms", bpo::value()->default_value(config::default_produce_block_offset_ms), - "The number of milliseconds early the last block of a production round should be produced.") + "The minimum time to reserve at the end of a production round for blocks to propagate to the next block producer.") ("max-block-cpu-usage-threshold-us", bpo::value()->default_value( 5000 ), "Threshold of CPU block production to consider block full; when within threshold of max-block-cpu-usage block can be produced immediately") ("max-block-net-usage-threshold-bytes", bpo::value()->default_value( 1024 ), diff --git a/tests/PerformanceHarness/performance_test_basic.py b/tests/PerformanceHarness/performance_test_basic.py index f3448e56cd..0d56329985 100755 --- a/tests/PerformanceHarness/performance_test_basic.py +++ b/tests/PerformanceHarness/performance_test_basic.py @@ -720,7 +720,7 @@ def 
_createBaseArgumentParser(defEndpointApiDef: str, defProdNodeCnt: int, defVa choices=["all", "debug", "info", "warn", "error", "off"], default="info") ptbBaseParserGroup.add_argument("--net-threads", type=int, help=argparse.SUPPRESS if suppressHelp else "Number of worker threads in net_plugin thread pool", default=4) ptbBaseParserGroup.add_argument("--disable-subjective-billing", type=bool, help=argparse.SUPPRESS if suppressHelp else "Disable subjective CPU billing for API/P2P transactions", default=True) - ptbBaseParserGroup.add_argument("--produce-block-offset-ms", type=int, help=argparse.SUPPRESS if suppressHelp else "The number of milliseconds early the last block of a production round should be produced.", default=0) + ptbBaseParserGroup.add_argument("--produce-block-offset-ms", type=int, help=argparse.SUPPRESS if suppressHelp else "The minimum time to reserve at the end of a production round for blocks to propagate to the next block producer.", default=0) ptbBaseParserGroup.add_argument("--producer-threads", type=int, help=argparse.SUPPRESS if suppressHelp else "Number of worker threads in producer thread pool", default=2) ptbBaseParserGroup.add_argument("--read-only-write-window-time-us", type=int, help=argparse.SUPPRESS if suppressHelp else "Time in microseconds the write window lasts.", default=200000) ptbBaseParserGroup.add_argument("--read-only-read-window-time-us", type=int, help=argparse.SUPPRESS if suppressHelp else "Time in microseconds the read window lasts.", default=60000) From 1cc8b3d7529f8723dbbb7541b316275c615dd08f Mon Sep 17 00:00:00 2001 From: Kevin Heifner Date: Fri, 20 Oct 2023 14:06:30 -0500 Subject: [PATCH 28/31] GH-1784 Added comment --- .../include/eosio/producer_plugin/producer_plugin.hpp | 1 + 1 file changed, 1 insertion(+) diff --git a/plugins/producer_plugin/include/eosio/producer_plugin/producer_plugin.hpp b/plugins/producer_plugin/include/eosio/producer_plugin/producer_plugin.hpp index 0f38ef35f6..495d1b91a6 100644 --- 
a/plugins/producer_plugin/include/eosio/producer_plugin/producer_plugin.hpp +++ b/plugins/producer_plugin/include/eosio/producer_plugin/producer_plugin.hpp @@ -17,6 +17,7 @@ class producer_plugin : public appbase::plugin { struct runtime_options { std::optional max_transaction_time; std::optional max_irreversible_block_age; + // minimum time to reserve at the end of a production round for blocks to propagate to the next block producer. std::optional produce_block_offset_ms; std::optional subjective_cpu_leeway_us; std::optional greylist_limit; From a336ef25a358d40a74b7b254caea15c731213343 Mon Sep 17 00:00:00 2001 From: Kevin Heifner Date: Fri, 20 Oct 2023 14:06:55 -0500 Subject: [PATCH 29/31] GH-1784 Renamed _produce_block_cpu_effort_us to _produce_block_cpu_effort --- plugins/producer_plugin/producer_plugin.cpp | 14 +++++++------- 1 file changed, 7 insertions(+), 7 deletions(-) diff --git a/plugins/producer_plugin/producer_plugin.cpp b/plugins/producer_plugin/producer_plugin.cpp index c425447e58..3b90583acd 100644 --- a/plugins/producer_plugin/producer_plugin.cpp +++ b/plugins/producer_plugin/producer_plugin.cpp @@ -501,7 +501,7 @@ class producer_plugin_impl : public std::enable_shared_from_this _received_block{0}; // modified by net_plugin thread pool fc::microseconds _max_irreversible_block_age_us; // produce-block-offset is in terms of the complete round, internally use calculated value for each block of round - fc::microseconds _produce_block_cpu_effort_us; + fc::microseconds _produce_block_cpu_effort; fc::time_point _pending_block_deadline; uint32_t _max_block_cpu_usage_threshold_us = 0; uint32_t _max_block_net_usage_threshold_bytes = 0; @@ -615,12 +615,12 @@ class producer_plugin_impl : public std::enable_shared_from_thischain(); fc_dlog(_log, "Waiting till another block is received and scheduling Speculative/Production Change"); - auto wake_time = block_timing_util::calculate_producer_wake_up_time(_produce_block_cpu_effort_us, chain.head_block_num(), 
calculate_pending_block_time(), + auto wake_time = block_timing_util::calculate_producer_wake_up_time(_produce_block_cpu_effort, chain.head_block_num(), calculate_pending_block_time(), _producers, chain.head_block_state()->active_schedule.producers, _producer_watermarks); schedule_delayed_production_loop(weak_from_this(), wake_time); @@ -2497,7 +2497,7 @@ void producer_plugin_impl::schedule_production_loop() { chain::controller& chain = chain_plug->chain(); fc_dlog(_log, "Speculative Block Created; Scheduling Speculative/Production Change"); EOS_ASSERT(chain.is_building_block(), missing_pending_block_state, "speculating without pending_block_state"); - auto wake_time = block_timing_util::calculate_producer_wake_up_time(_produce_block_cpu_effort_us, chain.pending_block_num(), chain.pending_block_timestamp(), + auto wake_time = block_timing_util::calculate_producer_wake_up_time(_produce_block_cpu_effort, chain.pending_block_num(), chain.pending_block_timestamp(), _producers, chain.head_block_state()->active_schedule.producers, _producer_watermarks); schedule_delayed_production_loop(weak_from_this(), wake_time); @@ -2514,7 +2514,7 @@ void producer_plugin_impl::schedule_maybe_produce_block(bool exhausted) { assert(in_producing_mode()); // we succeeded but block may be exhausted static const boost::posix_time::ptime epoch(boost::gregorian::date(1970, 1, 1)); - auto deadline = block_timing_util::calculate_producing_block_deadline(_produce_block_cpu_effort_us, chain.pending_block_time()); + auto deadline = block_timing_util::calculate_producing_block_deadline(_produce_block_cpu_effort, chain.pending_block_time()); if (!exhausted && deadline > fc::time_point::now()) { // ship this block off no later than its deadline From 440d33a65debbf3bae5477685f8b41612970d93e Mon Sep 17 00:00:00 2001 From: Kevin Heifner Date: Fri, 20 Oct 2023 15:00:59 -0500 Subject: [PATCH 30/31] GH-1784 Use ceil to take the most conservative approach to the user provided offset --- 
plugins/producer_plugin/producer_plugin.cpp | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/plugins/producer_plugin/producer_plugin.cpp b/plugins/producer_plugin/producer_plugin.cpp index 3b90583acd..08749dd47d 100644 --- a/plugins/producer_plugin/producer_plugin.cpp +++ b/plugins/producer_plugin/producer_plugin.cpp @@ -615,7 +615,8 @@ class producer_plugin_impl : public std::enable_shared_from_this(std::ceil(produce_block_offset_ms / static_cast(config::producer_repetitions))) ); } fc::microseconds get_produce_block_offset() const { From 015fc81a16f6719848098078b985f1f6f3adf90f Mon Sep 17 00:00:00 2001 From: Kevin Heifner Date: Fri, 20 Oct 2023 15:26:55 -0500 Subject: [PATCH 31/31] GH-1784 Calculate in microseconds as to not lose precision --- plugins/producer_plugin/producer_plugin.cpp | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) diff --git a/plugins/producer_plugin/producer_plugin.cpp b/plugins/producer_plugin/producer_plugin.cpp index 08749dd47d..e57fe48f54 100644 --- a/plugins/producer_plugin/producer_plugin.cpp +++ b/plugins/producer_plugin/producer_plugin.cpp @@ -615,8 +615,7 @@ class producer_plugin_impl : public std::enable_shared_from_this(std::ceil(produce_block_offset_ms / static_cast(config::producer_repetitions))) ); + _produce_block_cpu_effort = fc::microseconds(config::block_interval_us - (produce_block_offset_ms*1000 / config::producer_repetitions) ); } fc::microseconds get_produce_block_offset() const {