Skip to content

Commit

Permalink
Merge pull request #1845 from AntelopeIO/GH-1837-start-block-5.0
Browse files Browse the repository at this point in the history
[5.0] Minimize abort/start block for speculative blocks
  • Loading branch information
heifner authored Nov 1, 2023
2 parents 71d4022 + 7e34c67 commit a993c02
Show file tree
Hide file tree
Showing 5 changed files with 109 additions and 19 deletions.
Original file line number Diff line number Diff line change
Expand Up @@ -55,16 +55,7 @@ namespace block_timing_util {
}

/// Compute the deadline for producing the block at @p block_time.
/// The deadline is purely a function of the block's slot and the configured
/// cpu effort: round start time for this slot plus one cpu-effort interval.
/// The current wall-clock time is intentionally NOT consulted; callers decide
/// what to do when the returned deadline has already passed (e.g. producers
/// still produce, speculative mode pushes the deadline forward).
inline fc::time_point calculate_producing_block_deadline(fc::microseconds cpu_effort, chain::block_timestamp_type block_time) {
   return production_round_block_start_time(cpu_effort, block_time) + cpu_effort;
}

namespace detail {
Expand Down
20 changes: 16 additions & 4 deletions plugins/producer_plugin/producer_plugin.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -772,9 +772,10 @@ class producer_plugin_impl : public std::enable_shared_from_this<producer_plugin
}

// Abort the current exhausted speculative pending block and re-enter the
// production loop so a fresh speculative block is started immediately.
void restart_speculative_block() {
// log message is used by Node.py verifyStartingBlockMessages in distributed-transactions-test.py test
fc_dlog(_log, "Restarting exhausted speculative block #${n}", ("n", chain_plug->chain().head_block_num() + 1));
// abort the pending block
abort_block();

// re-schedule; this will call start_block() again for the same block number
schedule_production_loop();
}

Expand Down Expand Up @@ -1829,8 +1830,6 @@ producer_plugin_impl::start_block_result producer_plugin_impl::start_block() {
last_start_block_time = now;
}

// create speculative blocks at regular intervals, so we create blocks with "current" block time
_pending_block_deadline = block_timing_util::calculate_producing_block_deadline(_produce_block_cpu_effort, block_time);
if (in_producing_mode()) {
uint32_t production_round_index = block_timestamp_type(block_time).slot % chain::config::producer_repetitions;
if (production_round_index == 0) {
Expand All @@ -1844,6 +1843,15 @@ producer_plugin_impl::start_block_result producer_plugin_impl::start_block() {
}
}

// create speculative blocks at regular intervals, so we create blocks with "current" block time
_pending_block_deadline = block_timing_util::calculate_producing_block_deadline(_produce_block_cpu_effort, block_time);
if (in_speculating_mode()) { // if we are producing, then produce block even if deadline has passed
// speculative block, no reason to start a block that will immediately be re-started, set deadline in the future
// a block should come in during this time, if not then just keep creating the block every produce_block_cpu_effort
if (now + fc::milliseconds(config::block_interval_ms/10) > _pending_block_deadline) {
_pending_block_deadline = now + _produce_block_cpu_effort;
}
}
const auto& preprocess_deadline = _pending_block_deadline;

fc_dlog(_log, "Starting block #${n} at ${time} producer ${p}", ("n", pending_block_num)("time", now)("p", scheduled_producer.producer_name));
Expand Down Expand Up @@ -2488,9 +2496,13 @@ void producer_plugin_impl::schedule_production_loop() {
chain::controller& chain = chain_plug->chain();
fc_dlog(_log, "Speculative Block Created; Scheduling Speculative/Production Change");
EOS_ASSERT(chain.is_building_block(), missing_pending_block_state, "speculating without pending_block_state");
auto wake_time = block_timing_util::calculate_producer_wake_up_time(_produce_block_cpu_effort, chain.pending_block_num(), chain.pending_block_timestamp(),
auto wake_time = block_timing_util::calculate_producer_wake_up_time(fc::microseconds{config::block_interval_us}, chain.pending_block_num(), chain.pending_block_timestamp(),
_producers, chain.head_block_state()->active_schedule.producers,
_producer_watermarks);
if (wake_time && fc::time_point::now() > *wake_time) {
// if wake time has already passed then use the block deadline instead
wake_time = _pending_block_deadline;
}
schedule_delayed_production_loop(weak_from_this(), wake_time);
} else {
fc_dlog(_log, "Speculative Block Created");
Expand Down
60 changes: 55 additions & 5 deletions plugins/producer_plugin/test/test_block_timing_util.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -51,11 +51,12 @@ BOOST_AUTO_TEST_CASE(test_calculate_block_deadline) {
{
// Scenario 2:
// In producing mode and it is already too late to meet the optimized deadlines,
// the returned deadline can never be later than the hard deadlines.
// the returned deadline can never be later than the next block deadline.
// now is not taken into account, but leaving set_now in to verify it has no effect.

auto second_block_time = eosio::chain::block_timestamp_type(production_round_1st_block_slot + 1);
fc::mock_time_traits::set_now(second_block_time.to_time_point() - fc::milliseconds(200));
auto second_block_hard_deadline = second_block_time.to_time_point() - fc::milliseconds(100);
auto second_block_hard_deadline = second_block_time.to_time_point() - fc::milliseconds(200);
BOOST_CHECK_EQUAL(calculate_producing_block_deadline(cpu_effort, second_block_time),
second_block_hard_deadline);
// use previous deadline as now
Expand All @@ -75,22 +76,53 @@ BOOST_AUTO_TEST_CASE(test_calculate_block_deadline) {
auto seventh_block_time = eosio::chain::block_timestamp_type(production_round_1st_block_slot + 6);
fc::mock_time_traits::set_now(seventh_block_time.to_time_point() - fc::milliseconds(500));

// 7th block where cpu effort is 100ms less per block
BOOST_CHECK_EQUAL(calculate_producing_block_deadline(cpu_effort, seventh_block_time),
seventh_block_time.to_time_point() - fc::milliseconds(100));
seventh_block_time.to_time_point() - fc::milliseconds(700));

// use previous deadline as now
fc::mock_time_traits::set_now(seventh_block_time.to_time_point() - fc::milliseconds(100));
auto eighth_block_time = eosio::chain::block_timestamp_type(production_round_1st_block_slot + 7);

// 8th block where cpu effort is 100ms less per block
BOOST_CHECK_EQUAL(calculate_producing_block_deadline(cpu_effort, eighth_block_time),
eighth_block_time.to_time_point() - fc::milliseconds(200));
eighth_block_time.to_time_point() - fc::milliseconds(800));

// use previous deadline as now
fc::mock_time_traits::set_now(eighth_block_time.to_time_point() - fc::milliseconds(200));
auto ninth_block_time = eosio::chain::block_timestamp_type(production_round_1st_block_slot + 8);

// 9th block where cpu effort is 100ms less per block
BOOST_CHECK_EQUAL(calculate_producing_block_deadline(cpu_effort, ninth_block_time),
ninth_block_time.to_time_point() - fc::milliseconds(300));
ninth_block_time.to_time_point() - fc::milliseconds(900));
}
{
// Scenario: time has passed for block to be produced
// Ask for a deadline for current block but its deadline has already passed
auto default_cpu_effort = fc::microseconds(block_interval.count() - (450000/12)); // 462,500
// with the default_cpu_effort, the first block deadline would be 29.9625 (29.500 + 0.4625)
fc::mock_time_traits::set_now(fc::time_point::from_iso_string("2023-10-29T13:40:29.963")); // past first block interval
auto first_block_time = eosio::chain::block_timestamp_type(fc::time_point::from_iso_string("2023-10-29T13:40:30.000"));
// pass the first block deadline
fc::time_point block_deadline = calculate_producing_block_deadline(default_cpu_effort, first_block_time);
// not equal since block_deadline is 29.962500
BOOST_CHECK_NE(block_deadline, fc::time_point::from_iso_string("2023-10-29T13:40:29.962"));
BOOST_CHECK_EQUAL(block_deadline.to_iso_string(), fc::time_point::from_iso_string("2023-10-29T13:40:29.962").to_iso_string()); // truncates

// second block has passed
fc::mock_time_traits::set_now(fc::time_point::from_iso_string("2023-10-29T13:40:30.426")); // past second block interval
auto second_block_time = eosio::chain::block_timestamp_type(fc::time_point::from_iso_string("2023-10-29T13:40:30.500"));
// pass the second block deadline
block_deadline = calculate_producing_block_deadline(default_cpu_effort, second_block_time);
// 29.962500 + (450000/12) = 30.425
BOOST_CHECK_EQUAL(block_deadline, fc::time_point::from_iso_string("2023-10-29T13:40:30.425"));
}
{
// Real scenario that caused multiple start blocks
auto default_cpu_effort = fc::microseconds(block_interval.count() - (450000/12)); // 462,500
auto block_time = eosio::chain::block_timestamp_type(fc::time_point::from_iso_string("2023-10-31T13:37:35.000"));
fc::time_point block_deadline = calculate_producing_block_deadline(default_cpu_effort, block_time);
BOOST_CHECK_EQUAL(block_deadline.to_iso_string(), fc::time_point::from_iso_string("2023-10-31T13:37:34.587").to_iso_string());
}
}

Expand Down Expand Up @@ -248,6 +280,24 @@ BOOST_AUTO_TEST_CASE(test_calculate_producer_wake_up_time) {
expected_block_time = block_timestamp_type(prod_round_1st_block_slot + 2*config::producer_repetitions + 2).to_time_point(); // with watermark, wait until next
BOOST_CHECK_EQUAL(calculate_producer_wake_up_time(full_cpu_effort, 2, block_timestamp, producers, active_schedule, prod_watermarks), expected_block_time);
}
{ // actual example that caused multiple start blocks
producer_watermarks prod_watermarks;
std::set<account_name> producers = {
"inita"_n, "initb"_n, "initc"_n, "initd"_n, "inite"_n, "initf"_n, "initg"_n, "p1"_n,
"inith"_n, "initi"_n, "initj"_n, "initk"_n, "initl"_n, "initm"_n, "initn"_n,
"inito"_n, "initp"_n, "initq"_n, "initr"_n, "inits"_n, "initt"_n, "initu"_n, "p2"_n
};
std::vector<chain::producer_authority> active_schedule{
{"inita"_n}, {"initb"_n}, {"initc"_n}, {"initd"_n}, {"inite"_n}, {"initf"_n}, {"initg"_n},
{"inith"_n}, {"initi"_n}, {"initj"_n}, {"initk"_n}, {"initl"_n}, {"initm"_n}, {"initn"_n},
{"inito"_n}, {"initp"_n}, {"initq"_n}, {"initr"_n}, {"inits"_n}, {"initt"_n}, {"initu"_n}
};
auto default_cpu_effort = fc::microseconds(block_interval.count() - (450000/12)); // 462,500
auto wake_time = calculate_producer_wake_up_time(default_cpu_effort, 106022362, eosio::chain::block_timestamp_type(fc::time_point::from_iso_string("2023-10-31T16:06:41.000")),
producers, active_schedule, prod_watermarks);
BOOST_REQUIRE(!!wake_time);
BOOST_CHECK_EQUAL(wake_time->to_iso_string(), fc::time_point::from_iso_string("2023-10-31T16:06:40.587").to_iso_string());
}

}

Expand Down
32 changes: 32 additions & 0 deletions tests/TestHarness/Node.py
Original file line number Diff line number Diff line change
Expand Up @@ -550,6 +550,38 @@ def findInLog(self, searchStr):
return True
return False

# verify only one or two 'Starting block' per block number unless block is restarted
def verifyStartingBlockMessages(self):
    """Scan this node's stderr log files and verify that each block number has
    at most two 'Starting block' messages, unless the block was explicitly
    restarted ('Restarting exhausted speculative block') or followed an
    unlinkable_block_exception.

    Returns True if no block was started three or more times without an
    intervening restart, False otherwise.
    """
    dataDir = Utils.getNodeDataDir(self.nodeId)
    files = Node.findStderrFiles(dataDir)
    # accumulate across all files so a violation in any file fails the check
    # (also avoids an unbound-variable error when no stderr files exist)
    threeStartsFound = False
    for f in files:
        blockNumbers = set()
        duplicateBlockNumbers = set()
        lastRestartBlockNum = 0
        blockNumber = 0

        with open(f, 'r') as file:
            for line in file:
                match = re.match(r".*Restarting exhausted speculative block #(\d+)", line)
                if match:
                    # restarts legitimately produce an extra 'Starting block' for this number
                    lastRestartBlockNum = int(match.group(1))
                    continue
                if re.match(r".*unlinkable_block_exception", line):
                    # an unlinkable block also causes the current block to be restarted
                    lastRestartBlockNum = blockNumber
                    continue
                match = re.match(r".*Starting block #(\d+)", line)
                if match:
                    blockNumber = int(match.group(1))
                    if blockNumber != lastRestartBlockNum and blockNumber in duplicateBlockNumbers:
                        # third (or later) start of the same block without a restart
                        print(f"Duplicate Starting block found: {blockNumber} in {f}")
                        threeStartsFound = True
                    if blockNumber != lastRestartBlockNum and blockNumber in blockNumbers:
                        duplicateBlockNumbers.add(blockNumber)
                    blockNumbers.add(blockNumber)

    return not threeStartsFound

def analyzeProduction(self, specificBlockNum=None, thresholdMs=500):
dataDir=Utils.getNodeDataDir(self.nodeId)
files=Node.findStderrFiles(dataDir)
Expand Down
5 changes: 5 additions & 0 deletions tests/distributed-transactions-test.py
Original file line number Diff line number Diff line change
Expand Up @@ -111,6 +111,11 @@

cluster.compareBlockLogs()

# verify only one start block per block unless interrupted
for node in cluster.getAllNodes():
if not node.verifyStartingBlockMessages():
errorExit("Found more than one Starting block in logs")

testSuccessful=True
finally:
TestHelper.shutdown(cluster, walletMgr, testSuccessful, dumpErrorDetails)
Expand Down

0 comments on commit a993c02

Please sign in to comment.