Merge pull request #814 from AntelopeIO/GH-789-sync-ahead-main
[1.0.2 -> main] Prevent sync ahead in irreversible mode
heifner authored Sep 24, 2024
2 parents dc64782 + 07bb389 commit 0a810f4
Showing 3 changed files with 74 additions and 20 deletions.
6 changes: 4 additions & 2 deletions plugins/net_plugin/net_plugin.cpp
@@ -2206,10 +2206,12 @@ namespace eosio {
             auto forkdb_head = cc.fork_db_head();
             auto calculated_lib = forkdb_head.irreversible_blocknum();
             auto num_blocks_that_can_be_applied = calculated_lib > head_num ? calculated_lib - head_num : 0;
             // add blocks that can potentially be applied as they are not in the forkdb yet
             num_blocks_that_can_be_applied += blk_num > forkdb_head.block_num() ? blk_num - forkdb_head.block_num() : 0;
             if (num_blocks_that_can_be_applied < sync_fetch_span) {
-               fc_ilog(logger, "sync ahead allowed past sync-fetch-span ${sp}, block ${bn} for paused LIB ${l}, chain_lib ${cl}, forkdb size ${s}",
-                       ("bn", blk_num)("sp", sync_fetch_span)("l", calculated_lib)("cl", head_num)("s", cc.fork_db_size()));
+               if (head_num )
+                  fc_ilog(logger, "sync ahead allowed past sync-fetch-span ${sp}, block ${bn} for paused LIB ${l}, chain_lib ${cl}, forkdb size ${s}",
+                          ("bn", blk_num)("sp", sync_fetch_span)("l", calculated_lib)("cl", head_num)("s", cc.fork_db_size()));
                return true;
             }
          }
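
For context, the gate above reduces to simple arithmetic: a sync request for block blk_num is allowed to run ahead of the chain head only while the number of blocks that could still be applied (the fork-db head's calculated LIB minus the chain head, plus any requested blocks not yet in the fork db) stays below sync-fetch-span. A minimal Python sketch of that check, illustration only and not part of this commit (names mirror the C++ variables):

    def sync_ahead_allowed(blk_num, head_num, forkdb_head_num, calculated_lib, sync_fetch_span):
        # blocks already fetched that could still be applied once LIB advances
        can_apply = calculated_lib - head_num if calculated_lib > head_num else 0
        # plus requested blocks that are not in the fork db yet
        can_apply += blk_num - forkdb_head_num if blk_num > forkdb_head_num else 0
        return can_apply < sync_fetch_span

    # with sync-fetch-span 1000 and a paused LIB, the backlog eventually reaches the span
    assert sync_ahead_allowed(1500, head_num=1000, forkdb_head_num=1400, calculated_lib=1200, sync_fetch_span=1000)      # 200 + 100 < 1000
    assert not sync_ahead_allowed(2500, head_num=1000, forkdb_head_num=1400, calculated_lib=1200, sync_fetch_span=1000)  # 200 + 1100 >= 1000
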
21 changes: 21 additions & 0 deletions tests/TestHarness/Node.py
@@ -687,6 +687,27 @@ def findInLog(self, searchStr):
return True
return False

+    def linesInLog(self, searchStr):
+        dataDir=Utils.getNodeDataDir(self.nodeId)
+        files=Node.findStderrFiles(dataDir)
+        lines=[]
+        for file in files:
+            with open(file, 'r') as f:
+                for line in f:
+                    if searchStr in line:
+                        lines.append(line)
+        return lines
+
+    def countInLog(self, searchStr) -> int:
+        dataDir=Utils.getNodeDataDir(self.nodeId)
+        files=Node.findStderrFiles(dataDir)
+        count = 0
+        for file in files:
+            with open(file, 'r') as f:
+                contents = f.read()
+                count += contents.count(searchStr)
+        return count
+
# verify only one or two 'Starting block' per block number unless block is restarted
def verifyStartingBlockMessages(self):
dataDir=Utils.getNodeDataDir(self.nodeId)
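
For reference, a short usage sketch of the two new helpers, illustrative only (catchupNode stands for any TestHarness Node instance; the search strings are the ones the updated catchup test greps for):

    # every stderr line of the node that contains the pattern, across all stderr files
    request_lines = catchupNode.linesInLog("requesting range")

    # total occurrences of the pattern across the node's stderr files
    numUnlinkable = catchupNode.countInLog("unlinkable_block")
    print(f"{len(request_lines)} range requests, {numUnlinkable} unlinkable_block messages")
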
67 changes: 49 additions & 18 deletions tests/nodeos_startup_catchup.py
@@ -135,9 +135,9 @@ def waitForNodeStarted(node):
transactionsPerBlock=targetTpsPerGenerator*trxGeneratorCnt*timePerBlock/1000
steadyStateWait=20
startBlockNum=blockNum+steadyStateWait
-numBlocks=20
+numBlocks=400
endBlockNum=startBlockNum+numBlocks
-waitForBlock(node0, endBlockNum)
+waitForBlock(node0, endBlockNum, timeout=numBlocks)
steadyStateWindowTrxs=0
steadyStateAvg=0
steadyStateWindowBlks=0
@@ -182,23 +182,18 @@ def waitForNodeStarted(node):
Print("Shutdown catchup node and validate exit code")
catchupNode.interruptAndVerifyExitStatus(60)

-# every other catchup make a lib catchup
-if catchup_num % 2 == 0:
-    Print(f"Wait for producer to advance lib past head of catchup {catchupHead}")
-    # catchupHead+5 to allow for advancement of head during shutdown of catchupNode
-    waitForBlock(node0, catchupHead+5, timeout=twoRoundsTimeout*2, blockType=BlockType.lib)
+Print(f"Wait for producer to advance lib past head of catchup {catchupHead}")
+# catchupHead+5 to allow for advancement of head during shutdown of catchupNode
+waitForBlock(node0, catchupHead+5, timeout=twoRoundsTimeout*2, blockType=BlockType.lib)

Print("Restart catchup node")
-addSwapFlags = None
-if catchup_num % 3 == 0:
-    addSwapFlags = {"--block-log-retain-blocks": "0", "--delete-all": ""}
-catchupNode.relaunch(skipGenesis=False, addSwapFlags=addSwapFlags)
+catchupNode.relaunch(skipGenesis=False)
waitForNodeStarted(catchupNode)
lastCatchupLibNum=lib(catchupNode)

Print("Verify catchup node is advancing")
# verify catchup node is advancing to producer
-waitForBlock(catchupNode, lastCatchupLibNum+1, timeout=twoRoundsTimeout, blockType=BlockType.lib)
+waitForBlock(catchupNode, lastLibNum, timeout=twoRoundsTimeout, blockType=BlockType.lib)

Print("Verify producer is still advancing LIB")
lastLibNum=lib(node0)
@@ -209,20 +204,56 @@ def waitForNodeStarted(node):
# verify catchup node is advancing to producer
waitForBlock(catchupNode, lastLibNum, timeout=(numBlocksToCatchup/2 + 60), blockType=BlockType.lib)
catchupNode.interruptAndVerifyExitStatus(60)

Print("Verify catchup without a block log")
addSwapFlags = {"--block-log-retain-blocks": "0", "--delete-all": ""}
catchupNode.relaunch(skipGenesis=False, addSwapFlags=addSwapFlags)
waitForNodeStarted(catchupNode)
lastCatchupLibNum=lib(catchupNode)

Print("Verify catchup node is advancing without block log")
# verify catchup node is advancing to producer
waitForBlock(catchupNode, lastLibNum+1, timeout=twoRoundsTimeout, blockType=BlockType.lib)
catchupNode.interruptAndVerifyExitStatus(60)
catchupNode.popenProc=None

-logFile = Utils.getNodeDataDir(catchupNodeNum) + "/stderr.txt"
-f = open(logFile)
-contents = f.read()
+# Verify not syncing ahead of sync-fetch-span
+sync_fetch_span = 1000 # default
+irreversible = False
+if catchupNode.nodeId in specificExtraNodeosArgs:
+    m = re.search(r"sync-fetch-span (\d+)", specificExtraNodeosArgs[catchupNode.nodeId])
+    if m is not None:
+        sync_fetch_span = int(m.group(1))
+    irreversible = re.search(r"irreversible", specificExtraNodeosArgs[catchupNode.nodeId]) is not None
+Print(f"Verify request span for sync-fetch-span {sync_fetch_span} of {catchupNode.data_dir}")
+lines = catchupNode.linesInLog("requesting range")
+for line in lines:
+    m = re.search(r"requesting range (\d+) to (\d+), fhead (\d+), lib (\d+)", line)
+    if m is not None:
+        startBlockNum=int(m.group(1))
+        endBlockNum=int(m.group(2))
+        fhead=int(m.group(3))
+        libNum=int(m.group(4))
+        if endBlockNum-startBlockNum > sync_fetch_span:
+            errorExit(f"Requested range exceeds sync-fetch-span {sync_fetch_span}: {line}")
+        if irreversible:
+            # for now just use a larger tolerance, later when the logs include calculated lib this can be more precise
+            # See https://github.com/AntelopeIO/spring/issues/806
+            if endBlockNum > fhead and fhead > libNum and endBlockNum - fhead > (sync_fetch_span*10):
+                errorExit(f"Requested range too far head of fork head {fhead} in irreversible mode, sync-fetch-span {sync_fetch_span}: {line}")
+        else:
+            if endBlockNum > fhead and fhead > libNum and endBlockNum - fhead > (sync_fetch_span*2-1):
+                errorExit(f"Requested range too far head of fork head {fhead} sync-fetch-span {sync_fetch_span}: {line}")

# See https://github.com/AntelopeIO/spring/issues/81 for fix to reduce the number of expected unlinkable blocks
# Test verifies LIB is advancing, check to see that not too many unlinkable block exceptions are generated
# while syncing up to head.
-numUnlinkable = contents.count("3030001 unlinkable_block_exception: Unlinkable block")
+numUnlinkable = catchupNode.countInLog("unlinkable_block")
numUnlinkableAllowed = 500
Print(f"Node{catchupNodeNum} has {numUnlinkable} unlinkable_block_exception in {logFile}")
Print(f"Node{catchupNodeNum} has {numUnlinkable} unlinkable_block in {catchupNode.data_dir}")
if numUnlinkable > numUnlinkableAllowed:
errorExit(f"Node{catchupNodeNum} has {numUnlinkable} which is more than the configured "
f"allowed {numUnlinkableAllowed} unlinkable blocks: {logFile}.")
f"allowed {numUnlinkableAllowed} unlinkable blocks: {catchupNode.data_dir}.")

testSuccessful=True

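To make the request-span checks in the updated test concrete, here is a small worked sketch, illustration only and not part of the commit, applying them to one sample "requesting range" log line with the default sync-fetch-span of 1000:

    import re

    line = "requesting range 5001 to 6000, fhead 4800, lib 4500"   # sample values, not from a real run
    m = re.search(r"requesting range (\d+) to (\d+), fhead (\d+), lib (\d+)", line)
    start, end, fhead, libNum = (int(g) for g in m.groups())
    sync_fetch_span = 1000   # default

    # the requested window itself must never exceed sync-fetch-span
    assert end - start <= sync_fetch_span                                                  # 999 <= 1000

    # outside irreversible mode: the end may run at most about two spans past the fork head
    assert not (end > fhead and fhead > libNum and end - fhead > sync_fetch_span*2 - 1)    # 1200 <= 1999

    # irreversible mode: looser 10-span tolerance until the log line includes calculated LIB (spring issue #806)
    assert not (end > fhead and fhead > libNum and end - fhead > sync_fetch_span*10)       # 1200 <= 10000
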
