Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

[1.0]Add integration tests for production-pause-vote-timeout #662

Merged
merged 21 commits into from
Aug 29, 2024
Merged
Show file tree
Hide file tree
Changes from 10 commits
Commits
Show all changes
21 commits
Select commit Hold shift + click to select a range
bbeb071
Add new parameter finalizerNames to setFinalizers for specifying user…
linh2931 Aug 28, 2024
2888033
Use the same threshold formula as core contract to prevent potentiall…
linh2931 Aug 28, 2024
f9bfcba
Set up test shape for production_pause_vote_timeout test
linh2931 Aug 28, 2024
eff1f6b
First working version of production_pause_vote_timeout.py
linh2931 Aug 28, 2024
a69949d
Update CMakeLists.txt for production_pause_vote_timeout.py and produc…
linh2931 Aug 28, 2024
228a20e
Rename node2 to defproducercProducerNode and node3 to defproducercFin…
linh2931 Aug 28, 2024
4c4abcd
Add detailed test description
linh2931 Aug 28, 2024
e2b5143
Add more comments
linh2931 Aug 28, 2024
d6cd787
Add a test for disable production-pause-vote-timeout using --producti…
linh2931 Aug 28, 2024
ea8dc1f
Rename defproducercProducerNode to producercNode and defproducercFina…
linh2931 Aug 28, 2024
b105dd5
Add --enable-stale-production in relaunching producercNode as it is a…
linh2931 Aug 28, 2024
997c445
Simplify verifying producercNode still producing
linh2931 Aug 28, 2024
7023071
Set biosFinalizer to false so bios node is not counted in threshold c…
linh2931 Aug 29, 2024
ffb7c89
Use producer RPC endpont -paused- to check if production is paused (i…
linh2931 Aug 29, 2024
51698d6
Update comments and test shape
linh2931 Aug 29, 2024
34fbc0f
Verify LIB stalled on node0 and node1
linh2931 Aug 29, 2024
5088ba8
Use producer RPC paused endpoint for all production pause checks
linh2931 Aug 29, 2024
cfc4262
Check stalled LIB reliably
linh2931 Aug 29, 2024
f8d2957
Merge branch 'release/1.0' into production_pause_vote_tests
linh2931 Aug 29, 2024
7cf7dad
Verify production resumes automatically
linh2931 Aug 29, 2024
d7216f6
Check LIB advance before checking unpause
linh2931 Aug 29, 2024
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
4 changes: 4 additions & 0 deletions tests/CMakeLists.txt
Original file line number Diff line number Diff line change
Expand Up @@ -73,6 +73,8 @@ configure_file(${CMAKE_CURRENT_SOURCE_DIR}/disaster_recovery.py ${CMAKE_CURRENT_
# Copy these test scripts and their topology files verbatim into the build
# tree (COPYONLY: no variable substitution is applied to their contents).
foreach(test_asset IN ITEMS
    disaster_recovery_2.py
    disaster_recovery_2_test_shape.json
    disaster_recovery_3.py
    production_pause_vote_timeout.py
    production_pause_vote_timeout_test_shape.json
    production_restart.py
    production_restart_test_shape.json
    trx_finality_status_test.py)
  configure_file(${CMAKE_CURRENT_SOURCE_DIR}/${test_asset} ${CMAKE_CURRENT_BINARY_DIR}/${test_asset} COPYONLY)
endforeach()
Expand Down Expand Up @@ -161,6 +163,8 @@ add_test(NAME disaster_recovery_2 COMMAND tests/disaster_recovery_2.py -v ${UNSH
set_property(TEST disaster_recovery_2 PROPERTY LABELS nonparallelizable_tests)
# Register each script as a CTest test run from the top-level build directory,
# and label it so it is grouped with the nonparallelizable_tests.
foreach(np_test IN ITEMS
    disaster_recovery_3
    production_pause_vote_timeout
    production_restart)
  add_test(NAME ${np_test} COMMAND tests/${np_test}.py -v ${UNSHARE} WORKING_DIRECTORY ${CMAKE_BINARY_DIR})
  set_property(TEST ${np_test} PROPERTY LABELS nonparallelizable_tests)
endforeach()

Expand Down
10 changes: 5 additions & 5 deletions tests/TestHarness/Cluster.py
Original file line number Diff line number Diff line change
Expand Up @@ -1029,21 +1029,21 @@ def activateInstantFinality(self, biosFinalizer=True, waitForFinalization=True,
return None, transId
return True, transId

def setFinalizers(self, nodes, node=None):
# finalizerNames specifies non-default finalizer name for each node
def setFinalizers(self, nodes, node=None, finalizerNames=None):
# finalizerNames, if present, must specify finalizer names for all the nodes
assert(finalizerNames is None or len(nodes) == len(finalizerNames))
if node is None:
node = self.biosNode
numFins = len(nodes)
threshold = int(numFins * 2 / 3 + 1)
if threshold > 2 and threshold == numFins:
# nodes are often stopped, so do not require all node votes
threshold = threshold - 1
if Utils.Debug: Utils.Print(f"threshold: {threshold}, numFins: {numFins}")
setFinStr = f'{{"finalizer_policy": {{'
setFinStr += f' "threshold": {threshold}, '
setFinStr += f' "finalizers": ['
finNum = 1
for n in nodes:
finName = n.producerName if n.producerName is not None else f"finalizer{finNum}"
finName = finalizerNames[finNum-1] if finalizerNames is not None else n.producerName if n.producerName is not None else f"finalizer{finNum}"
setFinStr += f' {{"description": "{finName}", '
setFinStr += f' "weight":1, '
setFinStr += f' "public_key": "{n.keys[0].blspubkey}", '
Expand Down
197 changes: 197 additions & 0 deletions tests/production_pause_vote_timeout.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,197 @@
#!/usr/bin/env python3
import os
import shutil
import signal
import time

from TestHarness import Cluster, TestHelper, Utils, WalletMgr
from TestHarness.Node import BlockType

####################################################################################
# production_pause_vote_timeout
# Test production-pause-vote-timeout works as expected.
#
# Setup:
#
# Use five nodes in an hourglass topology. The center node is a relay node that
# initially has vote-threads enabled. The other 4 peripheral nodes are:
#
# node0: Enables block production for producera and has the finalizer key with
# description of producera. Has vote-threads enabled. Connect to the center node.
# node1: Enables block production for producerb and has the finalizer key with
# description of producerb. Has vote-threads enabled. Connect to the center node
# and node0.
greg7mdp marked this conversation as resolved.
Show resolved Hide resolved
# producercNode: Enables block production for producerc.
greg7mdp marked this conversation as resolved.
Show resolved Hide resolved
# finalizercNode: Has the finalizer key with description of producerc.
# Has vote-threads enabled. Connect to the center node and producercNode.
#
# Test cases:
#
# 1. Bring down finalizercNode. producercNode should eventually
# automatically pause production due to not receiving votes from finalizercNode
# that are associated with its producerc. However, Node0 and Node1 should not pause.
# Then bring finalizercNode back up. producercNode should
# automatically resume production.
# 2. Bring down the center node. producercNode should eventually automatically
# pause production due to not receiving votes from Node0 and Node1 that are
# associated with the other producers. However, Node0 and Node1 should not pause.
# Then bring the center node back up. producercNode should automatically
# resume production.
# 3. Restart producercNode with "--production-pause-vote-timeout-ms 0" to
# disable production-pause-vote-timeout. Bring down finalizercNode.
# producercNode should keep producing.
#
####################################################################################

Print=Utils.Print
errorExit=Utils.errorExit


def waitForProductionPause(node, attempts, downNodeName, **waitArgs):
    """Poll `node` until its head block stops advancing.

    Sleeps one second before each poll, then calls
    node.waitForHeadToAdvance(**waitArgs). The first poll that reports no
    head advance is treated as "production paused".

    node:         node to poll
    attempts:     maximum number of polls
    downNodeName: name of the node that was shut down; only used in the log message
    waitArgs:     forwarded unchanged to waitForHeadToAdvance (e.g. timeout=1)

    Returns True if a pause was observed within `attempts` polls, False otherwise.
    """
    for i in range(attempts):
        time.sleep(1)
        if not node.waitForHeadToAdvance(**waitArgs):
            Print(f'paused after {i} seconds after {downNodeName} was shutdown')
            return True
    return False


args=TestHelper.parse_args({"-d","--keep-logs","--dump-error-details","-v","--leave-running","--unshared"})
delay=args.d
debug=args.v
dumpErrorDetails=args.dump_error_details
pnodes=3 # number of producing nodes
totalNodes=pnodes + 2 # plus 1 center node and 1 finalizer node for defproducerc
prodCount=1 # number of producers per producing node

Utils.Debug=debug
testSuccessful=False

cluster=Cluster(unshared=args.unshared, keepRunning=args.leave_running, keepLogs=args.keep_logs)
walletMgr=WalletMgr(True, keepRunning=args.leave_running, keepLogs=args.keep_logs)

try:
    TestHelper.printSystemInfo("BEGIN")

    cluster.setWalletMgr(walletMgr)

    Print(f'producing nodes: {pnodes}, delay between nodes launch: {delay} second{"s" if delay != 1 else ""}')

    # Only the defproducerc producing node (node index 2) gets the vote timeout
    specificExtraNodeosArgs={}
    specificExtraNodeosArgs[2]="--production-pause-vote-timeout-ms 1000"

    Print("Stand up cluster")
    # Cannot use activateIF to transition to Savanna directly as it assumes
    # each producer node has finalizer configured.
    if cluster.launch(pnodes=pnodes, totalNodes=totalNodes, totalProducers=pnodes, prodCount=prodCount, delay=delay, loadSystemContract=False,
                      specificExtraNodeosArgs=specificExtraNodeosArgs,
                      activateIF=False, signatureProviderForNonProducer=True,
                      topo="./tests/production_pause_vote_timeout_test_shape.json") is False:
        errorExit("Failed to stand up eos cluster.")

    assert cluster.biosNode.getInfo(exitOnError=True)["head_block_producer"] != "eosio", "launch should have waited for production to change"

    node0 = cluster.getNode(0) # producer and finalizer node for defproducera
    node1 = cluster.getNode(1) # producer and finalizer node for defproducerb
    producercNode = cluster.getNode(2) # producer node for defproducerc
    finalizercNode = cluster.getNode(3) # finalizer node for defproducerc
    centerNode = cluster.getNode(4)

    Print("Set finalizer policy and start transition to Savanna")
    # Specifically, need to configure finalizer name for finalizercNode as defproducerc
    transId = cluster.setFinalizers(nodes=[node0, node1, finalizercNode], finalizerNames=["defproducera", "defproducerb", "defproducerc"])
    assert transId is not None, "setfinalizers failed"
    assert cluster.biosNode.waitForTransFinalization(transId), f"setfinalizers transaction {transId} was not rolled into a LIB block"
    assert cluster.biosNode.waitForLibToAdvance(), "LIB did not advance after setFinalizers"

    # biosNode no longer needed
    cluster.biosNode.kill(signal.SIGTERM)
    cluster.waitOnClusterSync(blockAdvancing=5)

    Print("Wait for LIB on all producing nodes to advance")
    assert node0.waitForLibToAdvance(), "node0 did not advance LIB"
    assert node1.waitForLibToAdvance(), "node1 did not advance LIB"
    assert producercNode.waitForLibToAdvance(), "producercNode did not advance LIB"

    ####################### test 1 ######################

    Print("Shutdown finalizercNode")
    finalizercNode.kill(signal.SIGTERM)
    assert not finalizercNode.verifyAlive(), "finalizercNode did not shutdown"

    # wait some time for producercNode paused
    paused = waitForProductionPause(producercNode, 15, "finalizercNode", timeout=1)
    # Verify producercNode paused
    assert paused, "producercNode still producing after finalizercNode was shutdown"
    # Verify node0 and node1 still producing
    assert node0.waitForHeadToAdvance(), "node0 paused after finalizercNode was shutdown"
    assert node1.waitForHeadToAdvance(), "node1 paused after finalizercNode was shutdown"

    Print("Restart finalizercNode")
    finalizercNode.relaunch()

    Print("Verify LIB advances after restart of finalizercNode")
    assert node0.waitForLibToAdvance(), "node0 did not advance LIB"
    assert node1.waitForLibToAdvance(), "node1 did not advance LIB"
    assert producercNode.waitForLibToAdvance(), "producercNode did not advance LIB"

    ####################### test 2 ######################

    Print("Shutdown centerNode")
    centerNode.kill(signal.SIGTERM)
    assert not centerNode.verifyAlive(), "centerNode did not shutdown"

    # wait some time for producercNode paused
    paused = waitForProductionPause(producercNode, 15, "centerNode", timeout=1)
    # Verify producercNode paused
    assert paused, "producercNode still producing after centerNode was shutdown"
    # Verify node0 and node1 still producing
    assert node0.waitForHeadToAdvance(), "node0 paused after centerNode was shutdown"
    assert node1.waitForHeadToAdvance(), "node1 paused after centerNode was shutdown"

    Print("Restart centerNode")
    centerNode.relaunch()

    Print("Verify LIB advances after restart")
    assert node0.waitForLibToAdvance(), "node0 did not advance LIB"
    assert node1.waitForLibToAdvance(), "node1 did not advance LIB"
    assert producercNode.waitForLibToAdvance(), "producercNode did not advance LIB"

    ####################### test 3 ######################

    Print("Shutdown producercNode")
    producercNode.kill(signal.SIGTERM)
    assert not producercNode.verifyAlive(), "producercNode did not shutdown"

    # disable production-pause-vote-timeout
    Print("Relaunch producercNode with --production-pause-vote-timeout-ms 0")
    addSwapFlags={"--production-pause-vote-timeout-ms": "0"}
    producercNode.relaunch(addSwapFlags=addSwapFlags)

    Print("Shutdown finalizercNode")
    finalizercNode.kill(signal.SIGTERM)
    assert not finalizercNode.verifyAlive(), "finalizercNode did not shutdown"

    # wait some time to make sure it is not paused; the default
    # waitForHeadToAdvance timeout is used here (no explicit timeout)
    paused = waitForProductionPause(producercNode, 10, "finalizercNode")
    # Verify producercNode still producing
    assert not paused, "producercNode (--production-pause-vote-timeout-ms 0) paused after finalizercNode was shutdown"
    # Verify node0 and node1 still producing
    assert node0.waitForHeadToAdvance(), "node0 paused after finalizercNode was shutdown"
    assert node1.waitForHeadToAdvance(), "node1 paused after finalizercNode was shutdown"

    testSuccessful=True
finally:
    TestHelper.shutdown(cluster, walletMgr, testSuccessful=testSuccessful, dumpErrorDetails=dumpErrorDetails)

exitCode = 0 if testSuccessful else 1
exit(exitCode)
118 changes: 118 additions & 0 deletions tests/production_pause_vote_timeout_test_shape.json
Original file line number Diff line number Diff line change
@@ -0,0 +1,118 @@
{
"name": "testnet_",
"ssh_helper": {
"ssh_cmd": "/usr/bin/ssh",
"scp_cmd": "/usr/bin/scp",
"ssh_identity": "",
"ssh_args": ""
},
"nodes": {
"bios":{
"name": "bios",
"keys": [
{
"privkey":"5KQwrPbwdL6PhXujxW37FSSQZ1JiwsST4cqQzDeyXtP79zkvFD3",
"pubkey":"EOS6MRyAjQq8ud7hVNYcfnVPJqcVpscN5So8BhtHuGYqET5GDW5CV"
}
],
"peers": [],
"producers": [
"eosio"
],
"dont_start": false
},
"testnet_00":{
"name": "testnet_00",
"keys": [
{
"privkey":"5Jf4sTk7vwX1MYpLJ2eQFanVvKYXFqGBrCyANPukuP2BJ5WAAKZ",
"pubkey":"EOS58B33q9S7oNkgeFfcoW3VJYu4obfDiqn5RHGE2ige6jVjUhymR",
"blspubkey":"PUB_BLS_rYRa_-bT7uLOSAfPIBy6NlXFB0YxwROeSuqHzw6s-1cuK_-GJUKqp20ktyAnsO4ZuHdx3BEPDaLronpnL22MXKWM7bvZnkCfbGCD6OzizQqxXkM9N5z5R-OUA4Ime6cF5YTSFg",
"blsprivkey":"PVT_BLS_GQjR0E8Hu8KrsTCvLKnlOCIwQijAj2-5KDizQwF-bAY6pise",
"blspop":"SIG_BLS_syFMuifUnX2zQQKr0cuHYzQQjsuPrNG75_z6y8fOyYg_twqMICZ0kT7ObbwIOUsLfXx9PVb4-QLEgUYGSRg1NSfeHGjIGkhea82wa3ayfI8elUEU1MStKbeKpys7xUAQz1PEgwcz5dClq3HyLQmMAjpoL74N_Znf0KiNEVZMte-DLF7x_6sAfp_834LthyYHjZYTmdG7belyzlYHKJb6upnZy9nR_zoKpx9jeTd3tzVhoTCuAN6aFw68D_ItY5cWiY2dhA"
}
],
"peers": [
"bios",
"testnet_04"
],
"producers": [
"defproducera"
],
"dont_start": false
},
"testnet_01":{
"name": "testnet_01",
"keys": [
{
"pubkey": "EOS8XH2gKxsef9zxmMHm4vaSvxQUhg7W4GC3nK2KSRxyYrNG5gZFS",
"privkey": "5JcoRRhDcgm51dkBrRTmErceTqrYhrq22UnmUjTZToMpH91B9N1",
"blspubkey":"PUB_BLS_Wf_O_QeyVhekDXS5q3qBxTyj_qxSrX_uiCY4z8ClpW0X2jrAVgAVHOQ9IR2H40QTWveD8QIGhhSbmSFPa0zFbs5k3yfnjfuuwpA7T1O13_LSdtxT19ehYiE4chZX6SUMJ09JFA",
"blsprivkey":"PVT_BLS_1ZLWim0k80ssXswSZp1T3ydHO9U3gLnKKlEBIDy8927XDLLj",
"blspop":"SIG_BLS_EL09aI3w-qCgarLM2Z5-T6sisSHBN0J4vMZxtGQklkOcAxgnCaPPXe0roxY4W0gVe2y6T01YrklmT_qZu2tAwqiNrVJcScY8QKvRSeczGBBab1MgnHvaAOuf6bA4JPAELIu2iPWfsS6-oLyLbNP5xtZpMXPHu3yaSJssXNOb5rcVs1KXaIUEagJeAlBBQEcKmFWfeAsJ_R8JDw4i9gSNmROzUjm6LVBpvB7vrnPDPFRA0BQ19H4FED6PtuFPShwJGVz4dg"
}
],
"peers": [
"bios",
"testnet_00",
"testnet_04"
],
"producers": [
"defproducerb"
],
"dont_start": false
},
"testnet_02":{
"name": "testnet_02",
"keys": [
{
"pubkey": "EOS6Tkpf8kcDfa32WA9B4nTcEJ64ZdDMSNioDcaL6rzdMwnpzaWJB",
"privkey": "5KkQbdxFHr8Pg1N3DEMDdU7emFgUTwQvh99FDJrodFhUbbsAtQT"
}
],
"peers": [
"bios",
"testnet_04"
],
"producers": [
"defproducerc"
],
"dont_start": false
},
"testnet_03":{
"name": "testnet_03",
"keys": [
{
"pubkey": "EOS52ntDHqA2qj4xVo7KmxdezMRhvvBqpZBuKYJCsgihisxmywpAx",
"privkey": "5JxTJJegQBpEL1p77TzkN1ompMB9gDwAfjM9chPzFCB4chxmwrE",
"blspubkey":"PUB_BLS_C-FprIiry6X-8dlLYH7xUAhIuKXBQv56zJPgtcdmKeHf8AAy750eRrOYBtKG0-QEIN5l_yl9dTLvAYmOios6Q5t3ybWBUVVQ2WWcbZLVxzwBftLwYvo1zPXH7LHEE_sAgP1i7g",
"blsprivkey":"PVT_BLS_ubElmjajfsYP_9HRSpmV-Fi_IPWKTyJS4XFSWrU8ezMZ_mL_",
"blspop":"SIG_BLS_k3wrhVl2GUG_lGsPr9io-zoamPw7eiaxMDExk-yOqcpXtu0zALHoUWJRh0WOerAS1-_RQNhbi4q-BWO9IbiNWRKP9CYIhNIL6ochGHHy4aBmZ-IzEjfBrDt7inDtFTYY0Gl372e5OqPXAwi6J3GeHipXuzAiw7SV8XdWFefthxId4meKX6vw5_RWx4XQ4ScRYoCG7UQtIZkQPEsu1SfJGL6z-cfTTSq-naKbzp0QQYfqtQkFfmL7qQUH1iohnb0HbTbRbQ"
}
],
"peers": [
"bios",
"testnet_02",
"testnet_04"
],
"producers": [
],
"dont_start": false
},
"testnet_04":{
"name": "testnet_04",
"keys": [
{
"pubkey":"EOS7K5pQCk22ojetRdyumrqp6nJX6eiQiTWWcGkZAMGhoBxgcsxhK",
"privkey":"5K3h9XiAmrx9EuqD8CRxHgQwEVDaWpqrhrnpdvwHtVzwJFMhNmE"
}
],
"peers": [
"bios"
],
"producers": [
],
"dont_start": false
}
}
}
Loading