Skip to content

Commit

Permalink
Beacon sync fix overlapping block list import (#2866)
Browse files Browse the repository at this point in the history
* Ignore `FC` overlapping blocks and the ones <= `base`

why:
  Due to concurrently running `importBlock()` by `newPayload` RPC
  requests the `FC` module layout might differ when re-visiting for
  importing blocks.

* Update logging and docu

details:
 Reduce some logging noise
 Clarify activating/suspending syncer in log messages
  • Loading branch information
mjfh authored Nov 22, 2024
1 parent 652539e commit 81690e0
Show file tree
Hide file tree
Showing 5 changed files with 27 additions and 13 deletions.
10 changes: 7 additions & 3 deletions nimbus/sync/beacon/README.md
Original file line number Diff line number Diff line change
Expand Up @@ -66,17 +66,21 @@ Implementation, The Gory Details
The following diagram depicts a most general state view of the sync and the
*FC* modules and at a given point of time

0 L (5)
o--------------------o
0 B L (5)
o---------o----------o
| <--- imported ---> |
C D H
o---------------------o----------------o
| <-- unprocessed --> | <-- linked --> |

where

* *B* -- **base**, current value of this entity (with the same name) of the
**FC** module (i.e. the current value when looked up.)

* *C* -- coupler, parent of the left endpoint of the chain of headers or blocks
to be fetched and imported.
to be fetched and imported. The block number of *C* is somewhere
between the ones of *B* and *C* inclusive.

* *L* -- **latest**, current value of this entity (with the same name) of the
**FC** module (i.e. the current value when looked up.) *L* need not
Expand Down
17 changes: 15 additions & 2 deletions nimbus/sync/beacon/worker/blocks_staged.nim
Original file line number Diff line number Diff line change
Expand Up @@ -255,11 +255,10 @@ proc blocksStagedImport*(
# Fetch least record, accept only if it matches the global ledger state
block:
let imported = ctx.chain.latestNumber()
if qItem.key != imported + 1:
if imported + 1 < qItem.key:
trace info & ": there is a gap L vs. staged",
B=ctx.chain.baseNumber.bnStr, L=imported.bnStr, staged=qItem.key.bnStr,
C=ctx.layout.coupler.bnStr
doAssert imported < qItem.key
return false

# Remove from queue
Expand All @@ -275,7 +274,21 @@ proc blocksStagedImport*(
var maxImport = iv.maxPt
block importLoop:
for n in 0 ..< nBlocks:
# It is known that `key <= imported + 1`. This means that some blocks
# potentally overlap with what is already known by `FC` (e.g. due to
# concurrently running `importBlock()` by a `newPayload` RPC requests.)
#
# It is not left to `FC` to ignore this record. Passing a block before
# the `base` (which also might have changed) is responded by `FC` with
# an error. This would cause throwing away all `nBlocks` rather than
# ignoring the first some.
#
let nBn = qItem.data.blocks[n].header.number
if nBn <= ctx.chain.baseNumber:
trace info & ": ignoring block <= base", n, iv,
B=ctx.chain.baseNumber.bnStr, L=ctx.chain.latestNumber.bnStr,
nthBn=nBn.bnStr, nthHash=qItem.data.getNthHash(n)
continue
ctx.pool.chain.importBlock(qItem.data.blocks[n]).isOkOr:
warn info & ": import block error", n, iv,
B=ctx.chain.baseNumber.bnStr, L=ctx.chain.latestNumber.bnStr,
Expand Down
4 changes: 2 additions & 2 deletions nimbus/sync/beacon/worker/db.nim
Original file line number Diff line number Diff line change
Expand Up @@ -114,9 +114,9 @@ proc dbLoadSyncStateLayout*(ctx: BeaconCtxRef; info: static[string]): bool =
# Add interval of unprocessed header range `(C,D)` from `README.md`
ctx.headersUnprocSet(ctx.layout.coupler+1, ctx.layout.dangling-1)

trace info & ": restored sync state", L=latest.bnStr,
trace info & ": restored syncer state", L=latest.bnStr,
C=ctx.layout.coupler.bnStr, D=ctx.layout.dangling.bnStr,
F=ctx.layout.final.bnStr, H=ctx.layout.head.bnStr
H=ctx.layout.head.bnStr

true

Expand Down
2 changes: 0 additions & 2 deletions nimbus/sync/beacon/worker/start_stop.nim
Original file line number Diff line number Diff line change
Expand Up @@ -122,8 +122,6 @@ proc setupDatabase*(ctx: BeaconCtxRef; info: static[string]) =
# system becomes fully active. Otherwise there is some polling only waiting
# for a new target so there is reduced service (aka `hibernate`.).
ctx.hibernate = not ctx.dbLoadSyncStateLayout info
if ctx.hibernate:
trace info & ": hibernating", latest=ctx.chain.latestNumber.bnStr

# Set blocks batch import value for block import
if ctx.pool.nBodiesBatch < nFetchBodiesRequest:
Expand Down
7 changes: 3 additions & 4 deletions nimbus/sync/beacon/worker/update.nim
Original file line number Diff line number Diff line change
Expand Up @@ -121,8 +121,7 @@ proc startHibernating(ctx: BeaconCtxRef; info: static[string]) =

ctx.hibernate = true

trace info & ": hibernating, awaiting sync target",
L=ctx.chain.latestNumber.bnStr
trace info & ": suspending syncer", L=ctx.chain.latestNumber.bnStr

# Update, so it can be followed nicely
ctx.updateMetrics()
Expand Down Expand Up @@ -327,7 +326,7 @@ proc updateFinalBlockHeader*(
b = ctx.chain.baseNumber()
f = finHdr.number
if f < b:
trace info & ": finalised number too low",
trace info & ": finalised block # too low",
B=b.bnStr, finalised=f.bnStr, delta=(b - f)

ctx.target.reset
Expand All @@ -339,7 +338,7 @@ proc updateFinalBlockHeader*(
# Activate running (unless done yet)
if ctx.hibernate:
ctx.hibernate = false
trace info & ": activated syncer",
trace info & ": activating syncer", B=b.bnStr,
finalised=f.bnStr, head=ctx.target.consHead.bnStr

# Update, so it can be followed nicely
Expand Down

0 comments on commit 81690e0

Please sign in to comment.