Skip to content

Commit

Permalink
Address code review comments
Browse files Browse the repository at this point in the history
Signed-off-by: Yacov Manevich <[email protected]>
  • Loading branch information
yacovm committed Sep 24, 2024
1 parent ff8d038 commit f732a03
Show file tree
Hide file tree
Showing 3 changed files with 40 additions and 43 deletions.
49 changes: 25 additions & 24 deletions snow/engine/snowman/straggler_detect.go
Original file line number Diff line number Diff line change
Expand Up @@ -18,8 +18,9 @@ import (
)

const (
minStragglerCheckInterval = time.Second
stakeThresholdForStragglerSuspicion = 0.75
minStragglerCheckInterval = time.Second
stakeThresholdForStragglerSuspicion = 0.75
minimumStakeThresholdRequiredForNetworkInfo = 0.8
)

type stragglerDetectorConfig struct {
Expand All @@ -32,16 +33,16 @@ type stragglerDetectorConfig struct {
// log logs events
log logging.Logger

// minConfirmationThreshold is the minimum percentage that below it, we do not check if we are stragglers.
// minConfirmationThreshold is the minimum stake percentage that below it, we do not check if we are stragglers.
minConfirmationThreshold float64

// connectedPercent returns the percent of connected nodes.
// connectedPercent returns the stake percentage of connected nodes.
connectedPercent func() float64

// connectedValidators returns a set of tuples of NodeID and corresponding weight.
connectedValidators func() set.Set[ids.NodeWeight]

// lastAcceptedByNodeID returns the last reported height a node has reported, or false if it is unknown.
// lastAcceptedByNodeID returns the last accepted height a node has reported, or false if it is unknown.
lastAcceptedByNodeID func(id ids.NodeID) (ids.ID, bool)

// processing returns whether this block ID is known and its descendants have not yet been accepted by consensus.
Expand All @@ -58,7 +59,7 @@ type stragglerDetectorConfig struct {
// or false if it fails from some reason.
getSnapshot func() (snapshot, bool)

// haveWeFailedCatchingUp returns whether we have not replicated enough blocks of the given snapshot
// haveWeFailedCatchingUp returns whether we have not replicated enough blocks of the given snapshot
haveWeFailedCatchingUp func(snapshot) bool
}

Expand Down Expand Up @@ -174,27 +175,27 @@ func (sd *stragglerDetector) failedCatchingUp(s snapshot) bool {
}

func (sd *stragglerDetector) getNetworkSnapshot() (snapshot, bool) {
nodeWeight2lastAccepted, totalValidatorWeight, _ := sd.getNetworkInfo()
if len(nodeWeight2lastAccepted) == 0 {
nodeWeightToLastAccepted, totalValidatorWeight := sd.getNetworkInfo()
if len(nodeWeightToLastAccepted) == 0 {
return snapshot{}, false
}

ourLastAcceptedBlock := sd.lastAccepted()

prevLastAcceptedCount := len(nodeWeight2lastAccepted)
for k, v := range nodeWeight2lastAccepted {
prevLastAcceptedCount := len(nodeWeightToLastAccepted)
for k, v := range nodeWeightToLastAccepted {
if ourLastAcceptedBlock.Compare(v) == 0 {
delete(nodeWeight2lastAccepted, k)
delete(nodeWeightToLastAccepted, k)
}
}
newLastAcceptedCount := len(nodeWeight2lastAccepted)
newLastAcceptedCount := len(nodeWeightToLastAccepted)

sd.log.Trace("Excluding nodes with our own height", zap.Int("prev", prevLastAcceptedCount), zap.Int("new", newLastAcceptedCount))

// Ensure we have collected last accepted blocks that are not our own last accepted block
// for at least 80% stake of the total weight we are connected to.

totalWeightWeKnowItsLastAcceptedBlock, err := nodeWeight2lastAccepted.totalWeight()
totalWeightWeKnowItsLastAcceptedBlock, err := nodeWeightToLastAccepted.totalWeight()
if err != nil {
sd.log.Error("Failed computing total weight", zap.Error(err))
return snapshot{}, false
Expand All @@ -209,7 +210,7 @@ func (sd *stragglerDetector) getNetworkSnapshot() (snapshot, bool) {
}

snap := snapshot{
nodeWeights2Blocks: nodeWeight2lastAccepted,
nodeWeights2Blocks: nodeWeightToLastAccepted,
totalValidatorWeight: totalValidatorWeight,
}

Expand All @@ -220,14 +221,14 @@ func (sd *stragglerDetector) getNetworkSnapshot() (snapshot, bool) {
return snapshot{}, false
}

func (sd *stragglerDetector) getNetworkInfo() (nodeWeights2Blocks, uint64, uint64) {
func (sd *stragglerDetector) getNetworkInfo() (nodeWeights2Blocks, uint64) {
ratio := sd.connectedPercent()
if ratio < sd.minConfirmationThreshold {
// We don't know for sure whether we're behind or not.
// Even if we're behind, it's pointless to act before we have established
// connectivity with enough validators.
// connectivity with enough validators.
sd.log.Verbo("not enough connected stake to determine network info", zap.Float64("ratio", ratio))
return nil, 0, 0
return nil, 0
}

validators := nodeWeights(sd.connectedValidators().List())
Expand All @@ -245,29 +246,29 @@ func (sd *stragglerDetector) getNetworkInfo() (nodeWeights2Blocks, uint64, uint6
totalValidatorWeight, err := validators.totalWeight()
if err != nil {
sd.log.Error("Failed computing total weight", zap.Error(err))
return nil, 0, 0
return nil, 0
}

totalWeightWeKnowItsLastAcceptedBlock, err := nodeWeight2lastAccepted.totalWeight()
if err != nil {
sd.log.Error("Failed computing total weight", zap.Error(err))
return nil, 0, 0
return nil, 0
}

if totalValidatorWeight == 0 {
sd.log.Trace("Connected to zero weight")
return nil, 0, 0
return nil, 0
}

ratio = float64(totalWeightWeKnowItsLastAcceptedBlock) / float64(totalValidatorWeight)

// Ensure we have collected last accepted blocks for at least 80% stake of the total weight we are connected to.
if ratio < 0.8 {
// Ensure we have collected last accepted blocks for at least 80% (or so) stake of the total weight we are connected to.
if ratio < minimumStakeThresholdRequiredForNetworkInfo {
sd.log.Trace("Not collected enough information about last accepted blocks for the validators we are connected to",
zap.Float64("ratio", ratio))
return nil, 0, 0
return nil, 0
}
return nodeWeight2lastAccepted, totalValidatorWeight, totalWeightWeKnowItsLastAcceptedBlock
return nodeWeight2lastAccepted, totalValidatorWeight
}

type snapshot struct {
Expand Down
30 changes: 13 additions & 17 deletions snow/engine/snowman/straggler_detect_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -51,11 +51,9 @@ func TestNodeWeights2Blocks(t *testing.T) {
}

func TestGetNetworkSnapshot(t *testing.T) {
n1, err := ids.NodeIDFromString("NodeID-N5gc5soT3Gpr98NKpqvQQG2SgGrVPL64w")
require.NoError(t, err)
n1 := ids.GenerateTestNodeID()

n2, err := ids.NodeIDFromString("NodeID-NpagUxt6KQiwPch9Sd4osv8kD1TZnkjdk")
require.NoError(t, err)
n2 := ids.GenerateTestNodeID()

connectedValidators := func(s []ids.NodeWeight) func() set.Set[ids.NodeWeight] {
return func() set.Set[ids.NodeWeight] {
Expand Down Expand Up @@ -150,11 +148,9 @@ func TestGetNetworkSnapshot(t *testing.T) {
}

func TestFailedCatchingUp(t *testing.T) {
n1, err := ids.NodeIDFromString("NodeID-N5gc5soT3Gpr98NKpqvQQG2SgGrVPL64w")
require.NoError(t, err)
n1 := ids.GenerateTestNodeID()

n2, err := ids.NodeIDFromString("NodeID-NpagUxt6KQiwPch9Sd4osv8kD1TZnkjdk")
require.NoError(t, err)
n2 := ids.GenerateTestNodeID()

for _, testCase := range []struct {
description string
Expand Down Expand Up @@ -300,33 +296,33 @@ func TestCheckIfWeAreStragglingBehind(t *testing.T) {
for _, testCase := range []struct {
description string
timeAdvanced time.Duration
evalExtraAssertions func()
evalExtraAssertions func(t *testing.T)
expectedStragglingTime time.Duration
snapshotsRead []snapshot
haveWeFailedCatchingUpReturns bool
}{
{
description: "First invocation only sets the time",
evalExtraAssertions: func() {},
evalExtraAssertions: func(t *testing.T) {},

Check failure on line 306 in snow/engine/snowman/straggler_detect_test.go

View workflow job for this annotation

GitHub Actions / Lint

unused-parameter: parameter 't' seems to be unused, consider removing or renaming it as _ (revive)
},
{
description: "Should not check yet, as it is not time yet",
timeAdvanced: time.Millisecond * 500,
evalExtraAssertions: func() {},
evalExtraAssertions: func(t *testing.T) {},

Check failure on line 311 in snow/engine/snowman/straggler_detect_test.go

View workflow job for this annotation

GitHub Actions / Lint

unused-parameter: parameter 't' seems to be unused, consider removing or renaming it as _ (revive)
},
{
description: "Advance time some more, so now we should check",
timeAdvanced: time.Millisecond * 501,
snapshotsRead: []snapshot{{}},
evalExtraAssertions: func() {
evalExtraAssertions: func(t *testing.T) {
require.Contains(t, buff.String(), "No node snapshot obtained")
},
},
{
description: "Advance time some more to the first check where the snapshot isn't empty",
timeAdvanced: time.Second * 2,
snapshotsRead: []snapshot{nonEmptySnap},
evalExtraAssertions: func() {
evalExtraAssertions: func(t *testing.T) {
require.Empty(t, buff.String())
},
},
Expand All @@ -335,7 +331,7 @@ func TestCheckIfWeAreStragglingBehind(t *testing.T) {
timeAdvanced: time.Second * 2,
expectedStragglingTime: time.Second * 2,
haveWeFailedCatchingUpReturns: true,
evalExtraAssertions: func() {
evalExtraAssertions: func(t *testing.T) {
require.Empty(t, sd.prevSnapshot)
},
},
Expand All @@ -346,12 +342,12 @@ func TestCheckIfWeAreStragglingBehind(t *testing.T) {
// We carry over the total straggling time from previous testCase to this check,
// as we need the next check to nullify it.
expectedStragglingTime: time.Second * 2,
evalExtraAssertions: func() {},
evalExtraAssertions: func(t *testing.T) {},

Check failure on line 345 in snow/engine/snowman/straggler_detect_test.go

View workflow job for this annotation

GitHub Actions / Lint

unused-parameter: parameter 't' seems to be unused, consider removing or renaming it as _ (revive)
},
{
description: "The fourth check returns we have succeeded in catching up",
timeAdvanced: time.Second * 2,
evalExtraAssertions: func() {},
evalExtraAssertions: func(t *testing.T) {},

Check failure on line 350 in snow/engine/snowman/straggler_detect_test.go

View workflow job for this annotation

GitHub Actions / Lint

unused-parameter: parameter 't' seems to be unused, consider removing or renaming it as _ (revive)
},
} {
t.Run(testCase.description, func(t *testing.T) {
Expand All @@ -365,7 +361,7 @@ func TestCheckIfWeAreStragglingBehind(t *testing.T) {

haveWeFailedCatchingUpReturns = testCase.haveWeFailedCatchingUpReturns
require.Equal(t, testCase.expectedStragglingTime, sd.CheckIfWeAreStragglingBehind())
testCase.evalExtraAssertions()
testCase.evalExtraAssertions(t)

// Cleanup the log buffer, and make sure no snapshots remain for next testCase.
buff.Reset()
Expand Down
4 changes: 2 additions & 2 deletions snow/networking/handler/health.go
Original file line number Diff line number Diff line change
Expand Up @@ -66,10 +66,10 @@ func (h *handler) getDisconnectedValidators() set.Set[ids.NodeID] {
connectedVdrs := h.peerTracker.ConnectedValidators()
// vdrs - connectedVdrs is equal to the disconnectedVdrs
vdrs.Difference(connectedVdrs)
return trimWeights(vdrs)
return withoutWeights(vdrs)
}

func trimWeights(weights set.Set[ids.NodeWeight]) set.Set[ids.NodeID] {
func withoutWeights(weights set.Set[ids.NodeWeight]) set.Set[ids.NodeID] {
var res set.Set[ids.NodeID]
for _, nw := range weights.List() {
res.Add(nw.Node)
Expand Down

0 comments on commit f732a03

Please sign in to comment.