From 34ff473ed282ce8698292eb23a52c22e29f06b3f Mon Sep 17 00:00:00 2001 From: Robert Pirtle Date: Mon, 21 Oct 2024 13:38:20 -0700 Subject: [PATCH] fix: resolve indexer infinite loop. On very slow drives, or when run with limited resources, a node can have a delay between the block being saved and its block_results being saved. If the block exists but the block_results do not, an infinite loop occurs: the indexer repeatedly requests the block and block_results until they both exist. The lack of any delay between attempts can further constrain the node's resources and result in many calls for block_results before they are committed. This commit updates the condition for waiting to also cover the case where an error occurred during indexing. Previously, if the indexer failed to find the block_results, it would bombard the node with requests for them without backing off. With this change, errors trigger a wait. After waiting for either a new block or the timeout, the block results are more likely to exist. --- server/indexer_service.go | 16 ++++++++++++---- 1 file changed, 12 insertions(+), 4 deletions(-) diff --git a/server/indexer_service.go b/server/indexer_service.go index 88e8c56240..d517c8216b 100644 --- a/server/indexer_service.go +++ b/server/indexer_service.go @@ -23,6 +23,7 @@ import ( "github.com/cenkalti/backoff/v4" "github.com/cometbft/cometbft/libs/service" rpcclient "github.com/cometbft/cometbft/rpc/client" + coretypes "github.com/cometbft/cometbft/rpc/core/types" "github.com/cometbft/cometbft/types" ethermint "github.com/evmos/ethermint/types" @@ -112,8 +113,15 @@ func (eis *EVMIndexerService) OnStart() error { lastBlock = latestBlock } for { - if latestBlock <= lastBlock { - // nothing to index. wait for signal of new block + var block *coretypes.ResultBlock + var blockResult *coretypes.ResultBlockResults + if latestBlock <= lastBlock || err != nil { + // two cases: + // 1. nothing to index (indexer is caught up). wait for signal of new block. + // 2. 
previous attempt to index errored (failed to fetch the Block or BlockResults). + // in this case, wait before retrying the data fetching, rather than infinite looping + // a failing fetch. this can occur due to drive latency between the block existing and its + // block_results getting saved. select { case <-newBlockSignal: case <-time.After(NewBlockWaitTimeout): @@ -121,12 +129,12 @@ func (eis *EVMIndexerService) OnStart() error { continue } for i := lastBlock + 1; i <= latestBlock; i++ { - block, err := eis.client.Block(ctx, &i) + block, err = eis.client.Block(ctx, &i) if err != nil { eis.Logger.Error("failed to fetch block", "height", i, "err", err) break } - blockResult, err := eis.client.BlockResults(ctx, &i) + blockResult, err = eis.client.BlockResults(ctx, &i) if err != nil { eis.Logger.Error("failed to fetch block result", "height", i, "err", err) break