From 148ac237bede6127ad42265f0ead0e929abcf1b9 Mon Sep 17 00:00:00 2001 From: yyforyongyu Date: Fri, 13 Oct 2023 17:47:01 +0800 Subject: [PATCH] chain: wait for `bitcoind` to finish loading blocks during startup MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit We sometimes see this error in test, ``` --- FAIL: TestBitcoindEvents (7.72s) --- FAIL: TestBitcoindEvents/Events_via_RPC_Polling (1.00s) bitcoind_events_test.go:480: Error Trace: /home/runner/work/btcwallet/btcwallet/chain/bitcoind_events_test.go:480 /home/runner/work/btcwallet/btcwallet/chain/bitcoind_events_test.go:49 Error: Received unexpected error: -28: Loading block index… Test: TestBitcoindEvents/Events_via_RPC_Polling FAIL ``` which can also happen in real life if `bitcoind` is still loading blocks. This commit adds a retry logic to wait for `bitcoind` to finish its startup. --- chain/bitcoind_conn.go | 66 +++++++++++++++++++++++++++++++++++++++++- 1 file changed, 65 insertions(+), 1 deletion(-) diff --git a/chain/bitcoind_conn.go b/chain/bitcoind_conn.go index a59c335726..88edf1b05a 100644 --- a/chain/bitcoind_conn.go +++ b/chain/bitcoind_conn.go @@ -1,10 +1,13 @@ package chain import ( + "errors" "fmt" "net" + "strings" "sync" "sync/atomic" + "time" "github.com/btcsuite/btcd/btcjson" "github.com/btcsuite/btcd/chaincfg" @@ -38,8 +41,21 @@ const ( // errBlockPrunedStr is the error message returned by bitcoind upon // calling GetBlock on a pruned block. errBlockPrunedStr = "Block not available (pruned data)" + + // errStillLoadingCode is the error code returned when an RPC request + // is made but bitcoind is still in the process of loading or verifying + // blocks. + errStillLoadingCode = "-28" + + // bitcoindStartTimeout is the time we wait for bitcoind to finish + // loading and verifying blocks and become ready to serve RPC requests. + bitcoindStartTimeout = 30 * time.Second ) +// ErrBitcoindStartTimeout is returned when the bitcoind daemon fails to load +// and verify blocks under 30s during startup. +var ErrBitcoindStartTimeout = errors.New("bitcoind start timeout") + // BitcoindConfig contains all of the parameters required to establish a // connection to a bitcoind's RPC. type BitcoindConfig struct { @@ -311,9 +327,57 @@ func (c *BitcoindConn) sendTxToClients() { } } +// getBlockHashDuringStartup is used to call the getblockhash RPC during +// startup. It catches the case where bitcoind is still in the process of +// loading blocks, which returns the following error, +// - "-28: Loading block index..." +// - "-28: Verifying blocks..." +// In this case we'd retry every second until we time out after 30 seconds. +func getBlockHashDuringStartup( + client *rpcclient.Client) (*chainhash.Hash, error) { + + hash, err := client.GetBlockHash(0) + + // Exit early if there's no error. + if err == nil { + return hash, nil + } + + // If the error doesn't start with "-28", it's an unexpected error so + // we exit with it. + if !strings.Contains(err.Error(), errStillLoadingCode) { + return nil, err + } + + // Starts the timeout ticker(30s). + timeout := time.After(bitcoindStartTimeout) + + // Otherwise, we'd retry calling getblockhash or time out after 30s. + for { + select { + case <-timeout: + return nil, ErrBitcoindStartTimeout + + // Retry every second. + case <-time.After(1 * time.Second): + hash, err = client.GetBlockHash(0) + // If there's no error, we return the hash. + if err == nil { + return hash, nil + } + + // Otherwise, retry until we time out. We also check if + // the error returned here is still expected. + if !strings.Contains(err.Error(), errStillLoadingCode) { + return nil, err + } + } + } +} + // getCurrentNet returns the network on which the bitcoind node is running. func getCurrentNet(client *rpcclient.Client) (wire.BitcoinNet, error) { - hash, err := client.GetBlockHash(0) + hash, err := getBlockHashDuringStartup(client) if err != nil { return 0, err }